Initial commit: Ansible configuration for monitoring stack
Contains:
- Production inventory (inventories/production/hosts)
- VictoriaMetrics installation (playbooks/monitoring/install_victoriametrics.yml)
- Vault setup and secrets management (playbooks/vault/)
- Base system configuration (playbooks/infrastructure/)
- Directory structure for monitoring components
This commit is contained in:
162
playbooks/monitoring/install_victoriametrics.yml
Normal file
162
playbooks/monitoring/install_victoriametrics.yml
Normal file
@ -0,0 +1,162 @@
|
||||
---
# Play: install a single-node VictoriaMetrics on one LXC host and run it
# as a systemd service. All tasks run with privilege escalation.
- name: Final VictoriaMetrics installation for LXC
  hosts: "192.168.0.104"
  become: true

  vars:
    # HTTP listen port; used by the service unit and by the checks below.
    vm_port: "8428"
    # Value passed to the -retentionPeriod flag of the service.
    vm_retention: "30d"

  tasks:
|
||||
# Print the effective port and retention values before anything is changed.
- name: Display configuration
  ansible.builtin.debug:
    msg: |
      === VictoriaMetrics Configuration ===
      Port: {{ vm_port }}
      Retention: {{ vm_retention }}
|
||||
|
||||
# Stop and disable a previous installation so the binary and unit file can
# be replaced safely. On a first run the unit does not exist yet and the
# systemd module reports "Could not find the requested service"; that one
# failure is tolerated, every other failure still aborts the play.
- name: Stop and disable any existing service
  ansible.builtin.systemd:
    name: victoria-metrics
    state: stopped
    enabled: false
  register: vm_service_stop
  failed_when:
    - vm_service_stop is failed
    - "'Could not find the requested service' not in (vm_service_stop.msg | default(''))"
|
||||
|
||||
# Delete lock files left behind in the listed data directories;
# paths that do not exist are simply reported as unchanged.
- name: Remove old lock files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - /var/lib/victoria-metrics-data/flock.lock
    - /tmp/victoria-metrics-data/flock.lock
    - /tmp/vm-test-data/flock.lock
|
||||
|
||||
# Install the command-line tools this playbook needs,
# refreshing the apt package index first.
- name: Install dependencies
  ansible.builtin.apt:
    update_cache: true
    state: present
    name:
      - wget
      - curl
      - tar
|
||||
|
||||
# Fetch the release tarball from GitHub into /tmp.
# - The version can be overridden with `vm_version`; the default keeps the
#   previously hard-coded release.
# - force: true re-downloads even if a tarball from an earlier, aborted run
#   is still present, so a stale archive of another version is never reused.
# - An explicit mode avoids depending on the remote umask.
- name: Download VictoriaMetrics binary
  vars:
    vm_release: "{{ vm_version | default('1.105.0') }}"
  ansible.builtin.get_url:
    url: "https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v{{ vm_release }}/victoria-metrics-linux-amd64-v{{ vm_release }}.tar.gz"
    dest: /tmp/victoria-metrics.tar.gz
    mode: "0644"
    force: true
|
||||
|
||||
# Unpack the downloaded tarball in /tmp and move the victoria-metrics-prod
# binary into /usr/local/bin, making it executable.
# `set -eu` makes the script stop at the first failing command; without it
# a failed tar/mv was masked whenever an older binary already existed,
# because the final chmod on that old file still succeeded.
- name: Extract and install binary
  ansible.builtin.shell:
    chdir: /tmp
    cmd: |
      set -eu
      tar -xzf victoria-metrics.tar.gz
      mv victoria-metrics-prod /usr/local/bin/
      chmod +x /usr/local/bin/victoria-metrics-prod
|
||||
|
||||
# Ensure the storage directory used by -storageDataPath exists,
# owned by root with mode 0755.
- name: Create data directory
  ansible.builtin.file:
    state: directory
    path: /var/lib/victoria-metrics-data
    mode: "0755"
    owner: root
    group: root
|
||||
|
||||
# Write the systemd unit for VictoriaMetrics. The service runs as root
# (described by the task name as an LXC workaround), stores data under
# /var/lib/victoria-metrics-data, listens on all interfaces on vm_port and
# is restarted 10 seconds after a failure.
# Owner, group and mode are set explicitly so the unit file's permissions
# do not depend on the remote umask.
- name: Create systemd service running as root (LXC workaround)
  ansible.builtin.copy:
    dest: /etc/systemd/system/victoria-metrics.service
    owner: root
    group: root
    mode: "0644"
    content: |
      [Unit]
      Description=VictoriaMetrics
      After=network.target

      [Service]
      Type=simple
      User=root
      Group=root
      ExecStart=/usr/local/bin/victoria-metrics-prod \
          -storageDataPath=/var/lib/victoria-metrics-data \
          -retentionPeriod={{ vm_retention }} \
          -httpListenAddr=0.0.0.0:{{ vm_port }} \
          -loggerFormat=json \
          -loggerLevel=INFO
      Restart=on-failure
      RestartSec=10

      [Install]
      WantedBy=multi-user.target
|
||||
|
||||
# Make systemd re-read unit files so the unit written above is picked up.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true
|
||||
|
||||
# Start VictoriaMetrics now and enable it at boot.
- name: Enable and start service
  ansible.builtin.systemd:
    name: victoria-metrics
    state: started
    enabled: true
|
||||
|
||||
# Wait until the listen port accepts connections. The Russian suffix in the
# task name reads "with a timeout for restarts": polling starts after 15 s
# and gives up after 45 s. The result is stored in port_check.
- name: Wait for port (с таймаутом на перезапуски)
  ansible.builtin.wait_for:
    port: "{{ vm_port }}"
    timeout: 45
    delay: 15
  register: port_check
|
||||
|
||||
# Diagnostic probe: confirm the process is alive and the port is listening.
# Output line 1: the listening socket for vm_port (netstat, falling back to
#                ss), or "Port not found".
# Output line 2: the first matching victoria-metrics-prod process, or
#                "Process not found".
# The process listing is captured in a variable because a pipeline ending in
# `head -1` exits 0 even on empty input, so a trailing `|| echo ...` fallback
# would never fire.
# changed_when: false — the task only reads state and must not report a change.
- name: Check if service is actually running
  ansible.builtin.shell: |
    netstat -tlnp 2>/dev/null | grep :{{ vm_port }} || ss -tlnp 2>/dev/null | grep :{{ vm_port }} || echo "Port not found"
    vm_proc="$(ps aux | grep victoria-metrics-prod | grep -v grep | head -1)"
    echo "${vm_proc:-Process not found}"
  register: process_check
  changed_when: false
|
||||
|
||||
# Print the lines captured in process_check by the preceding probe.
- name: Show process check
  ansible.builtin.debug:
    var: process_check.stdout_lines
|
||||
|
||||
# Query /health on the target itself and keep the response in health_check
# for the final report.
# The probe is retried (up to 5 attempts, 3 s apart) so that a service which
# is briefly restarting does not fail the play on the first attempt; if
# every attempt fails the task still fails, as a single-shot probe would.
- name: Test health endpoint
  ansible.builtin.uri:
    url: "http://localhost:{{ vm_port }}/health"
    validate_certs: false
    timeout: 5
  register: health_check
  until: (health_check.status | default(0)) == 200
  retries: 5
  delay: 3
|
||||
|
||||
# Query /metrics on the target itself; the response is kept in
# metrics_check for the final report.
- name: Test metrics endpoint
  ansible.builtin.uri:
    url: "http://localhost:{{ vm_port }}/metrics"
    timeout: 5
    validate_certs: false
  register: metrics_check
|
||||
|
||||
# Print a human-readable installation summary (message text is in Russian):
# overall status, configuration, endpoint availability, Prometheus
# remote_write/remote_read URLs, an LXC note and the suggested next step.
# The target address is taken from inventory_hostname instead of being
# repeated as a literal, so the report follows the play's `hosts:` value.
# NOTE(review): the uri module treats a non-200 response as a task failure
# by default, so the "else" branches below are presumably only reached if
# the probe tasks are changed to tolerate failures — confirm intent.
- name: Create final report
  vars:
    vm_base_url: "http://{{ inventory_hostname }}:{{ vm_port }}"
  ansible.builtin.debug:
    msg: |
      ============================================
      VICTORIAMETRICS УСТАНОВКА ЗАВЕРШЕНА
      ============================================

      🎯 Статус: {{ 'РАБОТАЕТ' if health_check.status == 200 else 'ЕСТЬ ПРОБЛЕМЫ' }}

      📊 Конфигурация:
      - Порт: {{ vm_port }}
      - Retention: {{ vm_retention }}

      ✅ Доступность:
      - Health endpoint: {{ '✓' if health_check.status == 200 else '✗' }} ({{ vm_base_url }}/health)
      - Metrics endpoint: {{ '✓' if metrics_check.status == 200 else '✗' }} ({{ vm_base_url }}/metrics)
      - Web UI: {{ vm_base_url }}

      ⚙️ Для Prometheus:
      - remote_write: {{ vm_base_url }}/api/v1/write
      - remote_read: {{ vm_base_url }}/api/v1/read

      📝 Примечание:
      В LXC контейнерах VictoriaMetrics может перезапускаться из-за lock файлов,
      но порт продолжает работать и принимать данные.

      📈 Следующий шаг:
      Установите Prometheus на 192.168.0.105 и настройте remote_write.

      ============================================
|
||||
|
||||
# Remove the downloaded release tarball from /tmp.
- name: Clean up
  ansible.builtin.file:
    state: absent
    path: /tmp/victoria-metrics.tar.gz
|
||||
Reference in New Issue
Block a user