Initial commit: Ansible configuration for monitoring stack

Contains:
- Production inventory (inventories/production/hosts)
- VictoriaMetrics installation (playbooks/monitoring/install_victoriametrics.yml)
- Vault setup and secrets management (playbooks/vault/)
- Base system configuration (playbooks/infrastructure/)
- Directory structure for monitoring components
This commit is contained in:
Freazzzing
2026-02-02 11:22:24 +00:00
commit 30d35bc401
12 changed files with 548 additions and 0 deletions

42
.gitignore vendored Normal file
View File

@ -0,0 +1,42 @@
# Log output
ansible-log-*.log
*.log
# Python bytecode and caches
__pycache__/
*.pyc
*.pyd
*.pyo
# Ansible retry files and vault material
.vault_pass
*.retry
*.vault
# Editor / IDE state
.idea/
.vscode/
*.swo
*.swp
# Operating-system metadata files
.DS_Store
Thumbs.db
# Python virtual environments
ansible-venv/
env/
venv/
# Downloads and scratch files (/tmp/ is anchored to the repository root)
/tmp/
downloads/
*.tar.gz
*.zip
# Keys, certificates and other sensitive data
credentials.yml
id_*
secrets/
*.key
*.pem

8
ansible.cfg Normal file
View File

@ -0,0 +1,8 @@
# Project-wide Ansible settings.
[defaults]
# Inventory used when no -i option is given on the command line.
inventory = inventories/production/hosts
# NOTE(review): SSH host key verification is switched off here; confirm this
# is acceptable for the network these hosts live on.
host_key_checking = False
# Python interpreter Ansible uses on the managed hosts.
interpreter_python = /usr/bin/python3
# Fact gathering policy.
gathering = smart
[ssh_connection]
# Enable SSH pipelining for module execution.
pipelining = True

43
diagnose_vm.yml Normal file
View File

@ -0,0 +1,43 @@
---
# Read-only diagnostics for the VictoriaMetrics host: service state and recent
# journal, listening port and process, binary presence, and a `--help` smoke
# run. Command tasks are marked `changed_when: false` because they modify
# nothing; without it every run reports them as "changed".
- name: Diagnose VictoriaMetrics installation
  hosts: 192.168.0.104
  become: true
  tasks:
    # The task's exit status is that of the last command (journalctl), so a
    # stopped service does not abort the diagnosis.
    - name: Check VictoriaMetrics service status
      shell: |
        systemctl status victoria-metrics --no-pager
        journalctl -u victoria-metrics --no-pager -n 20
      register: service_info
      changed_when: false

    - name: Show service status
      debug:
        var: service_info.stdout_lines

    # Tries netstat first and falls back to ss; both lines end in an `echo`
    # fallback so the task itself never fails.
    - name: Check if port is listening
      shell: |
        netstat -tlnp | grep :8428 || ss -tlnp | grep :8428 || echo "Port 8428 not listening"
        ps aux | grep victoria-metrics | grep -v grep || echo "VictoriaMetrics process not found"
      register: port_check
      changed_when: false

    - name: Show port check
      debug:
        var: port_check.stdout_lines

    - name: Check binary exists and is executable
      stat:
        path: /usr/local/bin/victoria-metrics-prod
      register: binary_stat

    - name: Show binary info
      debug:
        var: binary_stat

    # `timeout 5` bounds the run in case the binary hangs.
    - name: Try to run VictoriaMetrics manually
      shell: |
        timeout 5 /usr/local/bin/victoria-metrics-prod --help || echo "Binary help not working"
      register: manual_test
      changed_when: false

    - name: Show manual test result
      debug:
        var: manual_test.stdout_lines

View File

@ -0,0 +1,26 @@
# Connection settings applied to every host in this inventory
[all:vars]
ansible_user=root
ansible_ssh_private_key_file=~/.ssh/id_ansible
# Main groups (the Ansible control node is not part of them)
[infrastructure]
192.168.0.100 # git
192.168.0.102 # terraform
192.168.0.103 # vault
192.168.0.104 # victoriametrics
192.168.0.105 # prometheus
192.168.0.106 # grafana
[applications]
192.168.0.110 # app1
192.168.0.111 # app2
192.168.0.112 # app3
# The Ansible node is kept in a separate group
[ansible_control]
192.168.0.101
# Group of every host except the Ansible node
[all_except_ansible:children]
infrastructure
applications

View File

@ -0,0 +1,31 @@
---
# Updates and cleans up apt-based hosts: stops unattended-upgrades, clears
# stale apt/dpkg locks, refreshes the package cache, applies a safe upgrade
# and removes obsolete packages and cached archives.
- name: System update and cleanup
  hosts: all_except_ansible
  become: true
  tasks:
    - name: Stop unattended upgrades
      systemd:
        name: unattended-upgrades
        state: stopped

    # pkill accepts exactly ONE pattern. The previous form
    # `pkill -9 apt-get apt dpkg` exited with a usage error (hidden by
    # `|| true`) and never killed anything. The alternation with -x matches
    # the three process names exactly.
    # NOTE(review): force-killing dpkg can leave packages half-configured;
    # the trailing `dpkg --configure -a` is what repairs that state.
    - name: Clean any apt locks
      shell: |
        pkill -9 -x 'apt-get|apt|dpkg' 2>/dev/null || true
        rm -f /var/lib/apt/lists/lock /var/lib/dpkg/lock*
        dpkg --configure -a 2>/dev/null || true
      # Best-effort step: a failure here must not stop the update.
      ignore_errors: true

    # The cache is only refreshed when it is older than one day (86400 s).
    - name: Update apt cache
      apt:
        update_cache: true
        cache_valid_time: 86400

    - name: Upgrade system packages
      apt:
        upgrade: safe
        autoremove: true

    - name: Clean apt cache
      apt:
        autoclean: true

View File

@ -0,0 +1,21 @@
---
# Installs the baseline utility set on every managed host except the
# Ansible control node.
- name: Install essential packages
  become: true
  hosts: all_except_ansible
  vars:
    # Packages every host is expected to have.
    essential_packages:
      - curl
      - wget
      - git
      - htop
      - net-tools
      - ufw
      - software-properties-common
      - ca-certificates
      - gnupg
      - lsb-release
  tasks:
    - name: Install system utilities
      apt:
        update_cache: true
        state: present
        name: "{{ essential_packages }}"

View File

@ -0,0 +1,162 @@
---
# Installs single-node VictoriaMetrics on the monitoring host and runs it as a
# root-owned systemd service (LXC workaround), then verifies the HTTP
# endpoints and prints a summary report.
#
# Variables:
#   vm_port       TCP port the HTTP listener binds to.
#   vm_retention  Value passed to -retentionPeriod.
#   vm_version    Release tag used to build the download URL.
- name: Final VictoriaMetrics installation for LXC
  hosts: 192.168.0.104
  become: true
  vars:
    vm_port: "8428"
    vm_retention: "30d"
    vm_version: "v1.105.0"
  tasks:
    - name: Display configuration
      debug:
        msg: |
          === VictoriaMetrics Configuration ===
          Port: {{ vm_port }}
          Retention: {{ vm_retention }}

    # On a first install the unit does not exist yet and the systemd module
    # fails with "Could not find the requested service"; only that specific
    # failure is tolerated, anything else still stops the play.
    - name: Stop and disable any existing service
      systemd:
        name: victoria-metrics
        state: stopped
        enabled: false
      register: vm_stop
      failed_when:
        - vm_stop is failed
        - "'Could not find the requested service' not in (vm_stop.msg | default(''))"

    - name: Remove old lock files
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /var/lib/victoria-metrics-data/flock.lock
        - /tmp/victoria-metrics-data/flock.lock
        - /tmp/vm-test-data/flock.lock

    - name: Install dependencies
      apt:
        name:
          - wget
          - curl
          - tar
        state: present
        update_cache: true

    # force: true so a stale archive left by an aborted run (possibly of a
    # different vm_version) is never reused.
    - name: Download VictoriaMetrics binary
      get_url:
        url: "https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/{{ vm_version }}/victoria-metrics-linux-amd64-{{ vm_version }}.tar.gz"
        dest: /tmp/victoria-metrics.tar.gz
        force: true

    - name: Extract release archive
      unarchive:
        src: /tmp/victoria-metrics.tar.gz
        dest: /tmp
        remote_src: true

    - name: Install binary
      copy:
        src: /tmp/victoria-metrics-prod
        dest: /usr/local/bin/victoria-metrics-prod
        remote_src: true
        owner: root
        group: root
        mode: "0755"

    - name: Create data directory
      file:
        path: /var/lib/victoria-metrics-data
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: Create systemd service running as root (LXC workaround)
      copy:
        content: |
          [Unit]
          Description=VictoriaMetrics
          After=network.target
          [Service]
          Type=simple
          User=root
          Group=root
          ExecStart=/usr/local/bin/victoria-metrics-prod \
            -storageDataPath=/var/lib/victoria-metrics-data \
            -retentionPeriod={{ vm_retention }} \
            -httpListenAddr=0.0.0.0:{{ vm_port }} \
            -loggerFormat=json \
            -loggerLevel=INFO
          Restart=on-failure
          RestartSec=10
          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/victoria-metrics.service
        owner: root
        group: root
        mode: "0644"

    - name: Reload systemd
      systemd:
        daemon_reload: true

    - name: Enable and start service
      systemd:
        name: victoria-metrics
        enabled: true
        state: started

    # The initial delay leaves room for the restarts mentioned in the name.
    - name: Wait for port (с таймаутом на перезапуски)
      wait_for:
        port: "{{ vm_port }}"
        delay: 15
        timeout: 45
      register: port_check

    # Read-only probe: checks that the process is alive and the port is open.
    - name: Check if service is actually running
      shell: |
        # Проверяем что процесс жив и порт слушается
        netstat -tlnp 2>/dev/null | grep :{{ vm_port }} || ss -tlnp 2>/dev/null | grep :{{ vm_port }} || echo "Port not found"
        ps aux | grep victoria-metrics-prod | grep -v grep | head -1 || echo "Process not found"
      register: process_check
      changed_when: false

    - name: Show process check
      debug:
        var: process_check.stdout_lines

    # failed_when: false lets the report below show a failing endpoint; the
    # play is still failed explicitly by the last task. Previously a non-200
    # answer aborted the play here and the failure branch of the report could
    # never be printed.
    - name: Test health endpoint
      uri:
        url: "http://localhost:{{ vm_port }}/health"
        timeout: 5
      register: health_check
      failed_when: false

    - name: Test metrics endpoint
      uri:
        url: "http://localhost:{{ vm_port }}/metrics"
        timeout: 5
      register: metrics_check
      failed_when: false

    # inventory_hostname is the address this play targets, so the printed
    # URLs follow the `hosts:` line instead of repeating the literal.
    - name: Create final report
      debug:
        msg: |
          ============================================
          VICTORIAMETRICS УСТАНОВКА ЗАВЕРШЕНА
          ============================================
          🎯 Статус: {{ 'РАБОТАЕТ' if (health_check.status | default(-1)) == 200 else 'ЕСТЬ ПРОБЛЕМЫ' }}
          📊 Конфигурация:
          - Порт: {{ vm_port }}
          - Retention: {{ vm_retention }}
          ✅ Доступность:
          - Health endpoint: {{ '✓' if (health_check.status | default(-1)) == 200 else '✗' }} (http://{{ inventory_hostname }}:{{ vm_port }}/health)
          - Metrics endpoint: {{ '✓' if (metrics_check.status | default(-1)) == 200 else '✗' }} (http://{{ inventory_hostname }}:{{ vm_port }}/metrics)
          - Web UI: http://{{ inventory_hostname }}:{{ vm_port }}
          ⚙️ Для Prometheus:
          - remote_write: http://{{ inventory_hostname }}:{{ vm_port }}/api/v1/write
          - remote_read: http://{{ inventory_hostname }}:{{ vm_port }}/api/v1/read
          📝 Примечание:
          В LXC контейнерах VictoriaMetrics может перезапускаться из-за lock файлов,
          но порт продолжает работать и принимать данные.
          📈 Следующий шаг:
          Установите Prometheus на 192.168.0.105 и настройте remote_write.
          ============================================

    - name: Clean up
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /tmp/victoria-metrics.tar.gz
        - /tmp/victoria-metrics-prod

    - name: Fail the play when VictoriaMetrics is not healthy
      assert:
        that:
          - (health_check.status | default(-1)) == 200
          - (metrics_check.status | default(-1)) == 200
        fail_msg: "VictoriaMetrics did not answer HTTP 200 on /health and /metrics"

View File

@ -0,0 +1,68 @@
---
# Writes connection metadata for the monitoring stack to Vault under
# secret/data/monitoring/* and then lists the top-level keys of the mount.
#
# Credentials come from the environment of the control node, never from this
# file:
#   VAULT_TOKEN             required
#   GRAFANA_ADMIN_PASSWORD  required
#   VAULT_ADDR              optional, defaults to http://192.168.0.103:8200
#
# SECURITY: an earlier revision of this playbook contained a literal Vault
# token and Grafana admin password. Both must be treated as compromised and
# rotated.
- name: Create monitoring secrets in Vault
  hosts: localhost
  connection: local
  vars:
    vault_addr: "{{ lookup('env', 'VAULT_ADDR') | default('http://192.168.0.103:8200', true) }}"
    vault_token: "{{ lookup('env', 'VAULT_TOKEN') }}"
    grafana_admin_password: "{{ lookup('env', 'GRAFANA_ADMIN_PASSWORD') }}"
  tasks:
    - name: Require credentials from the environment
      assert:
        that:
          - vault_token | length > 0
          - grafana_admin_password | length > 0
        fail_msg: "Export VAULT_TOKEN and GRAFANA_ADMIN_PASSWORD before running this playbook"
        quiet: true

    # no_log keeps the token header (and, below, the password body) out of
    # verbose output and logs.
    - name: Create VictoriaMetrics secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/victoriametrics"
        method: POST
        headers:
          X-Vault-Token: "{{ vault_token }}"
          Content-Type: application/json
        body_format: json
        body:
          data:
            host: "192.168.0.104"
            port: "8428"
            url: "http://192.168.0.104:8428"
            retention_days: "30"
            description: "VictoriaMetrics single instance"
      no_log: true

    - name: Create Prometheus secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/prometheus"
        method: POST
        headers:
          X-Vault-Token: "{{ vault_token }}"
          Content-Type: application/json
        body_format: json
        body:
          data:
            host: "192.168.0.105"
            port: "9090"
            scrape_interval: "30s"
      no_log: true

    - name: Create Grafana secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/grafana"
        method: POST
        headers:
          X-Vault-Token: "{{ vault_token }}"
          Content-Type: application/json
        body_format: json
        body:
          data:
            host: "192.168.0.106"
            port: "3000"
            admin_user: "admin"
            admin_password: "{{ grafana_admin_password }}"
      no_log: true

    - name: Verify secrets created
      uri:
        url: "{{ vault_addr }}/v1/secret/metadata"
        method: LIST
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
      register: secrets_list
      no_log: true

    # Bracket notation is required: `.data.keys` resolves to the Python
    # dict.keys method, not to the "keys" entry of the LIST response.
    - name: Show created secrets
      debug:
        msg: "Secrets in Vault: {{ (secrets_list.content | from_json).data['keys'] }}"

View File

@ -0,0 +1,16 @@
---
# Probes the Vault health endpoint on the Vault host itself and prints the
# HTTP status that came back. A failed probe is tolerated so the status is
# always displayed.
- name: Setup HashiCorp Vault
  become: true
  hosts: 192.168.0.103
  tasks:
    - name: Check Vault status
      register: vault_status
      ignore_errors: true
      uri:
        validate_certs: false
        url: "http://localhost:8200/v1/sys/health"

    - name: Display Vault status
      debug:
        msg: "Vault status: {{ vault_status.status }}"

22
test_vault.py Normal file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python3
import hvac
import os
# Smoke test for Vault access through hvac: reports authentication and seal
# state, then reads one KV v2 secret and prints its "url" and "user" fields.
# Exits with status 1 when the environment is incomplete or the read fails.

# Connection settings come from the environment; refuse to run without them
# instead of building a client from None values.
vault_addr = os.environ.get('VAULT_ADDR')
vault_token = os.environ.get('VAULT_TOKEN')
if not vault_addr or not vault_token:
    print("Error: VAULT_ADDR and VAULT_TOKEN must be set")
    raise SystemExit(1)

# Connect to Vault
client = hvac.Client(url=vault_addr, token=vault_token)

# Connection check
print(f"Vault is authenticated: {client.is_authenticated()}")
print(f"Vault seal status: {client.sys.is_sealed()}")

# Read the secret
# NOTE(review): test_vault_correct.yml reads secret/data/git/forgejo; confirm
# that 'Forgeo' is the intended path here.
try:
    secret = client.secrets.kv.v2.read_secret_version(path='Forgeo')
    print("\nSecret data:")
    print(f"URL: {secret['data']['data']['url']}")
    print(f"User: {secret['data']['data']['user']}")
except Exception as e:
    print(f"Error reading secret: {e}")
    # Signal the failure to the caller instead of exiting 0.
    raise SystemExit(1)

78
test_vault_correct.yml Normal file
View File

@ -0,0 +1,78 @@
---
# Integration test against Vault: reads the Forgejo secret, writes and reads
# back the VictoriaMetrics secret, and lists the top-level keys of the mount.
#
# Credentials come from the environment of the control node:
#   VAULT_TOKEN  required
#   VAULT_ADDR   optional, defaults to http://192.168.0.103:8200
#
# SECURITY: an earlier revision of this playbook contained a literal Vault
# token and printed the Git password in clear text. The token must be treated
# as compromised and rotated.
- name: Test Vault Integration with CORRECT paths
  hosts: localhost
  connection: local
  gather_facts: false
  vars:
    vault_addr: "{{ lookup('env', 'VAULT_ADDR') | default('http://192.168.0.103:8200', true) }}"
    vault_token: "{{ lookup('env', 'VAULT_TOKEN') }}"
  tasks:
    - name: Require a Vault token from the environment
      assert:
        that:
          - vault_token | length > 0
        fail_msg: "Export VAULT_TOKEN before running this playbook"
        quiet: true

    # no_log keeps the token header and the returned secret out of verbose
    # output and logs; the registered variable stays usable.
    - name: Test 1 - Read Git secret from Vault
      uri:
        url: "{{ vault_addr }}/v1/secret/data/git/forgejo"
        method: GET
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: git_secret
      no_log: true

    # Only the presence of the password is reported, never its value.
    - name: Display Git credentials
      debug:
        msg: |
          Git Forgejo Credentials:
          URL: {{ (git_secret.content | from_json).data.data.url }}
          User: {{ (git_secret.content | from_json).data.data.user }}
          Password: {{ 'present (hidden)' if ((git_secret.content | from_json).data.data.password | default('')) else 'missing' }}

    - name: Test 2 - Create VictoriaMetrics secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/victoriametrics"
        method: POST
        headers:
          X-Vault-Token: "{{ vault_token }}"
          Content-Type: application/json
        body_format: json
        body:
          data:
            host: "192.168.0.104"
            port: "8428"
            url: "http://192.168.0.104:8428"
            retention_days: "30"
        validate_certs: false
      register: create_vm_secret
      no_log: true

    - name: Test 3 - Read VictoriaMetrics secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/victoriametrics"
        method: GET
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: vm_secret
      no_log: true

    - name: Display VictoriaMetrics configuration
      debug:
        msg: |
          VictoriaMetrics (Container 119):
          Host: {{ (vm_secret.content | from_json).data.data.host }}
          Port: {{ (vm_secret.content | from_json).data.data.port }}
          URL: {{ (vm_secret.content | from_json).data.data.url }}
          Retention: {{ (vm_secret.content | from_json).data.data.retention_days }} days

    - name: Test 4 - List all secrets
      uri:
        url: "{{ vault_addr }}/v1/secret/metadata"
        method: LIST
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: secrets_list
      no_log: true

    # Bracket notation is required: `.data.keys` resolves to the Python
    # dict.keys method, not to the "keys" entry of the LIST response.
    - name: Display secrets structure
      debug:
        msg: "Secrets in Vault: {{ (secrets_list.content | from_json).data['keys'] }}"

31
test_vault_simple.py Normal file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python3
import subprocess
import json
import os
# Smoke test for Vault access without hvac: fetches one KV v2 secret through
# curl and prints its "url" and "user" fields. Exits with status 1 on any
# failure (missing environment, curl/HTTP error, unexpected response shape).

# Read the connection settings from the environment
vault_addr = os.environ.get('VAULT_ADDR')
vault_token = os.environ.get('VAULT_TOKEN')
if not vault_addr or not vault_token:
    print("Ошибка: Не установлены переменные VAULT_ADDR или VAULT_TOKEN")
    raise SystemExit(1)

# Query Vault through curl (the simple way).
# --fail makes curl exit non-zero on HTTP errors such as 403/404; with plain
# -s those returned exit code 0 and surfaced later as a confusing KeyError.
# -S keeps curl's error message on stderr even though -s silences progress.
# NOTE(review): the token is passed on the command line and is therefore
# visible in the process list while curl runs.
cmd = [
    'curl', '-sS', '--fail',
    '-H', f'X-Vault-Token: {vault_token}',
    f'{vault_addr}/v1/secret/data/Forgeo'
]

try:
    result = subprocess.run(cmd, capture_output=True, text=True)
except OSError as e:
    # curl is missing or cannot be executed
    print(f"Ошибка: {e}")
    raise SystemExit(1)

if result.returncode != 0:
    print(f"Ошибка curl: {result.stderr}")
    raise SystemExit(1)

try:
    data = json.loads(result.stdout)
    secret = data['data']['data']
    url = secret['url']
    user = secret['user']
except (ValueError, KeyError, TypeError) as e:
    # Body was not JSON or does not have the expected KV v2 layout
    print(f"Ошибка: {e}")
    raise SystemExit(1)

print("Успешное подключение к Vault!")
print(f"URL: {url}")
print(f"User: {user}")