Initial commit: Ansible configuration for monitoring stack
Contains:
- Production inventory (inventories/production/hosts)
- VictoriaMetrics installation (playbooks/monitoring/install_victoriametrics.yml)
- Vault setup and secrets management (playbooks/vault/)
- Base system configuration (playbooks/infrastructure/)
- Directory structure for monitoring components
This commit is contained in:
42
.gitignore
vendored
Normal file
42
.gitignore
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
# --- Log output ---
ansible-log-*.log
*.log

# --- Python bytecode and caches ---
__pycache__/
*.pyc
*.pyd
*.pyo

# --- Ansible ---
.vault_pass
*.retry
*.vault

# --- Editor / IDE state ---
.idea/
.vscode/
*.swo
*.swp

# --- OS-generated files ---
.DS_Store
Thumbs.db

# --- Python virtual environments ---
ansible-venv/
env/
venv/

# --- Temporary files and downloaded archives ---
/tmp/
downloads/
*.tar.gz
*.zip

# --- Sensitive data: keys, certificates, credentials ---
credentials.yml
id_*
secrets/
*.key
*.pem
|
||||
8
ansible.cfg
Normal file
8
ansible.cfg
Normal file
@ -0,0 +1,8 @@
|
||||
# Ansible settings for this repository.

[defaults]
# Fact gathering policy.
gathering = smart
# NOTE(review): SSH host key verification is switched off here; confirm this
# is acceptable for the production inventory.
host_key_checking = False
interpreter_python = /usr/bin/python3
# Inventory used when no -i option is given on the command line.
inventory = inventories/production/hosts

[ssh_connection]
pipelining = True
|
||||
43
diagnose_vm.yml
Normal file
43
diagnose_vm.yml
Normal file
@ -0,0 +1,43 @@
|
||||
---
# Read-only diagnostics for the VictoriaMetrics service.
# Every shell task is marked changed_when: false because none of them modifies
# the target; without it each run would report these tasks as "changed".
- name: Diagnose VictoriaMetrics installation
  hosts: 192.168.0.104
  become: true

  tasks:
    - name: Check VictoriaMetrics service status
      shell: |
        systemctl status victoria-metrics --no-pager
        journalctl -u victoria-metrics --no-pager -n 20
      register: service_info
      changed_when: false

    - name: Show service status
      debug:
        var: service_info.stdout_lines

    # Tries netstat first and falls back to ss; each pipeline ends in an echo
    # so the task still succeeds when nothing is found.
    - name: Check if port is listening
      shell: |
        netstat -tlnp | grep :8428 || ss -tlnp | grep :8428 || echo "Port 8428 not listening"
        ps aux | grep victoria-metrics | grep -v grep || echo "VictoriaMetrics process not found"
      register: port_check
      changed_when: false

    - name: Show port check
      debug:
        var: port_check.stdout_lines

    - name: Check binary exists and is executable
      stat:
        path: /usr/local/bin/victoria-metrics-prod
      register: binary_stat

    - name: Show binary info
      debug:
        var: binary_stat

    # "timeout 5" bounds the run in case the binary hangs instead of printing help.
    - name: Try to run VictoriaMetrics manually
      shell: |
        timeout 5 /usr/local/bin/victoria-metrics-prod --help || echo "Binary help not working"
      register: manual_test
      changed_when: false

    - name: Show manual test result
      debug:
        var: manual_test.stdout_lines
|
||||
26
inventories/production/hosts
Normal file
26
inventories/production/hosts
Normal file
@ -0,0 +1,26 @@
|
||||
# Production inventory.

# Main groups (the Ansible control node is not part of them).
[infrastructure]
# git
192.168.0.100
# terraform
192.168.0.102
# vault
192.168.0.103
# victoriametrics
192.168.0.104
# prometheus
192.168.0.105
# grafana
192.168.0.106

[applications]
# app1
192.168.0.110
# app2
192.168.0.111
# app3
192.168.0.112

# The Ansible node is kept in its own group.
[ansible_control]
192.168.0.101

# Every host except the Ansible node.
[all_except_ansible:children]
infrastructure
applications

# Connection settings shared by all hosts.
[all:vars]
ansible_user=root
ansible_ssh_private_key_file=~/.ssh/id_ansible
|
||||
31
playbooks/infrastructure/01-system-update.yml
Normal file
31
playbooks/infrastructure/01-system-update.yml
Normal file
@ -0,0 +1,31 @@
|
||||
---
# Updates and cleans up apt packages on every host in all_except_ansible.
- name: System update and cleanup
  hosts: all_except_ansible
  become: true

  tasks:
    # The systemd module fails when asked to stop a unit that is not installed,
    # which would abort the whole play on hosts without unattended-upgrades.
    # Look the unit up first and skip the stop when it is absent.
    - name: Collect service facts
      service_facts:

    - name: Stop unattended upgrades
      systemd:
        name: unattended-upgrades
        state: stopped
      when: "'unattended-upgrades.service' in ansible_facts.services"

    # pkill accepts exactly one pattern; the previous "pkill -9 apt-get apt dpkg"
    # was rejected as a usage error (hidden by "2>/dev/null || true"), so no
    # process was ever killed. A single alternation with -x (exact name match)
    # does what was intended.
    # NOTE(review): force-killing dpkg and deleting lock files can leave the
    # package database half-configured; "dpkg --configure -a" below is the
    # repair step for that.
    - name: Clean any apt locks
      shell: |
        pkill -9 -x 'apt-get|apt|dpkg' 2>/dev/null || true
        rm -f /var/lib/apt/lists/lock /var/lib/dpkg/lock*
        dpkg --configure -a 2>/dev/null || true
      ignore_errors: true

    # cache_valid_time is in seconds: skip the refresh when the cache is
    # younger than 24 hours.
    - name: Update apt cache
      apt:
        update_cache: true
        cache_valid_time: 86400

    - name: Upgrade system packages
      apt:
        upgrade: safe
        autoremove: true

    - name: Clean apt cache
      apt:
        autoclean: true
|
||||
21
playbooks/infrastructure/02-install-essentials.yml
Normal file
21
playbooks/infrastructure/02-install-essentials.yml
Normal file
@ -0,0 +1,21 @@
|
||||
---
# Installs a baseline set of utilities on every host in all_except_ansible.
- name: Install essential packages
  become: true
  hosts: all_except_ansible

  tasks:
    - name: Install system utilities
      apt:
        # Refresh the package index before installing.
        update_cache: true
        state: present
        name: [
          curl, wget, git, htop, net-tools, ufw,
          software-properties-common, ca-certificates, gnupg, lsb-release,
        ]
|
||||
162
playbooks/monitoring/install_victoriametrics.yml
Normal file
162
playbooks/monitoring/install_victoriametrics.yml
Normal file
@ -0,0 +1,162 @@
|
||||
---
# Installs a single-node VictoriaMetrics from the upstream release tarball and
# runs it under systemd as root (LXC workaround).
#
# Flow: stop any previous instance -> download and install the binary ->
# write the unit file -> start -> probe port and HTTP endpoints -> print a
# report -> clean up -> fail the play if the health probe did not return 200.
- name: Final VictoriaMetrics installation for LXC
  hosts: 192.168.0.104
  become: true

  vars:
    vm_port: "8428"
    vm_retention: "30d"
    # Release tag to install; used to build the download URL.
    vm_version: "v1.105.0"
    # Address printed in the final report.
    vm_host: "{{ inventory_hostname }}"

  tasks:
    - name: Display configuration
      debug:
        msg: |
          === VictoriaMetrics Configuration ===
          Port: {{ vm_port }}
          Retention: {{ vm_retention }}

    # On a fresh host the unit does not exist yet and the systemd module would
    # fail the play, so the stop is only attempted when the unit is known.
    - name: Collect service facts
      service_facts:

    - name: Stop and disable any existing service
      systemd:
        name: victoria-metrics
        state: stopped
        enabled: false
      when: "'victoria-metrics.service' in ansible_facts.services"

    - name: Remove old lock files
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /var/lib/victoria-metrics-data/flock.lock
        - /tmp/victoria-metrics-data/flock.lock
        - /tmp/vm-test-data/flock.lock

    - name: Install dependencies
      apt:
        name:
          - wget
          - curl
          - tar
        state: present
        update_cache: true

    # NOTE(review): the download is not verified; consider adding a checksum.
    - name: Download VictoriaMetrics binary
      get_url:
        url: "https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/{{ vm_version }}/victoria-metrics-linux-amd64-{{ vm_version }}.tar.gz"
        dest: /tmp/victoria-metrics.tar.gz

    # "set -e" makes the task fail as soon as any step fails. Without it only
    # the exit code of the final chmod counted, so a failed tar/mv could pass
    # silently whenever an older binary was already installed.
    - name: Extract and install binary
      shell: |
        set -e
        tar -xzf victoria-metrics.tar.gz
        mv victoria-metrics-prod /usr/local/bin/
        chmod +x /usr/local/bin/victoria-metrics-prod
      args:
        chdir: /tmp

    - name: Create data directory
      file:
        path: /var/lib/victoria-metrics-data
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: Create systemd service running as root (LXC workaround)
      copy:
        content: |
          [Unit]
          Description=VictoriaMetrics
          After=network.target

          [Service]
          Type=simple
          User=root
          Group=root
          ExecStart=/usr/local/bin/victoria-metrics-prod \
              -storageDataPath=/var/lib/victoria-metrics-data \
              -retentionPeriod={{ vm_retention }} \
              -httpListenAddr=0.0.0.0:{{ vm_port }} \
              -loggerFormat=json \
              -loggerLevel=INFO
          Restart=on-failure
          RestartSec=10

          [Install]
          WantedBy=multi-user.target
        dest: /etc/systemd/system/victoria-metrics.service

    - name: Reload systemd
      systemd:
        daemon_reload: true

    - name: Enable and start service
      systemd:
        name: victoria-metrics
        enabled: true
        state: started

    # delay/timeout are in seconds: wait 15 s before the first probe, give up
    # after 45 s.
    - name: Wait for port (с таймаутом на перезапуски)
      wait_for:
        port: "{{ vm_port }}"
        delay: 15
        timeout: 45
      register: port_check

    - name: Check if service is actually running
      shell: |
        # Check that the process is alive and the port is being listened on
        netstat -tlnp 2>/dev/null | grep :{{ vm_port }} || ss -tlnp 2>/dev/null | grep :{{ vm_port }} || echo "Port not found"
        ps aux | grep victoria-metrics-prod | grep -v grep | head -1 || echo "Process not found"
      register: process_check
      changed_when: false

    - name: Show process check
      debug:
        var: process_check.stdout_lines

    # failed_when: false keeps the play going on a non-200 answer so that the
    # report below can show the problem; the play is failed explicitly
    # at the very end instead.
    - name: Test health endpoint
      uri:
        url: "http://localhost:{{ vm_port }}/health"
        validate_certs: false
        timeout: 5
      register: health_check
      failed_when: false

    - name: Test metrics endpoint
      uri:
        url: "http://localhost:{{ vm_port }}/metrics"
        validate_certs: false
        timeout: 5
      register: metrics_check
      failed_when: false

    - name: Create final report
      debug:
        msg: |
          ============================================
          VICTORIAMETRICS УСТАНОВКА ЗАВЕРШЕНА
          ============================================

          🎯 Статус: {{ 'РАБОТАЕТ' if (health_check.status | default(-1)) == 200 else 'ЕСТЬ ПРОБЛЕМЫ' }}

          📊 Конфигурация:
          - Порт: {{ vm_port }}
          - Retention: {{ vm_retention }}

          ✅ Доступность:
          - Health endpoint: {{ '✓' if (health_check.status | default(-1)) == 200 else '✗' }} (http://{{ vm_host }}:{{ vm_port }}/health)
          - Metrics endpoint: {{ '✓' if (metrics_check.status | default(-1)) == 200 else '✗' }} (http://{{ vm_host }}:{{ vm_port }}/metrics)
          - Web UI: http://{{ vm_host }}:{{ vm_port }}

          ⚙️ Для Prometheus:
          - remote_write: http://{{ vm_host }}:{{ vm_port }}/api/v1/write
          - remote_read: http://{{ vm_host }}:{{ vm_port }}/api/v1/read

          📝 Примечание:
          В LXC контейнерах VictoriaMetrics может перезапускаться из-за lock файлов,
          но порт продолжает работать и принимать данные.

          📈 Следующий шаг:
          Установите Prometheus на 192.168.0.105 и настройте remote_write.

          ============================================

    - name: Clean up
      file:
        path: /tmp/victoria-metrics.tar.gz
        state: absent

    # Preserve the original outcome: an unhealthy service still fails the play,
    # but only after the report and the cleanup have run.
    - name: Fail the play when the health check did not pass
      fail:
        msg: "VictoriaMetrics health endpoint returned status {{ health_check.status | default('unknown') }}"
      when: (health_check.status | default(-1)) != 200
|
||||
68
playbooks/vault/create_monitoring_secrets.yml
Normal file
68
playbooks/vault/create_monitoring_secrets.yml
Normal file
@ -0,0 +1,68 @@
|
||||
---
# Writes connection settings for the monitoring stack into Vault through the
# KV v2 HTTP API under secret/data/monitoring/<name>, then lists the mount.
#
# Required environment:
#   VAULT_TOKEN             token used for every request
#   GRAFANA_ADMIN_PASSWORD  password stored in the Grafana secret
# Optional environment:
#   VAULT_ADDR              defaults to http://192.168.0.103:8200
#
# SECURITY: earlier revisions of this file carried a Vault token and a Grafana
# password in plain text. Treat both as compromised: revoke the token and
# rotate the password.
- name: Create monitoring secrets in Vault
  hosts: localhost
  connection: local
  gather_facts: false

  vars:
    # The env lookup yields an empty string when the variable is unset, hence
    # default(..., true) to also replace empty values.
    vault_addr: "{{ lookup('env', 'VAULT_ADDR') | default('http://192.168.0.103:8200', true) }}"
    vault_token: "{{ lookup('env', 'VAULT_TOKEN') }}"
    grafana_admin_password: "{{ lookup('env', 'GRAFANA_ADMIN_PASSWORD') }}"

    # One entry per secret: key = path component below monitoring/,
    # value = payload stored under "data".
    monitoring_secrets:
      victoriametrics:
        host: "192.168.0.104"
        port: "8428"
        url: "http://192.168.0.104:8428"
        retention_days: "30"
        description: "VictoriaMetrics single instance"
      prometheus:
        host: "192.168.0.105"
        port: "9090"
        scrape_interval: "30s"
      grafana:
        host: "192.168.0.106"
        port: "3000"
        admin_user: "admin"
        admin_password: "{{ grafana_admin_password }}"

  tasks:
    - name: Require credentials from the environment
      assert:
        that:
          - vault_token | length > 0
          - grafana_admin_password | length > 0
        msg: "Export VAULT_TOKEN and GRAFANA_ADMIN_PASSWORD before running this playbook."

    # no_log keeps the token and the Grafana password out of task output;
    # set it to false temporarily when debugging a failing request.
    - name: Create monitoring secrets
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/{{ item.key }}"
        method: POST
        headers:
          X-Vault-Token: "{{ vault_token }}"
          Content-Type: application/json
        body_format: json
        body:
          data: "{{ item.value }}"
      loop: "{{ monitoring_secrets | dict2items }}"
      loop_control:
        label: "{{ item.key }}"
      no_log: true

    - name: Verify secrets created
      uri:
        url: "{{ vault_addr }}/v1/secret/metadata"
        method: LIST
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: secrets_list

    # data['keys'] rather than data.keys: on a dict, Jinja resolves ".keys" to
    # the built-in dict method, not to the "keys" entry of the response.
    - name: Show created secrets
      debug:
        msg: "Secrets in Vault: {{ (secrets_list.content | from_json).data['keys'] }}"
|
||||
16
playbooks/vault/setup_vault.yml
Normal file
16
playbooks/vault/setup_vault.yml
Normal file
@ -0,0 +1,16 @@
|
||||
---
# Queries the local Vault health endpoint and prints the result. Despite the
# play name, nothing is installed or configured here.
- name: Setup HashiCorp Vault
  hosts: 192.168.0.103
  become: true

  vars:
    # Meaning of the HTTP status codes returned by /v1/sys/health.
    vault_health_states:
      "200": initialized, unsealed and active
      "429": unsealed, standby
      "472": disaster recovery secondary
      "473": performance standby
      "501": not initialized
      "503": sealed

  tasks:
    # Vault encodes its state in the HTTP status, so the non-200 codes above
    # are valid answers and are accepted instead of being reported as task
    # failures. ignore_errors stays so an unreachable Vault is still reported
    # by the tasks below rather than aborting the play.
    - name: Check Vault status
      uri:
        url: http://localhost:8200/v1/sys/health
        validate_certs: false
        status_code: [200, 429, 472, 473, 501, 503]
      register: vault_status
      ignore_errors: true

    - name: Display Vault status
      debug:
        msg: "Vault status: {{ vault_status.status | default(-1) }}"

    - name: Explain Vault status
      debug:
        msg: "{{ vault_health_states.get(vault_status.status | default(-1) | string, 'unreachable or unexpected response') }}"
|
||||
22
test_vault.py
Normal file
22
test_vault.py
Normal file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
"""Smoke test for Vault access through the hvac client.

Builds a client from the VAULT_ADDR and VAULT_TOKEN environment variables,
prints the authentication and seal status, then reads one KV v2 secret and
prints its ``url`` and ``user`` fields.

Exits with status 1 when Vault cannot be queried or the secret cannot be read.
"""
import os
import sys

import hvac

# Connect to Vault
client = hvac.Client(
    url=os.environ.get('VAULT_ADDR'),
    token=os.environ.get('VAULT_TOKEN')
)

# Connectivity check. These calls talk to the server, so they are guarded:
# an unreachable Vault yields a one-line error instead of a traceback.
try:
    print(f"Vault is authenticated: {client.is_authenticated()}")
    print(f"Vault seal status: {client.sys.is_sealed()}")
except Exception as e:
    print(f"Error connecting to Vault: {e}")
    sys.exit(1)

# Read the secret
# NOTE(review): the playbook test_vault_correct.yml reads the Forgejo
# credentials from "git/forgejo"; confirm that 'Forgeo' is the intended path.
try:
    secret = client.secrets.kv.v2.read_secret_version(path='Forgeo')
    print("\nSecret data:")
    print(f"URL: {secret['data']['data']['url']}")
    print(f"User: {secret['data']['data']['user']}")
except Exception as e:
    print(f"Error reading secret: {e}")
    # Signal the failure to the caller; previously the script exited 0.
    sys.exit(1)
|
||||
78
test_vault_correct.yml
Normal file
78
test_vault_correct.yml
Normal file
@ -0,0 +1,78 @@
|
||||
---
# Exercises the Vault KV v2 HTTP API: reads the Forgejo secret, writes and
# reads back the VictoriaMetrics secret, then lists the mount.
#
# Required environment: VAULT_TOKEN
# Optional environment: VAULT_ADDR (defaults to http://192.168.0.103:8200)
#
# SECURITY: earlier revisions of this file carried a Vault token in plain
# text. Treat that token as compromised and revoke it.
- name: Test Vault Integration with CORRECT paths
  hosts: localhost
  connection: local
  gather_facts: false

  vars:
    # The env lookup yields an empty string when the variable is unset, hence
    # default(..., true) to also replace empty values.
    vault_addr: "{{ lookup('env', 'VAULT_ADDR') | default('http://192.168.0.103:8200', true) }}"
    vault_token: "{{ lookup('env', 'VAULT_TOKEN') }}"

  tasks:
    - name: Require a Vault token from the environment
      assert:
        that:
          - vault_token | length > 0
        msg: "Export VAULT_TOKEN before running this playbook."

    - name: Test 1 - Read Git secret from Vault
      uri:
        url: "{{ vault_addr }}/v1/secret/data/git/forgejo"
        method: GET
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: git_secret

    # The password itself is deliberately not printed: debug output ends up in
    # terminals, CI logs and log files. Only its presence is reported.
    - name: Display Git credentials
      debug:
        msg: |
          Git Forgejo Credentials:
          URL: {{ (git_secret.content | from_json).data.data.url }}
          User: {{ (git_secret.content | from_json).data.data.user }}
          Password: {{ 'present (hidden)' if (git_secret.content | from_json).data.data.password is defined else 'MISSING' }}

    - name: Test 2 - Create VictoriaMetrics secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/victoriametrics"
        method: POST
        headers:
          X-Vault-Token: "{{ vault_token }}"
          Content-Type: application/json
        body_format: json
        body:
          data:
            host: "192.168.0.104"
            port: "8428"
            url: "http://192.168.0.104:8428"
            retention_days: "30"
        validate_certs: false
      register: create_vm_secret

    - name: Test 3 - Read VictoriaMetrics secret
      uri:
        url: "{{ vault_addr }}/v1/secret/data/monitoring/victoriametrics"
        method: GET
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: vm_secret

    - name: Display VictoriaMetrics configuration
      debug:
        msg: |
          VictoriaMetrics (Container 119):
          Host: {{ (vm_secret.content | from_json).data.data.host }}
          Port: {{ (vm_secret.content | from_json).data.data.port }}
          URL: {{ (vm_secret.content | from_json).data.data.url }}
          Retention: {{ (vm_secret.content | from_json).data.data.retention_days }} days

    - name: Test 4 - List all secrets
      uri:
        url: "{{ vault_addr }}/v1/secret/metadata"
        method: LIST
        headers:
          X-Vault-Token: "{{ vault_token }}"
        return_content: true
        validate_certs: false
      register: secrets_list

    # data['keys'] rather than data.keys: on a dict, Jinja resolves ".keys" to
    # the built-in dict method, not to the "keys" entry of the response.
    - name: Display secrets structure
      debug:
        msg: "Secrets in Vault: {{ (secrets_list.content | from_json).data['keys'] }}"
|
||||
31
test_vault_simple.py
Normal file
31
test_vault_simple.py
Normal file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python3
"""Smoke test for Vault access using curl.

Reads VAULT_ADDR and VAULT_TOKEN from the environment, fetches the secret at
``/v1/secret/data/Forgeo`` with curl and prints its ``url`` and ``user``
fields.

Exits with status 1 when the environment is incomplete, curl fails (including
HTTP error responses), or the response does not contain the expected fields.
"""
import json
import os
import subprocess
import sys

# Read the environment variables
vault_addr = os.environ.get('VAULT_ADDR')
vault_token = os.environ.get('VAULT_TOKEN')

if not vault_addr or not vault_token:
    print("Ошибка: Не установлены переменные VAULT_ADDR или VAULT_TOKEN")
    sys.exit(1)

# Check via curl (the simple way).
# --fail makes curl exit non-zero on HTTP error responses; with plain "-s" it
# exited 0 on e.g. 403/404 and the success message was printed for an error
# body. -S keeps curl's own error text on stderr despite -s.
# NOTE(review): the token is passed on the command line and is therefore
# visible in the process list while curl runs.
cmd = [
    'curl', '-sS', '--fail',
    '-H', f'X-Vault-Token: {vault_token}',
    f'{vault_addr}/v1/secret/data/Forgeo'
]

try:
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"Ошибка curl: {result.stderr}")
        sys.exit(1)

    # Extract the fields before announcing success, so a malformed or
    # unexpected payload is reported as an error only.
    data = json.loads(result.stdout)
    fields = data['data']['data']
    url = fields['url']
    user = fields['user']

    print("Успешное подключение к Vault!")
    print(f"URL: {url}")
    print(f"User: {user}")
except Exception as e:
    print(f"Ошибка: {e}")
    sys.exit(1)
|
||||
Reference in New Issue
Block a user