5 Commits

Author SHA1 Message Date
17dd0fddff Добавлена установка Grafana: роли, плейбуки и конфигурация 2026-02-04 02:28:50 +00:00
0abdb8b0a5 Merge pull request 'feat: Добавлена установка Node Exporter на все хосты' (#2) from feature/prometheus-installation into main
Reviewed-on: #2
2026-02-03 04:44:22 +00:00
26f6832275 feat: Добавлена установка Node Exporter на все хосты
- Создана роль node_exporter для установки на LXC контейнеры
- Добавлен плейбук deploy_all_node_exporters.yml
- Настроен ansible.cfg для правильного поиска ролей
- Node Exporter успешно установлен на все 10 хостов
- Prometheus собирает метрики со всех Node Exporters
- Все 14 таргетов в состоянии UP
2026-02-03 04:43:05 +00:00
1bab23c929 Merge pull request 'feat: Add Prometheus installation role and playbook' (#1) from feature/prometheus-installation into main
Reviewed-on: #1
2026-02-03 02:29:20 +00:00
8212b75ac9 feat: Add Prometheus installation role and playbook
- Created Prometheus role with automated installation
- Version: 2.48.1
- Configured remote_write to VictoriaMetrics (192.168.0.104:8428)
- Added scrape configs for monitoring infrastructure
- Created systemd service configuration
- Successfully tested on 192.168.0.105
2026-02-03 02:26:22 +00:00
28 changed files with 1218 additions and 0 deletions

View File

@ -3,6 +3,7 @@ inventory = inventories/production/hosts
host_key_checking = False
interpreter_python = /usr/bin/python3
gathering = smart
roles_path = ./roles
[ssh_connection]
pipelining = True

View File

@ -24,3 +24,6 @@ ansible_ssh_private_key_file=~/.ssh/id_ansible
[all_except_ansible:children]
infrastructure
applications
[grafana]
192.168.0.106 # pvestandt1-grafana

View File

@ -0,0 +1,46 @@
---
# Installs Prometheus on its dedicated container, then probes the local
# Prometheus endpoint and the VictoriaMetrics health endpoint and prints
# where the installation can be reached.
- name: Install and Configure Prometheus
  hosts: 192.168.0.105  # Prometheus container
  gather_facts: true
  become: true

  pre_tasks:
    - name: Update apt cache
      ansible.builtin.apt:
        cache_valid_time: 3600
        update_cache: true

    - name: Install required packages
      ansible.builtin.apt:
        state: present
        name: [curl, wget, ca-certificates]

  roles:
    - role: ../../roles/prometheus

  post_tasks:
    # Both probes run on the target host itself, not on the control node.
    - name: Verify Prometheus is accessible from target host
      ansible.builtin.uri:
        status_code: 200
        url: "http://localhost:9090/metrics"

    - name: Check VictoriaMetrics connectivity from Prometheus
      ansible.builtin.uri:
        status_code: 200
        url: "http://192.168.0.104:8428/health"

    - name: Display access information
      ansible.builtin.debug:
        msg:
          - "========================================="
          - "Prometheus installed successfully!"
          - "========================================="
          - "Web UI: http://{{ ansible_host }}:9090"
          - "Metrics: http://{{ ansible_host }}:9090/metrics"
          - "Targets: http://{{ ansible_host }}:9090/targets"
          - "Configuration: /etc/prometheus/prometheus.yml"
          - "Data directory: /var/lib/prometheus"
          - "========================================="

View File

@ -0,0 +1,47 @@
---
# Read-only health report for the grafana hosts. Every probe after the
# service lookup is best-effort so that the final report is always printed,
# including when Grafana is stopped or not listening.
- name: Check Grafana installation status
  hosts: grafana
  become: true
  tasks:
    - name: Check Grafana service
      ansible.builtin.systemd:
        name: grafana
      register: service_status

    - name: Check if Grafana is listening on port
      ansible.builtin.wait_for:
        port: 3000
        host: 127.0.0.1
        timeout: 10
        state: started
      register: port_status
      # A closed port has to show up in the report instead of aborting it.
      ignore_errors: true

    - name: Check Grafana API health
      ansible.builtin.uri:
        url: "http://localhost:3000/api/health"
        method: GET
        status_code: 200
        timeout: 10
      register: api_status
      ignore_errors: true

    - name: Get Grafana version
      ansible.builtin.command: /usr/local/bin/grafana-server --version
      register: version_info
      changed_when: false
      ignore_errors: true

    - name: Display Grafana status report
      ansible.builtin.debug:
        # `default(..., true)` is required for the version: regex_search
        # yields an empty/None value (not an undefined one) when nothing
        # matches, and `last` guards against empty command output.
        msg: |
          📊 Статус Grafana на {{ inventory_hostname }}:
          Служба: {{ "✅ Запущена" if service_status.status.ActiveState == "active" else "❌ Остановлена" }}
          Порт 3000: {{ "✅ Открыт" if port_status is succeeded else "❌ Закрыт" }}
          API: {{ "✅ Доступен (HTTP " ~ api_status.status ~ ")" if (api_status.status | default(-1)) == 200 else "❌ Недоступен" }}
          {% if version_info is succeeded %}
          Версия: {{ version_info.stdout_lines | default([]) | last | default('', true) | regex_search('Version ([0-9.]+)') | default('Неизвестна', true) }}
          {% else %}
          Версия: Не удалось определить
          {% endif %}

View File

@ -0,0 +1,55 @@
---
# Removes the tarball-based Grafana installation from the grafana hosts:
# service, unit file, symlinks, directories, leftover archives and the user.
- name: Clean up Grafana completely
  hosts: grafana
  become: true
  tasks:
    - name: Stop and disable Grafana service
      ansible.builtin.systemd:
        name: grafana
        state: stopped
        enabled: false
        daemon_reload: true
      # Best-effort: the unit may already be missing on a half-cleaned host.
      ignore_errors: true

    - name: Remove systemd service file
      ansible.builtin.file:
        path: /etc/systemd/system/grafana.service
        state: absent

    - name: Remove symlinks
      ansible.builtin.file:
        path: "{{ item }}"
        state: absent
      loop:
        - /usr/local/bin/grafana-server
        - /usr/local/bin/grafana-cli

    # The file module treats its path literally, so wildcard paths have to be
    # resolved with find before they can be removed.
    - name: Find versioned Grafana directories
      ansible.builtin.find:
        paths: /usr/share
        patterns:
          - "grafana-*"
        file_type: directory
      register: grafana_versioned_dirs

    - name: Remove Grafana directories
      ansible.builtin.file:
        path: "{{ item }}"
        state: absent
      loop: >-
        {{
          ['/usr/share/grafana', '/var/lib/grafana', '/var/log/grafana', '/etc/grafana']
          + (grafana_versioned_dirs.files | map(attribute='path') | list)
        }}

    - name: Find temporary Grafana archives
      ansible.builtin.find:
        paths: /tmp
        patterns:
          - "grafana-*.tar.gz"
        file_type: file
      register: grafana_tmp_archives

    - name: Remove temporary files
      ansible.builtin.file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ grafana_tmp_archives.files }}"
      loop_control:
        label: "{{ item.path }}"

    - name: Remove Grafana user and group
      ansible.builtin.user:
        name: grafana
        state: absent
        remove: true

    - name: Reload systemd
      ansible.builtin.systemd:
        daemon_reload: true

    - name: Verify cleanup
      ansible.builtin.debug:
        msg: "✅ Grafana полностью удалена с хоста {{ inventory_hostname }}"

View File

@ -0,0 +1,15 @@
---
# Rolls the node_exporter role out to every inventory host. The apt cache is
# refreshed first, but only on Debian-family systems.
- name: Deploy Node Exporter to ALL hosts
  hosts: all
  gather_facts: true
  become: true

  pre_tasks:
    - name: Update apt cache
      when: ansible_os_family == 'Debian'
      ansible.builtin.apt:
        update_cache: true

  roles:
    - role: node_exporter
      tags:
        - node_exporter

View File

@ -0,0 +1,9 @@
---
# Applies the grafana role to the grafana inventory group.
#
# grafana_version and grafana_admin_password are intentionally NOT set here:
# the role defaults already carry the same values ("12.3.2" / "admin"), and
# play-level vars would outrank inventory and group_vars, making it
# impossible to override the password per environment without --extra-vars.
- name: Install and configure Grafana
  hosts: grafana
  become: true
  roles:
    - role: grafana

View File

@ -0,0 +1,47 @@
---
# Installs Grafana through the grafana role and then confirms from the
# control node that the HTTP API is reachable over the network.
- name: Install and configure Grafana (with health checks)
  hosts: grafana
  become: true
  vars:
    grafana_version: "12.3.2"
    grafana_admin_password: "admin"
  tasks:
    - name: Include Grafana role
      ansible.builtin.include_role:
        name: grafana

    - name: Final verification from control node
      delegate_to: localhost
      run_once: true
      # These tasks only issue HTTP requests and print text; without this the
      # play-level `become` would demand sudo on the control node.
      become: false
      vars:
        # Address of the first grafana host as seen from outside; falls back
        # to the inventory name when no default IPv4 fact is available.
        grafana_external_host: "{{ hostvars[groups['grafana'][0]]['ansible_default_ipv4']['address'] | default(groups['grafana'][0]) }}"
      block:
        - name: Wait for Grafana to be fully ready
          ansible.builtin.pause:
            seconds: 30
            prompt: "Waiting for Grafana to complete initialization..."

        - name: Test Grafana access from control node
          ansible.builtin.uri:
            url: "http://{{ grafana_external_host }}:3000/api/health"
            method: GET
            status_code: 200
            timeout: 30
          register: final_check
          until: final_check.status == 200
          retries: 12  # 12 attempts with a 5 second delay = 60 seconds
          delay: 5

        - name: Display final success message
          ansible.builtin.debug:
            # The admin password is deliberately not echoed: task output ends
            # up in terminals, CI logs and callback plugins.
            msg: |
              🎉 Grafana успешно установлена и готова к работе!
              Доступ по адресу: http://{{ grafana_external_host }}:3000
              Логин: admin
              Пароль: значение переменной grafana_admin_password
              Для проверки выполните команду:
              curl http://{{ grafana_external_host }}:3000/api/health
              Или откройте в браузере:
              http://{{ grafana_external_host }}:3000

View File

@ -0,0 +1,4 @@
---
# Default variables for the grafana role.
grafana_version: '12.3.2'
grafana_archive_type: 'tar.gz'  # or "zip"
grafana_admin_password: 'admin'

View File

@ -0,0 +1,6 @@
---
# Restarts the grafana unit; systemd re-reads its unit files first so an
# updated grafana.service is picked up by the same restart.
- name: restart grafana
  ansible.builtin.systemd:
    daemon_reload: true
    state: restarted
    name: grafana

View File

@ -0,0 +1,142 @@
---
# Installs Grafana OSS from the release tarball into /usr/share, exposes the
# active release through the /usr/share/grafana symlink and runs it as a
# systemd service. Re-running the role on a healthy host changes nothing.

- name: Debug - Show Grafana version
  ansible.builtin.debug:
    msg: "Устанавливаем Grafana версии {{ grafana_version }}"
  tags: grafana

- name: Install minimal dependencies
  ansible.builtin.apt:
    name:
      - curl
      - adduser
      - libfontconfig1
      - tar
      - gzip
      - procps
    state: present
    update_cache: true
  tags: grafana

- name: Create Grafana user and group
  ansible.builtin.user:
    name: grafana
    system: true
    shell: /bin/false
    home: /usr/share/grafana
    # The home path is the release symlink created further down; a home
    # directory created here would be in the way of that symlink.
    create_home: false
    comment: "Grafana Server"
  tags: grafana

- name: Create Grafana data/log/config directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: grafana
    group: grafana
    mode: '0755'
  loop:
    - /var/lib/grafana
    - /var/log/grafana
    - /etc/grafana
  tags: grafana

# The archive is removed at the end of every run, so without this check the
# tarball would be downloaded again on each run.
- name: Check whether this Grafana release is already unpacked
  ansible.builtin.stat:
    path: "/usr/share/grafana-{{ grafana_version }}"
  register: grafana_release_dir
  tags: grafana

- name: Download Grafana from official site
  ansible.builtin.get_url:
    url: "https://dl.grafana.com/oss/release/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    dest: "/tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    mode: '0644'
    timeout: 300
    # TLS verification stays enabled (module default): this archive contains
    # the binaries that will run as a service.
  when: not grafana_release_dir.stat.exists
  tags: grafana

- name: Show download info
  ansible.builtin.debug:
    msg: "Grafana скачан: /tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
  when: not grafana_release_dir.stat.exists
  tags: grafana

- name: Extract Grafana archive
  ansible.builtin.unarchive:
    src: "/tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    dest: "/usr/share/"
    remote_src: true
    owner: grafana
    group: grafana
    creates: "/usr/share/grafana-{{ grafana_version }}"
  when: not grafana_release_dir.stat.exists
  tags: grafana

- name: Inspect current /usr/share/grafana
  ansible.builtin.stat:
    path: /usr/share/grafana
  register: grafana_home_path
  tags: grafana

# Only a real directory (left by an earlier home-directory creation) blocks
# the symlink. An existing symlink is left alone so a running Grafana never
# loses its home path and the task does not report a change on every run.
- name: Remove existing /usr/share/grafana if it exists (cleanup)
  ansible.builtin.file:
    path: /usr/share/grafana
    state: absent
  when:
    - grafana_home_path.stat.exists
    - grafana_home_path.stat.isdir
  tags: grafana

- name: Create symlink from extracted version
  ansible.builtin.file:
    src: "/usr/share/grafana-{{ grafana_version }}"
    dest: "/usr/share/grafana"
    state: link
    owner: grafana
    group: grafana
  tags: grafana

- name: Create binary symlinks
  ansible.builtin.file:
    src: "/usr/share/grafana/bin/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    state: link
    owner: root
    group: root
  loop:
    - grafana-server
    - grafana-cli
  tags: grafana

- name: Configure Grafana
  ansible.builtin.template:
    src: grafana.ini.j2
    dest: /etc/grafana/grafana.ini
    owner: grafana
    group: grafana
    # The rendered file contains the admin password: not world-readable.
    mode: '0640'
  notify: restart grafana
  tags: grafana

- name: Install systemd service
  ansible.builtin.template:
    src: grafana.service.j2
    dest: /etc/systemd/system/grafana.service
    owner: root
    group: root
    mode: '0644'
  notify: restart grafana
  tags: grafana

# Run a pending restart now so the verification below exercises the service
# with its final configuration instead of being followed by a restart.
- name: Apply pending Grafana restart before verification
  ansible.builtin.meta: flush_handlers
  tags: grafana

- name: Enable and start Grafana service
  ansible.builtin.systemd:
    name: grafana
    enabled: true
    state: started
    daemon_reload: true
  tags: grafana

- name: Wait and verify Grafana is fully operational
  ansible.builtin.include_tasks: wait_and_verify.yml
  tags: grafana

- name: Clean up temporary files
  ansible.builtin.file:
    path: "/tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    state: absent
  tags: grafana

View File

@ -0,0 +1,68 @@
---
# Waits until Grafana listens on port 3000 and its health API answers, then
# prints a summary of the installation.

- name: Wait for Grafana to start (initial wait)
  # With only `timeout` set, wait_for simply sleeps for that many seconds.
  ansible.builtin.wait_for:
    timeout: 30
  tags: grafana

- name: Check if Grafana is listening on port 3000 (with retries)
  ansible.builtin.wait_for:
    port: 3000
    host: 127.0.0.1
    delay: 10
    timeout: 300  # 5 minutes at most
    state: started
  register: grafana_port_check
  tags: grafana

- name: Debug port check result
  ansible.builtin.debug:
    msg: "Grafana port check: {{ grafana_port_check.state }} after {{ grafana_port_check.elapsed }} seconds"
  tags: grafana

- name: Wait for Grafana API to be ready
  ansible.builtin.uri:
    url: "http://localhost:3000/api/health"
    method: GET
    status_code: 200
    timeout: 30
  register: grafana_api_check
  until: grafana_api_check.status == 200
  retries: 30  # 30 attempts with a 5 second delay = 150 seconds
  delay: 5
  tags: grafana

- name: Debug API check result
  ansible.builtin.debug:
    msg: "Grafana API responded with HTTP {{ grafana_api_check.status }} after {{ grafana_api_check.attempts }} attempts"
  tags: grafana

- name: Verify Grafana installation (final check)
  tags: grafana
  block:
    - name: Check Grafana service status
      ansible.builtin.systemd:
        name: grafana
      register: grafana_service_status

    - name: Check Grafana version
      ansible.builtin.command: /usr/local/bin/grafana-server --version
      register: grafana_version_check
      changed_when: false

    - name: Show installation summary
      ansible.builtin.debug:
        # Version falls back to the requested grafana_version; the second
        # argument of default() makes the fallback apply to empty/None
        # results of regex_search, and `last` tolerates empty output.
        # The admin password is not echoed into the task log.
        msg: |
          ✅ Grafana успешно установлена!
          Версия: {{ grafana_version_check.stdout_lines | default([]) | last | default('', true) | regex_search('Version ([0-9.]+)') | default(grafana_version, true) }}
          Служба: {{ grafana_service_status.status.ActiveState }}
          Порт 3000: {{ 'открыт' if grafana_port_check.state == 'started' else 'закрыт' }}
          API: {{ 'доступен' if grafana_api_check.status == 200 else 'недоступен' }}
          Время установки: {{ grafana_port_check.elapsed | default(0) | round(2) }} секунд
          Доступ по адресу: http://{{ inventory_hostname }}:3000
          Логин: admin
          Пароль: значение переменной grafana_admin_password

View File

@ -0,0 +1,27 @@
# Grafana server configuration, rendered by the grafana role.

[server]
http_port = 3000
# `domain` is the public host name used to build links, not a bind address.
# The listen address is left at Grafana's default.
domain = {{ ansible_host | default(inventory_hostname) }}
# Built from the values above using Grafana's own %(name)s interpolation.
root_url = %(protocol)s://%(domain)s:%(http_port)s/
router_logging = true
enable_gzip = false

[security]
admin_user = admin
admin_password = {{ grafana_admin_password | default('admin') }}

[database]
type = sqlite3
path = /var/lib/grafana/grafana.db

[session]
provider = file

[analytics]
reporting_enabled = false
check_for_updates = false

[paths]
data = /var/lib/grafana
logs = /var/log/grafana
plugins = /var/lib/grafana/plugins
provisioning = /etc/grafana/provisioning

View File

@ -0,0 +1,24 @@
# systemd unit for the tarball-based Grafana installation.
[Unit]
Description=Grafana Server
Documentation=https://grafana.com/docs
After=network.target

[Service]
Type=simple
User=grafana
Group=grafana
LimitNOFILE=10000
# Paths are passed both as GF_PATHS_* variables and as command-line flags.
Environment="GF_PATHS_HOME=/usr/share/grafana"
Environment="GF_PATHS_CONFIG=/etc/grafana/grafana.ini"
Environment="GF_PATHS_DATA=/var/lib/grafana"
Environment="GF_PATHS_LOGS=/var/log/grafana"
Environment="GF_PATHS_PLUGINS=/var/lib/grafana/plugins"
ExecStart=/usr/share/grafana/bin/grafana-server --config=/etc/grafana/grafana.ini --homepath=/usr/share/grafana --packaging=tar
Restart=on-failure
RestartSec=10

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,4 @@
---
# Default variables for the grafana role.
grafana_version: '12.3.2'
grafana_archive_type: 'tar.gz'  # or "zip"
grafana_admin_password: 'admin'

View File

@ -0,0 +1,6 @@
---
# Restarts the grafana unit; systemd re-reads its unit files first so an
# updated grafana.service is picked up by the same restart.
- name: restart grafana
  ansible.builtin.systemd:
    daemon_reload: true
    state: restarted
    name: grafana

View File

@ -0,0 +1,142 @@
---
# Installs Grafana OSS from the release tarball into /usr/share, exposes the
# active release through the /usr/share/grafana symlink and runs it as a
# systemd service. Re-running the role on a healthy host changes nothing.

- name: Debug - Show Grafana version
  ansible.builtin.debug:
    msg: "Устанавливаем Grafana версии {{ grafana_version }}"
  tags: grafana

- name: Install minimal dependencies
  ansible.builtin.apt:
    name:
      - curl
      - adduser
      - libfontconfig1
      - tar
      - gzip
      - procps
    state: present
    update_cache: true
  tags: grafana

- name: Create Grafana user and group
  ansible.builtin.user:
    name: grafana
    system: true
    shell: /bin/false
    home: /usr/share/grafana
    # The home path is the release symlink created further down; a home
    # directory created here would be in the way of that symlink.
    create_home: false
    comment: "Grafana Server"
  tags: grafana

- name: Create Grafana data/log/config directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: grafana
    group: grafana
    mode: '0755'
  loop:
    - /var/lib/grafana
    - /var/log/grafana
    - /etc/grafana
  tags: grafana

# The archive is removed at the end of every run, so without this check the
# tarball would be downloaded again on each run.
- name: Check whether this Grafana release is already unpacked
  ansible.builtin.stat:
    path: "/usr/share/grafana-{{ grafana_version }}"
  register: grafana_release_dir
  tags: grafana

- name: Download Grafana from official site
  ansible.builtin.get_url:
    url: "https://dl.grafana.com/oss/release/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    dest: "/tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    mode: '0644'
    timeout: 300
    # TLS verification stays enabled (module default): this archive contains
    # the binaries that will run as a service.
  when: not grafana_release_dir.stat.exists
  tags: grafana

- name: Show download info
  ansible.builtin.debug:
    msg: "Grafana скачан: /tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
  when: not grafana_release_dir.stat.exists
  tags: grafana

- name: Extract Grafana archive
  ansible.builtin.unarchive:
    src: "/tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    dest: "/usr/share/"
    remote_src: true
    owner: grafana
    group: grafana
    creates: "/usr/share/grafana-{{ grafana_version }}"
  when: not grafana_release_dir.stat.exists
  tags: grafana

- name: Inspect current /usr/share/grafana
  ansible.builtin.stat:
    path: /usr/share/grafana
  register: grafana_home_path
  tags: grafana

# Only a real directory (left by an earlier home-directory creation) blocks
# the symlink. An existing symlink is left alone so a running Grafana never
# loses its home path and the task does not report a change on every run.
- name: Remove existing /usr/share/grafana if it exists (cleanup)
  ansible.builtin.file:
    path: /usr/share/grafana
    state: absent
  when:
    - grafana_home_path.stat.exists
    - grafana_home_path.stat.isdir
  tags: grafana

- name: Create symlink from extracted version
  ansible.builtin.file:
    src: "/usr/share/grafana-{{ grafana_version }}"
    dest: "/usr/share/grafana"
    state: link
    owner: grafana
    group: grafana
  tags: grafana

- name: Create binary symlinks
  ansible.builtin.file:
    src: "/usr/share/grafana/bin/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    state: link
    owner: root
    group: root
  loop:
    - grafana-server
    - grafana-cli
  tags: grafana

- name: Configure Grafana
  ansible.builtin.template:
    src: grafana.ini.j2
    dest: /etc/grafana/grafana.ini
    owner: grafana
    group: grafana
    # The rendered file contains the admin password: not world-readable.
    mode: '0640'
  notify: restart grafana
  tags: grafana

- name: Install systemd service
  ansible.builtin.template:
    src: grafana.service.j2
    dest: /etc/systemd/system/grafana.service
    owner: root
    group: root
    mode: '0644'
  notify: restart grafana
  tags: grafana

# Run a pending restart now so the verification below exercises the service
# with its final configuration instead of being followed by a restart.
- name: Apply pending Grafana restart before verification
  ansible.builtin.meta: flush_handlers
  tags: grafana

- name: Enable and start Grafana service
  ansible.builtin.systemd:
    name: grafana
    enabled: true
    state: started
    daemon_reload: true
  tags: grafana

- name: Wait and verify Grafana is fully operational
  ansible.builtin.include_tasks: wait_and_verify.yml
  tags: grafana

- name: Clean up temporary files
  ansible.builtin.file:
    path: "/tmp/grafana-{{ grafana_version }}.linux-amd64.tar.gz"
    state: absent
  tags: grafana

View File

@ -0,0 +1,110 @@
---
# Phased wait for Grafana's first start: fixed pauses interleaved with
# polling of the service state, the TCP port and the health API, followed by
# a summary. Task names containing ": " are quoted; unquoted they are a YAML
# syntax error ("mapping values are not allowed here").

- name: "Phase 1: Initial wait for Grafana to start"
  ansible.builtin.pause:
    seconds: 60
    prompt: "Phase 1/5: Initial wait for Grafana startup (60 seconds)..."
  tags: grafana

- name: Check if Grafana service is active (with retries)
  # Plain command (no shell features needed); a status query never changes
  # the host, so it must not be reported as changed.
  ansible.builtin.command: systemctl is-active grafana
  register: grafana_active
  until: grafana_active.stdout == "active"
  retries: 60  # 60 attempts with a 5 second delay = 300 seconds (5 minutes)
  delay: 5
  changed_when: false
  tags: grafana

- name: "Phase 2: Wait for database migrations (wave 1)"
  ansible.builtin.pause:
    seconds: 180
    prompt: "Phase 2/5: Waiting for database migrations (180 seconds)..."
  tags: grafana

- name: "Phase 3: Wait for plugins installation (wave 2)"
  ansible.builtin.pause:
    seconds: 180
    prompt: "Phase 3/5: Waiting for plugins installation (180 seconds)..."
  tags: grafana

- name: "Phase 4: Wait for HTTP server startup (wave 3)"
  ansible.builtin.pause:
    seconds: 180
    prompt: "Phase 4/5: Waiting for HTTP server startup (180 seconds)..."
  tags: grafana

- name: Check if port 3000 is listening (with very long timeout)
  ansible.builtin.wait_for:
    port: 3000
    host: 127.0.0.1
    timeout: 600  # 10 minutes
    state: started
  register: port_check
  tags: grafana

- name: "Phase 5: Final verification (wave 4)"
  ansible.builtin.pause:
    seconds: 120
    prompt: "Phase 5/5: Final verification (120 seconds)..."
  tags: grafana

- name: Check Grafana API health (with many retries)
  ansible.builtin.uri:
    url: "http://localhost:3000/api/health"
    method: GET
    status_code: 200
    timeout: 10
  register: api_check
  until: api_check.status == 200
  retries: 60  # 60 attempts with a 5 second delay = 300 seconds (5 minutes)
  delay: 5
  tags: grafana

- name: Calculate total wait time
  # Sum of the five fixed pauses above; polling time is not included.
  ansible.builtin.set_fact:
    total_wait_time: "{{ 60 + 180 + 180 + 180 + 120 }}"
  tags: grafana

- name: Show installation success with detailed info
  ansible.builtin.debug:
    # The admin password is not echoed into the task log.
    msg: |
      🎉 Grafana успешно установлена и готова к работе!
      ⏱️ Общее время установки: {{ total_wait_time }} секунд
      📊 Статус компонентов:
      • Служба: ✅ {{ grafana_active.stdout }}
      • Порт 3000: {{ '✅ открыт' if port_check is defined and port_check.state == 'started' else '❌ закрыт' }}
      • API: {{ '✅ доступен (HTTP ' ~ api_check.status ~ ')' if api_check is defined and api_check.status == 200 else '❌ недоступен' }}
      🔗 Доступ:
      • URL: http://{{ inventory_hostname }}:3000
      • Логин: admin
      • Пароль: значение переменной grafana_admin_password
      📋 Для проверки выполните:
      curl http://{{ inventory_hostname }}:3000/api/health
      💡 Примечание: Первый запуск Grafana занимает время из-за:
      1. Миграций базы данных
      2. Установки плагинов по умолчанию
      3. Инициализации сервиса
      Последующие запуски будут значительно быстрее.
  tags: grafana

- name: Final check from control node (optional)
  delegate_to: localhost
  run_once: true
  # Pure HTTP check and debug output: no privilege escalation needed on the
  # control node.
  become: false
  when: false  # Disabled by default; change to enable
  tags: grafana
  block:
    - name: Test external access
      ansible.builtin.uri:
        url: "http://{{ hostvars[groups['grafana'][0]]['ansible_default_ipv4']['address'] | default(groups['grafana'][0]) }}:3000/api/health"
        method: GET
        status_code: 200
        timeout: 30
      register: external_check

    - name: Show external access result
      ansible.builtin.debug:
        msg: "External access: {{ '✅ успешно' if external_check.status == 200 else '❌ недоступно' }}"

View File

@ -0,0 +1,27 @@
# Grafana server configuration, rendered by the grafana role.

[server]
http_port = 3000
# `domain` is the public host name used to build links, not a bind address.
# The listen address is left at Grafana's default.
domain = {{ ansible_host | default(inventory_hostname) }}
# A localhost root_url would send remote browsers to their own machine on
# redirects; build it from the values above with %(name)s interpolation.
root_url = %(protocol)s://%(domain)s:%(http_port)s/
router_logging = true
enable_gzip = false

[security]
admin_user = admin
admin_password = {{ grafana_admin_password | default('admin') }}

[database]
type = sqlite3
path = /var/lib/grafana/grafana.db

[session]
provider = file

[analytics]
reporting_enabled = false
check_for_updates = false

[paths]
data = /var/lib/grafana
logs = /var/log/grafana
plugins = /var/lib/grafana/plugins
provisioning = /etc/grafana/provisioning

View File

@ -0,0 +1,24 @@
# systemd unit for the tarball-based Grafana installation.
[Unit]
Description=Grafana Server
Documentation=https://grafana.com/docs
After=network.target

[Service]
Type=simple
User=grafana
Group=grafana
LimitNOFILE=10000
# Paths are passed both as GF_PATHS_* variables and as command-line flags.
Environment="GF_PATHS_HOME=/usr/share/grafana"
Environment="GF_PATHS_CONFIG=/etc/grafana/grafana.ini"
Environment="GF_PATHS_DATA=/var/lib/grafana"
Environment="GF_PATHS_LOGS=/var/log/grafana"
Environment="GF_PATHS_PLUGINS=/var/lib/grafana/plugins"
ExecStart=/usr/share/grafana/bin/grafana-server --config=/etc/grafana/grafana.ini --homepath=/usr/share/grafana --packaging=tar
Restart=on-failure
RestartSec=10

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,3 @@
---
# Default variables for the node_exporter role.
node_exporter_port: 9100
node_exporter_version: '1.7.0'

View File

@ -0,0 +1,103 @@
---
# Installs the Prometheus Node Exporter release binary, registers it as a
# systemd service, opens its port in UFW and waits for it to listen.

- name: Install required packages
  ansible.builtin.apt:
    name:
      - wget
      - tar
    state: present
    update_cache: true
  tags: node_exporter

- name: Create node_exporter user
  ansible.builtin.user:
    name: node_exporter
    system: true
    shell: /bin/false
    home: /nonexistent
    comment: "Node Exporter Service User"
  tags: node_exporter

- name: Download Node Exporter
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz"
    dest: "/tmp/node_exporter-{{ node_exporter_version }}.tar.gz"
    mode: '0644'
    timeout: 30
    # TLS verification stays enabled (module default): the archive holds a
    # binary that will run as a service on every host.
  tags: node_exporter

- name: Extract Node Exporter
  ansible.builtin.unarchive:
    src: "/tmp/node_exporter-{{ node_exporter_version }}.tar.gz"
    dest: "/tmp/"
    remote_src: true
    creates: "/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64"
  tags: node_exporter

- name: Install Node Exporter binary
  ansible.builtin.copy:
    src: "/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64/node_exporter"
    dest: "/usr/local/bin/node_exporter"
    # Root-owned so the service account cannot replace its own executable.
    owner: root
    group: root
    mode: '0755'
    remote_src: true
  register: node_exporter_binary
  tags: node_exporter

- name: Create systemd service for LXC
  ansible.builtin.template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
    owner: root
    group: root
    mode: '0644'
  register: node_exporter_unit
  tags: node_exporter

- name: Create textfile collector directory
  ansible.builtin.file:
    path: /var/lib/node_exporter/textfile_collector
    state: directory
    owner: node_exporter
    group: node_exporter
    mode: '0755'
  tags: node_exporter

- name: Clean up temp files
  ansible.builtin.file:
    path: "/tmp/node_exporter-{{ node_exporter_version }}.tar.gz"
    state: absent
  tags: node_exporter

- name: Clean up extracted directory
  ansible.builtin.file:
    path: "/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64"
    state: absent
  tags: node_exporter

- name: Enable and start Node Exporter
  ansible.builtin.systemd:
    name: node_exporter
    enabled: true
    daemon_reload: true
    # A new binary or unit file only takes effect after a restart; a plain
    # "started" would leave the old process running after an upgrade.
    state: "{{ 'restarted' if (node_exporter_binary is changed or node_exporter_unit is changed) else 'started' }}"
  tags: node_exporter

- name: Configure UFW for Node Exporter
  community.general.ufw:
    rule: allow
    port: "{{ node_exporter_port }}"
    proto: tcp
    comment: "Node Exporter metrics"
  tags: node_exporter

- name: Verify Node Exporter is running
  ansible.builtin.wait_for:
    port: "{{ node_exporter_port }}"
    host: "{{ ansible_host }}"
    delay: 3
    timeout: 60
  tags: node_exporter

View File

@ -0,0 +1,30 @@
# systemd unit for Node Exporter, rendered by the node_exporter role.
# The textfile collector is pointed at the directory the role creates;
# enabling the collector without a directory leaves it with nothing to read.
[Unit]
Description=Node Exporter
After=network.target
Wants=network.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter \
    --collector.cpu \
    --collector.diskstats \
    --collector.filesystem \
    --collector.loadavg \
    --collector.meminfo \
    --collector.netdev \
    --collector.netstat \
    --collector.stat \
    --collector.time \
    --collector.uname \
    --collector.vmstat \
    --collector.systemd \
    --collector.textfile \
    --collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
    --web.listen-address=:{{ node_exporter_port }}
Restart=always
RestartSec=3

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,32 @@
---
# Default variables for the prometheus role.

# Prometheus release and service account
prometheus_version: '2.48.1'
prometheus_user: prometheus
prometheus_group: prometheus

# Directories
prometheus_config_dir: /etc/prometheus
prometheus_data_dir: /var/lib/prometheus
prometheus_binary_dir: /usr/local/bin

# Service settings
prometheus_port: 9090
prometheus_retention: '7d'
prometheus_log_level: info

# VictoriaMetrics endpoint used for remote_write
victoriametrics_host: '192.168.0.104'
victoriametrics_port: 8428
victoriametrics_url: 'http://{{ victoriametrics_host }}:{{ victoriametrics_port }}/api/v1/write'

# Vault metrics endpoint
# NOTE(review): the token is a literal value stored in role defaults;
# consider supplying it from an encrypted source instead.
vault_host: '192.168.0.103'
vault_port: 8200
vault_token: 'root'

# Git/Forgejo
git_host: '192.168.0.100'
git_port: 3000

# Hosts scraped by the node job (to be filled in later)
node_exporter_targets: []

View File

@ -0,0 +1,13 @@
---
# Handlers for the prometheus role.

# Full restart; systemd re-reads unit files first.
- name: restart prometheus
  ansible.builtin.systemd:
    name: prometheus
    state: restarted
    daemon_reload: true
  become: true

# Configuration reload through the lifecycle HTTP endpoint. Handlers run in
# the order defined here, so this request can arrive right after the restart
# above while Prometheus is still starting; it is retried until it succeeds.
- name: reload prometheus
  ansible.builtin.uri:
    url: "http://localhost:{{ prometheus_port }}/-/reload"
    method: POST
  register: prometheus_reload
  until: (prometheus_reload.status | default(-1)) == 200
  retries: 6
  delay: 5
  become: true

View File

@ -0,0 +1,121 @@
---
# Installs Prometheus from the GitHub release tarball (only when the
# installed version differs), renders its configuration and systemd unit,
# starts the service and reports the number of active targets.

# The directories below are owned by prometheus_group, so the group has to
# exist even when its name differs from prometheus_user.
- name: Create prometheus system group
  ansible.builtin.group:
    name: "{{ prometheus_group }}"
    system: true
    state: present

- name: Create prometheus system user
  ansible.builtin.user:
    name: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    shell: /bin/false
    home: /nonexistent
    create_home: false
    system: true
    state: present

- name: Create prometheus directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: '0755'
  loop:
    - "{{ prometheus_config_dir }}"
    - "{{ prometheus_config_dir }}/rules"
    - "{{ prometheus_config_dir }}/file_sd"
    - "{{ prometheus_data_dir }}"

- name: Check if Prometheus is already installed
  ansible.builtin.stat:
    path: "{{ prometheus_binary_dir }}/prometheus"
  register: prometheus_binary

- name: Check Prometheus version
  # Third field of the first line of `prometheus --version`.
  ansible.builtin.shell: "{{ prometheus_binary_dir }}/prometheus --version 2>&1 | head -1 | awk '{print $3}'"
  register: prometheus_installed_version
  when: prometheus_binary.stat.exists
  changed_when: false
  failed_when: false

- name: Download and install Prometheus
  # Runs on a fresh host or when the installed version is not the requested
  # one; default('') covers a result that carries no stdout.
  when: >-
    not prometheus_binary.stat.exists
    or (prometheus_installed_version.stdout | default('')) != prometheus_version
  block:
    - name: "Download Prometheus {{ prometheus_version }}"
      ansible.builtin.unarchive:
        src: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz"
        dest: /tmp
        remote_src: true
        owner: root
        group: root
        mode: '0755'

    - name: Copy Prometheus binaries
      ansible.builtin.copy:
        src: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64/{{ item }}"
        dest: "{{ prometheus_binary_dir }}/{{ item }}"
        owner: root
        group: root
        mode: '0755'
        remote_src: true
      loop:
        - prometheus
        - promtool
      notify: restart prometheus

    - name: Copy console libraries
      ansible.builtin.copy:
        src: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64/{{ item }}/"
        dest: "{{ prometheus_config_dir }}/{{ item }}/"
        owner: "{{ prometheus_user }}"
        group: "{{ prometheus_group }}"
        remote_src: true
      loop:
        - consoles
        - console_libraries

    - name: Clean up downloaded files
      ansible.builtin.file:
        path: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64"
        state: absent

- name: Configure Prometheus
  ansible.builtin.template:
    src: prometheus.yml.j2
    dest: "{{ prometheus_config_dir }}/prometheus.yml"
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: '0644'
    backup: true
    # The rendered file is only installed if promtool accepts it.
    validate: "{{ prometheus_binary_dir }}/promtool check config %s"
  notify: reload prometheus

- name: Create Prometheus systemd service
  ansible.builtin.template:
    src: prometheus.service.j2
    dest: /etc/systemd/system/prometheus.service
    mode: '0644'
  notify: restart prometheus

- name: Ensure Prometheus is started and enabled
  ansible.builtin.systemd:
    name: prometheus
    state: started
    enabled: true
    daemon_reload: true

- name: Wait for Prometheus to be ready
  ansible.builtin.uri:
    url: "http://localhost:{{ prometheus_port }}/metrics"
    status_code: 200
  register: prometheus_health
  until: prometheus_health.status == 200
  retries: 10
  delay: 5

- name: Check Prometheus targets status
  ansible.builtin.uri:
    url: "http://localhost:{{ prometheus_port }}/api/v1/targets"
  register: targets_response

- name: Display Prometheus status
  ansible.builtin.debug:
    msg:
      - "Prometheus version: {{ prometheus_version }}"
      - "Prometheus URL: http://{{ ansible_host }}:{{ prometheus_port }}"
      - "Active targets: {{ targets_response.json.data.activeTargets | length }}"

View File

@ -0,0 +1,29 @@
# systemd unit for Prometheus, rendered by the prometheus role.
# NOTE(review): the lifecycle and admin HTTP APIs are both switched on by the
# flags below; confirm that the admin API is really needed.
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User={{ prometheus_user }}
Group={{ prometheus_group }}
SyslogIdentifier=prometheus
StandardOutput=journal
StandardError=journal
ExecStart={{ prometheus_binary_dir }}/prometheus \
    --config.file={{ prometheus_config_dir }}/prometheus.yml \
    --storage.tsdb.path={{ prometheus_data_dir }} \
    --storage.tsdb.retention.time={{ prometheus_retention }} \
    --web.console.templates={{ prometheus_config_dir }}/consoles \
    --web.console.libraries={{ prometheus_config_dir }}/console_libraries \
    --web.enable-lifecycle \
    --web.enable-admin-api \
    --log.level={{ prometheus_log_level }}
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,80 @@
# Ansible managed - do not edit manually
# Jinja control lines are kept at column 0 so that no stray indentation is
# carried into the rendered YAML.

global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: "pve-monitoring"
    environment: "production"
    prometheus_replica: "{{ ansible_hostname }}"

# Alertmanager configuration (optional)
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

# Rule files are loaded once and then evaluated periodically
rule_files:
  - "{{ prometheus_config_dir }}/rules/*.yml"

# Remote write to VictoriaMetrics
remote_write:
  - url: "{{ victoriametrics_url }}"
    queue_config:
      max_samples_per_send: 10000
      capacity: 20000
      max_shards: 30
    metadata_config:
      send: true
      send_interval: 1m

# Scrape configurations
scrape_configs:
  # Prometheus self-monitoring
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:{{ prometheus_port }}"
        labels:
          instance: "prometheus"
          container: "pvestandt6"

  # VictoriaMetrics monitoring
  - job_name: "victoriametrics"
    static_configs:
      - targets:
          - "{{ victoriametrics_host }}:{{ victoriametrics_port }}"
        labels:
          instance: "victoriametrics"
          container: "pvestandt5"

  # Vault metrics
  - job_name: "vault"
    metrics_path: "/v1/sys/metrics"
    params:
      format:
        - "prometheus"
    bearer_token: "{{ vault_token }}"
    static_configs:
      - targets:
          - "{{ vault_host }}:{{ vault_port }}"
        labels:
          instance: "vault"
          container: "pvestandt4"

  # Git/Forgejo metrics
  - job_name: "gitea"
    metrics_path: "/metrics"
    static_configs:
      - targets:
          - "{{ git_host }}:{{ git_port }}"
        labels:
          instance: "git"
          container: "pvestandt1"

  # Node exporters - targets are added dynamically
  - job_name: "node"
    static_configs:
{% if node_exporter_targets | length > 0 %}
      - targets:
{% for target in node_exporter_targets %}
          - {{ target }}:9100
{% endfor %}
{% else %}
      - targets: []  # Will be populated after installing node_exporter
{% endif %}