Compare commits
2 Commits
30d35bc401
...
feature/pr
| Author | SHA1 | Date | |
|---|---|---|---|
| 26f6832275 | |||
| 8212b75ac9 |
@ -3,6 +3,7 @@ inventory = inventories/production/hosts
|
|||||||
host_key_checking = False
|
host_key_checking = False
|
||||||
interpreter_python = /usr/bin/python3
|
interpreter_python = /usr/bin/python3
|
||||||
gathering = smart
|
gathering = smart
|
||||||
|
roles_path = ./roles
|
||||||
|
|
||||||
[ssh_connection]
|
[ssh_connection]
|
||||||
pipelining = True
|
pipelining = True
|
||||||
|
|||||||
46
playbooks/infrastructure/install-prometheus.yml
Normal file
46
playbooks/infrastructure/install-prometheus.yml
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
---
# Installs and configures Prometheus through the `prometheus` role, then
# checks the local Prometheus endpoint and the VictoriaMetrics health endpoint
# from the target host.
- name: Install and Configure Prometheus
  # Defaults to the Prometheus container. Override at run time with
  # `-e prometheus_target=<host-or-group>` instead of editing the playbook.
  hosts: "{{ prometheus_target | default('192.168.0.105') }}"
  become: true
  gather_facts: true

  pre_tasks:
    - name: Update apt cache
      apt:
        update_cache: true
        cache_valid_time: 3600

    - name: Install required packages
      apt:
        name:
          - curl
          - wget
          - ca-certificates
        state: present

  roles:
    - role: ../../roles/prometheus

  post_tasks:
    # The checks below reuse the role's variables so they keep working when
    # the port or the VictoriaMetrics address is overridden; the fallbacks are
    # the values that were previously hard-coded here.
    - name: Verify Prometheus is accessible from target host
      uri:
        url: "http://localhost:{{ prometheus_port | default(9090) }}/metrics"
        status_code: 200

    - name: Check VictoriaMetrics connectivity from Prometheus
      uri:
        url: "http://{{ victoriametrics_host | default('192.168.0.104') }}:{{ victoriametrics_port | default(8428) }}/health"
        status_code: 200

    - name: Display access information
      debug:
        msg:
          - "========================================="
          - "Prometheus installed successfully!"
          - "========================================="
          - "Web UI: http://{{ ansible_host }}:9090"
          - "Metrics: http://{{ ansible_host }}:9090/metrics"
          - "Targets: http://{{ ansible_host }}:9090/targets"
          - "Configuration: /etc/prometheus/prometheus.yml"
          - "Data directory: /var/lib/prometheus"
          - "========================================="
|
||||||
15
playbooks/monitoring/deploy_all_node_exporters.yml
Normal file
15
playbooks/monitoring/deploy_all_node_exporters.yml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
---
# Applies the `node_exporter` role to every host in the inventory.
- name: Deploy Node Exporter to ALL hosts
  hosts: all
  become: true
  gather_facts: true

  pre_tasks:
    - name: Update apt cache
      apt:
        update_cache: true
        # Skip the refresh when the cache is younger than an hour, so repeated
        # runs stay fast (same setting as install-prometheus.yml).
        cache_valid_time: 3600
      when: ansible_os_family == 'Debian'

  roles:
    - role: node_exporter
      tags: node_exporter
|
||||||
3
roles/node_exporter/defaults/main.yml
Normal file
3
roles/node_exporter/defaults/main.yml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
---
# Default variables for the node_exporter role.

# Upstream release to install; interpolated into the GitHub download URL and
# the temporary archive/directory names used by the role's tasks.
node_exporter_version: '1.7.0'

# TCP port for --web.listen-address in the unit template; also used by the
# role's firewall rule and readiness check.
node_exporter_port: 9100
|
||||||
103
roles/node_exporter/tasks/main.yml
Normal file
103
roles/node_exporter/tasks/main.yml
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
---
# Installs Prometheus node_exporter from the upstream release tarball and runs
# it as a systemd service under a dedicated system account.
#
# The download/extract/install steps only run when the requested version is
# not already installed, and the service is restarted when the binary or the
# unit file changes.

- name: Install required packages
  apt:
    name:
      - wget
      - tar
    state: present
    update_cache: true
    cache_valid_time: 3600
  # The role is applied to `all` hosts; only use apt on Debian-family systems.
  # Falls back to the previous behaviour (always run) when facts are missing.
  when: (ansible_os_family | default('Debian')) == 'Debian'
  tags: node_exporter

- name: Create node_exporter user
  user:
    name: node_exporter
    system: true
    shell: /bin/false
    home: /nonexistent
    # Without this the user module asks useradd to create the home directory.
    create_home: false
    comment: "Node Exporter Service User"
  tags: node_exporter

- name: Check installed Node Exporter version
  command: /usr/local/bin/node_exporter --version
  register: node_exporter_installed
  changed_when: false
  # A missing binary is expected on first install.
  failed_when: false
  # Read-only probe; must also run in check mode so the condition below works.
  check_mode: false
  tags: node_exporter

- name: Download and install Node Exporter
  # The version banner is matched on stdout and stderr, since the stream it
  # is printed on is not relied upon here.
  when: >-
    ('version ' ~ node_exporter_version ~ ' ') not in
    ((node_exporter_installed.stdout | default('')) ~
     (node_exporter_installed.stderr | default('')))
  tags: node_exporter
  block:
    - name: Download Node Exporter
      get_url:
        url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz"
        dest: "/tmp/node_exporter-{{ node_exporter_version }}.tar.gz"
        timeout: 30
        # TLS certificate validation is left at its default (enabled): this
        # downloads an executable that will run on every host.

    - name: Extract Node Exporter
      unarchive:
        src: "/tmp/node_exporter-{{ node_exporter_version }}.tar.gz"
        dest: "/tmp/"
        remote_src: true
        creates: "/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64"

    - name: Install Node Exporter binary
      copy:
        src: "/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64/node_exporter"
        dest: "/usr/local/bin/node_exporter"
        # Owned by root so the unprivileged service account cannot replace
        # its own executable.
        owner: root
        group: root
        mode: '0755'
        remote_src: true
      register: node_exporter_binary

    - name: Clean up temp files
      file:
        path: "/tmp/node_exporter-{{ node_exporter_version }}.tar.gz"
        state: absent

    - name: Clean up extracted directory
      file:
        path: "/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64"
        state: absent

- name: Create systemd service for LXC
  template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
    owner: root
    group: root
    mode: '0644'
  register: node_exporter_unit
  tags: node_exporter

- name: Create textfile collector directory
  file:
    path: /var/lib/node_exporter/textfile_collector
    state: directory
    owner: node_exporter
    group: node_exporter
    mode: '0755'
  tags: node_exporter

- name: Enable and start Node Exporter
  systemd:
    name: node_exporter
    enabled: true
    # daemon_reload here replaces the former stand-alone "Reload systemd" task.
    daemon_reload: true
    # Restart only when the unit file or the binary changed; otherwise just
    # make sure the service is running.
    state: "{{ 'restarted' if ((node_exporter_unit is changed) or ((node_exporter_binary | default({})) is changed)) else 'started' }}"
  tags: node_exporter

- name: Check whether UFW is available
  shell: command -v ufw
  register: node_exporter_ufw
  changed_when: false
  failed_when: false
  check_mode: false
  tags: node_exporter

- name: Configure UFW for Node Exporter
  ufw:
    rule: allow
    port: "{{ node_exporter_port }}"
    proto: tcp
    comment: "Node Exporter metrics"
  # Hosts without ufw installed are skipped instead of failing the play.
  when: node_exporter_ufw.rc == 0
  tags: node_exporter

- name: Verify Node Exporter is running
  wait_for:
    port: "{{ node_exporter_port }}"
    host: "{{ ansible_host }}"
    delay: 3
    timeout: 60
  tags: node_exporter
|
||||||
30
roles/node_exporter/templates/node_exporter.service.j2
Normal file
30
roles/node_exporter/templates/node_exporter.service.j2
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# systemd unit for Prometheus node_exporter (rendered by Ansible).
[Unit]
Description=Node Exporter
After=network.target
Wants=network.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
# --collector.textfile.directory points at the directory created by the
# role's tasks; without it the textfile collector has nothing to read.
ExecStart=/usr/local/bin/node_exporter \
    --collector.cpu \
    --collector.diskstats \
    --collector.filesystem \
    --collector.loadavg \
    --collector.meminfo \
    --collector.netdev \
    --collector.netstat \
    --collector.stat \
    --collector.time \
    --collector.uname \
    --collector.vmstat \
    --collector.systemd \
    --collector.textfile \
    --collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
    --web.listen-address=:{{ node_exporter_port }}

Restart=always
RestartSec=3

[Install]
WantedBy=multi-user.target
|
||||||
32
roles/prometheus/defaults/main.yml
Normal file
32
roles/prometheus/defaults/main.yml
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
---
# Default variables for the prometheus role.

# Prometheus release and service account
prometheus_version: "2.48.1"
prometheus_user: prometheus
prometheus_group: prometheus

# Directories
prometheus_config_dir: /etc/prometheus
prometheus_data_dir: /var/lib/prometheus
prometheus_binary_dir: /usr/local/bin

# Service settings
prometheus_port: 9090
prometheus_retention: "7d"
prometheus_log_level: info

# VictoriaMetrics endpoint for remote_write
victoriametrics_host: "192.168.0.104"
victoriametrics_port: 8428
victoriametrics_url: "http://{{ victoriametrics_host }}:{{ victoriametrics_port }}/api/v1/write"

# Vault metrics scrape target
vault_host: "192.168.0.103"
vault_port: 8200
# Bearer token used to scrape Vault metrics. Taken from the VAULT_TOKEN
# environment variable on the control node when it is set; otherwise falls
# back to the previous literal value.
# NOTE(review): "root" is a committed credential. Supply the real token via
# VAULT_TOKEN or an ansible-vault encrypted variable, then drop the fallback.
vault_token: "{{ lookup('env', 'VAULT_TOKEN') | default('root', true) }}"

# Git/Forgejo
git_host: "192.168.0.100"
git_port: 3000

# Hosts scraped by the 'node' job (to be filled in later)
node_exporter_targets: []
|
||||||
13
roles/prometheus/handlers/main.yml
Normal file
13
roles/prometheus/handlers/main.yml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
---
# Handlers for the prometheus role. Names are referenced by `notify:` in the
# role's tasks and must not be changed.

- name: restart prometheus
  systemd:
    name: prometheus
    state: restarted
    daemon_reload: true
  become: true

- name: reload prometheus
  uri:
    url: "http://localhost:{{ prometheus_port }}/-/reload"
    method: POST
  # This handler may run immediately after "restart prometheus", before the
  # HTTP listener is accepting connections, so retry instead of failing on
  # the first refused connection.
  register: prometheus_reload
  until: prometheus_reload.status == 200
  retries: 10
  delay: 3
  become: true
|
||||||
121
roles/prometheus/tasks/main.yml
Normal file
121
roles/prometheus/tasks/main.yml
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
---
# Installs Prometheus from the upstream release tarball, renders its
# configuration and systemd unit, starts the service and reports its status.

- name: Create prometheus system group
  # The directories and config file below are chown'ed to this group, so it
  # must exist even when it is named differently from the user.
  group:
    name: "{{ prometheus_group }}"
    system: true
    state: present

- name: Create prometheus system user
  user:
    name: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    shell: /bin/false
    home: /nonexistent
    create_home: false
    system: true
    state: present

- name: Create prometheus directories
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    mode: '0755'
  loop:
    - "{{ prometheus_config_dir }}"
    - "{{ prometheus_config_dir }}/rules"
    - "{{ prometheus_config_dir }}/file_sd"
    - "{{ prometheus_data_dir }}"

- name: Check if Prometheus is already installed
  stat:
    path: "{{ prometheus_binary_dir }}/prometheus"
  register: prometheus_binary

- name: Check Prometheus version
  shell: "{{ prometheus_binary_dir }}/prometheus --version 2>&1 | head -1 | awk '{print $3}'"
  register: prometheus_installed_version
  when: prometheus_binary.stat.exists
  changed_when: false
  failed_when: false
  # Read-only probe: run it in check mode too, otherwise the result has no
  # `stdout` and the install condition below cannot be evaluated.
  check_mode: false

- name: Download and install Prometheus
  # Install when the binary is missing or reports a different version.
  when: >-
    not prometheus_binary.stat.exists or
    ((prometheus_installed_version.stdout | default('')) != prometheus_version)
  block:
    - name: Download Prometheus {{ prometheus_version }}
      unarchive:
        src: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz"
        dest: /tmp
        remote_src: true
        owner: root
        group: root
        mode: '0755'

    - name: Copy Prometheus binaries
      copy:
        src: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64/{{ item }}"
        dest: "{{ prometheus_binary_dir }}/{{ item }}"
        owner: root
        group: root
        mode: '0755'
        remote_src: true
      loop:
        - prometheus
        - promtool
      notify: restart prometheus

    # NOTE(review): assumes the release tarball ships `consoles/` and
    # `console_libraries/` - confirm before overriding prometheus_version.
    - name: Copy console libraries
      copy:
        src: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64/{{ item }}/"
        dest: "{{ prometheus_config_dir }}/{{ item }}/"
        owner: "{{ prometheus_user }}"
        group: "{{ prometheus_group }}"
        remote_src: true
      loop:
        - consoles
        - console_libraries

    - name: Clean up downloaded files
      file:
        path: "/tmp/prometheus-{{ prometheus_version }}.linux-amd64"
        state: absent

- name: Configure Prometheus
  template:
    src: prometheus.yml.j2
    dest: "{{ prometheus_config_dir }}/prometheus.yml"
    owner: "{{ prometheus_user }}"
    group: "{{ prometheus_group }}"
    # The rendered file contains the Vault bearer token, so it must not be
    # world-readable.
    mode: '0640'
    backup: true
    validate: "{{ prometheus_binary_dir }}/promtool check config %s"
  notify: reload prometheus

- name: Create Prometheus systemd service
  template:
    src: prometheus.service.j2
    dest: /etc/systemd/system/prometheus.service
    mode: '0644'
  notify: restart prometheus

- name: Ensure Prometheus is started and enabled
  systemd:
    name: prometheus
    state: started
    enabled: true
    daemon_reload: true

- name: Wait for Prometheus to be ready
  uri:
    url: "http://localhost:{{ prometheus_port }}/metrics"
    status_code: 200
  register: prometheus_health
  until: prometheus_health.status == 200
  retries: 10
  delay: 5

- name: Check Prometheus targets status
  uri:
    url: "http://localhost:{{ prometheus_port }}/api/v1/targets"
  register: targets_response

- name: Display Prometheus status
  debug:
    msg:
      - "Prometheus version: {{ prometheus_version }}"
      - "Prometheus URL: http://{{ ansible_host }}:{{ prometheus_port }}"
      - "Active targets: {{ targets_response.json.data.activeTargets | length }}"
|
||||||
29
roles/prometheus/templates/prometheus.service.j2
Normal file
29
roles/prometheus/templates/prometheus.service.j2
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# systemd unit for Prometheus (rendered by Ansible).
#
# --web.listen-address is set from prometheus_port so the service listens on
# the same port the role's health checks and reload handler talk to.
# --web.enable-admin-api exposes destructive endpoints (series deletion,
# snapshots) without authentication; it is now opt-in via
# `prometheus_enable_admin_api: true`.
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/
Wants=network-online.target
After=network-online.target

[Service]
User={{ prometheus_user }}
Group={{ prometheus_group }}
Type=simple
ExecStart={{ prometheus_binary_dir }}/prometheus \
    --config.file={{ prometheus_config_dir }}/prometheus.yml \
    --storage.tsdb.path={{ prometheus_data_dir }} \
    --storage.tsdb.retention.time={{ prometheus_retention }} \
    --web.listen-address=0.0.0.0:{{ prometheus_port }} \
    --web.console.templates={{ prometheus_config_dir }}/consoles \
    --web.console.libraries={{ prometheus_config_dir }}/console_libraries \
    --web.enable-lifecycle \
{% if prometheus_enable_admin_api | default(false) %}
    --web.enable-admin-api \
{% endif %}
    --log.level={{ prometheus_log_level }}

ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5
StandardOutput=journal
StandardError=journal
SyslogIdentifier=prometheus

[Install]
WantedBy=multi-user.target
|
||||||
80
roles/prometheus/templates/prometheus.yml.j2
Normal file
80
roles/prometheus/templates/prometheus.yml.j2
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# Ansible managed - do not edit manually
#
# Template note: Jinja control tags are kept at column 0 so they add no stray
# indentation to the rendered YAML.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: 'pve-monitoring'
    environment: 'production'
    prometheus_replica: '{{ ansible_hostname }}'

# Alertmanager configuration (optional)
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

# Load rules once and periodically evaluate them
rule_files:
  - "{{ prometheus_config_dir }}/rules/*.yml"

# Remote write to VictoriaMetrics
remote_write:
  - url: "{{ victoriametrics_url }}"
    queue_config:
      max_samples_per_send: 10000
      capacity: 20000
      max_shards: 30
    metadata_config:
      send: true
      send_interval: 1m

# Scrape configurations
scrape_configs:
  # Prometheus self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:{{ prometheus_port }}']
        labels:
          instance: 'prometheus'
          container: 'pvestandt6'

  # VictoriaMetrics monitoring
  - job_name: 'victoriametrics'
    static_configs:
      - targets: ['{{ victoriametrics_host }}:{{ victoriametrics_port }}']
        labels:
          instance: 'victoriametrics'
          container: 'pvestandt5'

  # Vault metrics
  - job_name: 'vault'
    metrics_path: '/v1/sys/metrics'
    params:
      format: ['prometheus']
    bearer_token: '{{ vault_token }}'
    static_configs:
      - targets: ['{{ vault_host }}:{{ vault_port }}']
        labels:
          instance: 'vault'
          container: 'pvestandt4'

  # Git/Forgejo metrics
  - job_name: 'gitea'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['{{ git_host }}:{{ git_port }}']
        labels:
          instance: 'git'
          container: 'pvestandt1'

  # Node exporters - populated from node_exporter_targets.
  # The port follows node_exporter_port when that variable is defined and
  # falls back to the previously hard-coded 9100 otherwise.
  - job_name: 'node'
    static_configs:
{% if node_exporter_targets | length > 0 %}
      - targets:
{% for target in node_exporter_targets %}
          - '{{ target }}:{{ node_exporter_port | default(9100) }}'
{% endfor %}
{% else %}
      - targets: []  # Will be populated after installing node_exporter
{% endif %}
|
||||||
Reference in New Issue
Block a user