阅读量:3
Python 自动化巡检服务器并分析数据
1、期望目标:
- 通过 Python 实现 自动化巡检,包含CPU、内存指标、进程指标,并且生成表格
- 加入配置文件的概念配置,Python脚本会读取配置文件,根据配置文件的配置,去巡检服务器
- 实际效果:
- 执行py脚本使用
-configfile
指定配置文件,就可以实现我们想要巡检的机器、认证方式、生成表格的文件名
- 执行py脚本使用
以下开发坏境为:Python3.9.10
以上就单纯这么做的话就太 low 了,数据都采集出来了,肯定要做数据分析啊,反正都有数据之后再把数据分析加上
2、示例表格
主机名称 | IP 地址 | CPU 核心 | CPU 空闲率 | CPU 使用率 | 内存总大小 | 内存使用大小 |
---|---|---|---|---|---|---|
3、安装库依赖(以下的依赖包都是我在开发过程中使用到的,请现在服务器上安装好包)
# 更新pip python -m pip install --upgrade pip --trusted-host mirrors.aliyun.com # 先下来以下软件包,如果下载不下来就执行上面的更新命令,再下载 pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple paramiko pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple pandas pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple openpyxl pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple requests pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple tqdm pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple plotly
pip3 install wheel
- 安装了
wheel
包,wheel
是Python的一种打包格式,它比传统的.egg
格式更高效,可以显著加快包的安装速度。
pip3 install paramiko
- 安装了
paramiko
包,这是一个实现了SSHv2协议的Python库,常用于进行远程服务器上的文件传输和命令执行。
pip3 install pandas
- 安装了
pandas
包,这是一个强大的数据分析和操作库,提供了高性能的数据结构和数据分析工具。
pip3 install openpyxl
- 安装了
openpyxl
包,这是一个用于读写Microsoft Excel 2010 xlsx/xlsm/xltx/xltm文件格式的库。
pip3 install tqdm
- 显示进度条
pip3 install plotly
- 它提供了一种交互式的绘图体验,能够创建出既美观又功能丰富的图表
一、编写Py巡检脚本
1、编写 py 脚本
#!/usr/bin/python3 import openpyxl import paramiko import pandas as pd from openpyxl.styles import Alignment from datetime import datetime import requests import configparser import argparse import os from tqdm import tqdm def connect_server(servers, df): for server_info in tqdm(servers,desc="巡检服务器"): ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) try: if 'key_filename' in server_info: ssh.connect(server_info['hostname'], port=server_info['port'], username=server_info['username'], key_filename=server_info['key_filename']) elif 'password' in server_info: ssh.connect(server_info['hostname'], port=server_info['port'], username=server_info['username'], password=server_info['password']) _, stdout, _ = ssh.exec_command('hostname') hostname = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('cat /proc/cpuinfo | grep processor | wc -l') cpu_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command(r'top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/"') cpu_free = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('top -bn1 | grep "Cpu(s)" | awk \'{print $2}\'') user_cpu_usage = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('top -bn1 | grep "Cpu(s)" | awk \'{print $4}\'') system_cpu_usage = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -g | grep "Mem" | awk \'{print $2}\'') total_memory_gb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -m | grep "Mem" | awk \'{print $2}\'') total_memory_mb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -g | grep "Mem" | awk \'{print $NF}\'') application_free_gb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -m | grep "Mem" | awk \'{print $NF}\'') application_free_mb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -A -o pid= | wc -l') total_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^R\'') running_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^S\'') sleep_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^T\'') stop_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^T\'') zombie_process = stdout.read().decode('utf-8').strip() def get_public_ip(): response = requests.get('https://api.ipify.org') if response.status_code == 200: return response.text else: return None df = df.append({ 'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'Hostname': hostname, 'Local IP/EIP': f"{server_info['hostname']}/{get_public_ip()}", 'CPU Process': cpu_process, 'CPU Free (%)': cpu_free, 'User CPU Usage (%)': user_cpu_usage, 'System CPU Usage (%)': system_cpu_usage, 'Process Total/Running/Sleep/Stop/Zombie': f"{total_process}/{running_process}/{sleep_process}/{stop_process}/{zombie_process}", 'Total Memory Size (GB/MB)': f"{total_memory_gb}/{total_memory_mb}", 'Application Free Size (GB/MB)': f"{application_free_gb}/{application_free_mb}" }, ignore_index=True) except Exception as e: print(f"[Error] connecting to {server_info['hostname']}: {str(e)}") finally: ssh.close() return df def write_and_beautify_xlsx(dir, now_date_file, sheet_name, df): with pd.ExcelWriter(f'{dir}{now_date_file}', engine='openpyxl') as writer: df.to_excel(writer, sheet_name=f'{sheet_name}', index=False) print(f"[Success] Excel表格:{dir}{now_date_file} 已写出完毕") workbook = openpyxl.load_workbook(f'{dir}{now_date_file}') worksheet = workbook[f'{sheet_name}'] alignment = Alignment(horizontal='center', vertical='center') for row in worksheet.iter_rows(min_row=2, max_col=len(df.columns)): for cell in row: cell.alignment = alignment for column in worksheet.columns: max_length = max(len(str(cell.value)) for cell in column) + 6 worksheet.column_dimensions[column[0].column_letter].width = max_length worksheet.row_dimensions[1].height = 00 writer.book = workbook writer.save() print(f'[Success] Excel表格:{dir}{now_date_file} 已美化完毕') def read_configure(file_path): read_configure_list = list() config = configparser.ConfigParser() config.read(file_path) filename = config.get('Config', 'filename') dir = config.get('Config', 'dir') auth = config.get('Config', 'auth') if not dir.endswith(os.path.sep): dir = dir + os.path.sep if not os.path.exists(dir): os.makedirs(dir, exist_ok=True) if auth == "keys": for section in tqdm(config.sections(),desc=f"加载 {file_path} 配置文件"): if section.startswith('host'): hostname = config.get(section, 'hostname') port = config.get(section, 'port') username = config.get(section, 'username') key_filename = config.get(section, 'key_filename') read_configure_list.append( {'hostname': hostname, 'port': port, 'username': username, 'key_filename': key_filename} ) elif auth == "password": for section in tqdm(config.sections(),desc=f"加载 {file_path} 配置文件"): if section.startswith('host'): hostname = config.get(section, 'hostname') port = config.get(section, 'port') username = config.get(section, 'username') password = config.get(section, 'password') read_configure_list.append( {'hostname': hostname, 'port': port, 'username': username, 'password': password} ) return read_configure_list, filename, dir if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-configfile', type=str, default="None", help="指定配置文件") args = parser.parse_args() if args.configfile != "None": read_configure_list, filename, dir = read_configure(f"{args.configfile}") data = { 'Date': [], 'Hostname': [], 'Local IP/EIP': [], 'CPU Process': [], 'CPU Free (%)': [], 'User CPU Usage (%)': [], 'System CPU Usage (%)': [], 'Process Total/Running/Sleep/Stop/Zombie': [], 'Total Memory Size (GB/MB)': [], 'Application Free Size (GB/MB)': [] } df = pd.DataFrame(data) data_df = connect_server(read_configure_list, df) now_date_file = f"{filename}_{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.xlsx" sheet_name = 'Server Monitoring' write_and_beautify_xlsx(dir, now_date_file, sheet_name, data_df) else: print("Monitoring.py -h 查看帮助命令")
二、Pthony 脚本上传编写配置文件
1、上传 Py 脚本
[14:07:48] root@ansible-server:~ $ 110 ==> mkdir /root/python/ [14:07:58] root@ansible-server:~ $ 111 ==> cd /root/python/ # 将以上的代码内容粘贴到这个文件当中 [14:08:00] root@ansible-server:~/python $ 112 ==> vim Monitoring.py
2、赋予权限
[14:09:06] root@ansible-server:~/python $ 123 ==> chmod +x Monitoring.py [14:10:03] root@ansible-server:~/python $ 126 ==> mv Monitoring.py /usr/bin/Monitoring
3、查看帮助命令
[14:10:30] root@ansible-server:~/python $ 128 ==> Monitoring -h optional arguments: -h, --help show this help message and exit -configfile CONFIGFILE 指定配置文件
4、编写自动化巡检脚本的配置文件(以下我演示的是使用密钥连接服务器巡检,当然也支持使用密码连接服务器)
[14:15:42] root@ansible-server:~/python $ 132 ==> vim monitoring.conf # Config:为公共配置区域 [Config] # filename:配置生成巡检文件的名称 filename = server # dir:配置巡检文件存储的目录位置(目录不存在会创建) dir = /root/python/xlsx # auth:定义主机认证方式,keys 代表使用密钥连接服务器 password 代表使用密码连接服务器 auth = keys # 主机1配置(需要巡检的主机1) [host1] # 连接的IP地址 hostname = 192.168.100.102 # 端口 port = 22 # 用户名 username = root # 密钥文件(如果你上面定义的 auth=password 那就是使用密码连接服务器,这里就不用谢 key_filename 写 password 指定服务器密码即可) key_filename = /root/.ssh/id_rsa # 主机2配置(跟主机1配置的意思一样)需要巡检的主机2 [host2] hostname = 192.168.100.103 port = 22 username = root key_filename = /root/.ssh/id_rsa
三、运行 Python 脚本测试
1、运行 python 脚本,并指定配置文件位置
[14:19:12] root@ansible-server:~/python $ 134 ==> ls monitoring.conf # 通过 -configfile 参数指定配置文件位置 [14:19:13] root@ansible-server:~/python $ 135 ==> Monitoring -configfile monitoring.conf
2、查看运行结果
3、将 xlsx 表格下载到 Windows 上查看结果
四、增加数据分析功能
1、编写 Python 脚本
#!/usr/bin/python3 import openpyxl import paramiko from openpyxl.styles import Alignment from datetime import datetime import requests import configparser import argparse import os from tqdm import tqdm import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots def connect_server(servers, df): for server_info in tqdm(servers,desc="巡检服务器"): ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) try: if 'key_filename' in server_info: ssh.connect(server_info['hostname'], port=server_info['port'], username=server_info['username'], key_filename=server_info['key_filename']) elif 'password' in server_info: ssh.connect(server_info['hostname'], port=server_info['port'], username=server_info['username'], password=server_info['password']) _, stdout, _ = ssh.exec_command('hostname') hostname = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('cat /proc/cpuinfo | grep processor | wc -l') cpu_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command(r'top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/"') cpu_free = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('top -bn1 | grep "Cpu(s)" | awk \'{print $2}\'') user_cpu_usage = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('top -bn1 | grep "Cpu(s)" | awk \'{print $4}\'') system_cpu_usage = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -g | grep "Mem" | awk \'{print $2}\'') total_memory_gb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -m | grep "Mem" | awk \'{print $2}\'') total_memory_mb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -g | grep "Mem" | awk \'{print $NF}\'') application_free_gb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('free -m | grep "Mem" | awk \'{print $NF}\'') application_free_mb = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -A -o pid= | wc -l') total_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^R\'') running_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^S\'') sleep_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^T\'') stop_process = stdout.read().decode('utf-8').strip() _, stdout, _ = ssh.exec_command('ps -eo state | grep -c \'^T\'') zombie_process = stdout.read().decode('utf-8').strip() def get_public_ip(): response = requests.get('https://api.ipify.org') if response.status_code == 200: return response.text else: return None df = df.append({ 'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'Hostname': hostname, 'Local IP/EIP': f"{server_info['hostname']}/{get_public_ip()}", 'CPU Process': cpu_process, 'CPU Free (%)': cpu_free, 'User CPU Usage (%)': user_cpu_usage, 'System CPU Usage (%)': system_cpu_usage, 'Process Total/Running/Sleep/Stop/Zombie': f"{total_process}/{running_process}/{sleep_process}/{stop_process}/{zombie_process}", 'Total Memory Size (GB/MB)': f"{total_memory_gb}/{total_memory_mb}", 'Application Free Size (GB/MB)': f"{application_free_gb}/{application_free_mb}" }, ignore_index=True) except Exception as e: print(f"[Error] connecting to {server_info['hostname']}: {str(e)}") finally: ssh.close() return df def write_and_beautify_xlsx(dir, now_date_file, sheet_name, df): with pd.ExcelWriter(f'{dir}{now_date_file}', engine='openpyxl') as writer: df.to_excel(writer, sheet_name=f'{sheet_name}', index=False) print(f"[Success] Excel表格:{dir}{now_date_file} 已写出完毕") workbook = openpyxl.load_workbook(f'{dir}{now_date_file}') worksheet = workbook[f'{sheet_name}'] alignment = Alignment(horizontal='center', vertical='center') for row in worksheet.iter_rows(min_row=2, max_col=len(df.columns)): for cell in row: cell.alignment = alignment for column in worksheet.columns: max_length = max(len(str(cell.value)) for cell in column) + 6 worksheet.column_dimensions[column[0].column_letter].width = max_length worksheet.row_dimensions[1].height = 00 writer.book = workbook writer.save() print(f'[Success] Excel表格:{dir}{now_date_file} 已美化完毕') def read_configure(file_path): read_configure_list = list() config = configparser.ConfigParser() config.read(file_path) filename = config.get('Config', 'filename') dir = config.get('Config', 'dir') auth = config.get('Config', 'auth') if not dir.endswith(os.path.sep): dir = dir + os.path.sep if not os.path.exists(dir): os.makedirs(dir, exist_ok=True) if auth == "keys": for section in tqdm(config.sections(),desc=f"加载 {file_path} 配置文件"): if section.startswith('host'): hostname = config.get(section, 'hostname') port = config.get(section, 'port') username = config.get(section, 'username') key_filename = config.get(section, 'key_filename') read_configure_list.append( {'hostname': hostname, 'port': port, 'username': username, 'key_filename': key_filename} ) elif auth == "password": for section in tqdm(config.sections(),desc=f"加载 {file_path} 配置文件"): if section.startswith('host'): hostname = config.get(section, 'hostname') port = config.get(section, 'port') username = config.get(section, 'username') password = config.get(section, 'password') read_configure_list.append( {'hostname': hostname, 'port': port, 'username': username, 'password': password} ) return read_configure_list, filename, dir def data_translate(dir,file): df = pd.read_excel(fr'{dir}{file}',engine='openpyxl') df['Date'] = pd.to_datetime(df['Date']) columns_to_split = ['Process Total', 'Running', 'Sleep', 'Stop', 'Zombie'] df[columns_to_split] = df['Process Total/Running/Sleep/Stop/Zombie'].str.split('/', expand=True).astype(int) memory_columns = ['Total Memory Size (GB/MB)', 'Application Free Size (GB/MB)'] df[['Total Memory GB', 'Total Memory MB']] = df[memory_columns[0]].str.split('/', expand=True).astype(float) df[['App Free Memory GB', 'App Free Memory MB']] = df[memory_columns[1]].str.split('/', expand=True).astype(float) num_hosts = df['Hostname'].nunique() specs = [[{'type': 'xy'}] * num_hosts, [{'type': 'xy'}] * num_hosts, [{'type': 'domain'}] * num_hosts] fig = make_subplots(rows=3, cols=num_hosts, specs=specs, subplot_titles=df['Hostname'].unique().tolist() * 3, vertical_spacing=0.1) for index, hostname in tqdm(enumerate(df['Hostname'].unique()),desc="数据分析中"): host_df = df[df['Hostname'] == hostname] for i, metric in enumerate(['CPU Process', 'CPU Free (%)', 'User CPU Usage (%)', 'System CPU Usage (%)']): mean_metric = host_df[metric].mean() fig.add_trace( go.Bar(x=[hostname], y=[mean_metric], name=metric, offsetgroup=i), row=1, col=index + 1 ) for i, process in enumerate(columns_to_split): process_count = host_df[process].sum() fig.add_trace( go.Bar(x=[hostname], y=[process_count], name=process, offsetgroup=i), row=2, col=index + 1 ) for index, hostname in enumerate(df['Hostname'].unique()): host_df = df[df['Hostname'] == hostname] total_memory_mb = host_df['Total Memory MB'].iloc[0] app_free_memory_mb = host_df['App Free Memory MB'].iloc[0] used_memory_mb = total_memory_mb - app_free_memory_mb fig.add_trace( go.Pie(labels=['Used Memory', 'Free Memory'], values=[used_memory_mb, app_free_memory_mb], name=f'Memory Usage for {hostname}', hole=.4), row=3, col=index + 1 ) fig.update_layout( height=1500, width=1000 * num_hosts, showlegend=False, title_text="CPU 数据、进程状态数据分析及内存使用情况", xaxis_title="Hostname", yaxis_title="% CPU Usage", yaxis2_title="进程状态数量", yaxis3_title="内存状态") for i in range(1, num_hosts + 1): fig.update_yaxes(title_text="% CPU Usage", row=1, col=i) fig.update_yaxes(title_text="进程状态数量", row=2, col=i) fig.update_yaxes(title_text="内存状态", row=3, col=i) file2 = file.split('.')[0] fig.write_html(f"{dir}{file2}.html") print(f"[Success] 数据分析:{dir}{file2}.html 已分析完毕") if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-configfile', type=str, default="None", help="指定配置文件") args = parser.parse_args() if args.configfile != "None": read_configure_list, filename, dir = read_configure(f"{args.configfile}") data = { 'Date': [], 'Hostname': [], 'Local IP/EIP': [], 'CPU Process': [], 'CPU Free (%)': [], 'User CPU Usage (%)': [], 'System CPU Usage (%)': [], 'Process Total/Running/Sleep/Stop/Zombie': [], 'Total Memory Size (GB/MB)': [], 'Application Free Size (GB/MB)': [] } df = pd.DataFrame(data) data_df = connect_server(read_configure_list, df) now_date_file = f"{filename}_{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.xlsx" sheet_name = 'Server Monitoring' write_and_beautify_xlsx(dir, now_date_file, sheet_name, data_df) data_translate(dir,now_date_file) else: print("Monitoring.py -h 查看帮助命令")
2、将以上新加的数据分析功能加入到py脚本中
[16:23:43] root@ansible-server:~/python $ 175 ==> vim /usr/bin/Monitoring
3、查看配置文件内容
[17:36:20] root@ansible-server:~/python $ 201 ==> pwd /root/python [17:38:45] root@ansible-server:~/python $ 202 ==> cat monitoring.conf #Config:为公共配置区域 [Config] # filename:配置生成巡检文件的名称 filename = server # dir:配置巡检文件存储的目录位置(目录不存在会创建) dir = /root/python/xlsx # auth:定义主机认证方式,keys 代表使用密钥连接服务器 password 代表使用密码连接服务器 auth = keys # 主机1配置(需要巡检的主机1) [host1] # 连接的IP地址 hostname = 192.168.100.102 # 端口 port = 22 # 用户名 username = root # 密钥文件(如果你上面定义的 auth=password 那就是使用密码连接服务器,这里就不用谢 key_filename 写 password 指定服务器密码即可) key_filename = /root/.ssh/id_rsa # 主机2配置(跟主机1配置的意思一样)需要巡检的主机2 [host2] hostname = 192.168.100.103 port = 22 username = root key_filename = /root/.ssh/id_rsa
4、运行 python 脚本,并指定配置文件位置
[17:31:54] root@ansible-server:~/python $ 190 ==> pwd /root/python [17:31:55] root@ansible-server:~/python $ 191 ==> ls monitoring.conf xlsx [17:30:15] root@ansible-server:~/python $ 186 ==> Monitoring -configfile monitoring.conf
5、查看存储目录是否生成了巡检文件和数据分析文件
[17:40:49] root@ansible-server:~/python $ 210 ==> pwd /root/python [17:41:25] root@ansible-server:~/python $ 211 ==> ls xlsx/ server_2024-07-12-17-40-49.html server_2024-07-12-17-40-49.xlsx
6、将 xlsx文件 和 html文件全部下载到Windows上
[17:43:14] root@ansible-server:~/python $ ==> sz xlsx/server_2024-07-12-17-40-49.xlsx [17:43:18] root@ansible-server:~/python $ ==> sz xlsx/server_2024-07-12-17-40-49.html