阅读量:0
通过redfish协议实现服务器固件升级、从虚拟光驱启动自检盘并等待完成,最后截图保存
新开发的PCIE设备在做服务器适配时,有时需要服务器厂家更新BMC或BIOS固件。同时,我们也希望对PCIE设备做一些检测,最后收集一些信息存档。如果需要处理的服务器很多,通过BMC的界面进行人工操作就会比较麻烦。以下提供了一个脚本,供参考。
主要思路:
- 采用haneWIN NFS Server搭建一个NFS服务,目录为nfs,里面存放着boot.iso(设备检测镜像)
- 通过Redfish协议登录BMC,获取PCIE设备信息和服务器信息,升级固件,重启服务器,挂载ISO镜像,并设置启动方式
- 通过截屏获取KVM的画面内容,并利用图片相似度(SSIM)判断ISO里的检测程序是否运行完成。
版本信息
属性 | 值 |
---|---|
NFS服务器 | haneWIN NFS Server |
服务器型号 | NF5270M6 |
代码
# -*- coding: utf-8 -*-
"""Batch self-check of servers through the BMC Redfish interface.

For every server listed in a CSV file the script logs in to the BMC, checks
the PCIe devices, mounts ``boot.iso`` from an NFS share as virtual CD, boots
the server from it and watches KVM screenshots until the self-check program
inside the ISO has finished.
"""
import argparse
import codecs
import csv
import json
import logging
import os
import shutil
import sys
import threading
import time
import traceback
import uuid
import warnings
from queue import Queue

import cv2
import redfish
from skimage.metrics import structural_similarity

warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser()
parser.add_argument('-server_list', type=str, required=True,
                    help="CSV file listing the servers (first line is a header)")
parser.add_argument('-nfs_server', type=str, required=True,
                    help="nfs server")
parser.add_argument('-threads', type=int, required=True,
                    help="number of worker threads")
parser.add_argument('-checkonly', type=int, required=True,
                    help="1: only log in and check the PCIe devices")


class TimeSpan:
    """Context manager that logs the wall-clock time of a code section.

    Durations above 60 seconds are reported in minutes. Exceptions raised
    inside the section are not suppressed.
    """

    def __init__(self, logger, prefix=""):
        self.prefix = prefix
        self.logger = logger
        self.start = None
        self.end = None

    def __enter__(self):
        self.end = None
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time.time()
        interval = self.end - self.start
        unit = "sec"
        if interval > 60:
            unit = "min"
            interval = interval / 60
        self.logger.info('%-64s:%.3f(%s)' % (self.prefix, interval, unit))


def isSimilarity(filename, target_path='target_image.jpg', threshold=0.5):
    """Tell whether a KVM screenshot shows the "self-check finished" banner.

    The reference image is compared (SSIM) against windows of the screenshot
    taken at vertical offsets 280, 283, ... below 320, anchored at the left
    edge.

    Args:
        filename (str): path of the screenshot to inspect.
        target_path (str): path of the reference image.
        threshold (float): SSIM value above which a window counts as a match.

    Returns:
        bool: True when any window matches the reference image.

    Raises:
        FileNotFoundError: the reference image cannot be read.
    """
    reference = cv2.imread(target_path, cv2.IMREAD_GRAYSCALE)
    if reference is None:
        raise FileNotFoundError(
            "reference image not readable: {}".format(target_path))
    screenshot = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if screenshot is None:
        # cv2.imread returns None for a truncated/invalid download; treat it
        # as "not finished yet" so the caller simply takes another screenshot.
        return False

    cell_h, cell_w = reference.shape
    for hoff in range(280, 320, 3):
        window = screenshot[hoff:hoff + cell_h, 0:cell_w]
        if window.shape != reference.shape:
            # The screenshot is too small for this offset; larger offsets
            # can only produce even smaller windows.
            break
        if structural_similarity(reference, window) > threshold:
            return True
    return False


class RedFishProxy:
    """Thin wrapper around a redfish client that retries on HTTP 500.

    Each request is repeated up to ``retry_count`` times, two seconds apart,
    while the BMC answers with status 500; the last response is returned.
    """

    def __init__(self, handle, retry_count=3):
        self.handle = handle
        self.retry_count = retry_count

    def _request(self, method, *call_args):
        """Invoke ``method(*call_args)`` with the retry-on-500 policy."""
        attempt = 0
        while True:
            response = method(*call_args)
            if response._status == 500 and attempt < self.retry_count:
                time.sleep(2)
                attempt += 1
                continue
            return response

    def post(self, path, args=None, body=None, headers=None):
        return self._request(self.handle.post, path, args, body, headers)

    def get(self, path, args=None, headers=None):
        return self._request(self.handle.get, path, args, headers)

    def delete(self, path, args=None, headers=None):
        return self._request(self.handle.delete, path, args, headers)

    def patch(self, path, args=None, body=None, headers=None):
        return self._request(self.handle.patch, path, args, body, headers)


class InspurVA1Query:
    """Runs the self-check sequence against one BMC.

    All methods return True on success and False on failure unless stated
    otherwise; failures are written to ``logger``.
    """

    BOOT_ORDER_NAMES = ('UefiBootOrder1', 'UefiBootOrder2',
                        'UefiBootOrder3', 'UefiBootOrder4')

    def __init__(self, logger, index, bmc_host, username, password,
                 nfs_server, try_count):
        self.logger = logger
        self.nfs_server = nfs_server
        self.username = username
        self.password = password
        self.try_count = try_count
        self.bmc_host = bmc_host
        self.seq = 0
        self.token = None
        self.Id = None
        self.result_image = None
        self.index = index
        self.redfish_client = RedFishProxy(redfish.redfish_client(
            base_url=self.bmc_host, username=self.username,
            password=self.password))

    # ------------------------------------------------------------------
    # small helpers
    # ------------------------------------------------------------------
    def _auth_headers(self):
        """Return the request headers carrying the session token."""
        return {"X-Auth-Token": self.token}

    @staticmethod
    def _json_body(response):
        """Decode the raw body of a Redfish response as JSON."""
        return json.loads(response._read.decode())

    @staticmethod
    def _wait_for(condition, timeout, interval=1):
        """Poll ``condition()`` until it is true or ``timeout`` seconds pass."""
        deadline = time.time() + timeout
        while True:
            if condition():
                return True
            if time.time() >= deadline:
                return False
            time.sleep(interval)

    def _find_cd_boot_entry(self, attributes):
        """Return the name of the boot-order attribute set to CD/DVD, or ""."""
        for name in self.BOOT_ORDER_NAMES:
            if name in attributes and 'CD/DVD' in attributes[name]:
                return name
        return ""

    def _span(self, number, name):
        """Build the TimeSpan used to time one numbered step of run()."""
        return TimeSpan(self.logger, "*{}.{}-{}-{}".format(
            number, self.bmc_host, self.seq, name))

    # ------------------------------------------------------------------
    # session
    # ------------------------------------------------------------------
    def Login(self):
        """Create a Redfish session and remember its token and id."""
        url = '/redfish/v1/SessionService/Sessions'
        req_body = {"UserName": self.username,
                    "Password": self.password, "SessionTimeOut": 300}
        req_headers = {"Content-Type": "application/json"}
        response = self.redfish_client.post(
            url, headers=req_headers, body=req_body)
        if response._status == 201:
            session = self._json_body(response)
            self.token = session["Oem"]['Public']['X-Auth-Token']
            self.Id = session["Id"]
            return True
        self.logger.error("Login:{}".format(response))
        return False

    def Logout(self):
        """Delete the current session; returns False when not logged in."""
        if not self.token:
            return False
        url = '/redfish/v1/SessionService/Sessions/{}'.format(self.Id)
        response = self.redfish_client.delete(
            url, headers=self._auth_headers())
        self.token = None
        if response._status == 200:
            return True
        self.logger.error("Logout:{}".format(response))
        return False

    # ------------------------------------------------------------------
    # virtual media
    # ------------------------------------------------------------------
    def QueryMedia(self):
        """Log the virtual CD resource; always returns True."""
        url = '/redfish/v1/Managers/1/VirtualMedia/CD'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        self.logger.info("QueryMedia:{}".format(response))
        return True

    def IsMounted(self):
        """Return the ``Inserted`` flag of the virtual CD (False on error)."""
        url = '/redfish/v1/Managers/1/VirtualMedia/CD'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status == 200:
            body = self._json_body(response)
            if 'Inserted' in body:
                return body['Inserted']
            return False
        self.logger.error("IsMounted:{}".format(response))
        return False

    def InsertMedia(self, timeout=120):
        """Mount ``<nfs_server>/nfs/boot.iso`` as virtual CD.

        Args:
            timeout: seconds to wait for the BMC to report the image as
                inserted after the request was accepted.
        """
        url = ('/redfish/v1/Managers/1/VirtualMedia/CD/Actions/'
               'VirtualMedia.InsertMedia')
        req_body = {"TransferProtocolType": 'NFS',
                    "Image": '{}/nfs/boot.iso'.format(self.nfs_server)}
        response = self.redfish_client.post(
            url, headers=self._auth_headers(), body=req_body)
        # Poll with a pause and a deadline so that a BMC that never reports
        # the image as inserted cannot block the worker forever.
        if response._status == 200 and self._wait_for(self.IsMounted, timeout):
            return True
        self.logger.error("InsertMedia:{}".format(response))
        return False

    def EjectMedia(self, timeout=120):
        """Eject the virtual CD if one is mounted.

        Args:
            timeout: seconds to wait for the BMC to report the image as
                removed after the request was sent.
        """
        if not self.IsMounted():
            return True
        url = ('/redfish/v1/Managers/1/VirtualMedia/CD/Actions/'
               'VirtualMedia.EjectMedia')
        req_body = {"TransferProtocolType": 'NFS', "ImageName": 'boot.iso'}
        response = self.redfish_client.post(
            url, headers=self._auth_headers(), body=req_body)
        # Status 500 is accepted as well, as in the original script; the
        # decisive check is whether the image is gone afterwards.
        if response._status in [200, 500] and self._wait_for(
                lambda: not self.IsMounted(), timeout):
            return True
        self.logger.error("EjectMedia:{}".format(response))
        return False

    # ------------------------------------------------------------------
    # BIOS / boot configuration
    # ------------------------------------------------------------------
    def EnableVirtualCDBoot(self, timeout=400):
        """Make sure the BIOS boot order contains a CD/DVD entry.

        If ``FixedBootOrderEn`` is not ``Disabled`` or no boot-order slot is
        a CD/DVD, the setting is patched, the system is force-restarted and
        the BIOS resource is polled until the change is visible.

        Args:
            timeout: seconds to wait for the new BIOS settings to show up.
        """
        bios_url = '/redfish/v1/Systems/1/Bios'
        response = self.redfish_client.get(
            bios_url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("QueryBiosSetting1:{}".format(response))
            return False

        etag = response.getheader("ETag")
        attributes = self._json_body(response)['Attributes']
        cd_entry = self._find_cd_boot_entry(attributes)
        if attributes['FixedBootOrderEn'] == 'Disabled' and cd_entry != "":
            self.logger.info("CDBoot Already Enable:{} {} {}".format(
                attributes['FixedBootOrderEn'], cd_entry,
                attributes[cd_entry]))
            return True

        pending = {'FixedBootOrderEn': 'Disabled'}
        if cd_entry == "":
            # Use the last boot slot for the CD so that the normal boot
            # order is not affected.
            pending['UefiBootOrder4'] = "CD/DVD:UEFI: AMI Virtual CDROM0 1.00"
        req_headers = {"X-Auth-Token": self.token, "If-Match": etag}
        response = self.redfish_client.patch(
            '/redfish/v1/Systems/1/Bios/Settings',
            headers=req_headers, body={'Attributes': pending})
        if response._status != 200:
            self.logger.error("BiosSetting:{}".format(response))
            return False

        if not self.ComputerSystemReset("ForceRestart"):
            return False

        beg = time.time()
        while True:
            time.sleep(2)
            elapsed = time.time() - beg
            if elapsed > timeout:
                self.logger.error(
                    "EnableVirtualCDBoot Timeout:{}".format(elapsed))
                return False
            response = self.redfish_client.get(
                bios_url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("QueryBiosSetting2:{}".format(response))
                continue
            attributes = self._json_body(response)['Attributes']
            cd_entry = self._find_cd_boot_entry(attributes)
            if attributes['FixedBootOrderEn'] == 'Disabled' and cd_entry != "":
                self.logger.info("EnableVirtualCDBoot Finished:{} {} {}".format(
                    elapsed, cd_entry, attributes[cd_entry]))
                return True

    def _set_boot_override(self, label, boot, verify_key, attempts=10):
        """Patch ``Boot`` on the system resource and verify one property.

        Args:
            label: prefix used in log messages.
            boot: dict sent as the ``Boot`` object.
            verify_key: key of ``boot`` that is read back for confirmation.
            attempts: number of read-back attempts, one second apart.
        """
        url = '/redfish/v1/Systems/1'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("{}:{}".format(label, response))
            return False

        req_headers = {"X-Auth-Token": self.token,
                       "If-Match": response.getheader("ETag")}
        response = self.redfish_client.patch(
            url, headers=req_headers, body={'Boot': dict(boot)})
        if response._status != 200:
            self.logger.error("{}:{}".format(label, response))
            return False

        for attempt in range(attempts):
            if attempt:
                time.sleep(1)
            response = self.redfish_client.get(
                url, headers=self._auth_headers())
            if response._status != 200:
                continue
            current = self._json_body(response)['Boot'][verify_key]
            self.logger.info("{} {}:{}".format(label, verify_key, current))
            if current == boot[verify_key]:
                return True
        self.logger.error("{}:{}".format(label, response))
        return False

    def SetBootOrder(self):
        """Boot once from the (virtual) CD in UEFI mode on the next start."""
        boot = {'BootSourceOverrideTarget': 'Cd',
                'BootSourceOverrideEnabled': 'Once',
                'BootSourceOverrideMode': 'UEFI'}
        return self._set_boot_override(
            "SetBootOrder", boot, 'BootSourceOverrideTarget')

    def SetBootOrderLegacy(self):
        """Boot permanently from hard disk in UEFI mode.

        Despite the name the requested mode is ``UEFI`` (the original code
        carried a ``#UEFI Legacy`` note next to the value).
        """
        boot = {'BootSourceOverrideTarget': 'Hdd',
                'BootSourceOverrideEnabled': 'Continuous',
                'BootSourceOverrideMode': 'UEFI'}
        return self._set_boot_override(
            "SetBootOrderLegacy", boot, 'BootSourceOverrideMode')

    # ------------------------------------------------------------------
    # power control
    # ------------------------------------------------------------------
    def ComputerSystemReset(self, ResetType):
        """Send ``ComputerSystem.Reset`` with the given reset type."""
        url = '/redfish/v1/Systems/1/Actions/ComputerSystem.Reset'
        response = self.redfish_client.post(
            url, headers=self._auth_headers(), body={"ResetType": ResetType})
        if response._status == 200:
            return True
        self.logger.error("ComputerSystemReset:{}".format(response))
        return False

    def ChassisReset(self, ResetType, retry_count=3):
        """Send ``Chassis.Reset``; retried ``retry_count`` times on failure."""
        url = '/redfish/v1/Chassis/1/Actions/Chassis.Reset'
        for i in range(retry_count):
            response = self.redfish_client.post(
                url, headers=self._auth_headers(),
                body={"ResetType": ResetType})
            if response._status == 200:
                return True
            self.logger.error(
                "ChassisReset[{}-{}]:{}".format(i, ResetType, response))
            time.sleep(2)
        return False

    # ------------------------------------------------------------------
    # completion detection
    # ------------------------------------------------------------------
    def WaitFinished(self, timeout=5*60):
        """Poll KVM screenshots until the self-check banner is visible.

        A screenshot is triggered every three seconds, downloaded and passed
        to isSimilarity(); on a match the picture is moved to
        ``self.result_image``.

        Args:
            timeout: overall time budget in seconds.
        """
        shot_url = '/redfish/v1/Managers/1/Actions/Oem/Public/KVM/Screenshot'
        download_url = ('/redfish/v1/Managers/1/Actions/Oem/Public/KVM/'
                        'ScreenshotDownload')
        beg = time.time()
        while True:
            # Do not poll too fast; according to the original author this
            # can upset the BMC.
            time.sleep(3)
            if time.time() - beg > timeout:
                self.logger.error("WaitFinished,Timeout")
                return False
            response = self.redfish_client.post(
                shot_url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("Screenshot:{}".format(response))
                continue

            # While the picture is being generated do not trigger another
            # screenshot (that leads to a black screen); just retry the
            # download.
            retry_count = 10
            while retry_count > 0:
                if time.time() - beg > timeout:
                    self.logger.error("WaitFinished,Timeout")
                    return False
                response = self.redfish_client.post(
                    download_url, headers=self._auth_headers(),
                    body={"PictureAttributes": 'manual'})
                status = response._status
                if status == 404:
                    # The file is being generated
                    time.sleep(2)
                    retry_count -= 1
                    continue
                if status in (500, 401):
                    # 500: there are no manual pictures at present
                    # 401: invalid authentication
                    break
                if status == 200:
                    image_path = "{}-{}-{}-ing.jpg".format(
                        self.index, self.bmc_host, self.seq)
                    with open(image_path, "wb") as f:
                        f.write(response.read)
                    if isSimilarity(image_path):
                        shutil.move(image_path, self.result_image)
                        return True
                    break
                self.logger.error("ScreenshotDownload:{}".format(response))
                break

    # ------------------------------------------------------------------
    # PCIe inventory
    # ------------------------------------------------------------------
    def _log_pcie_device(self, i, attempts=30):
        """Log state and link information of PCIe device ``i`` (NVIDIA only).

        The BMC is queried up to ``attempts`` times, one second apart, until
        both the device and its function resource answer with status 200.
        """
        for _ in range(attempts):
            response = self.redfish_client.get(
                '/redfish/v1/Chassis/1/PCIeDevices/{}'.format(i),
                headers=self._auth_headers())
            if response._status != 200:
                time.sleep(1)
                continue
            body = self._json_body(response)
            Manufacturer = body.get('Manufacturer')
            if Manufacturer != "NVIDIA":
                # Not a device of interest: skip it instead of asking the
                # BMC for the same resource over and over.
                return
            State = body['Status']['State']
            Health = body['Status']['Health']
            SlotNumber = body['Oem']['Public']['SlotNumber']

            response = self.redfish_client.get(
                '/redfish/v1/Chassis/1/PCIeDevices/{}/PCIeFunctions/1'.format(i),
                headers=self._auth_headers())
            if response._status != 200:
                time.sleep(1)
                continue
            link = self._json_body(response)['Oem']['Public']
            self.logger.info("{} {} {} {} {} {} {}".format(
                i, Manufacturer, State, Health, SlotNumber,
                link['LinkWidth'], link['LinkSpeed']))
            return
        self.logger.error(
            "PCIeDevices[{}]: no valid answer after {} attempts".format(
                i, attempts))

    def PCIEDeviceSummary(self, target_dev_count=3):
        """Check the number of PCIe devices and log their link information.

        Args:
            target_dev_count: minimum number of devices expected.

        Returns:
            bool: False when a request fails or fewer devices than expected
            are present; True otherwise (including a powered-off server,
            which reports zero devices).
        """
        for label, url in (("Systems", '/redfish/v1/Systems/1'),
                           ("Bios", '/redfish/v1/Systems/1/Bios')):
            response = self.redfish_client.get(
                url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("{}:{}".format(label, response))
                return False

        response = self.redfish_client.get(
            '/redfish/v1/Chassis/1/PCIeDevices', headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("PCIeDevices1:{}".format(response))
            return False

        count = self._json_body(response)['Members@odata.count']
        if count == 0:
            # Server is powered off: nothing to check.
            self.logger.info("{} PowerStatus=Off".format(self.bmc_host))
            return True
        if count < target_dev_count:
            # A card has dropped off the bus.
            self.logger.error("VA1 Lost,Current:{}".format(count))
            return False
        for i in range(count):
            self._log_pcie_device(i)
        return True

    # ------------------------------------------------------------------
    # test sequence
    # ------------------------------------------------------------------
    def run(self):
        """Run the self-check sequence, up to ``try_count`` times.

        A try whose result image already exists is skipped. A failing step
        aborts the current try and moves on to the next one.

        Returns:
            bool: True when one try completed; False otherwise.
        """
        for i in range(self.try_count):
            self.seq = i
            self.result_image = "{}-{}-{}-done.jpg".format(
                self.index, self.bmc_host, self.seq)
            # Skip tries that already produced a result.
            if os.path.exists(self.result_image):
                continue
            try:
                # Total duration of this try.
                with TimeSpan(self.logger, "{}-{} InspurVA1QueryE2E:".format(
                        self.bmc_host, self.seq)):
                    # Create the Redfish session and obtain the token.
                    with self._span(1, "Login"):
                        self.Logout()
                        if not self.Login():
                            continue
                    # Check the number of cards; a lost card fails the try.
                    with self._span(2, "PCIEDeviceSummary"):
                        if not self.PCIEDeviceSummary():
                            self.logger.error("ERROR,{}".format(self.bmc_host))
                            continue
                    # Power off so the virtual CD is not in use when it is
                    # re-mounted.
                    with self._span(3, "PowerDown"):
                        if not self.ChassisReset("ForceOff"):
                            continue
                    with self._span(4, "EjectMedia"):
                        if not self.EjectMedia():
                            continue
                    # Mount the ISO from the NFS share.
                    with self._span(5, "InsertMedia"):
                        if not self.InsertMedia():
                            continue
                    with self._span(6, "PowerOn"):
                        if not self.ChassisReset("On"):
                            continue
                    # Make sure booting from the virtual CD is possible.
                    with self._span(7, "EnableVirtualCDBoot"):
                        if not self.EnableVirtualCDBoot():
                            continue
                    # Boot from the virtual CD on the next start.
                    with self._span(8, "SetBootOrder"):
                        if not self.SetBootOrder():
                            continue
                    # Restart; the server now boots from the CD.
                    with self._span(9, "ForceRestart"):
                        if not self.ComputerSystemReset("ForceRestart"):
                            return False
                        # NOTE(review): the first try only waits two minutes
                        # and then starts over; this mirrors the original
                        # script - confirm that it is intended.
                        if i == 0:
                            time.sleep(120)
                            continue
                    # Take KVM screenshots in a loop. When the check program
                    # in the ISO is done it prints "Please press Enter to
                    # activate this console"; the SSIM of the screenshot
                    # against a reference picture detects that line. Gives
                    # up after five minutes.
                    with self._span(10, "WaitFinished"):
                        if not self.WaitFinished():
                            continue
                    with self._span(11, "EjectMedia"):
                        if not self.EjectMedia():
                            continue
                    with self._span(12, "PowerCycle"):
                        if not self.ChassisReset('PowerCycle'):
                            continue
                    # Close the Redfish session.
                    with self._span(13, "Logout"):
                        if not self.Logout():
                            continue
                    return True
            except Exception:
                self.logger.error(
                    "{}-{} Failed:".format(self.bmc_host, self.seq))
                with open('traceback_info.txt', 'a+') as trace_file:
                    traceback.print_exc(file=trace_file)
        return False


def FetchThread(checkonly, index, q, nfs_server_addr=None):
    """Worker thread: processes server rows from ``q`` until it reads None.

    Args:
        checkonly: 1 to only log in and check the PCIe devices.
        index: worker number; used for the logger name and log file name.
        q: queue of CSV rows ``(index, bmc_addr, username, password,
            try_count)``; ``None`` stops the worker.
        nfs_server_addr: NFS server address; defaults to the module-level
            ``nfs_server`` set by the ``__main__`` section.
    """
    if nfs_server_addr is None:
        nfs_server_addr = nfs_server
    logger = None
    while True:
        row = q.get()  # blocks until a row (or the stop marker) arrives
        if row is None:
            break
        if logger is None:
            # Created lazily so that idle workers do not leave empty logs.
            logger = logging.getLogger("FetchThread:{}".format(index))
            logger.setLevel(level=logging.INFO)
            handler = logging.FileHandler(
                "nvidia_aic_check_inspur_{}.log".format(index))
            handler_ch = logging.StreamHandler()
            handler.setLevel(logging.INFO)
            handler_ch.setLevel(logging.INFO)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            handler_ch.setFormatter(formatter)
            logger.addHandler(handler)
            logger.addHandler(handler_ch)
        logger.info(row)
        t = None
        try:
            row_index, bmc_addr, username, password, try_count = row
            t = InspurVA1Query(logger, row_index, bmc_addr, username,
                               password, nfs_server_addr, int(try_count))
            if checkonly == 1:
                if t.Login() and t.PCIEDeviceSummary():
                    print("{} True".format(bmc_addr))
                else:
                    print("{} False".format(bmc_addr))
            else:
                # NOTE(review): the full self-check sequence is t.run(); the
                # published script has it commented out and instead restores
                # hard-disk boot and restarts the server. Kept unchanged.
                if t.Login():
                    t.SetBootOrderLegacy()
                    t.ComputerSystemReset("ForceRestart")
        except Exception:
            # One unreachable or misconfigured server must not kill the
            # worker: the bounded queue would then block the producer.
            logger.exception("{} Failed".format(row))
        finally:
            if t is not None and t.token:
                try:
                    t.Logout()
                except Exception:
                    logger.exception("Logout failed: {}".format(row))


if __name__ == '__main__':
    args = parser.parse_args()
    nfs_server = args.nfs_server.strip()
    server_list = args.server_list.strip()
    threads = args.threads
    checkonly = args.checkonly

    request_queue = Queue(threads)
    tasks = []
    for i in range(threads):
        t = threading.Thread(target=FetchThread,
                             args=(checkonly, i, request_queue, nfs_server))
        t.start()
        tasks.append(t)

    try:
        with open(server_list, "r", encoding='utf-8', newline='') as csvfile:
            csvreader = csv.reader(csvfile)
            next(csvreader, None)  # skip the header line
            for row in csvreader:
                if not row or row[0].startswith("#"):
                    continue
                request_queue.put(row)
    finally:
        # Always release the workers, even when the list cannot be read.
        for i in range(threads):
            request_queue.put(None)
        for t in tasks:
            t.join()

# Notes: restarting the system / the BMC by hand
#
# curl -X POST https://192.168.1.100/redfish/v1/Systems/1/Actions/ComputerSystem.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin
# curl -X POST https://192.168.1.100/redfish/v1/Managers/1/Actions/Manager.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin
# curl -X POST https://192.168.1.100/redfish/v1/Managers/1/Actions/Manager.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin