通过redfish协议实现服务器固件升级、从虚拟光驱启动自检盘并等待完成,最后截图保存

avatar
作者
筋斗云
阅读量:0

通过redfish协议实现服务器固件升级、从虚拟光驱启动自检盘并等待完成,最后截图保存

新开发的PCIE设备在做服务器适配时,有时需要服务器厂家更新BMC或BIOS固件。同时,我们也希望对PCIE设备做一些检测,最后收集一些信息存档。如果需要处理的服务器很多,通过BMC的界面进行人工操作就会比较麻烦。以下提供了一个脚本,供参考。

主要思路:

  • 采用haneWIN NFS Server搭建一个NFS服务,目录为nfs,里面存放着boot.iso(设备检测镜像)
  • 通过redfish协议登录BMC,获取PCIE设备信息,服务器信息,升级固件,重启服务器,挂iso,设置启动方式
  • 截屏获取KVM的内容,通过图片相似度(SSIM)的方法判断ISO里的检测程序是否运行完成。

版本信息

属性
NFS服务器:haneWIN NFS Server
服务器型号:NF5270M6

代码

# -*- coding: utf-8 -*-
"""Batch server self-check driven through the BMC's Redfish interface.

For every server listed in a CSV file the script can log in to the BMC,
summarise the PCIe devices, mount ``boot.iso`` from an NFS share as virtual
CD, boot from it, and poll KVM screenshots until the self-check program on
the ISO reports completion (detected by SSIM against ``target_image.jpg``).

Usage:
    python script.py -server_list servers.csv -nfs_server HOST -threads N -checkonly 0|1

The CSV has a header row followed by rows of
``index,bmc_addr,username,password,try_count``; rows whose first column
starts with ``#`` are skipped.
"""
from queue import Queue
from skimage.metrics import structural_similarity
import traceback
import cv2
import codecs
import csv
import argparse
import shutil
import json
import time
import redfish
import sys
import os
import uuid
import threading
import warnings
import logging

warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser()
parser.add_argument('-server_list', type=str,
                    required=True, help="server_list")
parser.add_argument('-nfs_server', type=str, required=True, help="nfs server")
parser.add_argument('-threads', type=int, required=True,
                    help="number of worker threads")
parser.add_argument('-checkonly', type=int, required=True, help="check only")


class TimeSpan:
    """Context manager that logs the wall-clock time spent in its body.

    Args:
        logger: object with an ``info(str)`` method.
        prefix: label printed in front of the measured duration.
    """

    def __init__(self, logger, prefix=""):
        self.prefix = prefix
        self.logger = logger

    def __enter__(self):
        self.end = None
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time.time()
        interval = self.end - self.start
        unit = "sec"
        # Durations above one minute are reported in minutes.
        if interval > 60:
            unit = "min"
            interval = interval / 60
        self.logger.info('%-64s:%.3f(%s)' % (self.prefix, interval, unit))


def isSimilarity(filename, target_path='target_image.jpg', threshold=0.5,
                 row_start=280, row_stop=320, row_step=3):
    """Decide whether the self-check program has finished.

    A band of the screenshot, anchored at the left edge and as large as the
    template image, is slid vertically from ``row_start`` (inclusive) to
    ``row_stop`` (exclusive) in ``row_step`` pixel steps; the screenshot
    matches as soon as one band has an SSIM above ``threshold``.

    Args:
        filename: path of the screenshot to inspect.
        target_path: path of the template image showing the finished state.
        threshold: minimum SSIM that counts as a match.
        row_start: first vertical offset tried.
        row_stop: offset at which the scan stops.
        row_step: vertical step between offsets.

    Returns:
        bool: True if the template was found, False otherwise (including
        when the screenshot is unreadable or too small for the template).

    Raises:
        FileNotFoundError: if the template image cannot be read.
    """
    template = cv2.imread(target_path, cv2.IMREAD_GRAYSCALE)
    if template is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(
            "cannot read template image: {}".format(target_path))
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return False

    cell_h, cell_w = template.shape
    img_h, img_w = img.shape
    if img_w < cell_w:
        return False

    for hoff in range(row_start, row_stop, row_step):
        if hoff + cell_h > img_h:
            # A truncated band would have a different shape than the
            # template, which structural_similarity rejects.
            break
        window = img[hoff:hoff + cell_h, 0:cell_w]
        if structural_similarity(template, window) > threshold:
            return True
    return False


class RedFishProxy:
    """Thin wrapper around a redfish client that retries on HTTP 500.

    Args:
        handle: client object exposing ``post``/``get``/``delete``/``patch``.
        retry_count: how many times a request answered with status 500 is
            repeated before the 500 response is handed back to the caller.
    """

    # Pause between two attempts, in seconds.
    RETRY_DELAY_SEC = 2

    def __init__(self, handle, retry_count=3):
        self.handle = handle
        self.retry_count = retry_count

    def _call(self, method, *call_args):
        """Invoke ``method`` and repeat it while the answer is status 500."""
        attempts = 0
        while True:
            response = method(*call_args)
            if response._status == 500 and attempts < self.retry_count:
                time.sleep(self.RETRY_DELAY_SEC)
                attempts += 1
                continue
            return response

    def post(self, path, args=None, body=None, headers=None):
        return self._call(self.handle.post, path, args, body, headers)

    def get(self, path, args=None, headers=None):
        return self._call(self.handle.get, path, args, headers)

    def delete(self, path, args=None, headers=None):
        return self._call(self.handle.delete, path, args, headers)

    def patch(self, path, args=None, body=None, headers=None):
        return self._call(self.handle.patch, path, args, body, headers)


class InspurVA1Query:
    """Self-check sequence for one server, executed through its BMC.

    Args:
        logger: logger used for all progress and error output.
        index: identifier of the server row; used in screenshot file names.
        bmc_host: base URL of the BMC, passed to ``redfish.redfish_client``.
        username: BMC user name.
        password: BMC password.
        nfs_server: NFS host that exports ``/nfs/boot.iso``.
        try_count: number of passes ``run`` may make.
    """

    # BIOS attributes that may hold the CD/DVD boot entry.
    _BOOT_ORDER_KEYS = ('UefiBootOrder1', 'UefiBootOrder2',
                        'UefiBootOrder3', 'UefiBootOrder4')

    def __init__(self, logger, index, bmc_host, username, password, nfs_server, try_count):
        self.logger = logger
        self.nfs_server = nfs_server
        self.username = username
        self.password = password
        self.try_count = try_count
        self.bmc_host = bmc_host
        self.seq = 0
        self.token = None
        self.Id = None
        self.index = index
        # run() overwrites this per pass; the default keeps WaitFinished
        # usable when it is called on its own.
        self.result_image = "{}-{}-{}-done.jpg".format(self.index, self.bmc_host, self.seq)
        self.redfish_client = RedFishProxy(redfish.redfish_client(
            base_url=self.bmc_host, username=self.username, password=self.password))

    # ------------------------------------------------------------------ helpers
    def _auth_headers(self):
        """Return the request headers carrying the current session token."""
        return {"X-Auth-Token": self.token}

    @staticmethod
    def _json(response):
        """Decode the raw body of ``response`` as JSON."""
        return json.loads(response._read.decode())

    @classmethod
    def _find_cd_boot_entry(cls, attributes):
        """Return the first boot-order key whose value mentions CD/DVD, else ''."""
        for name in cls._BOOT_ORDER_KEYS:
            if name in attributes and 'CD/DVD' in attributes[name]:
                return name
        return ""

    def _wait_mounted(self, expected, timeout, poll_interval=1):
        """Poll IsMounted() until it equals ``expected`` or ``timeout`` elapses."""
        deadline = time.time() + timeout
        while True:
            if bool(self.IsMounted()) == expected:
                return True
            if time.time() >= deadline:
                return False
            time.sleep(poll_interval)

    # ------------------------------------------------------------------ session
    def Login(self):
        """Create a Redfish session and store its token and id.

        Returns:
            bool: True when the BMC answered 201, False otherwise.
        """
        url = '/redfish/v1/SessionService/Sessions'
        req_body = {"UserName": self.username,
                    "Password": self.password, "SessionTimeOut": 300}
        req_headers = {"Content-Type": "application/json"}
        response = self.redfish_client.post(
            url, headers=req_headers, body=req_body)
        if response._status == 201:
            session = self._json(response)
            self.token = session["Oem"]['Public']['X-Auth-Token']
            self.Id = session["Id"]
            return True
        self.logger.error("Login:{}".format(response))
        return False

    def Logout(self):
        """Delete the current session, if any.

        Returns:
            bool: True when a session existed and the BMC answered 200.
        """
        if not self.token:
            return False
        url = '/redfish/v1/SessionService/Sessions/{}'.format(self.Id)
        response = self.redfish_client.delete(url, headers=self._auth_headers())
        self.token = None
        if response._status == 200:
            return True
        self.logger.error("Logout:{}".format(response))
        return False

    # ------------------------------------------------------------ virtual media
    def QueryMedia(self):
        """Log the virtual CD resource; always returns True."""
        url = '/redfish/v1/Managers/1/VirtualMedia/CD'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        self.logger.info("QueryMedia:{}".format(response))
        return True

    def IsMounted(self):
        """Report whether the virtual CD currently has an image inserted.

        Returns:
            The ``Inserted`` field of the virtual CD resource, or False when
            the field is absent or the query failed.
        """
        url = '/redfish/v1/Managers/1/VirtualMedia/CD'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status == 200:
            body = self._json(response)
            if 'Inserted' in body:
                return body['Inserted']
            return False
        self.logger.error("IsMounted:{}".format(response))
        return False

    def InsertMedia(self, timeout=60):
        """Mount ``<nfs_server>/nfs/boot.iso`` as virtual CD.

        Args:
            timeout: seconds to wait for the BMC to report the image as
                inserted before giving up.

        Returns:
            bool: True once the image is reported as inserted.
        """
        url = '/redfish/v1/Managers/1/VirtualMedia/CD/Actions/VirtualMedia.InsertMedia'
        req_body = {"TransferProtocolType": 'NFS',
                    "Image": '{}/nfs/boot.iso'.format(self.nfs_server)}
        response = self.redfish_client.post(
            url, headers=self._auth_headers(), body=req_body)
        if response._status == 200:
            if self._wait_mounted(True, timeout):
                return True
            self.logger.error("InsertMedia Timeout:{}".format(timeout))
            return False
        self.logger.error("InsertMedia:{}".format(response))
        return False

    def EjectMedia(self, timeout=60):
        """Eject the virtual CD image if one is mounted.

        Args:
            timeout: seconds to wait for the BMC to report the image as
                removed before giving up.

        Returns:
            bool: True when nothing is mounted (any more).
        """
        if not self.IsMounted():
            return True
        url = '/redfish/v1/Managers/1/VirtualMedia/CD/Actions/VirtualMedia.EjectMedia'
        req_body = {"TransferProtocolType": 'NFS', "ImageName": 'boot.iso'}
        response = self.redfish_client.post(
            url, headers=self._auth_headers(), body=req_body)
        # Status 500 is accepted as well; the mount state decides the outcome.
        if response._status in [200, 500]:
            if self._wait_mounted(False, timeout):
                return True
            self.logger.error("EjectMedia Timeout:{}".format(timeout))
            return False
        self.logger.error("EjectMedia:{}".format(response))
        return False

    # ------------------------------------------------------------ boot settings
    def EnableVirtualCDBoot(self, timeout=400):
        """Make sure the BIOS boot order contains a CD/DVD entry.

        When ``FixedBootOrderEn`` is already ``Disabled`` and a CD/DVD entry
        exists nothing is changed. Otherwise the setting is patched, the
        system is force-restarted and the BIOS resource is polled until the
        change is visible.

        Args:
            timeout: seconds to wait for the new setting after the restart.

        Returns:
            bool: True when CD boot is (now) enabled.
        """
        bios_url = '/redfish/v1/Systems/1/Bios'
        response = self.redfish_client.get(bios_url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("QueryBiosSetting1:{}".format(response))
            return False

        ETag = response.getheader("ETag")
        attributes = self._json(response)['Attributes']
        cureOrder = self._find_cd_boot_entry(attributes)
        if attributes['FixedBootOrderEn'] == 'Disabled' and cureOrder != "":
            self.logger.info("CDBoot Already Enable:{} {} {}".format(
                attributes['FixedBootOrderEn'], cureOrder, attributes[cureOrder]))
            return True

        req_headers = {"X-Auth-Token": self.token, "If-Match": ETag}
        req_body = {'Attributes': {'FixedBootOrderEn': 'Disabled'}}
        if cureOrder == "":
            # Use the last boot slot for the CD so the normal boot order is
            # not disturbed.
            req_body['Attributes']['UefiBootOrder4'] = "CD/DVD:UEFI: AMI Virtual CDROM0 1.00"
        response = self.redfish_client.patch(
            '/redfish/v1/Systems/1/Bios/Settings', headers=req_headers, body=req_body)
        if response._status != 200:
            self.logger.error("BiosSetting:{}".format(response))
            return False

        if not self.ComputerSystemReset("ForceRestart"):
            return False

        beg = time.time()
        while True:
            time.sleep(2)
            cur = time.time()
            if cur - beg > timeout:
                self.logger.error("EnableVirtualCDBoot Timeout:{}".format(cur - beg))
                return False
            response = self.redfish_client.get(bios_url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("QueryBiosSetting2:{}".format(response))
                continue
            attributes = self._json(response)['Attributes']
            cureOrder = self._find_cd_boot_entry(attributes)
            if attributes['FixedBootOrderEn'] == 'Disabled' and cureOrder != "":
                self.logger.info("EnableVirtualCDBoot Finished:{} {} {}".format(
                    cur - beg, cureOrder, attributes[cureOrder]))
                return True

    def _apply_boot_override(self, target, enabled, mode, verify_key, verify_value, polls=10):
        """Patch the boot override of Systems/1 and confirm it by reading back.

        Returns True once ``Boot[verify_key]`` equals ``verify_value``.
        """
        url = '/redfish/v1/Systems/1'
        response = self.redfish_client.get(url, headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("SetBootOrder:{}".format(response))
            return False

        req_headers = {"X-Auth-Token": self.token,
                       "If-Match": response.getheader("ETag")}
        req_body = {'Boot': {'BootSourceOverrideTarget': target,
                             'BootSourceOverrideEnabled': enabled,
                             'BootSourceOverrideMode': mode}}
        response = self.redfish_client.patch(
            url, headers=req_headers, body=req_body)
        if response._status != 200:
            self.logger.error("SetBootOrder:{}".format(response))
            return False

        for _ in range(polls):
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status == 200:
                boot = self._json(response).get('Boot', {})
                self.logger.info("{}:{}".format(verify_key, boot.get(verify_key)))
                if boot.get(verify_key) == verify_value:
                    return True
            # Give the BMC a moment before reading the setting back again.
            time.sleep(1)
        self.logger.error("SetBootOrder:{}".format(response))
        return False

    def SetBootOrder(self):
        """Boot from CD once (UEFI) on the next start.

        Returns:
            bool: True when the BMC reports ``Cd`` as override target.
        """
        return self._apply_boot_override(
            'Cd', 'Once', 'UEFI', 'BootSourceOverrideTarget', 'Cd')

    def SetBootOrderLegacy(self):
        """Boot from hard disk continuously in UEFI mode.

        Returns:
            bool: True when the BMC reports ``UEFI`` as override mode.
        """
        return self._apply_boot_override(
            'Hdd', 'Continuous', 'UEFI', 'BootSourceOverrideMode', 'UEFI')

    # ------------------------------------------------------------------- power
    def ComputerSystemReset(self, ResetType):
        """Send ``ComputerSystem.Reset`` with the given ``ResetType``.

        Returns:
            bool: True when the BMC answered 200.
        """
        url = '/redfish/v1/Systems/1/Actions/ComputerSystem.Reset'
        req_body = {"ResetType": ResetType}
        response = self.redfish_client.post(
            url, headers=self._auth_headers(), body=req_body)
        if response._status == 200:
            return True
        self.logger.error("ComputerSystemReset:{}".format(response))
        return False

    def ChassisReset(self, ResetType, retry_count=3):
        """Send ``Chassis.Reset``, retrying up to ``retry_count`` times.

        Returns:
            bool: True as soon as one attempt is answered with 200.
        """
        url = '/redfish/v1/Chassis/1/Actions/Chassis.Reset'
        req_body = {"ResetType": ResetType}
        for i in range(retry_count):
            response = self.redfish_client.post(
                url, headers=self._auth_headers(), body=req_body)
            if response._status == 200:
                return True
            self.logger.error("ChassisReset[{}-{}]:{}".format(i, ResetType, response))
            time.sleep(2)
        return False

    # -------------------------------------------------------------- screenshots
    def WaitFinished(self, timeout=5*60):
        """Poll KVM screenshots until the self-check reports completion.

        Each round triggers a screenshot, downloads it and compares it with
        the template via ``isSimilarity``. The matching screenshot is moved
        to ``self.result_image``.

        Args:
            timeout: overall time budget in seconds.

        Returns:
            bool: True when a matching screenshot was captured, False on
            timeout.
        """
        shot_url = '/redfish/v1/Managers/1/Actions/Oem/Public/KVM/Screenshot'
        download_url = '/redfish/v1/Managers/1/Actions/Oem/Public/KVM/ScreenshotDownload'
        beg = time.time()
        while True:
            # Do not poll faster; hammering the BMC tends to upset it.
            time.sleep(3)
            if time.time() - beg > timeout:
                self.logger.error("WaitFinished,Timeout")
                return False
            response = self.redfish_client.post(shot_url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("Screenshot:{}".format(response))
                continue

            # While the picture is being generated do not trigger another
            # capture (that leads to a black screen); just wait for it.
            retry_count = 10
            while retry_count > 0:
                if time.time() - beg > timeout:
                    self.logger.error("WaitFinished,Timeout")
                    return False
                response = self.redfish_client.post(
                    download_url, headers=self._auth_headers(),
                    body={"PictureAttributes": 'manual'})
                status = response._status
                if status == 404:  # The file is being generated
                    time.sleep(2)
                    retry_count -= 1
                    continue
                if status == 200:
                    image_path = "{}-{}-{}-ing.jpg".format(self.index, self.bmc_host, self.seq)
                    with open(image_path, "wb") as f:
                        f.write(response.read)
                    if isSimilarity(image_path):
                        shutil.move(image_path, self.result_image)
                        return True
                elif status not in (500, 401):
                    # 500: no manual picture yet, 401: invalid authentication;
                    # both simply start a new capture round.
                    self.logger.error("ScreenshotDownload:{}".format(response))
                break

    # --------------------------------------------------------------------- PCIe
    def _log_pcie_device(self, i, vendor, attempts=30):
        """Log link information of PCIe device ``i`` when it belongs to ``vendor``."""
        device_url = '/redfish/v1/Chassis/1/PCIeDevices/{}'.format(i)
        function_url = '/redfish/v1/Chassis/1/PCIeDevices/{}/PCIeFunctions/1'.format(i)
        for _ in range(attempts):
            response = self.redfish_client.get(device_url, headers=self._auth_headers())
            if response._status != 200:
                time.sleep(1)
                continue
            body = self._json(response)
            Manufacturer = body.get('Manufacturer')
            if Manufacturer != vendor:
                # Not a device of interest: move on to the next one instead
                # of querying this one again.
                return
            State = body['Status']['State']
            Health = body['Status']['Health']
            SlotNumber = body['Oem']['Public']['SlotNumber']
            response = self.redfish_client.get(function_url, headers=self._auth_headers())
            if response._status == 200:
                public = self._json(response)['Oem']['Public']
                self.logger.info("{} {} {} {} {} {} {}".format(
                    i, Manufacturer, State, Health, SlotNumber,
                    public['LinkWidth'], public['LinkSpeed']))
                return
            time.sleep(1)
        self.logger.warning("PCIeDevice[{}]: no answer after {} attempts".format(i, attempts))

    def PCIEDeviceSummary(self, target_dev_count=3, vendor="NVIDIA"):
        """Check the PCIe device count and log link data of ``vendor`` devices.

        Args:
            target_dev_count: minimum number of PCIe devices expected.
            vendor: ``Manufacturer`` value of the devices to report.

        Returns:
            bool: False when a query failed or fewer than
            ``target_dev_count`` devices are present; True otherwise
            (including a device count of 0, which is treated as powered off).
        """
        # The first two queries only verify that the resources are reachable.
        for url, label in (('/redfish/v1/Systems/1', "Systems"),
                           ('/redfish/v1/Systems/1/Bios', "Bios")):
            response = self.redfish_client.get(url, headers=self._auth_headers())
            if response._status != 200:
                self.logger.error("{}:{}".format(label, response))
                return False

        response = self.redfish_client.get(
            '/redfish/v1/Chassis/1/PCIeDevices', headers=self._auth_headers())
        if response._status != 200:
            self.logger.error("PCIeDevices1:{}".format(response))
            return False

        count = self._json(response)['Members@odata.count']
        # No devices listed: the server is treated as powered off, skip it.
        if count == 0:
            self.logger.info("{} PowerStatus=Off".format(self.bmc_host))
            return True
        # Fewer devices than expected means a card dropped off the bus.
        if count < target_dev_count:
            self.logger.error("VA1 Lost,Current:{}".format(count))
            return False
        for i in range(count):
            self._log_pcie_device(i, vendor)
        return True

    # ---------------------------------------------------------------- sequence
    def _run_once(self, attempt):
        """Execute one pass of the test sequence.

        Returns:
            True when the pass completed, False when the whole run must be
            aborted, None when the pass failed and the next one may start.
        """
        def timed(label, action):
            with TimeSpan(self.logger, label.format(self.bmc_host, self.seq)):
                return action()

        def relogin():
            self.Logout()
            return self.Login()

        def check_devices():
            if self.PCIEDeviceSummary():
                return True
            self.logger.error("ERROR,{}".format(self.bmc_host))
            return False

        prepare = [
            # Create the Redfish session and obtain the token.
            ("*1.{}-{}-Login", relogin),
            # Abort the pass when a card is missing.
            ("*2.{}-{}-PCIEDeviceSummary", check_devices),
            # Power off so the virtual CD is not in use when (re)mounting.
            ("*3.{}-{}-PowerDown", lambda: self.ChassisReset("ForceOff")),
            ("*4.{}-{}-EjectMedia", self.EjectMedia),
            ("*5.{}-{}-InsertMedia", self.InsertMedia),
            ("*6.{}-{}-PowerOn", lambda: self.ChassisReset("On")),
            ("*7.{}-{}-EnableVirtualCDBoot", self.EnableVirtualCDBoot),
            # Boot from the virtual CD on the next start.
            ("*8.{}-{}-SetBootOrder", self.SetBootOrder),
        ]
        for label, action in prepare:
            if not timed(label, action):
                return None

        # NOTE(review): this is the only step whose failure ends the whole
        # run instead of moving on to the next pass - confirm it is intended.
        if not timed("*9.{}-{}-ForceRestart",
                     lambda: self.ComputerSystemReset("ForceRestart")):
            return False

        if attempt == 0:
            # NOTE(review): the first pass never waits for the result; it
            # sleeps and hands over to the next pass - presumably a warm-up,
            # confirm with the author.
            time.sleep(120)
            return None

        finish = [
            # The ISO prints "Please press Enter to activate this console"
            # when done; WaitFinished looks for it in KVM screenshots and
            # gives up after five minutes.
            ("*10.{}-{}-WaitFinished", self.WaitFinished),
            ("*11.{}-{}-EjectMedia", self.EjectMedia),
            ("*12.{}-{}-PowerCycle", lambda: self.ChassisReset('PowerCycle')),
            ("*13.{}-{}-Logout", self.Logout),
        ]
        for label, action in finish:
            if not timed(label, action):
                return None
        return True

    def run(self):
        """Run the test sequence, making up to ``try_count`` passes.

        A pass is skipped when its result screenshot already exists.
        Exceptions raised by a pass are logged, appended to
        ``traceback_info.txt`` and followed by the next pass.

        Returns:
            bool: True when a pass completed, False otherwise.
        """
        for i in range(self.try_count):
            self.seq = i
            self.result_image = "{}-{}-{}-done.jpg".format(self.index, self.bmc_host, self.seq)

            # Skip passes that already produced a result.
            if os.path.exists(self.result_image):
                continue
            try:
                with TimeSpan(self.logger, "{}-{} InspurVA1QueryE2E:".format(self.bmc_host, self.seq)):
                    outcome = self._run_once(i)
            except Exception:
                self.logger.error("{}-{} Failed:".format(self.bmc_host, self.seq))
                with open('traceback_info.txt', 'a+') as f:
                    traceback.print_exc(file=f)
                continue
            if outcome is not None:
                return outcome
        return False


def _thread_logger(index):
    """Return the file+console logger of worker ``index``, creating it once."""
    logger = logging.getLogger("FetchThread:{}".format(index))
    if logger.handlers:
        return logger
    logger.setLevel(level=logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    for handler in (logging.FileHandler("nvidia_aic_check_inspur_{}.log".format(index)),
                    logging.StreamHandler()):
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


def FetchThread(checkonly, index, q, nfs_server=None):
    """Worker loop: process server rows from ``q`` until ``None`` arrives.

    Args:
        checkonly: 1 to only log in and summarise PCIe devices; any other
            value runs the boot-order reset path below.
        index: worker number, used for the logger and log file name.
        q: queue of CSV rows ``(index, bmc_addr, username, password,
            try_count)``; ``None`` is the stop signal.
        nfs_server: NFS host handed to ``InspurVA1Query``. When omitted the
            module-level ``nfs_server`` variable is used, if defined.
    """
    if nfs_server is None:
        nfs_server = globals().get("nfs_server")

    logger = None
    while True:
        row = q.get()
        if row is None:
            break

        # Created lazily so idle workers do not leave empty log files.
        if logger is None:
            logger = _thread_logger(index)

        # One bad row or unreachable BMC must not end the worker: the queue
        # is bounded, so the producer would block once no consumer is left.
        try:
            row_index, bmc_addr, username, password, try_count = row
            # The password is deliberately kept out of the log.
            logger.info([row_index, bmc_addr, username, "******", try_count])
            t = InspurVA1Query(logger, row_index, bmc_addr, username, password,
                               nfs_server, int(try_count))
            try:
                if checkonly == 1:
                    ok = t.Login() and t.PCIEDeviceSummary()
                    print("{} {}".format(bmc_addr, bool(ok)))
                else:
                    # NOTE(review): the full sequence is disabled here in
                    # favour of a boot-order reset - confirm which is wanted.
                    # t.run()
                    if t.Login():
                        t.SetBootOrderLegacy()
                        t.ComputerSystemReset("ForceRestart")
            finally:
                # Release the BMC session; a no-op when not logged in.
                t.Logout()
        except Exception:
            logger.exception("FetchThread[{}] row failed".format(index))


def main():
    """Parse the command line, start the workers and feed them the CSV rows."""
    args = parser.parse_args()
    nfs_server = args.nfs_server.strip()
    server_list = args.server_list.strip()
    threads = args.threads
    checkonly = args.checkonly
    if threads < 1:
        parser.error("-threads must be at least 1")

    request_queue = Queue(threads)
    tasks = []

    for i in range(threads):
        t = threading.Thread(target=FetchThread,
                             args=(checkonly, i, request_queue, nfs_server))
        t.start()
        tasks.append(t)

    try:
        with codecs.open(server_list, "r", 'utf-8') as csvfile:
            csvreader = csv.reader(csvfile)
            next(csvreader, None)  # skip the header row
            for row in csvreader:
                if not row or row[0].startswith("#"):
                    continue
                request_queue.put(row)
    finally:
        # Always release the workers, even when the CSV could not be read.
        for _ in range(threads):
            request_queue.put(None)
        for t in tasks:
            t.join()


if __name__ == '__main__':
    main()

# Reference curl commands.
# Restart the host system:
#   curl -X POST https://192.168.1.100/redfish/v1/Systems/1/Actions/ComputerSystem.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin
# Restart the BMC:
#   curl -X POST https://192.168.1.100/redfish/v1/Managers/1/Actions/Manager.Reset -d '{"ResetType": "ForceRestart"}' -H "Content-Type: application/json" -k -u admin:admin

广告一刻

为您即时展示最新活动产品广告消息,让您随时掌握产品活动新动态!