1. Code
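The script below loads a single-class (person) YOLOv5 ONNX model with onnxruntime, preprocesses an image to the 640x640 network input, filters the raw predictions with a confidence threshold and per-class NMS, and then crops the highest-scoring person region from the resized input.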
import cv2
import numpy as np
import onnxruntime

CLASSES = ['person']  # single-class model: person only


class YOLOV5():
    def __init__(self, onnxpath):
        self.onnx_session = onnxruntime.InferenceSession(onnxpath)
        self.input_name = self.get_input_name()
        self.output_name = self.get_output_name()

    # -------------------------------------------------------
    # Get the names of the model inputs and outputs
    # -------------------------------------------------------
    def get_input_name(self):
        input_name = []
        for node in self.onnx_session.get_inputs():
            input_name.append(node.name)
        return input_name

    def get_output_name(self):
        output_name = []
        for node in self.onnx_session.get_outputs():
            output_name.append(node.name)
        return output_name

    # -------------------------------------------------------
    # Build the input feed dict for the session
    # -------------------------------------------------------
    def get_input_feed(self, img_tensor):
        input_feed = {}
        for name in self.input_name:
            input_feed[name] = img_tensor
        return input_feed

    # -------------------------------------------------------
    # 1. Read the image with cv2 and resize it
    # 2. Convert BGR to RGB and HWC to CHW
    # 3. Normalize to [0, 1]
    # 4. Add the batch dimension
    # 5. Run the onnxruntime session
    # -------------------------------------------------------
    def inference(self, img_path):
        try:
            img = cv2.imread(img_path)
            or_img = img.copy()                                        # keep the original image
            resized_img = cv2.resize(img, (640, 640))                  # resize to the model input size
            resized_img = resized_img[:, :, ::-1].transpose(2, 0, 1)   # BGR2RGB and HWC2CHW
            resized_img = resized_img.astype(np.float32) / 255.0
            resized_img = np.expand_dims(resized_img, axis=0)
            input_feed = self.get_input_feed(resized_img)
            pred = self.onnx_session.run(None, input_feed)[0]
            return pred, or_img, resized_img
        except Exception as e:
            print(f"Error during inference: {e}")
            return None, None, None


# dets:   array of shape [N, 6]; the 6 values are x1, y1, x2, y2, score, class
# thresh: IoU threshold
def nms(dets, thresh):
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    # -------------------------------------------------------
    # Compute box areas and sort by confidence, high to low
    # -------------------------------------------------------
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    scores = dets[:, 4]
    keep = []
    index = scores.argsort()[::-1]
    while index.size > 0:
        i = index[0]
        keep.append(i)
        # -------------------------------------------------------
        # Intersection of the current box with the remaining boxes
        # (width/height are clamped to 0 when boxes do not overlap)
        # -------------------------------------------------------
        x11 = np.maximum(x1[i], x1[index[1:]])
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)
        h = np.maximum(0, y22 - y11 + 1)
        overlaps = w * h
        # -------------------------------------------------------
        # IoU with the remaining boxes; boxes with IoU above the
        # threshold are duplicates and dropped, the rest are kept
        # -------------------------------------------------------
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= thresh)[0]
        index = index[idx + 1]
    return keep


def xywh2xyxy(x):
    # [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y


def filter_box(org_box, conf_thres, iou_thres):  # filter out low-quality boxes
    # -------------------------------------------------------
    # Drop the batch dimension and remove boxes whose
    # objectness score is below conf_thres
    # -------------------------------------------------------
    org_box = np.squeeze(org_box)
    conf = org_box[..., 4] > conf_thres
    box = org_box[conf]
    # -------------------------------------------------------
    # Take the class with the highest score via argmax
    # -------------------------------------------------------
    cls_cinf = box[..., 5:]
    cls = []
    for i in range(len(cls_cinf)):
        cls.append(int(np.argmax(cls_cinf[i])))
    all_cls = list(set(cls))
    # -------------------------------------------------------
    # Filter each class separately:
    # 1. replace the 6th column with the class index
    # 2. convert xywh to xyxy
    # 3. run NMS to get the indices of the surviving boxes
    # 4. collect the surviving boxes by index
    # -------------------------------------------------------
    output = []
    for curr_cls in all_cls:
        curr_cls_box = []
        for j in range(len(cls)):
            if cls[j] == curr_cls:
                box[j][5] = curr_cls
                curr_cls_box.append(box[j][:6])
        curr_cls_box = np.array(curr_cls_box)
        curr_cls_box = xywh2xyxy(curr_cls_box)
        curr_out_box = nms(curr_cls_box, iou_thres)
        for k in curr_out_box:
            output.append(curr_cls_box[k])
    output = np.array(output)
    return output


def draw(image, box_data):
    # -------------------------------------------------------
    # Round coordinates to integers for drawing
    # -------------------------------------------------------
    boxes = box_data[..., :4].astype(np.int32)
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)
    for box, score, cl in zip(boxes, scores, classes):
        left, top, right, bottom = box
        print('class: {}, score: {}'.format(CLASSES[cl], score))
        print('box coordinate left,top,right,bottom: [{}, {}, {}, {}]'.format(left, top, right, bottom))
        cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255), 2)  # red
        # cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)  # blue
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score), (left, top),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)


# Several persons may be detected; extract the box with the highest confidence
def extract_person(image, box_data):
    image = np.squeeze(image, axis=0)        # (1, 3, 640, 640) -> (3, 640, 640)
    image = np.transpose(image, (1, 2, 0))   # CHW -> HWC
    image = np.ascontiguousarray(image[:, :, ::-1])  # RGB back to BGR so OpenCV shows/saves correct colors
    print(f"Resized image shape: {image.shape}")
    cv2.imshow('resized image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    boxes = box_data[..., :4].astype(np.int32)
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)
    if len(boxes) > 0:
        max_index = np.argmax(scores)
        left, top, right, bottom = boxes[max_index]
        print(f"Box coordinates: top={top}, left={left}, right={right}, bottom={bottom}")
        # Crop the target region (coordinates are in the 640x640 input space)
        person = image[top:bottom, left:right]
        print(f"Extracted person image size: {person.shape}")
        cv2.imshow('extracted person', person)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        # Save the crop: scale the normalized image back to 0-255 and cast to uint8
        cv2.imwrite('extract.png', (person * 255).astype(np.uint8))
        return person


if __name__ == "__main__":
    onnx_path = r'E:\detect_person\person.onnx'
    model = YOLOV5(onnx_path)
    output, or_img, resize_img = model.inference(r"G:\depth_detect\huang2\huang1.png")
    outbox = filter_box(output, 0.35, 0.35)
    if len(outbox) > 0:
        # Draw the boxes on the original image (note: boxes are in the 640x640 input space)
        # draw(or_img, outbox)
        # Crop the highest-confidence person from the resized network input
        extract = extract_person(resize_img, outbox)
    else:
        print("No objects detected.")
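Note that filter_box returns coordinates in the 640x640 network-input space, which is why extract_person crops from resize_img rather than from or_img. If you instead want to draw or crop on the original image at its native resolution, the boxes have to be scaled back first. Below is a minimal sketch of that step; the scale_boxes helper is illustrative, not part of the original script, and it assumes the plain cv2.resize preprocessing used above (no letterbox padding):

import numpy as np

def scale_boxes(boxes, original_shape, input_size=640):
    # Map [x1, y1, x2, y2] boxes from the 640x640 network input back to the
    # original image resolution. Because the preprocessing is a plain resize
    # (no padding), x and y can be rescaled independently.
    h, w = original_shape[:2]
    scaled = boxes.astype(np.float32).copy()
    scaled[:, [0, 2]] *= w / input_size  # scale x1, x2
    scaled[:, [1, 3]] *= h / input_size  # scale y1, y2
    return scaled

# Example usage with the script above (hypothetical):
# outbox[:, :4] = scale_boxes(outbox[:, :4], or_img.shape)
# draw(or_img, outbox)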