阅读量:0
- 人脸检测与定位:
find_host_face_location:从视频开头逐帧检测,返回第一张检测到的人脸(视为主持人)的位置;检测前先将帧缩小到原尺寸的 1/4 以提高处理速度,再把坐标放大回原始尺寸。
- 裁剪框计算:
calculate_cropping_box:基于检测到的人脸位置,计算一个适合的裁剪框,确保主持人的人脸处于视频画面的中心位置。
- 动态帧率采样与首次人脸检测时间:
find_first_face_time:按 sample_rate 间隔对帧进行采样检测(sample_rate 由 process_video 根据视频帧率动态计算),以优化处理速度,并返回视频中首次检测到人脸的时间戳(秒)。
- 视频裁剪与帧率调整:
process_video:综合以上功能,裁剪视频以保留从首次出现人脸开始的10秒片段,调整视频尺寸和帧率为标准格式,输出处理后的视频。
# python data_utils/video/cut_crop_fps_1.1.py
import os
import cv2
import math
import numpy as np
import face_recognition
from moviepy.editor import VideoFileClip, concatenate_videoclips
from tqdm import tqdm

# Frames are shrunk by this factor before detection to speed it up;
# detected coordinates are multiplied by _UPSCALE to map them back.
_DETECTION_SCALE = 0.25
_UPSCALE = 4


def _detect_faces(frame, model='hog'):
    """Return face boxes found in a downscaled copy of a BGR *frame*.

    The boxes are (top, right, bottom, left) tuples expressed in the
    coordinates of the downscaled frame, as returned by
    ``face_recognition.face_locations``.
    """
    small_frame = cv2.resize(
        frame, (0, 0), fx=_DETECTION_SCALE, fy=_DETECTION_SCALE
    )
    # cvtColor yields a contiguous RGB array. The former
    # ``small_frame[:, :, ::-1]`` slice is a negative-stride view, which
    # some dlib builds reject as an unsupported image type.
    rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
    return face_recognition.face_locations(rgb_small_frame, model=model)


def find_host_face_location(video_path):
    """Return the location of the first face detected in the video.

    Frames are read from the start of the video one by one until a face is
    detected. The first face reported for that frame is assumed to be the
    host.

    Returns:
        A (top, right, bottom, left) tuple in original-frame pixels, or
        None when no face is detected in any frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            face_locations = _detect_faces(frame)
            if face_locations:
                # Scale the box back up to the original frame size.
                top, right, bottom, left = face_locations[0]
                return (
                    top * _UPSCALE,
                    right * _UPSCALE,
                    bottom * _UPSCALE,
                    left * _UPSCALE,
                )
    finally:
        # Release the capture even if detection raises.
        cap.release()
    return None


def calculate_cropping_box(face_location, frame_shape):
    """Compute a crop box of up to 512x512 pixels centred on the face.

    Args:
        face_location: (top, right, bottom, left) of the face.
        frame_shape: (height, width) of the frame.

    Returns:
        (top, right, bottom, left) of the crop box, clipped to the frame.
    """
    top, right, bottom, left = face_location
    center_x, center_y = (left + right) // 2, (top + bottom) // 2
    half_width, half_height = 256, 256

    # NOTE: clipping at the frame border can produce a box smaller than
    # 512x512 (and not square) when the face is near an edge.
    left_cropped = max(center_x - half_width, 0)
    top_cropped = max(center_y - half_height, 0)
    right_cropped = min(center_x + half_width, frame_shape[1])
    bottom_cropped = min(center_y + half_height, frame_shape[0])

    return (top_cropped, right_cropped, bottom_cropped, left_cropped)


def find_first_face_time(video_path, sample_rate, min_confidence=0.5):
    """Return the timestamp (seconds) of the first sampled frame with a face.

    Only frames whose position reported by OpenCV is a multiple of
    *sample_rate* are inspected, which trades precision for speed.

    Args:
        video_path: Path of the video to scan.
        sample_rate: Inspect one frame out of every *sample_rate* frames.
        min_confidence: Currently unused; kept for backward compatibility.

    Returns:
        The timestamp in seconds, or None when no sampled frame contains a
        face.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Skip frames that are not on the sampling grid.
            if cap.get(cv2.CAP_PROP_POS_FRAMES) % sample_rate != 0:
                continue

            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000  # ms -> s

            # model may be 'cnn' or 'hog'.
            if _detect_faces(frame, model='hog'):
                return timestamp
    finally:
        cap.release()
    return None


def process_video(input_path, output_path):
    """Crop, trim and re-time a video around the host's face.

    The output keeps up to 10 seconds starting at the first (rounded-up)
    second in which a face is detected, cropped around the host's face,
    resized to 512x512 and written at 25 fps. Nothing is written when no
    face is found or when no footage remains after the start point.
    """
    # Locate the host's face.
    host_face_location = find_host_face_location(input_path)
    if host_face_location is None:
        print(f"No face detected in video {input_path}")
        return

    clip = VideoFileClip(input_path)
    try:
        # clip.size is (width, height); the crop helper wants (height, width).
        frame_shape = clip.size[::-1]
        fps = clip.fps

        # Aim for roughly 10 detections per second, sampling at least
        # every frame so the rate is never 0.
        desired_detection_frequency = 10
        sample_rate = max(int(fps / desired_detection_frequency), 1)

        cropping_box = calculate_cropping_box(host_face_location, frame_shape)

        first_face_time = find_first_face_time(
            input_path, sample_rate=sample_rate
        )
        print(f"First face time: {first_face_time}")
        if first_face_time is None:
            # The sampled scan can miss a face that the frame-by-frame
            # scan found; without this guard math.ceil(None) would raise.
            print(f"No face detected in video {input_path}")
            return

        # Keep 10 seconds from the first face, without exceeding the
        # duration of the video.
        start_trim = math.ceil(first_face_time)
        end_trim = min(start_trim + 10, clip.duration)
        print(f"Start trim: {start_trim}, End trim: {end_trim}")
        if start_trim >= end_trim:
            # Rounding up moved the start to or past the end of the video.
            print(f"No footage left to keep in video {input_path}")
            return

        trimmed_clip = clip.subclip(start_trim, end_trim)

        cropped_clip = trimmed_clip.crop(
            x1=cropping_box[3],
            y1=cropping_box[0],
            x2=cropping_box[1],
            y2=cropping_box[2],
        )
        cropped_clip = cropped_clip.resize((512, 512))
        cropped_clip = cropped_clip.set_fps(25)

        cropped_clip.write_videofile(
            output_path, codec='libx264', audio_codec='aac'
        )
        cropped_clip.close()
    finally:
        # Release the source clip on every exit path, including errors.
        clip.close()


def main(input_folder='video/HDTF'):
    """Process every video file found directly inside *input_folder*.

    Each output is written next to its input with a ``_p`` suffix added to
    the file stem.
    """
    video_files = [
        f for f in os.listdir(input_folder)
        if f.endswith(('.mp4', '.avi', '.mkv'))
        # Outputs land in the same folder; skip them so a rerun does not
        # process its own results again.
        and not os.path.splitext(f)[0].endswith('_p')
    ]

    with tqdm(total=len(video_files), desc="Processing Videos") as pbar:
        for filename in video_files:
            input_path = os.path.join(input_folder, filename)
            print("input_path:", input_path)

            # Build the output file name from the input name.
            stem, extension = os.path.splitext(filename)
            output_filename = f"{stem}_p{extension}"
            output_path = os.path.join(input_folder, output_filename)

            process_video(input_path, output_path)

            pbar.update(1)


if __name__ == "__main__":
    main()