阅读量:0
血的教训:合并多个文件夹时,采用"MD5 哈希值 + 修改时间戳"的方式重命名文件,避免同名文件互相覆盖。
合并后就能安安心心排序了
"""Merge .mp4 files from several source folders into one target folder.

Each copied file is renamed to ``<md5-of-content>_<mtime><ext>`` so that
files sharing a name in different source folders never overwrite each other.
"""
import hashlib
import os
import shutil

# Source folders to merge (add more entries as needed).
source_dirs = [
    "video/vox2/8-12/10s",
    "video/vox2/12-20/10s",
    "video/vox2/20-30/10s",
    "video/vox2/30-60/10s",
    "video/vox2/60-inf",
]

# Destination folder that receives every copied file.
target_dir = "video/vox2/10s"

# Number of bytes read per iteration while hashing a file.
HASH_CHUNK_SIZE = 1024 * 1024


def file_md5(path, chunk_size=HASH_CHUNK_SIZE):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in blocks of *chunk_size* bytes so that large videos are
    never loaded into memory in one piece; the digest is identical to hashing
    the whole content at once.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the empty bytes object read() returns at EOF.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def unique_destination(directory, file_hash, timestamp, ext):
    """Return a path ``<directory>/<file_hash>_<timestamp><ext>`` that is unused.

    While the candidate path already exists, *timestamp* is increased by
    0.001 and the name is rebuilt.
    """
    candidate = os.path.join(directory, f"{file_hash}_{timestamp}{ext}")
    while os.path.exists(candidate):
        timestamp += 0.001  # nudge the timestamp until the name is free
        candidate = os.path.join(directory, f"{file_hash}_{timestamp}{ext}")
    return candidate


def merge_videos(sources, destination):
    """Copy every ``.mp4`` file found under *sources* into *destination*.

    Args:
        sources: Iterable of directory paths; each is walked recursively.
            Directories that do not exist are reported and skipped.
        destination: Directory receiving the copies; created if missing.

    Returns:
        A list with the destination path of every file copied, in copy order.
    """
    os.makedirs(destination, exist_ok=True)
    copied = []

    for source_dir in sources:
        if not os.path.exists(source_dir):
            print(f"Warning: Directory does not exist: {source_dir}")
            continue

        print(f"Processing directory: {source_dir}")

        for root, _dirs, files in os.walk(source_dir):
            for filename in files:
                src_file_path = os.path.join(root, filename)
                print(f"Checking file: {src_file_path}")

                # Only .mp4 files are merged; the check is case-insensitive.
                if not filename.lower().endswith(".mp4"):
                    print(f"Ignored file: {src_file_path}")
                    continue

                # Keep the extension exactly as spelled in the source name.
                _base_name, ext = os.path.splitext(filename)
                dst_file_path = unique_destination(
                    destination,
                    file_md5(src_file_path),
                    os.path.getmtime(src_file_path),
                    ext,
                )

                # copy2 copies the data and attempts to preserve metadata.
                shutil.copy2(src_file_path, dst_file_path)
                copied.append(dst_file_path)
                print(f'Copied "{filename}" from "{root}" to "{dst_file_path}"')

    return copied


def main():
    """Merge the configured source folders into the configured target folder."""
    merge_videos(source_dirs, target_dir)
    print("All videos have been merged successfully.")


if __name__ == "__main__":
    main()