Python自动清理错误图片,深度学习训练数据集准备
使用python运行
from PIL import Image
from pathlib import Path
import os
path = r'.'
def check_pic(path_pic):
    """Check that an image file decodes completely; delete it when it does not.

    The path is printed, followed by ``OK`` or ``FALSE``. A file that cannot
    be decoded is appended to ``False.txt`` (one path per line) and removed
    from disk.

    Args:
        path_pic: Path of the image file to check (``str`` or ``Path``).

    Returns:
        True when the image opened and decoded, False when it was rejected.

    Raises:
        OSError: If removing a rejected file fails; the removal is not guarded.
    """
    try:
        # The context manager releases the file handle even when load() raises.
        # A handle that stays open can make the os.remove() below fail
        # (typically on Windows, where open files cannot be deleted).
        with Image.open(path_pic, 'r') as img:
            # Image.open() is lazy; load() forces the pixel data to be decoded.
            img.load()
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        print(path_pic)
        print('FALSE')
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        os.remove(path_pic)
        return False
    else:
        print(path_pic)
        print('OK')
        return True
def Pic_Find(path):
    """Run ``check_pic`` on every image file found beneath ``path``.

    The tree is searched recursively once per pattern, in the order
    ``*.jpg``, ``*.jpeg``, ``*.tiff``, ``*.bmp``, ``*.png``.

    Args:
        path: Root directory to scan.
    """
    root = Path(path)
    for pattern in ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png'):
        for image_path in root.rglob(pattern):
            check_pic(image_path)
if __name__ == '__main__':
    # Scan every immediate sub-directory of `path`. os.listdir() yields bare
    # names, so each one is joined back onto `path`; testing the bare name
    # with isdir() would resolve it against the current working directory
    # and only works while `path` is '.'.
    for name in os.listdir(path):
        sub_dir = os.path.join(path, name)
        if os.path.isdir(sub_dir):
            Pic_Find(sub_dir)
#Pic_Find(path)
脚本会自动删除错误的图片。
20230521更新,修正了存在的问题
import os
from pathlib import Path
from PIL import Image
def check_pic(path_pic, files_to_delete):
    """Check that an image decodes; log and delete it when it does not.

    A file that fails to decode is appended to ``False.txt`` (one path per
    line) and removed. If the removal fails with ``PermissionError`` the path
    is queued in ``files_to_delete`` instead.

    Args:
        path_pic: Path of the image file to check (``str`` or ``Path``).
        files_to_delete: List that receives ``path_pic`` when it could not be
            removed because of a ``PermissionError``.

    Returns:
        True when the image opened and decoded, False otherwise.

    Raises:
        OSError: If the removal fails for a reason other than
            ``PermissionError`` (for example the file no longer exists).
    """
    try:
        # Close the image handle deterministically: a handle left open after
        # a failed load() can itself make the os.remove() below fail.
        with Image.open(path_pic, 'r') as img:
            # Image.open() is lazy; load() forces the pixel data to be decoded.
            img.load()
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        try:
            os.remove(path_pic)
        except PermissionError:
            # Format instead of concatenating: path_pic is a Path when it
            # comes from rglob(), and str + Path raises TypeError, which
            # would skip the append below.
            print(f'File In Use:{path_pic}')
            files_to_delete.append(path_pic)
        return False
    else:
        return True
def Pic_Find(path):
    """Check every image beneath ``path`` and retry deletions left pending.

    The tree is searched recursively once per pattern, in the order
    ``*.jpg``, ``*.jpeg``, ``*.tiff``, ``*.bmp``, ``*.png``. Each match is
    handed to ``check_pic`` together with a shared list. After the scan,
    every path in that list is removed, and a failure is printed rather
    than raised.

    Args:
        path: Root directory to scan.
    """
    pending = []
    root = Path(path)
    for pattern in ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png'):
        for image_path in root.rglob(pattern):
            check_pic(image_path, pending)

    # Second attempt at removing the files that were queued during the scan.
    for file in pending:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")
if __name__ == '__main__':
for i in os.listdir('.'):
if os.path.isdir(i):
Pic_Find(i)
这段代码是之前代码的一个修改版本,其中添加了处理文件删除失败的情况。这段代码的作用是递归查找当前目录及其子目录中的所有图像文件,检查它们是否可以成功打开,如果不能打开,则尝试将其删除。
如果在尝试删除文件时出现PermissionError异常,该文件路径将被添加到files_to_delete列表中。在Pic_Find函数完成后,该函数会循环该列表并尝试删除这些文件。如果删除文件时出现错误,则会打印出错误消息。
请注意,这段代码假定所有图像文件的扩展名都是.jpg、.jpeg、.tiff、.bmp或.png。如果您需要处理其他类型的图像文件,请相应地修改文件扩展名的列表。
此外,由于这段代码会尝试删除文件,因此请确保在运行此代码之前备份了您的文件,以防意外删除了您想要保留的文件。
所以继续进行了改进,
在循环os.listdir的过程中捕获FileNotFoundError异常,以便处理可能不存在的目录。
import os
from pathlib import Path
from PIL import Image
def check_pic(path_pic, files_to_delete):
    """Check that an image decodes; log and delete it when it does not.

    A file that fails to decode is appended to ``False.txt`` (one path per
    line) and removed. A ``PermissionError`` during removal queues the path
    in ``files_to_delete``; any other ``OSError`` is printed and swallowed.

    Args:
        path_pic: Path of the image file to check (``str`` or ``Path``).
        files_to_delete: List that receives ``path_pic`` when it could not be
            removed because of a ``PermissionError``.

    Returns:
        True when the image opened and decoded, False otherwise.
    """
    try:
        # Close the image handle deterministically: a handle left open after
        # a failed load() can itself make the os.remove() below fail.
        with Image.open(path_pic, 'r') as img:
            # Image.open() is lazy; load() forces the pixel data to be decoded.
            img.load()
        return True
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        try:
            os.remove(path_pic)
        except PermissionError:
            # Format instead of concatenating: str + Path raises TypeError
            # for the Path objects produced by rglob().
            print(f'File In Use:{path_pic}')
            files_to_delete.append(path_pic)
        except (FileNotFoundError, OSError) as e:
            print(f"删除文件 {path_pic} 失败: {e}")
        # Report the rejection explicitly instead of falling through to None.
        return False
def Pic_Find(path):
    """Scan ``path`` recursively for images, then retry queued deletions.

    Files are matched one pattern at a time (``*.jpg``, ``*.jpeg``,
    ``*.tiff``, ``*.bmp``, ``*.png``, in that order) and each match is passed
    to ``check_pic`` with a shared retry list. When the scan is finished,
    every path in that list is removed; a failure is printed and otherwise
    ignored.

    Args:
        path: Root directory to scan.
    """
    retry_queue = []
    base = Path(path)
    patterns = ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png')

    # Lazy flattening keeps the original order: all matches of one pattern
    # are visited before the next pattern's search starts.
    matches = (hit for pattern in patterns for hit in base.rglob(pattern))
    for hit in matches:
        check_pic(hit, retry_queue)

    # Delete the files that were queued during the scan.
    for file in retry_queue:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")
def main():
    """Run ``Pic_Find`` on each sub-directory of the current directory.

    The whole traversal sits inside one ``try``: the first exception is
    printed and ends the traversal, and nothing is re-raised.
    """
    try:
        # filter() is lazy, so each entry is tested right before it is scanned.
        for sub_dir in filter(os.path.isdir, os.listdir('.')):
            Pic_Find(sub_dir)
    except FileNotFoundError as e:
        print(f"找不到目录: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")
if __name__ == '__main__':
main()
在上述代码中,我添加了一个名为main的函数,该函数用于将所有代码放在一个封装的块中,并添加异常处理机制。在main函数中,我使用try语句来捕获可能发生的异常,例如FileNotFoundError异常和其他未知异常。如果发生这些异常,程序将打印错误消息并停止运行。
此外,我还在check_pic函数中添加了异常处理机制,以便在尝试删除文件时捕获其他异常,例如FileNotFoundError和OSError异常。如果发生这些异常,程序将打印错误消息并继续执行。
最后,我在main函数中调用Pic_Find函数,以便处理当前目录及其子目录中的所有图像文件。如果在循环os.listdir的过程中发生错误,程序将打印错误消息,并停止处理其余目录(因为try语句包裹了整个循环)。
但是在运行时发现会有一些警告,不影响功能,但是影响运行日志效果,故改进:
import os
from pathlib import Path
from PIL import Image
import warnings
# Silence every UserWarning so that warnings do not clutter the run log.
warnings.filterwarnings("ignore", category=UserWarning)
def check_pic(path_pic, files_to_delete):
    """Validate one image file and remove it if it cannot be decoded.

    A rejected file is appended to ``False.txt`` (one path per line) and then
    deleted. If deletion raises ``PermissionError`` the path is added to
    ``files_to_delete``; any other ``OSError`` is printed and ignored.

    Args:
        path_pic: Path of the image file to check (``str`` or ``Path``).
        files_to_delete: List collecting the paths whose deletion was blocked
            by a ``PermissionError``.

    Returns:
        True when the image opened and decoded, False otherwise.
    """
    try:
        # `with` guarantees the file handle is released before any deletion
        # attempt; an open handle can be what blocks os.remove().
        with Image.open(path_pic, 'r') as img:
            # Image.open() is lazy; load() forces the pixel data to be decoded.
            img.load()
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        pass
    else:
        return True

    with open('False.txt', 'a+') as log:
        log.write(str(path_pic) + '\n')
    try:
        os.remove(path_pic)
    except PermissionError:
        # An f-string works for both str and Path; 'text' + Path raises
        # TypeError and would skip the append below.
        print(f'File In Use:{path_pic}')
        files_to_delete.append(path_pic)
    except (FileNotFoundError, OSError) as e:
        print(f"删除文件 {path_pic} 失败: {e}")
    # Make the failure result explicit rather than an implicit None.
    return False
def Pic_Find(path):
    """Check all images under ``path`` and retry deletions that were queued.

    For each of the extensions ``.jpg``, ``.jpeg``, ``.tiff``, ``.bmp`` and
    ``.png`` (in that order) the directory tree is searched recursively and
    every match is given to ``check_pic`` along with a shared list. Paths
    left in that list are removed afterwards; errors during this removal are
    printed, not raised.

    Args:
        path: Root directory to scan.
    """
    deferred = []
    top = Path(path)
    for extension in ('jpg', 'jpeg', 'tiff', 'bmp', 'png'):
        glob_pattern = '*.' + extension
        for candidate in top.rglob(glob_pattern):
            check_pic(candidate, deferred)

    # Remove the files that were queued during the scan.
    for file in deferred:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")
def main():
    """Scan every directory directly inside the current working directory.

    Entries that are not directories are skipped. Any exception raised during
    the traversal is printed and ends it; nothing propagates to the caller.
    """
    try:
        for entry in os.listdir('.'):
            if not os.path.isdir(entry):
                continue
            Pic_Find(entry)
    except FileNotFoundError as e:
        print(f"找不到目录: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")
if __name__ == '__main__':
main()
修改之后,发现运行的时候,显示不够直观,再进行优化:
import os
from pathlib import Path
from PIL import Image
import time
def check_pic(path_pic, files_to_delete):
    """Return whether an image decodes; log and delete it when it does not.

    An undecodable file is appended to ``False.txt`` (one path per line) and
    removed. A ``PermissionError`` during removal queues the path in
    ``files_to_delete``; any other ``OSError`` is printed and swallowed.

    Args:
        path_pic: Path of the image file to check (``str`` or ``Path``).
        files_to_delete: List that receives ``path_pic`` when it could not be
            removed because of a ``PermissionError``.

    Returns:
        True when the image opened and decoded, False otherwise.
    """
    try:
        # Release the file handle as soon as decoding ends, success or not,
        # so it cannot be the reason the removal below is refused.
        with Image.open(path_pic, 'r') as img:
            # Image.open() is lazy; load() forces the pixel data to be decoded.
            img.load()
        return True
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(f'{path_pic}\n')

    try:
        os.remove(path_pic)
    except PermissionError:
        # path_pic is a Path when it comes from rglob(); concatenating it to
        # a str raises TypeError, so it is formatted instead.
        print(f'File In Use:{path_pic}')
        files_to_delete.append(path_pic)
    except (FileNotFoundError, OSError) as e:
        print(f"删除文件 {path_pic} 失败: {e}")
    # Explicit failure result; previously this path returned None.
    return False
def Pic_Find(path):
    """Check every image beneath ``path`` and print throughput statistics.

    The tree is searched recursively once per pattern (``*.jpg``, ``*.jpeg``,
    ``*.tiff``, ``*.bmp``, ``*.png``, in that order) and each match is passed
    to ``check_pic`` with a shared retry list. Paths left in that list are
    removed after the scan, with failures printed rather than raised.
    Finally a one-line summary is printed: number of files processed,
    elapsed seconds and files per second.

    Args:
        path: Root directory to scan.
    """
    files_to_delete = []
    root = Path(path)
    count = 0  # number of image files handed to check_pic
    # perf_counter() is monotonic and high-resolution, suited to intervals.
    start_time = time.perf_counter()

    # Every pattern is searched recursively. '*.jpeg' previously used glob(),
    # which only looked at the top level of `path`.
    for pattern in ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png'):
        for pic in root.rglob(pattern):
            check_pic(pic, files_to_delete)
            count += 1

    # Retry the deletions that were queued during the scan.
    for file in files_to_delete:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")

    elapsed_time = time.perf_counter() - start_time
    # Guard the division: on an empty or tiny directory the measured interval
    # can be 0, and a ZeroDivisionError here would abort the caller's loop.
    fps = count / elapsed_time if elapsed_time > 0 else 0.0
    print(f"处理了 {count} 张图片,用时 {elapsed_time:.2f} 秒,平均每秒处理 {fps:.2f} 张图片。")
def main():
    """Call ``Pic_Find`` for each directory found in the current directory.

    The first exception raised during the traversal is printed and stops it;
    the function then returns normally.
    """
    try:
        # The generator tests each name only when the loop reaches it.
        sub_dirs = (name for name in os.listdir('.') if os.path.isdir(name))
        for sub_dir in sub_dirs:
            Pic_Find(sub_dir)
    except FileNotFoundError as e:
        print(f"找不到目录: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")


if __name__ == '__main__':
    main()



