Added order by CheckSum

master
Jason Zhu 2021-08-15 13:11:59 +10:00
parent 76cacaeb14
commit 69a9a0db8e
1 changed file with 14 additions and 3 deletions

View File

@@ -3,6 +3,7 @@ import os
import argparse import argparse
from typing import List from typing import List
import pandas as pd import pandas as pd
import hashlib
import csv import csv
from pandas.core.series import Series from pandas.core.series import Series
@@ -27,6 +28,13 @@ def view_unqiue_files(p: pathlib.Path, min_size):
str(p.lstat().st_size/1024/1024) + str(p.lstat().st_size/1024/1024) +
"MB with number of links: " + str(p.lstat().st_nlink)) "MB with number of links: " + str(p.lstat().st_nlink))
def get_file_checksum(p: pathlib.Path, chunk_size: int = 8192) -> str:
    """Return the hexadecimal MD5 digest of the file at *p*.

    The file is opened in binary mode and read in fixed-size chunks, so a
    large file is hashed without being loaded into memory all at once.

    Args:
        p: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration; must be positive.
            The digest does not depend on this value.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) would end the loop at once (digest of nothing) and a
        # negative size would read the whole file in a single call.
        raise ValueError("chunk_size must be a positive number of bytes")
    digest = hashlib.md5()
    with open(p, 'rb') as f:
        # An empty bytes object marks end-of-file and stops the loop.
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder') parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
@@ -50,7 +58,7 @@ if __name__ == "__main__":
parser.add_argument('--order', parser.add_argument('--order',
type=str, type=str,
help='order exported csv by name or inode', help='order exported csv by name or inode',
choices=['inode','name']) choices=['inode','name','checksum'])
parser.add_argument('--csv_path', parser.add_argument('--csv_path',
type=str, type=str,
@@ -71,18 +79,21 @@ if __name__ == "__main__":
if args.csv == "True": if args.csv == "True":
csv_path = "result.csv" csv_path = "result.csv"
df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks']) df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks', 'CheckSum'])
for x in paths: for x in paths:
if x.lstat().st_size > min_file_size * 1024 * 1024: if x.lstat().st_size > min_file_size * 1024 * 1024:
new_row = {'FileName': x.name, new_row = {'FileName': x.name,
'Path': str(x), 'Path': str(x),
'inode': x.lstat().st_ino, 'inode': x.lstat().st_ino,
'NumberOfLinks': x.lstat().st_nlink} 'NumberOfLinks': x.lstat().st_nlink,
'CheckSum': get_file_checksum(x)}
df_csv = df_csv.append(new_row, ignore_index=True) df_csv = df_csv.append(new_row, ignore_index=True)
if args.order == "inode": if args.order == "inode":
df_csv = df_csv.sort_values(by="inode") df_csv = df_csv.sort_values(by="inode")
elif args.order == "name": elif args.order == "name":
df_csv = df_csv.sort_values(by="FileName") df_csv = df_csv.sort_values(by="FileName")
elif args.order == "checksum":
df_csv = df_csv.sort_values(by="CheckSum")
if args.csv_path: if args.csv_path:
csv_path = args.csv_path csv_path = args.csv_path