Added order by CheckSum

master
Jason Zhu 2021-08-15 13:11:59 +10:00
parent 76cacaeb14
commit 69a9a0db8e
1 changed file with 14 additions and 3 deletions

View File

@@ -3,6 +3,7 @@ import os
import argparse
from typing import List
import pandas as pd
import hashlib
import csv
from pandas.core.series import Series
@@ -27,6 +28,13 @@ def view_unqiue_files(p: pathlib.Path, min_size):
str(p.lstat().st_size/1024/1024) +
"MB with number of links: " + str(p.lstat().st_nlink))
def get_file_checksum(p: pathlib.Path, chunk_size: int = 8192) -> str:
    """Return the hexadecimal MD5 digest of the file at *p*.

    The file is opened in binary mode and read in fixed-size chunks, so
    large files are hashed without loading them into memory all at once.

    Args:
        p: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Must be positive.

    Returns:
        The MD5 digest of the file contents as a lowercase hex string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) returns b"" immediately (the loop would hash nothing) and a
        # negative size reads the whole file at once; reject both explicitly.
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    digest = hashlib.md5()
    with open(p, 'rb') as f:
        # The walrus loop ends on the empty bytes object returned at EOF.
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
@@ -50,7 +58,7 @@ if __name__ == "__main__":
parser.add_argument('--order',
type=str,
help='order exported csv by name or inode',
choices=['inode','name'])
choices=['inode','name','checksum'])
parser.add_argument('--csv_path',
type=str,
@@ -71,18 +79,21 @@ if __name__ == "__main__":
if args.csv == "True":
csv_path = "result.csv"
df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks'])
df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks', 'CheckSum'])
for x in paths:
if x.lstat().st_size > min_file_size * 1024 * 1024:
new_row = {'FileName': x.name,
'Path': str(x),
'inode': x.lstat().st_ino,
'NumberOfLinks': x.lstat().st_nlink}
'NumberOfLinks': x.lstat().st_nlink,
'CheckSum': get_file_checksum(x)}
df_csv = df_csv.append(new_row, ignore_index=True)
if args.order == "inode":
df_csv = df_csv.sort_values(by="inode")
elif args.order == "name":
df_csv = df_csv.sort_values(by="FileName")
elif args.order == "checksum":
df_csv = df_csv.sort_values(by="CheckSum")
if args.csv_path:
csv_path = args.csv_path