Added order by CheckSum
parent
76cacaeb14
commit
69a9a0db8e
|
@ -3,6 +3,7 @@ import os
|
||||||
import argparse
|
import argparse
|
||||||
from typing import List
|
from typing import List
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import hashlib
|
||||||
import csv
|
import csv
|
||||||
|
|
||||||
from pandas.core.series import Series
|
from pandas.core.series import Series
|
||||||
|
@ -27,6 +28,13 @@ def view_unqiue_files(p: pathlib.Path, min_size):
|
||||||
str(p.lstat().st_size/1024/1024) +
|
str(p.lstat().st_size/1024/1024) +
|
||||||
"MB with number of links: " + str(p.lstat().st_nlink))
|
"MB with number of links: " + str(p.lstat().st_nlink))
|
||||||
|
|
||||||
|
def get_file_checksum(p: pathlib.Path, *, algorithm: str = "md5",
                      chunk_size: int = 8192) -> str:
    """Return the hex digest of the contents of the file at *p*.

    The file is opened in binary mode and read in fixed-size chunks, so
    the whole file is never held in memory at once.

    Args:
        p: Path of the file to hash.
        algorithm: Name of a hash algorithm accepted by ``hashlib.new``.
            Defaults to ``"md5"``, which is used here only to compare file
            contents, not for any security purpose.
        chunk_size: Number of bytes to read per iteration. Must be positive.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive, or *algorithm* is not
            known to ``hashlib``.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately (yielding the digest of empty input)
    # and a negative size reads the entire file at once; reject both.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    digest = hashlib.new(algorithm)
    with open(p, 'rb') as f:
        # An empty bytes object marks end of file and ends the loop.
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
|
parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
|
||||||
|
|
||||||
|
@ -50,7 +58,7 @@ if __name__ == "__main__":
|
||||||
parser.add_argument('--order',
|
parser.add_argument('--order',
|
||||||
type=str,
|
type=str,
|
||||||
help='order exported csv by name or inode',
|
help='order exported csv by name or inode',
|
||||||
choices=['inode','name'])
|
choices=['inode','name','checksum'])
|
||||||
|
|
||||||
parser.add_argument('--csv_path',
|
parser.add_argument('--csv_path',
|
||||||
type=str,
|
type=str,
|
||||||
|
@ -71,18 +79,21 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
if args.csv == "True":
|
if args.csv == "True":
|
||||||
csv_path = "result.csv"
|
csv_path = "result.csv"
|
||||||
df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks'])
|
df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks', 'CheckSum'])
|
||||||
for x in paths:
|
for x in paths:
|
||||||
if x.lstat().st_size > min_file_size * 1024 * 1024:
|
if x.lstat().st_size > min_file_size * 1024 * 1024:
|
||||||
new_row = {'FileName': x.name,
|
new_row = {'FileName': x.name,
|
||||||
'Path': str(x),
|
'Path': str(x),
|
||||||
'inode': x.lstat().st_ino,
|
'inode': x.lstat().st_ino,
|
||||||
'NumberOfLinks': x.lstat().st_nlink}
|
'NumberOfLinks': x.lstat().st_nlink,
|
||||||
|
'CheckSum': get_file_checksum(x)}
|
||||||
df_csv = df_csv.append(new_row, ignore_index=True)
|
df_csv = df_csv.append(new_row, ignore_index=True)
|
||||||
if args.order == "inode":
|
if args.order == "inode":
|
||||||
df_csv = df_csv.sort_values(by="inode")
|
df_csv = df_csv.sort_values(by="inode")
|
||||||
elif args.order == "name":
|
elif args.order == "name":
|
||||||
df_csv = df_csv.sort_values(by="FileName")
|
df_csv = df_csv.sort_values(by="FileName")
|
||||||
|
elif args.order == "checksum":
|
||||||
|
df_csv = df_csv.sort_values(by="CheckSum")
|
||||||
|
|
||||||
if args.csv_path:
|
if args.csv_path:
|
||||||
csv_path = args.csv_path
|
csv_path = args.csv_path
|
||||||
|
|
Loading…
Reference in New Issue