synology_link_manipultor/mv_unique_files.py

109 lines
3.7 KiB
Python

import pathlib
import os
import argparse
from typing import List
import pandas as pd
import hashlib
import csv
from pandas.core.series import Series
from media import Media
def find_all_files(directory: pathlib.Path) -> list:
    """Recursively list every entry below *directory*, skipping "eaDir" paths.

    The tree is walked with ``directory.rglob('*')``; an entry is kept unless
    the substring ``"eaDir"`` occurs anywhere in its string form. No
    file/directory distinction is made, so sub-directories are returned
    alongside regular files.

    Args:
        directory: Root of the tree to scan.

    Returns:
        A list of ``pathlib.Path`` objects in the order ``rglob`` yields them.
    """
    # "eaDir" presumably targets Synology's "@eaDir" metadata folders --
    # TODO confirm; note the match is a plain substring test on the full path.
    return [entry for entry in directory.rglob('*') if "eaDir" not in str(entry)]
def view_unqiue_files(p: pathlib.Path, min_size):
    """Print a one-line hardlink report for *p* when it is larger than *min_size*.

    Nothing is printed if the size reported by ``p.lstat()`` is not strictly
    greater than ``min_size * 1024 * 1024`` bytes. Otherwise the line states
    whether the link count is exactly one or not, followed by the path, the
    size divided by 1024 twice, and the link count.

    Args:
        p: Path to inspect; ``lstat`` is used, so a symlink itself is examined
            rather than its target.
        min_size: Size threshold, multiplied by 1024 * 1024 before comparing.
    """
    info = p.lstat()
    if info.st_size <= min_size * 1024 * 1024:
        return

    links = info.st_nlink
    size_mb = info.st_size / 1024 / 1024
    if links == 1:
        label = "File with only one hardlink:"
    else:
        label = "File with more than one hardlink:"
    print(f"{label}{p}; size: {size_mb}MB with number of links: {links}")
def get_file_checksum(p: pathlib.Path):
    """Return the hexadecimal MD5 digest of the file at *p*.

    The file is opened in binary mode and fed to the hash in 8192-byte reads,
    so the whole file is never held in memory at once.

    Args:
        p: Path of the file to hash (anything ``open`` accepts).

    Returns:
        The MD5 digest as a lowercase hex string.
    """
    digest = hashlib.md5()
    with open(p, 'rb') as stream:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()
def main():
    """Inspect hardlink counts of large files under a source directory.

    Command-line behaviour:

    * With ``--csv True``, every path under ``--source`` whose ``lstat`` size
      exceeds the size threshold is written to a CSV file with the columns
      FileName, Path, inode, NumberOfLinks and CheckSum (MD5). The rows can be
      ordered with ``--order`` and the output location set with ``--csv_path``
      (default ``result.csv``).
    * Otherwise each path is printed, followed by the hardlink report from
      ``view_unqiue_files`` for paths above the size threshold.

    ``--size`` is the threshold in units of 1024 * 1024 bytes and defaults
    to 50.
    """
    # NOTE(review): the description text looks inherited from a sibling
    # script; it is left unchanged here -- confirm the intended wording.
    parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
    parser.add_argument('-s', '--source',
                        type=str,
                        # Without a source there is nothing to scan; let
                        # argparse report it instead of failing later with a
                        # TypeError from pathlib.Path(None).
                        required=True,
                        help='the path to directory with source files')
    parser.add_argument('-t', '--target',
                        type=str,
                        help='the path to directory to move files')
    parser.add_argument('--size',
                        type=int,
                        help='Expected minimum file size')
    parser.add_argument('--csv',
                        type=str,
                        choices=['False', 'True'],
                        help='export source folders unique files into csv')
    parser.add_argument('--order',
                        type=str,
                        help='order exported csv by name or inode',
                        choices=['inode', 'name', 'checksum'])
    parser.add_argument('--csv_path',
                        type=str,
                        help='path to export csv')
    parser.add_argument('--move',
                        type=str,
                        help='Confirm whether to move all files with NumberOfLinks as 1 from source directory to target directory')
    args = parser.parse_args()

    # Compare against None so that an explicit "--size 0" is honoured instead
    # of silently falling back to the default.
    min_file_size = args.size if args.size is not None else 50
    paths = find_all_files(pathlib.Path(args.source))

    if args.csv == "True":
        columns = ['FileName', 'Path', 'inode', 'NumberOfLinks', 'CheckSum']
        threshold_bytes = min_file_size * 1024 * 1024

        # Collect plain dicts and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and copied the whole frame on every call.
        rows = []
        for x in paths:
            info = x.lstat()  # one stat call per path instead of three
            if info.st_size > threshold_bytes:
                rows.append({'FileName': x.name,
                             'Path': str(x),
                             'inode': info.st_ino,
                             'NumberOfLinks': info.st_nlink,
                             'CheckSum': get_file_checksum(x)})
        df_csv = pd.DataFrame(rows, columns=columns)

        # Map each --order choice to the column it sorts by.
        sort_columns = {'inode': 'inode', 'name': 'FileName', 'checksum': 'CheckSum'}
        if args.order in sort_columns:
            df_csv = df_csv.sort_values(by=sort_columns[args.order])

        csv_path = args.csv_path or "result.csv"
        df_csv.to_csv(csv_path)
    else:
        for x in paths:
            print(str(x))
            view_unqiue_files(x, min_file_size)
            # TODO: --target and --move are parsed but not acted on yet; the
            # linking step below is still disabled.
            # NOTE(review): Path.link_to was removed in Python 3.12;
            # Path.hardlink_to (argument order reversed) is its replacement.
            # dst_path = pathlib.Path(os.path.join(args.target, x.name))
            # print("Its new path: " + str(dst_path))
            # x.link_to(dst_path)


if __name__ == "__main__":
    main()