import argparse
import csv
import os
import pathlib
from typing import List

import pandas as pd
from pandas.core.series import Series

from media import Media

# Sizes on the command line are given in MiB; lstat() reports bytes.
_BYTES_PER_MB = 1024 * 1024
_DEFAULT_MIN_SIZE_MB = 50
_DEFAULT_CSV_PATH = "result.csv"
_CSV_COLUMNS = ['FileName', 'Path', 'inode', 'NumberOfLinks']


def find_all_files(directory: pathlib.Path) -> List[pathlib.Path]:
    """Return every path found recursively under *directory*.

    Both files and directories are returned, since ``rglob('*')`` yields
    both. Any path whose string form contains ``"eaDir"`` is skipped
    (presumably NAS metadata folders such as ``@eaDir`` -- confirm).
    """
    return [p for p in directory.rglob('*') if "eaDir" not in str(p)]


def view_unqiue_files(p: pathlib.Path, min_size):
    """Print the hard-link status of *p* if it is larger than *min_size* MiB.

    Paths at or below the threshold are ignored silently. ``lstat`` is used,
    so a symlink is measured itself rather than its target.
    """
    info = p.lstat()  # one syscall instead of one per field
    if info.st_size <= min_size * _BYTES_PER_MB:
        return
    if info.st_nlink == 1:
        label = "File with only one hardlink:"
    else:
        label = "File with more than one hardlink:"
    print(f"{label}{p}; size: {info.st_size / 1024 / 1024}MB"
          f" with number of links: {info.st_nlink}")


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the script."""
    parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
    # Required: without it the scan has no directory to walk.
    parser.add_argument('-s', '--source', type=str, required=True,
                        help='the path to directory with source files')
    parser.add_argument('-t', '--target', type=str, help='the path to directory to move files')
    # A real default (rather than a truthiness check later) lets `--size 0` work.
    parser.add_argument('--size', type=int, default=_DEFAULT_MIN_SIZE_MB,
                        help='Expected minimum file size')
    parser.add_argument('--csv', type=str, choices=['False', 'True'],
                        help='export source folders unique files into csv')
    parser.add_argument('--order', type=str, help='order exported csv by name or inode',
                        choices=['inode', 'name'])
    parser.add_argument('--csv_path', type=str, help='path to export csv')
    parser.add_argument('--move', type=str,
                        help='Confirm whether to move all files with NumberOfLinks as 1 '
                             'from source directory to target directory')
    return parser


def _build_inventory(paths, min_size, order) -> pd.DataFrame:
    """Return a sorted table of the paths larger than *min_size* MiB.

    Rows are sorted by ``inode`` when *order* is ``"inode"`` and by
    ``FileName`` otherwise.
    """
    threshold = min_size * _BYTES_PER_MB
    rows = []
    for path in paths:
        info = path.lstat()
        if info.st_size > threshold:
            rows.append({'FileName': path.name, 'Path': str(path),
                         'inode': info.st_ino, 'NumberOfLinks': info.st_nlink})
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    frame = pd.DataFrame(rows, columns=_CSV_COLUMNS)
    sort_key = "inode" if order == "inode" else "FileName"
    # sort_values returns a new frame; the result must be kept. Resetting the
    # index keeps the exported index column running 0..n-1.
    return frame.sort_values(by=sort_key).reset_index(drop=True)


def main() -> None:
    """Scan the source directory and either export a CSV or print link status."""
    args = _build_parser().parse_args()
    paths = find_all_files(pathlib.Path(args.source))

    if args.csv == "True":
        frame = _build_inventory(paths, args.size, args.order)
        print(frame)
        frame.to_csv(args.csv_path or _DEFAULT_CSV_PATH)
    else:
        for path in paths:
            view_unqiue_files(path, args.size)

    # TODO: --target and --move are parsed but not acted on yet. The earlier
    # sketch hard-linked each file into the target directory via
    # Path.link_to(), which no longer exists on Python 3.12+; use
    # target_path.hardlink_to(source_path) when implementing this.


if __name__ == "__main__":
    main()