"""Report large files under a source directory together with their hardlink counts.

The script walks the source directory and then either prints one line per
large file or, with ``--csv True``, exports the same information to a CSV file.
"""
import pathlib
import os
import argparse
from typing import List
import pandas as pd
import csv

from pandas.core.series import Series

from media import Media

# Sizes on the command line are given in megabytes; lstat() reports bytes.
_BYTES_PER_MB = 1024 * 1024
# Minimum size (in MB) used when --size is not given.
_DEFAULT_MIN_SIZE_MB = 50
_CSV_COLUMNS = ['FileName', 'Path', 'inode', 'NumberOfLinks']


def find_all_files(directory: pathlib.Path) -> list:
    """Return every entry found recursively under *directory*.

    ``rglob('*')`` yields directories as well as files. Any entry whose path
    contains the substring "eaDir" is skipped (presumably NAS-generated
    metadata folders -- confirm against the deployment).
    """
    return [p for p in directory.rglob('*') if "eaDir" not in str(p)]


def view_unqiue_files(p: pathlib.Path, min_size):
    """Print the size and hardlink count of *p* if it is larger than *min_size* MB.

    Entries at or below the threshold produce no output. The entry is
    stat'ed once (without following symlinks) so the printed size and link
    count come from the same snapshot.
    """
    info = p.lstat()
    if info.st_size <= min_size * _BYTES_PER_MB:
        return

    if info.st_nlink == 1:
        label = "File with only one hardlink:"
    else:
        label = "File with more than one hardlink:"
    print(label + str(p) + "; size: " +
          str(info.st_size/1024/1024) +
          "MB with number of links: " + str(info.st_nlink))


def _collect_rows(paths, min_size):
    """Build one CSV row (a dict) for every path larger than *min_size* MB."""
    rows = []
    for p in paths:
        info = p.lstat()
        if info.st_size > min_size * _BYTES_PER_MB:
            rows.append({'FileName': p.name,
                         'Path': str(p),
                         'inode': info.st_ino,
                         'NumberOfLinks': info.st_nlink})
    return rows


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
    # Required: without a source there is nothing to scan, and
    # pathlib.Path(None) would otherwise fail with a bare TypeError.
    parser.add_argument('-s', '--source',
                        type=str,
                        required=True,
                        help='the path to directory with source files')

    parser.add_argument('-t', '--target',
                        type=str,
                        help='the path to directory to move files')

    # Using an argparse default (instead of "if args.size:") lets an
    # explicit "--size 0" be honoured rather than replaced by the default.
    parser.add_argument('--size',
                        type=int,
                        default=_DEFAULT_MIN_SIZE_MB,
                        help='Expected minimum file size')

    parser.add_argument('--csv',
                        type=str,
                        choices=['False', 'True'],
                        help='export source folders unique files into csv')

    parser.add_argument('--order',
                        type=str,
                        help='order exported csv by name or inode',
                        choices=['inode', 'name'])

    parser.add_argument('--csv_path',
                        type=str,
                        help='path to export csv')

    parser.add_argument('--move',
                        type=str,
                        help='Confirm whether to move all files with '
                             'NumberOfLinks as 1 from source directory to target directory')

    args = parser.parse_args()

    min_file_size = args.size
    path_library = args.source
    path_inventory = args.target

    paths = find_all_files(pathlib.Path(path_library))

    if args.csv == "True":
        # Build the frame in one go: DataFrame.append was removed in
        # pandas 2.0 and copied the whole frame on every call.
        df_csv = pd.DataFrame(_collect_rows(paths, min_file_size),
                              columns=_CSV_COLUMNS)

        # sort_values returns a new frame; the result must be kept or the
        # requested ordering is silently lost.
        sort_key = "inode" if args.order == "inode" else "FileName"
        df_csv = df_csv.sort_values(by=sort_key)

        # Printed after sorting so the on-screen table matches the export.
        print(df_csv)

        csv_path = args.csv_path or "result.csv"
        df_csv.to_csv(csv_path)
    else:
        for x in paths:
            view_unqiue_files(x, min_file_size)

    # NOTE(review): --target and --move are parsed but nothing visible here
    # acts on them yet; the lines below look like the start of the move step.
    # dst_path = pathlib.Path(os.path.join(path_inventory,x.name))
    # print("Its new path: " + str(dst_path))