"""Report (and optionally delete) files that duplicate entries of another inventory.

Two CSV inventories are compared by their ``CheckSum`` column. Every row of
the *source* inventory whose checksum also occurs in the *target* inventory is
treated as a duplicate; with ``--remove True`` the file named in that row's
``Path`` column is deleted from disk.
"""
import argparse
import os
import pathlib

import pandas as pd

# Columns forced to integer dtype when reading an inventory CSV.
INVENTORY_DTYPES = {'NumberOfLinks': 'int', 'inode': 'int'}


def load_inventory(csv_path):
    """Read an inventory CSV into a DataFrame indexed by its ``Index`` column."""
    return pd.read_csv(csv_path, index_col='Index', dtype=INVENTORY_DTYPES)


def find_duplicates(df_source, df_target):
    """Return the rows of *df_source* whose ``CheckSum`` occurs in *df_target*.

    Each source row appears at most once in the result, no matter how many
    target rows share its checksum. The result always keeps the columns of
    *df_source*, so it can be used safely even when nothing matches.
    """
    # Missing checksums never compared equal in a plain ``==`` test, but
    # ``isin`` would match NaN to NaN, so drop them before the lookup.
    target_checksums = df_target['CheckSum'].dropna()
    return df_source[df_source['CheckSum'].isin(target_checksums)]


def remove_duplicate_files(paths, remove=False):
    """Print whether each path exists and delete it when *remove* is true.

    Args:
        paths: iterable of path strings to inspect.
        remove: when true, delete every path that currently exists.

    Returns:
        The list of ``pathlib.Path`` objects that were actually deleted.
    """
    removed = []
    for raw_path in paths:
        p = pathlib.Path(raw_path)
        exists = p.exists()
        print(f"{p}: {exists}")
        if not (remove and exists):
            continue
        try:
            os.remove(p)
        except FileNotFoundError:
            # The file vanished between the existence check and the removal.
            continue
        print(f"{p} Removed")
        removed.append(p)
    return removed


def main(argv=None):
    """Parse the command line, report duplicates and optionally delete them."""
    parser = argparse.ArgumentParser(description='Deal with duplicate files in synology')
    # Both inventories are mandatory: without them there is nothing to compare.
    parser.add_argument('-s', '--source', type=str, required=True,
                        help='csv file to the directory to be de-duplicate')
    parser.add_argument('-t', '--target', type=str, required=True,
                        help='csv file to the directory that be compared')
    parser.add_argument('--remove', type=str, help='remove duplicated file or not',
                        choices=['True', 'False'])
    args = parser.parse_args(argv)

    df_source = load_inventory(args.source)
    df_target = load_inventory(args.target)
    df_filtered = find_duplicates(df_source, df_target)

    print("Found Duplicate")
    print(df_filtered)
    print("Source Directory")
    print(df_source)
    print("Target")
    print(df_target)

    remove_duplicate_files(df_filtered['Path'], remove=(args.remove == "True"))


if __name__ == "__main__":
    main()