diff --git a/deal_duplicate_files.py b/deal_duplicate_files.py index d6672f9..b7affdf 100644 --- a/deal_duplicate_files.py +++ b/deal_duplicate_files.py @@ -1,4 +1,5 @@ import pathlib +import os import argparse from numpy import source import pandas as pd @@ -15,6 +16,11 @@ if __name__ == "__main__": type=str, help='csv file to the directory that be compared') + parser.add_argument('--remove', + type=str, + help='remove duplicated file or not', + choices=['True','False']) + args = parser.parse_args() csv_source = args.source csv_target = args.target @@ -40,4 +46,7 @@ if __name__ == "__main__": for row in df_filtered.Path: p = pathlib.Path(row) - print(str(p) + ": " + str(p.exists())) \ No newline at end of file + print(str(p) + ": " + str(p.exists())) + if args.remove == "True" and p.exists() == True: + os.remove(p) + print(str(p) + " Removed") \ No newline at end of file diff --git a/mv_unique_files.py b/mv_unique_files.py index 1d4a206..6ea8f86 100644 --- a/mv_unique_files.py +++ b/mv_unique_files.py @@ -101,7 +101,7 @@ if __name__ == "__main__": if args.csv_path: csv_path = args.csv_path - df_csv.to_csv(csv_path) + df_csv.to_csv(csv_path,index_label='Index') else: for x in paths: print(str(x))