Created deal_duplicate_files.py
parent
d1f0d47fdf
commit
5ed4d99f43
|
@ -0,0 +1,43 @@
|
||||||
|
import argparse
import pathlib

import pandas as pd
# NOTE(review): unused import left over from an editor autocomplete accident
# (`numpy.source` prints the source of a numpy object — never used here).
# Kept only so existing behavior/imports are unchanged; safe to delete.
from numpy import source


def find_duplicates(df_source, df_target):
    """Return the rows of *df_source* whose 'CheckSum' also occurs in *df_target*.

    Replaces the original O(n*m) nested Python loop and the
    ``DataFrame.append`` call (deprecated in pandas 1.4, removed in 2.0)
    with a single vectorized ``Series.isin`` filter. Unlike the loop, a
    checksum that appears several times in the target no longer yields
    duplicated rows in the result.

    Parameters
    ----------
    df_source : pd.DataFrame
        Listing to be de-duplicated; must have a 'CheckSum' column.
    df_target : pd.DataFrame
        Listing to compare against; must have a 'CheckSum' column.

    Returns
    -------
    pd.DataFrame
        The subset of ``df_source`` rows considered duplicates.
    """
    return df_source[df_source['CheckSum'].isin(df_target['CheckSum'])]


def load_listing(csv_path):
    """Load one Synology file-listing CSV, indexed by its 'Index' column.

    ``read_csv`` already returns a DataFrame, so the original's extra
    ``pd.DataFrame(...)`` wrapper is dropped.
    """
    return pd.read_csv(csv_path, index_col='Index',
                       dtype={'NumberOfLinks': 'int', 'inode': 'int'})


def main():
    """CLI entry point: report files in --source that also exist in --target."""
    parser = argparse.ArgumentParser(
        description='Deal with duplicate files in synology')
    parser.add_argument('-s', '--source',
                        type=str,
                        help='csv file to the directory to be de-duplicate')
    parser.add_argument('-t', '--target',
                        type=str,
                        help='csv file to the directory that be compared')
    args = parser.parse_args()

    df_source = load_listing(args.source)
    df_target = load_listing(args.target)

    df_filtered = find_duplicates(df_source, df_target)

    print("Found Duplicate")
    print(df_filtered)
    print("Source Directory")
    print(df_source)
    print("Target")
    print(df_target)

    # Sanity check: report whether each duplicate path still exists on disk.
    for path_str in df_filtered.Path:
        p = pathlib.Path(path_str)
        print(str(p) + ": " + str(p.exists()))


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue