Added create hardlink feature
parent
a498e0d502
commit
bda89dba44
|
@ -35,9 +35,31 @@ def get_file_checksum(p: pathlib.Path):
|
||||||
while chunk := f.read(8192):
|
while chunk := f.read(8192):
|
||||||
m.update(chunk)
|
m.update(chunk)
|
||||||
hexvalue = m.hexdigest()
|
hexvalue = m.hexdigest()
|
||||||
print(hexvalue)
|
|
||||||
return hexvalue
|
return hexvalue
|
||||||
|
|
||||||
|
def get_file_dataframe(ps, min_file_size, bool_checksum: bool) -> pd.DataFrame:
    """Build a DataFrame describing every file in *ps* larger than a threshold.

    Args:
        ps: iterable of pathlib.Path objects to inspect.
        min_file_size: size threshold in MiB; only files strictly larger
            than this are included.
        bool_checksum: when truthy, add a 'CheckSum' column computed by
            get_file_checksum (slower — reads every file's content).

    Returns:
        pd.DataFrame with columns FileName, Path, inode, NumberOfLinks,
        plus CheckSum when bool_checksum is truthy. Empty (but with the
        expected columns) when no file passes the size filter.
    """
    columns = ['FileName', 'Path', 'inode', 'NumberOfLinks']
    if bool_checksum:
        columns.append('CheckSum')

    threshold = min_file_size * 1024 * 1024  # MiB -> bytes

    rows = []
    for p in ps:
        stat = p.lstat()  # lstat so symlinks themselves are measured, not their targets
        if stat.st_size > threshold:
            row = {'FileName': p.name,
                   'Path': str(p),
                   'inode': stat.st_ino,
                   'NumberOfLinks': stat.st_nlink}
            if bool_checksum:
                row['CheckSum'] = get_file_checksum(p)
            rows.append(row)

    # DataFrame.append was removed in pandas 2.0 (and was O(n^2));
    # accumulate plain dicts and build the frame once instead.
    return pd.DataFrame(rows, columns=columns)
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
|
parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
|
||||||
|
|
||||||
|
@ -72,27 +94,31 @@ if __name__ == "__main__":
|
||||||
help='Confirm whether to move all files with NumberOfLinks as 1 from source directory to target directory',
|
help='Confirm whether to move all files with NumberOfLinks as 1 from source directory to target directory',
|
||||||
choices=['False','True'])
|
choices=['False','True'])
|
||||||
|
|
||||||
|
parser.add_argument('--hardlink',
|
||||||
|
type=str,
|
||||||
|
help='Whether copy files in source directory (via hardlink) to target directory',
|
||||||
|
choices=['True','False'])
|
||||||
|
|
||||||
|
parser.add_argument('--unique',
|
||||||
|
type=str,
|
||||||
|
help='Wether the copy file is unqiue in target directory',
|
||||||
|
choices=['True','False'])
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
min_file_size = 50
|
min_file_size = 50
|
||||||
path_library = args.source
|
if args.source:
|
||||||
path_inventory = args.target
|
path_source = args.source
|
||||||
|
if args.target:
|
||||||
|
path_target = args.target
|
||||||
|
|
||||||
if args.size:
|
if args.size:
|
||||||
min_file_size = args.size
|
min_file_size = args.size
|
||||||
paths_library = find_all_files(pathlib.Path(path_library))
|
paths_source = find_all_files(pathlib.Path(path_source))
|
||||||
|
|
||||||
if args.csv == "True":
|
if args.csv == "True":
|
||||||
|
# Export csv of source directory to csv_path, can be order by name, inode, or checksum
|
||||||
csv_path = "result.csv"
|
csv_path = "result.csv"
|
||||||
df_csv = pd.DataFrame(columns=['FileName', 'Path', 'inode', 'NumberOfLinks', 'CheckSum'])
|
df_csv = get_file_dataframe(paths_source, min_file_size, True)
|
||||||
for x in paths_library:
|
|
||||||
if x.lstat().st_size > min_file_size * 1024 * 1024:
|
|
||||||
print(str(datetime.now()) + " : " + str(x))
|
|
||||||
new_row = {'FileName': x.name,
|
|
||||||
'Path': str(x),
|
|
||||||
'inode': x.lstat().st_ino,
|
|
||||||
'NumberOfLinks': x.lstat().st_nlink,
|
|
||||||
'CheckSum': get_file_checksum(x)}
|
|
||||||
df_csv = df_csv.append(new_row, ignore_index=True)
|
|
||||||
if args.order == "inode":
|
if args.order == "inode":
|
||||||
df_csv = df_csv.sort_values(by="inode")
|
df_csv = df_csv.sort_values(by="inode")
|
||||||
elif args.order == "name":
|
elif args.order == "name":
|
||||||
|
@ -103,16 +129,31 @@ if __name__ == "__main__":
|
||||||
if args.csv_path:
|
if args.csv_path:
|
||||||
csv_path = args.csv_path
|
csv_path = args.csv_path
|
||||||
df_csv.to_csv(csv_path,index_label='Index')
|
df_csv.to_csv(csv_path,index_label='Index')
|
||||||
else:
|
|
||||||
for x in paths_library:
|
|
||||||
print(str(x))
|
|
||||||
view_unqiue_files(x, min_file_size)
|
|
||||||
|
|
||||||
if args.move == "True":
|
if args.move == "True":
|
||||||
target_dir = pathlib.Path(path_inventory)
|
target_dir = pathlib.Path(path_target)
|
||||||
for x in paths_library:
|
for x in paths_source:
|
||||||
if x.lstat().st_size > min_file_size * 1024 * 1024:
|
if x.lstat().st_size > min_file_size * 1024 * 1024:
|
||||||
print(str(datetime.now()) + " : " + str(x))
|
print(str(datetime.now()) + " : " + str(x))
|
||||||
new_path = pathlib.Path(target_dir, x.name).resolve()
|
new_path = pathlib.Path(target_dir, x.name).resolve()
|
||||||
print("New path: " + str(new_path))
|
print("New path: " + str(new_path))
|
||||||
x.rename(new_path)
|
x.rename(new_path)
|
||||||
|
|
||||||
|
if args.hardlink == 'True':
|
||||||
|
target_dir = pathlib.Path(path_target)
|
||||||
|
paths_target = find_all_files(target_dir)
|
||||||
|
|
||||||
|
df_library = get_file_dataframe(paths_source, min_file_size, False)
|
||||||
|
df_target = get_file_dataframe(paths_target, min_file_size, False)
|
||||||
|
print(df_library)
|
||||||
|
print(df_target)
|
||||||
|
print(df_target.inode)
|
||||||
|
for index, row in df_library.iterrows():
|
||||||
|
print(row.Path)
|
||||||
|
if row.inode not in df_target.inode.to_list():
|
||||||
|
p = pathlib.Path(row.Path)
|
||||||
|
target_path = pathlib.Path(target_dir, row.FileName)
|
||||||
|
p.link_to(target_path)
|
||||||
|
print("Create hardlink of " + str(p) + "at location" + str(target_path))
|
||||||
|
else:
|
||||||
|
print("Is in target directory")
|
Loading…
Reference in New Issue