Added export csv

master
Jason Zhu 2021-08-15 00:13:02 +10:00
parent ec4acbd78b
commit 1febef406d
1 changed file with 73 additions and 21 deletions

View File

@ -1,45 +1,97 @@
import pathlib
import os
import argparse
from typing import List
import pandas as pd
import csv
from pandas.core.series import Series
from media import Media
def find_all_files(directory: pathlib.Path) -> List[pathlib.Path]:
    """Recursively list every entry under *directory*, skipping "eaDir" paths.

    Args:
        directory: Root folder to walk with ``Path.rglob('*')``.

    Returns:
        Every path yielded by the recursive glob (regular files and
        sub-directories alike) whose string form does not contain
        ``"eaDir"``, in the order ``rglob`` produced them. An empty
        directory yields an empty list.
    """
    # NOTE(review): "eaDir" presumably targets Synology's "@eaDir" metadata
    # folders -- confirm. The check is a plain substring match on the whole
    # path, so a root directory whose own path contains "eaDir" filters out
    # everything beneath it.
    return [p for p in directory.rglob('*') if "eaDir" not in str(p)]
def view_unqiue_files(p: pathlib.Path, min_size) -> None:
    """Print the size and hard-link count of *p* if it is larger than *min_size*.

    Args:
        p: Path to inspect. It is examined with ``lstat()``, so a symlink is
            reported as the link itself rather than its target.
        min_size: Threshold in MiB (it is multiplied by 1024 * 1024 before
            being compared with the size in bytes). Entries whose size is
            not strictly greater than the threshold print nothing.

    Raises:
        OSError: Propagated from ``lstat()``, e.g. if *p* no longer exists.
    """
    # Stat once so the size and link count printed below come from the same
    # snapshot instead of several separate syscalls.
    info = p.lstat()
    if info.st_size <= min_size * 1024 * 1024:
        return

    if info.st_nlink == 1:
        label = "File with only one hardlink:"
    else:
        label = "File with more than one hardlink:"
    print(f"{label}{p}; size: {info.st_size / 1024 / 1024}"
          f"MB with number of links: {info.st_nlink}")
if __name__ == "__main__":
    # Command-line entry point: scan --source recursively, then either export
    # the large entries found there to a CSV file (--csv True) or print each
    # one's size and hard-link count.
    parser = argparse.ArgumentParser(description='convert hardlink to symlink in download folder')
    parser.add_argument('-s', '--source',
                        type=str,
                        # Without a source there is nothing to scan; fail with
                        # a usage message instead of a TypeError from Path().
                        required=True,
                        help='the path to directory with source files')
    parser.add_argument('-t', '--target',
                        type=str,
                        help='the path to directory to move files')
    parser.add_argument('--size',
                        type=int,
                        help='Expected minimum file size')
    parser.add_argument('--csv',
                        type=str,
                        choices=['False', 'True'],
                        help='export source folders unique files into csv')
    parser.add_argument('--order',
                        type=str,
                        help='order exported csv by name or inode',
                        choices=['inode', 'name'])
    parser.add_argument('--csv_path',
                        type=str,
                        help='path to export csv')
    parser.add_argument('--move',
                        type=str,
                        help='Confirm whether to move all files with NumberOfLinks as 1 from source directory to target directory')
    args = parser.parse_args()

    path_library = args.source
    # Only referenced by the not-yet-enabled move step at the bottom.
    path_inventory = args.target
    # Threshold in MiB; defaults to 50. Compare against None so that an
    # explicit "--size 0" is honoured rather than silently replaced.
    min_file_size = args.size if args.size is not None else 50

    paths = find_all_files(pathlib.Path(path_library))

    if args.csv == "True":
        # Collect the rows first and build the frame once; growing a
        # DataFrame row by row copies it on every iteration.
        rows = []
        for x in paths:
            info = x.lstat()
            if info.st_size > min_file_size * 1024 * 1024:
                rows.append({'FileName': x.name,
                             'Path': str(x),
                             'inode': info.st_ino,
                             'NumberOfLinks': info.st_nlink})
        df_csv = pd.DataFrame(rows, columns=['FileName', 'Path', 'inode', 'NumberOfLinks'])
        print(df_csv)

        # sort_values returns a new frame, so the result must be kept for
        # --order to affect the exported file. Any value other than "inode"
        # (including no --order at all) sorts by file name.
        sort_column = "inode" if args.order == "inode" else "FileName"
        df_csv = df_csv.sort_values(by=sort_column, ignore_index=True)

        csv_path = args.csv_path if args.csv_path else "result.csv"
        df_csv.to_csv(csv_path)
    else:
        for x in paths:
            view_unqiue_files(x, min_file_size)
            # TODO: move step (see --move / --target) is not implemented yet.
            # dst_path = pathlib.Path(os.path.join(path_inventory,x.name))
            # print("Its new path: " + str(dst_path))