I have just started with Python and I have to compare file names with folder names in order to launch the right sh script (I'm using Airflow).
import glob
import os
import shutil
from os import path
# Root folders: the ODS target directories and the drop zone for received files.
odsPath = '/apps/data/02_ODS/'
receiptPath = '/apps/data/80_DATA/01_Receipt/'

# Pass 1: derive a candidate ODS folder name from each received file.
for files in os.listdir(receiptPath):
    # `and` binds tighter than `or`, so the '.csv' check only applies to the
    # 'FMS' prefix; the parentheses spell out that existing behaviour.
    matches = files.startswith('MEM_ZMII') or (
        files.startswith('FMS') and files.endswith('.csv')
    )
    if not matches:
        continue
    parsedFiles = files.split('_')
    pattern = '_'.join(parsedFiles[0:2])
    # Tokens 2-4 re-joined, cut at the first '-', lower-cased.
    fileName = '_'.join(parsedFiles[2:5]).split('-')[0].lower()
    # print('appCode: ', pattern)
    # print('fileName: ', fileName)

# Pass 2: runs only after pass 1 has finished, so `fileName` holds just the
# value derived from the last matching file.
for odsFolder in os.listdir(odsPath):
    if odsFolder != fileName:
        print('it\'s not')
    else:
        print('it exist: ', str(fileName))
I have 3 files in receiptPath, but it only matches 1 of them and not the others. Can someone help me?
Thanks a lot!
OK, your problem is that you overwrite your variable fileName on every iteration, so at the end of the first for loop it only keeps the last value, which is material_makt. The solution is to save all the file names in a list, fileNames_list, and then check if (odsFolder in fileNames_list):
import glob
import os
import shutil
from os import path
# Root folders: the ODS target directories and the drop zone for received files.
odsPath = '/apps/data/02_ODS/'
receiptPath = '/apps/data/80_DATA/01_Receipt/'

# Collect the folder name derived from EVERY matching received file, so no
# name is lost when the loop moves on to the next file.
fileNames_list = []
for files in os.listdir(receiptPath):
    # str.startswith accepts a tuple of prefixes. Written this way the '.csv'
    # check applies to both prefixes; with `a or b and c` it only applied to
    # the 'FMS' one because `and` binds tighter than `or`.
    if files.startswith(('MEM_ZMII', 'FMS')) and files.endswith('.csv'):
        parsedFiles = files.split('_')
        # First two tokens; not used further in this snippet.
        pattern = '_'.join(parsedFiles[0:2])
        # Tokens 2-4 re-joined, cut at the first '-', lower-cased.
        fileName = '_'.join(parsedFiles[2:5]).split('-')[0].lower()
        fileNames_list.append(fileName)

# Report, for each ODS folder, whether a received file maps onto it.
for odsFolder in os.listdir(odsPath):
    if odsFolder in fileNames_list:
        print('it exist:', str(odsFolder))
    else:
        print('it\'s not')
Output :
it exist: zcormm_familymc
it exist: kpi_obj_data
it exist: material_makt
Related
As part of a wider project (to learn), I am building a script that discovers the files in a folder recursively, then adds each file name (including the path) and its size in bytes to a CSV file.
I've then loaded that CSV file as a python dictionary.
What I would like to do now, is have python parse over each value in the dictionary (which is the size) and compare it to all others in the dictionary. If it finds a match, I want it to show me which keys (file name) have the matching values. I'll then do an MD5 hash on those that appear to have the same size.
The code below is as far as I've got - can anyone assist please?
#!/usr/bin/env python3
import argparse
import os
import sys
import csv
import fnmatch
def verify_args():
    """Parse the command line and validate the start directory.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``path``
        attribute is the directory given on the command line.

    Exits the process with status 1 when ``path`` is not an existing
    directory.
    """
    parser = argparse.ArgumentParser(description='Compare files recursively.')
    parser.add_argument('path', help='Location to begin file comparison from.')
    args = parser.parse_args()
    if not os.path.isdir(args.path):
        print(args.path, 'is an invalid path - exiting' + '\n')
        # Non-zero status so a calling shell can detect the failure.
        sys.exit(1)
    print(args.path, 'is a valid path - continuing' + '\n')
    # Hand back the namespace parsed above instead of parsing argv again.
    return args
def listfiles(file_path):
    """Record every file under *file_path* with its size, then reload the list.

    Writes a ``file,size`` CSV to ``/tmp/foo``, reads it back into a dict
    mapping each path to its size (the string stored in the CSV), prints the
    dict's keys and returns the dict.
    """
    print ('Starting comparison')
    pattern = '*'
    # csv.writer quotes fields when needed, so paths containing commas or
    # quotes survive the round trip through DictReader below. newline='' is
    # what the csv module expects; lineterminator keeps plain '\n' endings.
    with open('/tmp/foo', 'w', newline='') as fo:
        writer = csv.writer(fo, lineterminator='\n')
        writer.writerow(['file', 'size'])
        for root, _dirs, names in os.walk(file_path):
            for filename in fnmatch.filter(names, pattern):
                full_path = os.path.join(root, filename)
                writer.writerow([full_path, os.path.getsize(full_path)])
    sizes = {}
    with open('/tmp/foo', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            sizes[row['file']] = row['size']
    print(sizes.keys())
    # Not sure now what to do
    return sizes
def main():
    """Entry point: validate the CLI arguments, then scan the given path."""
    listfiles(verify_args().path)


if __name__ == '__main__':
    main()
I want to change the file name of all the files in a folder. They all end with a date and time, like "filename 2019-05-20 1357", and I want the date to come first for all files. What is the simplest way to do that?
#!/usr/bin/python3
import shutil, os, re
# "<name> <YYYY-MM-DD HHMM>": group 1 is the name, group 2 the trailing stamp.
r = re.compile(r"^(.*) (\d{4}-\d{2}-\d{2} \d{4})$")

# Rename every matching entry of the current directory to "<stamp> <name>".
for f in os.listdir():
    m = r.match(f)
    if m is None:
        continue
    stem, stamp = m.groups()
    shutil.move(f, "{} {}".format(stamp, stem))
Quick and roughly tested version
Here is my Implementation:
from datetime import datetime
import os
path = '/Users/name/desktop/directory'

# Prefix every entry of `path` with the current date and time
# (day-month-year, then hours and minutes).
for file in os.listdir(path):
    stamp = datetime.now().strftime("%d-%m-%Y %H%M")
    os.rename(os.path.join(path, file), os.path.join(path, stamp + file))
Output Format:
20-05-2019 1749filename.ext
import os
import re
import shutil
dir_path = ''  # give the dir name
comp = re.compile(r'\d{4}-\d{2}-\d{2}')

# Move the first YYYY-MM-DD found in each file name to the front of the name.
for file in os.listdir(dir_path):
    # splitext keeps the leading dot in `ext`, so a file without an extension
    # no longer gets a stray trailing '.' appended to its new name.
    name, ext = os.path.splitext(file)
    found = comp.search(name)
    if found is None:
        print('file {} is not change'.format(name))
        continue
    date = found.group(0)
    # Everything around the date(s), re-joined with spaces.
    rest_name = ' '.join(comp.split(name)).strip()
    new_name = '{} {}{}'.format(date, rest_name, ext)
    print('changing {} to {}'.format(name, new_name))
    # The source must be the full original entry name (`file`); `name` has
    # had its extension stripped and does not exist on disk.
    shutil.move(os.path.join(dir_path, file), os.path.join(dir_path, new_name))
This piece of code is my first attempt at creating a program. I'm getting an error when running it that reads:
PermissionError: [WinError 32] The process cannot access the file
because it is being used by another process:
'C:\Users\gabri\Desktop\' -> 'C:\Users\gabri\Desktop\Planilhas
Excel\'
What am I doing wrong? The goal of this program is to get all excel, then pdf, then word files and put them in folders created by the program.
import os
from glob import glob
# import cx_Freeze
print("Digite o diretório de origem.")
dirOrigem = input()

# --- Excel workbooks ---
os.chdir(dirOrigem)
excel_files = glob('*.xlsx')
# NOTE(review): ''.join(...) fuses all '*.xls' matches into ONE string (an
# empty string when nothing matches) and that single string is appended.
excel_files.append(''.join(glob('*.xls')))
dirDestinoXL = f'{dirOrigem}\\Planilhas Excel'
if not os.path.exists(dirDestinoXL):
    os.makedirs(dirDestinoXL)
for i in excel_files:
    entry_name = "".join(i)
    os.rename(f'{dirOrigem}\\{entry_name}', f'{dirDestinoXL}\\{entry_name}')

# --- PDF documents ---
os.chdir(dirOrigem)
pdf_files = glob('*.pdf')
dirDestinoPDF = f'{dirOrigem}\\PDF'
if not os.path.exists(dirDestinoPDF):
    os.makedirs(dirDestinoPDF)
for p in pdf_files:
    entry_name = "".join(p)
    os.rename(f'{dirOrigem}\\{entry_name}', f'{dirDestinoPDF}\\{entry_name}')

# --- Word documents ---
os.chdir(dirOrigem)
word_files = glob('*.doc')
# NOTE(review): append() adds the whole '*.docx' result list as one nested
# element; the "".join() in the loop below then fuses its names together.
word_files.append(glob('*.docx'))
dirDestinoWord = f'{dirOrigem}\\Word'
if not os.path.exists(dirDestinoWord):
    os.makedirs(dirDestinoWord)
for d in word_files:
    entry_name = "".join(d)
    os.rename(f'{dirOrigem}\\{entry_name}', f'{dirDestinoWord}\\{entry_name}')
I tried your program and it doesn't work as it is on my computer. I changed some lines and it works. Hope it helps
import os
from glob import glob
# Hard-coded here only to avoid typing the folder through input().
dirOrigem = r'C:\Users\fchal\Desktop\temp'
os.chdir(dirOrigem)

# All workbook names in the source folder, new format first, then legacy.
excel_files = glob('*.xlsx') + glob('*.xls')

dirDestinoXL = f'{dirOrigem}\\xlsfile'
if not os.path.exists(dirDestinoXL):
    os.makedirs(dirDestinoXL)

# The names are relative to the current directory, which is dirOrigem.
for workbook in excel_files:
    os.rename(workbook, os.path.join(dirDestinoXL, workbook))
# same procedure for pdf and word files
I know that glob can be a mess sometimes. And if the files are open, you can get errors. Here's what I would do:
import os
def move_files_with_extension(from_dir, to_dir, *extensions):
    """Move files found under *from_dir* with a matching extension into *to_dir*.

    Args:
        from_dir: Directory searched (via ``all_files_with_extensions_in``).
        to_dir: Directory that receives the files, keeping their base names.
        *extensions: Name suffixes to select, e.g. ``'.xls'``.

    Raises:
        ValueError: If *from_dir* or *to_dir* is not an existing directory.
    """
    # Validate the source first, then the destination.
    for directory in (from_dir, to_dir):
        if not os.path.isdir(directory):
            raise ValueError('{} is not a real directory'.format(directory))
    for file_path in all_files_with_extensions_in(from_dir, *extensions):
        target = os.path.join(to_dir, os.path.basename(file_path))
        os.rename(file_path, target)
def all_files_with_extensions_in(dir, *extensions):
    """Return the paths of all files below *dir* ending with one of *extensions*.

    The tree is traversed with ``os.walk``, so files in sub-directories are
    included. Each returned path is the walked directory joined with the
    file name.
    """
    return [
        os.path.join(dir_path, file_name)
        for dir_path, _dir_names, file_names in os.walk(dir)
        for file_name in file_names
        # str.endswith accepts the tuple of suffixes directly.
        if file_name.endswith(extensions)
    ]
and then you can do:
dirOrigem = input()
excel_location = os.path.join(dirOrigem, 'Planilhas Excel')
# move_files_with_extension raises ValueError when the target directory does
# not exist, so create it first (no error if it is already there).
os.makedirs(excel_location, exist_ok=True)
move_files_with_extension(dirOrigem, excel_location, '.xls', '.xlsx')
and so on
In the program below I am trying to rename the files in my directories. However, each time I run it a different number of files is renamed, and the program stops without any error message.
'''
Created on 08-Jul-2017
#author: Pranav
'''
import os
import re
from shutil import move
class MyPatterns:
    """Regular-expression sources used when cleaning file names."""

    # A bracketed chunk: an opening '(' or '[' up to the nearest ')' or ']'.
    # Raw string so the backslashes reach the regex engine without relying on
    # Python passing unknown escape sequences through (which now warns).
    brackets = r"([\(\[]).*?([\)\]])"
    # One or more consecutive spaces.
    extra_spaces_pattern = " +"
class MyConstants:
    """Small string constants used by the file-name clean-up."""

    #dir_path = "D:\Other\Books\Knowledge"
    blank = ''
    underscore = '_'
    space = ' '
def resolve_filenames(dir_path,filename):
    """Rename *filename* inside *dir_path* to a cleaned-up, title-cased name.

    Clean-up steps: underscores become spaces, bracketed chunks are removed,
    runs of spaces collapse to one, surrounding spaces are stripped and the
    result is title-cased. Nothing is moved when the cleaned name is empty or
    identical to the current name.

    Args:
        dir_path: Directory containing the entry.
        filename: Current name of the entry inside *dir_path*.
    """
    # Underscores are converted first so that the space-collapsing step below
    # also covers the runs of spaces they produce.
    new_file_name = filename.replace(MyConstants.underscore, MyConstants.space)
    new_file_name = re.sub(MyPatterns.brackets, MyConstants.blank, new_file_name)
    new_file_name = re.sub(MyPatterns.extra_spaces_pattern, MyConstants.space, new_file_name)
    new_file_name = new_file_name.strip().title()
    # An empty target would point at dir_path itself; an unchanged name needs
    # no move at all.
    if not new_file_name or new_file_name == filename:
        return
    move(os.path.join(dir_path,filename),os.path.join(dir_path, new_file_name))
# Raw string: the backslashes are path separators, not escape sequences.
base_path = r"D:\Other\Books"
directories = os.listdir(base_path)
print('Starting process to resolve file names from base path : {}: '.format(base_path))
for d in directories:
    dir_path = os.path.join(base_path, d)
    # os.listdir() also returns plain files; listing one of those below
    # would raise, so only descend into real directories.
    if not os.path.isdir(dir_path):
        continue
    print('Resolving files for directory : {} '.format(d))
    for i,fname in enumerate(os.listdir(dir_path)):
        print(i,fname)
        resolve_filenames(dir_path, fname)
I have a list of files with names such as these:
20140911_085234.csv
20140912_040056.csv
What is known is the first part which is the date (the second is a random number). How can I open the correct file if I know the date?
Update: There is one file per day.
As @isedev says, you could use the fnmatch function to find all the files that match the "date" pattern. The code could look like this:
from fnmatch import fnmatch
import os
folder_path = '/home/Desktop/project'
all_files = os.listdir(folder_path)
content_file = 'Hello World'
_date = '20140911'
# Every name that starts with the date, whatever follows it.
_pattern = _date + '*'
for file_name in all_files:
    if fnmatch(file_name, _pattern):
        # Text mode: content_file is a str, which a binary-mode ('wb') handle
        # rejects on Python 3.
        with open(os.path.join(folder_path, file_name), 'w') as f:
            f.write(content_file)
I hope it helps you!
Using glob :
import time
import glob
import os
def open_file_by_date(date, path="/path/to/file"):
    """Open every ``<date>_*.csv`` file found directly inside *path*.

    Args:
        date: Date prefix of the file name, e.g. ``'20140911'``.
        path: Directory to search; defaults to the placeholder location.

    Returns:
        The sorted list of matching file paths (empty when nothing matches
        or *path* does not exist).
    """
    # glob.escape keeps any wildcard characters in the directory name literal;
    # glob.glob is the public API (glob.glob1 is an undocumented helper).
    pattern = os.path.join(glob.escape(path), date + "_*.csv")
    matches = sorted(glob.glob(pattern))
    for file in matches:
        # Opened read-only so that merely opening a file does not truncate
        # it; change the mode if the file has to be written.
        with open(file, 'rb') as f:
            #do your stuff with file
            pass
    return matches


if __name__ == "__main__":
    today = time.strftime("%Y%m%d")
    open_file_by_date(today)