I'm running a program daily and want the .csv it generates to be written to a folder on my C drive. For some reason, I can create the folder and write one file, but no others are being written. I'm not getting any errors; no other files are being written to that folder. Here's the code. Thanks
Code:
# Writes the entries of money_list to a dated CSV file under the OptionsData folder.
# NOTE(review): indentation was lost in this listing, so block nesting is not visible here.
# CSVdir ends in a file-name template ("OptionsData-{}.csv"), not in the folder itself.
CSVdir = r"C:\Users\Maurice\Desktop\Python\New_Project\OptionsData\\OptionsData-{}.csv"
realCSVdir = os.path.realpath(CSVdir)
# Existence is tested on the template path, and makedirs() then creates a
# directory carrying that name.
# NOTE(review): if the lines after makedirs() sat inside this if, they only run
# while the path does not exist yet - confirm against the original indentation.
if not os.path.exists(CSVdir):
os.makedirs(CSVdir)
# One string made of every entry of data, separated by newlines.
str1 = "\n".join(data)
now = datetime.datetime.now() #+ datetime.timedelta(days=1)
# Today's date as YYYY-MM-DD; it becomes part of the output file name.
now_str = now.strftime("%Y-%m-%d")
new_file_name = os.path.join(realCSVdir,'OptionsData-{}.csv'.format(now_str))
new_file = open(new_file_name, 'wb')
for item in money_list:
if len(item) != 0 :
for other_item in item :
# NOTE(review): the last operand, new_file, is the file object itself, not text.
new_file.write(other_item + str1 + new_file)
new_file.close()
print("Eureka!")
CSVdir = r"C:\Users\Maurice\Desktop\Python\New_Project\OptionsData\\OptionsData-{}.csv"
should be
# Folder that collects the daily exports; the dated file name is added below.
CSVdir = r"C:\Users\Maurice\Desktop\Python\New_Project\OptionsData"
# Only the folder creation is conditional.
if not os.path.exists(CSVdir):
    os.makedirs(CSVdir)

# Everything from here on is outside the if statement, so it runs on every
# invocation and not just on the run that creates the folder.
str1 = "\n".join(data)
now = datetime.datetime.now()  # + datetime.timedelta(days=1)
now_str = now.strftime("%Y-%m-%d")
# Join against CSVdir: this snippet never defines realCSVdir.
new_file_name = os.path.join(CSVdir, 'OptionsData-{}.csv'.format(now_str))
# Text mode, because the values written are str; the with-block closes the
# file even if a write fails.
with open(new_file_name, 'w') as new_file:
    for item in money_list:
        if len(item) != 0:
            for other_item in item:
                # Only text is written; adding the file object to a str
                # raises TypeError.
                new_file.write(other_item + str1)
print("Eureka!")
Related
This code saves text files from a data frame of sentences, then saves each one as a ssml file.
How can I get the sentences to be saved in a new folder?
# Wraps every sentence in starter/ender and writes each result to its own numbered .ssml file.
# sentences, starter, ender and num are defined outside this listing.
# NOTE(review): indentation was lost in this listing.
# NOTE(review): assigning to max rebinds the name of the built-in max() in this scope.
max = len(sentences)
for i in range(0,max):
txt = sentences[i]
new_txt = starter + txt + ender
print(new_txt)
# num is incremented before use and becomes the numeric part of the file name.
num = num + 1
# The name has no directory part, so the file is created in the current working directory.
with open("text" + str(num) + ".ssml", 'w+') as f:
f.writelines(new_txt)
Add this at the start:
import os
# Name of the output folder, relative to the current working directory.
folder_name = 'my_folder'
# Create the folder; exist_ok=True makes this a no-op when it already exists.
os.makedirs(folder_name, exist_ok=True)
Then change:
with open("text" + str(num) + ".ssml", 'w+') as f:
to:
with open(f'{folder_name}\\text{num}.ssml', 'w+') as f:
For example, I have two .txt files.
First file has 78 lines, second file has 30 lines.
Is there any easy way to pass a number as a parameter to the result?
Currently in result I get:
first_file_20.txt
first_file_40.txt
first_file_60.txt
first_file_80.txt
second_file_20.txt
second_file_40.txt
but I would like to have as a result:
first_file_1.txt
first_file_2.txt
first_file_3.txt
first_file_4.txt
second_file_1.txt
second_file_2.txt
code:
import re
import os
# Splits every .txt file in the current working directory into parts of lines_per_file lines.
# NOTE(review): indentation was lost in this listing.
# Number of lines after which a new output file is started.
lines_per_file = 20
# Handle of the output file currently being written; None until the first one is opened.
smallfile = None
root_path = os.getcwd()
if os.path.exists(root_path):
# Full paths of the regular files directly inside root_path.
files = []
for name in os.listdir(root_path):
if os.path.isfile(os.path.join(root_path,name)):
files.append(os.path.join(root_path,name))
print(files) #list all files in directory
for ii in files:
if ii.endswith(".txt"): # only txt files
with open(ii,'r') as bigfile:
# Base name up to the first dot, then a template with a slot for the numeric suffix.
name1 = str(os.path.basename(ii).split(".")[0])
name2 = str(name1 + '_{}.txt')
#
print('name', name2)
for lineno, line in enumerate(bigfile):
# NOTE(review): w is assigned here but never read in this listing.
w = 1
# Line indexes 0, 20, 40, ... close the current part and start a new one.
if lineno % lines_per_file == 0:
if smallfile:
smallfile.close()
# The suffix is the 0-based line index plus the chunk size, which yields 20, 40, 60, ...
small_filename = name2.format(lineno + lines_per_file)
smallfile = open(small_filename, "w")
smallfile.write(line)
if smallfile:
smallfile.close()
Can anyone help me?
Don't add lineno and lines_per_file, divide them.
# Floor-dividing the 0-based line index by the chunk size gives a 0-based part number;
# adding 1 makes the suffixes run 1, 2, 3, ...
small_filename = name2.format(lineno//lines_per_file + 1)
I have a folder with multiple PDFs with datestamps at the end of their names e.g.
hello_20200820.pdf
hello_20200821.pdf
hello_20200822.pdf
hello_20200717.pdf
I am trying to write a function to remove all the PDFs in the folder other than the TWO most recent pdf's.
The code I have written, however, deletes only the 3rd most recent file, or the oldest file if there are fewer than 3. How can I fix this and remove ALL PDFs with the name 'hello' other than the two most recent?
Here is my code so far:
# Works on the names listed in 'PDFs/<folder>' that end in '.pdf' and contain wsp.
# NOTE(review): indentation was lost in this listing, so loop/branch nesting cannot be read off the text.
def remove_old_pdf(wsp, folder):
# float('inf') marks "no candidate seen yet" for the comparison in pass 1.
date_diff = float('inf')
today = datetime.now()
filename = ''
files = os.listdir('PDFs/' + folder)
# print(files)
# Pass 1: keep the matching name whose date stamp lies closest to today.
for file in files:
if file.endswith('.pdf') and wsp in file:
# Stamp = text between the first '_' and the following '.', parsed as YYYYMMDD.
date_str = file.split('_')[1].split('.')[0]
curr_diff = today - datetime.strptime(date_str, '%Y%m%d')
if date_diff == float('inf') or curr_diff < date_diff:
date_diff = curr_diff
filename = file
# print(filename)
else:
pass
print(filename)
# Removes the name from the in-memory list only; nothing is deleted on disk here.
files.remove(filename)
# print(files)
# Pass 2: curr_diff is computed but never compared, so filename is simply
# overwritten by every matching name that is visited.
for file in files:
if file.endswith('.pdf') and wsp in file:
date_str = file.split('_')[1].split('.')[0]
curr_diff = today - datetime.strptime(date_str, '%Y%m%d')
filename = file
else:
pass
if filename in files:
files.remove(filename)
print(filename)
else:
print('lol')
# print(files)
# Pass 3: same shape as pass 2 - again no comparison against date_diff.
for file in files:
if file.endswith('.pdf') and wsp in file:
date_str = file.split('_')[1].split('.')[0]
curr_diff = today - datetime.strptime(date_str, '%Y%m%d')
filename = file
else:
pass
# NOTE(review): delFile is built but not used afterwards; the same path is rebuilt for os.remove().
delFile = 'PDFs/' + folder + '/' + filename
finalFiles = os.listdir('PDFs/' + folder)
# This is the only os.remove() call in the function.
if filename in finalFiles:
os.remove('PDFs/' + folder + '/' + filename)
print('Deleted ' + filename +'.')
else:
print("No PDFs deleted")
You can use glob to list all the files that match, restrict to the first n-2 and delete those:
import os
from glob import glob
# Safety switch: while True, the loop only reports what it would delete.
dryrun = True  # change this to False to actually delete
# Eight "?" wildcards stand for the eight-character date stamp in the name.
wc = 'hello_????????.pdf'
# Sorting the names and dropping the last two leaves every match except the
# two that sort highest; with a fixed-width YYYYMMDD stamp those are the two
# most recent files.
for name in sorted(glob(wc))[:-2]:
    print(f'delete {name}{" (DRY-RUN)" if dryrun else ""}')
    if not dryrun:
        os.unlink(name)
Note: personally I always prefer to have globs that are as strict as possible. So I often define something like:
# Glob character classes for the individual date/time fields, keyed by a
# one-letter field code (Y, m, d, H, M, S).
wildcards = dict(
    Y='[12][0-9][0-9][0-9]',
    m='[01][0-9]',
    d='[0-3][0-9]',
    H='[0-2][0-9]',
    M='[0-5][0-9]',
    S='[0-5][0-9]',
)
# Chain the year, month and day classes into one pattern for the date stamp.
ymdglob = ''.join(wildcards[code] for code in ('Y', 'm', 'd'))
wc = f'hello_{ymdglob}.pdf'
# etc.
Good day.
I wrote a little Python program to help me easily create .cbc files for Calibre, which is just a renamed .zip file with a text file called comics.txt for TOC purposes. Each chapter is another zip file.
The issue is that the last zip file zipped always has the error "Unexpected end of data". The file itself is not corrupt, if I unzip it and rezip it it works perfectly. Playing around it seems that the problem is that Python doesn't close the last zip file after zipping it, since I can't delete the last zip while the program is still running since it's still open in Python. Needless to say, Calibre doesn't like the file and fails to convert it unless I manually rezip the affected chapters.
The code is as follows, checking the folders for not-image files, zipping the folders, zipping the zips while creating the text file, and "changing" extension.
import re, glob, os, zipfile, shutil, pathlib, gzip, itertools
# Builds one zip per chapter folder, bundles those zips plus a comics.txt
# table of contents into a single archive, and renames that archive to .cbc.
# NOTE(review): indentation was lost in this listing, so loop/branch nesting cannot be read off the text.
Folders = glob.glob("*/")
items = len(Folders)
cn_list = []
cn_list_filtered = []
dirs_filtered = []
# Prefixes that may precede a chapter number in a folder name.
ch_id = ["c", "Ch. "]
subdir_im = []
total = 0
# Names of the sub-directories directly below the current directory.
Dirs = next(os.walk('.'))[1]
# Step 1: count the entries per sub-directory whose name contains '.png' or '.jpg'.
for i in range(0, len(Dirs)):
for items in os.listdir("./" + Dirs[i]):
if items.__contains__('.png') or items.__contains__('.jpg'):
total+=1
else:
print(items + " not an accepted format.")
subdir_im.append(total)
total = 0
# Step 2: pull the chapter number out of every folder name, trying each prefix
# with the "digits, one non-space character, digits" form before plain digits.
for fname in Folders:
if re.search(ch_id[0] + r'\d+' + r'[\S]' + r'\d+', fname):
cn = re.findall(ch_id[0] + "(\d+[\S]\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[0] + r'\d+', fname):
cn = re.findall(ch_id[0] + "(\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[1] + r'\d+' + '[\S]' + r'\d+', fname):
cn = re.findall(ch_id[1] + "(\d+[\S]\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[1] + r'\d+', fname):
cn = re.findall(ch_id[1] + "(\d+)", fname)[0]
cn_list.append(cn)
else:
print('Warning: File found without proper filename format.')
# De-duplicate the chapter numbers and sort them (as strings).
cn_list_filtered = set(cn_list)
cn_list_filtered = sorted(cn_list_filtered)
cwd = os.getcwd()
Dirs = Folders
subdir_zi = []
total = 0
# Step 3: add the images of each folder to "Chapter <number>.zip".
# NOTE(review): current_zip is opened in append mode here, and no close() call
# on it appears anywhere in this listing.
for i in range(0, len(cn_list_filtered)):
for folders in Dirs:
if folders.__contains__(ch_id[0] + cn_list_filtered[i] + " ")\
or folders.__contains__(ch_id[1] + cn_list_filtered[i] + " "):
print('Zipping folder ', folders)
namezip = "Chapter " + cn_list_filtered[i] + ".zip"
current_zip = zipfile.ZipFile(namezip, "a")
for items in os.listdir(folders):
if items.__contains__('.png') or items.__contains__('.jpg'):
current_zip.write(folders + "/" + items, items)
total+=1
subdir_zi.append(total)
total = 0
# Step 4: compare the per-folder image counts with the per-zip counts.
print('Folder contents in order:', subdir_im, ' Total:', sum(subdir_im))
print("Number of items per zip: ", subdir_zi, ' Total:', sum(subdir_zi))
if subdir_im == subdir_zi:
print("All items in folders have been successfully zipped")
else:
print("Warning: File count in folders and zips do not match. Please check the affected chapters")
# Step 5: bundle every *.zip found in the current directory into an archive
# named after that directory.
zips = glob.glob("*.zip")
namezip2 = os.path.basename(os.getcwd()) + ".zip"
zipfinal = zipfile.ZipFile(namezip2, "a")
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
# Step 6: build the comics.txt table of contents, one
# "Chapter N.zip:Chapter N" line per chapter, terminated by CRLF.
Data = []
for i in range (0,len(cn_list_filtered),1):
Datai = ("Chapter " + cn_list_filtered[i] + ".zip" + ":Chapter " + cn_list_filtered[i] + "\r\n")
Data.append(Datai)
Dataok = ''.join(Data)
# NOTE(review): namezip2 is opened a second time here, before zipfinal.close() below is reached.
with zipfile.ZipFile(namezip2, 'a') as myzip:
myzip.writestr("comics.txt", Dataok)
zipfinal.close()
os.rename(namezip2, namezip2 + ".cbc")
os.system("pause")
I am by no means a programmer, that is just a Frankenstein monster code I eventually managed to put together by checking threads, but this last issue has me stumped.
Some solutions I tried are:
# zips comes from glob.glob("*.zip"), so every zips[i] is a path string and has no close() method.
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
zips[i].close()
Fails with:
zips[i].close()
AttributeError: 'str' object has no attribute 'close'
and:
# Valid indexes run from 0 to len(zips) - 1, so zips[len(zips)] is always out of range.
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
zips[len(zips)].close()
Fails with:
zips[len(zips)].close()
IndexError: list index out of range
Thanks for the help.
This solved my issue:
def generate_zip(file_list, file_name=None):
    """Build a deflate-compressed zip archive from in-memory entries.

    Args:
        file_list: Iterable of ``(archive_name, data)`` pairs; ``data`` is
            whatever ``ZipFile.writestr`` accepts (``str`` or ``bytes``).
        file_name: Optional path to save the archive to. When ``None`` the
            archive is only returned and nothing is written to disk.

    Returns:
        The finished archive as ``bytes``.
    """
    zip_buffer = io.BytesIO()
    # Leaving the with-block closes the archive, even when a write raises.
    # Closing is what writes the archive's final records; without it readers
    # see a truncated file.
    with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        for file in file_list:
            print(f"Filename: {file[0]}\nData: {file[1]}")
            zf.writestr(file[0], file[1])
    archive = zip_buffer.getvalue()
    if file_name is not None:
        # The with-block closes the output file; no explicit close() is needed.
        with open(file_name, 'wb') as f:
            f.write(archive)
    return archive
Below is the data in CFS_Config.txt. This text file records where the documents are stored, so that no paths are hard-coded in the program. For instance, if the program is moved to another environment, we only need to change the directory paths in the CFS_Config.txt file.
Folder Path = ../dataprep/source_documents
ED Notes name = ED Notes
ED Notes output = ../dataprep/ED_Notes
The code below, in a Python file, reads the configuration from the CFS_Config.txt mentioned earlier and also writes an auto-generated text file.
The problem is that I am told the ../dataprep/ED_Notes path was not found. Please take a look at the code; if you need more of it, I will try my best to provide it. Thanks!!! :((
from preprocessing import ednotes_extractor
import os
def read_config():
    """Read the three path settings from the CFS_Config.txt file.

    The file is expected to hold, in this order, the lines
    ``Folder Path = ...``, ``ED Notes name = ...`` and
    ``ED Notes output = ...``. A line that is missing, or that does not
    contain its label, leaves the corresponding value as an empty string.

    Returns:
        A ``(root_dir, ednotes_name, ednotes_output)`` tuple of strings;
        ``ednotes_output`` is the configured value with ``.txt`` appended.
    """
    # The backslash is escaped so the literal keeps its value without relying
    # on the invalid "\C" escape sequence. The with-block closes the file.
    with open("..\\CFS_Config.txt", "r") as cfs_config_txt:
        file_list = list(cfs_config_txt)

    root_dir = _config_value(file_list, 0, "Folder Path = ") or ""
    ednotes_name = _config_value(file_list, 1, "ED Notes name = ") or ""
    output_base = _config_value(file_list, 2, "ED Notes output = ")
    # The suffix is only added when the output line was actually found.
    ednotes_output = "" if output_base is None else output_base + ".txt"
    return root_dir, ednotes_name, ednotes_output


def _config_value(lines, index, label):
    """Return ``lines[index]`` without *label* and newline, or None if absent."""
    if index >= len(lines) or label not in lines[index]:
        return None
    return lines[index].replace(label, "").replace("\n", "")
def convert_txt(choices):
    """Run the conversion selected by *choices*.

    With ``choices == 1`` the output file named in the configuration is
    (re)created with a ``cat_id|content`` header, followed by one line per
    item that ``ednotes_extractor.get_ednotes`` returns for each entry of the
    configured source folder. With ``choices == 2`` only a placeholder
    message is printed. Any other value does nothing.

    Relative paths from the configuration are resolved against the current
    working directory of the process.
    """
    root_dir, _, ednotes_output = read_config()
    if choices == 1:
        # open() does not create missing folders, so make sure the folder
        # that will hold the output file exists before writing to it.
        output_dir = os.path.dirname(ednotes_output)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        # One handle for header and rows keeps the write order explicit and
        # guarantees the file is closed, instead of reopening it in append
        # mode for every source document.
        with open(ednotes_output, 'w', encoding='utf-8') as text_file:
            text_file.write("cat_id|content\n")
            for filename in os.listdir(root_dir):
                source_directory = root_dir + '/' + filename
                arr = ednotes_extractor.get_ednotes(source_directory)
                for item in arr:
                    text_file.write("%s\n" % item)
    elif choices == 2:
        print("Hi")