I wrote a program that makes a list from two imported files.
But when the imported files contain a lot of content, the program shuts down (crashes).
This is the code I wrote:
[Screenshot: the program working with a file containing little content]
[Screenshot: the program crashing with a file containing a lot of content]
def btn_start():
    try:
        if open_file1_text and open_file2_text:
            file_1 = open(open_file1_text.get(), 'r')
            file_2 = open(open_file2_text.get(), 'r')
            lines_1 = file_1.readlines()
            lines_2 = file_2.readlines()
            global text
            text = ""
            demo_text_box.delete(1.0, 'end')
            demo_text_box_a.delete(1.0, 'end')
            demo_text_box_a.insert(INSERT, start_text_demo)
            for pline in lines_2:
                for uline in lines_1:
                    demo_text_box.insert(INSERT, uline.rstrip('\n') + separator_text_.get() + pline)
                    text += uline.rstrip('\n') + separator_text_.get() + pline
            file_1.close()
            file_2.close()
    except FileNotFoundError:
        demo_text_box.delete(1.0, 'end')
        demo_text_box_a.delete(1.0, 'end')
        demo_text_box_a.insert(INSERT, File_Not_Found_Error)
Your code has another problem: if file_2 is not found, file_1 is left open, which may be bad (you want to close files as soon as you no longer need them).
You can solve this with with statements, which automatically close your files even when an exception occurs.
As for your memory problem, I guess text does not fit in memory, so you may want to write its content to another file instead of accumulating it in a string:
def btn_start(open_file1_text, open_file2_text):
    if not (open_file1_text and open_file2_text):
        return
    try:
        with open(open_file1_text.get(), 'r') as file_1:
            lines_1 = file_1.readlines()
        with open(open_file2_text.get(), 'r') as file_2:
            lines_2 = file_2.readlines()
        demo_text_box.delete(1.0, 'end')
        demo_text_box_a.delete(1.0, 'end')
        demo_text_box_a.insert(INSERT, start_text_demo)
        with open('text.txt', 'w') as text_file:
            for pline in lines_2:
                for uline in lines_1:
                    demo_text_box.insert(INSERT, uline.rstrip('\n') + separator_text_.get() + pline)
                    text_file.write(uline.rstrip('\n') + separator_text_.get() + pline)
    except FileNotFoundError:
        demo_text_box.delete(1.0, 'end')
        demo_text_box_a.delete(1.0, 'end')
        demo_text_box_a.insert(INSERT, File_Not_Found_Error)
If the files themselves do not fit in memory (meaning you cannot call file.readlines()), you can also read them at each iteration of the loop:

with open('text.txt', 'w') as text_file:
    with open(open_file2_text.get(), 'r') as file_2:
        for pline in file_2:
            with open(open_file1_text.get(), 'r') as file_1:
                for uline in file_1:
                    demo_text_box.insert(INSERT, uline.rstrip('\n') + separator_text_.get() + pline)
                    text_file.write(uline.rstrip('\n') + separator_text_.get() + pline)
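Note that this last version re-reads file_1 from disk once for every line of file_2, which is a lot of repeated I/O. If one of the two files fits in memory, a faster middle ground, purely as a sketch (it assumes file_1 is the smaller of the two), is to cache its lines once and still stream the large file:

sep = separator_text_.get()
with open('text.txt', 'w') as text_file, \
        open(open_file1_text.get(), 'r') as file_1, \
        open(open_file2_text.get(), 'r') as file_2:
    # cache the small file once instead of re-opening it per line
    lines_1 = [uline.rstrip('\n') for uline in file_1]
    # stream the large file line by line
    for pline in file_2:
        for uline in lines_1:
            text_file.write(uline + sep + pline)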
I'm very new to Python and this is far beyond what I'm capable of.
I have multiple text files:
test01.txt
test02.txt
test03.txt
test*.txt
Each file has the same number of lines and the same structure.
I want to extract lines 20-25 and put them into a text file that I can manipulate in Excel.
Because there are hundreds of files, it would be great if we could put the text file's name on top of or next to the data.
This is what I was able to do, but as you can see it's not exactly "fast":
file1 = open("test01.txt", "r")
content = file1.readlines()
file1 = open("values.txt","w")
file1.write("test01.txt" + "\n")
file1.writelines(content[33:36])
file1.close()
file1 = open("test02.txt", "r")
content = file1.readlines()
#Append-adds at last
file1 = open("values.txt","a")#append mode
file1.write("test02.txt" + "\n")
file1.writelines(content[33:36])
file1.close()
file1 = open("test03.txt", "r")
content = file1.readlines()
#Append-adds at last
file1 = open("values.txt","a")#append mode
file1.write("test03.txt" + "\n")
file1.writelines(content[33:36])
file1.close()
Here is a script that reads all the files in a directory and writes the name of each file and its content into another file, like you did:
import os

ValuesTextFile = open("values.txt", "a")
Path = './files/'
for Filename in os.listdir(Path):
    print(Filename)
    ValuesTextFile.write(Filename + "\n")
    File = open(Path + Filename, "r")
    Content = File.readlines()
    ValuesTextFile.writelines(Content[33:36])
    File.close()
ValuesTextFile.close()
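Two caveats worth noting: os.listdir returns names in arbitrary order, and the files above are only closed explicitly at the end. A variant sketch using with statements (so every file is closed even on errors) and sorted names, assuming the same './files/' layout and values.txt output:

import os

path = './files/'
with open("values.txt", "a") as values:
    # sorted(): os.listdir gives no guaranteed order
    for filename in sorted(os.listdir(path)):
        with open(os.path.join(path, filename), "r") as f:
            content = f.readlines()
        values.write(filename + "\n")
        values.writelines(content[33:36])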
I have a Python file named file_1.py.
It has some code in which I just have to change the word "file_1" to "file_2" (there are 3 occurrences of the word file_1), preserve the indentation of the other functions, and save it as file_2.py.
I have to do this for 100 such files: `file_1.py, file_2.py.....file_100.py`.
Is there any way to automate this?
Run this script:

import fileinput

with fileinput.FileInput('file_1.py', inplace=True, backup='.bak') as file:
    for line in file:
        print(line.replace('file_1', 'file_2'), end='')

Hope this helps :)
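Note that fileinput edits file_1.py in place (keeping a .bak backup) rather than writing file_2.py. If the goal is to generate file_2.py through file_100.py, each derived from the original, here is a sketch (it assumes the literal string 'file_1' occurs only where it must change):

with open('file_1.py') as f:
    template = f.read()

for n in range(2, 101):
    with open('file_{}.py'.format(n), 'w') as out:
        # str.replace leaves all other text, including indentation, intact
        out.write(template.replace('file_1', 'file_{}'.format(n)))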
Create a script.
First, read the file:

with open("./file1.py") as f:
    content = f.read()

Second, replace the filename:

new_content = content.replace("file1", "file2")

Third, write the new file (I would suggest you write a new file rather than overwrite the original):

with open("./file2.py", "w") as f:
    f.write(new_content)
If you have multiple files, use something like:

for item in range(1, 100):
    filename = "file" + str(item)
    new_filename = "file" + str(item + 1)
    with open("./" + filename + ".py") as f:
        content = f.read()
    new_content = content.replace(filename, new_filename)
    with open("./another_folder/" + new_filename + ".py", "w") as f:
        f.write(new_content)
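One caveat with plain str.replace: replacing "file1" would also match inside longer names such as "file10" or "file12". If such names can occur in the content, a word-boundary regex is safer; a sketch:

import re

# \b prevents matches inside longer identifiers such as file10 or file1_backup
new_content = re.sub(r'\bfile1\b', 'file2', content)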
I am writing a script that reads files from different directories and then uses each file's ID to search in a csv file. Here is the piece of code:
import os
import glob

searchfile = open("file.csv", "r")
train_file = open('train.csv', 'w')
listOfFiles = os.listdir("train")
for l in listOfFiles:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        id = d.split("/")
        id = id[-1].split(".")
        print id[0]  # ID
        for line in searchfile:
            if id[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(id[0] + "," + value)  # write description
                break
searchfile.close()
train_file.close()
However, I am only able to find a couple of the IDs in the csv file. Can someone point out my mistake? (Please see the comments for a description.)
EDITED
A sample line from the csv file:
192397335,carrello porta utensili 18x27 eh l 411 x p 572 x h 872 6 cassetti,,691.74,192397335.jpg
Your issue is that when you do for line in searchfile: you're consuming a file iterator, which does not reset for every id. For example, if the first id you pass to it matches on line 50, the search for the next id will start checking at line 51.
Instead, you can read your file into a list once and loop over that list:
import os
import glob

with open("file.csv", "r") as s:
    search_file = s.readlines()

train_file = open('train.csv', 'w')
list_of_files = os.listdir("train")
for l in list_of_files:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        fname = os.path.splitext(os.path.basename(d))
        print fname[0]  # ID
        for line in search_file:
            if fname[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(fname[0] + "," + value)  # write description
                break
train_file.close()
I made a couple of other changes too. Firstly, you shouldn't use the name id, as it shadows the built-in id() function in Python; I picked fname instead to indicate the file name. Secondly, I changed your camelCase names to lowercase, as is the convention. Finally, getting the file name without its extension is neat and fairly consistent through a combination of os.path.splitext and os.path.basename.
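If the csv is large, a further speed-up is possible: assuming the ID is always the first comma-separated field (as in the sample line above) and IDs are unique, you can build a dict once and replace the inner scan with a constant-time lookup. A sketch:

lookup = {}
with open("file.csv", "r") as s:
    for line in s:
        fields = line.split(",")
        # assumes the ID is the first field of each line
        lookup[fields[0]] = fields[1] + " " + fields[2] + "\n"

# then, inside the loop over files:
#     if fname[0] in lookup:
#         train_file.write(fname[0] + "," + lookup[fname[0]])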
You need to browse the lines of searchfile for each id found, but as you open the file outside of the loop, you only read each line once over the whole run.
You should either load the whole file into a list and iterate over that list of lines inside the loop, or, if searchfile is really large and would hardly fit in memory, reopen the file inside the loop.
List version:
with open("file.csv", "r") as searchfile:
searchlines = searchfile.readlines()
train_file = open('train.csv','w')
listOfFiles = os.listdir("train")
for l in listOfFiles:
dirList = glob.glob(('/train/%s/*.jpg') % (l))
for d in dirList:
id = d.split("/")
id = id[-1].split(".")
print id[0] # ID
for line in searchlines: # now a list so start at the beginning on each pass
if id[0] in line: # search in csv file
value= line.split(",")
value= value[1]+" "+ value[2] + "\n"
train_file.write(id[0]+","+value) # write description
break
train_file.close()
Re-open version:

train_file = open('train.csv', 'w')
listOfFiles = os.listdir("train")
for l in listOfFiles:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        id = d.split("/")
        id = id[-1].split(".")
        print id[0]  # ID
        searchfile = open("file.csv", "r")
        for line in searchfile:
            if id[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(id[0] + "," + value)  # write description
                break
        searchfile.close()
train_file.close()
I have a folder full of .mpt files, each of them having the same data format.
I need to delete the first 57 lines from every file and append the remainder of each file into one csv file, output.csv.
I have that part already:
import glob
import os

dir_name = 'path name'
lines_to_ignore = 57
input_file_format = '*.mpt'
output_file_name = "output.csv"

def convert():
    files = glob.glob(os.path.join(dir_name, input_file_format))
    with open(os.path.join(dir_name, output_file_name), 'w') as out_file:
        for f in files:
            with open(f, 'r') as in_file:
                content = in_file.readlines()
                content = content[lines_to_ignore:]
                for i in content:
                    out_file.write(i)
            print("working")

convert()
print("done")
This part works OK.
How do I add the filename of each .mpt file as the last column of output.csv?
Thank you!
This is a quick 'n dirty solution.
In this loop the variable i is just a string (a line from a CSV file):

for i in content:
    out_file.write(i)

So you just need to 1) strip off the end-of-line character(s) (either "\n" or "\r\n"), 2) append "," plus the input file name, and 3) write the line back out.
If you're using Unix, try:

for i in content:
    i = i.rstrip("\n") + "," + f + "\n"
    out_file.write(i)

This assumes that the field separator is a comma. Another option is:

for i in content:
    i = i.rstrip() + "," + f
    print(i, file=out_file)

This will strip all white space from the end of i.
Add quotes if you need to quote the file name:

i = i.rstrip(...) + ',"' + f + '"'
The relevant part:

with open(f, 'r') as in_file:
    content = in_file.readlines()
    content = content[lines_to_ignore:]
    for i in content:
        new_line = ",".join([i.rstrip(), f]) + "\n"  # <-- this is new
        out_file.write(new_line)                     # <-- this is new
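If the .mpt lines can themselves contain commas or quotes, plain string concatenation may produce malformed CSV; the csv module handles quoting for you. A sketch of the same loop body under that assumption (it reuses f, out_file and lines_to_ignore from the convert() function above):

import csv
import os

with open(f, 'r') as in_file:
    content = in_file.readlines()[lines_to_ignore:]
writer = csv.writer(out_file)
for i in content:
    # split the original line on commas, then append the file name as a last column
    writer.writerow(i.rstrip("\n").split(",") + [os.path.basename(f)])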
My Python program loops through a bunch of csv files, reads them, and writes specific columns from each file to another csv file. While the program runs, I can see the files being written in the correct manner, but once the program is finished, all the files I've just written become empty.
The solution in all the other similar threads seems to be closing the file you write to properly, but I can't seem to figure out what I'm doing wrong. Anyone?
import os
import csv

def ensure_dir(f):
    d = os.path.dirname(f)
    if not os.path.exists(d):
        os.makedirs(d)

readpath = os.path.join("d:\\", "project")
savepath = os.path.join("d:\\", "save")
ensure_dir(savepath)
contents_1 = os.listdir(readpath)
for i in contents_1[1:len(contents_1)]:
    readpath_2 = os.path.join(readpath, i)
    if os.path.isdir(readpath_2) == True:
        contents_2 = os.listdir(readpath_2)
        for i in contents_2:
            readpath_3 = os.path.join(readpath_2, i)
            if os.path.isfile(readpath_3) == True:
                savefile = savepath + "\\" + i
                savefile = open(savefile, 'wb')
                writer = csv.writer(savefile, delimiter=';')
                readfile = open(readpath_3, 'rb')
                reader = csv.reader(readfile, delimiter=';')
                try:
                    for row in reader:
                        writer.writerow([row[0], row[3]])
                except:
                    print(i)
                finally:
                    savefile.close()
                    readfile.close()
savefile = savepath + "\\" + i is the error. If both d:\project\a\x.csv and d:\project\b\x.csv exist, then you will write to savepath + "\\" + i more than once. If the second path has an empty x.csv, it will overwrite the first result with an empty file.
Try this instead:
import os
import csv

def ensure_dir(f):
    d = os.path.dirname(f)
    if not os.path.exists(d):
        os.makedirs(d)

readpath = os.path.join("d:\\", "project")
savepath = os.path.join("d:\\", "save")
ensure_dir(savepath)
for dname in os.listdir(readpath)[1:]:
    readpath_2 = os.path.join(readpath, dname)
    if not os.path.isdir(readpath_2):
        continue
    for fname in os.listdir(readpath_2):
        fullfname = os.path.join(readpath_2, fname)
        if not os.path.isfile(fullfname):
            continue
        # prefix the directory name so files from different folders don't collide
        savefile = open(savepath + "\\" + dname + "_" + fname, 'wb')
        writer = csv.writer(savefile, delimiter=';')
        readfile = open(fullfname, 'rb')
        reader = csv.reader(readfile, delimiter=';')
        try:
            for row in reader:
                writer.writerow([row[0], row[3]])
        except:
            print(fname)
        finally:
            savefile.close()
            readfile.close()
This code could be greatly improved with os.walk.
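For illustration, a minimal sketch of that os.walk version; it assumes the same readpath/savepath variables and the same ';'-delimited two-column extraction as above:

import os
import csv

for root, dirs, files in os.walk(readpath):
    for fname in files:
        src = os.path.join(root, fname)
        # prefix with the containing folder's name so files from different
        # folders cannot overwrite each other in savepath
        dst = os.path.join(savepath, os.path.basename(root) + "_" + fname)
        readfile = open(src, 'rb')
        savefile = open(dst, 'wb')
        try:
            writer = csv.writer(savefile, delimiter=';')
            for row in csv.reader(readfile, delimiter=';'):
                writer.writerow([row[0], row[3]])
        finally:
            savefile.close()
            readfile.close()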
Quoting from the python documentation:
If csvfile is a file object, it must be opened with the ‘b’ flag on platforms where that makes a difference.
Change the 'w' and 'r' flags to 'wb' and 'rb'.
(1) Your outer loop AND your inner loop both use i as the loop variable. This has no hope of (a) being understood by a human or (b) working properly.
(2) except: print(i) ... What??? A bare except silently swallows every error; I'd suggest you remove the try/except and fix any bugs that you come across.