I'm very new to python and this is far beyond what I'm capable of.
I have multiple text files:
test01.txt
test02.txt
test03.txt
test*.txt
Each file has same number of lines, and the same structure.
I want to extract lines 20-25 and put that into a text file that I can manipulate in excel.
Because there are hundreds of files, it would be great if we could put the text file name on top or next to the data.
This is what I was able to do, but as you can see it's not exactly "fast":
file1 = open("test01.txt", "r")
content = file1.readlines()
file1 = open("values.txt","w")
file1.write("test01.txt" + "\n")
file1.writelines(content[33:36])
file1.close()
file1 = open("test02.txt", "r")
content = file1.readlines()
#Append-adds at last
file1 = open("values.txt","a")#append mode
file1.write("test02.txt" + "\n")
file1.writelines(content[33:36])
file1.close()
file1 = open("test03.txt", "r")
content = file1.readlines()
#Append-adds at last
file1 = open("values.txt","a")#append mode
file1.write("test03.txt" + "\n")
file1.writelines(content[33:36])
file1.close()
Here is a script where you can read all files in a directory and write the name of the file and the content into a another file like you did.
import os
ValuesTextFile = open("values.txt","a")
Path = './files/'
for Filename in os.listdir(Path):
print (Filename)
ValuesTextFile.writelines(Filename)
File = open(Path + Filename, "r")
Content = File.readlines()
ValuesTextFile.writelines(Content[33:36])
File.close()
ValuesTextFile.close()
Related
Here below is my code about how to edit text file.
Since python can't just edit a line and save it at the same time,
I save the previous text file's content into a list first then write it out.
For example,if there are two text files called sample1.txt and sample2.txt in the same folder.
Sample1.txt
A for apple.
Second line.
Third line.
Sample2.txt
First line.
An apple a day.
Third line.
Execute python
import glob
import os
#search all text files which are in the same folder with python script
path = os.path.dirname(os.path.abspath(__file__))
txtlist = glob.glob(path + '\*.txt')
for file in txtlist:
fp1 = open(file, 'r+')
strings = [] #create a list to store the content
for line in fp1:
if 'apple' in line:
strings.append('banana\n') #change the content and store into list
else:
strings.append(line) #store the contents did not be changed
fp2 = open (file, 'w+') # rewrite the original text files
for line in strings:
fp2.write(line)
fp1.close()
fp2.close()
Sample1.txt
banana
Second line.
Third line.
Sample2.txt
First line.
banana
Third line.
That's how I edit specific line for text file.
My question is : Is there any method can do the same thing?
Like using the other functions or using the other data type rather than list.
Thank you everyone.
Simplify it to this:
with open(fname) as f:
content = f.readlines()
content = ['banana' if line.find('apple') != -1 else line for line in content]
and then write value of content to file back.
Instead of putting all the lines in a list and writing it, you can read it into memory, replace, and write it using same file.
def replace_word(filename):
with open(filename, 'r') as file:
data = file.read()
data = data.replace('word1', 'word2')
with open(filename, 'w') as file:
file.write(data)
Then you can loop through all of your files and apply this function
The built-in fileinput module makes this quite simple:
import fileinput
import glob
with fileinput.input(files=glob.glob('*.txt'), inplace=True) as files:
for line in files:
if 'apple' in line:
print('banana')
else:
print(line, end='')
fileinput redirects print into the active file.
import glob
import os
def replace_line(file_path, replace_table: dict) -> None:
list_lines = []
need_rewrite = False
with open(file_path, 'r') as f:
for line in f:
flag_rewrite = False
for key, new_val in replace_table.items():
if key in line:
list_lines.append(new_val+'\n')
flag_rewrite = True
need_rewrite = True
break # only replace first find the words.
if not flag_rewrite:
list_lines.append(line)
if not need_rewrite:
return
with open(file_path, 'w') as f:
[f.write(line) for line in list_lines]
if __name__ == '__main__':
work_dir = os.path.dirname(os.path.abspath(__file__))
txt_list = glob.glob(work_dir + '/*.txt')
replace_dict = dict(apple='banana', orange='grape')
for txt_path in txt_list:
replace_line(txt_path, replace_dict)
Below code works perfectly, where it opens one text files and function parse_messages gets as parameter
def parse_messages(hl7):
hl7_msgs = hl7.split("MSH|")
hl7_msgs = ["{}{}".format("MSH|", x) for x in hl7_msgs if x]
for hl7_msg in hl7_msgs:
#does something..
with open('sample.txt', 'r') as f:
hl7 = f.read()
df = parse_messages(hl7)
But now I have multiple text files in directory. I want to do open each one then call from parse_messages function. Here is what I tried so far.
But this only read last text file, not all of them
import glob
data_directory = "C:/Users/.../"
hl7_file = glob.glob(data_directory + '*.txt')
for file in hl7_file:
with open(file, 'r') as hl7:
hl7 = f.read()
df = parse_messages(hl7)
in your read file loop for file in hl7_file, you are overwrite hl7 at every iteration leaving only the last read store at hl7
You probably wanna concatenate all contents of the files together
hl7 = ''
for file in hl7_file:
with open(file, 'r') as f:
hl7 += f.read()
df = parse_messages(hl7) # process all concatenate contents together
or you can call parse_messages function inside the loop with df list store the results as below
df = []
for file in hl7_file:
with open(file, 'r') as f:
hl7 = f.read()
df.append(parse_messages(hl7))
# df[0] holds the result for 1st file read, df[1] for 2nd file and so on
This should work, if I understood what you want to do
import os
all = []
files = [x for x in os.listdir() if x.endswith(".txt")]
for x in files:
with open(x, encoding='utf-8','r') as fileobj:
content = fileobj.read()
all.append(parse_message(content))
I have many text files, and each of them has a empty line at the end. My scripts did not seem to remove them. Can anyone help please?
# python 2.7
import os
import sys
import re
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
all_file = open(filepath,'r')
lines = all_file.readlines()
output = 'F:/WF/new/' + filename
# Read in each row and parse out components
for line in lines:
# Weed out blank lines
line = filter(lambda x: not x.isspace(), lines)
# Write to the new directory
f = open(output,'w')
f.writelines(line)
f.close()
You can use Python's rstrip() function to do this as follows:
filename = "test.txt"
with open(filename) as f_input:
data = f_input.read().rstrip('\n')
with open(filename, 'w') as f_output:
f_output.write(data)
This will remove all empty lines from the end of the file. It will not change the file if there are no empty lines.
you can remove last empty line by using:
with open(filepath, 'r') as f:
data = f.read()
with open(output, 'w') as w:
w.write(data[:-1])
You can try this without using the re module:
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
f = open(filepath).readlines()
new_file = open(filepath, 'w')
new_file.write('')
for i in f[:-1]:
new_file.write(i)
new_file.close()
For each filepath, the code opens the file, reads in its contents line by line, then writes over the file, and lastly writes the contents of f to the file, except for the last element in f, which is the empty line.
You can remove the last blank line by the following command. This worked for me:
file = open(file_path_src,'r')
lines = file.read()
with open(file_path_dst,'w') as f:
for indx, line in enumerate(lines):
f.write(line)
if indx != len(lines) - 1:
f.write('\n')
i think this should work fine
new_file.write(f[:-1])
i have a python file named file_1.py
it has some code in which, i just have to change a word "file_1" to "file_2"
and also preserve indentation of other functions`
and save it as file_2.py
there are 3 occurances of the word file_1
i have to do this for 100 such times. `file_1.py, file_2.py.....file_100.py`
is there any way to automate this?
Run this script:
import fileinput
with fileinput.FileInput('file_1.py', inplace=True, backup='.bak') as file:
for line in file:
print(line.replace('file_1', 'file_2'), end='')
hope this help :)
create a script:
first: read file
with open("./file1.py") as f:
content = f.read()
second: replace filename
new_content = content.replace("file1","file2")
third: write new file(I would suggest you write a new file)
with open("./file2.py", "w") as f:
f.write(new_content)
if you have multiple files, use something like
filenames = ["file" + str(item) for item in range(1,100)]
for filename in filenames:
with open(filename + ".py") as f:
content = f.read()
new_filename = filename[:-1] + str(int(filename[-1]) + 1)
new_content = content.replace(filename,new_filename)
with open("./another_folder" + new_filename + ".py", "w") as f:
f.write(new_content)
I have written a code which opens multiple files in a directory and prints only the first instance of match of required text from each file as output.
Now I want this output in a file. Doing it simply by putting print >> file.txt,... or .write or csv.write inside loop will not serve the purpose.
My code is:
import re, os, csv, sys
path = "D:\\"
in_files = os.listdir(path)
moldesc = ['Match1', 'Match2']
for f in in_files:
file = os.path.join(path, f)
text = open(file, "r")
for line in text:
if moldesc[0] in line:
Text1 = line.split()[-1]
if moldesc[1] in line:
Text2 = line.split()[-1]
print f, Text1, Text2 # I WANT THIS OUTPUT IN A FILE
break
text.close()
print "We are extraction done !!!"
you managed to open a file for reading, that's just one step away from opening a file for writing.
out = open(outfile, "w")
for f in in_files:
...
output_string = "{},{},{}\n".format(f, HOMO, LUMO)
out.write(output_string)