Open multiple text files and call a function on each - Python

The code below works perfectly: it opens one text file and passes its contents to the function parse_messages as a parameter.
def parse_messages(hl7):
    hl7_msgs = hl7.split("MSH|")
    hl7_msgs = ["{}{}".format("MSH|", x) for x in hl7_msgs if x]
    for hl7_msg in hl7_msgs:
        # does something..

with open('sample.txt', 'r') as f:
    hl7 = f.read()

df = parse_messages(hl7)
But now I have multiple text files in a directory. I want to open each one and pass its contents to the parse_messages function. Here is what I tried so far, but it only reads the last text file, not all of them:
import glob

data_directory = "C:/Users/.../"
hl7_file = glob.glob(data_directory + '*.txt')

for file in hl7_file:
    with open(file, 'r') as f:
        hl7 = f.read()

df = parse_messages(hl7)

In your read-file loop, for file in hl7_file, you overwrite hl7 on every iteration, so when the loop ends only the last file's contents are left in hl7. You probably want to concatenate the contents of all the files together (which works here because parse_messages re-splits on the MSH| delimiter, so message boundaries survive concatenation):
hl7 = ''
for file in hl7_file:
    with open(file, 'r') as f:
        hl7 += f.read()

df = parse_messages(hl7)  # process all concatenated contents together
Or you can call the parse_messages function inside the loop and store the results in a df list, as below:
df = []
for file in hl7_file:
    with open(file, 'r') as f:
        hl7 = f.read()
    df.append(parse_messages(hl7))

# df[0] holds the result for the 1st file read, df[1] for the 2nd, and so on
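If parse_messages returns a pandas DataFrame (the name df hints at that, though the post does not say), the per-file results collected above could then be combined into a single frame. A minimal sketch under that assumption:

import pandas as pd

# df is the list built in the loop above; this assumes each element
# is a DataFrame, which the original post does not confirm
combined = pd.concat(df, ignore_index=True)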

This should work, if I understood what you want to do
import os

results = []  # renamed from `all`, which shadows the built-in all()
files = [x for x in os.listdir() if x.endswith(".txt")]
for x in files:
    # keyword arguments must come after positional ones:
    # open(x, encoding='utf-8', 'r') is a SyntaxError
    with open(x, 'r', encoding='utf-8') as fileobj:
        content = fileobj.read()
    results.append(parse_messages(content))
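The same idea can also be written with pathlib (a sketch; pathlib has been in the standard library since Python 3.4):

from pathlib import Path

# read every .txt file in the current directory and parse it
results = [parse_messages(p.read_text(encoding='utf-8'))
           for p in Path('.').glob('*.txt')]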

Related

Reading from a text file, parsing it, then converting it to a csv

I have a text file that contains user information. I want to parse the data so that I only have the username, and then create a CSV file from that parsed data.
This is the text file my script is reading from:
blah.com\user1:dajlfnadjhlasdjasnasjlfn:test1
blah.com\user2:dajlfnadjhlasdjasnasjlfn:test2
blah.com\user3:dajlfnadjhlasdjasnasjlfn:test3
blah.com\user4:dajlfnadjhlasdjasnasjlfn:test4
blah.com\user5:dajlfnadjhlasdjasnasjlfn:test5
blah.com\user6:dajlfnadjhlasdjasnasjlfn:test6
Here is my script
import time, os, os.path, sys, string, datetime, shutil, csv

# Locate the file
globalpath = 'C:\\users\\userinfo\\'
todaysdatefull = datetime.datetime.now()
todaysdate = todaysdatefull.strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
logfile = datapath + 'userinfo.txt'
potfile = datapath + 'parsed.csv'
infile = logfile
outfile = potfile
lines = []

# Open the file, find the username and parse it
with open(infile, 'r') as f:
    for line in f:
        usernamestart = line.find('\\')
        usernameend = line.find(':')
        username = line[usernamestart+1:usernameend]
        lines.append(username)
        print(username)

# Output the data as a csv file
with open(outfile, 'w') as csv:
    writer = csv.writer(csv)
    for i in range(len(lines)):
        writer.writerow(('Username', 'Date'))
        writer.writerow(lines[i])
Result:
Traceback (most recent call last):
File "C:\Automation\autocrack\highrisk_parser.py", line 33, in <module>
writer = csv.writer(csv)
AttributeError: 'file' object has no attribute 'writer'
It is coming from this line: with open(outfile, 'w') as csv:. You are overwriting the csv module import with the file object. You should rename the file handle you write to, like this:
with open(outfile, 'w') as csv_to_write:
    writer = csv.writer(csv_to_write)
    # Write the header once.
    writer.writerow(('Username', 'Date'))
    for one_line in lines:
        # writerow expects a sequence; given a bare string it would
        # iterate over the characters and write each to its own column.
        writer.writerow((one_line, ''))
The first part of your code, which finds the username, can be written as follows:
with open(infile, 'r') as f:
    lines = [line.split('\\')[-1].split(':')[0] for line in f]
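A side note on the csv module: on Python 3 the output file should be opened with newline='' (on Python 2, which the 'file' object in the traceback suggests you are using, open it with mode 'wb' instead) to avoid extra blank rows on Windows. A sketch of the write step for Python 3:

import csv

with open(outfile, 'w', newline='') as csv_to_write:
    writer = csv.writer(csv_to_write)
    writer.writerow(('Username', 'Date'))
    for one_line in lines:
        writer.writerow((one_line, ''))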

How to edit specific line for all text files in a folder by python?

Below is my code for editing a text file.
Since Python can't edit a line and save the file in place at the same time,
I first save the original file's content into a list, then write it back out.
For example, suppose there are two text files called sample1.txt and sample2.txt in the same folder.
Sample1.txt
A for apple.
Second line.
Third line.
Sample2.txt
First line.
An apple a day.
Third line.
Running the Python script:
import glob
import os

# search all text files in the same folder as the python script
path = os.path.dirname(os.path.abspath(__file__))
txtlist = glob.glob(path + '\*.txt')

for file in txtlist:
    fp1 = open(file, 'r+')
    strings = []  # create a list to store the content
    for line in fp1:
        if 'apple' in line:
            strings.append('banana\n')  # change the content and store it in the list
        else:
            strings.append(line)  # store the lines that were not changed
    fp2 = open(file, 'w+')  # rewrite the original text file
    for line in strings:
        fp2.write(line)
    fp1.close()
    fp2.close()
Sample1.txt becomes:
banana
Second line.
Third line.
Sample2.txt becomes:
First line.
banana
Third line.
That's how I edit a specific line in a text file.
My question is: is there another method that can do the same thing,
perhaps using other functions or another data type rather than a list?
Thank you, everyone.
Simplify it to this:

with open(fname) as f:
    content = f.readlines()

content = ['banana\n' if 'apple' in line else line for line in content]

and then write the value of content back to the file.
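That write-back step might look like this (a minimal sketch; fname is the same path used above):

with open(fname, 'w') as f:
    f.writelines(content)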
Instead of putting all the lines in a list and writing them back one by one, you can read the whole file into memory, replace the text, and write it out to the same file:

def replace_word(filename):
    with open(filename, 'r') as file:
        data = file.read()
    data = data.replace('word1', 'word2')
    with open(filename, 'w') as file:
        file.write(data)
Then you can loop through all of your files and apply this function, as shown below.
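A sketch of that loop, reusing glob from the question:

import glob

for path in glob.glob('*.txt'):
    replace_word(path)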
The built-in fileinput module makes this quite simple:

import fileinput
import glob

with fileinput.input(files=glob.glob('*.txt'), inplace=True) as files:
    for line in files:
        if 'apple' in line:
            print('banana')
        else:
            print(line, end='')
With inplace=True, fileinput redirects print (standard output) into the file currently being processed.
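If you want a safety net while editing in place, fileinput.input also accepts a backup parameter that saves a copy of each original file before it is modified:

import fileinput
import glob

# backup='.bak' saves sample1.txt as sample1.txt.bak before rewriting it
with fileinput.input(files=glob.glob('*.txt'), inplace=True, backup='.bak') as files:
    for line in files:
        if 'apple' in line:
            print('banana')
        else:
            print(line, end='')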
import glob
import os

def replace_line(file_path, replace_table: dict) -> None:
    list_lines = []
    need_rewrite = False
    with open(file_path, 'r') as f:
        for line in f:
            flag_rewrite = False
            for key, new_val in replace_table.items():
                if key in line:
                    list_lines.append(new_val + '\n')
                    flag_rewrite = True
                    need_rewrite = True
                    break  # only replace the first matching word
            if not flag_rewrite:
                list_lines.append(line)
    if not need_rewrite:
        return
    with open(file_path, 'w') as f:
        f.writelines(list_lines)

if __name__ == '__main__':
    work_dir = os.path.dirname(os.path.abspath(__file__))
    txt_list = glob.glob(work_dir + '/*.txt')
    replace_dict = dict(apple='banana', orange='grape')
    for txt_path in txt_list:
        replace_line(txt_path, replace_dict)

Remove the last empty line from each text file

I have many text files, and each of them has an empty line at the end. My script does not seem to remove it. Can anyone help, please?
# python 2.7
import os
import sys
import re

filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
    if 'ABC' in filename:
        filepath = os.path.join(filedir, filename)
        all_file = open(filepath, 'r')
        lines = all_file.readlines()
        output = 'F:/WF/new/' + filename
        # Read in each row and parse out components
        for line in lines:
            # Weed out blank lines
            line = filter(lambda x: not x.isspace(), lines)
            # Write to the new directory
            f = open(output, 'w')
            f.writelines(line)
            f.close()
You can use Python's rstrip() function to do this as follows:
filename = "test.txt"

with open(filename) as f_input:
    data = f_input.read().rstrip('\n')

with open(filename, 'w') as f_output:
    f_output.write(data)
This removes all blank lines from the end of the file. Note that rstrip('\n') also strips the newline that terminates the last line of text, so add a single '\n' back when writing if you want the file to end with one.
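To apply this to every matching file, as in the question, a sketch along the same lines (it keeps one final newline, and assumes the output directory F:/WF/new/ exists, as in the original script):

import os

filedir = 'F:/WF/'
for filename in os.listdir(filedir):
    if 'ABC' in filename:
        filepath = os.path.join(filedir, filename)
        output = os.path.join(filedir, 'new', filename)
        with open(filepath) as f_input:
            data = f_input.read().rstrip('\n') + '\n'
        with open(output, 'w') as f_output:
            f_output.write(data)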
You can remove the last empty line with:

with open(filepath, 'r') as f:
    data = f.read()

with open(output, 'w') as w:
    w.write(data[:-1])

Note that data[:-1] drops exactly one character, so this only handles a single trailing newline.
You can try this without using the re module:

filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
    if 'ABC' in filename:
        filepath = os.path.join(filedir, filename)
        f = open(filepath).readlines()
        new_file = open(filepath, 'w')
        new_file.write('')
        for i in f[:-1]:
            new_file.write(i)
        new_file.close()
For each filepath, the code opens the file and reads its contents into the list f, then reopens the same file for writing (which truncates it) and writes back every element of f except the last one, which is the empty line.
You can remove the last blank line with the following. This worked for me:

with open(file_path_src, 'r') as file:
    lines = file.read().splitlines()  # list of lines, without trailing newlines

with open(file_path_dst, 'w') as f:
    for indx, line in enumerate(lines):
        f.write(line)
        if indx != len(lines) - 1:
            f.write('\n')
I think this should also work fine, replacing the inner loop of the earlier answer:

new_file.writelines(f[:-1])

(writelines takes the list of lines directly, whereas write expects a single string.)

How to read and process data from a set of text files sequentially?

I have 50 text files (namely Force1.txt, Force2.txt, ..., Force50.txt). The files look like this:
0.0000000e+000 -1.4275799e-003
2.0000000e-002 -1.1012760e-002
4.0000000e-002 -1.0298970e-002
6.0000000e-002 -8.9733599e-003
8.0000000e-002 -9.6871497e-003
1.0000000e-001 -1.2236400e-002
1.2000000e-001 -1.4479739e-002
1.4000000e-001 -1.3052160e-002
1.6000000e-001 -1.1216700e-002
1.8000000e-001 -8.6674497e-003
2.0000000e-001 -8.6674497e-003
2.2000000e-001 -1.3358070e-002
2.4000000e-001 -1.7946720e-002
2.6000000e-001 -1.9782179e-002
I wish to read data from Force1.txt, store the data in a list of tuples, and analyze these data (the details of the analysis are not relevant to the question). Then I have to do the same with Force2.txt, Force3.txt, and so on.
Here is my attempt:
def load_data(fn):
    with open(fn) as f:
        lines = f.readlines()
        return [tuple(map(float, x)) for x in [row.split() for row in lines]]

def display_data(lst):
    return lst.__repr__().replace('[', '').replace(']', '')

pp = []
for file in os.listdir("dir"):
    if file.endswith('.txt'):
        if file.startswith('Force'):
            print os.path.join(r"dir", file)
            with open(file) as f:
                for line in f:
                    pp.append(map(float, line.split()))

mdb.models['Model-1'].TabularAmplitude(data=pp, name='Table', smooth=SOLVER_DEFAULT, timeSpan=STEP)
I'm getting this error:
'Table', smooth=SOLVER_DEFAULT, timeSpan=STEP):
Invalid time values, expected monotonically increasing numbers
How can I solve this issue?
This code should do it:

import os

def load_data(fn):
    with open(fn) as f:
        lines = f.readlines()
        return [tuple(map(float, x)) for x in [row.split() for row in lines]]

def display_data(lst):
    return lst.__repr__().replace('[', '').replace(']', '')

dirname = r"Your dir name goes here"
for filename in os.listdir(dirname):
    if filename.endswith('.txt'):
        if filename.startswith('Force'):
            pathfile = os.path.join(dirname, filename)
            print pathfile
            pp = load_data(pathfile)
            print display_data(pp)
            mdb.models['Model-1'].TabularAmplitude(data=pp,
                                                   name='Table',
                                                   smooth=SOLVER_DEFAULT,
                                                   timeSpan=STEP)
You just need to update dirname with the name of the directory that contains the text files. The original code failed because pp accumulated the rows of every file into one list, so the time values in the first column started over with each new file and were no longer monotonically increasing; here pp is rebuilt from scratch for each file. I also recommend not using file as a variable identifier, because file is a built-in name in Python 2; I've used filename instead.

Read multiple CSV files one by one, with the file paths stored in a text file, using Python

Here is my code for reading individual cells of one CSV file, but I want to read multiple CSV files one by one, taking the paths from a .txt file where the CSV file paths are listed.
import csv

ifile = open("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
data = list(csv.reader(ifile, delimiter=';'))
REQ = []
RES = []
n = len(data)
for i in range(n):
    x = data[i][1]
    y = data[i][2]
    REQ.append(x)
    RES.append(y)
    i += 1
for j in range(2, n):
    try:
        if REQ[j] != '' and RES[j] != '':  # ignore blank cells
            print REQ[j], ' ', RES[j]
    except:
        pass
    j += 1
The CSV file paths are stored in a .txt file like this:
C:\Desktop\Test_Specification\RDBI.csv
C:\Desktop\Test_Specification\ECUreset.csv
C:\Desktop\Test_Specification\RDTC.csv
and so on..
You can read stuff stored in files into variables, and you can use a variable holding a string anywhere you can use a literal string. So...

with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()  # strip() removes the trailing newline
        ifile = open(file_name, 'rb')
        # ... the rest of your code goes here
Maybe we can fix this up a little...

import csv

with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()
        with open(file_name, 'rb') as f:
            csv_file = csv.reader(f, delimiter=';')
            next(csv_file)  # skip the header row
            for record in csv_file:
                req = record[1]
                res = record[2]
                if len(req + res):
                    print req, ' ', res
You just need to wrap a loop that reads your file containing the list of file paths around your first open statement, for example:

from __future__ import with_statement

with open("myfile_which_contains_file_path.txt") as f:
    for line in f:
        ifile = open(line.strip(), 'rb')  # strip the trailing newline from each path
        # here the rest of your code
You need to use a raw string, since your path contains backslashes:

import csv

# path to the .txt file that lists the CSV paths (the name is assumed for illustration)
file_list = r"C:\Users\BKA4ABT\Desktop\Test_Specification\file_list.txt"

with open(file_list) as f:
    for line in f:
        with open(line.strip(), 'rb') as the_file:
            reader = csv.reader(the_file, delimiter=';')
            for row in reader:
                req, res = row[1:3]
                if req and res:
                    print('{0} {1}'.format(req, res))
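For completeness, the same logic in Python 3 style (a sketch: CSV files are opened in text mode with newline='' rather than 'rb', and the name of the path-list file is assumed, as above):

import csv
from pathlib import Path

file_list = Path(r"C:\Users\BKA4ABT\Desktop\Test_Specification\file_list.txt")

for line in file_list.read_text().splitlines():
    with open(line.strip(), newline='') as the_file:
        reader = csv.reader(the_file, delimiter=';')
        for row in reader:
            req, res = row[1:3]
            if req and res:
                print('{0} {1}'.format(req, res))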
