I need to create a certain number of files that always have the same lines inside them.
With this script, I can create the "schede" folder in which a certain number of *.tex files are created.
The LaTeX strings are written only into the last file, while the others remain blank. How can I have all the files filled in?
import os
import subprocess  # kept: other parts of the file may rely on it

# Directory of this script; the "schede" folder is created next to it.
work_path = os.path.abspath(os.path.dirname(__file__))
schede_dir = os.path.join(work_path, "schede")
# exist_ok avoids the race between a separate exists() check and mkdir().
os.makedirs(schede_dir, exist_ok=True)
os.chdir(schede_dir)

n = 5  # number of .tex files to create; put the number as you wish
for i in range(n):
    # zfill(1) pads to width 1, i.e. it never changes the digit; kept only
    # so the generated names (S0.tex ... S4.tex) stay identical.
    file_name = "S" + str(i).zfill(1) + ".tex"
    # Create the empty file portably instead of shelling out to `touch`
    # (subprocess + `touch` fails on Windows and spawns a process per file).
    open(file_name, "a").close()
def append_new_line(file_name, text_to_append):
    """Append *text_to_append* to *file_name*, preceded by a newline
    whenever the file already holds some content."""
    with open(file_name, "a+") as handle:
        # 'a+' positions writes at the end; rewind the read cursor so we
        # can probe whether any content already exists.
        handle.seek(0)
        first_chunk = handle.read(100)
        if first_chunk:
            handle.write("\n")
        handle.write(text_to_append)
def append_multiple_lines(file_name, lines_to_append):
    """Append every string in *lines_to_append* to *file_name*, one per
    line, separating entries with '\n'."""
    with open(file_name, "a+") as handle:
        # Peek at the start of the file: a newline must precede the very
        # first appended entry only when the file already has content.
        handle.seek(0)
        needs_newline = bool(handle.read(100))
        for entry in lines_to_append:
            if needs_newline:
                handle.write("\n")
            # From the second entry on, a separator is always required.
            needs_newline = True
            handle.write(entry)
def main():
    # BUG(review): `file_name` is not defined in this scope -- it leaks in
    # from the module-level loop that created the files, so it always
    # refers to the *last* file created.  That is why only the last .tex
    # file receives the LaTeX lines (the question being asked).
    append_new_line(file_name, 'This is second line')
    print('Append multiple lines to a file in Python')
    list_of_lines = [
        '\ecvtitle{}{}',
        '\ecvitem{\ecvhighlight{Organizzato da:}}{\\textbf{}}',
        '\ecvitem{\ecvhighlight{}}{}',
        '\ecvitem{}{}',
        '\ecvitem{\ecvhighlight{Programma Formativo:}}{}',
        '\smallskip',
        '\ecvitem{}{',
        # NOTE(review): a comma is missing after the next entry, so it is
        # implicitly concatenated with the '\item ...' string below into
        # one list element.
        '\\begin{ecvitemize}'
        '\item scrivere un\'item',
        '\\end{ecvitemize}',
        '}']
    # Append strings in list as separate new lines at the end of the file.
    append_multiple_lines(file_name, list_of_lines)


if __name__ == '__main__':
    main()
As #MisterMiyagi said, file_name is a single variable, so by the time main() runs it only refers to the last file created.
I suggest creating a list, files = [], before the loop.
Then, after the line file_name = "S"+str(i).zfill(1)+".tex",
add files.append(file_name)
at the end of the for loop where you create the files,
and change main() to perform the instructions for each file in that list.
def main():
    """Fill every generated .tex file with the LaTeX skeleton."""
    # BUG FIX: the original iterated over the undefined name `file`; it must
    # walk the `files` list collected while the .tex files were created,
    # otherwise a NameError is raised (or only the leaked global `file_name`
    # -- the last file -- would ever be filled).
    for file_name in files:
        append_new_line(file_name, 'This is second line')
        print('Append multiple lines to a file in Python')
        list_of_lines = [
            '\ecvtitle{}{}',
            '\ecvitem{\ecvhighlight{Organizzato da:}}{\\textbf{}}',
            '\ecvitem{\ecvhighlight{}}{}',
            '\ecvitem{}{}',
            '\ecvitem{\ecvhighlight{Programma Formativo:}}{}',
            '\smallskip',
            '\ecvitem{}{',
            # BUG FIX: a comma was missing here, so this string and the next
            # were implicitly concatenated into a single list element.
            '\\begin{ecvitemize}',
            '\item scrivere un\'item',
            '\\end{ecvitemize}',
            '}']
        # Append strings in list as separate new lines at the end of the file.
        append_multiple_lines(file_name, list_of_lines)
Related
I need to define a fucntion that will, in short:
Open and grab the content from an existing file
Transform that content
Create a new file
Write that new content in this new file
Print the content of the new file
I'm a complete beginner, but this is what I have so far. How can I improve it?
def text():
    """Count word frequencies in music.txt, write the sorted (word, count)
    pairs to finheiro_saida.txt, then print that file's content."""
    from collections import Counter

    # Read the source file; `with` guarantees the handle is closed
    # (the original left it open).
    with open('music.txt', 'r') as text_file:
        reading = text_file.read()

    # Lower-case everything so counting is case-insensitive, then count
    # the words and sort the (word, count) pairs alphabetically.
    list_words = Counter(reading.lower().split())
    ordered_list = sorted(list_words.items())

    # Write one "(word, count)" pair per line.  BUG FIX: the original
    # iterated over the undefined name `ordem` and wrote via the undefined
    # name `finheiro_saida` instead of the `final_file` handle.
    # NOTE: mode 'x' raises FileExistsError if the file already exists.
    with open('finheiro_saida.txt', 'x') as final_file:
        for i in ordered_list:
            final_file.write(str(i) + '\n')

    # Re-open by the same relative name.  BUG FIX: the original used a
    # hard-coded absolute path whose spelling ("ficheiro") did not match
    # the file just written ("finheiro"), hence "file doesn't exist".
    with open('finheiro_saida.txt', 'r') as final:
        print(final.read())
You can open the new file and print its content the same way you read and wrote to it!
# ...After all your previous code...
# Re-open the freshly written file, this time in read mode, and show it.
with open('finheiro_saida.txt', 'r') as final_file:
    final_file_content = final_file.read()
    print(final_file_content)
Fixed some syntax error in your code.
you can display the the same way you read.
Also provide all imports to the start of the file.
you can also read all lines from the file as a list using file.readlines()
from collections import Counter
def text():
    """Build a sorted word-frequency list from music.txt, write it to
    finheiro_saida.txt, and return the output file's name."""
    # `with` closes the input handle; the original opened it and never
    # closed it.
    with open("music.txt", "r") as text_file:
        reading = text_file.read()
    # Lower-case for case-insensitive counting, then sort the pairs.
    new_text = reading.lower()
    list_words = Counter(new_text.split())
    ordered_list = sorted(list_words.items())
    # Create the output file; mode 'x' raises FileExistsError if it
    # already exists.
    file_name = "finheiro_saida.txt"
    with open("finheiro_saida.txt", "x") as final_file:
        for i in ordered_list:
            final_file.write(str(i) + "\n")
    return file_name
def display(final_file_name):
    """Print the full content of *final_file_name*."""
    with open(final_file_name) as handle:
        print(handle.read())
# Generate the frequency file, then show what was written.
final_file_name = text()
display(final_file_name)
My code does not throw an error; it simply creates the files, but all of them are empty. I tried it from the command line, and it works using the wildcard training_set_pssm/*.pssm path, but I must do it from the IDE because it is not printing the correct output anyway.
The input file is a set of checkpoint files that look like this:
From this file, which is a text file, saved as .pssm, essentially, I am extracting only the PROFILE side, which is on the right and NORMALIZING it at the same time... my code does not seem to do it correctly, and from the IDE it does not do it at all, so I am not sure what I need to modify in the script to do so at this point.
Here is the code:
#!/usr/bin/env python3
import sys
import os.path
from pathlib import Path
def pssm_list(infile):  # call list of file names and for dsspfile
    '''Reads relevant lines from a pssm file and saves them to a list.
    Returns values of the 2 matrices (no header).'''
    with open(infile) as ofile:
        # Drop the 3-line header and the 6-line footer of the file.
        return ofile.readlines()[3:-6]
def lines_to_list(infile1):
    '''Reads all lines from a file and saves them to a list; each entry
    keeps its trailing '\n' character.'''
    with open(infile1, 'r') as rfile:
        # Caller is expected to rstrip() each entry before use.
        return rfile.readlines()
def relevant_lines(infile2):
    '''Takes a .pssm file and extracts the Sequence Profile portion only.
    Returns a list of lists where each inner list is one line of the
    sequence profile matrix.'''
    # Whitespace-split columns 22..41 of each matrix line hold the profile.
    return [line.split()[22:42] for line in pssm_list(infile2)]
# # divide all values by 100
def write_normalized_profile(profile_final_list, ofile):
    '''Takes a profile list of lists and an outfile name.  Every number in
    a sublist is divided by 100, converted to a string and written to the
    file followed by a tab; a newline ends each sublist.'''
    with open(ofile, "a") as wfile:
        for sublist in profile_final_list:
            # One pass per row: tab after every normalized value, then EOL.
            wfile.write(''.join(str(int(el) / 100) + '\t' for el in sublist))
            wfile.write("\n")
if __name__ == '__main__':
    # Directory that holds the per-id <id>.pssm files.
    pssm_dir = '/Users/name/Desktop/PDB/training_set_pssm/idlist/'
    # One file id per line, without the ".pssm" extension.
    idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
    for ids in idlist:
        # BUG FIX: the original tested os.path.isfile() on the *directory*
        # path and passed that same directory to relevant_lines(), while
        # the per-id name (`part2`) was built but never used -- so no
        # .pssm file was ever read and the outputs stayed empty.  Build
        # and use the per-id file path instead.
        infile = os.path.join(pssm_dir, ids.rstrip() + '.pssm')
        if os.path.isfile(infile):
            # Output file for each id, with the correct extension.
            ofile = ids.rstrip() + '.profile'
            profile_list = relevant_lines(infile)
            write_normalized_profile(profile_list, ofile)
        else:
            print("Error file: " + infile + " not found.")
First and foremost, let's fix your paths: you imported Path with "from pathlib import Path" but never used it.
Let's declare infile = Path('/Users/name/Desktop/PDB/training_set_pssm/idlist/'); we now have some helpful functions we can use for finding problems.
try out some of these to make sure you are searching in the right place.
#this will write out the absolute filepath usefull to check if it is correct
infile.absolute()
#this tells you if this path exists
infile.exists()
#this tells you if this is a file
infile.is_file()
let's start at the beginning
I'll try and explain what is happening in your code line by line.
if __name__ == '__main__':
    # Reviewer walkthrough, line by line.
    # I don't really know what this infile is -- is it a file containing
    # d1s7za_.fasta.pssm
    # d1s98a_.fasta.pssm
    # d1s99a_.fasta.pssm
    # or a directory containing files named
    # d1s7za_.fasta.pssm
    # d1s98a_.fasta.pssm
    # d1s99a_.fasta.pssm
    # ...
    infile = Path('/Users/name/Desktop/PDB/training_set_pssm/idlist')
    # this returns a list of strings, presumably in the form of
    # d1ciya2.fasta\n
    # d1ciya3.fasta\n
    # d1cq3a_.fasta\n
    idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
    # loop over that list
    for ids in idlist:
        # strips the '\n' from the id and adds '.pssm';
        # you now have something like 'd1d0qa_.fasta.pssm' --
        # but note: you never use this value.
        part2 = ids.rstrip() + '.pssm'
        # was 'if os.path.isfile(infile) == True:' but should be:
        if infile.is_file():
            # strips the '\n' from the id and adds '.profile';
            # you now have something like 'd1d0qa_.fasta.profile'
            ofile = ids.rstrip() + '.profile'
            # here is where it becomes a bit weird --
            # relevant_lines() documents itself as taking a .pssm file
            # and extracting the Sequence Profile portion only.
            # Is `infile` a .pssm file?  Is this correct?
            profile_list = relevant_lines(infile)
            # this seems fine: it writes the normalized data to ofile,
            # which will be something like 'd1d0qa_.fasta.profile'
            write_normalized_profile(profile_list, ofile)
solution:
if __name__ == '__main__':
    # Build each .pssm path from the id list and only process ids whose
    # file actually exists in the directory.
    pssm_directory = Path('/Users/name/Desktop/PDB/training_set_pssm/idlist/') #the directory
    idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
    for ids in idlist:
        infile = pssm_directory.joinpath(ids.rstrip() + '.pssm') #generate filename from id
        if infile.is_file(): #check if filename exists
            ofile = ids.rstrip() + '.profile'
            profile_list = relevant_lines(infile)
            write_normalized_profile(profile_list, ofile)
if __name__ == '__main__':
#infile is a directory containing files named
#d1s7za_.fasta.pssm
#d1s98a_.fasta.pssm
#d1s99a_.fasta.pssm
#...
infile = Path('/Users/name/Desktop/PDB/training_set_pssm/')
# this returns a list of string presumably in the form of
# d1ciya2.fasta\n
# d1ciya3.fasta\n
# d1cq3a_.fasta\n
idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
# loop over that list
for ids in idlist:
# strips the '\n' from the id and adds '.pssm'
# you now have something like 'd1d0qa_.fasta.pssm' **exactly, now it matches with the file to be extracted from the directory**
# you never use this
part2 = ids.rstrip() + '.pssm' **I think this should have been concat to the infile..**
# was 'if os.path.isfile(infile) == True:' but should be :
if infile.is_file():
# strips the '\n' from the id and adds '.profile'
# you now have something like 'd1d0qa_.fasta.profile'
ofile = ids.rstrip() + '.profile' **yes these will be the output file names**
# here is where it becomes a bit weird
# in relevant_lines you say:
# Takes list or lines of data (extracted from a .pssm file) and extracts the Sequence Profile Portion only.
# is infile a .pssm file? **yes it is, it is the actual datafile from the directory, well should be**
# is this correct?
profile_list = relevant_lines(infile)
# this seems fine, it writes the normalized data to ofile.
# ofile will be something like 'd1d0qa_.fasta.profile'
write_normalized_profile(profile_list, ofile) **yes**
I'm attempting to read and parse a .txt file that is continually being updated throughout the day. I want to parse only lines that have not already been consumed. These are then to be sent to a Telegram group.
At present, every time I run the script it parses everything.
selections = []
msgList = []
urr = ""
name = ""
# Index of the next unread line.  NOTE(review): this resets to 0 on every
# run of the script, which is exactly why each run re-parses everything --
# the counter would need to be persisted between runs.
ourLines=len(selections)
while(True):
    file1 = open(r'C:\\urlt\log.txt', 'r')
    Lines = file1.readlines()
    file1.close()
    try:
        while(True):
            if(ourLines==len(Lines)):
                break
            else:
                txt = Lines[ourLines].strip()
                tlist = txt.split("&")
                ourLines=ourLines+1
                for subtxt in tlist:
                    if "eventurl=" in subtxt:
                        # Strip the "eventurl=" prefix and the 3-char tail.
                        a = subtxt[9:len(subtxt) - 3]
                        url = "www.beefandtuna.com/%23"+a.replace("%23", "/").strip('(')
                        #print(url)
                        urr = url
                    elif "bet=" in subtxt:
                        # NOTE(review): `urllib` is not imported in this
                        # snippet -- this raises NameError at runtime.
                        name = urllib.parse.unquote(subtxt[4:len(subtxt)])
                        #print(name)
                        # NOTE(review): `url` may be unbound here if no
                        # "eventurl=" field preceded this one in the line.
                        selections.append(url+name)
                        msg = url +" " '\n' "Name: "+ name
                        if msg not in msgList:
                            post_to_telegram(msg)
                            msgList.append(msg)
        #time.sleep(0.5)
    except:
        # WARNING(review): bare except silently swallows *every* error,
        # including the NameErrors above; narrow it and/or log.
        pass
Assuming the new contents are appended to the end of the file: after you finish reading the file, create a copy of the file.
The next time you read the file, seek to the location that is the length of the copy.
# Incremental log reader: remember how much was read by keeping a full
# backup copy of the file and seeking past its size on the next pass.
import os
from shutil import copyfile

in_file_loc = r'C:\\SmartBet.io Bot\placerlog.txt'
backup_file_loc = in_file_loc + ".bak"

while True:
    try:
        # Size of the snapshot taken on the previous iteration.
        file_backup_size = os.stat(backup_file_loc).st_size
    except:
        # First run: no backup exists yet.  NOTE(review): bare except also
        # hides unrelated failures; `except OSError:` is what is expected.
        file_backup_size = 0
    file1 = open(in_file_loc, 'r')
    # move file position to the end of the old file
    file1.seek(file_backup_size)
    # Read all lines in the file after the position we seek-ed to
    Lines = file1.readlines()
    file1.close()
    # copy current version of file to backup
    copyfile(in_file_loc, backup_file_loc)
    # Then do whatever you want to do with Lines
This is probably not the best way to do this because, as rici said in a comment below:
"make a copy" is not an atomic operation, and as the file grows copying will be successively slower. Any data appended to the log file during the copy will never be reported. Furthermore, the copy might happen to include a partial entry, in which case the next scan will start in the middle of an entry.
An alternative is to save the size of the current file in a different one:
# Incremental log reader, second approach: persist only the number of
# bytes consumed so far in a side file, avoiding the full copy.
in_file_loc = r'C:\\SmartBet.io Bot\placerlog.txt'
size_file_loc = in_file_loc + ".lastsize"

while True:
    # read old size from file
    try:
        with open(size_file_loc, 'r') as f:
            file_size = int(f.read())
    except:
        # if error, file size is zero.  NOTE(review): the expected failures
        # are (FileNotFoundError, ValueError); a bare except hides others.
        file_size = 0
    file1 = open(in_file_loc, 'r')
    # Skip everything consumed on a previous pass.
    file1.seek(file_size)
    Lines = file1.readlines()
    new_file_size = file1.tell() # Get the location of the current file marker
    file1.close()
    # write new size to file
    with open(size_file_loc, 'w') as f:
        f.write(str(new_file_size))
    # Then do whatever you want to do with Lines
super new to Python, and looking for some guidance. I'm trying to
loop through hundreds of text files in a folder (one for each store), and generate a CSV file with the store ID (given in the title of the text document i.e. xxx2902ncjc), and various parameters about the store (i.e. maxPeople=31, or space_temp=78, etc.). Each text file may have difference parameters depending on the location, so I've captured all of the unique variables in the third for loop below. I've captured all of the store IDs in the second for-loop. That's all I've gotten so far.
Challenges that I'm seeing are 1) figuring out how to import this all to Excel, 2) Finding someway to store IDs (which are at this point a slice of each filename) with the correct parameters 3) Finding a way to have excel match up the Store ID and the parameters to the variables.
I honestly have no idea what I should be doing next. Any and all help would be very appreciated as I am a suuuper novice. Cheers.
# Collect every *.prop file in `path`, print the store id embedded in each
# filename, and accumulate the unique "variable" names (text before '=')
# found across all files.
import os, sys, glob
path = r"C:\Users\XXXXX" #insert folder for data here
dirs=os.listdir(path)
fullfilenames=[]
variablelist=[]
allvariables=[]
variables=[]
for file in os.listdir(path):
    if ".prop" in file:
        fullfilenames.append(path+'\\'+file)
for name in fullfilenames: #create list of StoreIDs
    index_of_c = name.index('qPA')
    file_name= name[index_of_c:] #cuts off path
    file_name=file_name.rsplit(".",1)[0] #removes extension
    SiteID= file_name[4:] #splits filename into Site ID
    print (SiteID) #prints SiteID
for file in fullfilenames:
    # NOTE(review): handle is never closed -- prefer `with open(...)`.
    f = open(file,'r') #opens the file and enters reading mode
    content=f.readlines() #reads each line of the file
    for line in content:
        variables.append(line.split('=')[0]) #key is the text before the "="
for variable in variables:
    # NOTE(review): linear membership test per item; a set would be O(1).
    if variable not in allvariables: #checks if variable is included in the variable list
        allvariables.append(variable) #if variable isn't in the list, add it
def createkeys():
    # NOTE(review): reads the module-level `allvariables`; despite its
    # name, this only reports what was collected -- no keys are created.
    print(allvariables)
    print(type(allvariables))
    print(len(allvariables))
# Two-pass converter: pass one gathers the union of all "variable=value"
# keys across every *.prop file; pass two writes one CSV row per store
# with the SiteID (parsed from the filename) plus each variable's value.
import os, sys, glob, re
path = r"C:\Users\mcantwell\Desktop\Projects\kohls_prop" #insert folder for data here
outfile = r"C:\Users\mcantwell\Desktop\Projects\kohls_prop.csv"
dirs=os.listdir(path)
fullfilenames=[]
variablelist=[]
allvariables=set()
variables=[]
for file in os.listdir(path):
    if ".prop" in file:
        fullfilenames.append(path+'\\'+file)
# Pass one: union of all keys.
for file in fullfilenames:
    # NOTE(review): handle is never closed -- prefer `with open(...)`.
    f = open(file,'r') #opens the file and enters reading mode
    content=f.readlines() #reads each line of the file
    for line in content:
        line_split = line.split('=') #key is the text before the "="
        if len(line_split) == 2:
            variable = line_split[0]
            allvariables.add(variable)
# NOTE(review): hand-rolled CSV; the stdlib `csv` module would quote
# values that themselves contain commas.  `out` is also never closed.
out = open(outfile, 'w')
def writerow(row):
    out.write(', '.join(row))
    out.write('\n')
# Header row.  NOTE(review): set iteration order is arbitrary, so the
# column order can differ between runs -- `sorted(allvariables)` would be
# stable.  (It is consistent *within* one run, so rows still line up.)
writerow(['SiteID'] + list(allvariables))
# Pass two: one row per store file.
for file in fullfilenames:
    m = re.search('qPAC(\d+)', file)
    SiteID = m.group(1)
    f = open(file,'r') #opens the file and enters reading mode
    content=f.readlines() #reads each line of the file
    data={}
    for line in content:
        line_split = line.strip().split('=') #key is the text before the "="
        if len(line_split) == 2:
            variable = line_split[0]
            value = line_split[1]
            data[variable] = value
    # Missing variables become empty cells so columns stay aligned.
    values = [SiteID] + [data.get(variable, '') for variable in allvariables]
    writerow(values)
print(allvariables)
print(type(allvariables))
print(len(allvariables))
I am running Python 3.5.1
I have a text file that I'm trying to search through and replace or overwrite text if it matches a predefined variable. Below is a simple example:
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Outdated line of information that has no comment above - message_label
The last line in this example needs to be overwritten so the new file looks like below:
test2.txt after script
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
The function I have written idealAppend does not work as intended and subsequent executions create a bit of a mess. My workaround has been to separate the two lines into single line variables but this doesn't scale well. I want to use this function throughout my script with the ability to handle any number of lines. (if that makes sense)
Script
#!/usr/bin/env python3
import sys, fileinput, os
def main():
    file = 'test2.txt'
    # NOTE(review): this triple-quoted string both starts and ends with a
    # newline; idealAppend() compares and writes the data verbatim, which
    # is one reason repeated runs make a mess of the file.
    fullData = r'''
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
'''
    idealAppend(file, fullData)
def idealAppend(filename, data):
    """Replace the line ending in *data*'s last word with *data*, or
    append *data* when no such line exists."""
    label = data.split()[-1] # Grab last word of the Append String
    # In-place rewrite: stdout inside this loop is redirected into the file.
    for line in fileinput.input(filename, inplace=1, backup='.bak'):
        # BUG(review): `line` is a single line with its trailing '\n',
        # while `data` is multi-line and starts with '\n' -- so
        # `line != data` is effectively always true and the whole block is
        # re-inserted on every run (the "mess" described above).
        if line.strip().endswith(label) and line != data: # If a line exists that matches the last word (label)
            line = data # Overwrite with new line, comment, new line, and append data.
        sys.stdout.write(line) # Write changes to current line
    with open(filename, 'r+') as file: # Open File with rw permissions
        # NOTE(review): `data in line` tests a multi-line needle against a
        # single line, so it can only ever match one-line data.
        line_found = any(data in line for line in file) # Search if Append exists in file
        if not line_found: # If data does NOT exist
            file.seek(0, os.SEEK_END) # Goes to last line of the file
            file.write(data) # Write data to the end of the file


if __name__ == "__main__": main()
Workaround Script
This seems to work perfectly as long as I only need to write exactly two lines. I'd love this to be more dynamic when it comes to number of lines so I can reuse the function easily.
#!/usr/bin/env python3
import sys, fileinput, os
def main():
    # Workaround version: the comment and payload are two separate
    # single-line variables, which is why it only scales to exactly
    # two lines.
    file = 'test2.txt'
    comment = r'# This is an important line that needs to be copied'
    append = r'Very Important Line of information that the above line is a comment for - message_label'
    appendFile(file, comment, append)
def appendFile(filename, comment, append):
    """Replace the line labelled by *append*'s last word with the
    comment/append pair, or add the pair at the end when absent."""
    label = append.split()[-1] # Grab last word of the Append String
    # In-place rewrite: stdout inside this loop is redirected into the file.
    for line in fileinput.input(filename, inplace=1, backup='.bak'):
        if line.strip().endswith(label) and line != append: # If a line exists that matches the last word (label)
            line = '\n' + comment + '\n' + append # Overwrite with new line, comment, new line, and append data.
        sys.stdout.write(line) # Write changes to current line
    with open(filename, 'r+') as file: # Open File with rw permissions
        line_found = any(append in line for line in file) # Search if Append exists in file
        if not line_found: # If data does NOT exist
            file.seek(0, os.SEEK_END) # Goes to last line of the file
            file.write('\n' + comment + '\n' + append) # Write data to the end of the file


if __name__ == "__main__": main()
I am very new to Python so I'm hoping there is a simple solution that I overlooked. I thought it might make sense to try and split the fullData variable at the new line characters into a list or tuple, filter the label from the last item in the list, then output all entries but this is starting to move beyond what I've learned so far.
If I understand your issue correctly, you can just open the input and output files, then check whether the line contains old information and ends with the label and write the appropriate content accordingly.
# BUG FIX: the output file must be opened for *writing*; the original used
# mode 'r', which makes output.write() raise io.UnsupportedOperation.
with open('in.txt') as f, open('out.txt', 'w') as output:
    for line in f:
        # NOTE(review): `line` still carries its trailing '\n', so
        # endswith(label) only matches when the label includes it;
        # consider line.rstrip('\n').endswith(label).
        if line.endswith(label) and not line.startswith(new_info):
            output.write(replacement_text)
        else:
            output.write(line)
If you want to update the original file instead of creating a second one, it's easiest to just delete the original and rename the new one instead of trying to modify it in place.
Is this what you are looking for ? It's looking for a label and then replaces the whole line with whatever you want.
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Here is to be replaced - to_replace
script.py
#!/usr/bin/env python3
def main():
    # Replace the line tagged "to_replace" in test2.txt with a multi-line
    # block; the result is written to test2.txt.bak by appendFile().
    file = 'test2.txt'
    label_to_modify = "to_replace"
    replace_with = "# Blabla\nMultiline\nHello"
    """
    # Raw string stored in a file
    file_replace_with = 'replace_with.txt'
    with open(file_replace_with, 'r') as f:
        replace_with = f.read()
    """
    appendFile(file, label_to_modify, replace_with)
def appendFile(filename, label_to_modify, replace_with):
    """Copy *filename* to *filename*.bak, substituting *replace_with* for
    every line whose last whitespace-separated word equals
    *label_to_modify*.  The original file is left untouched."""
    rewritten = []
    with open(filename, 'r') as source:
        for raw_line in source:
            words = raw_line.split()
            if words and words[-1] == label_to_modify:
                # The replacement is inserted verbatim -- no trailing
                # newline is added, matching the line it displaces only
                # if the caller includes one.
                rewritten.append(replace_with)
            else:
                rewritten.append(raw_line)
    with open(filename + ".bak", 'w') as backup:
        backup.write(''.join(rewritten))


if __name__ == "__main__": main()
test2.txt.bak
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# Blabla
Multiline
Hello
Reading over both answers I've come up with the following as the best solution i can get to work. It seems to do everything I need. Thanks Everyone.
#!/usr/bin/env python3
def main():
    testConfFile = 'test2.txt' # /etc/apache2/apache2.conf
    testConfLabel = 'timed_combined'
    # Two-line replacement block; testFormatAppend() matches existing
    # lines against the last word of each of these lines.
    testConfData = r'''###This is an important line that needs to be copied - ##-#-####
Very Important Line of information that the above line is a \"r\" comment for - message_label'''
    testFormatAppend(testConfFile, testConfData, testConfLabel) # Add new test format
def testFormatAppend(filename, data, label):
    """Blank every line of *filename* whose last word matches the last
    word of any line in *data*, then append *data* and rewrite the file.

    NOTE(review): the `label` parameter is never used in this body.
    """
    dataSplit = data.splitlines()
    fileDataStr = ''
    with open(filename, 'r') as file:
        # Map of 1-based line number -> line text.
        fileData = stringToDictByLine(file)
    for key, val in fileData.items():
        for row in dataSplit:
            # Compare last words; blank out the matching line.
            # NOTE(review): `row.strip().split()[-1]` raises IndexError on
            # a blank row of *data* -- assumes data has no empty lines.
            if val.strip().endswith(row.strip().split()[-1]):
                fileData[key] = ''
    fileLen = len(fileData)
    # If the final line was just blanked, append without an extra newline;
    # otherwise separate the appended block from the existing content.
    # NOTE(review): raises KeyError when the input file is empty
    # (fileData[0] does not exist) -- assumes a non-empty file.
    if fileData[fileLen] == '':
        fileLen += 1
        fileData[fileLen] = data
    else:
        fileLen += 1
        fileData[fileLen] = '\n' + data
    for key, val in fileData.items():
        fileDataStr += val
    with open(filename, 'w') as file:
        file.writelines(str(fileDataStr))
def stringToDictByLine(data):
    """Map 1-based line numbers to the items of *data* (any iterable,
    typically an open file handle)."""
    return dict(enumerate(data, start=1))


if __name__ == "__main__": main()