Output is an empty file - python

My code does not throw an error; it simply creates the files, but all of them are empty. I tried it from the command line, and it works using the wildcard path training_set_pssm/*.pssm, but I must do it from the IDE because it is not printing the correct output anyway.
The input file is a set of checkpoint files that look like this:
From this file, which is a text file, saved as .pssm, essentially, I am extracting only the PROFILE side, which is on the right and NORMALIZING it at the same time... my code does not seem to do it correctly, and from the IDE it does not do it at all, so I am not sure what I need to modify in the script to do so at this point.
Here is the code:
#!/usr/bin/env python3
import sys
import os.path
from pathlib import Path
def pssm_list(infile):
    """Return the matrix rows of a .pssm file as raw text lines.

    The first 3 lines and the last 6 lines of the file are dropped; the
    remaining lines are returned unchanged, each still ending with its
    newline character.
    """
    with open(infile) as handle:
        every_line = list(handle)
    # Slice after reading so the header/footer trimming is in one place.
    return every_line[3:-6]
def lines_to_list(infile1):
    """Return every line of ``infile1`` as a list of strings.

    Each entry keeps its trailing newline, so callers that use the entries
    as file names must strip them first.
    """
    with open(infile1, 'r') as source:
        return source.readlines()
def relevant_lines(infile2):
    """Extract the sequence-profile columns from a .pssm file.

    Every row returned by ``pssm_list`` is split on whitespace and the
    fields at positions 22 up to (not including) 42 are kept.

    Returns a list of lists of strings, one inner list per matrix row.
    """
    return [row.split()[22:42] for row in pssm_list(infile2)]
# # divide all values by 100
def write_normalized_profile(profile_final_list, ofile):
    """Append the profile to ``ofile`` with every value divided by 100.

    ``profile_final_list`` is a list of rows; each row is a list of strings
    that parse as integers. Each value is written as ``str(int(value) / 100)``
    followed by a tab, and each row is terminated by a newline. The file is
    opened in append mode, so existing content is kept.
    """
    with open(ofile, "a") as sink:
        for row in profile_final_list:
            # One write per cell, each followed by a tab (including the last).
            sink.writelines(str(int(cell) / 100) + '\t' for cell in row)
            sink.write("\n")
# Script entry point: loops over the ids read from the id-list file and, for
# each one, builds a '<id>.profile' output name and writes a normalized profile.
if __name__ == '__main__':
# infile = sys.argv[1]
infile = ('/Users/name/Desktop/PDB/training_set_pssm/idlist/') # the idlist to loop on
# Call the function by looping through an id list+'.pssm' extension
# name the outfile the same --> id list+'.profile'
idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist") # containing the id of the file but NOT the extension ".pssm"
for ids in idlist:
part2 = ids.rstrip() + '.pssm' # removing newlinecharacter, adding necessary extension
# NOTE(review): part2 is never used below; both the existence check and the
# read use `infile`, which has the same value on every iteration.
if os.path.isfile(infile) == True: # does this file exist
ofile = ids.rstrip() + '.profile' # outfile for each id with correct extension
profile_list = relevant_lines(infile)
write_normalized_profile(profile_list, ofile)
# NOTE(review): presumably this print belonged to an `else:` branch that is
# not visible here — confirm against the original post.
print("Error file: " + infile + " not found.")

First and foremost, let's fix your paths: you wrote from pathlib import Path but never used it.
Let's declare infile = Path('/Users/name/Desktop/PDB/training_set_pssm/idlist/'); we now have some helpful functions we can use for finding problems.
try out some of these to make sure you are searching in the right place.
#this will write out the absolute file path, which is useful for checking that it is correct
#this tells you if this path exists
#this tells you if this is a file
let's start at the beginning
I'll try and explain what is happening in your code line by line.
if __name__ == '__main__':
# i don't really know what this infile is, is it a file containing
# d1s7za_.fasta.pssm
# d1s98a_.fasta.pssm
# d1s99a_.fasta.pssm
#or a directory containing files named
infile = Path('/Users/name/Desktop/PDB/training_set_pssm/idlist')
# this returns a list of string presumably in the form of
# d1ciya2.fasta\n
# d1ciya3.fasta\n
# d1cq3a_.fasta\n
idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
# loop over that list
for ids in idlist:
# strips the '\n' from the id and adds '.pssm'
# you now have something like 'd1d0qa_.fasta.pssm'
# you never use this
part2 = ids.rstrip() + '.pssm'
# was 'if os.path.isfile(infile) == True:' but should be :
if infile.is_file():
# strips the '\n' from the id and adds '.profile'
# you now have something like 'd1d0qa_.fasta.profile'
ofile = ids.rstrip() + '.profile'
# here is where it becomes a bit weird
# in relevant_lines you say:
# Takes list (extracted from a .pssm file) and extracts the Sequence Profile Portion only.
# is infile a .pssm file?
# is this correct?
profile_list = relevant_lines(infile)
# this seems fine, it writes the normalized data to ofile.
# ofile will be something like 'd1d0qa_.fasta.profile'
write_normalized_profile(profile_list, ofile)
if __name__ == '__main__':
    # Directory that is searched for the .pssm files, and the text file that
    # lists one id per line.
    pssm_directory = Path('/Users/name/Desktop/PDB/training_set_pssm/idlist/')
    idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
    for ids in idlist:
        # Each id still carries its newline; strip it before building names.
        infile = pssm_directory / (ids.rstrip() + '.pssm')
        if not infile.is_file():
            continue  # ids without a matching .pssm file are skipped
        ofile = ids.rstrip() + '.profile'
        profile_list = relevant_lines(infile)
        write_normalized_profile(profile_list, ofile)

if __name__ == '__main__':
#infile is a directory containing files named
infile = Path('/Users/name/Desktop/PDB/training_set_pssm/')
# this returns a list of string presumably in the form of
# d1ciya2.fasta\n
# d1ciya3.fasta\n
# d1cq3a_.fasta\n
idlist = lines_to_list("/Users/name/Desktop/PDB/training_set_idlist")
# loop over that list
for ids in idlist:
# strips the '\n' from the id and adds '.pssm'
# you now have something like 'd1d0qa_.fasta.pssm' **exactly, now it matches with the file to be extracted from the directory**
# you never use this
part2 = ids.rstrip() + '.pssm' **I think this should have been concat to the infile..**
# was 'if os.path.isfile(infile) == True:' but should be :
if infile.is_file():
# strips the '\n' from the id and adds '.profile'
# you now have something like 'd1d0qa_.fasta.profile'
ofile = ids.rstrip() + '.profile' **yes these will be the output file names**
# here is where it becomes a bit weird
# in relevant_lines you say:
# Takes list or lines of data (extracted from a .pssm file) and extracts the Sequence Profile Portion only.
# is infile a .pssm file? **yes it is, it is the actual datafile from the directory, well should be**
# is this correct?
profile_list = relevant_lines(infile)
# this seems fine, it writes the normalized data to ofile.
# ofile will be something like 'd1d0qa_.fasta.profile'
write_normalized_profile(profile_list, ofile) ``` **yes**


Python (on replit.com) only allowing one instance of one function to run at a time

Here is some code I'm working on requiring string and the opening and closing of files.
#Importing required Packages---------------------------------------------
import string
# Importing Datasets-----------------------------------------------------
allNames = open("allNames.csv", "r")
onlyNames = open("onlyNames.csv", "r")
# [1] findName(name, outputFile)-----------------------------------------
# Works ####
def findName(name, outputFile, sourceFile="allNames.csv"):
    """Write every distinct song row credited to ``name`` to a new file.

    The source file is re-opened on every call. Iterating a shared,
    module-level file handle only works once: after the first call the handle
    is at end-of-file, so every later call would see no rows and produce a
    file containing nothing but the header.

    Args:
        name: Value the last tab-separated field of a row must equal.
        outputFile: Output path without extension; ".csv" is appended.
        sourceFile: Tab-separated input file to scan.

    Rows with fewer than four fields are skipped. Both files are closed when
    the function returns, so the output is fully flushed to disk.
    """
    alreadyAdded = set()  # rows already written, used to drop duplicates
    with open(sourceFile, "r") as source, open(outputFile + ".csv", "w") as outfile:
        outfile.write("Artist \tSong \tYear\n")  # Initial title line
        for aline in source:
            # Strip the newline first so the last row matches even when the
            # file does not end with a newline.
            fields = aline.rstrip("\n").split("\t")
            if len(fields) < 4 or fields[-1] != name:
                continue
            dataline = fields[0] + "\t" + fields[1] + "\t" + fields[3]
            if dataline not in alreadyAdded:
                outfile.write(dataline + "\n")
                alreadyAdded.add(dataline)
# findName("Mary Anne", "mary anne")
# findName("Jack", "jack")
# findName("Mary", "mary")
# findName("Peter", "peter")
The code serves its intended purpose as I get an exported file. However, this only works for one function at a time, for example if I try to run both findName("Mary Anne", "mary anne") and findName("Jack", "jack") at the same time, the second instance of the function does not work. Moreover, all subsequent functions on the project file do not work unless I comment out this code.
Let me know what the issue is, thank you!

Parse updated text from a .txt file

I'm attempting to read and parse a .txt file that is continually being updated throughout the day. I want to parse only lines that have not already been consumed. These are then to be sent to a Telegram group.
At present, every time I run the script it parses everything.
# Question snippet: reads the whole log file and extracts a URL and a name
# from one '&'-separated line.
selections = []
msgList = []
urr = ""
name = ""
file1 = open(r'C:\\urlt\log.txt', 'r')
# readlines() always starts at the beginning of the file, so every run sees
# every line again.
Lines = file1.readlines()
# NOTE(review): ourLines is not defined anywhere in the visible snippet.
txt = Lines[ourLines].strip()
tlist = txt.split("&")
for subtxt in tlist:
if "eventurl=" in subtxt:
# Drop the 9-character "eventurl=" prefix and the last 3 characters.
a = subtxt[9:len(subtxt) - 3]
url = "www.beefandtuna.com/%23"+a.replace("%23", "/").strip('(')
urr = url
elif "bet=" in subtxt:
# NOTE(review): urllib is used here but no import is visible in the snippet.
name = urllib.parse.unquote(subtxt[4:len(subtxt)])
msg = url +" " '\n' "Name: "+ name
# NOTE(review): the body of this `if` is not visible; presumably it appends
# msg to msgList and sends it — confirm.
if msg not in msgList:
Assuming the new contents are appended to the end of the file: after you finish reading the file, create a copy of the file.
The next time you read the file, seek to the location that is the length of the copy.
import os
from shutil import copyfile
# Answer snippet: remember how much of the log was already read by keeping a
# copy of the log next to it and using the size of that copy.
in_file_loc = r'C:\\SmartBet.io Bot\placerlog.txt'
backup_file_loc = in_file_loc + ".bak"
while True:
file_backup_size = os.stat(backup_file_loc).st_size
# NOTE(review): as shown, this overwrites the size just read; presumably the
# two assignments sat in a try/except that is not visible here — confirm.
file_backup_size = 0
file1 = open(in_file_loc, 'r')
# move file position to the end of the old file
# NOTE(review): the seek call this comment describes is not visible here.
# Read all lines in the file after the position we seek-ed to
Lines = file1.readlines()
# copy current version of file to backup
copyfile(in_file_loc, backup_file_loc)
# Then do whatever you want to do with Lines
This is probably not the best way to do this because, as rici said in a comment below:
"make a copy" is not an atomic operation, and as the file grows copying will be successively slower. Any data appended to the log file during the copy will never be reported. Furthermore, the copy might happen to include a partial entry, in which case the next scan will start in the middle of an entry.
An alternative is to save the size of the current file in a different one:
# Answer snippet: persist the last-read size of the log in a side file instead
# of copying the whole log.
in_file_loc = r'C:\\SmartBet.io Bot\placerlog.txt'
size_file_loc = in_file_loc + ".lastsize"
while True:
# read old size from file
with open(size_file_loc, 'r') as f:
file_size = int(f.read())
# if error, file size is zero
# NOTE(review): as shown, this assignment is unconditional; presumably it sat
# in an except branch that is not visible here — confirm.
file_size = 0
file1 = open(in_file_loc, 'r')
# NOTE(review): no seek to file_size is visible before the read below.
Lines = file1.readlines()
new_file_size = file1.tell() # Get the location of the current file marker
# write new size to file
# NOTE(review): the body of this `with` (writing new_file_size) is not visible.
with open(size_file_loc, 'w') as f:
# Then do whatever you want to do with Lines

Append multiple lines in multiple files

I need to create a certain number of files that always have the same lines inside them.
With this script, I can create the "schede" folder in which a certain number of *.tex files are created.
Latex strings are written only in the last tab, while the others remain blank. How can I have all the forms filled in?
import os
import subprocess
# Question snippet: creates n empty S<i>.tex files with `touch`.
work_path = os.path.abspath(os.path.dirname(__file__))
# NOTE(review): the body of this `if` is not visible; presumably it creates
# the "schede" directory — confirm.
if not os.path.exists("schede"):
n = 5 #put the number as you wish
for i in range(n):
# file_name is rebound on every pass, so after the loop it holds only the
# name built in the final iteration.
file_name = "S"+str(i).zfill(1)+".tex"
subprocess.call(['touch', file_name]) # creates the empty .tex file (the original note says 34 files)
def append_new_line(file_name, text_to_append):
"""Append given text as a new line at the end of file"""
# Open the file in append & read mode ('a+')
with open(file_name, "a+") as file_object:
# Move read cursor to the start of file.
# NOTE(review): the seek this comment describes is not visible here.
# If file is not empty then append '\n'
# Reads at most 100 characters; only used to test whether anything is there.
data = file_object.read(100)
if len(data) > 0:
# NOTE(review): neither the body of this `if` nor the write of
# text_to_append is visible in this snippet.
# Append text at the end of file
def append_multiple_lines(file_name, lines_to_append):
# Appends each string in lines_to_append to file_name (per the comments
# below; the actual write statements are not visible in this snippet).
# Open the file in append & read mode ('a+')
with open(file_name, "a+") as file_object:
appendEOL = False
# Move read cursor to the start of file.
# NOTE(review): the seek this comment describes is not visible here.
# Check if file is not empty
data = file_object.read(100)
if len(data) > 0:
appendEOL = True
# Iterate over each string in the list
for line in lines_to_append:
# If file is not empty then append '\n' before first line for
# other lines always append '\n' before appending line
# NOTE(review): as shown, this branch only re-assigns True; presumably a
# write of '\n' and an `else:` were here — confirm.
if appendEOL == True:
appendEOL = True
# Append element at the end of file
def main():
append_new_line(file_name, 'This is second line')
print('Append multiple lines to a file in Python')
list_of_lines = [
'\ecvitem{\ecvhighlight{Organizzato da:}}{\\textbf{}}',
'\ecvitem{\ecvhighlight{Programma Formativo:}}{}',
'\item scrivere un\'item',
# Append strings in list as seperate new lines in the end of file
append_multiple_lines(file_name, list_of_lines)
if __name__ == '__main__':
As #MisterMiyagi said, file_name is a single variable, so every time you create a file its previous value is overwritten and only the last file name is left.
I suggest making a list called files = []
and instead of just file_name = "S"+str(i).zfill(1)+".tex"
add files.append(file_name)
to the end of the for loop where you create the files.
and change the main to do the instructions for each file.
def main():
    """Fill every generated .tex file with the same block of LaTeX lines.

    Iterates the module-level ``files`` list (the file names collected while
    the files were created) and, for each name, appends a single line and
    then the shared list of LaTeX lines.
    """
    # Raw strings keep the LaTeX backslashes literal without relying on
    # unrecognised escape sequences such as "\e" or "\i".
    list_of_lines = [
        r'\ecvitem{\ecvhighlight{Organizzato da:}}{\textbf{}}',
        r'\ecvitem{\ecvhighlight{Programma Formativo:}}{}',
        r"\item scrivere un'item",
    ]
    # The list is named `files`; iterating `file` would not visit the
    # collected names.
    for file_name in files:
        append_new_line(file_name, 'This is second line')
        print('Append multiple lines to a file in Python')
        # Append strings in list as separate new lines at the end of file
        append_multiple_lines(file_name, list_of_lines)

how to join incorporate splitted lines with replacing data from a file into the same string

So as most of us are thinking it's a duplicate which is not, so what I'm trying to achieve is let's say there is a Master string like the below and couple of files mentioned in it then we need to open the files and check if there are any other files included in it, if so we need to copy that into the line where we fetched that particular text.
Master String:
How are you
everything alright
How are you
everything alright
for msplitin [stext.split('\n')]:
for num, items in enumerate(stext,1):
if items.strip().startswith("here is") and items.strip().endswith(".txt"):
gmsf = open(os.path.join(os.getcwd()+"\txt", items[8:]), "r")
gmsfstr = gmsf.read()
newline = items.replace(items, gmsfstr)
How to join these replace items in the same string format.
Also, any idea on how to re-iterate the same function until there are no ".txt". So, once the join is done there might be other ".txt" inside a ".txt.
Thanks for your help in advance.
A recursive approach that works with any level of file name nesting:
from os import linesep
def get_text_from_file(file_path):
    """Return the text of ``file_path`` with nested file references expanded."""
    with open(file_path) as f:
        text = f.read()
    return SAK_replace(text)


def SAK_replace(s):
    """Replace each line of ``s`` that references a ``.txt`` file by its text.

    A line is a reference when, after stripping surrounding whitespace, it
    ends with ".txt". The file name is the line itself, or the part after a
    leading "here is " when that prefix is present. Referenced files are
    expanded recursively, so references inside referenced files are replaced
    as well.

    Lines are joined with a plain newline: text-mode file objects already
    translate newlines on write, so joining with ``os.linesep`` would produce
    doubled carriage returns on Windows.

    Raises:
        OSError: if a referenced file cannot be opened.
    """
    prefix = "here is "
    lines = s.splitlines()
    for index, line in enumerate(lines):
        candidate = line.strip()
        if not candidate.endswith('.txt'):
            continue
        if candidate.startswith(prefix):
            # Only the part after the prefix is the file name.
            candidate = candidate[len(prefix):].strip()
        lines[index] = get_text_from_file(candidate)
    return '\n'.join(lines)
You can try:
s = """Welcome
How are you
here is file.txt
everything alright
here is signature.txt
data = s.split("\n")
match = ['.txt']
all_matches = [s for s in data if any(xs in s for xs in match)]
for index, item in enumerate(data):
if item in all_matches:
data[index] ="XYZ"
data = "\n".join(data)
print data
How are you
everything alright
Added new requirement:
def file_obj(filename):
    """Print and return the text of ``filename`` with '.txt' lines masked.

    Every line that contains one of the substrings in ``match`` is replaced
    by the placeholder "XYZ"; all other lines are kept as they are.

    The file is read with ``read()`` because ``readlines()`` returns a list,
    which has no ``split`` method. The ``with`` block closes the file.
    """
    match = ['.txt']
    with open(filename, "r") as fo:
        data = fo.read().split("\n")
    data = ["XYZ" if any(xs in item for xs in match) else item for item in data]
    data = "\n".join(data)
    print(data)
    return data
We can create a temporary file object, keep the replaced lines in it, and once every line has been processed, write the new content back to the original file. The temporary file is deleted automatically once execution leaves the 'with' statement.
import tempfile
import re
file_pattern = re.compile(ur'(((\w+)\.txt))')
original_content_file_name = 'sample.txt'
sample.txt should have this content.
How are you
here is file.txt
everything alright
here is signature.txt
replaced_file_str = None
def replace_file_content():
replace the file content using temporary file object.
def read_content(file_name):
# matched file name is read and returned back for replacing.
content = ""
with open(file_name) as fileObj:
content = fileObj.read()
return content
# read the file and keep the replaced text in temporary file object(tempfile object will be deleted automatically).
with open(original_content_file_name, 'r') as file_obj, tempfile.NamedTemporaryFile() as tmp_file:
for line in file_obj.readlines():
if line.strip().startswith("here is") and line.strip().endswith(".txt"):
file_path = re.search(file_pattern, line).group()
line = read_content(file_path) + '\n'
# assign the replaced value to this variable
replaced_file_str = tmp_file.read()
# replace with new content to the original file
with open(original_content_file_name, 'w+') as file_obj:

How do I search a file for a string and replace it with multiple lines in Python?

I am running Python 3.5.1
I have a text file that I'm trying to search through and replace or overwrite text if it matches a predefined variable. Below is a simple example:
A Bunch of Nonsense Stuff
# More Stuff Goes HERE #
More stuff here
Outdated line of information that has no comment above - message_label
The last line in this example needs to be overwritten so the new file looks like below:
test2.txt after script
A Bunch of Nonsense Stuff
# More Stuff Goes HERE #
More stuff here
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
The function I have written idealAppend does not work as intended and subsequent executions create a bit of a mess. My workaround has been to separate the two lines into single line variables but this doesn't scale well. I want to use this function throughout my script with the ability to handle any number of lines. (if that makes sense)
#!/usr/bin/env python3
import sys, fileinput, os
def main():
file = 'test2.txt'
fullData = r'''
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
idealAppend(file, fullData)
def idealAppend(filename, data):
label = data.split()[-1] # Grab last word of the Append String
for line in fileinput.input(filename, inplace=1, backup='.bak'):
if line.strip().endswith(label) and line != data: # If a line 2 exists that matches the last word (label)
line = data # Overwrite with new line, comment, new line, and append data.
sys.stdout.write(line) # Write changes to current line
with open(filename, 'r+') as file: # Open File with rw permissions
line_found = any(data in line for line in file) # Search if Append exists in file
if not line_found: # If data does NOT exist
file.seek(0, os.SEEK_END) # Goes to last line of the file
file.write(data) # Write data to the end of the file
if __name__ == "__main__": main()
Workaround Script
This seems to work perfectly as long as I only need to write exactly two lines. I'd love this to be more dynamic when it comes to number of lines so I can reuse the function easily.
#!/usr/bin/env python3
import sys, fileinput, os
def main():
file = 'test2.txt'
comment = r'# This is an important line that needs to be copied'
append = r'Very Important Line of information that the above line is a comment for - message_label'
appendFile(file, comment, append)
def appendFile(filename, comment, append):
    """Replace the line carrying ``append``'s label, or add it at the end.

    The label is the last whitespace-separated word of ``append``. Every line
    of ``filename`` that ends with the label and is not already equal to
    ``append`` is replaced by a blank line, ``comment`` and ``append``. If
    ``append`` does not occur in the file afterwards, the same three-line
    block is written at the end of the file. A ``.bak`` copy of the original
    file is left next to it by ``fileinput``.

    Lines are compared without their trailing newline, so a line that is
    already up to date is left alone wherever it sits in the file, and a
    replaced line keeps its original line ending so the following line is not
    joined onto it.
    """
    label = append.split()[-1]  # Grab last word of the append string
    replacement = '\n' + comment + '\n' + append
    with fileinput.input(filename, inplace=True, backup='.bak') as stream:
        for line in stream:
            body = line.rstrip('\n')
            ending = line[len(body):]
            if body.strip().endswith(label) and body != append:
                line = replacement + ending
            # With inplace=True, stdout is redirected into the file.
            sys.stdout.write(line)
    with open(filename, 'r+') as file:
        line_found = any(append in line for line in file)
        if not line_found:
            file.seek(0, os.SEEK_END)
            file.write(replacement)
if __name__ == "__main__": main()
I am very new to Python so I'm hoping there is a simple solution that I overlooked. I thought it might make sense to try and split the fullData variable at the new line characters into a list or tuple, filter the label from the last item in the list, then output all entries but this is starting to move beyond what I've learned so far.
If I understand your issue correctly, you can just open the input and output files, then check whether the line contains old information and ends with the label and write the appropriate content accordingly.
with open('in.txt') as f, open('out.txt', 'r') as output:
for line in f:
if line.endswith(label) and not line.startswith(new_info):
If you want to update the original file instead of creating a second one, it's easiest to just delete the original and rename the new one instead of trying to modify it in place.
Is this what you are looking for ? It's looking for a label and then replaces the whole line with whatever you want.
A Bunch of Nonsense Stuff
# More Stuff Goes HERE #
More stuff here
Here is to be replaced - to_replace
#!/usr/bin/env python3
def main():
file = 'test2.txt'
label_to_modify = "to_replace"
replace_with = "# Blabla\nMultiline\nHello"
# Raw string stored in a file
file_replace_with = 'replace_with.txt'
with open(file_replace_with, 'r') as f:
replace_with = f.read()
appendFile(file, label_to_modify, replace_with)
def appendFile(filename, label_to_modify, replace_with):
new_file = []
with open(filename, 'r') as f:
for line in f:
if len(line.split()) > 0 and line.split()[-1] == label_to_modify:
with open(filename + ".bak", 'w') as f:
if __name__ == "__main__": main()
A Bunch of Nonsense Stuff
# More Stuff Goes HERE #
More stuff here
# Blabla
Reading over both answers I've come up with the following as the best solution i can get to work. It seems to do everything I need. Thanks Everyone.
#!/usr/bin/env python3
def main():
testConfFile = 'test2.txt' # /etc/apache2/apache2.conf
testConfLabel = 'timed_combined'
testConfData = r'''###This is an important line that needs to be copied - ##-#-####
Very Important Line of information that the above line is a \"r\" comment for - message_label'''
testFormatAppend(testConfFile, testConfData, testConfLabel) # Add new test format
def testFormatAppend(filename, data, label):
# Blanks every line of `filename` that ends with the last word of any line of
# `data`, then adds `data` as a new entry after the last line.
# NOTE(review): `label` is not used in the visible code.
dataSplit = data.splitlines()
fileDataStr = ''
with open(filename, 'r') as file:
# Maps 1-based line numbers to the file's lines.
fileData = stringToDictByLine(file)
for key, val in fileData.items():
for row in dataSplit:
# Compare against the last whitespace-separated word of each data row.
if val.strip().endswith(row.strip().split()[-1]):
fileData[key] = ''
fileLen = len(fileData)
if fileData[fileLen] == '':
fileLen += 1
fileData[fileLen] = data
# NOTE(review): presumably the next two statements belonged to an `else:`
# branch that is not visible here — confirm.
fileLen += 1
fileData[fileLen] = '\n' + data
for key, val in fileData.items():
fileDataStr += val
# NOTE(review): the body of this `with` (writing fileDataStr) is not visible.
with open(filename, 'w') as file:
def stringToDictByLine(data):
    """Return a dict mapping 1-based positions to the items yielded by ``data``.

    ``data`` may be any iterable, for example an open text file, in which
    case the values are the file's lines in order.
    """
    return dict(enumerate(data, start=1))
if __name__ == "__main__": main()

