Segmentation Fault - python

I am using python 2.4.4 (old machine, can't do anything about it) on a UNIX machine. I am extremely new to python/programming and have never used a UNIX machine before. This is what I am trying to do:
extract a single sequence from a FASTA file (proteins + nucleotides) to a temporary text file.
Give this temporary file to a program called 'threader'
Append the output from threader (called tempresult.out) to a file called results.out
Remove the temporary file.
Remove the tempresult.out file.
Repeat using the next FASTA sequence.
Here is my code so far:
import os
from itertools import groupby
# Open the FASTA input for reading and the cumulative results file for appending.
input_file = open('controls.txt', 'r')
output_file = open('results.out', 'a')
# Generator: yields one (header, '\n', sequence) tuple per FASTA record read from fasta_name.
def fasta_parser(fasta_name):
# NOTE(review): `input` shadows the builtin of the same name.
input = fasta_name
# groupby splits the lines into alternating runs: lines starting with ">" and lines that do not.
parse = (x[1] for x in groupby(input, lambda line: line[0] == ">"))
for header in parse:
# Take the first line of the ">" run; `.next()` is the Python 2 iterator method.
header = header.next()[0:].strip()
# The next run from groupby is consumed as the sequence lines for this header.
seq = "\n".join(s.strip() for s in parse.next())
yield (header, '\n', seq)
parsedfile = fasta_parser(input_file)
# Materialise all records so they can be addressed by index below.
mylist = list(parsedfile)
index = 0
while index < len(mylist):
temp_file = open('temp.txt', 'a+')
temp_file.write(' '.join(mylist[index]))
# NOTE(review): temp.txt is still open at this point, so the data written above may not
# have been flushed to disk when the external program reads the file.
os.system('threader' + ' temp.txt' + ' tempresult.out' + ' structures.txt')
os.remove('temp.txt')
# NOTE(review): `f` is never closed in this snippet.
f = open('tempresult.out', 'r')
data = str(f.read())
output_file.write(data)
os.remove('tempresult.out')
index +=1
output_file.close()
temp_file.close()
input_file.close()
When I run this script I get the error 'Segmentation Fault'. From what I gather this is to do with me messing with memory I shouldn't be messing with (???). I assume it is something to do with the temporary files but I have no idea how I would get around this.
Any help would be much appreciated!
Thanks!
Update 1:
Threader works fine when I give it the same sequence multiple times like this:
import os
# Control experiment: run threader three times on the same input and append each result.
input_file = open('control.txt', 'r')
output_file = open('results.out', 'a')
x=0
while x<3:
os.system('threader' + ' control.txt' + ' tempresult.out' + ' structures.txt')
# NOTE(review): `f` is never closed in this snippet.
f = open('tempresult.out', 'r')
data = str(f.read())
output_file.write(data)
# NOTE(review): this removes 'result.out', not the 'tempresult.out' that was just read —
# confirm which file was intended.
os.remove('result.out')
x += 1
output_file.close()
input_file.close()
Update 2: In the event that someone else gets this error: I forgot to close temp.txt before invoking the threader program.

Related

Python: Separating txt file to multiple files using a reoccuring symbol

I have a .txt file of amino acids separated by ">node" like this:
Filename.txt :
>NODE_1
MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI
KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY
GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
>NODE_2
MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD
MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV
PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
I want to separate this file into two (or as many as there are nodes) files;
Filename1.txt :
>NODE
MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI
KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY
GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
Filename2.txt :
>NODE
MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD
MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV
PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
With a number after the filename
This code works, however it deletes the ">NODE" line and does not create a file for the last node (the one without a '>' afterwards).
# Split a FASTA-style file into numbered .fasta files, one per record.
with open('FilePathway') as fo:
# op accumulates the sequence lines of the current record; start flags that a header was seen.
op = ''
start = 0
cntr = 1
for x in fo.read().split("\n"):
if x.startswith('>'):
# A record is only written when the NEXT header line is met, so the record after
# the last header is never flushed by this loop.
if start == 1:
with open (str(cntr) + '.fasta','w') as opf:
opf.write(op)
# Redundant: the enclosing `with` already closes opf.
opf.close()
op = ''
cntr += 1
else:
start = 1
else:
# Only non-header lines are appended to op, which is why the '>' line is absent
# from the output files.
if op == '':
op = x
else:
op = op + '\n' + x
# Redundant: the enclosing `with` already closes fo.
fo.close()
I can't seem to find the mistake. I would be thankful if you could point it out to me.
Thank you for your help!
Hi again! Thank you for all the comments. With your help, I managed to get it to work perfectly. For anyone with similar problems, this is my final code:
import os
import glob
# NOTE(review): indentation was lost in this listing, so the nesting of the statements
# below (in particular what sits inside the glob loop) cannot be confirmed from here.
folder_path = 'FilePathway'
for filename in glob.glob(os.path.join(folder_path, '*.fasta')):
with open(filename) as fo:
for line in fo.readlines():
# Remember the most recent header line seen in the file.
if line.startswith('>'):
original = line
# content buffers the lines of the record being built, seeded with a header line.
content = [original]
fileno = 1
# NOTE(review): self-assignment; has no effect.
filename = filename
# Output prefix: the input name with ".fasta" replaced by "_".
y = filename.replace(".fasta","_")
# Write the buffered record to '<y><fileno>.fasta' if it holds more than the header,
# then reset the buffer and advance the file counter.
def writefasta():
global content, fileno
if len(content) > 1:
with open(f'{y}{fileno}.fasta', 'w') as fout:
fout.write(''.join(content))
# `line` is not a parameter; it is read from the enclosing/module scope at call time.
content = [line]
fileno += 1
# NOTE(review): this opens the literal path 'FilePathway', not `filename` — confirm intent.
with open('FilePathway') as fin:
for line in fin:
if line.startswith('>NODE'):
writefasta()
else:
content.append(line)
# Final call flushes the last record, which has no header after it.
writefasta()
You could do it like this:
def writefasta(d, prefix='Filename', header='>NODE\n'):
    """Flush the record buffered in *d* to its own FASTA file, then reset *d*.

    Args:
        d: Mutable state dict with two keys: ``'content'`` (list of lines,
            the first being the header) and ``'fileno'`` (int used in the
            output file name). It is updated in place.
        prefix: Leading part of the output file name; the file written is
            ``f'{prefix}{fileno}.fasta'``.
        header: Header line used to seed the next record's buffer.

    Nothing is written (and the counter is not advanced) when the buffer
    holds only a header, so a header seen before any sequence lines does
    not produce an empty file.
    """
    if len(d['content']) > 1:
        with open(f'{prefix}{d["fileno"]}.fasta', 'w') as fout:
            fout.write(''.join(d['content']))
        # Start a fresh buffer for the next record and move to the next file number.
        d['content'] = [header]
        d['fileno'] += 1
# Shared state handed to writefasta: the line buffer for the current record
# (seeded with a header) and the number of the next output file.
D = {'content': ['>NODE\n'], 'fileno': 1}
with open('test.fasta') as fin:
    for line in fin:
        if not line.startswith('>NODE'):
            # Sequence line: add it to the record being built.
            D['content'].append(line)
            continue
        # Header line: flush whatever record was buffered before it.
        writefasta(D)
# The last record has no header after it, so flush it explicitly.
writefasta(D)
This would be a better way. It writes only on odd iterations, so the ">NODE" lines are skipped and files are created only for the real content.
# Write every odd-indexed line of filename.txt to its own numbered .txt file.
# NOTE(review): this relies on the input strictly alternating header line / single
# content line; a record whose sequence spans several lines would break the alternation.
with open('filename.txt') as fo:
cntr=1
for i,content in enumerate(fo.read().split("\n")):
# Index 0, 2, 4, ... are skipped; only indices 1, 3, 5, ... are written.
if i%2 == 1:
with open (str(cntr) + '.txt','w') as opf:
opf.write(content)
cntr += 1
By the way, since you are using a context manager, you don't need to close the file.
Context managers allow you to allocate and release resources precisely
when you want to. It opens the file, writes some data to it and then
closes it.
Please check: https://book.pythontips.com/en/latest/context_managers.html
# Split 'FileName' into 1.fasta, 2.fasta, ... with one file per record.
# A record is a line starting with '>' plus every line up to the next such line.
with open('FileName') as fo:
    cntr = 0
    opf = None  # output file of the record currently being copied
    try:
        for line in fo:
            if line.startswith('>'):
                # A new header starts a new record: finish the previous file first.
                if opf is not None:
                    opf.close()
                cntr += 1
                opf = open(f'{cntr}.fasta', 'w')
            # Lines before the first header belong to no record and are dropped.
            if opf is not None:
                opf.write(line)
    finally:
        # Close the last record's file, including when an error interrupts the loop.
        if opf is not None:
            opf.close()

Open two files. Read from the first file and do lookup on second before writing a line

I have two text files. I can open both with Python successfully.
I open the first file and read a data element into a variable using the for l in file construct.
I open the second file and read a data element into a variable using the for l in file construct.
If both variables match I write data to a text file. For the first line read it works perfectly but subsequent lines do not. The FIN variable never changes even though it finds a new line that starts with D further along. Is there a way to loop through two files like this? Am I missing something obvious?
# Filter comparionIP.txt into comparionIPmod.txt, replacing 'N' lines with
# NPI values looked up in NPINumbers.txt.
File2Split = 'c:\\temp\\datafile\\comparionIP.txt'
GetResident = 'c:\\temp\\datafile\\NPINumbers.txt'
writefile = open('c:\\temp\\datafile\\comparionIPmod.txt','w')
openfile = open(File2Split,'r')
# The lookup file is opened exactly once here.
openfileNPI = open(GetResident,'r')
FIN = ''
FirstChar = ''
FIN2 = ''
for l in openfile:
FirstChar = (l[0:1])
if FirstChar =='D':
# Characters 21..30 of a 'D' line are kept as the current FIN.
FIN = (l[21:31])
#print (FIN)
if FIN.startswith('1'):
writefile.write(l)
elif FirstChar in ['F','G','C','R']:
writefile.write(l)
elif FirstChar =='N':
# NOTE(review): iterating the same file object again does not restart it. After the
# first complete pass it is at end-of-file, so this loop yields nothing for later
# 'N' lines unless the file is rewound (seek(0)) or its contents were cached up front.
for l2 in openfileNPI:
FIN2 = (l2[0:10])
NPI = ('N' + (l2[11:21]))
if FIN2 == FIN:
writefile.write(NPI + '\n')
openfileNPI.close()
openfile.close()
writefile.close()

for loop only runs 21 times

Hello, I want to write my list to a .csv file.
This is my code
# Write each dict in `content` as one ';'-separated line of data.csv.
def writeCsv(self, content):
filename = 'data.csv'
# No encoding is passed, so the platform default encoding is used for writing.
f = open(filename, 'w')
# The header names six columns, but each row below writes five fields (no 'brand').
header = 'index;title;img;link;views;brand;\n'
f.write(header)
#print(len(content))
i = 0
for c in content:
# String concatenation: every value is expected to already be a str.
f.write(c['index'] + ";" + c['title'] + ';' + c['img'] + ';' + c['link'] + ';' + c['views'] + ";\n")
#i += 1
#print(i)
f.close()
My problem is that len(content) returns 72 but the loop only runs 21 times. (I print i every time in the loop and my .csv file only has 21 lines.)
Is there some limit or unknown parameter of the write() function that I am missing?
Update: I used Sayse's solution but added encoding='utf-8'. The problem was an illegal character in line 22.
As noted in the comments, the only thing that could cause this is malformed data (probably line 22) and you catching the broader exception.
Regardless, you should just use the csv module's DictWriter:
from csv import DictWriter
def writeCsv(self, content):
    """Write *content* to ``data.csv`` as ';'-delimited rows with a header line.

    Args:
        content: Iterable of dicts keyed by the column names below. A missing
            key is written as an empty field; a key that is not a column
            name makes ``DictWriter`` raise ``ValueError``.
    """
    filename = 'data.csv'
    field_names = ["index", "title", "img", "link", "views", "brand"]
    # newline='' leaves line endings to the csv module, as its documentation requires.
    # An explicit UTF-8 encoding avoids failures on characters that the platform
    # default encoding cannot represent.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        dict_writer = DictWriter(f, field_names, delimiter=";")
        dict_writer.writeheader()
        dict_writer.writerows(content)
Try this perhaps:
def writeCsv(self, content):
    """Write *content* to ``data.csv``, using the header names as row keys.

    Every line of the file ends with ';' and the file has no trailing
    newline. A row missing any header key is printed and left out of the
    file. A running row count is printed after each row as a debugging aid.

    Args:
        content: Iterable of dicts holding a value for every header name.
    """
    filename = 'data.csv'
    header = 'index;title;img;link;views;brand'
    keys = header.split(";")  # split once instead of once per row
    i = 0
    # `with` closes the file even if a row raises. UTF-8 keeps non-ASCII text writable.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(header)
        for c in content:
            try:
                # Build the whole row first so a missing key writes nothing at all.
                row = ";".join([str(c[k]) for k in keys])
            except KeyError:
                print(c)
            else:
                # The ";\n" prefix terminates the previous line before starting this one.
                f.write(";\n" + row)
            i += 1
            print(i)
        f.write(";")
Using the header as your indexes is cleaner, in my opinion, and wrapping your explicit key access in error handling could help you get past some snags. Also, based on how you are writing your output file, you will have an empty line at the end of your file. Presuming that you have amalgamated your data from some number of similar files, you likely have empty elements in your list.

Making a loop to write new lines to a txt file using python

I'm trying to get the script to read a text file of Congress members in which each line is formatted like this:
Darrell Issa (R-Calif)
I want it to print a line to a different file that's formatted like this (notice the added comma):
Darrell Issa,(R-Calif)
For some reason the script below works but it only does it for the first line. How do I get it to execute the loop for each line?
# Rewrite "Name (Party-State)" lines as "Name,(Party-State)".
basicfile = open('membersofcongress.txt', 'r')
for line in basicfile:
# Locate the parenthesised party/state group.
partyst = line.find('(')
partyend = line.find(')')
party = line[partyst:partyend+1]
# Everything before the '(' minus the one character just ahead of it (the space).
name = line[+0:partyst-1]
# NOTE(review): mode 'w' truncates the file on every open; if this open runs once per
# input line, earlier output is erased each time.
outfile = open('memberswcomma.txt','w')
outfile.write(name)
outfile.write(",")
# No newline is written after the party, so records are not separated by line breaks.
outfile.write(party)
outfile.close()
basicfile.close()
# Python 2 print statement.
print "All Done"
Thank you in advance for your help.
According to documentation,
'w' for only writing (an existing file with the same name will be
erased)
When you open your output file with 'w' inside the loop, the file is erased and recreated for every line. Using 'a' (append) would be better.
# Same conversion as in the question, but appending to the output file.
basicfile = open('membersofcongress.txt', 'r')
for line in basicfile:
partyst = line.find('(')
partyend = line.find(')')
party = line[partyst:partyend+1]
name = line[+0:partyst-1]
# Mode 'a' appends, so output written for earlier lines is kept.
outfile = open('memberswcomma.txt','a')
# A newline is added explicitly because `party` ends at the closing parenthesis.
outp = name + "," + party + "\n"
outfile.write(outp)
outfile.close()
basicfile.close()
EDIT:
A much better solution would be to open your output file once, before the loop, instead of inside it:
# Rewrite "Name (Party-State)" lines as "Name,(Party-State)".
# Both files are opened once, before the loop, and closed by `with` even on error.
with open('membersofcongress.txt', 'r') as basicfile, \
        open('memberswcomma.txt', 'w') as outfile:
    for line in basicfile:
        partyst = line.find('(')
        partyend = line.find(')')
        if partyst == -1 or partyend < partyst:
            # No "(...)" group on this line (find() returned -1); slicing with
            # those indices would produce garbage, so copy the line unchanged.
            outfile.write(line.rstrip("\n") + "\n")
            continue
        party = line[partyst:partyend + 1]
        # Drop the whitespace that separates the name from the parenthesis.
        name = line[:partyst].rstrip()
        outp = name + "," + party + "\n"
        outfile.write(outp)
ok a few things to fix this, use 'a' mode to open your outfile and open it just before the loop, close the outfile after the loop and not inside it.
something like this should work (tested it)
# Rewrite "Name (Party-State)" lines as "Name,(Party-State)", appending to the output.
# The output file is opened once before the loop and closed once after it.
with open('membersofcongress.txt', 'r') as basicfile, \
        open('memberswcomma.txt', 'a') as outfile:
    for line in basicfile:
        partyst = line.find('(')
        partyend = line.find(')')
        if partyst == -1 or partyend < partyst:
            # Without a "(...)" group find() returns -1 and the slices below
            # would be wrong; pass the line through untouched instead.
            outfile.write(line.rstrip("\n") + "\n")
            continue
        party = line[partyst:partyend + 1]
        name = line[:partyst].rstrip()
        outfile.write(name)
        outfile.write(",")
        outfile.write(party)
        # `party` stops at ')', so the line break must be written explicitly;
        # otherwise every record ends up on a single line.
        outfile.write("\n")
# Parenthesised form prints the same text under both Python 2 and Python 3.
print("All Done")

Python script how can it do short

I have written a script on a python "icecast server", and I changed some strings in "/etc/icecast2/icecast.xml" like this:
import os,sys,re
# Step 1: copy /etc/icecast2/icecast.xml to <pathh>icecast3.xml with the source password replaced.
# `pathh` and `x` are not defined anywhere in this snippet.
def ices2():
changedir=open(pathh + "icecast3.xml", "w")
data=open("/etc/icecast2/icecast.xml").read()
# NOTE(review): `% x` is applied to a replacement string that has no format placeholders.
changedir.write(re.sub("<source-password>hackme</source-password>","<source-password>123</source-password>" % x,data))
changedir.close()
ices2()
# Step 2: copy icecast3.xml to icecast2.xml with the relay password replaced, then delete icecast3.xml.
def ices1():
changedir1=open(pathh + "icecast2.xml", "w")
data=open(pathh + "icecast3.xml").read()
changedir1.write(re.sub("<relay-password>hackme</relay-password>", "<relay-password>123</relay-password>" % x,data))
changedir1.close()
os.remove(pathh + "icecast3.xml")
ices1()
# Step 3: write icecast2.xml back over /etc/icecast2/icecast.xml with the admin password
# replaced, then delete icecast2.xml.
def ices():
changedir2=open("/etc/icecast2/icecast.xml", "w")
data=open(pathh + "icecast2.xml").read()
changedir2.write(re.sub("<admin-password>hackme</admin-password>","<admin-password>123</admin-password>" % x,data))
changedir2.close()
os.remove(pathh + "icecast2.xml")
ices()
...but it's too long for the script. How can I shorten it? I need to do some changes in one file, open it to make changes and close it without any lost data. I know that it can be done in one function, but how to do it I don't know.
I need three changes in one function like this:
# Attempt to apply all three password replacements in a single function.
def ices():
changedir=open(pathh + "icecast3.xml", "w")
data=open("/etc/icecast2/icecast.xml").read()
# Each re.sub below starts again from the original `data`, so every write would emit a
# full copy of the file with only one of the three passwords changed.
changedir.write(re.sub("<source-password>hackme</source-password>","<source-password>123</source-password>",data))
# NOTE(review): changedir1 and changedir2 are never opened in this snippet.
changedir1.write(re.sub("<relay-password>hackme</relay-password>", "<relay-password>123</relay-password>",data))
changedir2.write(re.sub("<admin-password>hackme</admin-password>","<admin-password>123</admin-password>",data))
changedir.close()
I did it in one function, and my script is shorter than the one above, but it is wrong; I need to do it correctly.
changedir=open(pathh + "icecast3.xml", "w")
data=open("/etc/icecast2/icecast.xml").read()
Here I create a new file, pathh + "icecast3.xml" (pathh = /home/user/Downloads), but I need to open the file:
"/etc/icecast2/icecast.xml"
...read it and write changes to the same file.
All three functions do the same so you can join them into one. This is not complete solution but I think that you could go on from here on your own:
import os,sys,re
def ices(in_path, out_path, remove=False):
    """Copy *in_path* to *out_path* with the default source password replaced.

    Args:
        in_path: File to read.
        out_path: File to write. May be the same file as *in_path*, in which
            case the file is rewritten in place.
        remove: If true, delete *in_path* afterwards. Skipped when both
            paths refer to the same file, so the result is never deleted.
    """
    # Read everything before opening the output: opening with "w" truncates,
    # which would destroy the input when both paths name the same file.
    with open(in_path, 'r') as src:
        data = src.read()
    data = re.sub("<source-password>hackme</source-password>",
                  "<source-password>123</source-password>", data)
    with open(out_path, "w") as changedir:
        changedir.write(data)
    if remove and not os.path.samefile(in_path, out_path):
        os.remove(in_path)
You can call this function with:
ices(base_path + 'icecast2.xml', base_path + 'icecast3.xml', True)
Hints:
it's better to use os.path.join for creating the full paths (as opposed to string concatenation)
look at the with statement and consider using it for increased readability
EDIT (respecting the clarification in comment):
Sorry I missed the different strings in write. You can do it simply like this:
# Replace the three default passwords in one pass over the file's text.
with open(filename, 'r') as f:
    data = f.read()
for tag in ['source', 'relay', 'admin']:
    # Yields e.g. "<source-password>%s</source-password>"; the %s slot is then
    # filled with the old and the new password respectively.
    sub_str = "<{tag_name}>%s</{tag_name}>".format(tag_name=tag + '-password')
    # Each substitution works on the result of the previous one, so all
    # three changes accumulate in `data`.
    data = re.sub(sub_str % 'hackme', sub_str % '123', data)
# The result goes to a sibling ".new" file; the original is left untouched.
with open(filename + '.new', 'w') as f:
    f.write(data)

Categories

Resources