My code looks like
from __future__ import print_function
import linecache
from random import choice
from string import digits, ascii_lowercase
import random, string
import fileinput
# Build a 10-character random tag from ASCII letters and digits and show it.
L = ''.join([random.choice(string.ascii_letters + string.digits) for n in xrange(10)])
print(L)
# Read input.txt and append "<number> <tag>" lines to output.txt.
# NOTE(review): indentation was lost in this paste, so the exact nesting of
# the statements below cannot be recovered with certainty.
with open("input.txt") as f:
for line in f:
ok = (line)
ok = line.strip()
# NOTE(review): `line` is a str read from the file, so this compares text
# against the int 6 rather than comparing the parsed number.
if line > 6:
# NOTE(review): rebinding `f` here shadows the input handle opened above.
f = open('output.txt', 'a' )
f.write( str(ok) +" " + str(L) + '\n')
f.close()
total_count = 0
# Rewrites output.txt in place: only lines containing `ok` are printed back.
# NOTE(review): presumably this runs once per input line and is what discards
# the earlier numbers -- confirm against the original indentation.
for line in fileinput.input('output.txt', inplace=True):
count = line.count(ok)
if count > 0:
print(line, end='')
total_count += count
print (total_count)
My input file is:
55
99
42
65
49
49
My problem is that it is supposed to save all of the numbers, but instead it only saves the last two. Any help would be appreciated.
Seems the fileinput module may be causing issues with correctly writing the file output, but I am not sure how that module works so I cannot suggest how to use it.
The main correction may be to use different file handler pointers for reading the input file and writing to the output file (don't reuse f as mentioned in the comments).
Here's some refactoring of your original code:
from __future__ import print_function
import linecache
from random import choice
from string import digits, ascii_lowercase
import random, string
from collections import defaultdict
def make_tag(length=10):
    """Return a random string of *length* ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    # range() rather than xrange() so the snippet runs on Python 2 and 3.
    return ''.join(random.choice(alphabet) for _ in range(length))


def select_entries(lines, minimum=6):
    """Return the stripped entries of *lines* whose integer value exceeds *minimum*.

    Blank entries are skipped; a non-numeric entry raises ValueError.
    """
    selected = []
    for raw in lines:
        text = raw.strip()
        if text and int(text) > minimum:
            selected.append(text)
    return selected


def count_first_column(lines):
    """Count how often each first whitespace-separated field occurs in *lines*.

    Returns a defaultdict(int), so looking up an absent field gives 0.
    """
    counts = defaultdict(int)
    for raw in lines:
        fields = raw.split()
        if fields:  # ignore blank lines instead of failing on fields[0]
            counts[fields[0]] += 1
    return counts


if __name__ == '__main__':
    L = make_tag()
    print(L)

    # Read the input once; the same list drives both the writing and the report.
    with open('input.txt', 'r') as f:
        entries = [line.strip() for line in f if line.strip()]

    # Use a separate handle for the output file (never reuse `f`), opened once
    # instead of once per input line.
    with open('output.txt', 'a') as ff:
        for ok in select_entries(entries):
            ff.write(ok + ' ' + L + '\n')

    # Count occurrences of each number in the first column of output.txt.
    with open('output.txt', 'r') as outfile:
        totals = count_first_column(outfile)

    # Report each distinct input number once, in order of first appearance.
    reported = set()
    for num in entries:
        if num not in reported:
            reported.add(num)
            print('{num} appears {count} times.'.format(num=num, count=totals[num]))
Example output after running script twice:
vlVHVi3t9L
55 appears 2 times.
99 appears 2 times.
42 appears 2 times.
65 appears 2 times.
49 appears 4 times.
Related
I am trying to create 3 different files and, in each file, write 3 lines of ten random letters.
Here is my code:
import string
import random
# Create data0.txt, data1.txt and data2.txt and write random lowercase
# letters into each of them.
# NOTE(review): indentation was lost in this paste; nesting is inferred.
for i in range(3):
with open('data%i.txt' % i, 'w+') as f:
line = 0
while line < 3:
for j in range(10):
myStr = random.choice(string.ascii_lowercase)
# Each letter is written as it is drawn; no newline is written anywhere,
# so all 30 letters end up on a single line.
f.write(myStr)
line = line + 1
and output:
dfzanwalkccdipukrbwsrzrbheceqi
I've tried to print them with including the newline character in the file write, but that instead prints 30 lines with a single letter per line. Any help is appreciated!
Write the newline in the line loop, not the character loop.
You can also change while line < 3: to for line in range(3):
# Write three files, each holding three lines of ten random lowercase letters.
for i in range(3):
    with open('data%i.txt' % i, 'w+') as f:
        for line in range(3):
            for j in range(10):
                myStr = random.choice(string.ascii_lowercase)
                f.write(myStr)
            # The newline belongs to the line loop: one per ten letters,
            # not one per letter.
            f.write("\n")
You can also use random.choices() to get 10 random characters at once, instead of looping.
# Same output as the nested-loop version, but each line is drawn in one call.
for i in range(3):
    with open('data%i.txt' % i, 'w+') as f:
        for line in range(3):
            # random.choices() returns a list of k letters; join them into
            # a single 10-character string.
            myStr = "".join(random.choices(string.ascii_lowercase, k=10))
            f.write(myStr + "\n")
I recently started scripting and am having difficulties with nested loops. I'm not getting the first iterator object from the first loop as an input to the second to run properly.
The problem itself is quite simple. I would like to change the second item (‘20’) on row 1 in my data to a number from the range and create a file.
So if the first number from the range is 14 then the first line of the file is (L,14,0,0,0,0) and gets a name data1.txt.
Data:
L,1,5.827,20,-4.705,0
L,20,0,0,0,0
L,12,15,0,-6,0
Original Script:
import re
from itertools import islice
import numpy as np
# Candidate values: start at 14, step 0.5, stop before 30.5.
x = np.arange(14,30.5,0.5)
size = x.size
# NOTE(review): indentation was lost in this paste; nesting is inferred.
with open('data.txt', 'r') as line:
# islice(line, 1, 2) yields only the second line of the file.
for line in islice(line, 1, 2):
# NOTE(review): the result of this expression is discarded, so `line`
# remains the original, unsplit string.
re.sub(r'\s', '', line).split(',')
nline = line[:2] + line[3:]
x = iter(x)
y = next(x)
for i in x:
# NOTE(review): every pass inserts `y` (the first value); the loop
# variable `i` is never used.
nline = nline[:2] + str(y)+ nline[3:]
with open('data.txt', 'r') as file:
data = file.readlines()
# Replace the second row of the file contents with the edited line.
data[1] = nline
for i in range(1,size):
with open('data%i.txt' %i, 'w') as file:
file.writelines(data)
EDITED:
I've made some progress with my script and I'm almost there.
After the first loop I have the output that I need (33 cases). All I would like to do now is write them to 33 unique files, named data1 to data33. What seems to happen, however, is that the second loop iterates through the first loop another 33 times and creates 1089 cases, so what ends up in the files is only the last line of the first loop.
Any suggestions how to allow the second loop for file creation but disable it for data?
Updated Script:
import re
from itertools import islice
import numpy as np
# Candidate values: start at 14, step 0.5, stop before 30.5.
x = np.arange(14,30.5,0.5)
size = x.size
# NOTE(review): indentation was lost in this paste; nesting is inferred.
with open('data.txt', 'r') as line:
# islice(line, 1, 2) yields only the second line of the file.
for line in islice(line, 1, 2):
# NOTE(review): the result of this expression is discarded.
re.sub(r'\s', '', line).split(',')
for i in x:
y=str(i)
# Splice the candidate value into the row by character position.
nline = line[:2] + y + line[4:]
with open('data.txt', 'r') as file:
data = file.readlines()
data[1] = nline
# NOTE(review): this loop reuses the name `i` and writes the same `data`
# to every file; presumably the source of the repeated last line -- confirm.
for i in range(1,size+1):
with open('data%i.txt' %i, 'w') as file:
file.writelines(data)
print data
It seems you are trying to concatenate a string to a list: nline = nline[:2] + str(y)+ nline[3:]. This will raise a TypeError.
Also nline[:2] gets the first 2 parts of the list, so you want to split nline[:1] to nline[2:]
Try something along the lines of:
import re
from itertools import islice
import numpy as np
def replace_field(row, index, value):
    """Return comma-separated *row* with field *index* replaced by *value*.

    Whitespace around every field (including the trailing newline) is
    dropped, and a single newline is appended to the result.

    Raises IndexError if *row* has no field at *index*.
    """
    fields = [part.strip() for part in row.split(',')]
    fields[index] = str(value)
    return ','.join(fields) + '\n'


if __name__ == '__main__':
    x = np.arange(14, 30.5, 0.5)

    with open('data.txt', 'r') as source:
        data = source.readlines()

    # One output file per candidate value, numbered from 1. Each file is the
    # original data with only the second field of the second row replaced,
    # so the data is edited and written in the same loop.
    for i, value in enumerate(x, start=1):
        variant = list(data)
        # '%g' renders 14.0 as "14" and 14.5 as "14.5".
        variant[1] = replace_field(data[1], 1, '%g' % value)
        with open('data%i.txt' % i, 'w') as out:
            out.writelines(variant)
I have tried this
import itertools
import numpy as np
# NOTE(review): indentation was lost in this paste; nesting is inferred.
with open('base.txt','r') as f:
# Convert every line to float, then to int.
lst = map(int, itertools.imap(float, f))
num=1200
for line in lst:
if num == line:
# Prints the matching value itself, not the position where it was found.
print (line)
Just prints 1200...
I then thought of using re:
import re
import itertools
# NOTE(review): indentation was lost in this paste; nesting is inferred.
with open('base.txt','r') as f:
# Convert every line to float, then to int.
lst = map(int, itertools.imap(float, f))
p = re.compile(r'(\d+)')
num=1200
for line in lst:
# NOTE(review): `line` is an int at this point (see the map above), while
# findall() is being handed it as the text to search.
if num in p.findall(line):
print line
But I got
File "a7.py", line 12, in <module>
if num in p.findall(line) :
TypeError: expected string or buffer
What I want is all the line numbers that contain 1200. The file has one numerical input per line; I have checked this.
Staying as close to your proposed solution as possible, this should print out the line numbers for all lines containing your chosen num.
import itertools
def matching_line_numbers(lines, num):
    """Return the 1-based positions of the entries in *lines* equal to *num*.

    Each entry is parsed as a float and truncated to an int before the
    comparison, as in the question's code. Blank entries never match but
    still count towards the line numbering.
    """
    return [
        line_number
        for line_number, text in enumerate(lines, start=1)
        if text.strip() and int(float(text)) == num
    ]


if __name__ == '__main__':
    num = 1200
    # Do the conversion while the file is still open; this avoids
    # itertools.imap, which only exists on Python 2.
    with open('base.txt', 'r') as f:
        for line_number in matching_line_numbers(f, num):
            print(line_number)
Edit
However, your code just truncates the floats in your file - it will not round them correctly. 1200.9 becomes 1200 instead of 1201, for instance.
If this isn't a problem in your case, that is fine. However, in general it would be better to change your
lst = map(int, itertools.imap(float, f))
function call to something like
lst = map(int,map(round, itertools.imap(float, f)))
You can use enumerate():
num = 1200  # value to look for
with open('base.txt', 'r') as f:
    # start=1 so the printed number is the line number, not a zero-based index.
    for i, line in enumerate(f, start=1):
        # Go through float() first so entries such as "1200.0" parse too.
        if num == int(float(line)):
            print(i)
If you just want to print the line numbers, then you need to keep track of what line you are on.
Also, this code doesn't read the entire contents of the file into memory at once. (Useful for large files).
def find_line_numbers(lines, num):
    """Yield the 1-based number of every line in *lines* whose value is *num*.

    Lines are consumed lazily, so a file object can be passed without
    reading the whole file into memory.
    """
    line_num = 0
    for line in lines:
        # Advance once per line, inside the loop; incrementing before the
        # loop would leave the counter stuck at 1.
        line_num += 1
        # Go through float() first so entries such as "1200.0" parse too.
        if int(float(line)) == num:
            yield line_num


if __name__ == '__main__':
    num = 1200
    with open('base.txt', 'r') as f:
        for line_num in find_line_numbers(f, num):
            print(line_num)
This is a simple question but I just can't solve it. I want to count the number of A for each sequence of lines. Please see the example below:
This is my input:
>sca200
ACACGTGYNNNN
ACGTCCCGWCNN
NNNNNNNNNA
>scaf500
AAAAAAAAAAAA
TTTTTTTTTTTT
WCWCWNNNN
>scaf201
AACACACACACC
GTGTGTGTGTGT
WWRRRYNNNNNN
NNNNNN
code:
#!/usr/bin/python
from __future__ import division
import sys
# Count the letter 'A' in the sequence lines of the FASTA file named on the
# command line.
# NOTE(review): indentation was lost in this paste; nesting is inferred.
# NOTE(review): the file handle is never closed in the visible code.
fasta = open(sys.argv[1], "r")
for line in fasta:
line = line.rstrip("\n")
if line.startswith(">"):
# A header line starts a new record: reset the count and print its name.
total_A = 0
print line[1:]
else:
A = line.count('A')
total_A = total_A + A
# Prints the running total after every sequence line, not once per record.
print total_A
The output is:
sca200
2
3
4
scaf500
12
12
12
scaf201
6
6
6
6
How can I get it to report only the final number?, that is:
sca200
4
scaf500
12
scaf201
6
This should solve your problem:
#!/usr/bin/python
from __future__ import division
import sys
def count_a_per_record(lines):
    """Yield ``(name, count)`` for each FASTA record found in *lines*.

    A record starts at a line beginning with ">"; *name* is that line
    without the ">" and *count* is the number of 'A' characters in the
    lines that follow, up to the next header. Lines that appear before the
    first header are ignored.
    """
    name = None
    total_A = 0
    for line in lines:
        line = line.rstrip("\n")
        if line.startswith(">"):
            # Emit the previous record only if there was one, so nothing
            # is reported ahead of the first header.
            if name is not None:
                yield name, total_A
            name = line[1:]
            total_A = 0
        elif name is not None:
            total_A += line.count('A')
    # The last record has no following header to trigger its output.
    if name is not None:
        yield name, total_A


if __name__ == '__main__':
    with open(sys.argv[1], "r") as fasta:
        for name, total_A in count_a_per_record(fasta):
            print(name)
            print(total_A)
You just want to print the total count of A just when a new fasta header starts.
Note: Edited to address a comment raised by #Lafexlos.
Try splitting the input by newlines and then output the total only when a line starts with > (which means the previous sequence is over). Remember to print the first and last records outside the loop.
#!/usr/bin/python
from __future__ import division
import sys
def count_a_by_header(text):
    """Return ``[(name, count), ...]`` for the FASTA records in *text*.

    A record starts at a line beginning with ">"; *count* is the number of
    'A' characters in the lines up to the next header. Blank lines and any
    lines before the first header contribute nothing.
    """
    records = []
    # splitlines() leaves no empty trailing entry, and startswith() is safe
    # on empty strings (indexing row[0] is not).
    for row in text.splitlines():
        if row.startswith('>'):
            records.append([row[1:], 0])
        elif records:
            records[-1][1] += row.count('A')
    return [tuple(record) for record in records]


if __name__ == '__main__':
    # reading file into str variable
    with open(sys.argv[1], "r") as fasta_file:
        fasta = fasta_file.read()

    for name, total in count_a_by_header(fasta):
        print(name)
        print(total)
Here is a one-liner solution:
from __future__ import print_function
import sys
import re
def format_a_counts(data):
    """Return one "name<newline>count" string per FASTA block in *data*.

    1. find all blocks of text and split each into two groups:
       (block_name, corresponding_TEXT) -- the name is the rest of the
       header line after '>', the text runs up to the next '>'
    2. loop through the blocks
    3. pair 'block_name' with the number of 'A's in corresponding_TEXT
    """
    return [
        '{0}\n{1}'.format(name, txt.count('A'))
        # Any header name is accepted, not only those starting with "sca".
        for name, txt in re.findall(r'>([^\n]*)([^>]*)', data)
    ]


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        data = f.read()

    # Print in a plain loop; the comprehension above only builds values.
    for entry in format_a_counts(data):
        print(entry)
Output:
sca200
4
scaf500
12
scaf201
6
I have a text file with a 1825 x 51 table. I am attempting to read into the text file and write this table to a new text file while removing certain columns from the table. I couldn't figure out how to delete a whole column because the list is a string so I am attempting to go into each row and use an if/else statement to determine if the element is in the desired range. If it is, write it to output, otherwise delete it.
# Fragment: decide whether element i belongs to one of the wanted columns.
# NOTE(review): indentation was lost in this paste; nesting is inferred.
if i in range(1,3)+[14]+range(20,27)+range(33,38)+[43]+[45]:
# These two lines read data[i] and store the same value back.
newNum=data[i]
data[i]=newNum
else:
# NOTE(review): presumably meant to remove the element (e.g. `del data[i]`);
# as written this only looks up an attribute named `data` on `delete` -- confirm.
delete.data
This is my first time using python so any help would be greatly appreciated!
code from comments
# Copy selected columns of the lines between the `begin` and `end` markers
# from the input file to the output file.
# NOTE(review): indentation was lost in this paste; nesting is inferred.
# NOTE(review): inputfilepath, outputfilepath, begin and end are not defined
# in the visible code.
with open(inputfilepath,'r') as f:
# NOTE(review): this handle is not closed anywhere in the visible code.
outputfile=open(outputfilepath,'w+')
inSection=False
for line in f:
if begin in line:inSection=True
if inSection:
keep=range(1,3)+[14]+range(20,27)+range(33,38)+[43]+[45]
tmp=[]
spl=line.split(' ')
for idx in keep:
# NOTE(review): extend() adds the characters of spl[idx] one at a time;
# append() would keep each field as a single item -- confirm intent.
tmp.extend(spl[idx])
outputfile.write('%s\n' % ' '.join(tmp))
if end in line:inSection=False
I would probably go with something like this:
from __future__ import print_function
def process(infile, keep):
    """Yield each line of *infile* reduced to the fields whose index is in *keep*.

    Lines are split on runs of whitespace; the surviving fields keep their
    original order and are joined with single spaces.
    """
    wanted = frozenset(keep)  # constant-time membership test per field
    for line in infile:
        fields = line.split()
        yield ' '.join(
            field for index, field in enumerate(fields) if index in wanted
        )


def main(infile, outfile):
    """Write the selected columns of every line of *infile* to *outfile*."""
    # Each range() is wrapped in list() so the concatenation works on both
    # Python 2 and Python 3 (a Python 3 range cannot be added to a list).
    keep = (
        list(range(1, 3)) + [14] + list(range(20, 27))
        + list(range(33, 38)) + [43] + [45]
    )
    for newline in process(infile, keep):
        outfile.write(newline + '\n')


if __name__ == '__main__':
    with open('so.txt') as infile, open('output.txt', 'w') as outfile:
        main(infile, outfile)
# Zero-based indices of the columns to keep. This is the set produced by the
# question's range(1,3)+[14]+range(20,27)+range(33,38)+[43]+[45]; range()
# excludes its end value, so 3, 27 and 38 are not part of it.
keep = [1, 2, 14, 20, 21, 22, 23, 24, 25, 26, 33, 34, 35, 36, 37, 43, 45]


def select_columns(ln, keep):
    """Return the fields of *ln* at the indices in *keep*, joined by spaces.

    The line is split on runs of whitespace, so the trailing newline never
    sticks to the last field. Raises IndexError if *ln* has too few fields.
    """
    spl = ln.split()
    return ' '.join(spl[idx] for idx in keep)


if __name__ == '__main__':
    # The output file must be opened with 'w': open() defaults to read-only
    # mode, in which fout.write() fails.
    with open('textfile') as fin, open('textout', 'w') as fout:
        for ln in fin:
            fout.write('%s\n' % select_columns(ln, keep))