Function to randomly read a line from a text file - python

I have to create a function that reads a random line from a text file in python.
I have the following code but am not able to get it to work
import random
def randomLine(filename):
    """Return one line chosen uniformly at random from a text file.

    The file is read once from start to end while a single candidate line
    is kept (reservoir sampling), so memory use does not grow with the
    length of the file.

    Args:
        filename: Path of the text file to sample from.

    Returns:
        The chosen line, including its trailing newline if it has one, or
        ``''`` when the file is empty.
    """
    it = ''
    # ``with`` closes the file even if reading raises.
    with open(filename, "r") as fh:
        for lineNum, aLine in enumerate(fh, start=1):
            # Keep the current line with probability 1/lineNum; after the
            # whole file is read every line was equally likely to survive.
            if random.uniform(0, lineNum) < 1:
                it = aLine
    return it
print(randomLine(testfile.txt))
I got this far but need help to go further. Please help.
When I run the program, I get an error saying:
print(randomLine(testfile.txt))
NameError: name 'testfile' is not defined

Here's a version that's been tested to work, and avoids empty lines.
Variable names are verbose for clarity.
import random
import sys
def random_line(file_handle):
    """Return a randomly chosen non-blank line, stripped of whitespace.

    Args:
        file_handle: An open, readable text file object; it is read to the
            end via ``readlines()``.

    Returns:
        One of the non-blank lines, with leading and trailing whitespace
        removed. Every non-blank line is equally likely.

    Raises:
        ValueError: If the input contains no non-blank line. (Retrying
            random picks until a non-blank one turns up would never
            terminate in that case.)
    """
    # Drop blank lines up front instead of re-drawing until a non-blank
    # line happens to be picked.
    candidates = [
        stripped
        for stripped in (line.strip() for line in file_handle.readlines())
        if stripped
    ]
    if not candidates:
        raise ValueError("no non-blank lines to choose from")
    return random.choice(candidates)
# Sample from the file named on the command line, or from stdin when no
# file name is given.
if len(sys.argv) < 2:
    sys.stderr.write("Reading stdin\n")
    # stdin is owned by the interpreter, so it is deliberately not closed.
    print(random_line(sys.stdin))
else:
    # ``with`` closes the file even if random_line() raises.
    with open(sys.argv[1]) as file_handle:
        print(random_line(file_handle))

Related

Python: Separating txt file to multiple files using a reoccuring symbol

I have a .txt file of amino acids separated by ">node" like this:
Filename.txt :
>NODE_1
MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI
KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY
GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
>NODE_2
MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD
MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV
PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
I want to separate this file into two (or as many as there are nodes) files;
Filename1.txt :
>NODE
MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI
KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY
GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
Filename2.txt :
>NODE
MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD
MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV
PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
With a number after the filename
This code works, however it deletes the ">NODE" line and does not create a file for the last node (the one without a '>' afterwards).
with open('FilePathway') as fo:
op = ''
start = 0
cntr = 1
for x in fo.read().split("\n"):
if x.startswith('>'):
if start == 1:
with open (str(cntr) + '.fasta','w') as opf:
opf.write(op)
opf.close()
op = ''
cntr += 1
else:
start = 1
else:
if op == '':
op = x
else:
op = op + '\n' + x
fo.close()
I can't seem to find the mistake. I would be thankful if you could point it out to me.
Thank you for your help!
Hi again! Thank you for all the comments. With your help, I managed to get it to work perfectly. For anyone with similar problems, this is my final code:
import os
import glob
folder_path = 'FilePathway'


def split_records(lines):
    """Group FASTA lines into records, one list of lines per record.

    Args:
        lines: Iterable of lines (each keeping its line ending).

    Returns:
        A list of records. A new record starts at every line beginning
        with '>', and that header line is kept as the record's first line.
        Lines that appear before the first header form a record of their
        own.
    """
    records = []
    for line in lines:
        if line.startswith('>') or not records:
            records.append([])
        records[-1].append(line)
    return records


# Split every .fasta file in the folder into one file per record, named
# <original name>_<n>.fasta and written next to the input file.
# NOTE: the outputs also end in ".fasta", so running the script a second
# time on the same folder would pick them up as inputs.
for filename in glob.glob(os.path.join(folder_path, '*.fasta')):
    with open(filename) as fin:
        records = split_records(fin)
    y = filename.replace(".fasta", "_")
    fileno = 1
    for content in records:
        # A header with no sequence lines is not worth a file of its own.
        if len(content) > 1:
            with open(f'{y}{fileno}.fasta', 'w') as fout:
                fout.write(''.join(content))
            fileno += 1
You could do it like this:
def writefasta(d):
    """Write the record accumulated in *d* to its own numbered file.

    Args:
        d: Mutable state dict with two keys: ``'content'``, a list of
            lines whose first element is the header line, and
            ``'fileno'``, the number used in the output file name.

    Nothing is written while ``d['content']`` holds only the header. After
    a write, ``d['content']`` is reset to a fresh ``>NODE`` header line and
    ``d['fileno']`` is incremented, so file numbering starts at the
    initial ``fileno`` and has no gaps.
    """
    if len(d['content']) > 1:
        with open(f'Filename{d["fileno"]}.fasta', 'w') as fout:
            fout.write(''.join(d['content']))
        # Reset only after an actual write; doing it unconditionally would
        # burn a file number on the very first header.
        d['content'] = ['>NODE\n']
        d['fileno'] += 1
with open('test.fasta') as fin:
    # Running state shared with writefasta(): the lines of the record being
    # collected and the number of the next output file.
    D = {'content': ['>NODE\n'], 'fileno': 1}
    for line in fin:
        if line.startswith('>NODE'):
            # A new header closes the record collected so far.
            writefasta(D)
        else:
            D['content'].append(line)
    # Flush the last record, which has no header after it.
    writefasta(D)
This would be a better way. It writes only on odd iterations, so the ">NODE" lines are skipped and files are created only for the real content.
with open('filename.txt') as fo:
    cntr = 1
    # NOTE: only the odd-indexed lines are written, so this assumes the
    # input strictly alternates one header line with one sequence line.
    for i, content in enumerate(fo.read().split("\n")):
        if i % 2 == 1:
            with open(str(cntr) + '.txt', 'w') as opf:
                opf.write(content)
            cntr += 1
By the way, since you are using a context manager, you don't need to close the file.
Context managers allow you to allocate and release resources precisely
when you want to. It opens the file, writes some data to it and then
closes it.
Please check: https://book.pythontips.com/en/latest/context_managers.html
# Write every line of the input to its own numbered .fasta file.
with open('FileName') as fo:
    for cntr, line in enumerate(fo, start=1):
        # Both files are closed by their ``with`` blocks; no explicit
        # close() calls are needed.
        with open(f'{cntr}.fasta', 'w') as opf:
            opf.write(line)

How to replace a string in a file?

I have 2 numbers in two similar files. There is a new.txt and original.txt. They both have the same string in them except for a number. The new.txt has a string that says boothNumber="3". The original.txt has a string that says boothNumber="1".
I want to be able to read the new.txt, pick the number 3 out of it and replace the number 1 in original.txt.
Any suggestions? Here is what I am trying.
import re # used to replace string
import sys # some of these are use for other code in my program
def readconfig():
with open("new.text") as f:
with open("original.txt", "w") as f1:
for line in f:
match = re.search(r'(?<=boothNumber=")\d+', line)
for line in f1:
pattern = re.search(r'(?<=boothNumber=")\d+', line)
if re.search(pattern, line):
sys.stdout.write(re.sub(pattern, match, line))
When I run this, my original.txt gets completely cleared of any text.
I did a traceback and I get this:
in readconfig
for line in f1:
io.UnsupportedOperation: not readable
UPDATE
I tried:
def readconfig(original_txt_path="original.txt",
new_txt_path="new.txt"):
with open(new_txt_path) as f:
for line in f:
if not ('boothNumber=') in line:
continue
booth_number = int(line.replace('boothNumber=', ''))
# do we need check if there are more than one 'boothNumber=...' line?
break
with open(original_txt_path) as f1:
modified_lines = [line.startswith('boothNumber=') if not line
else 'boothNumber={}'.format(booth_number)
for line in f1]
with open(original_txt_path, mode='w') as f1:
f1.writelines(modified_lines)
And I get error:
booth_number = int(line.replace('boothNumber=', ''))
ValueError: invalid literal for int() with base 10: '
(workstationID="1" "1" window=1= area="" extra parts of the line here)\n
the "1" after workstationID="1" is where the boothNumber=" " would normally go. When I open up original.txt, I see that it actually did not change anything.
UPDATE 3
Here is my code in full. Note, the file names are changed but I'm still trying to do the same thing. This is another idea or revision I had that is still not working:
import os
import shutil
import fileinput
import re # used to replace string
import sys # prevents extra lines being inputed in config
# example: sys.stdout.write
def convertconfig(pattern):
source = "template.config"
with fileinput.FileInput(source, inplace=True, backup='.bak') as file:
for line in file:
match = r'(?<=boothNumber=")\d+'
sys.stdout.write(re.sub(match, pattern, line))
def readconfig():
source = "bingo.config"
pattern = r'(?<=boothNumber=")\d+' # !!!!!!!!!! This probably needs fixed
with fileinput.FileInput(source, inplace=True, backup='.bak') as file:
for line in file:
if re.search(pattern, line):
fileinput.close()
convertconfig(pattern)
def copyfrom(servername):
source = r'//' + servername + '/c$/remotedirectory'
dest = r"C:/myprogram"
file = "bingo.config"
try:
shutil.copyfile(os.path.join(source, file), os.path.join(dest, file))
except:
print ("Error")
readconfig()
# begin here
os.system('cls' if os.name == 'nt' else 'clear')
array = []
with open("serverlist.txt", "r") as f:
for servername in f:
copyfrom(servername.strip())
bingo.config is my new file
template.config is my original
It's replacing the number in template.config with the literal string "r'(?<=boothNumber=")\d+'"
So template.config ends up looking like
boothNumber="r'(?<=boothNumber=")\d+'"
instead of
boothNumber="2"
To find the boothNumber value we can use the following regular expression (checked with regex101):
(?<=\sboothNumber=\")(\d+)(?=\")
Something like this should work
import re
import sys # some of these are use for other code in my program
# Matches only the digits of a  boothNumber="<digits>"  attribute; the
# look-behind and look-ahead keep the attribute name and the quotes out of
# the match. A raw string is required so ``\s`` and ``\d`` reach the regex
# engine instead of being treated as (invalid) string escapes.
BOOTH_NUMBER_RE = re.compile(r'(?<=\sboothNumber=")(\d+)(?=")')
search_booth_number = BOOTH_NUMBER_RE.search
replace_booth_number = BOOTH_NUMBER_RE.sub


def readconfig(original_txt_path="original.txt",
               new_txt_path="new.txt"):
    """Copy the booth number found in one file into another file.

    Args:
        original_txt_path: File whose ``boothNumber="..."`` values are
            rewritten in place.
        new_txt_path: File the new booth number is read from; the first
            line containing a ``boothNumber="..."`` attribute wins.

    Returns:
        None. If ``new_txt_path`` contains no booth number, the original
        file is left untouched.
    """
    with open(new_txt_path) as f:
        for line in f:
            search_res = search_booth_number(line)
            if search_res is not None:
                booth_number = int(search_res.group(0))
                break
        else:
            # No 'boothNumber=...' attribute was found, so there is
            # nothing to copy over.
            return

    replacement = str(booth_number)
    with open(original_txt_path) as f:
        # sub() returns lines without a match unchanged, so no separate
        # search is needed before substituting.
        modified_lines = [replace_booth_number(replacement, line)
                          for line in f]
    with open(original_txt_path, mode='w') as f:
        f.writelines(modified_lines)
Test
# Preparation: create a "new" file carrying booth number 56565 and an
# "original" file carrying booth number 23.
with open('new.txt', mode='w') as f:
    f.write('some\n')
    f.write('<jack Fill workstationID="1" boothNumber="56565" window="17" Code="" area="" section="" location="" touchScreen="False" secureWorkstation="false">')
with open('original.txt', mode='w') as f:
    f.write('other\n')
    f.write('<jack Fill workstationID="1" boothNumber="23" window="17" Code="" area="" section="" location="" touchScreen="False" secureWorkstation="false">')
# Invocation
readconfig()
# Checking output
with open('original.txt') as f:
    for line in f:
        # stripping newline character
        print(line.rstrip('\n'))
gives
other
<jack Fill workstationID="1" boothNumber="56565" window="17" Code="" area="" section="" location="" touchScreen="False" secureWorkstation="false">

Calculating the average in python

I am writing a program that prompts for a file name, then opens that file and reads through it, looking for lines of the form:
X-DSPAM-Confidence: 0.8475
I want to count these lines and extract the floating point values from each of the lines and compute the average of those values. Can I please get some help. I just started programming so I need something very simple. This is the code I have already written.
fname = raw_input("Enter file name: ")
if len(fname) == 0:
fname = 'mbox-short.txt'
fh = open(fname,'r')
count = 0
total = 0
#Average = total/num of lines
for line in fh:
if not line.startswith("X-DSPAM-Confidence:"): continue
count = count+1
print line
Try:
total += float(line.split(' ')[1])
so that total / count gives you the answer.
Iterate over the file (using the context manager ("with") handles the closing automatically), looking for such lines (like you did), and then read them in like this:
fname = raw_input("Enter file name:")
if not fname:
    fname = "mbox-short.txt"
scores = []
with open(fname) as f:
    for line in f:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        # The line has the form "X-DSPAM-Confidence: 0.8475"; keep the number.
        _, score = line.split()
        scores.append(float(score))
# Guard against a file without any matching line, which would otherwise
# raise ZeroDivisionError.
if scores:
    print(sum(scores) / len(scores))
else:
    print("No X-DSPAM-Confidence lines found")
Or a bit more compact:
def mean(x):
    """Return the arithmetic mean of the non-empty sequence of numbers *x*."""
    return sum(x) / len(x)
with open(fname) as f:
    # The filter goes after the ``for`` clause and must test the loop
    # variable itself; only matching lines are parsed.
    result = mean([float(l.split()[1]) for l in f
                   if l.startswith("X-DSPAM-Confidence:")])
A program like the following should satisfy your needs. If you need to change what the program is looking for, just change the PATTERN variable to describe what you are trying to match. The code is written for Python 3.x but can be adapted for Python 2.x without much difficulty if needed.
Program:
#! /usr/bin/env python3
import re
import statistics
import sys
PATTERN = r'X-DSPAM-Confidence:\s*(?P<float>[+-]?\d*\.\d+)'
def main(argv):
    """Calculate the average X-DSPAM-Confidence from a file.

    Args:
        argv: Command-line argument list; ``argv[1]``, when present, is the
            file name. Otherwise the user is prompted for one. An empty
            answer or ``'default'`` selects ``'mbox-short.txt'``.

    Returns:
        Process exit status: 0 after printing the average, 1 when the file
        contains no matching value.
    """
    filename = argv[1] if len(argv) > 1 else input('Filename: ')
    if filename in {'', 'default'}:
        filename = 'mbox-short.txt'
    try:
        average = statistics.mean(get_numbers(filename))
    except statistics.StatisticsError:
        # statistics.mean() refuses empty input; report it instead of
        # ending with a traceback.
        print('No X-DSPAM-Confidence values found in', filename,
              file=sys.stderr)
        return 1
    print('Average:', average)
    return 0
def get_numbers(filename):
    """Extract all X-DSPAM-Confidence values from the named file.

    Args:
        filename: Path of the text file to scan.

    Yields:
        Each value captured by the ``float`` group of PATTERN, converted to
        ``float``, in file order. Matching is case-insensitive.
    """
    with open(filename) as file:
        for line in file:
            for match in re.finditer(PATTERN, line, re.IGNORECASE):
                yield float(match.group('float'))
if __name__ == '__main__':
sys.exit(main(sys.argv))
You may also implement the get_numbers generator in the following way if desired.
Alternative:
def get_numbers(filename):
    """Extract all X-DSPAM-Confidence values from the named file.

    Args:
        filename: Path of the text file to scan.

    Yields:
        Each value captured by the ``float`` group of PATTERN, converted to
        ``float``, in file order. Matching is case-insensitive.
    """
    with open(filename) as file:
        # Delegate to a generator expression; the file stays open until the
        # expression is exhausted.
        yield from (float(match.group('float'))
                    for line in file
                    for match in re.finditer(PATTERN, line, re.IGNORECASE))

What is wrong with this code? I'm trying to insert this file

I am trying to insert a file and I keep getting a syntax error on the line line = infile.redline()
def main():
# Declare variables
line = ''
counter = 0
# Prompt for file name
fileName = input('Enter the name of the file: ')
# Open the specified file for reading
infile = open('test.txt', 'r')
# Priming read
line = infile.redline()
counter = 1
# Read in and display first five lines
while line != '' and counter <= 5:
# Strip '\n'
line = line.rtrip('\n')
print(line)
1ine = infile.readline()
# Update counter when line is read
counter +=1
# Close file
infile.close()
# Call the main function.
main()
rtrip should be rstrip. redline should be readline. infile.close() should be indented, and main() should not be.
However, the most serious problem is here:
1ine = infile.readline()
That first character is a one, not an L.
Knowing the standard libraries can make your life much simpler!
from itertools import islice
def main():
    """Prompt for a file name and print that file's first five lines."""
    fname = input('Enter the name of the file: ')
    with open(fname) as inf:
        for line in islice(inf, 5):    # get the first 5 lines
            # rstrip() drops the newline (and any other trailing
            # whitespace) so print() does not double-space the output.
            print(line.rstrip())


if __name__=="__main__":
    main()
It is not redline but readline:
line = infile.redline()

Reading through file 4 lines at a time

import os
filePath = "C:\\Users\\siba\\Desktop\\1x1x1.blb"
BrickName = (os.path.splitext(os.path.basename(filePath))[0])
import sys
def ImportBLB(filePath):
file = open(filePath)
line = file.readline()
while line:
if(line == "POSITION:\n"):
POS1 = file.next()
POS2 = file.next()
POS3 = file.next()
POS4 = file.next()
sys.stdout.write(POS1)
sys.stdout.write(POS2)
sys.stdout.write(POS3)
sys.stdout.write(POS4)
return
line = file.readline()
file.close()
return
ImportBLB(filePath)
I'm attempting to read through the file four lines at a time upon locating the line "POSITION:", but this only outputs the first four lines due to the return statement ending the loop.
Removing the return statement gives me a "ValueError: Mixing iteration and read methods would lose data" error, how would I get around this?
Replace your logic with this:
with open(file_path) as f:
    # The for loop and next() consume the same file iterator, so the four
    # lines read below are skipped by the outer loop. The loop ends by
    # itself at end of file; a line read from a file is never an empty
    # string, so no separate emptiness check is needed.
    for line in f:
        if line == "POSITION:\n":
            for _ in range(4):
                sys.stdout.write(next(f))
edit: As your comment stated, you want 4 variables; 1 for each line. replace the last part with this:
lines = [next(f) for _ in range(4)]
This will give you a list with 4 items (the 4 lines you want) if you would prefer individual variables:
line1, line2, line3, line4 = [next(f) for _ in range(4)]
I used a little bit of both of the above suggestions, and this is now my code:
import os
# Raw string: in a normal literal "\U" is a syntax error on Python 3 and
# "\1" is read as an octal escape, so the path would be corrupted.
filePath = r"C:\Users\siba\Desktop\1x1x1.blb"
BrickName = (os.path.splitext(os.path.basename(filePath))[0])
import sys
def ImportBLB(filePath):
    """Echo the four lines that follow every ``POSITION:`` line of a file.

    Args:
        filePath: Path of the text file to scan.

    Returns:
        None. For each line consisting solely of ``POSITION:``, the next
        four lines are written unchanged to ``sys.stdout``. If the file
        ends before four lines are available, the lines that do exist are
        written and scanning stops.
    """
    # ``with`` closes the file on every exit path, including errors.
    with open(filePath) as blb:
        # Iterating with ``for`` and pulling extra lines with next() use
        # the same iterator, so this works on Python 2 and 3 and copes
        # with an empty file.
        for line in blb:
            if line == "POSITION:\n":
                for _ in range(4):
                    pos_line = next(blb, None)
                    if pos_line is None:
                        # File ended in the middle of a POSITION group.
                        return
                    sys.stdout.write(pos_line)
    return
ImportBLB(filePath)

Categories

Resources