Python printing with user defined functions - python

I'm trying to write a code that will take data from a file and write it differently. I have the code for the most part but when i run it, everything is on one line.
import csv
#Step 4
def read_data(filename):
try:
data = open("dna.txt", "r")
except IOError:
print( "File not found")
return data
#Step 5
def get_dna_stats(dna_string):
a_letters = ""
t_letters = ""
if "A" in dna_string:
a_letters.append("A")
if "T" in dna_string:
t_letters.append("T")
nucleotide_content = ((len(a_letters) + len(t_letters))/len(dna_string))
#Step 6
def get_dna_complement(dna_string):
dna_complement = ""
for i in dna_string:
if i == "A":
dna_complement.append("T")
elif i == "T":
dna_complement.append("A")
elif i == "G":
dna_complement.append("C")
elif i == "C":
dna_complement.append("G")
else:
break
return dna_complement
#Step 7
def print_dna(dna_strand):
dna_complement = get_dna_complement(dna_strand)
for i in dna_strand:
for j in dna_complement:
print( i + "=" + j)
#Step 8
def get_rna_sequence(dna_string):
rna_complement = ""
for i in dna_string:
if i == "A":
rna_complement.append("U")
elif i == "T":
rna_complement.append("A")
elif i == "G":
rna_complement.append("C")
elif i == "C":
rna_complement.append("G")
else:
break
return rna_complement
#Step 9
def extract_exon(dna_strand, start, end):
return (f"{dna_strand} between {start} and {end}")
#Step 10
def calculate_exon_pctg(dna_strand, exons):
exons_length = 0
for i in exons:
exons_length += 1
return exons_length/ len(dna_strand)
#Step 11
def format_data(dna_string):
x = "dna_strand"[0:62].upper()
y = "dna_strand"[63:90].lower()
z = "dna_strand"[91:-1].upper()
return x+y+z
#Step 12
def write_results(output, filename):
try:
with open("output.csv","w") as csvFile:
writer = csv.writer(csvFile)
for i in output:
csvFile.write(i)
except IOError:
print("Error writing file")
#Step 13
def main():
read_data("dna.txt")
output = []
output.append("The AT content is" + get_dna_stats() + "% of the DNA sequence.")
get_dna_stats("dna_sequence")
output.append("The DNA complement is " + get_dna_complement())
get_dna_complement("dna_sequence")
output.append("The RNA sequence is" + get_rna_sequence())
get_rna_sequence("dna_sequence")
exon1 = extract_exon("dna_sequence", 0, 62)
exon2 = extract_exon("dna_sequence", 91, len("dna_sequence"))
output.append(f"The exon regions are {exon1} and {exon2}")
output.append("The DNA sequence, which exons in uppercase and introns in lowercase, is" + format_dna())
format_data("dna_sequence")
output.append("Exons comprise " + calculate_exon_pctg())
calculate_exon_pctg("dna_sequence",[exon1, exon2])
write_results(output, "results.txt")
print("DNA processing complete")
#Step 14
if __name__ == "__main__":
main()
When I run it, its supposed to output a file that looks like this but my code ends up putting every word on the top line like this
I have a feeling it has to do with the write_resultsfunction but that's all i know on how to write to the file.
The second mistake I'm making is that I'm not calling the functions correctly in the append statements. I've tried concatenating and I've tried formatting the string but now I'm hitting a road block on what I need to do.

When you write to the file you need to concat a '\n' to the end of the string every time you want to have something on a new line in the written file
for example:
output.append("The AT content is" + get_dna_stats() + "% of the DNA sequence." + '\n')
To solve your second problem I would change your code to something like this:
temp = "The AT content is" + get_dna_stats() + "% of the DNA sequence." + '\n'
output.append(temp)
When you append to a list and call a function it will take the literal text of the function instead of calling it. Doing it with a temp string holder will call the function before the string is concatenated. Then you are able to append the string to the list

read_data() doesn't actually read anything (just opens file). It should read the file and return its contents:
def read_data(filename):
with open(filename, "r") as f:
return f.read()
get_dna_stats() won't get DNA stats (won't return anything, and it doesn't count "A"s or "T"s, only checks if they're present, nucleotide_content is computed but never used or returned. It should probably count and return the results:
def get_dna_stats(dna_string):
num_a = dna_string.count("A")
num_t = dna_string.count("T")
nucleotide_content = (num_a + num_t) /float(len(dna_string))
return nucleotide_content
get_dna_complement() and get_rna_sequence(): you can't append to a string. Instead use
dna_complement += "T"
... and rather than break, you either append a "?" to denote a failed transscription, or raise ValueError("invalid letter in DNA: "+i)
print_dna() is a bit more interesting. I'm guessing you want to "zip" each letter of the DNA and its complement. Coincidentally, you can use the zip function to achieve just that:
def print_dna(dna_strand):
dna_complement = get_dna_complement(dna_strand)
for dna_letter, complement in zip(dna_strand, dna_complement):
print(dna_letter + "=" + complement)
As for extract_exon(), I don't know what that is, but presumably you just want the substring from start to end, which is achieved by:
def extract_exon(dna_strand, start, end):
return dna_strand[start:end] # possibly end+1, I don't know exons
I am guessing that in calculate_exon_pctg(), you want exons_length += len(i) to sum the lengths of the exons. You can achieve this by using the buildin function sum:
exons_length = sum(exons)
In function format_data(), loose the doublequotes. You want the variable.
main() doesn't pass any data around. It should pass the results of read_data() to all the other functions:
def main():
data = read_data("dna.txt")
output = []
output.append("The AT content is " + get_dna_stats(data) + "% of the DNA sequence.")
output.append("The DNA complement is " + get_dna_complement(data))
output.append("The RNA sequence is" + get_rna_sequence(data))
...
write_results(output, "results.txt")
print("DNA processing complete")
The key for you at this stage is to understand how function calls work: they take data as input parameters, and they return some results. You need to a) provide the input data, and b) catch the results.
write_results() - from your screenshot, you seem to want to write a plain old text file, yet you use csv.writer() (which writes CSV, i.e. tabular data). To write plain text,
def write_results(output, filename):
with open(filename, "w") as f:
f.write("\n".join(output)) # join output lines with newline
f.write("\n") # extra newline at file's end
If you really do want a CSV file, you'll need to define the columns first, and make all the output you collect fit that column format.

You never told your program to make a new line. You could either append or prepend the special "\n" character to each of your strings or you could do it in a system agnostic way by doing
import os
at the top of your file and writing your write_results function like this:
def write_results(output, filename):
try:
with open("output.csv","w") as csvFile:
writer = csv.writer(csvFile)
for i in output:
csvFile.write(i)
os.write(csvFile, os.linesep) # Add this line! It is a system agnostic newline
except IOError:
print("Error writing file")

Related

Python: Writing to file using while loop fails with no errors given

I am attempting to collect only certain type of data from one file. After that the data is to be saved to another file. The function for writing for some reason is not saving to the file. The code is below:
def reading(data):
file = open("model.txt", 'r')
while (True):
line = file.readline().rstrip("\n")
if (len(line) == 0):
break
elif (line.isdigit()):
print("Number '" + line + "' is present. Adding")
file.close()
return None
def writing(data):
file = open("results.txt", 'w')
while(True):
line = somelines
if line == "0":
file.close()
break
else:
file.write(line + '\n')
return None
file = "model.txt"
data = file
somelines = reading(data)
writing(data)
I trying several things, the one above produced a TypeError (unsupported operand). Changing to str(somelines) did solve the error, but still nothing was written. I am rather confused about this. Is it the wrong definition of the "line" in the writing function? Or something else?
See this line in your writing function:
file.write(line + '\n')
where you have
line = somelines
and outside the function you have
somelines = reading(data)
You made your reading function return None. You cannot concat None with any string, hence the error.
Assuming you want one reading function which scans the input file for digits, and one writing file which writes these digits to a file until the digit read is 0, this may help:
def reading(file_name):
with open(file_name, 'r') as file:
while True:
line = file.readline().rstrip("\n")
if len(line) == 0:
break
elif line.isdigit():
print("Number '" + line + "' is present. Adding")
yield line
def writing(results_file, input_file):
file = open(results_file, 'w')
digits = reading(input_file)
for digit in digits:
if digit == "0":
file.close()
return
else:
file.write(digit + '\n')
file.close()
writing("results.txt", "model.txt")

Extract IP addresses from text file without using REGEX

I am trying to extract IPv4 addresses from a text file and save them as a list to a new file, however, I can not use regex to parse the file, Instead, I have check the characters individually. Not really sure where to start with that, everything I find seems to have import re as the first line.
So far this is what I have,
#Opens and prints wireShark txt file
fileObject = open("wireShark.txt", "r")
data = fileObject.read()
print(data)
#Save IP adresses to new file
with open('wireShark.txt') as fin, open('IPAdressess.txt', 'wt') as fout:
list(fout.write(line) for line in fin if line.rstrip())
#Opens and prints IPAdressess txt file
fileObject = open("IPAdressess.txt", "r")
data = fileObject.read()
print(data)
#Close Files
fin.close()
fout.close()
So I open the file, and I have created the file that I will put the extracted IP's in, I just don't know ow to pull them without using REGEX.
Thanks for the help.
Here is a possible solution.
The function find_first_digit, position the index at the next digit in the text if any and return True. Else return False
The functions get_dot and get_num read a number/dot and, lets the index at the position just after the number/dot and return the number/dot as str. If one of those functions fails to get the number/dot raise an MissMatch exception.
In the main loop, find a digit, save the index and then try to get an ip.
If sucess, write it to output file.
If any of the called functions raises a MissMatch exception, set the current index to the saved index plus one and start over.
class MissMatch(Exception):pass
INPUT_FILE_NAME = 'text'
OUTPUT_FILE_NAME = 'ip_list'
def find_first_digit():
while True:
c = input_file.read(1)
if not c: # EOF found!
return False
elif c.isdigit():
input_file.seek(input_file.tell() - 1)
return True
def get_num():
num = input_file.read(1) # 1st digit
if not num.isdigit():
raise MissMatch
if num != '0':
for i in range(2): # 2nd 3th digits
c = input_file.read(1)
if c.isdigit():
num += c
else:
input_file.seek(input_file.tell() - 1)
break
return num
def get_dot():
if input_file.read(1) == '.':
return '.'
else:
raise MissMatch
with open(INPUT_FILE_NAME) as input_file, open(OUTPUT_FILE_NAME, 'w') as output_file:
while True:
ip = ''
if not find_first_digit():
break
saved_position = input_file.tell()
try:
ip = get_num() + get_dot() \
+ get_num() + get_dot() \
+ get_num() + get_dot() \
+ get_num()
except MissMatch:
input_file.seek(saved_position + 1)
else:
output_file.write(ip + '\n')

how to edit a number in a text field in python?

So i have am trying to go through a text file, put it into a dictionary, and then check to see if a string is already in it. if it is, i want to change the "1" to a "2". Currently if the string is already in the text file, it will just make a new line but with a "2". is there a way to edit the text file so the number can stay in the same place but be replaced?
class Isduplicate:
dicto = {}
def read(self):
f = open(r'C:\Users\jacka\OneDrive\Documents\outputs.txt', "r")
for line in f:
k, v = line.strip().split(':')
self.dicto[k.strip()] = int(v.strip())
return self.dicto
Is = Isduplicate()
while counter < 50:
e = str(elem[counter].get_attribute("href"))
e = e.replace("https://www.reddit.com/r/", "")
e = e[:-1]
if e in Is.read():
Is.dicto[e] += 1
else:
Is.dicto[e] = 1
text_file.write(e + ":" + str(Is.dicto[e]) + '\n')
print(e)
counter = counter +2
You can not rewrite a particular byte in a file, you have to rewrite the file in a whole.
Probably reading the file into the list of strings, processing that list and writing it back to the file would solve your task.

How to write new line to a file in Python?

I have a function where conversion is a list and filename is the name of the input file. I want to write 5 characters in a line then add a new line and add 5 characters to that line and add a new line until there is nothing to write from the list conversion to the file filename. How can I do that?
From what I think I understand about your question, you may need code that looks like this:
import os
def write_data_file(your_collection, data_file_path):
"""Writes data file from memory to previously specified path."""
the_string = ''
for row in your_collection:
for i, c in enumerate(row):
if i % 5 < 4:
the_string += c
else:
the_string += os.linesep + c
with open(data_file_path, 'w') as df:
df.write(the_string)
my_collection = [
"This is more than five characters",
"This is definately more than five characters",
"NOT",
"NADA"
]
write_data_file(my_collection, '/your/file/path.txt')
Other than that, you may need to clarify what you are asking. This code snippet loops through a collection (like a list) then loops over the assumed string contained at that row in the collection. It adds the new line whenever the 5 character limit has been reached.
def foo(conversion, filename):
with open(filename, "a") as f:
line = ""
for s in conversion:
for c in s:
if len(line) < 5:
line += c
else:
f.write(line + "\n")
line = c
f.write(line)
Convert the list of strings into one large string, then loop through that string five characters at a time and insert "\n" after each iteration of the loop. Then write that to a file. Here's a basic example of what I mean, might need some tweaking but it'll give you the idea:
# concatenate all the strings for simplicity
big_string = ""
for single_string in conversion:
big_string += single_string
# loop through using 5 characters at a time
string_with_newlines = ""
done = False
while not done:
next_five_chars = big_string[:5]
big_string = big_string[5:]
if next_five_chars:
string_with_newlines += next_five_chars + "\n"
else:
done = True
# write it all to file
with open(filename, "w") as your_file:
your_file.write(string_with_newlines)
l = ["one", "two", "three", "four", "five"]
def write(conversion, filename):
string = "".join(conversion)
f = open(filename, "a")
for i in range(5, len(string) + 5, 5):
f.write(string[i-5:i])
f.write("\n")
if __name__ == '__main__':
write(l, "test.txt")
This create a file called "test.txt" with the content:
onetw
othre
efour
onetw
othre
efour
five
I made a function for grouping your list:
import itertools
def group(iterable, n):
gen = (char for substr in iterable for char in substr)
while True:
part = ''.join(itertools.islice(gen, 0, n))
if not part:
break
yield part
Presentation:
>>> l = ['DBTsiQoECGPPo', 'd', 'aDAuehlM', 'FbUnSuMLuEbHe', 'jRvARVZMn', 'SbGCi'
, 'jhI', 'Rpbd', 'uspffRvPiAmbQEoZDFAG', 'RIbHAcbREdqpMDX', 'bqVMrN', 'FtU', 'nu
fWcfjfmAaUtYtwNUBc', 'oZvk', 'EaytqdRkICuxqbPaPulCZlD', 'dVrZdidLeakPT', 'qttRfH
eJJMOlJRMKBM', 'SAiBrdPblHtRGpjpZKuFLGza', 'RxrLgclVavoCmPkhR', 'YuulTYaNTLghUkK
riOicMuUD']
>>> list(group(l, 5))
['DBTsi', 'QoECG', 'PPoda', 'DAueh', 'lMFbU', 'nSuML', 'uEbHe', 'jRvAR', 'VZMnS'
, 'bGCij', 'hIRpb', 'duspf', 'fRvPi', 'AmbQE', 'oZDFA', 'GRIbH', 'AcbRE', 'dqpMD
', 'XbqVM', 'rNFtU', 'nufWc', 'fjfmA', 'aUtYt', 'wNUBc', 'oZvkE', 'aytqd', 'RkIC
u', 'xqbPa', 'PulCZ', 'lDdVr', 'ZdidL', 'eakPT', 'qttRf', 'HeJJM', 'OlJRM', 'KBM
SA', 'iBrdP', 'blHtR', 'GpjpZ', 'KuFLG', 'zaRxr', 'LgclV', 'avoCm', 'PkhRY', 'uu
lTY', 'aNTLg', 'hUkKr', 'iOicM', 'uUD']
>>> '\n'.join(group(l, 5))
'DBTsi\nQoECG\nPPoda\nDAueh\nlMFbU\nnSuML\nuEbHe\njRvAR\nVZMnS\nbGCij\nhIRpb\ndu
spf\nfRvPi\nAmbQE\noZDFA\nGRIbH\nAcbRE\ndqpMD\nXbqVM\nrNFtU\nnufWc\nfjfmA\naUtYt
\nwNUBc\noZvkE\naytqd\nRkICu\nxqbPa\nPulCZ\nlDdVr\nZdidL\neakPT\nqttRf\nHeJJM\nO
lJRM\nKBMSA\niBrdP\nblHtR\nGpjpZ\nKuFLG\nzaRxr\nLgclV\navoCm\nPkhRY\nuulTY\naNTL
g\nhUkKr\niOicM\nuUD'
Write the result of '\n'.join(group(l, 5)) to a file.

Append to JSON in Python (Optimally due to RAM constraint)

I'm trying to find the optimal way to append some data to a json file using Python. Basically what happens is I have about say 100 threads open storing data to an array. When they are done they send that to a json file using json.dump. However since this can take a few hours for the array to build up I end up running out of RAM eventually. So I'm trying to see what's the best way to use the least amount of RAM in this process. The following is what I have which consumes to much RAM.
i = 0
twitter_data = {}
for null in range(0,1):
while True:
try:
for friends in Cursor(api.followers_ids,screen_name=self.ip).items():
twitter_data[i] = {}
twitter_data[i]['fu'] = self.ip
twitter_data[i]['su'] = friends
i = i + 1
except tweepy.TweepError, e:
print "ERROR on " + str(self.ip) + " Reason: ", e
with open('C:/Twitter/errors.txt', mode='a') as a_file:
new_ii = "ERROR on " + str(self.ip) + " Reason: " + str(e) + "\n"
a_file.write(new_ii)
break
## Save data
with open('C:/Twitter/user_' + str(self.id) + '.json', mode='w') as f:
json.dump(twitter_data, f, indent=2, encoding='utf-8')
Thanks
Output the individual items as an array as they're created, creating the JSON formatting for the array around it manually. JSON is a simple format, so this is trivial to do.
Here's a simple example that prints out a JSON array, without having to hold the entire contents in memory; only a single element in the array needs to be stored at once.
def get_item():
return { "a": 5, "b": 10 }
def get_array():
results = []
yield "["
for x in xrange(5):
if x > 0:
yield ","
yield json.dumps(get_item())
yield "]"
if __name__ == "__main__":
for s in get_array():
sys.stdout.write(s)
sys.stdout.write("\n")
My take, building on the idea from Glenn's answer but serializing a big dict as requested by the OP and using the more pythonic enumerate instead of manually incrementing i (errors can be taken into account by keeping a separate count for them and subtracting it from i before wriring to f):
with open('C:/Twitter/user_' + str(self.id) + '.json', mode='w') as f:
f.write('{')
for i, friends in enumerate(Cursor(api.followers_ids,screen_name=self.ip).items()):
if i>0:
f.write(", ")
f.write("%s:%s" % (json.dumps(i), json.dumps(dict(fu=self.ip, su=friends))))
f.write("}")

Categories

Resources