How to write new line to a file in Python? - python

I have a function where conversion is a list and filename is the name of the input file. I want to write 5 characters in a line then add a new line and add 5 characters to that line and add a new line until there is nothing to write from the list conversion to the file filename. How can I do that?

From what I think I understand about your question, you may need code that looks like this:
import os
def write_data_file(your_collection, data_file_path):
"""Writes data file from memory to previously specified path."""
the_string = ''
for row in your_collection:
for i, c in enumerate(row):
if i % 5 < 4:
the_string += c
else:
the_string += os.linesep + c
with open(data_file_path, 'w') as df:
df.write(the_string)
my_collection = [
"This is more than five characters",
"This is definately more than five characters",
"NOT",
"NADA"
]
write_data_file(my_collection, '/your/file/path.txt')
Other than that, you may need to clarify what you are asking. This code snippet loops through a collection (like a list) then loops over the assumed string contained at that row in the collection. It adds the new line whenever the 5 character limit has been reached.

def foo(conversion, filename):
with open(filename, "a") as f:
line = ""
for s in conversion:
for c in s:
if len(line) < 5:
line += c
else:
f.write(line + "\n")
line = c
f.write(line)

Convert the list of strings into one large string, then loop through that string five characters at a time and insert "\n" after each iteration of the loop. Then write that to a file. Here's a basic example of what I mean, might need some tweaking but it'll give you the idea:
# concatenate all the strings for simplicity
big_string = ""
for single_string in conversion:
big_string += single_string
# loop through using 5 characters at a time
string_with_newlines = ""
done = False
while not done:
next_five_chars = big_string[:5]
big_string = big_string[5:]
if next_five_chars:
string_with_newlines += next_five_chars + "\n"
else:
done = True
# write it all to file
with open(filename, "w") as your_file:
your_file.write(string_with_newlines)

l = ["one", "two", "three", "four", "five"]
def write(conversion, filename):
string = "".join(conversion)
f = open(filename, "a")
for i in range(5, len(string) + 5, 5):
f.write(string[i-5:i])
f.write("\n")
if __name__ == '__main__':
write(l, "test.txt")
This create a file called "test.txt" with the content:
onetw
othre
efour
onetw
othre
efour
five

I made a function for grouping your list:
import itertools
def group(iterable, n):
gen = (char for substr in iterable for char in substr)
while True:
part = ''.join(itertools.islice(gen, 0, n))
if not part:
break
yield part
Presentation:
>>> l = ['DBTsiQoECGPPo', 'd', 'aDAuehlM', 'FbUnSuMLuEbHe', 'jRvARVZMn', 'SbGCi'
, 'jhI', 'Rpbd', 'uspffRvPiAmbQEoZDFAG', 'RIbHAcbREdqpMDX', 'bqVMrN', 'FtU', 'nu
fWcfjfmAaUtYtwNUBc', 'oZvk', 'EaytqdRkICuxqbPaPulCZlD', 'dVrZdidLeakPT', 'qttRfH
eJJMOlJRMKBM', 'SAiBrdPblHtRGpjpZKuFLGza', 'RxrLgclVavoCmPkhR', 'YuulTYaNTLghUkK
riOicMuUD']
>>> list(group(l, 5))
['DBTsi', 'QoECG', 'PPoda', 'DAueh', 'lMFbU', 'nSuML', 'uEbHe', 'jRvAR', 'VZMnS'
, 'bGCij', 'hIRpb', 'duspf', 'fRvPi', 'AmbQE', 'oZDFA', 'GRIbH', 'AcbRE', 'dqpMD
', 'XbqVM', 'rNFtU', 'nufWc', 'fjfmA', 'aUtYt', 'wNUBc', 'oZvkE', 'aytqd', 'RkIC
u', 'xqbPa', 'PulCZ', 'lDdVr', 'ZdidL', 'eakPT', 'qttRf', 'HeJJM', 'OlJRM', 'KBM
SA', 'iBrdP', 'blHtR', 'GpjpZ', 'KuFLG', 'zaRxr', 'LgclV', 'avoCm', 'PkhRY', 'uu
lTY', 'aNTLg', 'hUkKr', 'iOicM', 'uUD']
>>> '\n'.join(group(l, 5))
'DBTsi\nQoECG\nPPoda\nDAueh\nlMFbU\nnSuML\nuEbHe\njRvAR\nVZMnS\nbGCij\nhIRpb\ndu
spf\nfRvPi\nAmbQE\noZDFA\nGRIbH\nAcbRE\ndqpMD\nXbqVM\nrNFtU\nnufWc\nfjfmA\naUtYt
\nwNUBc\noZvkE\naytqd\nRkICu\nxqbPa\nPulCZ\nlDdVr\nZdidL\neakPT\nqttRf\nHeJJM\nO
lJRM\nKBMSA\niBrdP\nblHtR\nGpjpZ\nKuFLG\nzaRxr\nLgclV\navoCm\nPkhRY\nuulTY\naNTL
g\nhUkKr\niOicM\nuUD'
Write the result of '\n'.join(group(l, 5)) to a file.

Related

Python: Counting words from a directory of txt files and writing word counts to a separate txt file

New to Python and I'm trying to count the words in a directory of text files and write the output to a separate text file. However, I want to specify conditions. So if word count is > 0 is would like to write the count and file path to one file and if the count is == 0. I would like to write the count and file path to a separate file. Below is my code so far. I think I'm close, but I'm hung up on how to do the conditions and separate files. Thanks.
import sys
import os
from collections import Counter
import glob
stdoutOrigin=sys.stdout
sys.stdout = open("log.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
for filepath in glob.iglob(os.path.join("path", '*.txt')):
with open(filepath) as f:
data = f.read()
for key,val in words.items():
#print("key is " + key + "\n")
ct = data.count(key)
words[key] = ct
if action:
action(filepath, words)
def print_summary(filepath, words):
for key,val in sorted(words.items()):
print(filepath)
if val > 0:
print('{0}:\t{1}'.format(
key,
val))
filepath = sys.argv[1]
keys = ["x", "y"]
words = dict.fromkeys(keys,0)
count_words_in_dir(filepath, words, action=print_summary)
sys.stdout.close()
sys.stdout=stdoutOrigin
I would strongly urge you to not repurpose stdout for writing data to a file as part of the normal course of your program. I also wonder how you can ever have a word "count < 0". I assume you meant "count == 0".
The main problem that your code has is in this line:
for filepath in glob.iglob(os.path.join("path", '*.txt')):
The string constant "path" I'm pretty sure doesn't belong there. I think you want filepath there instead. I would think that this problem would prevent your code from working at all.
Here's a version of your code where I fixed these issues and added the logic to write to two different output files based on the count:
import sys
import os
import glob
out1 = open("/tmp/so/seen.txt", "w")
out2 = open("/tmp/so/missing.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
with open(filepath) as f:
data = f.read()
for key, val in words.items():
# print("key is " + key + "\n")
ct = data.count(key)
words[key] = ct
if action:
action(filepath, words)
def print_summary(filepath, words):
for key, val in sorted(words.items()):
whichout = out1 if val > 0 else out2
print(filepath, file=whichout)
print('{0}: {1}'.format(key, val), file=whichout)
filepath = sys.argv[1]
keys = ["country", "friend", "turnip"]
words = dict.fromkeys(keys, 0)
count_words_in_dir(filepath, words, action=print_summary)
out1.close()
out2.close()
Result:
file seen.txt:
/Users/steve/tmp/so/dir/data2.txt
friend: 1
/Users/steve/tmp/so/dir/data.txt
country: 2
/Users/steve/tmp/so/dir/data.txt
friend: 1
file missing.txt:
/Users/steve/tmp/so/dir/data2.txt
country: 0
/Users/steve/tmp/so/dir/data2.txt
turnip: 0
/Users/steve/tmp/so/dir/data.txt
turnip: 0
(excuse me for using some search words that were a bit more interesting than yours)
Hello I hope I understood your question correctly, this code will count how many different words are in your file and depending on the conditions will do something you want.
import os
all_words = {}
def count(file_path):
with open(file_path, "r") as f:
# for better performance it is a good idea to go line by line through file
for line in f:
# singles out all the words, by splitting string around spaces
words = line.split(" ")
# and checks if word already exists in all_words dictionary...
for word in words:
try:
# ...if it does increment number of repetitions
all_words[word.replace(",", "").replace(".", "").lower()] += 1
except Exception:
# ...if it doesn't create it and give it number of repetitions 1
all_words[word.replace(",", "").replace(".", "").lower()] = 1
if __name__ == '__main__':
# for every text file in your current directory count how many words it has
for file in os.listdir("."):
if file.endswith(".txt"):
all_words = {}
count(file)
n = len(all_words)
# depending on the number of words do something
if n > 0:
with open("count1.txt", "a") as f:
f.write(file + "\n" + str(n) + "\n")
else:
with open("count2.txt", "a") as f:
f.write(file + "\n" + str(n) + "\n")
if you want to count same word multiple times you can add up all values from dictionary or you can eliminate try-except block and count every word there.

Python printing with user defined functions

I'm trying to write a code that will take data from a file and write it differently. I have the code for the most part but when i run it, everything is on one line.
import csv
#Step 4
def read_data(filename):
try:
data = open("dna.txt", "r")
except IOError:
print( "File not found")
return data
#Step 5
def get_dna_stats(dna_string):
a_letters = ""
t_letters = ""
if "A" in dna_string:
a_letters.append("A")
if "T" in dna_string:
t_letters.append("T")
nucleotide_content = ((len(a_letters) + len(t_letters))/len(dna_string))
#Step 6
def get_dna_complement(dna_string):
dna_complement = ""
for i in dna_string:
if i == "A":
dna_complement.append("T")
elif i == "T":
dna_complement.append("A")
elif i == "G":
dna_complement.append("C")
elif i == "C":
dna_complement.append("G")
else:
break
return dna_complement
#Step 7
def print_dna(dna_strand):
dna_complement = get_dna_complement(dna_strand)
for i in dna_strand:
for j in dna_complement:
print( i + "=" + j)
#Step 8
def get_rna_sequence(dna_string):
rna_complement = ""
for i in dna_string:
if i == "A":
rna_complement.append("U")
elif i == "T":
rna_complement.append("A")
elif i == "G":
rna_complement.append("C")
elif i == "C":
rna_complement.append("G")
else:
break
return rna_complement
#Step 9
def extract_exon(dna_strand, start, end):
return (f"{dna_strand} between {start} and {end}")
#Step 10
def calculate_exon_pctg(dna_strand, exons):
exons_length = 0
for i in exons:
exons_length += 1
return exons_length/ len(dna_strand)
#Step 11
def format_data(dna_string):
x = "dna_strand"[0:62].upper()
y = "dna_strand"[63:90].lower()
z = "dna_strand"[91:-1].upper()
return x+y+z
#Step 12
def write_results(output, filename):
try:
with open("output.csv","w") as csvFile:
writer = csv.writer(csvFile)
for i in output:
csvFile.write(i)
except IOError:
print("Error writing file")
#Step 13
def main():
read_data("dna.txt")
output = []
output.append("The AT content is" + get_dna_stats() + "% of the DNA sequence.")
get_dna_stats("dna_sequence")
output.append("The DNA complement is " + get_dna_complement())
get_dna_complement("dna_sequence")
output.append("The RNA sequence is" + get_rna_sequence())
get_rna_sequence("dna_sequence")
exon1 = extract_exon("dna_sequence", 0, 62)
exon2 = extract_exon("dna_sequence", 91, len("dna_sequence"))
output.append(f"The exon regions are {exon1} and {exon2}")
output.append("The DNA sequence, which exons in uppercase and introns in lowercase, is" + format_dna())
format_data("dna_sequence")
output.append("Exons comprise " + calculate_exon_pctg())
calculate_exon_pctg("dna_sequence",[exon1, exon2])
write_results(output, "results.txt")
print("DNA processing complete")
#Step 14
if __name__ == "__main__":
main()
When I run it, its supposed to output a file that looks like this but my code ends up putting every word on the top line like this
I have a feeling it has to do with the write_resultsfunction but that's all i know on how to write to the file.
The second mistake I'm making is that I'm not calling the functions correctly in the append statements. I've tried concatenating and I've tried formatting the string but now I'm hitting a road block on what I need to do.
When you write to the file you need to concat a '\n' to the end of the string every time you want to have something on a new line in the written file
for example:
output.append("The AT content is" + get_dna_stats() + "% of the DNA sequence." + '\n')
To solve your second problem I would change your code to something like this:
temp = "The AT content is" + get_dna_stats() + "% of the DNA sequence." + '\n'
output.append(temp)
When you append to a list and call a function it will take the literal text of the function instead of calling it. Doing it with a temp string holder will call the function before the string is concatenated. Then you are able to append the string to the list
read_data() doesn't actually read anything (just opens file). It should read the file and return its contents:
def read_data(filename):
with open(filename, "r") as f:
return f.read()
get_dna_stats() won't get DNA stats (won't return anything, and it doesn't count "A"s or "T"s, only checks if they're present, nucleotide_content is computed but never used or returned. It should probably count and return the results:
def get_dna_stats(dna_string):
num_a = dna_string.count("A")
num_t = dna_string.count("T")
nucleotide_content = (num_a + num_t) /float(len(dna_string))
return nucleotide_content
get_dna_complement() and get_rna_sequence(): you can't append to a string. Instead use
dna_complement += "T"
... and rather than break, you either append a "?" to denote a failed transscription, or raise ValueError("invalid letter in DNA: "+i)
print_dna() is a bit more interesting. I'm guessing you want to "zip" each letter of the DNA and its complement. Coincidentally, you can use the zip function to achieve just that:
def print_dna(dna_strand):
dna_complement = get_dna_complement(dna_strand)
for dna_letter, complement in zip(dna_strand, dna_complement):
print(dna_letter + "=" + complement)
As for extract_exon(), I don't know what that is, but presumably you just want the substring from start to end, which is achieved by:
def extract_exon(dna_strand, start, end):
return dna_strand[start:end] # possibly end+1, I don't know exons
I am guessing that in calculate_exon_pctg(), you want exons_length += len(i) to sum the lengths of the exons. You can achieve this by using the buildin function sum:
exons_length = sum(exons)
In function format_data(), loose the doublequotes. You want the variable.
main() doesn't pass any data around. It should pass the results of read_data() to all the other functions:
def main():
data = read_data("dna.txt")
output = []
output.append("The AT content is " + get_dna_stats(data) + "% of the DNA sequence.")
output.append("The DNA complement is " + get_dna_complement(data))
output.append("The RNA sequence is" + get_rna_sequence(data))
...
write_results(output, "results.txt")
print("DNA processing complete")
The key for you at this stage is to understand how function calls work: they take data as input parameters, and they return some results. You need to a) provide the input data, and b) catch the results.
write_results() - from your screenshot, you seem to want to write a plain old text file, yet you use csv.writer() (which writes CSV, i.e. tabular data). To write plain text,
def write_results(output, filename):
with open(filename, "w") as f:
f.write("\n".join(output)) # join output lines with newline
f.write("\n") # extra newline at file's end
If you really do want a CSV file, you'll need to define the columns first, and make all the output you collect fit that column format.
You never told your program to make a new line. You could either append or prepend the special "\n" character to each of your strings or you could do it in a system agnostic way by doing
import os
at the top of your file and writing your write_results function like this:
def write_results(output, filename):
try:
with open("output.csv","w") as csvFile:
writer = csv.writer(csvFile)
for i in output:
csvFile.write(i)
os.write(csvFile, os.linesep) # Add this line! It is a system agnostic newline
except IOError:
print("Error writing file")

how to edit a number in a text field in python?

So i have am trying to go through a text file, put it into a dictionary, and then check to see if a string is already in it. if it is, i want to change the "1" to a "2". Currently if the string is already in the text file, it will just make a new line but with a "2". is there a way to edit the text file so the number can stay in the same place but be replaced?
class Isduplicate:
dicto = {}
def read(self):
f = open(r'C:\Users\jacka\OneDrive\Documents\outputs.txt', "r")
for line in f:
k, v = line.strip().split(':')
self.dicto[k.strip()] = int(v.strip())
return self.dicto
Is = Isduplicate()
while counter < 50:
e = str(elem[counter].get_attribute("href"))
e = e.replace("https://www.reddit.com/r/", "")
e = e[:-1]
if e in Is.read():
Is.dicto[e] += 1
else:
Is.dicto[e] = 1
text_file.write(e + ":" + str(Is.dicto[e]) + '\n')
print(e)
counter = counter +2
You can not rewrite a particular byte in a file, you have to rewrite the file in a whole.
Probably reading the file into the list of strings, processing that list and writing it back to the file would solve your task.

python: line.startswith(str) will not work if the file contains the same string in every line

I have a file xyz.txt which has some values assigned to different variables like below.
abc.def = "Hi how are you"
abc.def.ghi = "Hi I am fine"
abc.def.ghi.jkl = "What are you doing"
abc.def.ghi.Mno = "I am working"
I want to write a python generic function which reads the line abc.def.ghi.Mno and changes the string from "I am working" to "I am playing"
This function should also be used for other files also.
I tried with line.startswith(abc.def.ghi.Mno) but its not working.
Below is what I tried.
Thanks in advance.
def find_replace(new_value, start_str, filename):
result = ""
with open(filename) as f:
for line in f:
if line.lower().startswith( start_str ):
list = line.split('=')
list[1] = new_value + '\n'
line = "=".join( list )
result += line
f = open(filename, 'wt')
f.write(result)
f.close()
find_replace(new_value = "I am playing", start_str = "abc.def.ghi.Mno", filename=xyz.txt)
You almost got it. You simply unnecessarily made line lowercase before testing if it starts with "abc.def.ghi.Mno", a mixed-case string, so it naturally wouldn't be true. Remove .lower() and it should work.
There are a number of issues with your code. As blhsing mentioned, one is that line.startswith() is case sensitive, so calling .lower() will not match your start_str. You'll also want to make sure the filename is a string (add quotes). If you want to add the quotes back in as in your original xyz.txt, just escape them when you add the string add them to your list:
def find_replace(new_value, start_str, filename):
result = ""
with open(filename) as f:
for line in f:
if line.lower().startswith( start_str ):
list = line.split('=')
list[1] = "\"" + new_value + "\"\n"
line = "=".join( list )
result += line
f = open(filename, 'wt')
f.write(result)
f.close()
find_replace(new_value = "I am playing", start_str = "abc.def.ghi.Mno", filename="xyz.txt")

How to replace one field of a line by another line in awk/sed/python?

input.txt
A(0,1,2)
...
B(A,3)
...
C(B,4,5)
If the first parameter of a function is not equal 0 but corresponding to a function name, I want to replace it with all of the corresponding function's parameters (e.g. to replace the first parameter 'A' in function B above with all parameters of function A). That is to expect
output.txt
A(0,1,2)
...
B(0,1,2,3)
...
C(0,1,2,3,4,5)
How can we do this with awk/sed or python?
EDIT:
One idea I have is to store the function name as variables and its parameters as values in bash. In python, we may use dict, and consider function names as keys, and its parameters as values. The implementation is not that easy.
Awk
awk -F'[()]' '
$2 !~ /^0,/ {
split($2, a, /,/)
sub(/^[^,]+/, val[a[1]], $2)
}
{
val[$1] = $2
print $1 "(" $2 ")"
}
' input.txt > output.txt
Where sub(/^[^,]+/, val[a[1]], $2) is used to match the first parameter in $2 and replace it with the value of val[a[1]] which is defined by the execution of val[$1] = $2 on previous lines.
Here's a solution in Python:
import re
with open('input.txt') as f:
data = f.read()
data = [line.strip() for line in data.split('\n') if line]
sets, output = {}, open('output.txt', 'w')
for line in data:
if line == '...':
output.write(line + '\n')
continue
sets[line[0]] = line[2:-1]
output.write(line[0] + '(')
for char in line[2:-1]:
if re.match(r'[\d,]', char):
output.write(char)
else:
output.write(sets[char])
output.write(')\n')
output.close()
Relevant documentation: open(), re.
Let lines be the lines of the input file. The following code will work if all parameters are integers or a functionname
funcs = {}
for line in lines:
match = re.search( '(.*)\((.*)\)', line)
if not match:
raise RuntimeError('Line does not match expectation')
function_name = match.group(1)
parameters = map(str.strip, match.group(2).split(','))
parameter_list = []
for parameter in parameters:
try:
parameter_list.append(int(parameter))
except ValueError:
parameter_list.extend( funcs.get(parameter, []) )
funcs[function_name] = parameter_list
for func_name, paras in sorted(funcs.items()):
print '{function}({parameters})'.format(
function=func_name,
parameters=', '.join(map(str, paras))
)
There are probably a ton of ways to do this but I think this is a simple way to do what you want.
import re
import sys
def convertLine(line):
if re.match("^\\w{1}\(.*\)$", line) is None:
return line
retVal = re.sub( "A", "0,1,2",line[1:])
retVal = re.sub( "B", "0,1,2,3",retVal)
retVal = re.sub( "C", "0,1,2,3,4,5",retVal)
return line[0:1]+retVal
def main():
for line in sys.stdin.read().splitlines():
print convertLine(line)
if __name__ == "__main__":
main()
usage:
python ReplaceProg.py < input.txt
if your file is like this
A(0,1,2)
B(A,3)
C(B,4,5)
using python:
f = open('inpu_file.txt').readlines()
f[0] = f[0].strip()
for i,x in enumerate(f):
if i > 0:
f[i]=re.sub(f[i-1][0],",".join(re.findall('\d+',f[i-1])),x).strip()
print f
output:
['A(0,1,2)', 'B(0,1,2,3)', 'C(0,1,2,3,4,5)']
i don't understand that ... in every alternate line, if its there tell me i can edit the code for that.
Kinda long but more modular:
import re
def build_dict(fobj):
d = dict()
for line in fobj:
match = re.match('^(\w)\((.*)\)', line)
fname = match.group(1)
fargs = match.group(2)
d[fname] = replace(fargs, d)
fobj.seek(0) # Reset cursor to start of file
return d
def replace(s, d):
for each in d:
if each in s:
s = s.replace(each, d[each])
return s
def split_paren(s):
index = s.index('(')
return s[:index], s[index:]
def write_replace(fobj, d):
outname = fobj.name[:-4] + '.out'
outfile = open(outname, 'w')
for line in fobj:
first, second = split_paren(line)
second = replace(second, d)
outfile.write(first + second)
outfile.close()
if __name__ == '__main__':
with open('test.txt', 'r') as f:
d = build_dict(f)
write_replace(f, d)

Categories

Resources