I am very new to Python and I am trying to get a bit of code to concatenate text files into one!
I have the following code:
Checkpoint = open("/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Checkpoint.txt", "r")
eightbyeight = open("/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_8x8.txt", "r")
AmazonAWS = open("/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_AmazonAWS.txt", "r")
Common_Tech_Terms = open("/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Common_Tech_Terms.txt", "r")
import sys
filenames = ['Common_Tech_Terms', 'eightbyeight', 'AmazonAWS', 'Checkpoint']
with open('output_file2.txt', 'w+') as outfile:
for fname in filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line + "\n")
I get the following error:
Traceback (most recent call last):
File "/Users/owenmurray/Desktop/Combining Files", line 60, in <module>
with open(fname) as infile:
IOError: [Errno 2] No such file or directory: 'Common_Tech_Terms'
[Finished in 0.1s with exit code 1]
[shell_cmd: python -u "/Users/owenmurray/Desktop/Combining Files"]
[dir: /Users/owenmurray/Desktop]
Anyone have any idea to fix this?
You're losing the benefit of using a context manager, call open when you need a file content:
Checkpoint = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Checkpoint.txt"
eightbyeight = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_8x8.txt"
AmazonAWS = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_AmazonAWS.txt"
Common_Tech_Terms = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Common_Tech_Terms.txt"
filenames = [Common_Tech_Terms, eightbyeight, AmazonAWS, Checkpoint]
with open('output_file2.txt', 'w+') as outfile:
for fname in filenames:
with open(fname, "r") as infile:
for line in infile:
outfile.write(line + "\n")
If you do:
Checkpoint = open("/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Checkpoint.txt", "r")
Then you will need to manually close the file as well:
Checkpoint.close()
Instead simply do:
# Updated. Removed "open(, 'r')"
Checkpoint = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Checkpoint.txt"
eightbyeight = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_8x8.txt"
AmazonAWS = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_AmazonAWS.txt"
Common_Tech_Terms = "/Users/owenmurray/Desktop/vocab/Custom_Vocab_Split_by_Brand_Common_Tech_Terms.txt"
import sys
# Removed "". We need variable Common_Tech_Terms, not string "Common_Tech_Terms".
filenames = [Common_Tech_Terms, eightbyeight, AmazonAWS, Checkpoint]
with open('output_file2.txt', 'w+') as outfile:
for fname in filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line + "\n")
Related
I want to append some text to every line in my file
Here is my code
filepath = 'hole.txt'
with open(filepath) as fp:
line = fp.readline()
cnt = 1
while line:
#..........
#want to append text "#" in every line by reading line by line
text from .txt file
line = fp.readline()
cnt += 1
You can read the lines and put them in a list. Then you open the same file with write mode and write each line with the string you want to append.
filepath = "hole.txt"
with open(filepath) as fp:
lines = fp.read().splitlines()
with open(filepath, "w") as fp:
for line in lines:
print(line + "#", file=fp)
Assuming you can load the full text in memory, you could open the file, split by row and for each row append the '#'. Then save :-) :
with open(filepath, 'r') as f: # load file
lines = f.read().splitlines() # read lines
with open('new_file.txt', 'w') as f:
f.write('\n'.join([line + '#' for line in lines])) # write lines with '#' appended
I'll assume the file is small enough to keep two copies of it in memory:
filepath = 'hole.txt'
with open(filepath, 'r') as f:
original_lines = f.readlines()
new_lines = [line.strip() + "#\n" for line in original_lines]
with open(filepath, 'w') as f:
f.writelines(new_lines)
First, we open the file and read all lines into a list. Then, a new list is generated by strip()ing the line terminators from each line, adding some additional text and a new line terminator after it.
Then, the last line overwrites the file with the new, modified lines.
does this help?
inputFile = "path-to-input-file/a.txt"
outputFile = "path-to-output-file/b.txt"
stringToAPpend = "#"
with open(inputFile, 'r') as inFile, open(outputFile, 'w') as outFile:
for line in inFile:
outFile.write(stringToAPpend+line)
I am very new to python, and I have a python script to run for a particular file (input1.txt) and generated a output (output1.fasta), but I would like to run this script for multiple files, for example: input2.txt, input3.txt...and generate the respective output: output2.fasta, output3.fasta
from Bio import SeqIO
fasta_file = "sequences.txt"
wanted_file = "input1.txt"
result_file = "output1.fasta"
wanted = set()
with open(wanted_file) as f:
for line in f:
line = line.strip()
if line != "":
wanted.add(line)
fasta_sequences = SeqIO.parse(open(fasta_file),'fasta')
with open(result_file, "w") as f:
for seq in fasta_sequences:
if seq.id in wanted:
SeqIO.write([seq], f, "fasta")
I tried to add the glob function, but I do not know how to deal with the output file name.
from Bio import SeqIO
import glob
fasta_file = "sequences.txt"
for filename in glob.glob('*.txt'):
wanted = set()
with open(filename) as f:
for line in f:
line = line.strip()
if line != "":
wanted.add(line)
fasta_sequences = SeqIO.parse(open(fasta_file),'fasta')
with open(result_file, "w") as f:
for seq in fasta_sequences:
if seq.id in wanted:
SeqIO.write([seq], f, "fasta")
The error message is: NameError: name 'result_file' is not defined
Your glob is currently pulling your "sequences" file as well as the inputs because *.txt includes the sequences.txt file. If the "fasta" file is always the same and you only want to iterate the input files, then you need
for filename in glob.glob('input*.txt'):
Also, to iterate through your entire process, perhaps you want to put it within a method. And if the output filename is always created to correspond to the input, then you can create that dynamically.
from Bio import SeqIO
def create_fasta_outputs(fasta_file, wanted_file):
result_file = wanted_file.replace("input","output").replace(".txt",".fasta")
wanted = set()
with open(wanted_file) as f:
for line in f:
line = line.strip()
if line != "":
wanted.add(line)
fasta_sequences = SeqIO.parse(open(fasta_file),'fasta')
with open(result_file, "w") as f:
for seq in fasta_sequences:
if seq.id in wanted:
SeqIO.write([seq], f, "fasta")
fasta_file = "sequences.txt"
for wanted_file in glob.glob('input*.txt'):
create_fasta_outputs(fasta_file, wanted_file)
I have many text files, and each of them has a empty line at the end. My scripts did not seem to remove them. Can anyone help please?
# python 2.7
import os
import sys
import re
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
all_file = open(filepath,'r')
lines = all_file.readlines()
output = 'F:/WF/new/' + filename
# Read in each row and parse out components
for line in lines:
# Weed out blank lines
line = filter(lambda x: not x.isspace(), lines)
# Write to the new directory
f = open(output,'w')
f.writelines(line)
f.close()
You can use Python's rstrip() function to do this as follows:
filename = "test.txt"
with open(filename) as f_input:
data = f_input.read().rstrip('\n')
with open(filename, 'w') as f_output:
f_output.write(data)
This will remove all empty lines from the end of the file. It will not change the file if there are no empty lines.
you can remove last empty line by using:
with open(filepath, 'r') as f:
data = f.read()
with open(output, 'w') as w:
w.write(data[:-1])
You can try this without using the re module:
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
f = open(filepath).readlines()
new_file = open(filepath, 'w')
new_file.write('')
for i in f[:-1]:
new_file.write(i)
new_file.close()
For each filepath, the code opens the file, reads in its contents line by line, then writes over the file, and lastly writes the contents of f to the file, except for the last element in f, which is the empty line.
You can remove the last blank line by the following command. This worked for me:
file = open(file_path_src,'r')
lines = file.read()
with open(file_path_dst,'w') as f:
for indx, line in enumerate(lines):
f.write(line)
if indx != len(lines) - 1:
f.write('\n')
i think this should work fine
new_file.write(f[:-1])
So I'm merging two documents and outputting a third file
I get the error
Traceback (most recent call last):
File "summarize.py", line 124, in <module>
train_data = set(document3)
NameError: name 'document3' is not defined
This is what I have done:
Code:
filenames = ["/home/mustafa/data/combinedfile.txt", "/home/mustafa/data/sentences.txt"]
with open("document3", "wb") as outfile:
for fname in filenames:
with open(fname) as infile:
outfile.write(infile.read())
train_data = set(document3)
What am I doing wrong?
It seems that you are trying to write into a file
'document3' and you are trying to read from that file(according to your comment). If that is the case you should read that file first and then you have to process the data. So the code will be
filenames = ["/home/mustafa/data/combinedfile.txt", "/home/mustafa/data/sentences.txt"]
with open("document3", "wb") as outfile: # here document3 is file name
for fname in filenames:
with open(fname) as infile:
outfile.write(infile.read())
train_data = set(open("document3").read().replace("\n","")) #this will read all data from document3 and stores as a set.
I would like to loop through files into a directory, make something on these files and then for each file write out the result.
But my files can't be read because python interprets file names as string objects and not a readable file.
Is there a way to avoid this?
import re
import os
def create_filename_for_fileout (f1):
fileout_n = f1.replace("TT", "out")
fileout = "C:\\Users\\KP\\Desktop\\FSC_Treetag\\out\\"+str(fileout_n)
return fileout
for file_in in os.listdir('C:\\Users\\KP\\Desktop\\FSC_Treetag'):
filename = str(file_in)
file_out = create_filename_for_fileout(filename)
open(file_in, 'r')
open(file_out, 'w')
content_file = file_in.readlines()
for ln in content_file:
regex = re.compile('(.*\t(ADJ|ADV|NOM|VER:cond|VER:futu|VER:impe|VER:impf|VER:infi|VER:pper|VER:pres|VER:pres|VER:simp|VER:subi|VER:subp)\t(.*))')
res = regex.search(ln)
if res:
# categ = res.group(2)
lemme = res.group(3)
file_out.write(str(lemme)+"\n")
file_out.close()
file_in.close()
Result:
content_file = file_in.readlines()
AttributeError: 'str' object has no attribute 'readlines'
>>>
You're not assigning your open to any variable to use.
# Change
open(file_in, 'r')
open(file_out, 'w')
# to
input_file = open(file_in, 'r')
output_file = open(file_out, 'w')
for ln in input_file:
# do your processing
if res:
lemme = res.group(3)
output_file.write(str(lemme) + "\n")
You are not assigning the open functions to the respective handlers (open is returning an object of the file type).
filename = str(file_in)
file_out = create_filename_for_fileout(filename)
open(file_in, 'r')
open(file_out, 'w')
Should be:
file_out = open(create_filename_for_fileout(file_in), 'w')
file_in = open(file_in, 'r')
NOTE: for clarity sake it's a good idea to use another pointer for the infile handler.
Check: https://docs.python.org/2/library/functions.html#open
open(name[, mode[, buffering]])
Open a file, returning an object of the file type described in section File Objects. If the file cannot be opened, IOError is raised.