I have a folder full of .mpt files, each of them having the same data format.
I need to delete the first 57 lines from all files and append these files into one csv - output.csv.
I have that section already:
import glob
import os
dir_name = 'path name'
lines_to_ignore = 57
input_file_format = '*.mpt'
output_file_name = "output.csv"
def convert():
files = glob.glob(os.path.join(dir_name, input_file_format))
with open(os.path.join(dir_name, output_file_name), 'w') as out_file:
for f in files:
with open(f, 'r') as in_file:
content = in_file.readlines()
content = content[lines_to_ignore:]
for i in content:
out_file.write(i)
print("working")
convert()
print("done")
This part works ok.
how do i add the filename of each .mpt file as the last column of the output.csv
Thank you!
This is a quick 'n dirty solution.
In this loop the variable i is just a string (a line from a CSV file):
for i in content:
out_file.write(i)
So you just need to 1) strip off the end of line character(s) (either "\n" or "\r\n") and append ",".
If you're using Unix, try:
for i in content:
i = i.rstrip("\n") + "," + output_file_name + "\n"
out_file.write(i)
This assumes that the field separator is a comma. Another option is:
for i in content:
i = i.rstrip() + "," + output_file_name
print >>out_file, i
This will strip all white space from the end of i.
Add quotes if you need to quote the output file name:
i = i.rstrip(...) + ',"' + output_file_name '"'
The relevant part:
with open(f, 'r') as in_file:
content = in_file.readlines()
content = content[lines_to_ignore:]
for i in content:
new_line = ",".join([i.rstrip(), f]) + "\n" #<-- this is new
out_file.write(new_line) #<-- this is new
Related
I need some help, I have a a txt file with spaces between words, I want to replace the space with underscore.
fileHandler = open('nog_rename_update.txt')
for eachline in fileHandler:
new_name = fileHandler.replace(" ","_")
print(new_name)
That's my code but it keeps throwing error messages
new_name = fileHandler.replace(" ","_")
AttributeError: '_io.TextIOWrapper' object has no attribute 'replace'
example files that I want to remove space and add underscore
Here's a generic approach that should work for you:
teststring = 'hello world this is just a test. don\'t mind me 123.'
# replace multiple spaces with one space
while ' ' in teststring:
teststring = teststring.replace(' ', ' ')
# replace space with underscore (_)
teststring = teststring.replace(' ', '_')
print(teststring)
assert teststring == "hello_world_this_is_just_a_test._don't_mind_me_123." # True
Using a file example:
fname = 'mah_file.txt'
with open(fname) as in_file:
contents = in_file.read()
while ' ' in contents:
contents = contents.replace(' ', ' ')
# write updated contents back to file
with open(fname, 'w') as out_file:
out_file.write(contents.replace(' ', '_'))
This opens the files, reads line by line, splits the line into two parts, and combines the two parts with an underscore. I stored it in a list that you can use to do your next step.
with open('nog_rename_update.txt') as f:
new_list = []
for line in f:
# split the line
split = line.split()
new_list.append(split[0]+"_"+split[1])
# print the list to see results
print(new_list)
#
# add code to loop through the new list and to write to a file
#
Try out this
fileHandler = open('nog_rename_update.txt').read()
new_name = fileHandler.replace(" ", "_")
print(new_name)
f = open("test.txt", "r")
text=f.read()
f.close()
f=open("testfile.txt", "w+")
text2=''
if ' ' in text:
text2 = text.replace(' ' , '_')
print(text2)
f.write(text2)
f.close()
Here is another, less verbose solution. Simply use re.sub:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
with open(file_name, "r") as file:
for line in file:
print(re.sub("( )+", "_", line), end="")
And if you want to replace the spaces in your text file:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
lines = []
with open(file_name, "r") as file:
lines = [
re.sub("( )+", "_", line) for line in file.readlines()
]
with open(file_name, "w") as file:
file.writelines(lines)
Or use fileinput:
import re
import fileinput
file_name = r"D:\projects\playground\python\data\test.txt"
with fileinput.FileInput(file_name, inplace=True, backup=".bak") as file:
for line in file:
print(re.sub("( )+", "_", line), end="")
I am trying code to remove the header from multiple CSV files and add | delimiter by replacing, here is my code but it's getting out-
import time, os
from datetime import datetime
def remove_header_replace_delimiter():
src_folder = 'path'
src_files = os.listdir(src_folder)
print(src_files)
for file_name in src_files:
with open('path' + file_name, 'r') as inp, open('path' + file_name, 'w') as out:
next(inp)
for line in inp:
line = line.replace(',', '|')
print(line)
out.write(line)
Myfile content -
Date,Runner Name,Automation,Order Number,SON,Account Name,Quote Number,Product Code,Status
01/02/2021 10:43:25,dsadsa,AS Silver,444,3323,aaapp,W-3342,AQS-11-L,Failed
01/02/2021 10:57:52,dsfsdds,AS Silver,34333,3213,defsd,A-1222,fdsfds-L,Success
You are missing a "/" between path and file_name causing it to try and open a file called "pathMyfile.txt" when src_folder="path" and your file is called "Myfile.txt"
Also you might want to use src_folder instead of hardcoding "path" in your file open line.
Lastly, you could try to use f-strings instead of string concatenation with + for clarity and performance.
Example:
import time, os
from datetime import datetime
def remove_header_replace_delimiter():
src_folder = 'path'
src_files = os.listdir(src_folder)
print(src_files)
for file_name in src_files:
with open(f'{src_folder}/{file_name}', 'r') as inp, open(f'{src_folder}/fixed_{file_name}', 'w') as out:
next(inp)
for line in inp:
line = line.replace(',', '|')
print(line)
out.write(line)
remove_header_replace_delimiter()
I am trying to modify my .fasta files from this:
>YP_009208724.1 hypothetical protein ADP65_00072 [Achromobacter phage phiAxp-3]
MSNVLLKQ...
>YP_009220341.1 terminase large subunit [Achromobacter phage phiAxp-1]
MRTPSKSE...
>YP_009226430.1 DNA packaging protein [Achromobacter phage phiAxp-2]
MMNSDAVI...
to this:
>Achromobacter phage phiAxp-3
MSNVLLKQ...
>Achromobacter phage phiAxp-1
MRTPSKSE...
>Achromobacter phage phiAxp-2
MMNSDAVI...
Now, I've already have a script that can do it to a single file:
with open('Achromobacter.fasta', 'r') as fasta_file:
out_file = open('./fastas3/Achromobacter.fasta', 'w')
for line in fasta_file:
line = line.rstrip()
if '[' in line:
line = line.split('[')[-1]
out_file.write('>' + line[:-1] + "\n")
else:
out_file.write(str(line) + "\n")
but I can't get to automate the process for all 120 files in my folder.
I tried using glob.glob, but I can't seem to make it work:
import glob
for fasta_file in glob.glob('*.fasta'):
outfile = open('./fastas3/'+fasta_file, 'w')
with open(fasta_file, 'r'):
for line in fasta_file:
line = line.rstrip()
if '[' in line:
line2 = line.split('[')[-1]
outfile.write('>' + line2[:-1] + "\n")
else:
outfile.write(str(line) + "\n")
it gives me this output:
A
c
i
n
e
t
o
b
a
c
t
e
r
.
f
a
s
t
a
I managed to get a list of all files in the folder, but can't open certain files using the object on the list.
import os
file_list = []
for file in os.listdir("./fastas2/"):
if file.endswith(".fasta"):
file_list.append(file)
Considering you are able to change the contents of file name now you need to automate the process. We changed the function for one file by removing file handler which was used twice for the opening of file.
def file_changer(filename):
data_to_put = ''
with open(filename, 'r+') as fasta_file:
for line in fasta_file.readlines():
line = line.rstrip()
if '[' in line:
line = line.split('[')[-1]
data_to_put += '>' + str(line[:-1]) + "\n"
else:
data_to_put += str(line) + "\n"
fasta_file.write(data_to_put)
fasta_file.close()
Now we need to iterate over all your files. So lets use glob module for it
import glob
for file in glob.glob('*.fasta'):
file_changer(file)
You are iterating the file name, which gives you all the characters in the name instead of the lines of the file. Here is a corrected version of the code:
import glob
for fasta_file_name in glob.glob('*.fasta'):
with open(fasta_file_name, 'r') as fasta_file, \
open('./fastas3/' + fasta_file_name, 'w') as outfile:
for line in fasta_file:
line = line.rstrip()
if '[' in line:
line2 = line.split('[')[-1]
outfile.write('>' + line2[:-1] + "\n")
else:
outfile.write(str(line) + "\n")
As an alternative to the Python script, you can simply use sed from the command line:
sed -i 's/^>.*\[\(.*\)\].*$/>\1/' *.fasta
This will modify all files in place, so consider copying them first.
i have a python file named file_1.py
it has some code in which, i just have to change a word "file_1" to "file_2"
and also preserve indentation of other functions`
and save it as file_2.py
there are 3 occurances of the word file_1
i have to do this for 100 such times. `file_1.py, file_2.py.....file_100.py`
is there any way to automate this?
Run this script:
import fileinput
with fileinput.FileInput('file_1.py', inplace=True, backup='.bak') as file:
for line in file:
print(line.replace('file_1', 'file_2'), end='')
hope this help :)
create a script:
first: read file
with open("./file1.py") as f:
content = f.read()
second: replace filename
new_content = content.replace("file1","file2")
third: write new file(I would suggest you write a new file)
with open("./file2.py", "w") as f:
f.write(new_content)
if you have multiple files, use something like
filenames = ["file" + str(item) for item in range(1,100)]
for filename in filenames:
with open(filename + ".py") as f:
content = f.read()
new_filename = filename[:-1] + str(int(filename[-1]) + 1)
new_content = content.replace(filename,new_filename)
with open("./another_folder" + new_filename + ".py", "w") as f:
f.write(new_content)
import os
searchquery = 'word'
with open('Y:/Documents/result.txt', 'w') as f:
for filename in os.listdir('Y:/Documents/scripts/script files'):
with open('Y:/Documents/scripts/script files/' + filename) as currentFile:
for line in currentFile:
if searchquery in line:
start = line.find(searchquery)
end = line.find("R")
result = line[start:end]
print result
f.write(result + ' ' +filename[:-4] + '\n')
Now this works well to search for "word" and prints everything after word up until an "R" providing that it is on the same line. However if the "R" is on the line it won't print the stuff before it.
eg:
this should not be printed!
this should also not be printed! "word" = 12345
6789 "R" After this R should not be printed either!
In the case above the 6789 on line 3 will not be printed with my current. However i want it to be. How do i make python keep going over multiple lines until it reaches the "R".
Thanks for any help!
It is normal that it does not print the content on the next line because you are searching for the word on one line. A better solution would be as follows.
import os
searchquery = 'word'
with open('Y:/Documents/result.txt', 'w') as f:
for filename in os.listdir('Y:/Documents/scripts/script files'):
with open('Y:/Documents/scripts/script files/' + filename) as currentFile:
content = ''.join([line for line in currentFile])
start = content.find(searchquery)
end = content.find("R")
result = content[start:end].replace("\n", "")
print result
f.write(result + ' ' +filename[:-4] + '\n')
Please be advised, this will work only for a single occurence. You will need to break it up further to print multiple occurences.