I am trying code to remove the header from multiple CSV files and add | delimiter by replacing, here is my code but it's getting out-
import time, os
from datetime import datetime
def remove_header_replace_delimiter():
src_folder = 'path'
src_files = os.listdir(src_folder)
print(src_files)
for file_name in src_files:
with open('path' + file_name, 'r') as inp, open('path' + file_name, 'w') as out:
next(inp)
for line in inp:
line = line.replace(',', '|')
print(line)
out.write(line)
Myfile content -
Date,Runner Name,Automation,Order Number,SON,Account Name,Quote Number,Product Code,Status
01/02/2021 10:43:25,dsadsa,AS Silver,444,3323,aaapp,W-3342,AQS-11-L,Failed
01/02/2021 10:57:52,dsfsdds,AS Silver,34333,3213,defsd,A-1222,fdsfds-L,Success
You are missing a "/" between path and file_name causing it to try and open a file called "pathMyfile.txt" when src_folder="path" and your file is called "Myfile.txt"
Also you might want to use src_folder instead of hardcoding "path" in your file open line.
Lastly, you could try to use f-strings instead of string concatenation with + for clarity and performance.
Example:
import time, os
from datetime import datetime
def remove_header_replace_delimiter():
src_folder = 'path'
src_files = os.listdir(src_folder)
print(src_files)
for file_name in src_files:
with open(f'{src_folder}/{file_name}', 'r') as inp, open(f'{src_folder}/fixed_{file_name}', 'w') as out:
next(inp)
for line in inp:
line = line.replace(',', '|')
print(line)
out.write(line)
remove_header_replace_delimiter()
Related
Here below is my code about how to edit text file.
Since python can't just edit a line and save it at the same time,
I save the previous text file's content into a list first then write it out.
For example,if there are two text files called sample1.txt and sample2.txt in the same folder.
Sample1.txt
A for apple.
Second line.
Third line.
Sample2.txt
First line.
An apple a day.
Third line.
Execute python
import glob
import os
#search all text files which are in the same folder with python script
path = os.path.dirname(os.path.abspath(__file__))
txtlist = glob.glob(path + '\*.txt')
for file in txtlist:
fp1 = open(file, 'r+')
strings = [] #create a list to store the content
for line in fp1:
if 'apple' in line:
strings.append('banana\n') #change the content and store into list
else:
strings.append(line) #store the contents did not be changed
fp2 = open (file, 'w+') # rewrite the original text files
for line in strings:
fp2.write(line)
fp1.close()
fp2.close()
Sample1.txt
banana
Second line.
Third line.
Sample2.txt
First line.
banana
Third line.
That's how I edit specific line for text file.
My question is : Is there any method can do the same thing?
Like using the other functions or using the other data type rather than list.
Thank you everyone.
Simplify it to this:
with open(fname) as f:
content = f.readlines()
content = ['banana' if line.find('apple') != -1 else line for line in content]
and then write value of content to file back.
Instead of putting all the lines in a list and writing it, you can read it into memory, replace, and write it using same file.
def replace_word(filename):
with open(filename, 'r') as file:
data = file.read()
data = data.replace('word1', 'word2')
with open(filename, 'w') as file:
file.write(data)
Then you can loop through all of your files and apply this function
The built-in fileinput module makes this quite simple:
import fileinput
import glob
with fileinput.input(files=glob.glob('*.txt'), inplace=True) as files:
for line in files:
if 'apple' in line:
print('banana')
else:
print(line, end='')
fileinput redirects print into the active file.
import glob
import os
def replace_line(file_path, replace_table: dict) -> None:
list_lines = []
need_rewrite = False
with open(file_path, 'r') as f:
for line in f:
flag_rewrite = False
for key, new_val in replace_table.items():
if key in line:
list_lines.append(new_val+'\n')
flag_rewrite = True
need_rewrite = True
break # only replace first find the words.
if not flag_rewrite:
list_lines.append(line)
if not need_rewrite:
return
with open(file_path, 'w') as f:
[f.write(line) for line in list_lines]
if __name__ == '__main__':
work_dir = os.path.dirname(os.path.abspath(__file__))
txt_list = glob.glob(work_dir + '/*.txt')
replace_dict = dict(apple='banana', orange='grape')
for txt_path in txt_list:
replace_line(txt_path, replace_dict)
I have many text files, and each of them has a empty line at the end. My scripts did not seem to remove them. Can anyone help please?
# python 2.7
import os
import sys
import re
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
all_file = open(filepath,'r')
lines = all_file.readlines()
output = 'F:/WF/new/' + filename
# Read in each row and parse out components
for line in lines:
# Weed out blank lines
line = filter(lambda x: not x.isspace(), lines)
# Write to the new directory
f = open(output,'w')
f.writelines(line)
f.close()
You can use Python's rstrip() function to do this as follows:
filename = "test.txt"
with open(filename) as f_input:
data = f_input.read().rstrip('\n')
with open(filename, 'w') as f_output:
f_output.write(data)
This will remove all empty lines from the end of the file. It will not change the file if there are no empty lines.
you can remove last empty line by using:
with open(filepath, 'r') as f:
data = f.read()
with open(output, 'w') as w:
w.write(data[:-1])
You can try this without using the re module:
filedir = 'F:/WF/'
dir = os.listdir(filedir)
for filename in dir:
if 'ABC' in filename:
filepath = os.path.join(filedir,filename)
f = open(filepath).readlines()
new_file = open(filepath, 'w')
new_file.write('')
for i in f[:-1]:
new_file.write(i)
new_file.close()
For each filepath, the code opens the file, reads in its contents line by line, then writes over the file, and lastly writes the contents of f to the file, except for the last element in f, which is the empty line.
You can remove the last blank line by the following command. This worked for me:
file = open(file_path_src,'r')
lines = file.read()
with open(file_path_dst,'w') as f:
for indx, line in enumerate(lines):
f.write(line)
if indx != len(lines) - 1:
f.write('\n')
i think this should work fine
new_file.write(f[:-1])
i have a python file named file_1.py
it has some code in which, i just have to change a word "file_1" to "file_2"
and also preserve indentation of other functions`
and save it as file_2.py
there are 3 occurances of the word file_1
i have to do this for 100 such times. `file_1.py, file_2.py.....file_100.py`
is there any way to automate this?
Run this script:
import fileinput
with fileinput.FileInput('file_1.py', inplace=True, backup='.bak') as file:
for line in file:
print(line.replace('file_1', 'file_2'), end='')
hope this help :)
create a script:
first: read file
with open("./file1.py") as f:
content = f.read()
second: replace filename
new_content = content.replace("file1","file2")
third: write new file(I would suggest you write a new file)
with open("./file2.py", "w") as f:
f.write(new_content)
if you have multiple files, use something like
filenames = ["file" + str(item) for item in range(1,100)]
for filename in filenames:
with open(filename + ".py") as f:
content = f.read()
new_filename = filename[:-1] + str(int(filename[-1]) + 1)
new_content = content.replace(filename,new_filename)
with open("./another_folder" + new_filename + ".py", "w") as f:
f.write(new_content)
I have a folder full of .mpt files, each of them having the same data format.
I need to delete the first 57 lines from all files and append these files into one csv - output.csv.
I have that section already:
import glob
import os
dir_name = 'path name'
lines_to_ignore = 57
input_file_format = '*.mpt'
output_file_name = "output.csv"
def convert():
files = glob.glob(os.path.join(dir_name, input_file_format))
with open(os.path.join(dir_name, output_file_name), 'w') as out_file:
for f in files:
with open(f, 'r') as in_file:
content = in_file.readlines()
content = content[lines_to_ignore:]
for i in content:
out_file.write(i)
print("working")
convert()
print("done")
This part works ok.
how do i add the filename of each .mpt file as the last column of the output.csv
Thank you!
This is a quick 'n dirty solution.
In this loop the variable i is just a string (a line from a CSV file):
for i in content:
out_file.write(i)
So you just need to 1) strip off the end of line character(s) (either "\n" or "\r\n") and append ",".
If you're using Unix, try:
for i in content:
i = i.rstrip("\n") + "," + output_file_name + "\n"
out_file.write(i)
This assumes that the field separator is a comma. Another option is:
for i in content:
i = i.rstrip() + "," + output_file_name
print >>out_file, i
This will strip all white space from the end of i.
Add quotes if you need to quote the output file name:
i = i.rstrip(...) + ',"' + output_file_name '"'
The relevant part:
with open(f, 'r') as in_file:
content = in_file.readlines()
content = content[lines_to_ignore:]
for i in content:
new_line = ",".join([i.rstrip(), f]) + "\n" #<-- this is new
out_file.write(new_line) #<-- this is new
The problem I am having at this point in time (being new to Python) is writing strings to a text file. The issue I'm experiencing is one where either the strings don't have linebreaks inbetween them or there is a linebreak after every character. Code to follow:
import string, io
FileName = input("Arb file name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
m_line = m_line.split(": ", 1)
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
TempFile.write(MyString)
MyFile.close()
TempFile.close()
My input looks like this:
1 Jargon
2 Python
3 Yada Yada
4 Stuck
My output when I do this is:
JargonPythonYada YadaStuck
I then modify the source code to this:
import string, io
FileName = input("Arb File Name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
m_line = m_line.split(": ", 1)
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
#print(MyString)
TempFile.write('\n'.join(MyString))
MyFile.close()
TempFile.close()
Same input and my output looks like this:
J
a
r
g
o
nP
y
t
h
o
nY
a
d
a
Y
a
d
aS
t
u
c
k
Ideally, I would like each of the words to appear on a seperate line without the numbers in front of them.
Thanks,
MarleyH
You have to write the '\n' after each line, since you're stripping the original '\n';
Your idea of using '\n'.join() doesn't work because it will use\n to join the string, inserting it between each char of the string. You need a single \n after each name, instead.
import string, io
FileName = input("Arb file name (.txt): ")
with open(FileName, 'r') as MyFile:
with open('TempFile.txt', 'w', encoding='UTF-8') as TempFile:
for line in MyFile:
line = line.strip().split(": ", 1)
TempFile.write(line[1] + '\n')
fileName = input("Arb file name (.txt): ")
tempName = 'TempFile.txt'
with open(fileName) as inf, open(tempName, 'w', encoding='UTF-8') as outf:
for line in inf:
line = line.strip().split(": ", 1)[-1]
#print(line)
outf.write(line + '\n')
Problems:
the result of str.split() is a list (this is why, when you cast it to str, you get ['my item']).
write does not add a newline; if you want one, you have to add it explicitly.