How to replace a string in every file in a directory - python

I'm trying to use Python to replace the string "XXXXXXXXXXX" with a new string "ILoveStackOverflow" in every file in a particular folder (all the files in the folder are XML).
My code is as follows:
import os, fnmatch
for filename in os.listdir("C:/Users/Francesco.Borri/Desktop/passivo GME"):
    if filename.endswith('.xml'):
        with open(os.path.join("C:/Users/Francesco.Borri/Desktop/passivo GME", filename)) as f:
            content = f.read()
        content = content.replace("XXXXXXXXXXX", "ILoveStackOverflow")
        with open(os.path.join("C:/Users/Francesco.Borri/Desktop/passivo GME", filename), mode="w") as f:  # Long Pierre-André answer
            f.write(content)
The next step would be to replace a different string, "YYYY", with a number that increases with every file. If there are 10 files in my directory and I set the starting number to 1, "YYYY" in the first file will be replaced with 1, in the second file with 2, and so on up to 10.

You are close. When you open the file the second time, you have to open it in write mode to be able to write the content.
with open(os.path.join("C:/Users/Francesco.Borri/Desktop/passivo GME", filename), 'w') as f:
f.write(content)
Once you fix this, I think the second part of your question is just maintaining a variable whose value you increment every time you replace the string. You could do it manually (iterate over the string), or use the replace function in a for loop:
with open(os.path.join("C:/Users/Francesco.Borri/Desktop/passivo GME", filename)) as f:
    content = f.read()
for i in range(content.count("YYYY")):
    content = content.replace("YYYY", str(i), 1)  # or str(i + 1)
with open(os.path.join("C:/Users/Francesco.Borri/Desktop/passivo GME", filename), 'w') as f:
    f.write(content)
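If the goal is instead one number per file, as described in the question (the first file gets 1, the second 2, and so on), a counter kept outside the per-file work does it. A minimal sketch, assuming the same folder and a starting number of 1:

import os

folder = "C:/Users/Francesco.Borri/Desktop/passivo GME"
counter = 1  # assumed starting number

for filename in os.listdir(folder):
    if filename.endswith('.xml'):
        path = os.path.join(folder, filename)
        with open(path) as f:
            content = f.read()
        content = content.replace("XXXXXXXXXXX", "ILoveStackOverflow")
        content = content.replace("YYYY", str(counter))  # every "YYYY" in this file gets the same number
        with open(path, 'w') as f:
            f.write(content)
        counter += 1  # the next file gets the next number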

with open(os.path.join("C:/Users/Francesco.Borri/Desktop/passivo GME", filename), mode="w") as f:
You must open the file in write mode.

Related

printing the content of a file, but getting an int as an output (number of characters in the file) in Python

I wrote a function that copies the content of one file to a new file.
The function gets 2 parameters:
the directory of the copied file
the directory of the new file.
When I try to print the content of the copied file, I get the content of the file (which is what I want), But when I try to do the same thing with the new file, I get the number of characters inside the file (14 in this case).
I don't understand why I get 2 different outputs with the same (at least as per my understanding) lines of code.
Would be happy to get some help, thank you!
Here's my code:
# creating the file that's going to be copied:
with open(source_link, 'w') as f:
    f.write('Copy this file')

# the function:
def copy_file_content(source, destination):
    # getting the content of the copied file:
    f1 = open(source, 'r')
    copied_file = f1.read()
    f1.close()
    # putting the content of the copied file in the new file:
    f2 = open(destination, 'w')
    new_file = f2.write(copied_file)
    f2.close
    # print old file:
    print(copied_file)
    print('***')
    # print new file:
    print(new_file)

copy_file_content(source = source_link, destination = dest_link)
Output:
Copy this file
***
14
As referenced in the Python documentation:
f.write(string) writes the contents of string to the file, returning the number of characters written.
As opposed to f.read(), which returns the file contents.
If you want new_file to hold the contents of the destination file, you will need to open it again in read mode:
with open(destination, 'r') as f:
    new_file = f.read()
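For illustration, the two return values can be seen side by side; a minimal sketch using a hypothetical demo.txt:

with open('demo.txt', 'w') as f:
    chars_written = f.write('Copy this file')  # .write() returns the number of characters written
print(chars_written)  # 14

with open('demo.txt', 'r') as f:
    text = f.read()  # .read() returns the file contents
print(text)  # Copy this file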
.read() returns the file contents, which is why when copied_file is set to f1.read(), you can print the contents out. However, .write() performs the write operation on the file and then returns the number of characters written.
Therefore new_file contains the number of characters written. Rather than setting the value of new_file to f2.write(), you must open the new file again in read mode and then call .read():
def copy_file_content(source, destination):
    # getting the content of the copied file:
    with open(source, 'r') as f1:
        copied_file = f1.read()
    # putting the content of the copied file in the new file:
    with open(destination, 'w') as f2:
        f2.write(copied_file)
    with open(destination, "r") as f2_read:
        new_file = f2_read.read()
    # print old file:
    print(copied_file)
    print('***')
    # print new file:
    print(new_file)

copy_file_content(source = source_link, destination = dest_link)
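As an aside, if the goal is only to copy a file rather than to inspect its contents along the way, the standard library's shutil module already covers it; a minimal sketch using the same source_link and dest_link names:

import shutil

shutil.copyfile(source_link, dest_link)  # copies the contents of source_link into dest_link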

python slice a string and use .find to get last three letters to determine the type of file

What I am asking is how I can correct my if statement. Essentially, I have a file attachment called 'fileName' and I am trying to get the last 3 letters from that file name to determine whether that type of file is in my config (csv, txt).
valid_filename = myconfig file (csv, txt)
def load_file():
    try:
        # get file from read email and assign a directory path (attachments/file)
        for fileName in os.listdir(config['files']['folder_path']):
            # validate the file extension per config
            # if valid_filename: else send email failure
            valid_filename = config['valid_files']['valid']
            if fileName[-3:].find(valid_filename):
                file_path = os.path.join(config['files']['folder_path'], fileName)
                # open file path and read it as csv file using csv.reader
                with open(file_path, "r") as csv_file:
                    csvReader = csv.reader(csv_file, delimiter=',')
                    first_row = True
Let me know if I can clarify anything better.
Try pathlib, for example.
Assuming the config file is in the form:
[valid_files]
valid = .csv, .txt
[files]
folder_path = .
# other imports
import pathlib

def load_file():
    valid_suffixes = [e.strip() for e in config['valid_files']['valid'].split(",")]
    folder_path = config['files']['folder_path']
    for fileName in os.listdir(folder_path):
        if pathlib.Path(fileName).suffix in valid_suffixes:
            file_path = os.path.join(folder_path, fileName)
            with open(file_path, "r") as csv_file:
                ...
The find() method returns -1 if the string you're searching for is not found (and the index of the match otherwise), so its result is not directly usable as a boolean: a match at position 0 is falsy, while a miss (-1) is truthy. To check whether the substring exists, compare the result against -1 (or use the in operator).
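If the find-based check is kept, note that the search also has to go the other way around: look for the three-letter extension inside the config value, not for the whole config value inside the extension. A minimal, self-contained sketch, with the config value and file name as assumptions:

valid_filename = "csv, txt"        # assumed value of config['valid_files']['valid']
fileName = "report.csv"            # hypothetical attachment name

extension = fileName[-3:]          # "csv"
if valid_filename.find(extension) != -1:   # -1 would mean the extension is not listed
    print("valid file:", fileName)
else:
    print("unsupported file type:", fileName)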

Write() starts halfway through

I am new to Python and I really don't understand why this is happening: when I run my code, the lower() is only applied to half (or less) of the text file. How can I fix this?
import glob, os, string, re
list_of_files = glob.glob("/Users/louis/Downloads/assignment/data2/**/*.txt")
for file_name in list_of_files:
    f = open(file_name, 'r+')
    for line in f:
        line = line.lower()
        f.write(line)
The problem is most probably that you are reading and writing through the same file handle at the same time, so each write lands after the current read position instead of replacing the original text. You need to read the content first and then return to the start of the file to write over the original content. Try this:
for file_name in list_of_files:
    with open(file_name, 'r+') as f:
        content = f.read().lower()
        f.seek(0, 0)  # return to the start of the file
        f.write(content)
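Since lower() never changes the length of the text, overwriting in place is enough here. If a replacement could ever be shorter than the original, truncating after the write avoids leaving stale bytes at the end; a minimal sketch of that variant:

for file_name in list_of_files:
    with open(file_name, 'r+') as f:
        content = f.read().lower()
        f.seek(0)        # back to the start of the file
        f.write(content)
        f.truncate()     # drop anything left over past the new content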

Trying to merge all text files in a folder and append file as well

I am trying to merge all text files in a folder. I have this part working, but when I try to append the file name before the contents of each text file, I'm getting an error that reads: TypeError: a bytes-like object is required, not 'str'
The code below must be pretty close, but something is definitely off. Any thoughts on what could be wrong?
import glob
folder = 'C:\\my_path\\'
read_files = glob.glob(folder + "*.txt")
with open(folder + "final_result.txt", "wb") as outfile:
    for f in read_files:
        with open(f, "rb") as infile:
            outfile.write(f)
            outfile.write(infile.read())
outfile.close
outfile.write(f) seems to be your problem: f is a str (the file name), but you opened the output file in binary mode with 'wb', so it expects bytes. You can convert the string to bytes using encode. You'll also likely not want that outfile.close in your last line (although you aren't actually calling the method there anyway, since the parentheses are missing). So something like this might work for you:
import glob
folder = 'C:\\my_path\\'
read_files = glob.glob(folder + "*.txt")
with open(folder + "final_result.txt", "wb") as outfile:
    for f in read_files:
        with open(f, "rb") as infile:
            outfile.write(f.encode('utf-8'))
            outfile.write(infile.read())
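Alternatively, since the inputs are plain text files, the whole thing could be done in text mode so that no encode call is needed; a minimal sketch keeping the same folder and glob pattern (the utf-8 encoding is an assumption):

import glob

folder = 'C:\\my_path\\'
read_files = glob.glob(folder + "*.txt")
with open(folder + "final_result.txt", "w", encoding="utf-8") as outfile:
    for f in read_files:
        with open(f, "r", encoding="utf-8") as infile:
            outfile.write(f + "\n")      # write the file name before its contents
            outfile.write(infile.read())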

How to combine several text files into one file?

I want to combine several text files into one output file.
My original code downloads 100 text files; each time I filter the text for several words and then write it to the output file.
Here is the part of my code that is supposed to combine the new text with the output text. The result is that each time it overwrites the output file, deleting the previous content and adding only the new text.
import fileinput
import glob
urls = ['f1.txt', 'f2.txt','f3.txt']
N = 0
print "read files"
for url in urls:
    read_files = glob.glob(urls[N])
    with open("result.txt", "wb") as outfile:
        for f in read_files:
            with open(f, "rb") as infile:
                outfile.write(infile.read())
    N += 1
and I tried this also
import fileinput
import glob
urls = ['f1.txt', 'f2.txt','f3.txt']
N = 0
print "read files"
for url in urls:
    file_list = glob.glob(urls[N])
    with open('result-1.txt', 'w') as file:
        input_lines = fileinput.input(file_list)
        file.writelines(input_lines)
    N += 1
Are there any suggestions?
I need to concatenate/combine approximately 100 text files into one .txt file in a sequential manner. (Each time I read one file and add it to result.txt.)
The problem is that you are re-opening the output file on each loop iteration, which causes it to be overwritten each time, unless you explicitly open it in append mode.
The glob logic is also unnecessary when you already know the filename.
Try this instead:
with open("result.txt", "wb") as outfile:
for url in urls:
with open(url, "rb") as infile:
outfile.write(infile.read())
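If re-opening the output file inside the loop is really wanted (for example because each downloaded file is filtered and written in a separate pass), append mode keeps the earlier content instead of discarding it; a minimal sketch of that variant:

for url in urls:
    with open(url, "rb") as infile:
        data = infile.read()          # any filtering of the text would happen here
    with open("result.txt", "ab") as outfile:  # "ab": append instead of overwrite
        outfile.write(data)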
