I want all files in directory "path" to have the string "error" removed from them, and the result saved back to the same file that was edited. My current code (below) ends up clearing the entire file, rather than just removing the string and keeping everything else the same.
import os

path = "path"
files = os.listdir(path)
error = "string"
for index, file in enumerate(files):
    with open(os.path.join(path, file)) as fin, open(os.path.join(path, file), "w+") as fout:
        for line in fin:
            line = line.replace(error, "f")
            fout.write(line)
import os

path = "path"
files = os.listdir(path)
error = "string"
for index, file in enumerate(files):
    with open(os.path.join(path, file), 'r') as fin:
        d = fin.read()
    with open(os.path.join(path, file), "w") as fout:
        d = d.replace(error, "")
        fout.write(d)
This is the correct way to do this:
import os

path = "path"
for file in os.listdir(path):
    full_path = os.path.join(path, file)  # join with the directory, in case it isn't the cwd
    if not os.path.isdir(full_path):
        with open(full_path, 'r+') as fd:
            contents = fd.read().replace('error', '')
            fd.seek(0)      # rewind to the start before rewriting
            fd.write(contents)
            fd.truncate()   # drop any leftover bytes from the old contents
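If the files are large or the process could be interrupted mid-write, a variant that writes to a temporary file and then swaps it in place of the original avoids losing data on failure. A minimal sketch of that approach, reusing the directory and target string from the question:

import os
import tempfile

path = "path"
for name in os.listdir(path):
    full_path = os.path.join(path, name)
    if os.path.isdir(full_path):
        continue
    # Write the filtered contents to a temp file in the same directory,
    # then replace the original (atomic on POSIX).
    with open(full_path) as fin, tempfile.NamedTemporaryFile(
            "w", dir=path, delete=False) as tmp:
        for line in fin:
            tmp.write(line.replace("error", ""))
    os.replace(tmp.name, full_path)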
I am writing the content of all the files with a particular extension to a single txt file, skipping the first line of each file and removing duplicate lines and lines containing special paths.
import os

skipLines = "/home/YY"
mainDir = "C:/XX"
directory = os.fsencode(mainDir)
endFile = mainDir + "/endFile.txt"
open(endFile, 'w').close()  # truncate the output file
fileoutput = open(endFile, "a")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    fileFullPath = mainDir + "/" + filename
    if filename.endswith(".dep"):
        print("working on : ", filename)
        file = open(fileFullPath, "r")
        next(file)  # skip the first line
        for line in file:
            if skipLines not in line:
                fileoutput.write(line)
        file.close()
fileoutput.close()

# remove duplicate lines in place
lines_seen = set()
with open("C:/XX/endFile.txt", "r+") as f:
    d = f.readlines()
    f.seek(0)
    for i in d:
        if i not in lines_seen:
            f.write(i)
            lines_seen.add(i)
    f.truncate()
My end file looks like this:
F0/XXX.c
../../F1/F2/X/Test.h
../../F1/F2/Z/Test1.h
../../../F1/F3/Y/Test3.h
.
.
.
Here is my question:
How can I copy these files from the paths indicated in each line, creating a folder that reproduces the same file paths, and copy each file into its corresponding path?
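A minimal sketch of one way to do this, assuming the relative paths in endFile.txt resolve against mainDir from the script above; dest_root is a hypothetical destination folder, and dropping the leading ".." segments when rebuilding the tree is an assumption about the desired layout:

import os
import shutil

mainDir = "C:/XX"              # base the relative paths resolve against (assumed)
dest_root = "C:/XX/collected"  # hypothetical destination folder

with open(mainDir + "/endFile.txt") as listing:
    for line in listing:
        rel_path = line.strip()
        if not rel_path:
            continue
        src = os.path.normpath(os.path.join(mainDir, rel_path))
        # rebuild the path under dest_root, dropping "." and ".." segments
        parts = [p for p in rel_path.split("/") if p not in ("..", ".")]
        if not parts:
            continue
        dst = os.path.join(dest_root, *parts)
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy(src, dst)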
I wrote a Python script that takes a txt file as input and prints to Excel. I can do this for one txt file given as input, but the requirement involves around a million txt files in a folder, and I don't know how to change the script to take its input from a folder.
The code below handles a single input file, 1.txt. I want it to run over multiple txt files from a folder; that's my requirement.
from collections import OrderedDict
from csv import DictWriter

with open('C:/test/1.txt') as infile:
    registrations = []
    fields = OrderedDict()
    d = {}
    for line in infile:
        line = line.strip()
        if line:
            key, value = [s.strip() for s in line.split(':', 1)]
            d[key] = value
            fields[key] = None
        else:
            if d:
                registrations.append(d)
                print(d)
                d = {}
            if ',' not in line:
                print('line without ,:', line)
                continue
    else:
        if d:  # handle EOF
            registrations.append(d)

with open('C:/registrations.csv', 'w') as outfile:
    writer = DictWriter(outfile, fieldnames=fields)
    writer.writeheader()
    writer.writerows(registrations)
Thanks,
Meera
Use the pathlib module:
from pathlib import Path

FOLDER = Path('your/folder/here')

for file in FOLDER.glob('*.txt'):
    # Do your thing
    pass
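For instance, a sketch that folds the parsing loop from the question into the glob, collecting every record from every file into one CSV. The folder and output paths are the ones from the question; treating any line without ':' as a record separator is an assumption:

from collections import OrderedDict
from csv import DictWriter
from pathlib import Path

FOLDER = Path('C:/test')  # input folder from the question
fields = OrderedDict()
registrations = []

for txt_file in FOLDER.glob('*.txt'):
    d = {}
    for line in txt_file.read_text().splitlines():
        line = line.strip()
        if line and ':' in line:
            key, value = [s.strip() for s in line.split(':', 1)]
            d[key] = value
            fields[key] = None
        elif d:  # blank line (or line without ':') ends the current record
            registrations.append(d)
            d = {}
    if d:  # flush the last record of the file
        registrations.append(d)

with open('C:/registrations.csv', 'w', newline='') as outfile:
    writer = DictWriter(outfile, fieldnames=fields)
    writer.writeheader()
    writer.writerows(registrations)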
Based on your source code, I optimized it. I use os.walk to reach each .txt file, read its lines into a list, and then walk that list with enumerate to check each line.
import os

extension = [".txt"]
path = "C:/test"

for subdir, dirs, files in os.walk(path):
    for file in files:
        file_path = os.path.join(subdir, file)
        ext = os.path.splitext(file)[-1].lower()
        if ext in extension:
            with open(file_path, "r") as f:
                try:
                    f_content = f.readlines()
                except Exception as e:
                    print(e)
                    continue  # skip files that cannot be read
            for l_idx, line in enumerate(f_content):
                # ..................................
                # l_idx: position (line number) in the file
                # line: content of the line
                pass
I have a corpus named ZebRa consisting of 7 folders, each with 10 files inside. I want to merge the 10 files inside each folder, so that in the end I have only 7 merged files, one per folder. Here is what I have tried:
import os

def CombineFiles(file_path):
    with open(file_path, 'r', encoding="utf-8") as f:
        OutFile = open('D:/1.txt', 'w', encoding="utf-8")
        lines = f.read().splitlines()
        for i in range(len(lines)):
            lines[i] = lines[i].replace('\n', '')
        lines.append('\n')
        for i in range(len(lines)):
            OutFile.write(lines[i])
    return OutFile

for root, dirs, files in os.walk("C:/ZebRa", topdown=False):
    for filename in files:
        file_path = os.path.join(root, filename)
        CombineFiles(file_path)
However, each call seems to empty OutFile, and the stored output is only the content of the last file in the last folder.
I have also tried the following; however, the output is an empty file:
import os

for root, dirs, files in os.walk("C:/ZebRa", topdown=False):
    print(files)
    with open('D:/1.txt', 'w', encoding="utf-8") as OutFile:
        for filename in files:
            file_path = os.path.join(root, filename)
            with open(file_path, 'r', encoding="utf-8") as f:
                OutFile.write(f.read())
Change open('D:/1.txt', 'w', encoding="utf-8") to open('D:/1.txt', 'a', encoding="utf-8"). The a flag appends new data to the end of the file, while the w flag rewrites the file every time it is opened.
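Since the stated goal is one merged file per folder rather than a single combined file, a sketch along these lines may be closer to the intent; the output directory and the per-folder file naming are assumptions:

import os

corpus_root = "C:/ZebRa"
out_dir = "D:/merged"  # hypothetical output directory
os.makedirs(out_dir, exist_ok=True)

for folder in os.listdir(corpus_root):
    folder_path = os.path.join(corpus_root, folder)
    if not os.path.isdir(folder_path):
        continue
    # one merged file per corpus folder, named after the folder
    merged_path = os.path.join(out_dir, folder + ".txt")
    with open(merged_path, "w", encoding="utf-8") as out:
        for filename in sorted(os.listdir(folder_path)):
            with open(os.path.join(folder_path, filename), encoding="utf-8") as f:
                out.write(f.read())
                out.write("\n")  # separate the source files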
I have some csv files in a folder and I am trying to delete all blank rows and move the new files into a new folder.
Here is the code I have:
import csv
import glob
import os
import shutil

path = 'in_folder/*.csv'
files = glob.glob(path)

# Read every file in the directory
x = 0  # counter
for filename in files:
    with open(filename, 'r') as fin:
        data = fin.read().splitlines(True)
        with open(filename, 'w') as fout:
            for line in fin.readlines():
                if ''.join(line.split(',')).strip() == '':
                    continue
                fout.write(line)
                x += 1

dir_src = "in_folder"
dir_dst = "out_folder"
for file in os.listdir(dir_src):
    if x > 0:
        src_file = os.path.join(dir_src, file)
        dst_file = os.path.join(dir_dst, file)
        shutil.move(src_file, dst_file)
What the code is doing right now is deleting everything from the files and moving them to the new folder. I want my files to stay the same, except with the blank rows deleted.
You can just output every line to the new file, no need to do any moving afterwards:
import glob
import os

dir_src = "in_folder/*.csv"
dir_dst = "out_folder"
files = glob.glob(dir_src)

# Read every file in the directory
x = 0  # counter
for filename in files:
    outfilename = os.path.join(dir_dst, os.path.basename(filename))
    with open(filename, 'r') as fin:
        with open(outfilename, 'w') as fout:
            for line in fin:
                if ''.join(line.split(',')).strip() == '':
                    continue
                fout.write(line)
                x += 1
Try this. In your version, the second loop iterates over fin, which has already been read to the end, so nothing gets written; iterate over data instead:
for filename in files:
    with open(filename, 'r') as fin:
        data = fin.read().splitlines(True)
    with open(filename, 'w') as fout:
        for line in data:
            if ''.join(line.split(',')).strip() == '':
                continue
            fout.write(line)
            x += 1
I have a directory with many subdirectories containing files. I want to open the files ending with "root.vrpj" or "root.vprj" inside "App_integrations" folders and copy the lines containing the word "table" to another file.
Until now I've managed to visit each file with this code:
for root, dirs, files in os.walk(movedir):
    for filename in files:
        if filename.endswith(("root.vrpj", "root.vprj")):
            # ... stuck here
The problem is that what I have now are just the names of the files I want to visit and I'm stuck here.
You can try this:
f = open('final_file.txt', 'w')

for root, dirs, files in os.walk(movedir):
    for filename in files:
        if filename.endswith("root.vrpj") or filename.endswith("root.vprj"):
            with open(filename) as data:
                for line in data:
                    if "table" in data:
                        f.write('{}\n'.format(data))

f.close()
This is a version of Ajax's code that closes the files you open in the loop (and fixes a couple of other minor issues):
with open('final_file.txt', 'w') as f:
    for root, dirs, files in os.walk(movedir):
        for filename in files:
            if filename.endswith(("root.vrpj", "root.vprj")):
                with open(os.path.join(root, filename)) as finput:
                    for line in finput:
                        if 'table' in line:
                            f.write(line)
However, when you see 8 levels of indentation, you need to refactor, e.g.:
def find_files(startdir, *extensions):
    for root, dirs, files in os.walk(startdir):
        for filename in files:
            if filename.endswith(extensions):
                yield os.path.join(root, filename)

def find_lines(fname, text):
    with open(fname) as fp:
        return [line for line in fp if text in line]

with open('final_file.txt', 'w') as f:
    for fname in find_files(movedir, 'root.vrpj', 'root.vprj'):
        f.writelines(find_lines(fname, 'table'))
I finally solved it
import os

rootdir = "my root folder"  # placeholder for the real root path

# creates a file f that contains all the lines of the files
# with "root.vrpj" or "root.vprj" in their name
# and which are inside "App_Integration" folders,
# without duplicates

# creating the big file with all the files containing the lines I need
f = open('final_file.txt', 'a')
for root, dirs, files in os.walk(rootdir):
    for filename in files:
        if (filename.endswith(("root.vrpj", "root.vprj"))
                and "App_Integration" in os.path.join(root, filename)):
            full_name = os.path.join(root, filename)
            data = open(full_name).read()
            f.write(data + "\n")
f.close()

# copying the lines I need to f1, without duplicates
lines_seen = set()
f = open('final_file.txt')
f1 = open('testread1.txt', 'a')
for line in f.readlines():
    if "Table" in line and line not in lines_seen:
        f1.write(line)
        lines_seen.add(line)
f1.close()
f.close()
Find the files:
from pathlib import Path
import itertools

source_dir = Path(<source_dir>)
patterns = ['**/*root.vrpj', '**/*root.vprj']
files = itertools.chain.from_iterable(source_dir.glob(pat) for pat in patterns)
Filter the files:
def filter_lines(files):
    for file in files:
        if 'App_Integration' not in file.parts:
            continue
        with file.open('r') as file_handle:
            for line in file_handle:
                if 'table' in line:
                    yield line
Write the output:
import sys

def save_lines(lines, output_file=sys.stdout):
    for line in lines:
        output_file.write(line)

with Path(<output_file>).open('w') as output_file:
    save_lines(filter_lines(files), output_file)