How delete a line that repeat in multiple files text with python - python

i am a beginner in python.
So what I want to do is a script that finds a specific line in multiple files and delete it and rewrite the file with the same name. Something like this but for more files:
similar to problem "Deleting a line in multiple files in python"
i traid quith this code that i can find in the before question
but it didn't work
import os
os.chdir('C:\escenarie')
source = "*.mgt"
for root, dirs, filenames in os.walk(source):
for f in filenames:
this_file = open(os.path.join(source, f), "r")
this_files_data = this_file.readlines()
this_file.close()
# rewrite the file with all line except the one you don't want
this_file = open(os.path.join(source, f), "w")
for line in this_files_data:
if line != " 1.200 5 0.00000"":
this_file.write(line)
this_file.close()

You ought to learn basic file operations in Python. The code example below should help.
#open file
with open(filename, 'r') as f:
lines = f.readlines()
#find index of line to remove
for index, line in enumerate(lines):
if 'delete me' in line:
#remove line
lines.pop(index)
break
#write new file
with open(filename, 'w') as f:
f.write(''.join(lines))
And to perform this operation on multiple files:
filenames = ['file1.txt', 'file2.txt']
for filename in filenames:
# ... see code above

Related

copying files mentioned in a file in the corresponding Paths in Python

it's a python script.
I am writing the content of all the files with a special extension to a txt file skipping the first line in files, removing duplicates and special Paths
import os
skipLines = "/home/YY"
mainDir = "C:/XX"
directory = os.fsencode(mainDir)
endFile = mainDir+"/endFile.txt"
open(endFile, 'w').close()
fileoutput = open(endFile, "a")
for file in os.listdir(directory):
filename = os.fsdecode(file)
fileFullPath = mainDir+"/"+filename
if filename.endswith(".dep"):
print("working on : ", filename)
file = open(fileFullPath, "r")
next(file)
for line in file:
if skipLines not in line:
fileoutput.write(line)
else:
continue
file.close()
fileoutput.close()
lines_seen = set()
with open("C:/XX/endFile.txt", "r+") as f:
d = f.readlines()
f.seek(0)
for i in d:
if i not in lines_seen:
f.write(i)
lines_seen.add(i)
f.truncate()
fileoutput.close()
my end file looks like this:
F0/XXX.c
../../F1/F2/X/Test.h
../../F1/F2/Z/Test1.h
../../../F1/F3/Y/Test3.h
.
.
.
Here is my question:
how can I copy these file from the paths indicated in each line, and create a folder and create the same file path and copy the files in the corresponding paths?

how to run multiple text files as input in Python

I wrote a python code which will take input of a txt file and print to excel . I'm able to achieve for one txt file giving as input . But the requirement I have will have around a million of txt files in a folder . SO I don't know how to change the python code to take the input from a folder .
The below code handles input 1.txt file . I want to run multiple txt files from a folder , That's my requirement .
with open('C:/test/1.txt') as infile:
registrations = []
fields = OrderedDict()
d = {}
for line in infile:
line = line.strip()
if line:
key, value = [s.strip() for s in line.split(':', 1)]
d[key] = value
fields[key] = None
else:
if d:
registrations.append(d)
print(d)
d = {}
if ',' not in line:
print('line without ,:', line)
continue
else:
if d: # handle EOF
registrations.append(d)
with open('C:/registrations.csv', 'w') as outfile:
writer = DictWriter(outfile, fieldnames=fields)
writer.writeheader()
writer.writerows(registrations)
Thanks,
Meera
Use the pathlib module:
from pathlib import Path
FOLDER = Path('your/folder/here')
for file in FOLDER.glob('*.txt'):
# Do your thing
pass
Based on your source code, I optimized it. I use os.walk to access each .txt file and then read it line by line in those txt files and save it in an enum. Then I will check each line in that enum.
import os
extension = [".txt"]
path = "C:/test"
for subdir, dirs, files in os.walk(path):
for file in files:
file_path = os.path.join(subdir, file)
ext = os.path.splitext(file)[-1].lower()
if ext in extension:
with open(file_path, "r") as f:
try:
f_content = f.readlines()
except Exception as e:
print(e)
for l_idx, line in enumerate(f_content):
# ..................................
# l_idx: return position line
# line: content in line

File not found error, even though file was found

Using the following bit of code:
for root, dirs, files in os.walk(corpus_name):
for file in files:
if file.endswith(".v4_gold_conll"):
f= open(file)
lines = f.readlines()
tokens = [line.split()[3] for line in lines if line.strip()
and not line.startswith("#")]
print(tokens)
I get the following error:
Traceback (most recent call last): File "text_statistics.py", line
28, in
corpus_reading_pos(corpus_name, option) File "text_statistics.py", line 13, in corpus_reading_pos
f= open(file) FileNotFoundError: [Errno 2] No such file or directory: 'abc_0001.v4_gold_conll'
As you can see, the file was, in fact, located, but then when I try to open the file, it... can't find it?
Edit:
using this updated code, it stops after reading 7 files, but there are 172 files.
def corpus_reading_token_count(corpus_name, option="token"):
for root, dirs, files in os.walk(corpus_name):
tokens = []
file_count = 0
for file in files:
if file.endswith(".v4_gold_conll"):
with open((os.path.join(root, file))) as f:
tokens += [line.split()[3] for line in f if line.strip() and not line.startswith("#")]
file_count += 1
print(tokens)
print("File count:", file_count)
file is just the file without the directory, which is root in your code. Try this:
f = open(os.path.join(root, file)))
Also, you should better use with to open the file, and not use file as a variable name, shadowing the builtin type. Also, judging from your comment, you should probably extend the list of tokens (use += instead of =):
tokens = []
for root, dirs, files in os.walk(corpus_name):
for filename in files:
if filename.endswith(".v4_gold_conll"):
with open(os.path.join(root, filename))) as f:
tokens += [line.split()[3] for line in f if line.strip() and not line.startswith("#")]
print(tokens)
You'll have to join the root with the filename.
for root, dirs, files in os.walk(corpus_name):
for file in files:
if file.endswith(".v4_gold_conll"):
with open(os.path.join(root, file)) as f:
tokens = [
line.split()[3]
for line in f
if line.strip() and not line.startswith("#")
]
print(tokens)

Copying selected lines from files in different directories to another file

I have a directory with many subdirectories, containing files. I want to open the files ending with "root.vrpj" or "root.vprj", in "App_integrations" folder and copy the lines containing the word "table" to another file.
Until now I've managed to visit each file with this code:
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith(("root.vrpj", "root.vprj")):
The problem is that what I have now are just the names of the files I want to visit and I'm stuck here.
You can try this:
f = open('final_file.txt', 'w')
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith("root.vrpj") or filename.endswith("root.vprj"):
with open(filename) as data:
for line in data:
if "table" in data:
f.write('{}\n'.format(data))
f.close()
This is a version of Ajax' code that closes the files you open in the loop (and fixes a couple of other minor issues):
with open('final_file.txt', 'w') as f:
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith(("root.vrpj"), ("root.vprj")):
with open(os.path.join(root, filename)) as finput:
for line in finput:
if 'table' in line:
f.write(line)
however, when you see 8 levels of indentation you need to refactor, e.g.:
def find_files(startdir, *extensions):
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith(extensions):
yield os.path.join(root, filename)
def find_lines(fname, text):
with open(fname) as fp:
return [line for line in fp if text in line]
with open('final_file.txt', 'w') as f:
for fname in find_files(movedir, 'root.vrpj', 'root.vprj'):
f.writelines(find_lines(fname, 'table'))
I finally solved it
import os
rootdir = my root folder
# creates a file f that contains all the lines of the files
# with "root.vrpj" or "root.vprj" in their name
# and who are inside "App_integrations" folders
# without duplicates
#creating the big file with all the file containing the lines I need
f = open('final_file.txt', 'a')
for root, dirs, files in os.walk(rootdir):
for filename in files:
if (filename.endswith(("root.vrpj", "root.vprj")) and ("App_Integration" in os.path.join(root, filename))):
full_name = os.path.join(root, filename)
data = open(full_name).read()
f.write(data + "\n")
f.close()
#copying the lines I need to f1 without duplicates
lines_seen = set()
f = open('final_file.txt')
f1 = open('testread1.txt', 'a')
doIHaveToCopyTheLine=False
for line in f.readlines():
if (("Table" in line) and (line not in lines_seen)):
doIHaveToCopyTheLine=True
if doIHaveToCopyTheLine:
f1.write(line)
lines_seen.add(line)
f1.close()
f.close()
Find the files
from pathlib import Path
import itertools
source_dir = Path(<source_dir>)
patterns = ['**/*root.vrpj', '**/*root.vprj']
files = itertools.chain.from_iterables(source_dir.glob(pat) for pat in patterns))
Filter the files:
def filter_lines(files):
for file in files:
if not 'App_Integration' in file.parts:
continue
with file.open('r') as file_handle:
for line in file_handle:
if 'table' in line:
yield line
Write the output
def save_lines(lines, output_file=sys.std_out):
for line in lines:
output_file.write(line)
with Path(<output_file>).open('w') as output_file:
save_lines(filter_lines(files), as output_file)

Not opening a specific text file in Python

I have a folder with a bunch of text files. I have the following code that opens all the text files in its directory when executed and throws them all together in a master text file, "result.txt".
import glob
read_files = glob.glob("*.txt")
with open("result.txt", "wb") as outfile:
for f in read_files:
with open(f, "rb") as infile:
outfile.write(infile.read())
I don't want this script to open "result.txt". All text files except result.txt. How can I do this? I don't want it to duplicate result.txt by writing its contents into itself
Use a filter function:
read_files = filter(lambda f : f != 'result.txt', glob.glob('*.txt'))
Well, you can filter result.txt when looping through all files:
import glob
read_files = glob.glob("*.txt")
with open("result.txt", "wb") as outfile:
for f in (file for file in read_files if file != "result.txt"):
with open(f, "rb") as infile:
outfile.write(infile.read())
Alternatively, to prevent bugs in futher uses of read_files list, you could remove "result.txt" from it after glob.glob:
read_files = glob.glob("*.txt")
try:
read_files.remove("result.txt")
except ValueError: #File result.txt does not exist yet
pass
You could use continue to skip the file and start the next iteration of the loop:
for f in read_files:
if f == "result.txt":
continue
...
Alternatively, filter the list of files before you start looping:
read_files = [f for f in glob.glob("*.txt") if f != "result.txt"]

Categories

Resources