How to read multiple(more than 2) .txt files using python - python

I have 2 or more than .txt file contains
file1.txt
India
File2.txt
US
I wanted to write output in third file as India US.
Please any one can tell me how to do it using python.

import glob
all_text_files = glob.glob('/path/to/dir', '*.txt')
with open('output_file.txt', 'w') as fh:
for text_file in all_text_files:
data = open(text_file, 'r')
fh.write(data.read())
glob.glob('*.txt') returns ALL the .txt files in the current directory.
If you want to read only a few files, you can specify them in a list
all_text_files = ['file1.txt', 'file2.txt', ....., 'filen.txt']

source_files = ['file1.txt', 'file2.txt']
with open('output.txt', 'w') as fh_out:
for fname in source_files:
with open(fname, 'r') as fh:
fh_out.write(fh.read())

files = ['file1.txt','file2.txt']
for file in files:
with open(file,'r') as file_read:
with open('file3.txt', 'w+') as file_put:
file_put.write(file_read.read())

Related

Loop through files in a folder and create a new merged text file

I am working on merging a number of text files together into a single text document. I am able to read all the file names and create a new output document.
However, when I output the document, I am only getting the data from one file and not the rest? Overall it should be close to 1 million lines in a txt, but only getting the first 10k
import os
projpath1 = 'PATH1'
projpath2 = 'PATH2'
for root, dirs, files in os.walk(f"{projpath1}", topdown=False):
for name in files:
if not name.startswith('.DS_Store'):
split = name.split("/")
title = split[0]
filename = (os.path.join(root, name))
inputf = os.path.expanduser(f'{projpath1}/{title}')
updatedf = os.path.expanduser(f'{projpath2}/ENC_merged.txt')
with open(inputf, "r") as text_file, open(updatedf, 'w') as outfile:
for info in text_file:
for lines in info:
outfile.write(lines)
I really am stuck and can't figure it out :/
You are suppose to open create output file first and within it you need to save all the input files, something like this should work for you.
import os
projpath1 = 'PATH1'
projpath2 = 'PATH2'
with open(updatedf, 'w') as outfile:
for root, dirs, files in os.walk(f"{projpath1}", topdown=False):
for name in files:
if not name.startswith('.DS_Store'):
split = name.split("/")
title = split[0]
filename = (os.path.join(root, name))
inputf = os.path.expanduser(f'{projpath1}/{title}')
updatedf = os.path.expanduser(f'{projpath2}/ENC_merged.txt')
with open(inputf, "r") as text_file:
for info in text_file:
for lines in info:
outfile.write(lines)
What about doing it with bash
ls | xargs cat > merged_file

How delete a line that repeat in multiple files text with python

i am a beginner in python.
So what I want to do is a script that finds a specific line in multiple files and delete it and rewrite the file with the same name. Something like this but for more files:
similar to problem "Deleting a line in multiple files in python"
i traid quith this code that i can find in the before question
but it didn't work
import os
os.chdir('C:\escenarie')
source = "*.mgt"
for root, dirs, filenames in os.walk(source):
for f in filenames:
this_file = open(os.path.join(source, f), "r")
this_files_data = this_file.readlines()
this_file.close()
# rewrite the file with all line except the one you don't want
this_file = open(os.path.join(source, f), "w")
for line in this_files_data:
if line != " 1.200 5 0.00000"":
this_file.write(line)
this_file.close()
You ought to learn basic file operations in Python. The code example below should help.
#open file
with open(filename, 'r') as f:
lines = f.readlines()
#find index of line to remove
for index, line in enumerate(lines):
if 'delete me' in line:
#remove line
lines.pop(index)
break
#write new file
with open(filename, 'w') as f:
f.write(''.join(lines))
And to perform this operation on multiple files:
filenames = ['file1.txt', 'file2.txt']
for filename in filenames:
# ... see code above

merge multiple files into a single folder

I have corpus named ZebRa consisting of 7 folders, each having 10 files inside. I want to merge the 10 files inside each folder, in order to have finally only 7 folders. Here is what I have tried:
import os
def CombineFiles(file_path):
with open(file_path, 'r', encoding="utf-8") as f:
OutFile = open('D:/1.txt', 'w', encoding="utf-8")
lines = f.read().splitlines()
for i in range(len(lines)):
lines[i] = lines[i].replace('\n', '')
lines.append('\n')
for i in range(len(lines)):
OutFile.write(lines[i])
return OutFile
for root, dirs, files in os.walk("C:/ZebRa", topdown= False):
for filename in files:
file_path = os.path.join(root, filename)
CombineFiles(file_path)
However, it seems that each time it empties the content of OutFile and the stored output is only the content of the last file in the last folder
I have also tried the following, however, the output will be an empty file:
import os
for root, dirs, files in os.walk("C:/ZebRa", topdown= False):
print(files)
with open('D:/1.txt', 'w', encoding="utf-8") as OutFile:
for filename in files:
file_path = os.path.join(root, filename)
with open(file_path, 'r', encoding="utf-8") as f:
OutFile.write(f.read())
Change open('D:/1.txt', 'w', encoding="utf-8") to open('D:/1.txt', 'a', encoding="utf-8"). a flag is used to append new data to end of the file, while the w flag always rewrite the file. See this tutorial.

Copying selected lines from files in different directories to another file

I have a directory with many subdirectories, containing files. I want to open the files ending with "root.vrpj" or "root.vprj", in "App_integrations" folder and copy the lines containing the word "table" to another file.
Until now I've managed to visit each file with this code:
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith(("root.vrpj", "root.vprj")):
The problem is that what I have now are just the names of the files I want to visit and I'm stuck here.
You can try this:
f = open('final_file.txt', 'w')
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith("root.vrpj") or filename.endswith("root.vprj"):
with open(filename) as data:
for line in data:
if "table" in data:
f.write('{}\n'.format(data))
f.close()
This is a version of Ajax' code that closes the files you open in the loop (and fixes a couple of other minor issues):
with open('final_file.txt', 'w') as f:
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith(("root.vrpj"), ("root.vprj")):
with open(os.path.join(root, filename)) as finput:
for line in finput:
if 'table' in line:
f.write(line)
however, when you see 8 levels of indentation you need to refactor, e.g.:
def find_files(startdir, *extensions):
for root, dirs, files in os.walk(movedir):
for filename in files:
if filename.endswith(extensions):
yield os.path.join(root, filename)
def find_lines(fname, text):
with open(fname) as fp:
return [line for line in fp if text in line]
with open('final_file.txt', 'w') as f:
for fname in find_files(movedir, 'root.vrpj', 'root.vprj'):
f.writelines(find_lines(fname, 'table'))
I finally solved it
import os
rootdir = my root folder
# creates a file f that contains all the lines of the files
# with "root.vrpj" or "root.vprj" in their name
# and who are inside "App_integrations" folders
# without duplicates
#creating the big file with all the file containing the lines I need
f = open('final_file.txt', 'a')
for root, dirs, files in os.walk(rootdir):
for filename in files:
if (filename.endswith(("root.vrpj", "root.vprj")) and ("App_Integration" in os.path.join(root, filename))):
full_name = os.path.join(root, filename)
data = open(full_name).read()
f.write(data + "\n")
f.close()
#copying the lines I need to f1 without duplicates
lines_seen = set()
f = open('final_file.txt')
f1 = open('testread1.txt', 'a')
doIHaveToCopyTheLine=False
for line in f.readlines():
if (("Table" in line) and (line not in lines_seen)):
doIHaveToCopyTheLine=True
if doIHaveToCopyTheLine:
f1.write(line)
lines_seen.add(line)
f1.close()
f.close()
Find the files
from pathlib import Path
import itertools
source_dir = Path(<source_dir>)
patterns = ['**/*root.vrpj', '**/*root.vprj']
files = itertools.chain.from_iterables(source_dir.glob(pat) for pat in patterns))
Filter the files:
def filter_lines(files):
for file in files:
if not 'App_Integration' in file.parts:
continue
with file.open('r') as file_handle:
for line in file_handle:
if 'table' in line:
yield line
Write the output
def save_lines(lines, output_file=sys.std_out):
for line in lines:
output_file.write(line)
with Path(<output_file>).open('w') as output_file:
save_lines(filter_lines(files), as output_file)

Not opening a specific text file in Python

I have a folder with a bunch of text files. I have the following code that opens all the text files in its directory when executed and throws them all together in a master text file, "result.txt".
import glob
read_files = glob.glob("*.txt")
with open("result.txt", "wb") as outfile:
for f in read_files:
with open(f, "rb") as infile:
outfile.write(infile.read())
I don't want this script to open "result.txt". All text files except result.txt. How can I do this? I don't want it to duplicate result.txt by writing its contents into itself
Use a filter function:
read_files = filter(lambda f : f != 'result.txt', glob.glob('*.txt'))
Well, you can filter result.txt when looping through all files:
import glob
read_files = glob.glob("*.txt")
with open("result.txt", "wb") as outfile:
for f in (file for file in read_files if file != "result.txt"):
with open(f, "rb") as infile:
outfile.write(infile.read())
Alternatively, to prevent bugs in futher uses of read_files list, you could remove "result.txt" from it after glob.glob:
read_files = glob.glob("*.txt")
try:
read_files.remove("result.txt")
except ValueError: #File result.txt does not exist yet
pass
You could use continue to skip the file and start the next iteration of the loop:
for f in read_files:
if f == "result.txt":
continue
...
Alternatively, filter the list of files before you start looping:
read_files = [f for f in glob.glob("*.txt") if f != "result.txt"]

Categories

Resources