Concatenate multiple files' data into one file and also rename the file? - python

Using Python, how can I combine all the text files in a specified directory into one text file, and name the output file after the input filenames?
For example: Filea.txt and Fileb_2.txt are in the root directory, and the generated output file is Filea_Fileb_2.txt
Filea.txt
123123
21321
Fileb_2.txt
2344
23432
Filea_Fileb_2.txt
123123
21321
2344
23432
my script:
import glob

PWD1 = '/home/jenkins/workspace'
files = glob.glob(PWD1 + '/' + '*.txt')
for f in files:
    with open(f, 'r') as file:
        for line in file:
            outputfile = open('outputfile.txt', 'a')
            outputfile.write(line)
            outputfile.close()
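For the naming requirement, a minimal sketch building on the glob setup above (assuming the directory contains only the input .txt files; the variable names are just for illustration) would derive the output filename from the input file stems before concatenating:

import glob
import os

PWD1 = '/home/jenkins/workspace'
files = sorted(glob.glob(PWD1 + '/*.txt'))

# Build the combined name from each input file's stem, e.g. Filea_Fileb_2.txt
stems = [os.path.splitext(os.path.basename(f))[0] for f in files]
output_path = os.path.join(PWD1, '_'.join(stems) + '.txt')

with open(output_path, 'w') as outputfile:
    for f in files:
        with open(f, 'r') as infile:
            outputfile.write(infile.read())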

Here's another way to combine text files.
#! python3
from pathlib import Path
import glob

folder_File1 = r"C:\Users\Public\Documents\Python\CombineFIles"
txt_only = r"\*.txt"
files_File1 = glob.glob(f'{folder_File1}{txt_only}')
new_txt = f'{folder_File1}\\newtxt.txt'
newFile = []
for indx, file in enumerate(files_File1):
    if file == new_txt:
        pass
    else:
        contents = Path(file).read_text()
        newFile.append(contents)
file = open(new_txt, 'w')
file.write("\n".join(newFile))
file.close()

This is a working solution which stores both the file names and the file contents in lists, joins the file names to build a "combined" output filename, and then writes the contents of all the files to it. Because lists append in the order the data is read, this is sufficient (my example filenames are filea.txt and fileb.txt, but it will work for the filenames you've used):
import os
import sys

path = sys.argv[1]
files = []
contents = []
for f in os.listdir(path):
    if f.endswith('.txt'):  # in case there are other file types in there
        files.append(str(f.replace('.txt', '')))  # chop off .txt so we can join later
        with open(os.path.join(path, f)) as cat:  # join with the directory so this works from any cwd
            for line in cat:
                contents.append(line)  # put file contents in list
outfile_name = '_'.join(x for x in files) + '.txt'  # create your output filename
outfile = open(outfile_name, 'w')
for line in contents:
    outfile.write(line)
outfile.close()
To run this on a specific directory, just pass it on the command line:
$python3.6 catter.py /path/to/my_text_files/
output filename:
filea_fileb.txt
contents:
123123
21321
2344
23432

Related

copying files mentioned in a file in the corresponding Paths in Python

It's a Python script.
I am writing the content of all the files with a particular extension to a txt file, skipping the first line of each file, removing duplicate lines, and skipping lines that contain a particular path:
import os

skipLines = "/home/YY"
mainDir = "C:/XX"
directory = os.fsencode(mainDir)
endFile = mainDir + "/endFile.txt"
open(endFile, 'w').close()
fileoutput = open(endFile, "a")
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    fileFullPath = mainDir + "/" + filename
    if filename.endswith(".dep"):
        print("working on : ", filename)
        file = open(fileFullPath, "r")
        next(file)  # skip the first line
        for line in file:
            if skipLines not in line:
                fileoutput.write(line)
            else:
                continue
        file.close()
fileoutput.close()

# remove duplicate lines in place
lines_seen = set()
with open("C:/XX/endFile.txt", "r+") as f:
    d = f.readlines()
    f.seek(0)
    for i in d:
        if i not in lines_seen:
            f.write(i)
            lines_seen.add(i)
    f.truncate()
my end file looks like this:
F0/XXX.c
../../F1/F2/X/Test.h
../../F1/F2/Z/Test1.h
../../../F1/F3/Y/Test3.h
.
.
.
Here is my question:
How can I copy these files from the paths indicated on each line into a new folder, recreating the same directory structure so that each copy ends up at the corresponding path?
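A minimal sketch of one way to do this, assuming the paths in endFile.txt resolve relative to mainDir and that destRoot is a hypothetical destination folder you choose; leading "../" segments are dropped so the tree can be mirrored under destRoot:

import os
import shutil

mainDir = "C:/XX"
destRoot = "C:/XX/collected"  # hypothetical destination folder

with open(os.path.join(mainDir, "endFile.txt")) as paths:
    for line in paths:
        rel = line.strip()
        if not rel:
            continue
        # resolve the source path relative to mainDir
        src = os.path.normpath(os.path.join(mainDir, rel))
        # drop ".." / "." segments so the structure can be recreated under destRoot
        parts = [p for p in rel.replace("\\", "/").split("/") if p not in ("..", ".")]
        dst = os.path.join(destRoot, *parts)
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy2(src, dst)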

Loop through files in a folder and create a new merged text file

I am working on merging a number of text files into a single text document. I am able to read all the file names and create a new output document.
However, when I output the document, I am only getting the data from one file and not the rest. Overall it should be close to 1 million lines in the merged txt, but I am only getting the first 10k.
import os

projpath1 = 'PATH1'
projpath2 = 'PATH2'

for root, dirs, files in os.walk(f"{projpath1}", topdown=False):
    for name in files:
        if not name.startswith('.DS_Store'):
            split = name.split("/")
            title = split[0]
            filename = (os.path.join(root, name))
            inputf = os.path.expanduser(f'{projpath1}/{title}')
            updatedf = os.path.expanduser(f'{projpath2}/ENC_merged.txt')
            with open(inputf, "r") as text_file, open(updatedf, 'w') as outfile:
                for info in text_file:
                    for lines in info:
                        outfile.write(lines)
I really am stuck and can't figure it out :/
You are supposed to open the output file first and then, inside that context, write all the input files into it; something like this should work for you.
import os

projpath1 = 'PATH1'
projpath2 = 'PATH2'
updatedf = os.path.expanduser(f'{projpath2}/ENC_merged.txt')

with open(updatedf, 'w') as outfile:  # open the output file once, before the walk
    for root, dirs, files in os.walk(f"{projpath1}", topdown=False):
        for name in files:
            if not name.startswith('.DS_Store'):
                split = name.split("/")
                title = split[0]
                filename = (os.path.join(root, name))
                inputf = os.path.expanduser(f'{projpath1}/{title}')
                with open(inputf, "r") as text_file:
                    for info in text_file:
                        outfile.write(info)
What about doing it with bash
ls | xargs cat > merged_file

how to run multiple text files as input in Python

I wrote a Python script which takes a txt file as input and prints it to Excel. I'm able to achieve this for one txt file given as input. But the requirement is to handle around a million txt files in a folder, so I don't know how to change the Python code to take its input from a folder.
The code below handles one input file, 1.txt. I want to run it over multiple txt files from a folder; that's my requirement.
from collections import OrderedDict
from csv import DictWriter

with open('C:/test/1.txt') as infile:
    registrations = []
    fields = OrderedDict()
    d = {}
    for line in infile:
        line = line.strip()
        if line:
            key, value = [s.strip() for s in line.split(':', 1)]
            d[key] = value
            fields[key] = None
        else:
            if d:
                registrations.append(d)
                print(d)
            d = {}
        if ',' not in line:
            print('line without ,:', line)
            continue
    else:
        if d:  # handle EOF
            registrations.append(d)

with open('C:/registrations.csv', 'w') as outfile:
    writer = DictWriter(outfile, fieldnames=fields)
    writer.writeheader()
    writer.writerows(registrations)
Thanks,
Meera
Use the pathlib module:
from pathlib import Path

FOLDER = Path('your/folder/here')
for file in FOLDER.glob('*.txt'):
    # Do your thing
    pass
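To tie this loop back to the parsing code in the question, one option (a minimal sketch; parse_file is a hypothetical helper wrapping the question's key/value parsing, and C:/test is an assumed input folder) is to accumulate the records from every file and write a single CSV at the end:

from collections import OrderedDict
from csv import DictWriter
from pathlib import Path

FOLDER = Path('C:/test')  # assumed input folder
fields = OrderedDict()
registrations = []

def parse_file(path, fields, registrations):
    # Hypothetical helper: apply the question's "key: value" parsing to one file.
    d = {}
    for line in path.read_text().splitlines():
        line = line.strip()
        if line and ':' in line:
            key, value = [s.strip() for s in line.split(':', 1)]
            d[key] = value
            fields[key] = None
        elif not line and d:  # a blank line ends a record
            registrations.append(d)
            d = {}
    if d:  # handle EOF
        registrations.append(d)

for file in FOLDER.glob('*.txt'):
    parse_file(file, fields, registrations)

with open('C:/registrations.csv', 'w', newline='') as outfile:
    writer = DictWriter(outfile, fieldnames=fields)
    writer.writeheader()
    writer.writerows(registrations)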
Based on your source code, I optimized it. I use os.walk to find each .txt file, read each file's lines into a list, and then enumerate over that list so you can process every line.
import os

extension = [".txt"]
path = "C:/test"
for subdir, dirs, files in os.walk(path):
    for file in files:
        file_path = os.path.join(subdir, file)
        ext = os.path.splitext(file)[-1].lower()
        if ext in extension:
            with open(file_path, "r") as f:
                try:
                    f_content = f.readlines()
                except Exception as e:
                    print(e)
            for l_idx, line in enumerate(f_content):
                # ..................................
                # l_idx: position (index) of the line
                # line: content of the line
                pass

How to unzip all folders/files that end in .zip and extract “file.txt” file from each zipped folder

My code currently unzips one zip folder, finds the file called file.txt, and extracts it. Now I need to unzip multiple folders that have the extension .zip. I have tried code similar to what I need, but the problem is that now I have to find a file called file.txt in each of those .zip folders and extract only that file, and also store each file.txt in a separate folder with the same name as the zip it came from. Thank you in advance for your time.
import re
import os
from zipfile import ZipFile

def pain():
    print("\t\t\tinput_files.zip has been unzipped")
    with ZipFile('input_files.zip', 'r') as zipObj:
        zipObj.extractall()
        listOfFileNames = zipObj.namelist()
        for fileName in listOfFileNames:
            if fileName.endswith('.txt'):
                zipObj.extract(fileName, 'storage')

    outfile = "output2.txt"  # this will be the filename that the code will write to
    baconFile = open(outfile, "wt")
    file_name1 = "file.txt"
    print('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n')  # prints the master column in the Python shell
    baconFile.write('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n')  # prints the master column in the output file
    # for filename in os.listdir(os.getcwd() + "/input_files"):
    for filename in os.listdir(r'C:\Users\M29858\Desktop\TestPy\Version10\input_files'):
        with open("input_files/" + filename, 'r') as f:
            if file_name1 in filename:
                output_contents(filename, f, baconFile)
    baconFile.close()  # closes the output file

def output_contents(filename, f, baconFile):  # reads the file passed in and reports on its contents
    index = 0
    for line in f:
        # create a list of all of the comma-separated values in our line
        content = line.split(',')  # used to count the number of values before and after each comma
        whitespace_found = False
        tab_found = False
        false_string = "False (end of file)"
        carriage_found = false_string
        sigfigs = ""
        index += 1  # adds 1 for every line
        if " " in line:  # checking for whitespace
            whitespace_found = True
        if "\t" in line:  # checking for tabs
            tab_found = True
        if '\n' in line:  # checking if there is a newline at the end of the line
            carriage_found = True
        sigfigs = (','.join(str(len(g)) for g in re.findall(r'\d+\.?(\d+)?', line)))  # counts the sig figs after the decimal point
        print(filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
              .format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found)))
        baconFile.write('\n')
        baconFile.write(filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
                        .format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found)))

if __name__ == '__main__':
    pain()
# THIS WORKS
import glob
import os
from zipfile import ZipFile

def main():
    for fname in glob.glob("*.zip"):  # get all the zip files
        with ZipFile(fname) as archive:
            # if there's no file.txt, ignore and go on to the next zip file
            if 'file.txt' not in archive.namelist():
                continue
            # make a new directory named after the zip file
            dirname = fname.rsplit('.', 1)[0]
            os.mkdir(dirname)
            # extract file.txt into the directory you just created
            archive.extract('file.txt', path=dirname)
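Two small caveats, in case they matter for your use: nothing above actually calls main(), and os.mkdir raises FileExistsError if the directory already exists (for example on a re-run). A hedged addition along these lines would cover both:

if __name__ == '__main__':
    main()

# and inside main(), a more forgiving variant of the mkdir call:
# os.makedirs(dirname, exist_ok=True)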

python How do I import multiple .txt files in a folder to add characters to each .txt file?

There are text files of various names in the folder 'a'. I want to read all of these text files and add the letter 'b' to each text file. What should I do?
cwd = os.getcwd()
input_dir = os.path.join(cwd, "my .txt files dir")
sorts = sorted(glob(input_dir), key=lambda x: (len(x), x))
for f in sorts:
    f = open(input_dir, 'a')
    data = "add text"
    f.write(data)
    f.close()
Append data to the files:
- first: get all the files in folder a.
- second: keep only those with the .txt extension.
- third: open each one and do something ('append' or 'rewrite').
Demo:
import os

# your .txt files dir
path = 'a'
# the data you want to append
appendData = 'b'

fileNames = list(os.walk(path))[0][2]
fileNames.sort(key=len)
fileNums = len(fileNames)
# your dst file extension
fileExt = '.txt'
# # Extract extension from filename
# fileExt = os.path.splitext(fileNames[0])[1]

for fileName in fileNames:
    if fileName.endswith(fileExt):
        fileFullPath = os.path.join(path, fileName)
        with open(fileFullPath, 'a') as f:
            f.write(appendData)
Like the others said, this is an easy question whose answer can easily be found on Google. Anyway, here's how to do it:
from os import listdir
from os.path import isfile, isdir, join

files = [file for file in listdir("files") if isfile(join("files", file))]
directories = [directory for directory in listdir("files") if isdir(join("files", directory))]
print(files)
for file_name in files:
    try:
        file = open("files/" + file_name, "a")
        file.write("b")
        file.close()
    except IOError as err:
        print("Could not open file because: ", err)
Replace "file" with the directory where your files are or the path to that directory like "directory0/directory1/directory_with_files"
Avoid opening files like this:
f = open(input_dir, 'a')
f.close()
Instead, use a context manager:
with open(input_dir, 'a') as inputFile:
    # do something
Also, what you want is:
import os
import glob  # we will use this module to match only .txt files

path = 'your/path'
for filename in glob.glob(os.path.join(path, '*.txt')):
    with open(filename, 'a') as inputFile:
        inputFile.write('b')
