I wrote a python code which will take input of a txt file and print to excel . I'm able to achieve for one txt file giving as input . But the requirement I have will have around a million of txt files in a folder . SO I don't know how to change the python code to take the input from a folder .
The below code handles input 1.txt file . I want to run multiple txt files from a folder , That's my requirement .
with open('C:/test/1.txt') as infile:
registrations = []
fields = OrderedDict()
d = {}
for line in infile:
line = line.strip()
if line:
key, value = [s.strip() for s in line.split(':', 1)]
d[key] = value
fields[key] = None
else:
if d:
registrations.append(d)
print(d)
d = {}
if ',' not in line:
print('line without ,:', line)
continue
else:
if d: # handle EOF
registrations.append(d)
with open('C:/registrations.csv', 'w') as outfile:
writer = DictWriter(outfile, fieldnames=fields)
writer.writeheader()
writer.writerows(registrations)
Thanks,
Meera
Use the pathlib module:
from pathlib import Path
FOLDER = Path('your/folder/here')
for file in FOLDER.glob('*.txt'):
# Do your thing
pass
Based on your source code, I optimized it. I use os.walk to access each .txt file and then read it line by line in those txt files and save it in an enum. Then I will check each line in that enum.
import os
extension = [".txt"]
path = "C:/test"
for subdir, dirs, files in os.walk(path):
for file in files:
file_path = os.path.join(subdir, file)
ext = os.path.splitext(file)[-1].lower()
if ext in extension:
with open(file_path, "r") as f:
try:
f_content = f.readlines()
except Exception as e:
print(e)
for l_idx, line in enumerate(f_content):
# ..................................
# l_idx: return position line
# line: content in line
Related
I want all files in directory "path" to have the string "error" removed from them and the result to be saved in the same file that was editted. My current code (below) ends up clearing up the entire file, rather than just removing the string and keeping everything else the same.
import os
path = "path"
files = os.listdir(path)
error = "string"
for index, file in enumerate(files):
with open(os.path.join(path, file)) as fin, open(os.path.join(path, file), "w+") as fout:
for line in fin:
line = line.replace(error, "f")
fout.write(line)
import os
path = "path"
files = os.listdir(path)
error = "string"
for index, file in enumerate(files):
with open(os.path.join(path, file), 'r') as fin:
d = din.read()
with open(os.path.join(path, file), "w") as fout:
d = d.replace(error, "")
fout.write(d)
This is the correct way to do this:
import os
path = "path"
for file in os.listdir(path):
if not os.path.isdir(file):
with open(file, 'r+') as fd:
contents = fd.read().replace('error', '')
fd.seek(0)
fd.write(contents)
fd.truncate()
it's a python script.
I am writing the content of all the files with a special extension to a txt file skipping the first line in files, removing duplicates and special Paths
import os
skipLines = "/home/YY"
mainDir = "C:/XX"
directory = os.fsencode(mainDir)
endFile = mainDir+"/endFile.txt"
open(endFile, 'w').close()
fileoutput = open(endFile, "a")
for file in os.listdir(directory):
filename = os.fsdecode(file)
fileFullPath = mainDir+"/"+filename
if filename.endswith(".dep"):
print("working on : ", filename)
file = open(fileFullPath, "r")
next(file)
for line in file:
if skipLines not in line:
fileoutput.write(line)
else:
continue
file.close()
fileoutput.close()
lines_seen = set()
with open("C:/XX/endFile.txt", "r+") as f:
d = f.readlines()
f.seek(0)
for i in d:
if i not in lines_seen:
f.write(i)
lines_seen.add(i)
f.truncate()
fileoutput.close()
my end file looks like this:
F0/XXX.c
../../F1/F2/X/Test.h
../../F1/F2/Z/Test1.h
../../../F1/F3/Y/Test3.h
.
.
.
Here is my question:
how can I copy these file from the paths indicated in each line, and create a folder and create the same file path and copy the files in the corresponding paths?
OS: Ubuntu-18.04lts
Python version - 3.6.9
Excel report data.xlsx i have is,
I have many text files under /home/user/excel/report/directory and inside its sub-directories. Some of the text files along with path given below for reference.
/home/user/excel/report/file01.txt
/home/user/excel/report/folder-1/file02.txt
/home/user/excel/report/folder-1/filepath/file03.txt
/home/user/excel/report/folder-2/file04.txt
The filename of the text files are in excel sheet's B column. For each row, i need to a search the text file as per B column and need to look the User-ID in D column, if user-id exists in that particular row matched text file then user-id need to be removed from that text file, Same need to perform recursively.
Currently i below python code I'm using.
import os
import pandas as pd
data = pd.read_excel("data.xlsx")
d = dict(zip(data["File Name"], data["User-ID"]))
for file in d:
with open(f"/home/user/excel/report/" + file + ".txt", "r") as f:
contents = f.read().strip()
with open(f"/home/user/excel/report/" + file + ".txt", "w") as f:
f.write(contents.replace(d[file], ""))
Error:
$ python3.6 script.py
Traceback (most recent call last):
File "script.py", line 8, in <module>
with open(f"/home/user/excel/report/" + file + ".txt", "r") as f:
FileNotFoundError: [Errno 2] No such file or directory: '/home/user/excel/report/file03.txt'
Still script is look at /home/user/excel/report/directory only. Whereas file03.txt is present inside /home/user/excel/report/folder-1/filepath/ directory, Need help to fix this. Thanks.
You can use bash cmd find in python to find all the txt's paths.
import os
import pandas as pd
import subprocess
pycmd = lambda cmd: print(subprocess.check_output(cmd, shell=True, universal_newlines=True))
pycmd_output = lambda cmd: subprocess.check_output(cmd, shell=True, universal_newlines=True).strip().split('\n')
# use bash com find to find all txt file's path
os.chdir('/home/user/excel')
cmd = '''
find /home/user/excel/report/ -type f -name *.txt
'''
file_list = pycmd_output(cmd)
df_file_list = pd.Series(file_list)
file_list_name = df_file_list.str.split('/|\.').str[-2]
file_map = dict(zip(file_list_name ,df_file_list ))
# {'file02': '/home/user/excel/report/folder-1/file02.txt',
# 'file03': '/home/user/excel/report/folder-1/filepath/file03.txt',
# 'file04': '/home/user/excel/report/folder-2/file04.txt',
# 'file01': '/home/user/excel/report/file01.txt'}
data = pd.read_excel("data.xlsx")
data['file_path'] = data["File Name"].map(file_map)
# have duplicated values in `data["File Name"]`
d = data.groupby('file_path')['User-ID'].agg(list).to_dict()
for file, user_id_list in d.items():
with open(file, "r") as f:
contents = f.read().strip()
for user_id in user_id_list:
contents = contents.replace(user_id, "")
with open(file, "w") as f:
f.write(contents)
Considering
/home/user/excel/report/file01.txt
/home/user/excel/report/folder-1/file02.txt
/home/user/excel/report/folder-1/filepath/file03.txt
/home/user/excel/report/folder-2/file04.txt
you need first do discover where each file is located, if filenames are always unique this is relatively simple with os.walk. I would do:
import os
filepaths = {}
for dirpath, dirnames, filenames in os.walk("/home/user/excel/report"):
for fname in filenames:
filepaths[fname] = os.path.join(dirpath, fname)
print(filepaths)
which should created dict with keys being filenames and values paths to them. Then when you need to interact with file named "X" just use filepaths["X"].
If following code
import os
import pandas as pd
data = pd.read_excel("data.xlsx")
d = dict(zip(data["File Name"], data["User-ID"]))
for file in d:
with open(f"/home/user/excel/report/" + file + ".txt", "r") as f:
contents = f.read().strip()
with open(f"/home/user/excel/report/" + file + ".txt", "w") as f:
f.write(contents.replace(d[file], ""))
would work as intended if all files were inside /home/user/excel/report then following should work with files which might be in subdirs
import os
import pandas as pd
filepaths = {}
for dirpath, dirnames, filenames in os.walk("/home/user/excel/report"):
for fname in filenames:
filepaths[fname] = os.path.join(dirpath, fname)
data = pd.read_excel("data.xlsx")
d = dict(zip(data["File Name"], data["User-ID"]))
for file in d:
with open(filepaths[file+".txt"], "r") as f:
contents = f.read().strip()
with open(filepaths[file+".txt"], "w") as f:
f.write(contents.replace(d[file], ""))
i am a beginner in python.
So what I want to do is a script that finds a specific line in multiple files and delete it and rewrite the file with the same name. Something like this but for more files:
similar to problem "Deleting a line in multiple files in python"
i traid quith this code that i can find in the before question
but it didn't work
import os
os.chdir('C:\escenarie')
source = "*.mgt"
for root, dirs, filenames in os.walk(source):
for f in filenames:
this_file = open(os.path.join(source, f), "r")
this_files_data = this_file.readlines()
this_file.close()
# rewrite the file with all line except the one you don't want
this_file = open(os.path.join(source, f), "w")
for line in this_files_data:
if line != " 1.200 5 0.00000"":
this_file.write(line)
this_file.close()
You ought to learn basic file operations in Python. The code example below should help.
#open file
with open(filename, 'r') as f:
lines = f.readlines()
#find index of line to remove
for index, line in enumerate(lines):
if 'delete me' in line:
#remove line
lines.pop(index)
break
#write new file
with open(filename, 'w') as f:
f.write(''.join(lines))
And to perform this operation on multiple files:
filenames = ['file1.txt', 'file2.txt']
for filename in filenames:
# ... see code above
Using python how can I combine all the text file in the specified directory into one text file and rename the output text file with the same filename.
For example: Filea.txt and Fileb_2.txt is in root directory, and it output generated file is Filea_Fileb_2.txt
Filea.txt
123123
21321
Fileb_2.txt
2344
23432
Filea_Fileb_2.txt
123123
21321
2344
23432
my script:
PWD1 = /home/jenkins/workspace
files = glob.glob(PWD1 + '/' + '*.txt')
with open(f, 'r') as file:
for line in (file):
outputfile = open('outputfile.txt', 'a')
outputfile.write(line)
outputfile.close()
Here's another way to combine text files.
#! python3
from pathlib import Path
import glob
folder_File1 = r"C:\Users\Public\Documents\Python\CombineFIles"
txt_only = r"\*.txt"
files_File1 = glob.glob(f'{folder_File1}{txt_only}')
new_txt = f'{folder_File1}\\newtxt.txt'
newFile = []
for indx, file in enumerate(files_File1):
if file == new_txt:
pass
else:
contents = Path(file).read_text()
newFile.append(contents)
file = open(new_txt, 'w')
file.write("\n".join(newFile))
file.close()
This is a working solution which stores both file names and file contents in a list, then joins the list filenames and creates a "combined" filename and then adds the contents of all the files to it, because lists append in order that the data is read this is sufficient (my example filenames are filea.txt and fileb.txt but it will work for the filenames you've used):
import os
import sys
path = sys.argv[1]
files = []
contents = []
for f in os.listdir(path):
if f.endswith('.txt'): # in case there are other file types in there
files.append(str(f.replace('.txt', ''))) #chops off txt so we can join later
with open(f) as cat:
for line in cat:
contents.append(line) # put file contents in list
outfile_name = '_'.join(x for x in files)+'.txt' #create your output filename
outfile = open(outfile_name, 'w')
for line in contents:
outfile.write(line)
outfile.close()
to run this on a specific directory just pass it on the commandline:
$python3.6 catter.py /path/to/my_text_files/
output filename:
filea_fileb.txt
contents:
123123
21321
2344
23432