I have the following script that performs several operations on a .txt file: it splits the file by tag, removes line breaks, and writes one sentence per line with a blank line in between.
I know how to do this for a specific file:
import re
# Split input.txt into output_1.txt, output_2.txt, ... at every "</div>"
# line, writing each sentence on its own line followed by a blank line.

# A sentence ends after one of . ! : ; ? followed by whitespace. The raw
# string keeps "\s" a regex escape rather than an invalid string escape.
sentence_break = re.compile(r"(?<=[.!:;?])\s+")

with open("input.txt", "r") as f:
    i = 1      # number of the output file currently being written
    w = None   # current output file, or None when the next line starts a new one
    try:
        for line in f:
            line = line.strip()
            if not line:
                continue  # blank input lines are dropped
            if w is not None and line == "</div>":
                # End of a section: finish this file and advance the counter.
                w.close()
                i += 1
                w = None
            else:
                if w is None:
                    w = open('output_%i.txt' % i, 'w')
                for s in sentence_break.split(line):
                    w.write(s + '\n\n')
    finally:
        # The input may not end with "</div>"; close the last part anyway.
        if w is not None:
            w.close()
How can I apply this to all .txt files in a dir?
I cannot figure out how to open and write to all files in a dir.
I thought this would work:
import os
import re
# For every entry of the output folder: split the file into numbered parts at
# each "</d:entry>" line, one sentence per line with a blank line after it.
path = "/Users/simon/dic/en-new-oxford/output"
for filename in os.listdir(path):
    with open(os.path.join(path, filename), 'r') as f:
        i = 1
        w = None
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines are dropped.
                continue
            if w is not None and line == "</d:entry>":
                # End of an entry: finish this part and advance the counter.
                w.close()
                w = None
                i += 1
                continue
            if w is None:
                # NOTE(review): join() receives one literal string here, so
                # neither `path` nor `filename` takes part in the output name.
                w = open(os.path.join('path, filename_%i.txt') % i, 'w')
            for s in re.split("(?<=[.!:;?])\s+", line):
                w.write(s + '\n\n')
What is wrong about this?
This does work. You probably had another directory inside your directory that was breaking your code. The version below also checks that each file name ends with '.txt'.
from os import listdir
from os.path import isfile, join
# Directory to scan. '.' is the current directory; an empty string is not a
# valid placeholder because os.listdir('') raises FileNotFoundError.
path = '.'
# Collect regular files only (sub-directories are skipped) whose names end
# with '.txt'.
allFiles = [
    join(path, name)
    for name in listdir(path)
    if name.endswith('.txt') and isfile(join(path, name))
]
for file_path in allFiles:
    with open(file_path) as f:
        pass  # do stuff here
Related
The script below unzips a zip file, renames the extracted subtitles to match the TV show episode names, and then converts them to UTF-8.
here is the problem:
I want to run this script on Linux from inside any TV show folder I choose. The path passed to the fixing function should be detected from the folder I run the Python script in, because it is not a constant path: there are many TV show folders.
sorry for my english
import zipfile
import os
import re
from chardet import detect
import fnmatch
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches *pattern*.

    The tree rooted at *path* is searched recursively with ``os.walk``.

    Args:
        pattern: Shell-style wildcard in ``fnmatch`` syntax (e.g. ``'*.zip'``),
            matched against the file name only, not its directory part.
        path: Directory to start the search from.

    Returns:
        A list of matching paths, each joined onto its containing directory,
        in the order ``os.walk`` visits them; empty when nothing matches.
    """
    result = []
    for root, _dirs, files in os.walk(path):
        result.extend(
            os.path.join(root, name) for name in fnmatch.filter(files, pattern)
        )
    return result
def get_encoding_type(file):
    """Guess the text encoding of *file*.

    The whole file is read as bytes and handed to ``detect``; the value
    stored under the ``'encoding'`` key of its result is returned.
    """
    with open(file, 'rb') as handle:
        guess = detect(handle.read())
    return guess['encoding']
def correctSubtitleEncoding(filename, newFilename, encoding_from, encoding_to='UTF-8'):
    """Re-encode a text file and normalise its line endings to CRLF.

    Args:
        filename: Path of the file to read.
        newFilename: Path of the file to write. May be the same path as
            *filename*, in which case the file is converted in place.
        encoding_from: Encoding used to decode *filename*.
        encoding_to: Encoding used to write *newFilename*.
    """
    # Read everything before opening the output. When both paths name the same
    # file, opening it for writing truncates it, so an interleaved read/write
    # loop would find nothing left to read.
    with open(filename, 'r', encoding=encoding_from) as fr:
        # rstrip('\n') instead of line[:-1]: the last line may have no
        # trailing newline, and slicing would drop a real character.
        lines = [line.rstrip('\n') for line in fr]
    # newline='' writes '\r\n' exactly as given; the default text mode would
    # turn it into '\r\r\n' on Windows.
    with open(newFilename, 'w', encoding=encoding_to, newline='') as fw:
        fw.writelines(line + '\r\n' for line in lines)
def fixing(path):
    """Swap the subtitles in *path* for the ones packed in its zip archive.

    Steps, in order:
      1. Locate the first ``*.zip`` under *path* (recursive, via ``find``).
      2. Delete the ``.srt``/``.ass`` files already in *path*.
      3. Extract the archive into *path* and delete the archive.
      4. Rename every extracted subtitle whose name contains a video's
         ``SxxEyy`` tag to ``<video name without extension>.ar.<srt|ass>``.
      5. Re-encode every subtitle in *path* to UTF-8.

    Args:
        path: Directory holding the videos and the archive, with or without
            a trailing separator.

    Raises:
        FileNotFoundError: If no zip archive is found. Raised before anything
            is deleted, so the existing subtitles are left untouched.
    """
    subtitle_exts = ('.srt', '.ass')
    video_exts = ('.mkv', '.avi', '.mp4')

    def list_by_ext(exts):
        # Match on the real extension; a substring test would also pick up
        # names such as 'notes.srt.bak'.
        return [f for f in os.listdir(path) if f.endswith(exts)]

    # Look for the archive first so a missing zip cannot cost the old subtitles.
    archives = find('*.zip', path)
    if not archives:
        raise FileNotFoundError(f'no .zip archive found under {path!r}')
    filename = archives[0]

    oldsrtfiles = list_by_ext(subtitle_exts)
    print(len(oldsrtfiles), ' old subtitles found')
    for oldsrt in oldsrtfiles:
        os.remove(os.path.join(path, oldsrt))
        print(f'{oldsrt} removed')

    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall(path)
    print('files extarcted')
    os.remove(filename)
    print("Zip File Removed!")

    newsrtFiles = list_by_ext(subtitle_exts)
    print(len(newsrtFiles), ' subtitles found')
    showsTitles = list_by_ext(video_exts)
    print(len(showsTitles), ' tv show found')

    pattern = re.compile(r'S(\d{1,2})E(\d{1,2})')
    for show in showsTitles:
        match = pattern.search(show)
        if match is None:
            continue  # video name carries no SxxEyy tag to match against
        SEneeded = match.group(0)
        # splitext removes only the trailing extension; str.replace would
        # rewrite every occurrence of e.g. 'mkv' inside the name.
        stem = os.path.splitext(show)[0]
        for i, sub in enumerate(newsrtFiles):
            ext = os.path.splitext(sub)[1]
            if SEneeded in sub and ext in subtitle_exts:
                renamed = f'{stem}.ar{ext}'
                os.rename(os.path.join(path, sub), os.path.join(path, renamed))
                newsrtFiles[i] = renamed

    for newsrtfile in list_by_ext(subtitle_exts):
        target = os.path.join(path, newsrtfile)
        from_codec = get_encoding_type(target)
        print(from_codec)
        # Convert into a sibling file and swap it in afterwards, so the source
        # is never opened for writing while it is still being read.
        converted = target + '.tmp'
        correctSubtitleEncoding(target, converted, from_codec, encoding_to='UTF-8')
        os.replace(converted, target)
Use this function to get the current working directory:
os.getcwd()
To call this function you need to import the os module:
import os
I have some CSV files in a folder, and I am trying to delete all blank rows and move the new files into a new folder.
Here is the code I have:
import csv
import glob
import os
import shutil
# Goal: rewrite each CSV in in_folder without its blank rows, then move the
# files to out_folder.
path = 'in_folder/*.csv'
files = glob.glob(path)
# Read every file in the directory
x = 0  # counter
for filename in files:
    with open(filename, 'r') as fin:
        data = fin.read().splitlines(True)
        with open(filename, 'w') as fout:
            # `fin` was already read to the end by the read() call above.
            for line in fin.readlines():
                if ''.join(line.split(',')).strip() != '':
                    fout.write(line)
    x += 1

dir_src = "in_folder"
dir_dst = "out_folder"
for file in os.listdir(dir_src):
    if x <= 0:
        continue
    shutil.move(os.path.join(dir_src, file), os.path.join(dir_dst, file))
What the code is doing right now is deleting everything from the files and moving them to the new folder. I want my files to be the same but with deleted blank rows.
You can just output every line to the new file, no need to do any moving afterwards:
# Copy every CSV from in_folder to out_folder, leaving out the blank rows.
dir_src = "in_folder/*.csv"
dir_dst = "out_folder"
# open(..., 'w') does not create missing directories, so make sure the
# destination exists before the first file is written.
os.makedirs(dir_dst, exist_ok=True)
files = glob.glob(dir_src)
# Read every file in the directory
x = 0  # counter of processed files
for filename in files:
    outfilename = os.path.join(dir_dst, os.path.basename(filename))
    with open(filename, 'r') as fin, open(outfilename, 'w') as fout:
        for line in fin:
            # A row is blank when nothing but commas and whitespace is left.
            if ''.join(line.split(',')).strip() == '':
                continue
            fout.write(line)
    x += 1
try this.
# Rewrite every file in place without its blank rows. The whole file is read
# into `data` first, so reopening it for writing loses nothing.
for filename in files:
    with open(filename, 'r') as fin:
        data = fin.read().splitlines(True)
        with open(filename, 'w') as fout:
            fout.writelines(
                row for row in data if ''.join(row.split(',')).strip() != ''
            )
    x += 1
I am trying to read all FASTA files from the test folder and put the name of the file in all headers of each individual file. The code works for the first file but does not proceed to the second file and returns an error. Could you help me find the bug in my code or edit it? Thanks.
import sys, glob, os, string
# For each entry listed in ./test/, append the entry's name to every header
# line (lines starting with '>') and write the result to '<name>_output.txt'.
header = ''
check = 0
path = "./test/"
dirs = os.listdir(path)
for file in dirs:
    # The bare name returned by os.listdir() is opened as-is.
    fp = open(file, "r")
    fpx = open('%s_output.txt' % file, 'w')
    for line in fp:
        line = line.rstrip()
        if not line.startswith('>'):
            # Sequence line: copied through unchanged.
            fpx.write(line + '\n')
            continue
        check += 1
        if check >= 1:
            header = line
            fpx.write(header + '_' + file + '\n')
It would be good to provide the error message you are getting! I think this must fail with "File not found" because you try to open the file by name instead of path. Try fp = open(os.path.join(path, file), "r"):
import sys, glob, os, string
# For each file in ./test/, append the file name to every FASTA header line
# (lines starting with '>') and write the result to '<name>_output.txt' in
# the current working directory.
path = "./test/"
dirs = os.listdir(path)
for file in dirs:
    # os.listdir() returns bare names, so join them onto the folder.
    src = os.path.join(path, file)
    if not os.path.isfile(src):
        continue  # sub-directories cannot be opened as files
    # `with` closes both handles even if a read or write fails.
    with open(src, "r") as fp, open('%s_output.txt' % file, 'w') as fpx:
        for line in fp:
            line = line.rstrip()
            if line.startswith('>'):
                line = line + '_' + file
            fpx.write(line + '\n')
I have this Python script that opens a file dialog to select a text file and then copies its content to another file. Before copying to the next file, I need to delete several lines based on some strings that are predefined in the script.
The problem is that the file is copied as it is, without anything being deleted.
Can anyone help me solve this issue?
OPenDirectory.py
#!/usr/bin/python
import Tkinter
from os import listdir
from os.path import isfile, join
import tkFileDialog
def readWrite():
unwanted = set(['thumbnails', 'tikare', 'cache'])
mypath = "C:\Users\LT GM\Desktop/"
Tkinter.Tk().withdraw()
in_path = tkFileDialog.askopenfile(initialdir = mypath, filetypes=[('text files', ' TXT ')])
files = [f for f in listdir(mypath) if isfile(join(mypath, f))]
for file in files:
if file.split('.')[1] == 'txt':
outputFileName = 'Sorted-' + file
with open(mypath + outputFileName, 'w') as w:
with open(mypath + '/' + file) as f:
for l in f:
if l != unwanted:
print l
w.write(l)
in_path.close()
if __name__== "__main__":
readWrite()
Changed script
#!/usr/bin/python
import Tkinter
from os import listdir
from os.path import isfile, join
import tkFileDialog
def readWrite():
unwanted = set(['thumbnails', 'tikare', 'cache'])
mypath = "C:\Users\LT GM\Desktop/"
Tkinter.Tk().withdraw()
in_path = tkFileDialog.askopenfile(initialdir = mypath, filetypes=[('text files', ' TXT ')])
files = [f for f in listdir(mypath) if isfile(join(mypath, f))]
for file in files:
if file.split('.')[1] == 'txt':
if file.strip() in unwanted:
continue
outputFileName = 'Sorted-' + file
with open(mypath + outputFileName, 'w') as w:
with open(mypath + '/' + file) as f:
for l in f:
if l != unwanted:
print l
w.write(l)
in_path.close()
if __name__== "__main__":
readWrite()
# A line is dropped when, stripped of surrounding whitespace, it equals one
# of these entries.
blacklist = set(['thumbnails', 'quraan', 'cache'])
with open('path/to/input') as infile:
    with open('path/to/output', 'w') as outfile:
        for line in infile:
            # Full-line match. To drop lines that merely contain a word, test
            # any(word in line for word in blacklist) instead.
            if line.strip() not in blacklist:
                outfile.write(line)
I'd like to remove the first line and the last second line of the files that exist in different subdirectories of the same root directory. My code is below:
import fileinput
import sys
import os
# Walk the tree under ./rootDire and rewrite a file without its first line
# and its last two lines (the slice lines[1:-2]).
path = "./rootDire"
for dirpath, dirnames, files in os.walk(path):
    # NOTE(review): `file` is not assigned anywhere in this snippet; the loop
    # only binds dirpath, dirnames and files.
    with open(file, 'r') as f:
        lines = f.readlines()
    with open(file, 'w') as f:
        f.writelines(lines[1:-2])
But when it finds the file, an error occurs saying that the file, which has already been found, does not exist.
Correct me if it does not work:
import fileinput
import sys
import os
# Rewrite every file under ./rootDire (all sub-directories included) without
# its first line and its last two lines.
path = "./rootDire"
for dirpath, dirnames, files in os.walk(path):
    for filename in files:
        # os.walk() yields bare file names; join them onto their directory.
        filepath = os.path.join(dirpath, filename)
        # `with` closes the handle even if reading or writing raises.
        with open(filepath, 'r') as f:
            lines = f.readlines()
        with open(filepath, 'w') as f:
            f.writelines(lines[1:-2])