Python iterating through folders and \ characters - python

I have an old project I wanted to post up on gh-pages and I have a bunch of html files in a bunch of folders.
So I've been piecing together a piece of python that would create an index page displaying all the contained html hyperlinks so that the content will be browse-able on gh-pages similarly to how it is done with full on web servers such as Apache.
To get started I have all the content printing in one file, but unfortunately Python is giving me the file locations as strings in which every \ is escaped with another \ (i.e. shown as \\).
I have been trying to prevent this from causing IO errors but have been getting a little stuck.
import os
# First attempt at a recursive HTML index builder (Python 2: uses `print` statements).
# NOTE(review): indentation was lost in this paste, so the nesting of some
# statements (e.g. the os.chdir("..") below) cannot be determined from here.
class indexer:
path = "~"
prod = []
def __init__(self,p):
self.path=p
# Changes into directory k and builds an HTML fragment for it.
def HtmlFrek(self,k):
print("rek")
os.chdir(k)
ret="<h1>"+k+"</h1>"
# Regular files in the current directory whose last dot-separated part is "html".
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.split(".")[len(f.split("."))-1]=="html"]
for t in files:
# str.replace returns a new string; both results are discarded, so t is unchanged.
t.replace(".","")
t.replace("\\","/")
# Appends the directory argument k here, not the file name t.
ret+= ""+k+"\n"
# os.walk('.') yields the path of every directory in the tree (including '.' itself
# and arbitrarily deep ones), not just the immediate subdirectories.
folders = [x[0] for x in os.walk('.')]
for k in folders:
print k
if(k == '.'):
continue
print k
ret+="<div class='blue1'>"
ret+=self.HtmlFrek(k)
# Plain assignment: discards everything accumulated in ret so far.
ret = "</div>"
# Goes up a single level only, while k taken from os.walk may be several levels deep.
os.chdir("..")
return(ret)
# Builds the page as a list of strings, stores it in self.prod and returns it.
def HtmlProd(self):
print("start")
ret = []
ret.append("""<!DOCTYPE html><html>""")
ret.append("<div class = 'ClearShadeLeft'>")
# Includes '.', so HtmlFrek('.') is called and its os.chdir("..") leaves the start directory.
folders = [x[0] for x in os.walk('.')]
for k in folders:
ret[1]+="<div class='blue1'>"
ret[1]+=self.HtmlFrek(k)
# Plain assignments: each one replaces ret[1] instead of appending to it.
ret[1] = "</div>"
ret[1] = "</div>"
# NOTE(review): presumably meant to be the closing </html> tag — confirm.
ret.append("""<\html><html>""")
self.prod = ret
return(ret)
# Build the index starting from the current directory and dump the result.
i = indexer(".")
i.HtmlProd()
print i.prod
# i.prod is the list returned by HtmlProd; print each part on its own line.
for k in i.prod:
print k
# Under the Python 2 print statement this prints an empty tuple "()", not a blank line.
print()
Edit: I think the answer here is to replace os.walk with [f for f in os.listdir(somedir) if os.path.isfile(f)].
Another Edit:
This version of the code works...
import os
class indexer:
    """Build one HTML index page for the directory tree below the cwd.

    The page lists every ``.html`` file by name and embeds every ``.png``
    file as an ``<img>`` whose URL is ``site/proj/<nested folders>/<file>``.
    Folders whose name contains a dot are skipped.

    Attributes:
        path: Value passed to the constructor (kept for callers; the
            traversal itself always starts from the current directory).
        site: Base URL of the published site.
        proj: Project folder appended to ``site``.
        prod: Last page produced by :meth:`HtmlProd`.
        loc: Stack of folder names from the start directory down to the
            folder currently being rendered.
    """

    path = "~"
    site = "http://krewn.github.io"
    proj = "Reprogramming"
    prod = []
    loc = []

    def __init__(self, p):
        self.path = p
        # Give every instance its own stack; the class-level list would be
        # shared (and mutated) by all instances.
        self.loc = []

    def fprep(self, name):
        """Return *name* with backslashes turned into forward slashes.

        The original also called ``name.replace(".", "")`` but discarded the
        result; applying it would strip the dot of the file extension, so
        only the separator normalisation is kept.
        """
        return name.replace("\\", "/")

    def refPrep(self):
        """Return the site URL of the folder currently being rendered."""
        return "/".join([self.site, self.proj] + list(self.loc))

    def _files_with_ext(self, ext):
        """Return sorted names of files in the cwd whose last dot part is *ext*."""
        return sorted(
            f for f in os.listdir(".")
            if os.path.isfile(f) and f.rsplit(".", 1)[-1] == ext
        )

    def _subfolders(self):
        """Return sorted names of non-file entries in the cwd without a dot."""
        return sorted(
            f for f in os.listdir(".")
            if not os.path.isfile(f) and "." not in f
        )

    def HtmlFrek(self, adir):
        """Render folder *adir* (relative to the cwd) and its subfolders.

        The working directory and ``self.loc`` are restored on exit, even
        when listing or recursing raises.

        Returns:
            The HTML fragment for *adir* as a string.
        """
        previous = os.getcwd()
        os.chdir(adir)
        self.loc.append(adir)
        try:
            parts = ["<h2>" + adir + "</h2>"]
            for t in self._files_with_ext("html"):
                parts.append(self.fprep(t) + "<br>\n")
            ref = self.refPrep()
            for image in self._files_with_ext("png"):
                # Quote the URL so file names containing spaces stay one attribute.
                parts.append('<img src="' + ref + "/" + self.fprep(image) + '">\n')
            for k in self._subfolders():
                parts.append("<div class='blue1'>")
                parts.append(self.HtmlFrek(k))
                parts.append("</div>")
            return "".join(parts)
        finally:
            self.loc.pop()
            # Return to the saved directory instead of "..", which is wrong
            # when adir is nested or a symlink.
            os.chdir(previous)

    def HtmlProd(self):
        """Render the whole page for the cwd, store it in ``prod`` and return it."""
        print("start")
        parts = ["<!DOCTYPE html><html>", "<div>"]
        for t in self._files_with_ext("html"):
            parts.append(self.fprep(t) + "<br>\n")
        for k in self._subfolders():
            print(k)
            parts.append("<div>")
            parts.append(self.HtmlFrek(k))
            parts.append("</div>")
        parts.append("</div>")
        parts.append("</html>")
        ret = "".join(parts)
        self.prod = ret
        return ret
# Build the index for the current directory and save it as index.html.
i = indexer(".")
q = i.HtmlProd()
# The with-block closes the file even if the write fails.
with open("index.html", "w") as w:
    w.write(q)

Doxygen is your friend for this sort of thing. You give doxygen a source code folder. You specify what file extensions that you consider to be source. Then it goes off and builds an index of everything in the folder. You can output this in html or as a pdf.

Related

How can I clasify files based on their extension in Python?

I want to move files into folders based on their extensions and categorize them.
I've tried shutil.move() to categorize it. But it gives an error like this:
shutil.Error: Cannot move a directory 'C:\Users\user\Desktop\deneme' into itself 'None'.
How can i fix the problem?
My code:
import os
from os import path
import pathlib
import shutil
# Attempt to sort the entries of a user-supplied directory into per-extension folders.
# NOTE(review): indentation was lost in this paste; comments describe each statement as written.
path_ = input("Directory: ")
file_list = os.listdir(path_)
os.chdir(path_)
current_directory = os.getcwd()
e = []
a = 0
for i in file_list:
# Extension without the leading dot; empty string when the name has none.
ext = os.path.splitext(i)[1][1:]
e.append(ext)
# print("Ext:", ext)
for j in range(len(e)):
if e[j] == ",":
j = j + 1
continue
# Creates a folder named after the index j, then renames it to the extension.
os.mkdir(str(j))
os.rename(str(j), e[j])
new_folder = e[j]
for f in os.listdir(current_directory):
# os.chdir() returns None, so new_directory is None and str(new_directory) is 'None'.
new_directory = os.chdir(new_folder)
if f == ",":
f +=1
continue
# The source is the whole current_directory (f is never used) and the destination is
# the string 'None' — consistent with the reported
# "Cannot move a directory ... into itself 'None'" error.
shutil.move(os.path.join(current_directory), str(new_directory))
#print("it is moved")
print(os.path.dirname(os.path.abspath(str(e[j]))))
Try os.path.splitext(name)[1] if you want to find the file extension. os.path.splitext returns a tuple: element 0 is the path without the extension and element 1 is the extension (including the leading dot).

How to detect folder path i ran script for?

The script below unzips a zip file, renames the extracted subtitles to match the TV show episode names, and then converts them to UTF-8.
Here is the problem:
I want to run this script on Linux inside any TV show folder I want, but I want the path passed to the fixing function to be detected from the folder I run the Python script in, because it is not a constant path; there are many TV show folders.
Sorry for my English.
import zipfile
import os
import re
from chardet import detect
import fnmatch
def find(pattern, path):
    """Collect the files below *path* whose base name matches *pattern*.

    Args:
        pattern: Shell-style wildcard (as understood by ``fnmatch``).
        path: Directory to search recursively.

    Returns:
        A list of matching file paths, each joined with the directory it
        was found in, in ``os.walk`` order.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(path)
        for entry in entries
        if fnmatch.fnmatch(entry, pattern)
    ]
def get_encoding_type(file):
    """Return the encoding that ``detect`` reports for the raw bytes of *file*."""
    with open(file, 'rb') as handle:
        guess = detect(handle.read())
    return guess['encoding']
def correctSubtitleEncoding(filename, newFilename, encoding_from, encoding_to='UTF-8'):
    """Re-encode a text file and normalise its line endings to CRLF.

    The input is read completely before the output is opened, so
    *filename* and *newFilename* may be the same path (opening the output
    first would truncate the file that is still to be read).

    Args:
        filename: File to read.
        newFilename: File to write; may equal *filename*.
        encoding_from: Encoding of the input file.
        encoding_to: Encoding of the output file.
    """
    with open(filename, 'r', encoding=encoding_from) as fr:
        # Universal-newline reading already maps \r\n and \r to \n; strip only
        # that terminator so an unterminated last line keeps its final character.
        lines = [line.rstrip('\n') for line in fr]
    # newline='' writes '\r\n' verbatim instead of expanding it to '\r\r\n' on Windows.
    with open(newFilename, 'w', encoding=encoding_to, newline='') as fw:
        fw.writelines(line + '\r\n' for line in lines)
def fixing(path=None):
    """Replace the subtitles of a TV-show folder with freshly extracted UTF-8 ones.

    Steps, in order: locate the first ``*.zip`` below *path*, delete the
    existing ``.srt``/``.ass`` files in *path*, extract the archive into
    *path* and delete it, rename every extracted subtitle whose name
    contains a video's ``SxxEyy`` tag to ``<video stem>.ar.<srt|ass>``, and
    finally re-encode every subtitle in *path* to UTF-8.

    Args:
        path: Folder to process. Defaults to the current working directory,
            so the script can be started from inside any show folder.

    Raises:
        IndexError: If no zip archive exists below *path*.
    """
    if path is None:
        path = os.getcwd()
    subtitle_exts = ('.srt', '.ass')
    video_exts = ('.mkv', '.avi', '.mp4')

    # Look the archive up first so a missing zip fails before anything is deleted.
    filename = find('*.zip', path)[0]

    oldsrtfiles = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    print(len(oldsrtfiles), ' old subtitles found')
    for oldsrt in oldsrtfiles:
        # os.path.join works whether or not path ends with a separator.
        os.remove(os.path.join(path, oldsrt))
        print(f'{oldsrt} removed')

    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall(path)
    print('files extarcted')
    os.remove(filename)
    print("Zip File Removed!")

    newsrtFiles = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    print(len(newsrtFiles), ' subtitles found')
    showsTitles = [f for f in os.listdir(path) if f.endswith(video_exts)]
    print(len(showsTitles), ' tv show found')

    pattern = re.compile(r'S(\d{1,2})E(\d{1,2})')
    for show in showsTitles:
        match = pattern.search(show)
        if match is None:
            # Video without an SxxEyy tag: nothing to pair it with.
            continue
        SEneeded = match.group(0)
        stem = os.path.splitext(show)[0]
        for i, sub in enumerate(newsrtFiles):
            if SEneeded not in sub:
                continue
            # Keep the subtitle's own extension (.srt or .ass).
            renamed = stem + '.ar' + os.path.splitext(sub)[1]
            os.rename(os.path.join(path, sub), os.path.join(path, renamed))
            newsrtFiles[i] = renamed

    forencoding = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    for newsrtfile in forencoding:
        target = os.path.join(path, newsrtfile)
        from_codec = get_encoding_type(target)
        print(from_codec)
        # Convert into a scratch file and swap it in, so the source is never
        # opened for writing while it is still being read.
        scratch = target + '.tmp'
        correctSubtitleEncoding(target, scratch, from_codec, encoding_to='UTF-8')
        os.replace(scratch, target)
Use the following function to get the current working directory (the folder the script was started from):
os.getcwd()
To call this function you need to import the os module:
import os

Python: How to get the full path of a file in order to move it?

I had files that were in zips. I unzipped them with Zip-7 so they are in folders with the zip file names.
Each of these folders has either a .otf or .ttf (some have both) that I want out of them and moved to another folder.
I have tried a few methods of getting the full path of the files but every one of them leaves out the folder that the file is actually in.
Here is my latest try:
import os
import shutil
from pathlib import Path
# Attempt to move every .otf/.ttf file found below the current directory up one folder.
# NOTE(review): indentation was lost in this paste.
result = []
for root, dirs, files in os.walk("."):
for d in dirs:
continue
for f in files:
if f.endswith(".otf"):
print(f)
# f is a bare file name; Path(f).absolute() resolves it against the current
# working directory rather than root, so the containing folder is missing.
p = Path(f).absolute()
parent_dir = p.parents[1]
p.rename(parent_dir / p.name)
elif f.endswith(".ttf"):
print(f)
# Same resolution against the working directory as in the .otf branch.
p = Path(f).absolute()
parent_dir = p.parents[1]
p.rename(parent_dir / p.name)
else:
continue
Other attempts:
# parent_dir = Path(f).parents[1]
# shutil.move(f, parent_dir)
#print("OTF: " + f)
# fn = f
# f = f[:-4]
# f += '\\'
# f += fn
# result.append(os.path.realpath(f))
#os.path.relpath(os.path.join(root, f), "."))
I know this is something simple but I just can't figure it out. Thanks!
You should join the file name with the path name root:
# Move every .otf/.ttf file found below the current directory up one folder.
for root, dirs, files in os.walk("."):
    for f in files:
        # str.endswith accepts a tuple, so both font types share one branch.
        if not f.endswith((".otf", ".ttf")):
            continue
        # Join with root so the path includes the folder the file lives in.
        p = Path(os.path.join(root, f)).absolute()
        parent_dir = p.parents[1]
        p.rename(parent_dir / p.name)
# Print the absolute path of every file below the current directory.
for root, dirs, files in os.walk("."):
    for f in files:
        # f is only the file name; join it with root first, otherwise abspath
        # resolves it against the working directory and drops the folder.
        print(os.path.abspath(os.path.join(root, f)))
You can use os.path.abspath() to get the full path of a file.
You would also need to still filter for the certain file types.

Python - match directories with pattern (regular expression)

I wrote a loop which ignores all sub-directories which contain .txt files within them.
# Walk src and, for folders that contain no .txt files and whose name matches
# path_patter, copy the folder to <dst>/missing_txt/<name> and move it to <dest>/missing_txt.
# NOTE(review): Python 2 (raw_input); indentation was lost in this paste, so nesting is inferred.
# NOTE(review): os, re, shutil, distutils, get_size and debug_status are used but not
# defined in this excerpt.
src = raw_input("Enter source disk location: ")
src = os.path.abspath(src)
dst = raw_input("Enter first destination to copy: ")
dst = os.path.abspath(dst)
dest = raw_input("Enter second destination to move : ")
dest = os.path.abspath(dest)
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)'
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = [] # do not recurse into subdirectories
continue
files = [os.path.join(dir, f) for f in files ]
for f in files:
# part1 is the folder containing f; part3 is that folder's own name.
part1 = os.path.dirname(f)
# part2 is computed but not used below.
part2 = os.path.dirname(os.path.dirname(part1))
part3 = os.path.split(part1)[1]
path_miss1 = os.path.join(dst, "missing_txt")
path_miss = os.path.join(path_miss1, part3)
path_missing = os.path.join(dest, "missing_txt")
# Only folder names matching the pattern are processed.
searchFileName = re.search(path_patter, part3)#### update
if searchFileName:#####update
try:
if not os.path.exists(path_miss):
os.makedirs(path_miss)
else:
pass
if os.path.exists(path_miss):
distutils.dir_util.copy_tree(part1, path_miss)
else:
debug_status += "missing_file\n"
pass
if (get_size(path_miss)) == 0:
os.rmdir(path_miss)
else:
pass
if not os.path.exists(path_missing):
os.makedirs(path_missing)
else:
pass
if os.path.exists(path_missing):
shutil.move(part1, path_missing)
else:
pass
if (get_size(path_missing)) == 0:
os.rmdir(path_missing)
else:
pass
# Any error raised in the try block above is silently discarded.
except Exception:
pass
else:
continue
How can I modify this code to also compare the directory name against a regular expression? (It still has to ignore directories that contain .txt files.)
import os
import re
def createEscapedPattern(path, pattern):
    """Build a regex source string from a directory path and a name pattern.

    The path is normalised, every backslash in it is replaced by four
    backslashes, and four more backslashes plus *pattern* are appended.
    """
    separator = "\\\\\\\\"
    pieces = os.path.normpath(path).split("\\")
    return separator.join(pieces) + separator + pattern
def createEscapedPath(path):
    """Return the normalised *path* with every backslash doubled."""
    pieces = os.path.normpath(path).split("\\")
    return "\\\\".join(pieces)
# Demo: walk src, skip directories that contain .txt files, and print the files of
# directories whose escaped path matches the compiled pattern.
# NOTE(review): Python 2 (print statement); indentation was lost in this paste.
src = 'C:\\Home\\test'
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)$'
# Initial pattern: the escaped src path followed by the name pattern.
p = re.compile(createEscapedPattern(src,path_patter))
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = []
continue
# The generator never uses f: this is true when the directory has at least one
# file and its escaped path matches p.
if any(p.match(createEscapedPath(dir)) for f in files):
for f in files:
print createEscapedPath(dir + "/" + f)
# Rebuilds the pattern with dir as the prefix.
# NOTE(review): the accompanying text says this happens for each directory — confirm nesting.
p = re.compile(createEscapedPattern(dir,path_patter))
There are a couple of things i did here and hope this example helps
I wrote this for windows fs so used two path convert functions.
This script ignores dirs with .txt files like you implemented it
This script will start at the directory where you start the script and will only print file names if the pattern matches. This is done for all subdirectories that are not ignored by the previous rule.
I used a regex in Python and recompile it for each directory, so you get: 'directory/(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)$'

Comparing two directories with subdirectories to find any changes?

For starters, I've only been playing with Python for about 2 weeks now and I'm relatively new to its processes. I'm trying to create a script that compares two directories with subdirectories and prints out ANY changes. I've read articles on here about using os.walk to walk the directories, and I've managed to write a script that prints all the files in a directory and its subdirectories in an understandable manner. I've also read on here and learned how to compare two directories, but it only compares 1 file deep.
import os
# Attempted directory comparison (Python 2 print statement).
x = 'D:\\xfiles'
y = 'D:\\yfiles'
# x and y are plain strings, so this iterates over the characters of x and keeps
# those that do not occur in the string y; no directory is read.
q= [ filename for filename in x if filename not in y ]
print q
Obviously that does not do what I want it to. This however is listing all files and all directories.
import os
# Print every directory below x followed by the names of the files it contains.
x = 'D:\\xfiles'
x1 = os.walk(x)
for dirName, subdirList, fileList in x1:
print ('Directory: %s' % dirName)
for fname in fileList:
# Each file name is printed with a leading backslash.
print ('\%s' % fname)
How do I combine them and get it to work?
Write a function to aggregate your listing.
import os
def listfiles(path):
    """Return every file below *path* as a path relative to *path*.

    ``os.path.relpath`` is used instead of ``str.replace(path, '')``: the
    replace removed every occurrence of *path* inside the directory name and
    left a leading separator, which made the joined result look absolute.

    Args:
        path: Root directory to scan recursively.

    Returns:
        A list of relative file paths (no leading separator), in
        ``os.walk`` order.
    """
    return [
        os.path.relpath(os.path.join(dirName, fname), path)
        for dirName, subdirList, fileList in os.walk(path)
        for fname in fileList
    ]
x = listfiles('D:\\xfiles')
y = listfiles('D:\\yfiles')
You could use a list comprehension to extract the files that are not in both directories.
q = [filename for filename in x if filename not in y]
But using sets is much more efficient and flexible.
# Set difference: entries of x that are not in y.
files_only_in_x = set(x) - set(y)
# Set difference: entries of y that are not in x.
files_only_in_y = set(y) - set(x)
# Symmetric difference: entries present in exactly one of the two.
files_only_in_either = set(x) ^ set(y)
# Intersection: entries present in both.
files_in_both = set(x) & set(y)
# Union: entries present in at least one.
all_files = set(x) | set(y)
I guess the best way to go would be external programs, as @Robᵩ suggests in the comment.
Using Python I would recommend doing following:
import os
def fileIsSame(right, left, path):
    """Return True when *path*, located below *right*, also exists below *left*.

    Only existence is checked; file contents and timestamps are not compared.

    Args:
        right: Root directory that contains *path*.
        left: Root directory to look the counterpart up in.
        path: Path of a file somewhere below *right*.
    """
    # relpath has no leading separator, so os.path.join keeps the left prefix.
    relative = os.path.relpath(path, right)
    return os.path.exists(os.path.join(left, relative))


def compare(right, left):
    """List the files below *right* that have no counterpart below *left*.

    Args:
        right: Directory tree to walk.
        left: Directory tree to check against.

    Returns:
        A list of paths (joined with *right*) of the files missing from
        *left*, in ``os.walk`` order; empty when nothing is missing.
    """
    difference = []
    # os.walk already descends into subdirectories, so no manual recursion is needed.
    for root, dirs, files in os.walk(right):
        for name in files:
            path = os.path.join(root, name)
            if not fileIsSame(right, left, path):
                difference.append(path)
    return difference
This approach lacks a proper fileIsSame function that would make sure the files are the same by content or by modification date, and you should make sure paths are handled correctly (I'm not sure this variant does). This algorithm requires you to specify full paths.
Usage example:
print (compare(r'c:\test', r'd:\copy_of_test'));
If the second folder is a copy of the first, the differences in the paths themselves (different drive letter and folder name) are ignored, and the output will be [].
import os
def ls(path):
    """Return the sorted relative paths of all folders and files below *path*.

    Paths are made relative with ``os.path.relpath`` rather than by slicing
    off ``len(path)`` characters and stripping backslashes, which only worked
    with Windows separators and a *path* without a trailing separator.

    Args:
        path: Root directory to scan recursively.

    Returns:
        A sorted list with one entry per subdirectory and per file.
    """
    entries = []
    for base, sub_f, files in os.walk(path):
        for name in sub_f + files:
            entries.append(os.path.relpath(os.path.join(base, name), path))
    entries.sort()
    return entries
def folder_diff(folder1_path, folder2_path):
    """Return the entries that exist in only one of the two folder trees.

    Args:
        folder1_path: First directory tree.
        folder2_path: Second directory tree.

    Returns:
        A list with the relative paths found only below *folder1_path*,
        followed by those found only below *folder2_path*.
    """
    folder1_list = ls(folder1_path)
    folder2_list = ls(folder2_path)
    # Sets give constant-time membership tests; the lists keep the sorted order.
    folder1_set = set(folder1_list)
    folder2_set = set(folder2_list)
    diff = [item for item in folder1_list if item not in folder2_set]
    diff.extend(item for item in folder2_list if item not in folder1_set)
    return diff
I have written code that compares two directories recursively and, if there is a difference, points out the line that differs.
import os
FOLDER_A = os.path.join(os.path.dirname(__file__), 'folder_a')
FOLDER_B = os.path.join(os.path.dirname(__file__), 'folder_b')
def load_directory(directory):
    """Split the immediate children of *directory* into files and the rest.

    Returns:
        A ``(files, directories)`` pair of sets of entry names; every entry
        that is not a regular file lands in ``directories``.
    """
    entries = os.listdir(directory)
    files = {name for name in entries if os.path.isfile(f'{directory}/{name}')}
    directories = set(entries) - files
    return files, directories
def compare_files(a, b):
    """Return True when the text files *a* and *b* have identical content.

    On a mismatch a message is printed, naming either the differing line
    counts or the first differing line number, and False is returned.
    """
    assert os.path.isfile(a)
    assert os.path.isfile(b)

    contents = []
    for location in (a, b):
        with open(location, 'r') as handle:
            contents.append(handle.read())
    file_a, file_b = contents

    if file_a == file_b:
        return True

    file_a_lines = file_a.split('\n')
    file_b_lines = file_b.split('\n')
    if len(file_a_lines) != len(file_b_lines):
        print(f'Two file {a} and {b} have different length, of {len(file_a_lines)} and {len(file_b_lines)}')
        return False

    for index, (line_a, line_b) in enumerate(zip(file_a_lines, file_b_lines), start=1):
        if line_a != line_b:
            print(f'Different found in file {a} and {b}, at line number {index}')
            return False

    # Contents differ although every line pair compared equal.
    print('Some thing wrong')
    return False
def compare_directories(a, b):
    """Return True when directories *a* and *b* match recursively.

    The two directories must hold the same file names and the same
    subdirectory names, every file pair must pass ``compare_files`` and
    every subdirectory pair must pass this function. The first mismatch is
    printed and ends the comparison with False.
    """
    assert not os.path.isfile(a)
    assert not os.path.isfile(b)

    a_files, a_directories = load_directory(a)
    b_files, b_directories = load_directory(b)

    if a_files != b_files:
        print(f'Different Found In {a} and {b} directories files')
        print(f'A: {a_files}\nB: {b_files}')
        return False
    if a_directories != b_directories:
        print(f'Different Found In {a} and {b} directories subdirectories')
        print(f'A: {a_directories}\nB: {b_directories}')
        return False

    # all() stops at the first failing pair, like an early return inside a loop.
    files_match = all(
        compare_files(f'{a}/{name}', f'{b}/{name}') for name in a_files
    )
    if not files_match:
        return False
    return all(
        compare_directories(f'{a}/{name}', f'{b}/{name}')
        for name in a_directories
    )
def main():
    """Compare FOLDER_A with FOLDER_B and print the boolean outcome."""
    verdict = compare_directories(FOLDER_A, FOLDER_B)
    print(verdict)


if __name__ == '__main__':
    main()

Categories

Resources