Visualization of Mercurial revision tree - python

What's the easiest way to produce a visualization of Mercurial revision tree using Python (ideally, Python 3)?
I am guessing it would have to be through a combination of two libraries: one that provides an interface to the Mercurial repository, and one that visualizes graphs.
Use case: we have written a (pure Python) continuous integration testing module. We'd like it to display the revision tree, marking each node as "passed", "failed", "in progress", "not tested" or something along these lines.

For the visualization part, I would check out NetworkX. It is a Python library that lets you do graph/network processing, import/export, and visualization.
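For instance, a minimal sketch (untested) that builds the revision DAG with hglib and colors nodes by CI status; the statuses dict here is a placeholder for whatever your testing module records:
import hglib
import matplotlib.pyplot as plt
import networkx as nx

# Stand-in for whatever your CI module records per revision
statuses = {}  # e.g. {'42': 'passed', '43': 'failed'}
status_colors = {'passed': 'green', 'failed': 'red',
                 'in progress': 'yellow', 'not tested': 'gray'}

repo = hglib.open('.')
dag = nx.DiGraph()
for entry in repo.log():  # entry: (rev, node, tags, branch, author, desc, date)
    rev = entry[0]
    dag.add_node(rev)
    for parent in repo.parents(rev) or []:
        dag.add_edge(parent[0], rev)

node_colors = [status_colors[statuses.get(rev, 'not tested')]
               for rev in dag.nodes()]
nx.draw(dag, node_color=node_colors, with_labels=True)
plt.savefig('revision-tree.png')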

Here, have my script:
#!/usr/bin/env python
from itertools import chain
import hglib
#from pygraphviz import *
repo = hglib.open('.')
log = dict((t[0], t) for t in repo.log())
def graph(): return dict((v, []) for v in log)
forward, backward, jump = (graph() for i in range(3))
for target in log:
for parent in repo.parents(target) or []:
source = parent[0]
forward[source].append(target)
backward[target].append(source)
def endpoint(v):
if len(forward[v]) != 1: return True
if len(backward[forward[v][0]]) != 1: return True
if len(backward[v]) != 1: return True
    if len(forward[backward[v][0]]) != 1: return True
    return False
for v in forward:
if endpoint(v):
w = v
while len(forward[w]) == 1 == len(backward[forward[w][0]]):
w = forward[w][0]
jump[v] = w
else: del jump[v]
def vertex(tupl): return 'v' + tupl[1][:5]
print 'digraph {'
colors = ['red', 'green', 'blue', 'yellow', 'cyan', 'magenta',
'orange', 'chartreuse']
authors = dict()
for v in sorted(forward, key=int):
if not endpoint(v) or v not in jump: continue
node = 'v%s' % v
if jump[v] != v:
        sep = '+' if forward[v] == [jump[v]] else '::'  # '+' when the jump is a single edge
label = '%s%s%s' % (v, sep, jump[v])
assert int(jump[v]) > int(v)
v = jump[v]
del jump[v]
else:
label = v
author = log[v][4]
print '// %s' % author
    if author not in authors:
        authors[author] = colors[len(authors) % len(colors)]
attrs = dict(color=authors[author], label=label, style='bold').items()
print '\t%s [%s]' % (node, ','.join('%s="%s"' % kv for kv in attrs))
for w in forward[v]: print '\t%s -> v%s' % (node, w)
print '}'
As you can tell, I use hglib to get at the Mercurial data (hglib.open('.').log() is a list of tuples, one per commit). I use graphviz for visualization. No library, I just *print* the damn thing ;-)
I run the script plus graphviz like this:
python version-dag.py > log.dot ; dot -Tpdf log.dot -o log.pdf
... and then look at the glorious .pdf file. Graphviz can do png, eps, and perhaps other formats. The jump dictionary is for compressing single-parent paths down to a single node. Enjoy :)

Related

Run python script to replace betacode with greek letters LaTeX

I want to convert the betacode in an existing .tex file to normal Greek letters.
For example: I want to replace:
\bcode{lo/gos}
with simple:
λόγος
And so on for all other glyphs. Fortunately, there seems to be a Python script that is supposed to do just that. But, being completely inexperienced, I simply don't know how to run it.
Here is the code of the Python script:
# beta2unicode.py
#
# Version 2004-11-23
#
# James Tauber
# http://jtauber.com/
#
# You are free to redistribute this, but please inform me of any errors
#
# USAGE:
#
# trie = beta2unicodeTrie()
# beta = "LO/GOS\n";
# unicode, remainder = trie.convert(beta)
#
# - to get final sigma, string must end in \n
# - remainder will contain rest of beta if not all can be converted
class Trie:
def __init__(self):
self.root = [None, {}]
def add(self, key, value):
curr_node = self.root
for ch in key:
curr_node = curr_node[1].setdefault(ch, [None, {}])
curr_node[0] = value
def find(self, key):
curr_node = self.root
for ch in key:
try:
curr_node = curr_node[1][ch]
except KeyError:
return None
return curr_node[0]
def findp(self, key):
curr_node = self.root
remainder = key
for ch in key:
try:
curr_node = curr_node[1][ch]
except KeyError:
return (curr_node[0], remainder)
remainder = remainder[1:]
return (curr_node[0], remainder)
def convert(self, keystring):
valuestring = ""
key = keystring
while key:
value, key = self.findp(key)
if not value:
return (valuestring, key)
valuestring += value
return (valuestring, key)
def beta2unicodeTrie():
t = Trie()
t.add("*A", u"\u0391")
t.add("*B", u"\u0392")
t.add("*G", u"\u0393")
t.add("*D", u"\u0394")
t.add("*E", u"\u0395")
t.add("*Z", u"\u0396")
t.add("*H", u"\u0397")
t.add("*Q", u"\u0398")
t.add("*I", u"\u0399")
t.add("*K", u"\u039A")
t.add("*L", u"\u039B")
t.add("*M", u"\u039C")
t.add("*N", u"\u039D")
t.add("*C", u"\u039E")
t.add("*O", u"\u039F")
t.add("*P", u"\u03A0")
t.add("*R", u"\u03A1")
t.add("*S", u"\u03A3")
t.add("*T", u"\u03A4")
t.add("*U", u"\u03A5")
t.add("*F", u"\u03A6")
t.add("*X", u"\u03A7")
t.add("*Y", u"\u03A8")
t.add("*W", u"\u03A9")
t.add("A", u"\u03B1")
t.add("B", u"\u03B2")
t.add("G", u"\u03B3")
t.add("D", u"\u03B4")
t.add("E", u"\u03B5")
t.add("Z", u"\u03B6")
t.add("H", u"\u03B7")
t.add("Q", u"\u03B8")
t.add("I", u"\u03B9")
t.add("K", u"\u03BA")
t.add("L", u"\u03BB")
t.add("M", u"\u03BC")
t.add("N", u"\u03BD")
t.add("C", u"\u03BE")
t.add("O", u"\u03BF")
t.add("P", u"\u03C0")
t.add("R", u"\u03C1")
t.add("S\n", u"\u03C2")
t.add("S,", u"\u03C2,")
t.add("S.", u"\u03C2.")
t.add("S:", u"\u03C2:")
t.add("S;", u"\u03C2;")
t.add("S]", u"\u03C2]")
t.add("S#", u"\u03C2#")
t.add("S_", u"\u03C2_")
t.add("S", u"\u03C3")
t.add("T", u"\u03C4")
t.add("U", u"\u03C5")
t.add("F", u"\u03C6")
t.add("X", u"\u03C7")
t.add("Y", u"\u03C8")
t.add("W", u"\u03C9")
t.add("I+", U"\u03CA")
t.add("U+", U"\u03CB")
t.add("A)", u"\u1F00")
t.add("A(", u"\u1F01")
t.add("A)\\", u"\u1F02")
t.add("A(\\", u"\u1F03")
t.add("A)/", u"\u1F04")
t.add("A(/", u"\u1F05")
t.add("E)", u"\u1F10")
t.add("E(", u"\u1F11")
t.add("E)\\", u"\u1F12")
t.add("E(\\", u"\u1F13")
t.add("E)/", u"\u1F14")
t.add("E(/", u"\u1F15")
t.add("H)", u"\u1F20")
t.add("H(", u"\u1F21")
t.add("H)\\", u"\u1F22")
t.add("H(\\", u"\u1F23")
t.add("H)/", u"\u1F24")
t.add("H(/", u"\u1F25")
t.add("I)", u"\u1F30")
t.add("I(", u"\u1F31")
t.add("I)\\", u"\u1F32")
t.add("I(\\", u"\u1F33")
t.add("I)/", u"\u1F34")
t.add("I(/", u"\u1F35")
t.add("O)", u"\u1F40")
t.add("O(", u"\u1F41")
t.add("O)\\", u"\u1F42")
t.add("O(\\", u"\u1F43")
t.add("O)/", u"\u1F44")
t.add("O(/", u"\u1F45")
t.add("U)", u"\u1F50")
t.add("U(", u"\u1F51")
t.add("U)\\", u"\u1F52")
t.add("U(\\", u"\u1F53")
t.add("U)/", u"\u1F54")
t.add("U(/", u"\u1F55")
t.add("W)", u"\u1F60")
t.add("W(", u"\u1F61")
t.add("W)\\", u"\u1F62")
t.add("W(\\", u"\u1F63")
t.add("W)/", u"\u1F64")
t.add("W(/", u"\u1F65")
t.add("A)=", u"\u1F06")
t.add("A(=", u"\u1F07")
t.add("H)=", u"\u1F26")
t.add("H(=", u"\u1F27")
t.add("I)=", u"\u1F36")
t.add("I(=", u"\u1F37")
t.add("U)=", u"\u1F56")
t.add("U(=", u"\u1F57")
t.add("W)=", u"\u1F66")
t.add("W(=", u"\u1F67")
t.add("*A)", u"\u1F08")
t.add("*)A", u"\u1F08")
t.add("*A(", u"\u1F09")
t.add("*(A", u"\u1F09")
#
t.add("*(\A", u"\u1F0B")
t.add("*A)/", u"\u1F0C")
t.add("*)/A", u"\u1F0C")
t.add("*A(/", u"\u1F0F")
t.add("*(/A", u"\u1F0F")
t.add("*E)", u"\u1F18")
t.add("*)E", u"\u1F18")
t.add("*E(", u"\u1F19")
t.add("*(E", u"\u1F19")
#
t.add("*(\E", u"\u1F1B")
t.add("*E)/", u"\u1F1C")
t.add("*)/E", u"\u1F1C")
t.add("*E(/", u"\u1F1D")
t.add("*(/E", u"\u1F1D")
t.add("*H)", u"\u1F28")
t.add("*)H", u"\u1F28")
t.add("*H(", u"\u1F29")
t.add("*(H", u"\u1F29")
t.add("*H)\\", u"\u1F2A")
t.add(")\\*H", u"\u1F2A")
t.add("*)\\H", u"\u1F2A")
#
t.add("*H)/", u"\u1F2C")
t.add("*)/H", u"\u1F2C")
#
t.add("*)=H", u"\u1F2E")
t.add("(/*H", u"\u1F2F")
t.add("*(/H", u"\u1F2F")
t.add("*I)", u"\u1F38")
t.add("*)I", u"\u1F38")
t.add("*I(", u"\u1F39")
t.add("*(I", u"\u1F39")
#
#
t.add("*I)/", u"\u1F3C")
t.add("*)/I", u"\u1F3C")
#
#
t.add("*I(/", u"\u1F3F")
t.add("*(/I", u"\u1F3F")
#
t.add("*O)", u"\u1F48")
t.add("*)O", u"\u1F48")
t.add("*O(", u"\u1F49")
t.add("*(O", u"\u1F49")
#
#
t.add("*(\O", u"\u1F4B")
t.add("*O)/", u"\u1F4C")
t.add("*)/O", u"\u1F4C")
t.add("*O(/", u"\u1F4F")
t.add("*(/O", u"\u1F4F")
#
t.add("*U(", u"\u1F59")
t.add("*(U", u"\u1F59")
#
t.add("*(/U", u"\u1F5D")
#
t.add("*(=U", u"\u1F5F")
t.add("*W)", u"\u1F68")
t.add("*W(", u"\u1F69")
t.add("*(W", u"\u1F69")
#
#
t.add("*W)/", u"\u1F6C")
t.add("*)/W", u"\u1F6C")
t.add("*W(/", u"\u1F6F")
t.add("*(/W", u"\u1F6F")
t.add("*A)=", u"\u1F0E")
t.add("*)=A", u"\u1F0E")
t.add("*A(=", u"\u1F0F")
t.add("*W)=", u"\u1F6E")
t.add("*)=W", u"\u1F6E")
t.add("*W(=", u"\u1F6F")
t.add("*(=W", u"\u1F6F")
t.add("A\\", u"\u1F70")
t.add("A/", u"\u1F71")
t.add("E\\", u"\u1F72")
t.add("E/", u"\u1F73")
t.add("H\\", u"\u1F74")
t.add("H/", u"\u1F75")
t.add("I\\", u"\u1F76")
t.add("I/", u"\u1F77")
t.add("O\\", u"\u1F78")
t.add("O/", u"\u1F79")
t.add("U\\", u"\u1F7A")
t.add("U/", u"\u1F7B")
t.add("W\\", u"\u1F7C")
t.add("W/", u"\u1F7D")
t.add("A)/|", u"\u1F84")
t.add("A(/|", u"\u1F85")
t.add("H)|", u"\u1F90")
t.add("H(|", u"\u1F91")
t.add("H)/|", u"\u1F94")
t.add("H)=|", u"\u1F96")
t.add("H(=|", u"\u1F97")
t.add("W)|", u"\u1FA0")
t.add("W(=|", u"\u1FA7")
t.add("A=", u"\u1FB6")
t.add("H=", u"\u1FC6")
t.add("I=", u"\u1FD6")
t.add("U=", u"\u1FE6")
t.add("W=", u"\u1FF6")
t.add("I\\+", u"\u1FD2")
t.add("I/+", u"\u1FD3")
t.add("I+/", u"\u1FD3")
t.add("U\\+", u"\u1FE2")
t.add("U/+", u"\u1FE3")
t.add("A|", u"\u1FB3")
t.add("A/|", u"\u1FB4")
t.add("H|", u"\u1FC3")
t.add("H/|", u"\u1FC4")
t.add("W|", u"\u1FF3")
t.add("W|/", u"\u1FF4")
t.add("W/|", u"\u1FF4")
t.add("A=|", u"\u1FB7")
t.add("H=|", u"\u1FC7")
t.add("W=|", u"\u1FF7")
t.add("R(", u"\u1FE4")
t.add("*R(", u"\u1FEC")
t.add("*(R", u"\u1FEC")
# t.add("~", u"~")
# t.add("-", u"-")
# t.add("(null)", u"(null)")
# t.add("&", "&")
t.add("0", u"0")
t.add("1", u"1")
t.add("2", u"2")
t.add("3", u"3")
t.add("4", u"4")
t.add("5", u"5")
t.add("6", u"6")
t.add("7", u"7")
t.add("8", u"8")
t.add("9", u"9")
t.add("#", u"#")
t.add("$", u"$")
t.add(" ", u" ")
t.add(".", u".")
t.add(",", u",")
t.add("'", u"'")
t.add(":", u":")
t.add(";", u";")
t.add("_", u"_")
t.add("[", u"[")
t.add("]", u"]")
t.add("\n", u"")
return t
t = beta2unicodeTrie()
import sys
for line in file(sys.argv[1]):
a, b = t.convert(line)
if b:
print a.encode("utf-8"), b
raise Exception
print a.encode("utf-8")
And here is a little .tex-file with which it should work.
\documentclass[12pt]{scrbook}
\usepackage[polutonikogreek, ngerman]{babel}
\usepackage[ngerman]{betababel}
\usepackage{fontspec}
%\defaultfontfeatures{Ligatures=TeX}
%\newfontfeature{Microtype}{protrusion=default;expansion=default;}
\begin{document}
\bcode{lo/gos}
\end{document}
In case the script does not work: would it be possible to convert all the strings within the \bcode macro with something like regex? For example, the "o/" to ό, and so on? What would be the weapon of choice here?
Do I have Python installed?
Try python -V at a shell prompt. Your code is Python 2 code, so you will need a Python 2 version.
I need to install Python
The most straightforward way, if you don't need a complex environment (and you don't for this problem), is to get it from python.org. Don't forget you need Python 2.
Running the program
Generally it will be as simple as:
python beta2unicode.py myfile.tex-file
And to capture the output:
python beta2unicode.py myfile.tex-file > myfile.not-tex-file
Does the script work?
Almost. You will need to replace the code at the end of the script (the part that begins the same way this does) with this:
import sys
t = beta2unicodeTrie()
import re
BCODE = re.compile(r'\\bcode{[^}]*}')
for line in open(sys.argv[1]):
    for match in BCODE.findall(line):
bcode = match[7:-1]
a, b = t.convert(bcode.upper())
if b:
raise IOError("failed conversion '%s' in '%s'" % (b, line))
converted = a.encode("utf-8")
line = line.replace(match, converted)
print(line.rstrip())
Results
\documentclass[12pt]{scrbook}
\usepackage[polutonikogreek, ngerman]{babel}
\usepackage[ngerman]{betababel}
\usepackage{fontspec}
%\defaultfontfeatures{Ligatures=TeX}
%\newfontfeature{Microtype}{protrusion=default;expansion=default;}
\begin{document}
λόγοσ
\end{document}
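One wrinkle in the result above: λόγοσ ends in a medial sigma. Per the usage note at the top of the script, a final sigma (ς) is only produced when the input ends in "\n", so a small hedged tweak to the conversion line fixes it (the trie maps the trailing "\n" to an empty string, so nothing extra ends up in the output):
# Append "\n" so a word-final sigma converts to ς instead of σ
a, b = t.convert(bcode.upper() + "\n")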

Is there a way to programmatically combine Korean unicode into one?

Using a Korean Input Method Editor (IME), it's possible to type 버리 + 어 and it will automatically become 버려.
Is there a way to programmatically do that in Python?
>>> x, y = '버리', '어'
>>> z = '버려'
>>> ord(z[-1])
47140
>>> ord(x[-1]), ord(y)
(47532, 50612)
Is there a way to compute that 47532 + 50612 -> 47140?
Here's some more examples:
가보 + 아 -> 가봐
끝나 + ㄹ -> 끝날
I'm Korean. First, if you type 버리 + 어, it becomes 버리어, not 버려. 버려 is an abbreviation of 버리어, and it's not automatically generated. For the same reason, 가보아 cannot become 가봐 automatically while typing.
Second, by contrast, 끝나 + ㄹ becomes 끝날, because 나 has no jongseong (종성). Note that one Hangul character is made of a choseong (초성), a jungseong (중성), and a jongseong; choseong and jongseong are consonants, jungseong is a vowel. See more at Wikipedia. So only when a syllable has no jongseong while you're typing (like 끝나) is there a chance for it to take a jongseong (ㄹ).
If you want to turn 버리 + 어 into 버려, you should implement some Korean grammar rules, in particular, for this case, the abbreviation of jungseong: for example ㅣ + ㅓ = ㅕ and ㅗ + ㅏ = ㅘ, as you provided. 한글 맞춤법 chapter 4, section 5 (I can't find English pages right now) defines abbreviations like this. It's possible, but it's not an easy job, especially for non-Koreans.
Next, if all you want is to turn 끝나 + ㄹ into 끝날, it's a relatively easy job, since there are libraries that handle composition and decomposition of choseong, jungseong, and jongseong. For Python, I found hgtk. You can try something like this (illustrative, not production code):
import hgtk

# hgtk methods take one character at a time
cjj1 = hgtk.letter.decompose('나')  # ('ㄴ', 'ㅏ', '')
cjj2 = hgtk.letter.decompose('ㄹ')  # ('ㄹ', '', '')
if cjj1[2] == '' and cjj2[1] == '':
    # 나 has an empty jongseong slot, so ㄹ can move into it:
    cjj = (cjj1[0], cjj1[1], cjj2[0])  # hgtk.letter.compose(*cjj) -> '날'
    cjj2 = None
Still, without proper knowledge of Hangul, it will be very hard to get it done.
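That said, the abbreviation case can at least be sketched with the same hgtk library (a sketch, not production code: the contraction table below is deliberately tiny, and it assumes hgtk.letter.compose accepts an empty jongseong):
import hgtk

# Tiny contraction table; 한글 맞춤법 ch. 4 sec. 5 defines many more
VOWEL_CONTRACTIONS = {
    ('ㅣ', 'ㅓ'): 'ㅕ',  # 리 + 어 -> 려
    ('ㅗ', 'ㅏ'): 'ㅘ',  # 보 + 아 -> 봐
}

def contract(stem, suffix):
    cho, jung, jong = hgtk.letter.decompose(stem[-1])
    s_cho, s_jung, s_jong = hgtk.letter.decompose(suffix)
    # The rule applies only if the stem syllable has no jongseong and
    # the suffix starts with the silent consonant ㅇ
    if jong == '' and s_cho == 'ㅇ' and (jung, s_jung) in VOWEL_CONTRACTIONS:
        merged = hgtk.letter.compose(
            cho, VOWEL_CONTRACTIONS[(jung, s_jung)], s_jong)
        return stem[:-1] + merged
    return stem + suffix

print(contract('버리', '어'))  # 버려
print(contract('가보', '아'))  # 가봐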
You could use your own translation table. The drawback is that you have to enter all pairs manually, or read them from a file.
For instance:
# Sample Korean chars to map
k = [[('버리', '어'), ('버려')], [('가보', '아'), ('가봐')], [('끝나', 'ㄹ'), ('끝날')]]
class Korean(object):
def __init__(self):
self.map = {}
for m in k:
key = m[0][0] + m[0][1]
self.map[hash(key)] = m[1]
def __getitem__(self, item):
return self.map[hash(item)]
def translate(self, s):
return [ self.map[hash(token)] for token in s]
if __name__ == '__main__':
k_map = Korean()
k_chars = [ m[0][0] + m[0][1] for m in k]
print('Input: %s' % k_chars)
print('Output: %s' % k_map.translate(k_chars))
one_char_3 = k[0][0][0] + k[0][0][1]
print('%s = %s' % (one_char_3, k_map[ one_char_3 ]) )
Input: ['버리어', '가보아', '끝나ㄹ']
Output: ['버려', '가봐', '끝날']
버리어 = 버려
Tested with Python 3.4.2.

Python unified diff with line numbers from both "files"

I'm trying to figure out a way to create unified diffs with line numbers only showing N lines of context. I have been unable to do this with difflib.unified_diff. I need to show changes in both files.
The closest I can come is using diff on the command line like so:
/usr/bin/diff \
    --unchanged-line-format=' %.2dn %L' \
    --old-line-format="-%.2dn %L" \
    --new-line-format="+%.2dn %L" \
    file1.py \
    file2.py
BUT I only want to show N lines of context, and /usr/bin/diff doesn't seem to support context with a custom line format (e.g. -U2 is not compatible with --line-format: "conflicting output style options").
Below is an example of what I'd like to accomplish (the same output as the above diff, but only showing 1 line of context surrounding changes):
+01: def renamed_function()
-01: def original_function():
02:
+03: """ Neat stuff here """
04:
21:
+22: # Here's a new comment
23:
85: # Output the value of foo()
+86: print "Foo is %s"%(foo())
-86: print foo()
87:
I was able to figure out something very close to what I wanted to do. It's slower than regular diff, though. Here's the entire code, from my project GitGate.
import difflib
import os
import re

def unified_diff(to_file_path, from_file_path, context=1):
    """ Returns a list of differences between two files based
    on some context. This is probably over-complicated. """
    # difflib.unified_diff emits hunk headers of the form "@@ -l,c +l,c @@"
    pat_diff = re.compile(r'@@ (.[0-9]+,[0-9]+) (.[0-9]+,[0-9]+) @@')
from_lines = []
if os.path.exists(from_file_path):
from_fh = open(from_file_path,'r')
from_lines = from_fh.readlines()
from_fh.close()
to_lines = []
if os.path.exists(to_file_path):
to_fh = open(to_file_path,'r')
to_lines = to_fh.readlines()
to_fh.close()
diff_lines = []
lines = difflib.unified_diff(to_lines, from_lines, n=context)
for line in lines:
if line.startswith('--') or line.startswith('++'):
continue
m = pat_diff.match(line)
if m:
left = m.group(1)
right = m.group(2)
lstart = left.split(',')[0][1:]
rstart = right.split(',')[0][1:]
diff_lines.append("## %s %s ##\n"%(left, right))
to_lnum = int(lstart)
from_lnum = int(rstart)
continue
code = line[0]
lnum = from_lnum
if code == '-':
lnum = to_lnum
diff_lines.append("%s%.4d: %s"%(code, lnum, line[1:]))
if code == '-':
to_lnum += 1
elif code == '+':
from_lnum += 1
else:
to_lnum += 1
from_lnum += 1
return diff_lines
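A hypothetical invocation (the file names are placeholders); each entry in the returned list already ends with a newline:
import sys

# Diff two files with one line of context around each change
for entry in unified_diff('file1.py', 'file2.py', context=1):
    sys.stdout.write(entry)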

Two issues about the Python OpenOPC library

Issue description and environments
The OpenOPC library is friendly and easy to use, and the API is simple too, but I have found two issues while developing a tool to record real-time OPC item data.
The development environment is: Windows 8.1, Python 2.7.6, wxPython 2.8 unicode
The testing environment is: Windows XP SP3, Python 2.7.6, wxPython 2.8 unicode, Rockwell's SoftLogix as OPC server
The deploy environment is: Windows XP SP3, connected to a real Rockwell PLC, with RSLogix 5000 and RSLinx Classic Gateway installed
Questions
The opc.list function doesn't list all the items of the specified node, in both the testing and the workstation environments. The question is how to list 't' from the OPC server.
An int array 'dint100' and a dint 't' were added with RSLogix 5000 at the scope of soft_1.
With the default OPC client test tool from Rockwell, the newly added 't' is listed.
With the OpenOPC library I couldn't find a way to list the item 't', but I could read its value with its tag, via opc.read('[soft_1]t').
If 't' could be listed, it could be added into the IO tree of my tool.
The opc.servers function runs into an OPCError in the deploy environment, but the client can connect to the 'RSLinx OPC Server' directly by server name. Does the opc.servers function depend on some special DLL or service?
Any suggestions will be appreciated! Thanks in advance!
Consider that the browsing problems ("opc.list") may not be on your side. RSLinx is notorious for its broken OPC browsing. Try some test/simulation server from a different vendor to test this hypothesis.
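For instance, a quick sanity check (assuming Matrikon's free simulation server, ProgID Matrikon.OPC.Simulation, is installed; the 'Simulation Items' branch name comes from that server):
import OpenOPC

opc = OpenOPC.client()
opc.connect('Matrikon.OPC.Simulation')
# If recursive browsing works here but not against RSLinx,
# the problem is on the server side
print(opc.list('Simulation Items.*', recursive=True))
opc.close()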
I realize that I'm really late to this game. I found what was causing this issue. OpenOPC.py assumes that there cannot be both a "Leaf" and a "Branch" on the same level. Replace the function ilist with this:
def ilist(self, paths='*', recursive=False, flat=False, include_type=False):
"""Iterable version of list()"""
try:
self._update_tx_time()
pythoncom.CoInitialize()
try:
browser = self._opc.CreateBrowser()
# For OPC servers that don't support browsing
except:
return
paths, single, valid = type_check(paths)
if not valid:
raise TypeError("list(): 'paths' parameter must be a string or a list of strings")
if len(paths) == 0: paths = ['*']
nodes = {}
for path in paths:
if flat:
browser.MoveToRoot()
browser.Filter = ''
browser.ShowLeafs(True)
pattern = re.compile('^%s$' % wild2regex(path) , re.IGNORECASE)
matches = filter(pattern.search, browser)
                if include_type: matches = [(x, 'Leaf') for x in matches]  # flat mode lists leafs only
for node in matches: yield node
continue
queue = []
queue.append(path)
while len(queue) > 0:
tag = queue.pop(0)
browser.MoveToRoot()
browser.Filter = ''
pattern = None
path_str = '/'
path_list = tag.replace('.','/').split('/')
path_list = [p for p in path_list if len(p) > 0]
found_filter = False
path_postfix = '/'
for i, p in enumerate(path_list):
if found_filter:
path_postfix += p + '/'
elif p.find('*') >= 0:
pattern = re.compile('^%s$' % wild2regex(p) , re.IGNORECASE)
found_filter = True
elif len(p) != 0:
pattern = re.compile('^.*$')
browser.ShowBranches()
# Branch node, so move down
if len(browser) > 0:
try:
browser.MoveDown(p)
path_str += p + '/'
except:
if i < len(path_list)-1: return
pattern = re.compile('^%s$' % wild2regex(p) , re.IGNORECASE)
# Leaf node, so append all remaining path parts together
# to form a single search expression
else:
###################################### JG Edit - Flip the next two rows comment/uncommented
p = '.'.join(path_list[i:])
# p = string.join(path_list[i:], '.')
pattern = re.compile('^%s$' % wild2regex(p) , re.IGNORECASE)
break
###################################### JG Edit - Comment this to return to original
browser.ShowBranches()
node_types = ['Branch','Leaf']
if len(browser) == 0:
lowest_level = True
node_types.pop(0)
else:
lowest_level = False
for node_type in node_types:
if node_type=='Leaf':
browser.ShowLeafs(False)
matches = filter(pattern.search, browser)
if not lowest_level and recursive:
queue += [path_str + x + path_postfix for x in matches]
else:
###################################### JG Edit - Flip the next two rows comment/uncommented
if lowest_level or node_type=='Leaf': matches = [exceptional(browser.GetItemID,x)(x) for x in matches]
# if lowest_level: matches = [exceptional(browser.GetItemID,x)(x) for x in matches]
if include_type: matches = [(x, node_type) for x in matches]
for node in matches:
if not node in nodes: yield node
nodes[node] = True
###################################### Uncomment this to return to original
# browser.ShowBranches()
# if len(browser) == 0:
# browser.ShowLeafs(False)
# lowest_level = True
# node_type = 'Leaf'
# else:
# lowest_level = False
# node_type = 'Branch'
# matches = filter(pattern.search, browser)
# if not lowest_level and recursive:
# queue += [path_str + x + path_postfix for x in matches]
# else:
# if lowest_level: matches = [exceptional(browser.GetItemID,x)(x) for x in matches]
# if include_type: matches = [(x, node_type) for x in matches]
# for node in matches:
# if not node in nodes: yield node
# nodes[node] = True
except pythoncom.com_error as err:
error_msg = 'list: %s' % self._get_error_str(err)
raise OPCError(error_msg)
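With the patched ilist in place, a hedged usage sketch (the tag path follows the question's opc.read('[soft_1]t') example; adjust it to your RSLinx topic):
import OpenOPC

opc = OpenOPC.client()
opc.connect('RSLinx OPC Server')
# list() wraps ilist(); it should now return leafs and branches that
# share a level, e.g. the leaf 't' next to the branch 'dint100'
print(opc.list('[soft_1]*', include_type=True))
opc.close()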

Recursive directory list/analyze function doesn't seem to recurse right

I wrote what I thought was a straightforward Python script to traverse a given directory and tabulate all the file suffixes it finds. The output looks like this:
OTUS-ASIO:face fish$ sufs
>>> /Users/fish/Dropbox/ost2/face (total 194)
=== 1 1 -
=== css 16 -----
=== gif 14 -----
=== html 12 ----
=== icc 87 --------------------------
=== jpg 3 -
=== js 46 --------------
=== png 3 -
=== zip 2 -
... which would be great, if those values were correct. They are not. Here's what happens when I run it in a subdirectory of the directory I listed above:
OTUS-ASIO:face fish$ cd images/
OTUS-ASIO:images fish$ sufs
>>> /Users/fish/Dropbox/ost2/face/images (total 1016)
=== JPG 3 -
=== gif 17 -
=== ico 1 -
=== jpeg 1 -
=== jpg 901 --------------------------
=== png 87 ---
... It only seems to go one directory level down. Running the script one level up didn't pick up on the 'jpeg' suffix at all, and seemed to miss a good 898 jpg files.
The script in question is here:
#!/usr/bin/env python
# encoding: utf-8
"""
getfilesuffixes.py
Created by FI$H 2000 on 2010-10-15.
Copyright (c) 2010 OST, LLC. All rights reserved.
"""
import sys, os, getopt
help_message = '''
Prints a list of all the file suffixes found in each DIR, with counts.
Defaults to the current directory with no args.
$ %s DIR [DIR DIR etc ...]
''' % os.path.basename(__file__)
dirs = dict()
skips = ('DS_Store','hgignore')
class Usage(Exception):
def __init__(self, msg):
self.msg = msg
def getmesomesuffixes(rootdir, thisdir=None):
if not thisdir:
thisdir = rootdir
for thing in [os.path.abspath(h) for h in os.listdir(thisdir)]:
if os.path.isdir(thing):
            getmesomesuffixes(rootdir, thing)
else:
if thing.rfind('.') > -1:
suf = thing.rsplit('.').pop()
dirs[rootdir][suf] = dirs[rootdir].get(suf, 0) + 1
return
def main(argv=None):
if argv is None:
argv = sys.argv
try:
try:
opts, args = getopt.getopt(argv[1:], "h", ["help",])
except getopt.error, msg:
raise Usage(msg)
for option, value in opts:
if option == "-v":
verbose = True
if option in ("-h", "--help"):
raise Usage(help_message)
if len(args) == 0:
args.append(os.getcwd())
for durr in [os.path.abspath(arg) for arg in args]:
if os.path.isdir(durr):
dirs[durr] = dict()
for k, v in dirs.items():
getmesomesuffixes(k)
print ""
for k, v in dirs.items():
sufs = v.items()
sufs.sort()
maxcount = reduce(lambda fs, ns: fs > ns and fs or ns, map(lambda t: t[1], sufs), 1)
mincount = reduce(lambda fs, ns: fs < ns and fs or ns, map(lambda t: t[1], sufs), 1)
total = reduce(lambda fs, ns: fs + ns, map(lambda t: t[1], sufs), 0)
print ">>>\t\t\t%s (total %s)" % (k, total)
for suf, sufcount in sufs:
try:
skips.index(suf)
except ValueError:
print "===\t\t\t%12s\t %3s\t %s" % (suf, sufcount, "-" * (int(float(float(sufcount) / float(maxcount)) * 25) + 1))
print ""
except Usage, err:
print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
print >> sys.stderr, "\t for help use --help"
return 2
if __name__ == "__main__":
sys.exit(main())
It seems that getmesomesuffixes() is subtly not doing what I want it to. I hate to ask such an annoying question, but if anyone can spot whatever amateur-hour error I am making with a quick once-over, it would save me some serious frustration.
Yeah, wouldn't you be better off using os.walk?
(The actual bug in your version: os.listdir returns bare names, and os.path.abspath resolves them against the current working directory rather than against thisdir, so the descent breaks below the directory you run it from; os.path.join(thisdir, h) would fix that.)
for root, dirs, files in os.walk(basedir):
    ... do your stuff ...
See the example at
http://docs.python.org/library/os.html
Also look at os.path.splitext(path), a finer way to find the type of your file.
>>> os.path.splitext('/d/c/as.jpeg')
('/d/c/as', '.jpeg')
>>>
Both of these together should simplify your code.
import os
import os.path
from collections import defaultdict
def foo(dir='.'):
d = defaultdict(int)
for _, _, files in os.walk(dir):
for f in files:
d[os.path.splitext(f)[1]] += 1
return d
if __name__ == '__main__':
d = foo()
for k, v in sorted(d.items()):
print k, v
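Putting the two suggestions together, a sketch (Python 3, untested) that reproduces the question's bar output; the SKIPS tuple mirrors the original script:
import os
import sys
from collections import defaultdict

SKIPS = ('DS_Store', 'hgignore')

def suffix_counts(rootdir):
    """Count file suffixes under rootdir, recursively."""
    counts = defaultdict(int)
    for _, _, files in os.walk(rootdir):
        for name in files:
            suf = os.path.splitext(name)[1].lstrip('.')
            if suf and suf not in SKIPS:
                counts[suf] += 1
    return counts

if __name__ == '__main__':
    root = os.path.abspath(sys.argv[1]) if len(sys.argv) > 1 else os.getcwd()
    counts = suffix_counts(root)
    total = sum(counts.values())
    biggest = max(counts.values(), default=1)
    print(">>> %s (total %d)" % (root, total))
    for suf, n in sorted(counts.items()):
        print("=== %12s\t%3d\t%s" % (suf, n, '-' * (25 * n // biggest + 1)))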
