forcing ndiff on very dissimilar strings - python

The ndiff function from difflib provides a nice interface for detecting differences between lines. It does a great job when the lines are similar enough:
>>> print '\n'.join(list(ndiff(['foo*'], ['foot'], )))
- foo*
?    ^
+ foot
?    ^
But when the lines are too dissimilar, the rich reporting is no longer possible:
>>> print '\n'.join(list(ndiff(['foo'], ['foo*****'], )))
- foo
+ foo*****
This is the use case I am hitting, and I am trying to find ways to use ndiff (or the underlying class Differ) to force the reporting even if the strings are too dissimilar.
For the failing example, I would like to have a result like:
>>> print '\n'.join(list(ndiff(['foo'], ['foo*****'], )))
- foo
+ foo*****
?    +++++

The function responsible for printing the context (i.e. those lines starting with ?) is Differ._fancy_replace. That function only emits the detailed report when the two lines are at least 75% similar (see the cutoff variable). Unfortunately, that 75% cutoff is hard-coded and cannot be changed.
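To see why the failing example falls below that threshold, you can check the similarity ratio yourself (a quick illustration, not part of the original answer):

from difflib import SequenceMatcher

# 'foo' and 'foo*****' share 3 matching characters out of 3 + 8 total,
# so the ratio is 2*3/11 ~= 0.55, well under the 0.75 cutoff.
print(SequenceMatcher(None, 'foo', 'foo*****').ratio())  # 0.5454...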
What I can suggest is to subclass Differ and provide a version of _fancy_replace that simply ignores the cutoff. Here it is:
from difflib import Differ, SequenceMatcher

class FullContextDiffer(Differ):
    def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
        """
        Copied and adapted from https://github.com/python/cpython/blob/3.6/Lib/difflib.py#L928
        """
        best_ratio = 0
        cruncher = SequenceMatcher(self.charjunk)
        for j in range(blo, bhi):
            bj = b[j]
            cruncher.set_seq2(bj)
            for i in range(alo, ahi):
                ai = a[i]
                if ai == bj:
                    continue
                cruncher.set_seq1(ai)
                if cruncher.real_quick_ratio() > best_ratio and \
                        cruncher.quick_ratio() > best_ratio and \
                        cruncher.ratio() > best_ratio:
                    best_ratio, best_i, best_j = cruncher.ratio(), i, j

        yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)

        aelt, belt = a[best_i], b[best_j]
        atags = btags = ""
        cruncher.set_seqs(aelt, belt)
        for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
            la, lb = ai2 - ai1, bj2 - bj1
            if tag == 'replace':
                atags += '^' * la
                btags += '^' * lb
            elif tag == 'delete':
                atags += '-' * la
            elif tag == 'insert':
                btags += '+' * lb
            elif tag == 'equal':
                atags += ' ' * la
                btags += ' ' * lb
            else:
                raise ValueError('unknown tag %r' % (tag,))

        yield from self._qformat(aelt, belt, atags, btags)

        yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
And here is an example of how it works:
a = [
    'foo',
    'bar',
    'foobar',
]
b = [
    'foo',
    'bar',
    'barfoo',
]

print('\n'.join(FullContextDiffer().compare(a, b)))

# Output:
#
#   foo
#   bar
# - foobar
# ?    ---
#
# + barfoo
# ? +++

It seems what you want to do here is not to compare across multiple lines, but within single strings. You can then pass your strings directly, without wrapping them in a list, and you should get behaviour close to the one you are looking for.
>>> print ('\n'.join(list(ndiff('foo', 'foo*****'))))
  f
  o
  o
+ *
+ *
+ *
+ *
+ *
Even though the output format is not the exact one you are looking for, it encapsulates the correct information. We can make an output adapter to give the correct format.
def adapter(out):
    chars = []
    symbols = []
    for c in out:
        chars.append(c[2])
        symbols.append(c[0])
    return ''.join(chars), ''.join(symbols)
This can be used like so.
>>> print ('\n'.join(adapter(ndiff('foo', 'foo*****'))))
foo*****
   +++++

How can I change a variable name in a sympy expression

I am working on a project where I need to change every variable named 'a' to a new variable ai, where i is the order in which that occurrence of a appears in the expression. For instance, given the expression 1 + x + a + a ** 2, the output should be: 1 + x + a0 + a1 ** 2. Here is the code I've written to solve this, but it doesn't work; the expression remains unchanged.
import sympy.parsing.sympy_parser as sp1
import sympy as sp

I = sp1.parse_expr('1 + x + a + a**2', evaluate=False)
a, x = sp.symbols('a x')

def pre(expr):
    i = 0
    for arg in sp.postorder_traversal(expr):
        if arg == a:
            tmp = sp.symbols('a' + str(i))
            arg = tmp
            print(arg)
            i = i + 1

pre(I)
print(I)
One way to achieve that is:
from sympy import Pow, Mul, Symbol, Wild, degree

def change_symbol(expr, a):
    """expr: the expression to modify
    a: the symbol to look for and substitute
    """
    # define a wild symbol to look for Symbols, Multiplications and Powers
    # containing the specified symbol
    w = Wild("w", properties=[
        lambda t: isinstance(t, (Pow, Mul, Symbol)) and ((a in t.args) or (t == a))
    ])
    # find the terms that satisfy the above criteria
    terms = list(expr.find(w))
    terms.sort(key=lambda t: degree(t), reverse=True)
    # loop over those terms and perform the substitution with new symbols
    name = a.name
    for t in terms:
        o = degree(t)
        s = Symbol(name + "%s" % (o - 1))
        expr = expr.subs(t, s**o)
    return expr

change_symbol(I, a)
# out: a0 + a1**2 + x + 1
Your code did not work because you never changed the expression. When you write arg = tmp, that only rebinds the local name arg to tmp; it does not update expr. @Davide_sd shows a way to recreate an expression with pieces that have been modified. You can also let replace do the traversal and replace a as it is encountered.
suffix = [0]  # mutable suffix
def na():
    rv = Symbol('a%s' % suffix[0])
    suffix[0] += 1  # modify for next time
    return rv

>>> a, x = var('a x')
>>> (1 + x + 2*a + a**2).replace(lambda x: x == a, lambda x: na())
a0**2 + 2*a1 + x + 1
Note that you said "order in the expression" but coded as though you meant "order encountered"; in the polynomial sense, "higher order" terms will not necessarily appear later in the ordered terms. Note that a**2 appears before 2*a, which is why replace gave it the value a0:
>>> (1 + x + 2*a + a**2).args
(1, x, a**2, 2*a)

Adversarial inputs for Python substring search

The CPython implementation of substring search (e.g. via the in operator) uses the following algorithm.
def find(s, p):
    # find first occurrence of p in s
    n = len(s)
    m = len(p)
    skip = delta1(p)[p[m-1]]
    i = 0
    while i <= n-m:
        if s[i+m-1] == p[m-1]:  # (boyer-moore)
            # potential match
            if s[i:i+m-1] == p[:m-1]:
                return i
            if s[i+m] not in p:
                i = i + m + 1   # (sunday)
            else:
                i = i + skip    # (horspool)
        else:
            # skip
            if s[i+m] not in p:
                i = i + m + 1   # (sunday)
            else:
                i = i + 1
    return -1                   # not found
At least, according to this source (taken from this older answer) written by the author (?) of the CPython implementation.
This same source mentions a worst-case complexity of this algorithm as O(nm), where n and m are the lengths of the two strings. I am interested in whether this bound is tight. My question is:
Are there adversarial examples for the algorithm used in Python in? Can we give a sequence of pairs of strings (pattern, string) so that running pattern in string takes quadratic (or at least superlinear) time?
The standard example that demonstrates the quadratic worst-case run time of naive substring search, where string = 'a'*n and pattern = 'a'*m + 'b', does not work here.
The naive example of s='a'*n and p='a'*m+'b' does not work because of the line
if s[i+m-1] == p[m-1]:
This checks the last character of p ('b'), not the first, against the corresponding current position in s. Since that check fails at every alignment, the result is just a single pass over s, which is why it is so fast.
If you flip p (s='a'*n and p='b'+'a'*m), a similar thing occurs: this time the check above passes (the last character of p is now 'a'), but p is then compared forwards, so the 'b' is found quickly. Again this example is linear and fast.
A simple change to the naive example that does show O(nm) behaviour is s='a'*n and p='a'*m+'ba'. In this case, the last character of p is 'a', so the initial check passes, but the algorithm then has to iterate over the rest of p before it reaches the 'b'.
# full='a'*n; sub='a'*m+'b'
>>> timeit("sub in full", "sub='a'*10+'b'; full='a'*100")
0.13620498299860628
>>> timeit("sub in full", "sub='a'*10+'b'; full='a'*1000")
0.9594046580004942
>>> timeit("sub in full", "sub='a'*100+'b'; full='a'*1000")
0.9768632190007338
# Linear in n, but m has minimal effect: ~O(n)
# full='a'*n; sub='a'*m+'ba'
>>> timeit("sub in full", "sub='a'*10+'ba'; full='a'*100")
0.35251976200015633
>>> timeit("sub in full", "sub='a'*10+'ba'; full='a'*1000")
3.4642483099996753
>>> timeit("sub in full", "sub='a'*100+'ba'; full='a'*1000")
27.152958754999418
# Both n and m have linear effect: ~O(nm)
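If you want to measure this yourself, a small script along these lines works (my own illustration, not from the original answer; note that newer CPython releases switched to a different search algorithm for long needles, so the superlinear growth may not reproduce there):

from timeit import timeit

# Adversarial pattern: the last character always matches, so the slice
# comparison scans almost the whole pattern at every alignment.
for n, m in [(10000, 100), (20000, 100), (20000, 200)]:
    full = 'a' * n
    sub = 'a' * m + 'ba'
    t = timeit('sub in full', globals={'sub': sub, 'full': full}, number=10)
    print('n=%d m=%d: %.4fs' % (n, m, t))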
Try this:
import re
import time

def slow_match(n):
    pat = 'a' + ('z' * n)
    str = 'z' * (n + n)
    start_time = time.time()
    if re.search(pat, str):
        print("Shouldn't happen")
    print(("Searched", n, time.time() - start_time))

slow_match(10000)
slow_match(50000)
slow_match(100000)
slow_match(300000)

How to implement an encoder and decoder of ROT-13? [duplicate]

I am searching for a short and cool rot13 function in Python ;-)
I've written this function:
def rot13(s):
    chars = "abcdefghijklmnopqrstuvwxyz"
    trans = chars[13:] + chars[:13]
    rot_char = lambda c: trans[chars.find(c)] if chars.find(c) > -1 else c
    return ''.join(rot_char(c) for c in s)
Can anyone make it better, e.g. by supporting uppercase characters?
It's very simple:
>>> import codecs
>>> codecs.encode('foobar', 'rot_13')
'sbbone'
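Since ROT13 is its own inverse, the same codec also decodes (a quick check of my own, assuming Python 3's codecs module):
>>> codecs.decode('sbbone', 'rot_13')
'foobar'
>>> codecs.encode(codecs.encode('foobar', 'rot_13'), 'rot_13')
'foobar'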
maketrans()/translate() solutions…
Python 2.x
import string
rot13 = string.maketrans(
    "ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz",
    "NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm")

string.translate("Hello World!", rot13)
# 'Uryyb Jbeyq!'
Python 3.x
rot13 = str.maketrans(
    'ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz',
    'NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm')

'Hello World!'.translate(rot13)
# 'Uryyb Jbeyq!'
This works on Python 2 (but not Python 3):
>>> 'foobar'.encode('rot13')
'sbbone'
The maketrans and translate methods of str are handy for this type of thing.
Here's a general solution:
import string
def make_rot_n(n):
    lc = string.ascii_lowercase
    uc = string.ascii_uppercase
    trans = str.maketrans(lc + uc,
                          lc[n:] + lc[:n] + uc[n:] + uc[:n])
    return lambda s: str.translate(s, trans)

rot13 = make_rot_n(13)

rot13('foobar')
# 'sbbone'
From the standard-library module this.py (import this):
s = "foobar"
d = {}
for c in (65, 97):
for i in range(26):
d[chr(i+c)] = chr((i+13) % 26 + c)
print("".join([d.get(c, c) for c in s])) # sbbone
As of Python 3.1, string.translate and string.maketrans no longer exist. However, these methods can be used with bytes instead.
Thus, an up-to-date solution, directly inspired by Paul Rubel's, is:
rot13 = bytes.maketrans(
    b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
    b"nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM")

b'Hello world!'.translate(rot13)
Conversion from string to bytes and vice-versa can be done with the encode and decode built-in functions.
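For example (my own sketch following the answer above; it assumes ASCII-compatible text):
>>> 'Hello world!'.encode('ascii').translate(rot13).decode('ascii')
'Uryyb jbeyq!'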
Try this:
import codecs
codecs.encode("text to be rot13()'ed", "rot_13")
In Python 3 the str codec that @amber mentioned has moved to the codecs standard library:
>>> import codecs
>>> codecs.encode('foo', 'rot13')
'sbb'
The following function rot(s, n) encodes a string s with ROT-n encoding for any integer n, with n defaulting to 13. Both upper- and lowercase letters are supported. Values of n over 26 or negative values are handled appropriately, e.g., shifting by 27 positions is equal to shifting by one position. Decoding is done with invrot(s, n).
import string
def rot(s, n=13):
    '''Encode string s with ROT-n, i.e., by shifting all letters n positions.

    When n is not supplied, ROT-13 encoding is assumed.
    '''
    upper = string.ascii_uppercase
    lower = string.ascii_lowercase
    upper_start = ord(upper[0])
    lower_start = ord(lower[0])
    out = ''
    for letter in s:
        if letter in upper:
            out += chr(upper_start + (ord(letter) - upper_start + n) % 26)
        elif letter in lower:
            out += chr(lower_start + (ord(letter) - lower_start + n) % 26)
        else:
            out += letter
    return out

def invrot(s, n=13):
    '''Decode a string s encoded with ROT-n encoding.

    When n is not supplied, ROT-13 is assumed.
    '''
    return rot(s, -n)
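A quick check of the behaviour described above (my own example, not part of the original answer):
>>> rot('Hello, World!', 27)      # shifting by 27 behaves like shifting by 1
'Ifmmp, Xpsme!'
>>> invrot(rot('Hello, World!'))  # ROT-13 round trip
'Hello, World!'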
A one-liner to ROT13 a string S (it only handles uppercase letters; everything else is passed through unchanged):
S.translate({a : a + (lambda x: 1 if x>=0 else -1)(77 - a) * 13 for a in range(65, 91)})
For arbitrary values, something like this works for 2.x
from string import ascii_uppercase as uc, ascii_lowercase as lc, maketrans

rotate = 13  # ROT13
rot = "".join([(x[:rotate][::-1] + x[rotate:][::-1])[::-1] for x in (uc, lc)])

def rot_func(text, encode=True):
    ascii = uc + lc
    src, trg = (ascii, rot) if encode else (rot, ascii)
    trans = maketrans(src, trg)
    return text.translate(trans)

text = "Text to ROT{}".format(rotate)
encode = rot_func(text)
decode = rot_func(encode, False)
This works for uppercase and lowercase letters (note that it drops commas and mangles most other non-letter characters). I don't know how elegant you deem it to be.
def rot13(s):
    rot = lambda x: chr(ord(x) + 13) if chr(ord(x.lower()) + 13).isalpha() == True else chr(ord(x) - 13)
    s = [rot(i) for i in filter(lambda x: x != ',', map(str, s))]
    return ''.join(s)
You can support uppercase letters with the original code posted by Mr. Walter by alternating the uppercase and lowercase letters.
chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"
If you notice, the indexes of the uppercase letters are all even numbers, while the indexes of the lowercase letters are odd:
A = 0, a = 1,
B = 2, b = 3,
C = 4, c = 5,
...
This odd-even pattern allows us to safely add the amount needed without having to worry about the case.
trans = chars[26:] + chars[:26]
The reason you add 26 is that the string has doubled in length because of the uppercase letters. However, the shift is still 13 places in the alphabet.
The full code:
def rot13(s):
    chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"
    trans = chars[26:] + chars[:26]
    rot_char = lambda c: trans[chars.find(c)] if chars.find(c) > -1 else c
    return ''.join(rot_char(c) for c in s)
OUTPUT (Tested with python 2.7):
print rot13("Hello World!") --> Uryyb Jbeyq!
Interesting exercise ;-) I think I have the best solution because:
no modules needed, uses only built-in functions --> no deprecation
it can be used as a one liner
based on ascii, no mapping dicts/strings etc.
Python 2 & 3 (probably Python 1):
def rot13(s):
    return ''.join([chr(ord(n) + (13 if 'Z' < n < 'n' or n < 'N' else -13)) if n.isalpha() else n for n in s])

def rot13_verbose(s):
    x = []
    for n in s:
        if n.isalpha():
            # 'n' is the 14th character in the alphabet, so if a character is bigger we can subtract 13 to get rot13
            ort = 13 if 'Z' < n < 'n' or n < 'N' else -13
            x.append(chr(ord(n) + ort))
        else:
            x.append(n)
    return ''.join(x)

# crazy .min version (99 characters) disclaimer: not pep8 compatible^
def r(s):return''.join([chr(ord(n)+(13if'Z'<n<'n'or'N'>n else-13))if n.isalpha()else n for n in s])
def rot13(s):
    lower_chars = ''.join(chr(c) for c in range(97, 123))  # ASCII a-z
    upper_chars = ''.join(chr(c) for c in range(65, 91))   # ASCII A-Z
    lower_encode = lower_chars[13:] + lower_chars[:13]     # shift 13 bytes
    upper_encode = upper_chars[13:] + upper_chars[:13]     # shift 13 bytes
    output = ""                                            # output string
    for c in s:
        if c in lower_chars:
            output = output + lower_encode[lower_chars.find(c)]
        elif c in upper_chars:
            output = output + upper_encode[upper_chars.find(c)]
        else:
            output = output + c
    return output
Another solution with shifting. Maybe this code helps other people to understand rot13 better.
I haven't tested it completely.
from string import maketrans, lowercase, uppercase

def rot13(message):
    lower = maketrans(lowercase, lowercase[13:] + lowercase[:13])
    upper = maketrans(uppercase, uppercase[13:] + uppercase[:13])
    return message.translate(lower).translate(upper)
I found this post when I started wondering about the easiest way to implement rot13 in Python myself. My goals were:
Works in both Python 2.7.6 and 3.3.
Handle both upper and lower case.
Not use any external libraries.
This meets all three of those requirements. That being said, I'm sure it's not winning any code golf competitions.
def rot13(string):
    CLEAR = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    ROT13 = 'NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm'
    TABLE = {x: y for x, y in zip(CLEAR, ROT13)}
    return ''.join(map(lambda x: TABLE.get(x, x), string))

if __name__ == '__main__':
    CLEAR = 'Hello, World!'
    R13 = 'Uryyb, Jbeyq!'

    r13 = rot13(CLEAR)
    assert r13 == R13

    clear = rot13(r13)
    assert clear == CLEAR
This works by creating a lookup table and simply returning the original character for any character not found in the lookup table.
Update
I got to worrying about someone wanting to use this to encrypt an arbitrarily-large file (say, a few gigabytes of text). I don't know why they'd want to do this, but what if they did? So I rewrote it as a generator. Again, this has been tested in both Python 2.7.6 and 3.3.
def rot13(clear):
    CLEAR = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    ROT13 = 'NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm'
    TABLE = {x: y for x, y in zip(CLEAR, ROT13)}
    for c in clear:
        yield TABLE.get(c, c)

if __name__ == '__main__':
    CLEAR = 'Hello, World!'
    R13 = 'Uryyb, Jbeyq!'

    r13 = ''.join(rot13(CLEAR))
    assert r13 == R13

    clear = ''.join(rot13(r13))
    assert clear == CLEAR
I couldn't leave this question here without a single statement using the modulo operator.
def rot13(s):
    return ''.join([chr(x.islower() and ((ord(x) - 84) % 26) + 97
                        or x.isupper() and ((ord(x) - 52) % 26) + 65
                        or ord(x))
                    for x in s])
This is neither Pythonic nor good practice, but it works!
>> rot13("Hello World!")
Uryyb Jbeyq!
You can also use this:
def n3bu1A(n):
    o = ""
    key = {
        'a':'n', 'b':'o', 'c':'p', 'd':'q', 'e':'r', 'f':'s', 'g':'t', 'h':'u',
        'i':'v', 'j':'w', 'k':'x', 'l':'y', 'm':'z', 'n':'a', 'o':'b', 'p':'c',
        'q':'d', 'r':'e', 's':'f', 't':'g', 'u':'h', 'v':'i', 'w':'j', 'x':'k',
        'y':'l', 'z':'m', 'A':'N', 'B':'O', 'C':'P', 'D':'Q', 'E':'R', 'F':'S',
        'G':'T', 'H':'U', 'I':'V', 'J':'W', 'K':'X', 'L':'Y', 'M':'Z', 'N':'A',
        'O':'B', 'P':'C', 'Q':'D', 'R':'E', 'S':'F', 'T':'G', 'U':'H', 'V':'I',
        'W':'J', 'X':'K', 'Y':'L', 'Z':'M'}
    for x in n:
        v = x in key.keys()
        if v == True:
            o += (key[x])
        else:
            o += x
    return o

Yes = n3bu1A("N zhpu fvzcyre jnl gb fnl Guvf vf zl Zragbe!!")
print(Yes)
Short solution:
def rot13(text):
    return "".join([x if ord(x) not in range(65, 91)+range(97, 123) else
                    chr(((ord(x)-97+13)%26)+97) if x.islower() else
                    chr(((ord(x)-65+13)%26)+65) for x in text])

Developing a heuristic to test simple anonymous Python functions for equivalency

I know how function comparison works in Python 3 (just comparing addresses in memory), and I understand why.
I also understand that "true" comparison (do functions f and g return the same result given the same arguments, for any arguments?) is practically impossible.
I am looking for something in between. I want the comparison to work on the simplest cases of identical functions, and possibly some less trivial ones:
lambda x : x == lambda x : x # True
lambda x : 2 * x == lambda y : 2 * y # True
lambda x : 2 * x == lambda x : x * 2 # True or False is fine, but must be stable
lambda x : 2 * x == lambda x : x + x # True or False is fine, but must be stable
Note that I'm interested in solving this problem for anonymous functions (lambda), but wouldn't mind if the solution also works for named functions.
The motivation for this is that inside the blist module, it would be nice to verify that two sortedset instances have the same sort function before performing a union, etc. on them.
Named functions are of less interest because I can assume them to be different when they are not identical. After all, suppose someone created two sortedsets with a named function in the key argument. If they intend these instances to be "compatible" for the purposes of set operations, they'd probably use the same function, rather than two separate named functions that perform identical operations.
I can only think of three approaches. All of them seem hard, so any ideas appreciated.
Comparing bytecode might work, but it could be annoying that it is implementation-dependent (and hence code that worked on one Python could break on another).
Comparing tokenized source code (or the AST) seems reasonable and portable; a sketch of this idea appears a little further below. Of course, it's less powerful (since identical functions are more likely to be rejected).
A solid heuristic borrowed from some symbolic computation textbook is theoretically the best approach. It might seem too heavy for my purpose, but it actually could be a good fit since lambda functions are usually tiny and so it would run fast.
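For illustration, here is a rough sketch of the source/AST approach (my own example, not from the question; it assumes you still have the lambda's source text, which is generally not recoverable from a function object):

import ast

class _Canon(ast.NodeTransformer):
    """Rename parameters and variables to positional placeholders."""
    def __init__(self):
        self.names = {}
    def _rename(self, old):
        return self.names.setdefault(old, 'v%d' % len(self.names))
    def visit_Name(self, node):
        node.id = self._rename(node.id)
        return node
    def visit_arg(self, node):
        node.arg = self._rename(node.arg)
        return node

def same_source(src_a, src_b):
    canon = lambda src: ast.dump(_Canon().visit(ast.parse(src)))
    return canon(src_a) == canon(src_b)

print(same_source('lambda x: 2 * x', 'lambda y: 2 * y'))  # True
print(same_source('lambda x: 2 * x', 'lambda x: x * 2'))  # False, but stable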
EDIT
A more complicated example, based on the comment by @delnan:
# global variable
fields = ['id', 'name']

def my_function():
    global fields
    s1 = sortedset(key=lambda x: x[fields[0].lower()])
    # some intervening code here
    # ...
    s2 = sortedset(key=lambda x: x[fields[0].lower()])
Would I expect the key functions for s1 and s2 to evaluate as equal?
If the intervening code contains any function call at all, the value of fields may be modified, resulting in different key functions for s1 and s2. Since we clearly won't be doing control-flow analysis to solve this problem, we have to evaluate these two lambda functions as different if we are trying to perform this evaluation before runtime. (Even if fields wasn't global, another name might have been bound to the same list, etc.) This would severely curtail the usefulness of the whole exercise, since few lambda functions have no dependence on their environment.
EDIT 2:
I realized it's very important to compare the function objects as they exist at runtime. Without that, functions that depend on variables from an outer scope cannot be compared at all, and most useful functions do have such dependencies. Considered at runtime, all functions with the same signature are comparable in a clean, logical way, regardless of what they depend on or whether they are impure.
As a result, I need not just the bytecode but also the global state as of the time the function object was created (presumably __globals__). Then I have to match all variables from the outer scope to the values from __globals__.
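As a rough sketch of what that runtime comparison could look like (my own illustration using only documented function attributes; it ignores closures over nonlocals):

def runtime_equal(f, g):
    """Compare two functions as they exist at runtime:
    same bytecode/constants plus the same resolved globals."""
    cf, cg = f.__code__, g.__code__
    if (cf.co_code, cf.co_consts, cf.co_argcount) != (cg.co_code, cg.co_consts, cg.co_argcount):
        return False
    # resolve every global name the code refers to against __globals__
    gf = {name: f.__globals__.get(name) for name in cf.co_names}
    gg = {name: g.__globals__.get(name) for name in cg.co_names}
    return gf == gg

k = 2
f = lambda x: k * x
g = lambda x: k * x
print(runtime_equal(f, g))  # True: same bytecode, same resolved globals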
Edited to check whether external state will affect the sorting function, as well as whether the two functions are equivalent.
I hacked up dis.dis and friends to output to a global file-like object. I then stripped out line numbers, normalized variable names (without touching constants), and compared the results.
You could clean this up so dis.dis and friends yielded lines, so you wouldn't have to trap their output. But this is a working proof of concept for using dis.dis for function comparison with minimal changes.
import types
from opcode import *

_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              types.ClassType, type)

def dis(x):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback.
    """
    if isinstance(x, types.InstanceType):
        x = x.__class__
    if hasattr(x, 'im_func'):
        x = x.im_func
    if hasattr(x, 'func_code'):
        x = x.func_code
    if hasattr(x, '__dict__'):
        items = x.__dict__.items()
        items.sort()
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print >> out, "Disassembly of %s:" % name
                try:
                    dis(x1)
                except TypeError, msg:
                    print >> out, "Sorry:", msg
                print >> out
    elif hasattr(x, 'co_code'):
        disassemble(x)
    elif isinstance(x, str):
        disassemble_string(x)
    else:
        raise TypeError, \
              "don't know how to disassemble %s objects" % \
              type(x).__name__

def disassemble(co, lasti=-1):
    """Disassemble a code object."""
    code = co.co_code
    labels = findlabels(code)
    linestarts = dict(findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None
    while i < n:
        c = code[i]
        op = ord(c)
        if i in linestarts:
            if i > 0:
                print >> out
            print >> out, "%3d" % linestarts[i],
        else:
            print >> out, '   ',
        if i == lasti: print >> out, '-->',
        else: print >> out, '   ',
        if i in labels: print >> out, '>>',
        else: print >> out, '  ',
        print >> out, repr(i).rjust(4),
        print >> out, opname[op].ljust(20),
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = oparg*65536L
            print >> out, repr(oparg).rjust(5),
            if op in hasconst:
                print >> out, '(' + repr(co.co_consts[oparg]) + ')',
            elif op in hasname:
                print >> out, '(' + co.co_names[oparg] + ')',
            elif op in hasjrel:
                print >> out, '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                print >> out, '(' + co.co_varnames[oparg] + ')',
            elif op in hascompare:
                print >> out, '(' + cmp_op[oparg] + ')',
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                print >> out, '(' + free[oparg] + ')',
        print >> out

def disassemble_string(code, lasti=-1, varnames=None, names=None,
                       constants=None):
    labels = findlabels(code)
    n = len(code)
    i = 0
    while i < n:
        c = code[i]
        op = ord(c)
        if i == lasti: print >> out, '-->',
        else: print >> out, '   ',
        if i in labels: print >> out, '>>',
        else: print >> out, '  ',
        print >> out, repr(i).rjust(4),
        print >> out, opname[op].ljust(15),
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256
            i = i+2
            print >> out, repr(oparg).rjust(5),
            if op in hasconst:
                if constants:
                    print >> out, '(' + repr(constants[oparg]) + ')',
                else:
                    print >> out, '(%d)' % oparg,
            elif op in hasname:
                if names is not None:
                    print >> out, '(' + names[oparg] + ')',
                else:
                    print >> out, '(%d)' % oparg,
            elif op in hasjrel:
                print >> out, '(to ' + repr(i + oparg) + ')',
            elif op in haslocal:
                if varnames:
                    print >> out, '(' + varnames[oparg] + ')',
                else:
                    print >> out, '(%d)' % oparg,
            elif op in hascompare:
                print >> out, '(' + cmp_op[oparg] + ')',
        print >> out

def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets.
    """
    labels = []
    n = len(code)
    i = 0
    while i < n:
        c = code[i]
        op = ord(c)
        i = i+1
        if op >= HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256
            i = i+2
            label = -1
            if op in hasjrel:
                label = i+oparg
            elif op in hasjabs:
                label = oparg
            if label >= 0:
                if label not in labels:
                    labels.append(label)
    return labels

def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.
    """
    byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
    line_increments = [ord(c) for c in code.co_lnotab[1::2]]

    lastlineno = None
    lineno = code.co_firstlineno
    addr = 0
    for byte_incr, line_incr in zip(byte_increments, line_increments):
        if byte_incr:
            if lineno != lastlineno:
                yield (addr, lineno)
                lastlineno = lineno
            addr += byte_incr
        lineno += line_incr
    if lineno != lastlineno:
        yield (addr, lineno)
class FakeFile(object):
    def __init__(self):
        self.store = []
    def write(self, data):
        self.store.append(data)

a = lambda x : x
b = lambda x : x        # True
c = lambda x : 2 * x
d = lambda y : 2 * y    # True
e = lambda x : 2 * x
f = lambda x : x * 2    # True or False is fine, but must be stable
g = lambda x : 2 * x
h = lambda x : x + x    # True or False is fine, but must be stable

funcs = a, b, c, d, e, f, g, h
outs = []
for func in funcs:
    out = FakeFile()
    dis(func)
    outs.append(out.store)

import ast

def outfilter(out):
    for i in out:
        if i.strip().isdigit():
            continue
        if '(' in i:
            try:
                ast.literal_eval(i)
            except ValueError:
                i = "(x)"
        yield i

processed_outs = [(out, 'LOAD_GLOBAL' in out or 'LOAD_DEREF' in out)
                  for out in (''.join(outfilter(out)) for out in outs)]

for (out1, polluted1), (out2, polluted2) in zip(processed_outs[::2], processed_outs[1::2]):
    print 'Bytecode Equivalent:', out1 == out2, '\nPolluted by state:', polluted1 or polluted2
The output is True, True, False, and False and is stable. The "Polluted" bool is true if the output will depend on external state -- either global state or a closure.
So, let's address some technical issues first.
1) Bytecode: it is probably not a problem because, instead of inspecting the pyc (binary) files, you can use the dis module to get the bytecode, e.g.
>>> f = lambda x, y : x+y
>>> dis.dis(f)
  1           0 LOAD_FAST                0 (x)
              3 LOAD_FAST                1 (y)
              6 BINARY_ADD
              7 RETURN_VALUE
No need to worry about platform.
2) Tokenized source code. Again, Python has all you need to do the job. You can use the ast module to parse the code and obtain the AST.
>>> a = ast.parse("f = lambda x, y : x+y")
>>> ast.dump(a)
"Module(body=[Assign(targets=[Name(id='f', ctx=Store())], value=Lambda(args=arguments(args=[Name(id='x', ctx=Param()), Name(id='y', ctx=Param())], vararg=None, kwarg=None, defaults=[]), body=BinOp(left=Name(id='x', ctx=Load()), op=Add(), right=Name(id='y', ctx=Load()))))])"
So, the question we should really address is: is it feasible to determine analytically that two functions are equivalent?
It is easy for a human to say that 2*x equals x+x, but how can we create an algorithm to prove it?
If it is what you want to achieve, you may want to check this out: http://en.wikipedia.org/wiki/Computer-assisted_proof
However, if ultimately you simply want to assert that two different data sets are sorted in the same order, you just need to run sort function A on dataset B and vice versa, and then check the outcome. If the results are identical, then the functions are probably functionally identical. Of course, the check is only valid for the datasets in question.
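A minimal sketch of that empirical check (my own illustration; key_a and key_b stand in for the two sortedset key functions):

def keys_agree(key_a, key_b, dataset_a, dataset_b):
    """Empirical check: do both key functions induce the same order
    on both data sets? Only valid for the data actually tested."""
    for data in (dataset_a, dataset_b):
        if sorted(data, key=key_a) != sorted(data, key=key_b):
            return False
    return True

print(keys_agree(lambda x: 2 * x, lambda x: x + x, [3, 1, 2], [5, 4]))  # True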

Using a caesarian cipher on a string of text in python?

I'm trying to slowly knock out all of the intricacies of Python. Basically, I'm looking for some way, in Python, to take a string of characters and shift them all over by x characters.
For example, inputting abcdefg will give me cdefghi (if x is 2).
My first version:
>>> key = 2
>>> msg = "abcdefg"
>>> ''.join( map(lambda c: chr(ord('a') + (ord(c) - ord('a') + key)%26), msg) )
'cdefghi'
>>> msg = "uvwxyz"
>>> ''.join( map(lambda c: chr(ord('a') + (ord(c) - ord('a') + key)%26), msg) )
'wxyzab'
(Of course it works as expected only if msg is lowercase...)
edit: I definitely second David Raznick's answer:
>>> import string
>>> alphabet = "abcdefghijklmnopqrstuvwxyz"
>>> key = 2
>>> tr = string.maketrans(alphabet, alphabet[key:] + alphabet[:key])
>>> "abcdefg".translate(tr)
'cdefghi'
I think your best bet is to look at string.translate. You may have to use maketrans to build the mapping you like.
I would do it this way (for conceptual simplicity):
def encode(s):
    l = [ord(i) for i in s]
    return ''.join([chr(i + 2) for i in l])
Point being that you convert the letter to ASCII, add 2 to that code, convert it back, and "cast" it into a string (create a new string object). This also makes no conversions based on "case" (upper vs. lower).
Potential optimizations/research areas:
Use of StringIO module for large strings
Apply this to Unicode (not sure how)
This solution works for both lowercase and uppercase:
from string import lowercase, uppercase

def caesar(text, key):
    result = []
    for c in text:
        if c in lowercase:
            idx = lowercase.index(c)
            idx = (idx + key) % 26
            result.append(lowercase[idx])
        elif c in uppercase:
            idx = uppercase.index(c)
            idx = (idx + key) % 26
            result.append(uppercase[idx])
        else:
            result.append(c)
    return "".join(result)
Here is a test:
>>> caesar("abcdefg", 2)
'cdefghi'
>>> caesar("z", 1)
'a'
Another version. It allows you to define your own alphabet, and doesn't translate any other characters (such as punctuation). The ugly part here is the loop, which might cause performance problems. I'm not sure about Python, but appending strings like this is a big no-no in other languages like Java and C#.
def rotate(data, n):
    alphabet = list("abcdefghijklmnopqrstuvwxyz")
    n = n % len(alphabet)
    target = alphabet[n:] + alphabet[:n]
    translation = dict(zip(alphabet, target))
    result = ""
    for c in data:
        if translation.has_key(c):
            result += translation[c]
        else:
            result += c
    return result

print rotate("foobar", 1)
print rotate("foobar", 2)
print rotate("foobar", -1)
print rotate("foobar", -2)
Result:
gppcbs
hqqdct
ennazq
dmmzyp
The maketrans() solution suggested by others is the way to go here.
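For completeness, a Python 3 version of that idea might look like this (a sketch of my own, assuming only ASCII letters should be shifted and everything else left untouched):

import string

def caesar(text, key):
    lower, upper = string.ascii_lowercase, string.ascii_uppercase
    table = str.maketrans(
        lower + upper,
        lower[key:] + lower[:key] + upper[key:] + upper[:key])
    return text.translate(table)

print(caesar("abcdefg", 2))   # cdefghi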
