How to use the DICT protocol client in Python?

How to use the DICT protocol client in Python? - python

I use the command line DICT client named dict like this : dict <some word>, which will show me the meaning from the dictionary servers which I have configured.
I'd like to interact with the dict servers from Python, for example reimplement that command line client in Python.
I found a Python module for the dict protocol in the Ubuntu repositories that I've installed (apt-get install python-dictclient), but unfortunately I couldn't find any documentation for this module. I tried to understand the modules's builtin help help('dictclient') but didn't succeed. I could only make a connection to a dict server and was able to see apartial definition, here's my attempt :
import dictclient
c = dictclient.Connection('localhost', 2628)
If anyone has experience with this module please explain to me how to use it.

dict-like definition fetching using dictclient in Python:
from dictclient import Connection, Database
from sys import argv
con = Connection("dict.org") #or whatever your server is
db = Database(con, "*") #replace * with ! to get only the first result
def_list = db.define(argv[1]) #list containing Definition objects
for x in def_list:
print x.getdefstr() + '\n'

Here is a Python3 version of dictlient:
# -*- coding: UTF-8 -*-
# Client for the DICT protocol (RFC2229)
#
# Copyright (C) 2002 John Goerzen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# A few small hacks to make it work on Python3 - KrisvanderMerwe 25 Aoril 2015
import socket, re
version = '1.0.2.1'
def dequote(teks):
"""Will remove single or double quotes from the start and end of a string
and return the result."""
quotechars = "'\""
while len(teks) and teks[0] in quotechars:
teks = teks[1:]
while len(teks) and teks[-1] in quotechars:
teks = teks[0:-1]
return teks
def enquote(teks):
"""This function will put a string in double quotes, properly
escaping any existing double quotes with a backslash. It will
return the result."""
return '"' + teks.replace('"', "\\\"") + '"'
class Connection:
"""This class is used to establish a connection to a database server.
You will usually use this as the first call into the dictclient library.
Instantiating it takes two optional arguments: a hostname (a string)
and a port (an int). The hostname defaults to localhost
and the port to 2628, the port specified in RFC."""
def __init__(self, hostname = 'localhost', port = 2628):
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.sock.connect((hostname, port))
self.rfile = self.sock.makefile()
self.wfile = self.sock.makefile("wb", 0)
self.saveconnectioninfo()
def getresultcode(self):
"""Generic function to get a result code. It will return a list
consisting of two items: the integer result code and the text
following. You will not usually use this function directly."""
line = self.rfile.readline().strip()
code, text = line.split(' ', 1)
return [int(code), text]
def get200result(self):
"""Used when expecting a single line of text -- a 200-class
result. Returns [intcode, remaindertext]"""
code, text = self.getresultcode()
if code < 200 or code >= 300:
raise Exception ("Got '%s' when 200-class response expected %s " % (code,text) )
return [code, text]
def get100block(self):
"""Used when expecting multiple lines of text -- gets the block
part only. Does not get any codes or anything! Returns a string."""
data = []
while 1:
line = self.rfile.readline().strip()
if line == '.':
break
data.append(line)
return "\n".join(data)
def get100result(self):
"""Used when expecting multiple lines of text, terminated by a period
and a 200 code. Returns: [initialcode, [bodytext_1lineperentry],
finalcode]"""
code, text = self.getresultcode()
if code < 100 or code >= 200:
raise Exception ("Got '%s' when 100-class response expected" % code )
bodylines = self.get100block().split("\n")
code2 = self.get200result()[0]
return [code, bodylines, code2]
def get100dict(self):
"""Used when expecting a dictionary of results. Will read from
the initial 100 code, to a period and the 200 code."""
dicl = {}
for line in self.get100result()[1]:
key, val = line.split(' ', 1)
dicl[key] = dequote(val)
return dicl
def saveconnectioninfo(self):
"""Called by __init__ to handle the initial connection. Will
save off the capabilities and messageid."""
code, string = self.get200result()
assert code == 220
capstr, msgid = re.search('<(.*)> (<.*>)$', string).groups()
self.capabilities = capstr.split('.')
self.messageid = msgid
def getcapabilities(self):
"""Returns a list of the capabilities advertised by the server."""
return self.capabilities
def getmessageid(self):
"""Returns the message id, including angle brackets."""
return self.messageid
def getdbdescs(self):
"""Gets a dict of available databases. The key is the db name
and the value is the db description. This command may generate
network traffic!"""
if hasattr(self, 'dbdescs'):
return self.dbdescs
self.sendcommand("SHOW DB")
self.dbdescs = self.get100dict()
return self.dbdescs
def getstratdescs(self):
"""Gets a dict of available strategies. The key is the strat
name and the value is the strat description. This call may
generate network traffic!"""
if hasattr(self, 'stratdescs'):
return self.stratdescs
self.sendcommand("SHOW STRAT")
self.stratdescs = self.get100dict()
return self.stratdescs
def getdbobj(self, dbname):
"""Gets a Database object corresponding to the database name passed
in. This function explicitly will *not* generate network traffic.
If you have not yet run getdbdescs(), it will fail."""
if not hasattr(self, 'dbobjs'):
self.dbobjs = {}
if dbname in self.dbobjs:
return self.dbobjs[dbname]
# We use self.dbdescs explicitly since we don't want to
# generate net traffic with this request!
if dbname != '*' and dbname != '!' and \
not dbname in self.dbdescs.keys():
raise Exception( "Invalid database name '%s'" % dbname )
self.dbobjs[dbname] = Database(self, dbname)
return self.dbobjs[dbname]
def sendcommand(self, command):
"""Takes a command, without a newline character, and sends it to
the server."""
self.wfile.write(command.encode() + b"\n")
def define(self, database, word):
"""Returns a list of Definition objects for each matching
definition. Parameters are the database name and the word
to look up. This is one of the main functions you will use
to interact with the server. Returns a list of Definition
objects. If there are no matches, an empty list is returned.
Note: database may be '*' which means to search all databases,
or '!' which means to return matches from the first database that
has a match."""
self.getdbdescs() # Prime the cache
if database != '*' and database != '!' and \
not database in self.getdbdescs():
raise Exception ( "Invalid database '%s' specified" % database )
self.sendcommand("DEFINE " + enquote(database) + " " + enquote(word))
code = self.getresultcode()[0]
retval = []
if code == 552:
# No definitions.
return []
if code != 150:
raise Exception ("Unknown code %d" % code )
while 1:
code, text = self.getresultcode()
if code != 151:
break
resultword, resultdb = re.search('^"(.+)" (\S+)', text).groups()
defstr = self.get100block()
retval.append(Definition(self, self.getdbobj(resultdb),
resultword, defstr))
return retval
def match(self, database, strategy, word):
"""Gets matches for a query. Arguments are database name,
the strategy (see available ones in getstratdescs()), and the
pattern/word to look for. Returns a list of Definition objects.
If there is no match, an empty list is returned.
Note: database may be '*' which means to search all databases,
or '!' which means to return matches from the first database that
has a match."""
self.getstratdescs() # Prime the cache
self.getdbdescs() # Prime the cache
if not strategy in self.getstratdescs().keys():
raise Exception ( "Invalid strategy '%s'" % strategy )
if database != '*' and database != '!' and not database in self.getdbdescs().keys():
raise Exception ( "Invalid database name '%s'" % database )
self.sendcommand("MATCH %s %s %s" % (enquote(database),
enquote(strategy),
enquote(word)))
code = self.getresultcode()[0]
if code == 552:
# No Matches
return []
if code != 152:
raise Exception ( "Unexpected code %d" % code )
retval = []
for matchline in self.get100block().split("\n"):
matchdict, matchword = matchline.split(" ", 1)
retval.append(Definition(self, self.getdbobj(matchdict),
dequote(matchword)))
if self.getresultcode()[0] != 250:
raise Exception ( "Unexpected end-of-list code %d" % code )
return retval
class Database:
"""An object corresponding to a particular database in a server."""
def __init__(self, dictconn, dbname):
"""Initialize the object -- requires a Connection object and
a database name."""
self.conn = dictconn
self.name = dbname
def getname(self):
"""Returns the short name for this database."""
return self.name
def getdescription(self):
if hasattr(self, 'description'):
return self.description
if self.getname() == '*':
self.description = 'All Databases'
elif self.getname() == '!':
self.description = 'First matching database'
else:
self.description = self.conn.getdbdescs()[self.getname()]
return self.description
def getinfo(self):
"""Returns a string of info describing this database."""
if hasattr(self, 'info'):
return self.info
if self.getname() == '*':
self.info = "This special database will search all databases on the system."
elif self.getname() == '!':
self.info = "This special database will return matches from the first matching database."
else:
self.conn.sendcommand("SHOW INFO " + self.name)
self.info = "\n".join(self.conn.get100result()[1])
return self.info
def define(self, word):
"""Get a definition from within this database.
The argument, word, is the word to look up. The return value is the
same as from Connection.define()."""
return self.conn.define(self.getname(), word)
def match(self, strategy, word):
"""Get a match from within this database.
The argument, word, is the word to look up. The return value is
the same as from Connection.define()."""
return self.conn.match(self.getname(), strategy, word)
class Definition:
"""An object corresponding to a single definition."""
def __init__(self, dictconn, db, word, defstr = None):
"""Instantiate the object. Requires: a Connection object,
a Database object (NOT corresponding to '*' or '!' databases),
a word. Optional: a definition string. If not supplied,
it will be fetched if/when it is requested."""
self.conn = dictconn
self.db = db
self.word = word
self.defstr = defstr
def getdb(self):
"""Get the Database object corresponding to this definition."""
return self.db
def getdefstr(self):
"""Get the definition string (the actual content) of this
definition."""
if not self.defstr:
self.defstr = self.conn.define(self.getdb().getname(), self.word)[0].getdefstr()
return self.defstr
def getword(self):
"""Get the word this object describes."""
return self.word

Related

Python class and objects within another function

I’m trying to learn how to utilize class and objects within my Python code. I'm totally overwhelmed by the fact that is is suppose to be a begginingers class and I got this assignment where if something like this:
Class name is IPAdress and there I want properties IP, hostname, ASN and ISP. Also, it has a method which creates similar print like this:
IP: 85.76.129.254
Hostname: 85-76-129-254-nat.elisa-mobile.fi
ASN: 719
ISP: Elisa Mobile
Method can be named as PrintDetails.
Once the class is ready, I want to use it within my Osio9 function as following
Function asks user to input IP, hostname, ASN and ISP. Object is created according to this information.
Object is added to list (initially empty) and information is printed by using the PrintDetails -method
After this user is asked if he/she wants to create a new IP and if answer is yes -> repeat the above process (ask for input), add the new object to a list and print all information from the list with PrintDetails
After input, the list is looped through and it calls each IPAddress object with PrintDetails -method. (Print can look like the example above)
If user doesn’t want to continue inputting IP-addresses, exit the function
I have no clue how to proceed and answer I receive from my teacher is to look from google so here I am.
class IPAddress: ##define class
def __init__(self, IP, hostname, ASN, ISP):
self.ip = ip
self.hostname = hostname
self.asn = asn
self.isp = isp
def PrintDetails(self): ## construction method
print("IP-address: " + str(self.ip) + " | Hostname: " + str(self.hostname) + " | ASN: " + str(self.asn) + "ISP: " + str(self.isp))
def Osio9():
addresses = []
while(true):
ip = str (input("Enter IP address: ")) #ask for user input
hostname = str (input("Enter Hostname: "))
asn = str (input("Enter ASN: "))
isp = str (input("Enter ISP: "))
address = IPAddress
address.isp = isp
addresses.append(address)
for IPAddress in addresses:
IPAddress.PrintDetails(self)
# continueInput = str (input("Do you want to add more IP-addresses (y/n): "))
# if (continueInput == "y"):
# return addresses
#
# else break

I just saw your last update and there are some good ideas in it. Here is one version of the program, which does not completely correspond to your assignment as the main execution is not within a loop but in a if __name__ == '__main__': statement which will execute only when the program is called directly, and not when it is imported or when function part of this program are imported. I highly encourage you to try a small part of the code, one at a time, and then to adapt it to your specific need.
Let's go piece by piece:
#%% Definition of IPAddress
class IPAddress:
def __init__(self, ip, hostname, asn, isp):
self.ip = ip
self.hostname = hostname
self.asn = asn
self.isp = isp
def prettyprint(self):
str2print = f' | IP: {self.ip}\n' +\
f' | Hostname: {self.hostname}\n' +\
f' | ASN: {self.asn}\n' +\
f' | ISP: {self.isp}\n'
print (str2print)
def __key(self):
"""
key method to uniquely identify an object IPAddress.
I defined it as a tuple of the 4 main attributes, but if one of them is
unique it could be directly used as a key.
"""
return (self.ip, self.hostname, self.asn, self.isp)
def __eq__(self, other):
""" == comparison method."""
return isinstance(self, type(other)) and self.__key() == other.__key()
def __ne__(self, other):
""" != comparison method."""
return not self.__eq__(other)
def __hash__(self):
"""Makes the object hashable. Sets can now be used on IPAddress."""
return hash(self.__key())
I am using spyder as an IDE, and the symbol #%% creates sections of code, very similar to matlab, which can be executed in the current console by doing Right-Click -> Run Cell.
I changed the print function to make it more explicit. Strings object can be denoted by 3 symbols: 'str', "str", """str""". Additionnaly, a flag in front can change the behavior of the string. The most usefull ones are f and r.
Formatting a string with either .format() method or the f flag.
value = 1
str1 = 'string to format with value v = {}'.format(value)
str2 = f'string to format with value v = {value}'
You can print both string to see that they are equivalent. The flag f is similar to a .format() call. It allows the element within the brackets {} to be executed before the print. It is especially usefull when you format a string with multiple values.
v1 = 1
v2 = 2
str1 = 'string to format with value v = {} and {}'.format(v1, v2)
str2 = f'string to format with value v = {v1} and {v2}'
I still chose to split the sring into 4 strings and to add them together for a cleaner look of the code. The backslash \ at the end of each line allows python to consider the current line and the next one as the same line. It's a line-split character.
N.B: If you want to know what the flag r does, look into the escape character ;) r flag is especially usefull when working with paths.
Objects can be compared together. For instace, if you try 5==3, python will return False because the integer object 5 is different from the integer object 3. You can implement the == and != method in any object/class you develop. To do so, I first define a unique key for this object in the function __key(). Then the function __eq__ implements == and returns True if both objects compared are created from the same class and have the same key. The method __ne__ implements != and returns True if __eq__ is not True.
N.B: I also added the hash method to make the object hashable. Once the key is define, creating a hash is trivial, since the idea is to be able to uniquely identify each object, which is also done by the key. This definition allows the use of sets, for instance to check if a list has multiple occurence of the same object.
def add_IPAddress(ListOfIPAddresses, IPAddress):
"""
Function adding IPAddress to ListOfIPAddresses if IPAddress is not already
in ListOfIPAddresses.
"""
if IPAddress not in ListOfIPAddresses:
ListOfIPAddresses.append(IPAddress)
print ('IP Address appended:')
IPAddress.prettyprint()
def input_IPAddress():
"""
Ask a user input to create an IPAddress object.
"""
ip = input("Enter IP address: ")
hostname = input("Enter Hostname: ")
asn = input("Enter ASN: ")
isp = input("Enter ISP: ")
return IPAddress(ip, hostname, asn, isp)
Then I splitted the input from the user and the addition to a list of objects into 2 different functions. The function input() already returns a string, thus I removed the str() call to convert.
N.B: Watchout when asking users to input numbers, usually you forget that the input is a string and then your code raises an error because you forget to convert the string to integer or float.
Improvements: I didn't do it here, but usually when using user input, you have to check if the input given is correct. Some simple syntax checks (e.g. the number of '.' characters in the ipadress) can already improve the robustness of a piece of code. You could also use regular expressions with regex to check if the input is conform.
The query_yes_no() is a clean yes/no util which I copied from somewhere.. I usually place it in a util module: folder within the environment path named util with a __init__.py file. More information if you look into the creation of python module.
#%% Main
if __name__ == '__main__':
ListOfIPAddresses = list()
while True:
if not query_yes_no('Do you want to input IP Address?'):
break
ListOfIPAddresses.append(input_IPAddress())
for ipaddress in ListOfIPAddresses:
ipaddress.prettyprint()
print ('--------------------------') # Just pretting a separator
Finally, in the main part, an infinite while loop is broke only if the user replies no (or any of the other negative keyworkds defined in query_yes_no()). Until it is broken, the user will be asked to add a new IPAddress to the list ListOfIPAddresses which will be added if it is not already present inside.
Full code snippet:
# -*- coding: utf-8 -*-
import sys
#%% Definition of IPAddress
class IPAddress:
def __init__(self, ip, hostname, asn, isp):
self.ip = ip
self.hostname = hostname
self.asn = asn
self.isp = isp
def prettyprint(self):
str2print = f' | IP: {self.ip}\n' +\
f' | Hostname: {self.hostname}\n' +\
f' | ASN: {self.asn}\n' +\
f' | ISP: {self.isp}\n'
print (str2print)
def __key(self):
"""
key method to uniquely identify an object IPAddress.
I defined it as a tuple of the 4 main attributes, but if one of them is
unique it could be directly used as a key.
"""
return (self.ip, self.hostname, self.asn, self.isp)
def __eq__(self, other):
""" == comparison method."""
return isinstance(self, type(other)) and self.__key() == other.__key()
def __ne__(self, other):
""" != comparison method."""
return not self.__eq__(other)
def __hash__(self):
"""Makes the object hashable. Sets can now be used on IPAddress."""
return hash(self.__key())
#%% Functions
def add_IPAddress(ListOfIPAddresses, IPAddress):
"""
Function adding IPAddress to ListOfIPAddresses if IPAddress is not already
in ListOfIPAddresses.
"""
if IPAddress not in ListOfIPAddresses:
ListOfIPAddresses.append(IPAddress)
print ('IP Address appended:')
IPAddress.prettyprint()
def input_IPAddress():
"""
Ask a user input to create an IPAddress object.
"""
ip = input("Enter IP address: ")
hostname = input("Enter Hostname: ")
asn = input("Enter ASN: ")
isp = input("Enter ISP: ")
return IPAddress(ip, hostname, asn, isp)
#%% Util
def query_yes_no(question, default="yes"):
"""Ask a yes/no question via raw_input() and return their answer.
"question" is a string that is presented to the user.
"default" is the presumed answer if the user just hits <Enter>.
It must be "yes" (the default), "no" or None (meaning
an answer is required of the user).
The "answer" return value is True for "yes" or False for "no".
"""
valid = {"yes": True, "y": True, "ye": True,
"no": False, "n": False}
if default is None:
prompt = " [y/n] "
elif default == "yes":
prompt = " [Y/n] "
elif default == "no":
prompt = " [y/N] "
else:
raise ValueError("invalid default answer: '%s'" % default)
while True:
sys.stdout.write(question + prompt)
choice = input().lower()
if default is not None and choice == '':
return valid[default]
elif choice in valid:
return valid[choice]
else:
sys.stdout.write("Please respond with 'yes' or 'no' "
"(or 'y' or 'n').\n")
#%% Main
if __name__ == '__main__':
ListOfIPAddresses = list()
while True:
if not query_yes_no('Do you want to input IP Address?'):
break
ListOfIPAddresses.append(input_IPAddress())
for ipaddress in ListOfIPAddresses:
ipaddress.prettyprint()
print ('--------------------------') # Just pretting a separator

How to send the encrypted data to the client?

Here is my client code.
import socket, pickle,time
from encryption import *
def Main():
host = '127.0.0.1'
port = 5006
s = socket.socket()
s.connect((host, port))
m= encryption()
pri_key,pub_key,n=m.generating_keys(1)
filename = input("Filename? -> ")
if filename != 'q':
data=[filename,pub_key,n]
msg=pickle.dumps(data)
s.send(msg)
data = s.recv(1024)
data=data.decode('utf-8')
if data == '1':
size = s.recv(1024)
size = int(size.decode('utf-8'))
filesize = size
message = input("File exists, " + str(filesize) +"Bytes, download? (Y/N)? -> ")
if message == 'Y':
s.send(b'1')
count=0
f = open('new_'+filename, 'wb')
data = s.recv(1024)
data=int.from_bytes(data,byteorder="little")
msg=m.decrypt(data,pri_key,n)
totalRecv = len(msg)
f.write(msg)
#count=0
while totalRecv<filesize:
#time.sleep(.300)
decipher = s.recv(1024)
decipher=int.from_bytes(decipher,byteorder="little")
print(decipher)
if(decipher==0):
break
msg=m.decrypt(decipher,pri_key,n)
totalRecv += len(msg)
f.write(msg)
print ("{0:.2f}".format((totalRecv/float(filesize))*100)+ "% Done")
print ("Download Complete!")
f.close()
else:
print ("File Does Not Exist!")
s.close()
if __name__ == '__main__':
Main()
Here is my server code.
import socket,threading,os,pickle
from encryption import *
def RetrFile(name, sock):
m=encryption()
filename = sock.recv(1024)
dat=pickle.loads(filename)
if os.path.isfile(dat[0]):
s='1'
s=s.encode('utf-8')
sock.send(s)
k=str(os.path.getsize(dat[0]))
k=k.encode('utf-8')
sock.send(k)
count=8
userResponse = sock.recv(1024)
if userResponse[:2] == (b'1'):
with open(dat[0],'rb') as f:
bytesToSend = f.read(1024)
#print(type(bytesToSend))
#print('1')
#print(bytesToSend)
msg= m.encrypt(bytesToSend,dat[1],dat[2])
#print(msg)
#print(1)
k=msg.bit_length()
if(k%8>=1):
k=k+1
msg=msg.to_bytes(k,byteorder="little")
#print (msg)
#msg=msg.encode('utf-8')
#print(msg)
sock.send(msg)
s=''
s=s.encode('utf-8')
while bytesToSend != s:
bytesToSend = f.read(1024)
msg= m.encrypt(bytesToSend,dat[1],dat[2])
k=msg.bit_length()
if(k%8>=1):
k=k//8+1
msg=msg.to_bytes(k,byteorder="little")
sock.send(msg)
#count=count.to_bytes(1,byteorder="little")
#sock.send(count)
else:
sock.send(b'ERR')
sock.close()
def Main():
host = '127.0.0.1'
port = 5006
s = socket.socket()
s.bind((host,port))
s.listen(5)
print ("Server Started.")
while True:
c, addr = s.accept()
print ("client connedted ip:<" + str(addr) + ">")
t = threading.Thread(target=RetrFile, args=("RetrThread", c))
t.start()
s.close()
if __name__ == '__main__':
Main()
Now my problem is that decipher.recv(1024) in client side is not receiving the message. what should i do.

On the server side, change the code to:
while bytesToSend != s:
bytesToSend = f.read(1024)
length = len(bytesTosend)
leng = length.to_bytes(4, 'little')
sock.sendall(leng)
msg = m.encrypt(bytesToSend, dat[1], dat[2])
k = msg.bit_length()
if k % 8 >= 1 :
k = k // 8 + 1
else:
k = k // 8
msg = msg.to_bytes(k, byteorder='little')
sock.sendall(msg)
And on the client side:
while True:
length = s.recv(4)
length = int.from_bytes(length, byteorder='little')
decipher = s.recv(leng)
decipher = int.from_bytes(decipher, byteorder='little')
if not decipher:
break
msg = m.decrypt(decipher, pri_key, n)
f.write(msg)
f.close()

It is rather difficult to check your code without seeing the encryption module referenced in your code. With such functionality absent, testing to find out where the problem is becomes impossible. As such, the following programs are provided along with the implementation of another encryption module.
The server should be run from the command line and requires a port number and password to be supplied upon execution. The only form of authentication or authorization used is proper understanding of the client. The client must use the same password to be understood by the server.
Server
#! /usr/bin/env python3
import argparse
import pathlib
import pickle
import pickletools
import random
import socket
import socketserver
import zlib
import encryption
BYTES_USED = bytes(range(1 << 8))
CHAIN_SIZE = 1 << 8
def main():
"""Start a file server and serve clients forever."""
parser = argparse.ArgumentParser(description='Execute a file server demo.')
parser.add_argument('port', type=int, help='location where server listens')
parser.add_argument('password', type=str, help='key to use on secure line')
arguments = parser.parse_args()
server_address = socket.gethostbyname(socket.gethostname()), arguments.port
server = CustomServer(server_address, CustomHandler, arguments.password)
server.serve_forever()
class CustomServer(socketserver.ThreadingTCPServer):
"""Provide server support for the management of encrypted data."""
def __init__(self, server_address, request_handler_class, password):
"""Initialize the server and keep a set of security credentials."""
super().__init__(server_address, request_handler_class, True)
self.key = encryption.Key.new_client_random(
BYTES_USED,
CHAIN_SIZE,
random.Random(password)
)
self.primer = encryption.Primer.new_client_random(
self.key,
random.Random(password)
)
class CustomHandler(socketserver.StreamRequestHandler):
"""Allow forwarding of data to all connected clients."""
def __init__(self, request, client_address, server):
"""Initialize the handler with security translators."""
self.decoder = encryption.Decrypter(server.key, server.primer)
self.encoder = encryption.Encrypter(server.key, server.primer)
super().__init__(request, client_address, server)
def handle(self):
"""Run the code to handle clients while dealing with errors."""
try:
self.process_file_request()
except (ConnectionResetError, EOFError):
pass
def process_file_request(self):
"""Deal with clients that wish to download a file."""
segment = self.load()
path = pathlib.Path(segment)
if path.is_file():
size = path.stat().st_size
self.dump(size)
accepted = self.load()
if accepted:
with path.open('rb') as file:
while True:
buffer = file.read(1 << 15)
self.dump(buffer)
if not buffer:
break
else:
error = 'The given path does not specify a file.'
self.dump(error)
def load(self):
"""Read the client's connection with blocking."""
data = self.decoder.load_16bit_frame(self.rfile)
bytes_object = zlib.decompress(data)
return pickle.loads(bytes_object)
def dump(self, obj):
"""Send an object securely over to the client if possible."""
pickle_string = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
bytes_object = pickletools.optimize(pickle_string)
data = zlib.compress(bytes_object, zlib.Z_BEST_COMPRESSION)
self.encoder.dump_16bit_frame(data, self.wfile)
if __name__ == '__main__':
main()
The client should also be run from the command line and requires the host name, port number, and password for the server. Communications are encrypted with the password and cannot be decrypted properly if it is different. Please note that very little checking for errors is present in the two programs.
Client
#! /usr/bin/env python3
import argparse
import pathlib
import pickle
import pickletools
import random
import socket
import zlib
import encryption
BYTES_USED = bytes(range(1 << 8))
CHAIN_SIZE = 1 << 8
# These are possible answers accepted for yes/no style questions.
POSITIVE = tuple(map(str.casefold, ('yes', 'true', '1')))
NEGATIVE = tuple(map(str.casefold, ('no', 'false', '0')))
def main():
"""Connect a file client to a server and process incoming commands."""
parser = argparse.ArgumentParser(description='Execute a file client demo.')
parser.add_argument('host', type=str, help='name of server on the network')
parser.add_argument('port', type=int, help='location where server listens')
parser.add_argument('password', type=str, help='key to use on secure line')
arguments = parser.parse_args()
connection = socket.create_connection((arguments.host, arguments.port))
try:
talk_to_server(*make_dump_and_load(connection, arguments.password))
finally:
connection.shutdown(socket.SHUT_RDWR)
connection.close()
def make_dump_and_load(connection, password):
"""Create objects to help with the encrypted communications."""
reader = connection.makefile('rb', -1)
writer = connection.makefile('wb', 0)
chaos = random.Random(password)
key = encryption.Key.new_client_random(BYTES_USED, CHAIN_SIZE, chaos)
chaos = random.Random(password)
primer = encryption.Primer.new_client_random(key, chaos)
decoder = encryption.Decrypter(key, primer)
encoder = encryption.Encrypter(key, primer)
def dump(obj):
"""Write an object to the writer file in an encoded form."""
pickle_string = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
bytes_object = pickletools.optimize(pickle_string)
data = zlib.compress(bytes_object, zlib.Z_BEST_COMPRESSION)
encoder.dump_16bit_frame(data, writer)
def load():
"""Read an object from the reader file and decode the results."""
data = decoder.load_16bit_frame(reader)
bytes_object = zlib.decompress(data)
return pickle.loads(bytes_object)
return dump, load
def talk_to_server(dump, load):
"""Converse with the serve while trying to get a file."""
segment = input('Filename: ')
dump(segment)
size = load()
if isinstance(size, int):
print('File exists and takes', size, 'bytes to download.')
response = get_response('Continue? ')
dump(response)
if response:
location = input('Where should the new file be created? ')
with pathlib.Path(location).open('wb') as file:
written = 0
while True:
buffer = load()
if not buffer:
break
written += file.write(buffer)
print('Progress: {:.1%}'.format(written / size))
print('Download complete!')
else:
print(size)
def get_response(query):
"""Ask the user yes/no style questions and return the results."""
while True:
answer = input(query).casefold()
if answer:
if any(option.startswith(answer) for option in POSITIVE):
return True
if any(option.startswith(answer) for option in NEGATIVE):
return False
print('Please provide a positive or negative answer.')
if __name__ == '__main__':
main()
Since access to the encryption module was not provided, an alternative implementation has been included below. No guarantee is made for its suitability in any capacity or for any purpose. It may be somewhat slow as the software is currently configured but works well if obfuscation is desired.
encryption
"""Provide an implementation of Markov Encryption for simplified use.
This module exposes primitives useful for executing Markov Encryption
processes. ME was inspired by a combination of Markov chains with the
puzzles of Sudoku. This implementation has undergone numerous changes
and optimizations since its original design. Please see documentation."""
###############################################################################
# Import several functions needed later in the code.
from collections import deque
from math import ceil
from random import Random, SystemRandom
from struct import calcsize, pack, unpack
from inspect import currentframe
__author__ = 'Stephen "Zero" Chappell <Noctis.Skytower#gmail.com>'
__date__ = '18 August 2016'
__version__ = 2, 0, 8
###############################################################################
# Create some tools to use in the classes down below.
_CHAOS = SystemRandom()
def slots(names=''):
"""Set the __slots__ variable in the calling context with private names.
This function allows a convenient syntax when specifying the slots
used in a class. Simply call it in a class definition context with
the needed names. Locals are modified with private slot names."""
currentframe().f_back.f_locals['__slots__'] = \
tuple('__' + name for name in names.replace(',', ' ').split())
###############################################################################
# Implement a Key primitive data type for Markov Encryption.
class Key:
"""Key(data) -> Key instance
This class represents a Markov Encryption Key primitive. It allows for
easy key creation, checks for proper data construction, and helps with
encoding and decoding indexes based on cached internal tables."""
slots('data dimensions base size encoder axes order decoder')
#classmethod
def new(cls, bytes_used, chain_size):
"""Return a Key instance created from bytes_used and chain_size.
Creating a new key is easy with this method. Call this class method
with the bytes you want the key to recognize along with the size of
the chains you want the encryption/decryption processes to use."""
selection, blocks = list(set(bytes_used)), []
for _ in range(chain_size):
_CHAOS.shuffle(selection)
blocks.append(bytes(selection))
return cls(tuple(blocks))
#classmethod
def new_deterministic(cls, bytes_used, chain_size):
"""Automatically create a key with the information provided."""
selection, blocks, chaos = list(set(bytes_used)), [], Random()
chaos.seed(chain_size.to_bytes(ceil(
chain_size.bit_length() / 8), 'big') + bytes(range(256)))
for _ in range(chain_size):
chaos.shuffle(selection)
blocks.append(bytes(selection))
return cls(tuple(blocks))
#classmethod
def new_client_random(cls, bytes_used, chain_size, chaos):
"""Create a key using chaos as the key's source of randomness."""
selection, blocks = list(set(bytes_used)), []
for _ in range(chain_size):
chaos.shuffle(selection)
blocks.append(bytes(selection))
return cls(tuple(blocks))
def __init__(self, data):
"""Initialize the Key instance's variables after testing the data.
Keys are created with tuples of carefully constructed bytes arrays.
This method tests the given data before going on to build internal
tables for efficient encoding and decoding methods later on."""
self.__test_data(data)
self.__make_vars(data)
#staticmethod
def __test_data(data):
"""Test the data for correctness in its construction.
The data must be a tuple of at least two byte arrays. Each byte
array must have at least two bytes, all of which must be unique.
Furthermore, all arrays should share the exact same byte set."""
if not isinstance(data, tuple):
raise TypeError('Data must be a tuple object!')
if len(data) < 2:
raise ValueError('Data must contain at least two items!')
item = data[0]
if not isinstance(item, bytes):
raise TypeError('Data items must be bytes objects!')
length = len(item)
if length < 2:
raise ValueError('Data items must contain at least two bytes!')
unique = set(item)
if len(unique) != length:
raise ValueError('Data items must contain unique byte sets!')
for item in data[1:]:
if not isinstance(item, bytes):
raise TypeError('Data items must be bytes objects!')
next_length = len(item)
if next_length != length:
raise ValueError('All data items must have the same size!')
next_unique = set(item)
if len(next_unique) != next_length:
raise ValueError('Data items must contain unique byte sets!')
if next_unique ^ unique:
raise ValueError('All data items must use the same byte set!')
def __make_vars(self, data):
"""Build various internal tables for optimized calculations.
Encoding and decoding rely on complex relationships with the given
data. This method caches several of these key relationships for use
when the encryption and decryption processes are being executed."""
self.__data = data
self.__dimensions = len(data)
base, *mutations = data
self.__base = base = tuple(base)
self.__size = size = len(base)
offset = -sum(base.index(block[0]) for block in mutations[:-1]) % size
self.__encoder = base[offset:] + base[:offset]
self.__axes = tuple(reversed([tuple(base.index(byte) for byte in block)
for block in mutations]))
self.__order = key = tuple(sorted(base))
grid = []
for rotation in range(size):
block, row = base[rotation:] + base[:rotation], [None] * size
for byte, value in zip(block, key):
row[key.index(byte)] = value
grid.append(tuple(row))
self.__decoder = tuple(grid[offset:] + grid[:offset])
def test_primer(self, primer):
"""Raise an error if the primer is not compatible with this key.
Key and primers have a certain relationship that must be maintained
in order for them to work together. Since the primer understands
the requirements, it is asked to check this key for compatibility."""
primer.test_key(self)
def encode(self, index):
"""Encode index based on internal tables and return byte code.
An index probes into the various axes of the multidimensional,
virtual grid that a key represents. The index is evaluated, and
the value at its coordinates is returned by running this method."""
assert len(index) == self.__dimensions, \
'Index size is not compatible with key dimensions!'
*probes, current = index
return self.__encoder[(sum(
table[probe] for table, probe in zip(self.__axes, probes)
) + current) % self.__size]
def decode(self, index):
"""Decode index based on internal tables and return byte code.
Decoding does the exact same thing as encoding, but it indexes
into a virtual grid that represents the inverse of the encoding
grid. Tables are used to make the process fast and efficient."""
assert len(index) == self.__dimensions, \
'Index size is not compatible with key dimensions!'
*probes, current = index
return self.__decoder[sum(
table[probe] for table, probe in zip(self.__axes, probes)
) % self.__size][current]
#property
def data(self):
"""Data that the instance was initialized with.
This is the tuple of byte arrays used to create this key and can
be used to create an exact copy of this key at some later time."""
return self.__data
#property
def dimensions(self):
"""Dimensions that the internal, virtual grid contains.
The virtual grid has a number of axes that can be referenced when
indexing into it, and this number is the count of its dimensions."""
return self.__dimensions
#property
def base(self):
"""Base value that the internal grid is built from.
The Sudoku nature of the grid comes from rotating this value by
offsets, keeping values unique along any axis while traveling."""
return self.__base
#property
def order(self):
"""Order of base after its values have been sorted.
A sorted base is important when constructing inverse rows and when
encoding raw bytes for use in updating an encode/decode index."""
return self.__order
###############################################################################
# Implement a Primer primitive data type for Markov Encryption.
class Primer:
"""Primer(data) -> Primer instance
This class represents a Markov Encryption Primer primitive. It is very
important for starting both the encryption and decryption processes. A
method is provided for their easy creation with a related key."""
slots('data')
#classmethod
def new(cls, key):
"""Return a Primer instance from a parent Key.
Primers must be compatible with the keys they are used with. This
method takes a key and constructs a cryptographically sound primer
that is ready to use in the beginning stages of encryption."""
base = key.base
return cls(bytes(_CHAOS.choice(base)
for _ in range(key.dimensions - 1)))
#classmethod
def new_deterministic(cls, key):
"""Automatically create a primer with the information provided."""
base, chain_size, chaos = key.base, key.dimensions, Random()
chaos.seed(chain_size.to_bytes(ceil(
chain_size.bit_length() / 8), 'big') + bytes(range(256)))
return cls(bytes(chaos.choice(base) for _ in range(chain_size - 1)))
#classmethod
def new_client_random(cls, key, chaos):
"""Create a primer using chaos as the primer's source of randomness."""
base = key.base
return cls(
bytes(chaos.choice(base) for _ in range(key.dimensions - 1))
)
def __init__(self, data):
"""Initialize the Primer instance after testing validity of data.
Though not as complicated in its requirements as keys, primers do
need some simple structure in the data they are given. A checking
method is run before saving the data to the instance's attribute."""
self.__test_data(data)
self.__data = data
#staticmethod
def __test_data(data):
"""Test the data for correctness and test the data.
In order for the primer to be compatible with the nature of the
Markov Encryption processes, the data must be an array of bytes;
and to act as a primer, it must contain at least some information."""
if not isinstance(data, bytes):
raise TypeError('Data must be a bytes object!')
if not data:
raise ValueError('Data must contain at least one byte!')
def test_key(self, key):
"""Raise an error if the key is not compatible with this primer.
Primers provide needed data to start encryption and decryption. For
it be compatible with a key, it must contain one byte less than the
key's dimensions and must be a subset of the base in the key."""
if len(self.__data) != key.dimensions - 1:
raise ValueError('Key size must be one more than the primer size!')
if not set(self.__data).issubset(key.base):
raise ValueError('Key data must be a superset of primer data!')
#property
def data(self):
"""Data that the instance was initialized with.
This is the byte array used to create this primer and can be used
if desired to create an copy of this primer at some later time."""
return self.__data
###############################################################################
# Create an abstract processing class for use in encryption and decryption.
class _Processor:
"""_Processor(key, primer) -> NotImplementedError exception
This class acts as a base for the encryption and decryption processes.
The given key is saved, and several tables are created along with an
index. Since it is abstract, calling the class will raise an exception."""
slots('key into index from')
def __init__(self, key, primer):
"""Initialize the _Processor instance if it is from a child class.
After passing several tests for creating a valid processing object,
the key is saved, and the primer is used to start an index. Tables
are also formed for converting byte values between systems."""
if type(self) is _Processor:
raise NotImplementedError('This is an abstract class!')
key.test_primer(primer)
self.__key = key
self.__into = table = dict(map(reversed, enumerate(key.order)))
self.__index = deque(map(table.__getitem__, primer.data),
key.dimensions)
self.__from = dict(map(reversed, table.items()))
def process(self, data):
"""Process the data and return its transformed state.
A cache for the data transformation is created and an internal
method is run to quickly encode or decode the given bytes. The
cache is finally converted to immutable bytes when returned."""
cache = bytearray()
self._run(data, cache.append, self.__key, self.__into, self.__index)
return bytes(cache)
#staticmethod
def _run(data, cache_append, key, table, index):
"""Run the processing algorithm in an overloaded method.
Since this is only an abstract base class for encoding/decoding,
this method will raise an exception when run. Inheriting classes
should implement whatever is appropriate for the intended function."""
raise NotImplementedError('This is an abstract method!')
#property
def primer(self):
"""Primer representing the state of the internal index.
The index can be retrieved as a primer, useful for initializing
another processor in the same starting state as the current one."""
index = self.__index
index.append(None)
index.pop()
return Primer(bytes(map(self.__from.__getitem__, index)))
###############################################################################
# Inherit from _Processor and implement the ME encoding algorithm.
class Encrypter(_Processor):
"""Encrypter(key, primer) -> Encrypter instance
This class represents a state-aware encryption engine that can be fed
data and will return a stream of coherent cipher-text. An index is
maintained, and a state-continuation primer can be retrieved at will."""
slots()
#staticmethod
def _run(data, cache_append, key, table, index):
"""Encrypt the data with the given arguments.
To run the encryption process as fast as possible, methods are
cached as names. As the algorithm operates, only recognized bytes
are encoded while running through the selective processing loop."""
encode, index_append = key.encode, index.append
for byte in data:
if byte in table:
index_append(table[byte])
cache_append(encode(index))
else:
cache_append(byte)
def dump_16bit_frame(self, data, file):
"""Write the data to the file using a guaranteed frame size."""
size = len(data)
if not 1 <= size <= 1 << 16:
raise ValueError('data has an unsupported length')
packed = self.process(pack('<H{}s'.format(size), size - 1, data))
if file.write(packed) != len(packed):
raise IOError('frame was not properly written to file')
###############################################################################
# Inherit from _Processor and implement the ME decoding algorithm.
class Decrypter(_Processor):
"""Decrypter(key, primer) -> Decrypter instance
This class represents a state-aware decryption engine that can be fed
data and will return a stream of coherent plain-text. An index is
maintained, and a state-continuation primer can be retrieved at will."""
slots()
SIZE = '<H'
DATA = '{}s'
#staticmethod
def _run(data, cache_append, key, table, index):
"""Decrypt the data with the given arguments.
To run the decryption process as fast as possible, methods are
cached as names. As the algorithm operates, only recognized bytes
are decoded while running through the selective processing loop."""
decode, index_append = key.decode, index.append
for byte in data:
if byte in table:
index_append(table[byte])
value = decode(index)
cache_append(value)
index[-1] = table[value]
else:
cache_append(byte)
def load_16bit_frame(self, file):
"""Read some data from the file using a guaranteed frame size."""
size = unpack(self.SIZE, self.process(self.read_all(
file,
calcsize(self.SIZE)
)))[0] + 1
return unpack(self.DATA.format(size), self.process(self.read_all(
file,
size
)))[0]
#staticmethod
def read_all(file, size):
"""Get all the data that has been requested from the file."""
if not 1 <= size <= 1 << 16:
raise ValueError('size has an unsupported value')
buffer = bytearray()
while size > 0:
data = file.read(size)
if not data:
raise EOFError('file has unexpectedly reached the end')
buffer.extend(data)
size -= len(data)
if size < 0:
raise IOError('more data was read than was required')
return bytes(buffer)

Optimizing modifiable named list based on namedtuple

My goal is to optimize a framework based on a stack of modifiers for CSV-sourced lists. Each modifier uses a header list to work on a named basis.
CSV example (including header):
date;place
13/02/2013;New York
15/04/2012;Buenos Aires
29/10/2010;Singapour
I have written some code based on namedtuple in order to be able to use lists generated by csv module without reorganizing data every time. Generated code below :
class MyNamedList(object):
__slots__ = ("__values")
_fields = ['date', 'ignore', 'place']
def __init__(self, values):
self.__values = values
if len(self.__values) <= 151:
for i in range(len(self.__values), 151):
self.__values += [None,]
#property
def date(self):
return self.__values[0]
#date.setter
def date(self, val):
self.__values[0] = val
#property
def ignore(self):
return self.__values[150]
#ignore.setter
def ignore(self, val):
self.__values[150] = val
#property
def place(self):
return self.__values[1]
#b.setter
def place(self, val):
self.__values[1] = val
I must say i am very disappointed with performance using this class. Calling a simple modifier function (which changes "ignore" to True 100 times. Yes i know it is useless) for each line of a 70000-line csv file takes 9 seconds (with pypy. 5.5 using original python) whereas equivalent code using a list named foo takes 1.1 second (same with pypy and original python).
Is there anything i could do to get comparable performance between both approaches ? To me, record.ignore = True could be directly inlined (or so) and therefore translated into record[150] = True. Is there any blocking point i don't see to get this to happen ?
Note that the record i am modifying is actually (for now) not created for each line in the CSV file, meaning adding more items into the list happens only once, before the iteration.
Update : sample codes
--> Using namedlist
import namedlist
MyNamedList=namedlist.namedlist("MyNamedList", {"a":1, "b":2, "ignore":150})
test = MyNamedList([0,1])
def foo(a):
test.ignore = True # x100 times
import csv
stream = csv.reader(open("66666.csv", "rb"))
for i in stream:
foo(i)
--> Not using namedlist
import namedlist
import csv
MyNamedList=namedlist.namedlist("MyNamedList", {"a":1, "b":2, "ignore":150})
test = MyNamedList([0,1])
sample_data = []
for i in range(len(sample_data), 151):
sample_data += [None,]
def foo(a):
sample_data[150] = True # x100 times
stream = csv.reader(open("66666.csv", "rb"))
for i in stream:
foo(i)
Update #2 : code for namedlist.py (heavily based on namedtuple.py
# Retrieved from http://code.activestate.com/recipes/500261/
# Licensed under the PSF license
from keyword import iskeyword as _iskeyword
import sys as _sys
def namedlist(typename, field_indices, verbose=False, rename=False):
# Parse and validate the field names. Validation serves two purposes,
# generating informative error messages and preventing template injection attacks.
field_names = field_indices.keys()
for name in [typename,] + field_names:
if not min(c.isalnum() or c=='_' for c in name):
raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
if _iskeyword(name):
raise ValueError('Type names and field names cannot be a keyword: %r' % name)
if name[0].isdigit():
raise ValueError('Type names and field names cannot start with a number: %r' % name)
seen_names = set()
for name in field_names:
if name.startswith('_') and not rename:
raise ValueError('Field names cannot start with an underscore: %r' % name)
if name in seen_names:
raise ValueError('Encountered duplicate field name: %r' % name)
seen_names.add(name)
# Create and fill-in the class template
numfields = len(field_names)
argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
reprtxt = ', '.join('%s=%%r' % name for name in field_names)
max_index=-1
for name in field_names:
index = field_indices[name]
if max_index < index:
max_index = index
max_index += 1
template = '''class %(typename)s(object):
__slots__ = ("__values") \n
_fields = %(field_names)r \n
def __init__(self, values):
self.__values = values
if len(self.__values) <= %(max_index)s:
for i in range(len(self.__values), %(max_index)s):
self.__values += [None,]'''% locals()
for name in field_names:
index = field_indices[name]
template += ''' \n
#property
def %s(self):
return self.__values[%d]
#%s.setter
def %s(self, val):
self.__values[%d] = val''' % (name, index, name, name, index)
if verbose:
print template
# Execute the template string in a temporary namespace
namespace = {'__name__':'namedtuple_%s' % typename,
'_property':property, '_tuple':tuple}
try:
exec template in namespace
except SyntaxError, e:
raise SyntaxError(e.message + ':\n' + template)
result = namespace[typename]
# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created. Bypass this step in enviroments where
# sys._getframe is not defined (Jython for example) or sys._getframe is not
# defined for arguments greater than 0 (IronPython).
try:
result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
except (AttributeError, ValueError):
pass
return result

strange python 2.7 dict behavior: key not inside while clearly there

I have written a Parser that takes a JSON configuration and creates objects from it. I first create a well known object, and try to dynamically import a module (which may be from a user), while loading its class via the defined creator method of that module.
Here is some testing code:
import json
import imp
import os.path as path
from lib.config.members import Member
from lib.tasks.task import Task
class Parser(object):
def __init__(self):
self._loadedMods = {"tasks": {}}
def _load_module(self, clazz, modPart):
"""
imports and caches a module.
:param clazz: the filename of the module (i.e email, ping...)
:param modPart: the folder of the module. (i.e services, parsers...)
:return: the imported/cached module, or throws an error if it couldn't find it
"""
mods = self._loadedMods[modPart]
if clazz in mods:
return mods["class"]
else:
#mod = __import__(clazz)
p = path.join("lib", modPart, clazz + ".py")
mod = imp.load_source(clazz, p)
mods[clazz] = mod
return mod
def replace_with_import(self, objList, modPart, items_func, class_check):
"""
replaces configuration dicts with their objects by importing and creating it in the first step.
In the second step the original list of json config dicts gets replaced by the loaded objects
:param objList: the list of objects which is iterated on
:param modPart: the folder from the module (i.e tasks, parsers)
:param items_func: function to get a pointer on the list of json-config-objects to replace. Takes one argument and
should return a list of
:param class_check: currently unsupported
"""
for obj in objList:
repl = []
items = items_func(obj)
for clazzItem in items:
try:
clazz = clazzItem["class"]
mod = self._load_module(clazz, modPart)
item = mod.create(clazzItem)
if class_check(item):
repl.append(item)
else:
print " ignoring class " + clazzItem["class"] + "! It does not pass the class check!"
except ImportError, err:
print "could not import " + clazz + ": " + str(clazzItem) + "! reason:"
print str(err)
except KeyError, k:
print "Key " + str(k) + " not in classItem " + str(clazzItem)
except Exception, e:
print "Error while replacing class ( " + clazz + " :" + str(e) + ")"
del items[:]
items.extend(repl)
def _create_raw_Object(self, jsonDict, msgName, creator):
"""
creates an Main object from the configuration, but just parses raw data and hands it to the object
:param jsonDict: the configuration file part as dict
:param msgName: name of object for error message
:param creator: function pointer which is taking two arguments: identifier of the object and arguments.
:should return an object
:return: a list of objects returned by creator
"""
items = []
for key, val in jsonDict.items():
try:
item = creator(key, val)
items.append(item)
except Exception, e:
print "ignoring " + msgName + ": " + key + "! reason:"
print str(e)
return items
jsonFile = '''
{
"members":{
"homer":{
"name": "Homer Simpson",
"comment": "Security Inspector",
"tasks": [{"class":"email", "type": "donut", "args": {"rcpt": "homer_j_simpson#burnscorp.sp"}},
{"class":"email", "type": "do", "args": {"rcpt": "my_other_mail#burnscorp.sp"}}]
}
}
}
'''
jsonDict = json.loads(jsonFile)
parser = Parser()
creator = lambda name, values: Member(name, **values)
members = parser._create_raw_Object(jsonDict["members"], "Members", creator)
items_func = lambda member: member.get_tasks()
class_check = lambda task: isinstance(task, Task)
parser.replace_with_import(members, "tasks", items_func, class_check)
for d in members:
print d.__dict__
As you can see, a Member can have a list of arbitary tasks, and which one it should import is defined in its class attribute, but as soon as two of them has the same value for the class (which shouldn't break json the way we define it) I get a strange KeyError :
Key 'class' not in classItem {u'args': {u'rcpt': u'my_other_mail#burnscorp.sp'}, u'type': u'do', u'class': u'email'}
Why do I get this strange error? Any hint that my give me a clue whats going on is very welcome, as I feel hopeless, debugging this for hours.
I think that Member and Email/Task class are unrelated but Ill post them for completeness:
lib/config/members.py
class Member:
def __init__(self, id, name="", comment="", tasks=None):
self.id = id
self.name = name
self.tasks = []
self.add_task(tasks)
self.comment = comment
def get_id(self):
return self.id
def add_task(self, task):
if task is None:
return
if isinstance(task, list):
self.tasks.extend(task)
else:
self.tasks.append(task)
def get_tasks(self):
return self.tasks
lib/tasks/[task|email].py
class Task:
"""
Base class for all built-in Tasks.
"""
def set_task_type(self, taskType):
"""
sets the type of this task.
Be aware! this method can only get called once!
:param taskType: the type of this task
"""
if hasattr(self, "_taskType"):
raise Exception("taskType is only allowed to set once!")
self.taskType = taskType
def get_task_type(self):
"""
:return: the type set by set_type_task
"""
return self._taskType
"""
The email task.
"""
from lib.tasks.task import Task
class EmailTask(Task):
def __init__(self, **kwargs):
self.set_task_type(kwargs["type"])
self.recipient = kwargs["args"]["rcpt"]
def execute_task(self, msg):
pass
def create(taskDict):
return EmailTask(**taskDict)

It seems you are eating the actual exception by replacing it with your own custom print in replace_with_import. As I noted in the comment section.
You generally want to keep you try blocks small and very predictable, knowing exactly what can be raised and what you should handle at that point in the code. The less complexity in your try block the better.

Controlling Python PLY lexer states from parser

I am working on a simple SQL select like query parser and I need to be able to capture subqueries that can occur at certain places literally. I found lexer states are the best solution and was able to do a POC using curly braces to mark the start and end. However, the subqueries will be delimited by parenthesis, not curlys, and the parenthesis can occur at other places as well, so I can't being the state with every open-paren. This information is readily available with the parser, so I was hoping to call begin and end at appropriate locations in the parser rules. This however didn't work because lexer seem to tokenize the stream all at once, and so the tokens get generated in the INITIAL state. Is there a workaround for this problem? Here is an outline of what I tried to do:
def p_value_subquery(p):
"""
value : start_sub end_sub
"""
p[0] = "( " + p[1] + " )"
def p_start_sub(p):
"""
start_sub : OPAR
"""
start_subquery(p.lexer)
p[0] = p[1]
def p_end_sub(p):
"""
end_sub : CPAR
"""
subquery = end_subquery(p.lexer)
p[0] = subquery
The start_subquery() and end_subquery() are defined like this:
def start_subquery(lexer):
lexer.code_start = lexer.lexpos # Record the starting position
lexer.level = 1
lexer.begin('subquery')
def end_subquery(lexer):
value = lexer.lexdata[lexer.code_start:lexer.lexpos-1]
lexer.lineno += value.count('\n')
lexer.begin('INITIAL')
return value
The lexer tokens are simply there to detect the close-paren:
#lex.TOKEN(r"\(")
def t_subquery_SUBQST(t):
lexer.level += 1
#lex.TOKEN(r"\)")
def t_subquery_SUBQEN(t):
lexer.level -= 1
#lex.TOKEN(r".")
def t_subquery_anychar(t):
pass
I would appreciate any help.

This answer may only be partially helpful, but I would also suggest looking at section "6.11 Embedded Actions" of the PLY documentation (http://www.dabeaz.com/ply/ply.html). In a nutshell, it is possible to write grammar rules in which actions occur mid-rule. It would look something similar to this:
def p_somerule(p):
'''somerule : A B possible_sub_query LBRACE sub_query RBRACE'''
def p_possible_sub_query(p):
'''possible_sub_query :'''
...
# Check if the last token read was LBRACE. If so, flip lexer state
# Sadly, it doesn't seem that the token is easily accessible. Would have to hack it
if last_token == 'LBRACE':
p.lexer.begin('SUBQUERY')
Regarding the behavior of the lexer, there is only one token of lookahead being used. So, in any particular grammar rule, at most only one extra token has been read already. If you're going to flip lexer states, you need to make sure that it happens before the token gets consumed by the parser, but before the parser asks to read the next incoming token.
Also, if possible, I would try to stay out of the yacc() error handling stack as far as a solution. There is way too much black-magic going on in error handling--the more you can avoid it, the better.
I'm a bit pressed for time at the moment, but this seems to be something that could be investigated for the next version of PLY. Will put it on my to-do list.

Based on PLY author's response, I came up with this better solution. I am yet to figure out how to return the subquery as a token, but the rest looks much better and need not be considered a hack anymore.
def start_subquery(lexer):
lexer.code_start = lexer.lexpos # Record the starting position
lexer.level = 1
lexer.begin("subquery")
def end_subquery(lexer):
lexer.begin("INITIAL")
def get_subquery(lexer):
value = lexer.lexdata[lexer.code_start:lexer.code_end-1]
lexer.lineno += value.count('\n')
return value
#lex.TOKEN(r"\(")
def t_subquery_OPAR(t):
lexer.level += 1
#lex.TOKEN(r"\)")
def t_subquery_CPAR(t):
lexer.level -= 1
if lexer.level == 0:
lexer.code_end = lexer.lexpos # Record the ending position
return t
#lex.TOKEN(r".")
def t_subquery_anychar(t):
pass
def p_value_subquery(p):
"""
value : check_subquery_start OPAR check_subquery_end CPAR
"""
p[0] = "( " + get_subquery(p.lexer) + " )"
def p_check_subquery_start(p):
"""
check_subquery_start :
"""
# Here last_token would be yacc's lookahead.
if last_token.type == "OPAR":
start_subquery(p.lexer)
def p_check_subquery_end(p):
"""
check_subquery_end :
"""
# Here last_token would be yacc's lookahead.
if last_token.type == "CPAR":
end_subquery(p.lexer)
last_token = None
def p_error(p):
global subquery_retry_pos
if p is None:
print >> sys.stderr, "ERROR: unexpected end of query"
else:
print >> sys.stderr, "ERROR: Skipping unrecognized token", p.type, "("+ \
p.value+") at line:", p.lineno, "and column:", find_column(p.lexer.lexdata, p)
# Just discard the token and tell the parser it's okay.
yacc.errok()
def get_token():
global last_token
last_token = lexer.token()
return last_token
def parse_query(input, debug=0):
lexer.input(input)
return parser.parse(input, tokenfunc=get_token, debug=0)

Since nobody has an answer, it bugged me to find a workaround, and here is an ugly hack using the error recovery and restart().
def start_subquery(lexer, pos):
lexer.code_start = lexer.lexpos # Record the starting position
lexer.level = 1
lexer.begin("subquery")
lexer.lexpos = pos
def end_subquery(lexer):
value = lexer.lexdata[lexer.code_start:lexer.lexpos-1]
lexer.lineno += value.count('\n')
lexer.begin('INITIAL')
return value
#lex.TOKEN(r"\(")
def t_subquery_SUBQST(t):
lexer.level += 1
#lex.TOKEN(r"\)")
def t_subquery_SUBQEN(t):
lexer.level -= 1
if lexer.level == 0:
t.type = "SUBQUERY"
t.value = end_subquery(lexer)
return t
#lex.TOKEN(r".")
def t_subquery_anychar(t):
pass
# NOTE: Due to the nature of the ugly workaround, the CPAR gets dropped, which
# makes it look like there is an imbalance.
def p_value_subquery(p):
"""
value : OPAR SUBQUERY
"""
p[0] = "( " + p[2] + " )"
subquery_retry_pos = None
def p_error(p):
global subquery_retry_pos
if p is None:
print >> sys.stderr, "ERROR: unexpected end of query"
elif p.type == 'SELECT' and parser.symstack[-1].type == 'OPAR':
lexer.input(lexer.lexdata)
subquery_retry_pos = parser.symstack[-1].lexpos
yacc.restart()
else:
print >> sys.stderr, "ERROR: Skipping unrecognized token", p.type, "("+ \
p.value+") at line:", p.lineno, "and column:", find_column(p.lexer.lexdata, p)
# Just discard the token and tell the parser it's okay.
yacc.errok()
def get_token():
global subquery_retry_pos
token = lexer.token()
if token and token.lexpos == subquery_retry_pos:
start_subquery(lexer, lexer.lexpos)
subquery_retry_pos = None
return token
def parse_query(input, debug=0):
lexer.input(inp)
result = parser.parse(inp, tokenfunc=get_token, debug=0)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to use the DICT protocol client in Python? - python

Related

Python class and objects within another function

How to send the encrypted data to the client?

Optimizing modifiable named list based on namedtuple

strange python 2.7 dict behavior: key not inside while clearly there

Controlling Python PLY lexer states from parser

Categories

Resources