MessagePack and datetime - python

I need a fast way to send 300 short messages a second over zeromq between the python multiprocessing processes. Each message needs to contain an ID and time.time()
msgpack seems like the best way to serialize the dict before sending it via zeromq, and conveniently, msgpack has an example of exactly what I need, except it has a datetime.datetime.now().
import datetime
import msgpack
useful_dict = {
"id": 1,
"created": datetime.datetime.now(),
}
def decode_datetime(obj):
    """msgpack ``object_hook``: turn a tagged dict back into a datetime.

    The marker is looked up as the bytes key ``b'__datetime__'`` while the
    timestamp is read from the text key ``"as_str"``. Dicts without the
    marker are returned unchanged.
    """
    if b'__datetime__' not in obj:
        return obj
    return datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
def encode_datetime(obj):
    """msgpack ``default`` hook: represent a datetime as a tagged dict.

    A ``datetime.datetime`` becomes ``{'__datetime__': True, 'as_str': ...}``
    with the timestamp formatted as ``%Y%m%dT%H:%M:%S.%f``; any other object
    is returned unchanged.
    """
    if not isinstance(obj, datetime.datetime):
        return obj
    stamp = obj.strftime("%Y%m%dT%H:%M:%S.%f")
    return {'__datetime__': True, 'as_str': stamp}
# Serialise the dict; encode_datetime is supplied as msgpack's `default` hook.
packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
# Deserialise it again; decode_datetime is supplied as the `object_hook`.
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
The problem is that their example doesn't work, I get this error:
obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
KeyError: 'as_str'
Maybe it is because I'm on Python 3.4, but I don't know what the issue with strptime is. I would appreciate your help.

Given that messagepack ( import msgpack ) is good at serializing integers, I created a solution which only uses integers:
_datetime_ExtType = 42
def _unpacker_hook(code, data):
    """msgpack ``ext_hook``: rebuild a datetime from the custom extension type.

    A payload tagged with ``_datetime_ExtType`` is itself msgpack data
    (decoded with ``unpack``) holding the ``datetime.datetime`` constructor
    arguments. When it has eight elements the last one is passed to
    ``dateutil.tz.tzoffset`` as the offset. Any other extension code is
    handed back wrapped in ``msgpack.ExtType``.
    """
    if code != _datetime_ExtType:
        return msgpack.ExtType(code, data)

    fields = unpack(data)
    if len(fields) != 8:
        return datetime.datetime(*fields)

    # eight fields: we have timezone
    *naive_fields, offset = fields
    return datetime.datetime(*naive_fields, dateutil.tz.tzoffset(None, offset))
# This will only get called for unknown types
def _packer_unknown_handler(obj):
    """msgpack ``default`` hook: pack a datetime as the custom extension type.

    The datetime is reduced to a tuple of integers (year .. microsecond),
    followed by the UTC offset in whole seconds when the datetime has one.

    Raises:
        TypeError: if *obj* is not a ``datetime.datetime``.
    """
    if not isinstance(obj, datetime.datetime):
        raise TypeError("Unknown type: {}".format(obj))

    components = (obj.year, obj.month, obj.day, obj.hour, obj.minute, obj.second, obj.microsecond)
    # Test the offset itself rather than the truthiness of tzinfo: a tzinfo
    # whose utcoffset() returns None would otherwise crash on .total_seconds().
    offset = obj.utcoffset()
    if offset is not None:
        components += (int(offset.total_seconds()),)
    # we effectively double pack the values to "compress" them
    return msgpack.ExtType(_datetime_ExtType, pack(components))
def pack(obj, **kwargs):
    """Serialise *obj* with ``msgpack.packb``.

    Objects msgpack cannot serialise natively are routed to
    ``_packer_unknown_handler``. Extra keyword arguments are forwarded to
    ``msgpack.packb`` unchanged.
    """
    # we don't use a global packer because it wouldn't be re-entrant safe
    return msgpack.packb(
        obj,
        use_bin_type=True,
        default=_packer_unknown_handler,
        **kwargs
    )
def unpack(payload):
    """Deserialise msgpack *payload*, decoding extension types via ``_unpacker_hook``.

    The garbage collector is switched off for the duration of the call and
    restored afterwards to the state it had on entry. Restoring (rather than
    always enabling) matters because ``_unpacker_hook`` calls ``unpack``
    again for nested payloads: the inner call must not turn the collector
    back on while the outer call is still running.
    """
    # we temporarily disable gc during unpack to bump up perf: https://pypi.python.org/pypi/msgpack-python
    gc_was_enabled = gc.isenabled()
    gc.disable()
    try:
        # This must match the packer parameters used by pack(). NOTE: use_list=False is faster.
        # raw=False decodes str payloads as UTF-8; it replaces the old
        # encoding='utf-8' argument, which msgpack 1.0 removed.
        return msgpack.unpackb(payload, use_list=False, raw=False, ext_hook=_unpacker_hook)
    finally:
        if gc_was_enabled:
            gc.enable()

Python 3 and Python 2 handle string encoding differently (see: encoding-and-decoding-strings-in-python-3-x).
You therefore need to:
use b'as_str' (instead of 'as_str') as the dictionary key
use encode and decode for the stored value
Modifying the code like this works with both Python 2 and Python 3:
import datetime
import msgpack
useful_dict = {
"id": 1,
"created": datetime.datetime.now(),
}
def decode_datetime(obj):
    """msgpack ``object_hook``: turn a tagged dict back into a datetime.

    The marker and timestamp keys are accepted as either bytes or text,
    because msgpack returns one or the other depending on its version and
    on the ``raw`` / ``encoding`` options. The timestamp value is decoded
    first if it arrives as bytes. Dicts without the marker are returned
    unchanged.
    """
    for marker, field in ((b'__datetime__', b'as_str'), ('__datetime__', 'as_str')):
        if marker in obj:
            stamp = obj[field]
            if isinstance(stamp, bytes):
                stamp = stamp.decode()
            return datetime.datetime.strptime(stamp, "%Y%m%dT%H:%M:%S.%f")
    return obj
def encode_datetime(obj):
    """msgpack ``default`` hook: represent a datetime as a tagged dict.

    The timestamp is formatted as ``%Y%m%dT%H:%M:%S.%f`` and stored encoded
    (as bytes) under ``'as_str'``. Any other object is returned unchanged.
    """
    if not isinstance(obj, datetime.datetime):
        return obj
    stamp = obj.strftime("%Y%m%dT%H:%M:%S.%f").encode()
    return {'__datetime__': True, 'as_str': stamp}
# Round trip: encode_datetime is the `default` hook, decode_datetime the `object_hook`.
packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)

In the documentation it says that the default encoding of unicode strings of packb (and pack) is utf-8. You could simply search for the unicode string '__datetime__' in the decode_datetime function, instead of the byte object b'__datetime__', and add the encoding='utf-8' parameter to unpack. Like so:
def decode_datetime(obj):
    """msgpack ``object_hook``: turn a tagged dict back into a datetime.

    Looks for the text key ``'__datetime__'`` and parses the text stored
    under ``"as_str"``. Dicts without the marker are returned unchanged.
    """
    if '__datetime__' in obj:
        return datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
    return obj
packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
# NOTE(review): msgpack 1.0 removed the `encoding` argument of unpackb; on
# those releases pass raw=False instead -- confirm against the installed version.
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, encoding='utf-8')

Related

Confluent Kafka python schema parser causes conflict with fastavro

I am running Python 3.9 with Confluent Kafka 1.7.0, avro-python3 1.10.0 and fastavro 1.4.1.
The following code uses Avro schema encoder in order to encode a message, which succeeds only if we transform the resulting schema encoding by getting rid of the MappingProxyType:
from confluent_kafka import Producer
from confluent_kafka.avro import CachedSchemaRegistryClient, MessageSerializer
from fastavro.schema import parse_schema
from fastavro.validation import validate
from types import MappingProxyType
from typing import Any
import sys
def transformMap(item: Any) -> Any:
    """Return a copy of *item* with every mapping proxy replaced by a plain dict.

    Dicts and ``MappingProxyType`` instances are rebuilt as ``dict`` and
    lists as ``list``, recursing into their values. ``isinstance`` is used
    rather than an exact ``type()`` match so that dict and list subclasses
    are walked too; otherwise a mapping proxy nested inside one would be
    left in place. Anything else is returned as-is.
    """
    if isinstance(item, (dict, MappingProxyType)):
        return {k: transformMap(v) for k, v in item.items()}
    if isinstance(item, list):
        return [transformMap(v) for v in item]
    return item
def main(argv = None):
    """Encode one InstrumentIdMsg record with its registry schema and publish it.

    The latest schema is fetched from the schema registry, converted to
    plain dicts and checked with fastavro, then the record is Avro-encoded
    and produced to Kafka. Returns 0 as the process exit status.
    """
    msgType = 'InstrumentIdMsg'
    idFigi = 'BBG123456789'
    head = {'sDateTime': 1, 'msgType': msgType, 'srcSeq': 1,
            'rDateTime': 1, 'src': 'Brownstone', 'reqID': None,
            'sequence': 1}
    msgMap = {'head': head, 'product': 'Port', 'idIsin': None, 'idFigi': idFigi,
              'idBB': None, 'benchmark': None, 'idCusip': None,'idCins': None}
    registryClient = CachedSchemaRegistryClient(url = 'http://local.KafkaRegistry.com:8081')
    schemaId, schema, version = registryClient.get_latest_schema(msgType)
    serializer = MessageSerializer(registry_client = registryClient)
    schemaMap = schema.to_json()
    # NOTE:
    # schemaMap cannot be used since it uses mappingproxy
    # which causes validate() and parse_schema() to throw
    schemaDict = transformMap(schemaMap)
    # Both calls are made for their checks only: validate() raises on an
    # invalid record (raise_errors = True) and parse_schema() raises on a
    # schema fastavro cannot parse. Their return values are not needed.
    validate(datum = msgMap, schema = schemaDict, raise_errors = True)
    parse_schema(schema = schemaDict)
    msg = serializer.encode_record_with_schema_id(schema_id = schemaId,
                                                  record = msgMap)
    producer = Producer({'bootstrap.servers': 'kafkaServer:9092'})
    producer.produce(key = idFigi,
                     topic = 'TOPIC_NAME',
                     value = msg)
    # produce() only queues the message; without flush() the process can
    # exit before it has been delivered to the broker.
    producer.flush()
    return 0
if __name__ == '__main__':
sys.exit(main())
The transformation basically leaves everything unchanged except altering MappingProxyType to dict instances.
Is there a problem in the way I am calling the standard library which causes mapping proxy to be used, which in turn causes fastavro to throw? Can this be fixed by something as a user, or is this really a bug in the Confluent Kafka library?
In addition, the output schemaId from registryClient.get_latest_schema() is marked in the docs to return str but returns int. If I understand correctly, this is the intended input into the schema_id parameter of serializer.encode_record_with_schema_id() (and it works correctly if I call it), which is also marked as int. Is that a typo in the docs? In other words, it seems either registryClient.get_latest_schema() should return an integer, or serializer.encode_record_with_schema_id() should take a string, or I am doing something incorrectly :) Which one is it?
Thank you very much.

Persist datetime values for use with gviz api

I have a Python application that collects data from an MQTT broker and presents it to a website via the gviz Python API:
# Column description for the gviz table: (column id, column type) pairs.
DESCRIPTION = [
    ('Zeit', 'datetime'),
    ('Temperatur', 'number'),
    ('Feuchtigkeit', 'number'),
    ('Batterie', 'number'),
]
def sendAnswer(conn):
    """Send the collected readings to the web server as gviz JSON.

    The wire format is a header ``L<6-digit length>;`` followed by the
    UTF-8 encoded JSON from ``DataTable.ToJSon()``. The length in the header
    is the number of bytes that follow, so it stays correct when the JSON
    contains non-ASCII characters. Send failures are ignored on purpose: the
    data is simply sent again on the next request.
    """
    # protect against the data supplier; always release, even if the copy fails
    Mutex.acquire()
    try:
        Trans = deepcopy(DatArr)
    finally:
        Mutex.release()
    # create and populate the DataTable
    data_table = gviz_api.DataTable(DESCRIPTION)
    data_table.LoadData(Trans)
    payload = bytes(data_table.ToJSon(), "UTF-8")
    header = bytes("L{:06d};".format(len(payload)), "UTF-8")
    # send it to the webserver
    try:
        conn.sendall(header)
        conn.sendall(payload)
    except Exception:
        # if anything goes wrong, try again next time
        # (KeyboardInterrupt / SystemExit are deliberately not swallowed)
        pass
def on_message(client, userdata, message):
    """MQTT callback: append the new reading to ``DatArr`` and expire old rows.

    A payload equal to the previous one is skipped once ``DatArr`` holds more
    than two rows. Rows whose timestamp is more than one day older than the
    new reading are removed from the front of the list.
    """
    global Last, DatArr
    # get the data from the broker
    cur = json.loads(str(message.payload, encoding='utf-8'))
    if cur == Last and len(DatArr) > 2:
        return
    now = datetime.now()
    # Build the row before taking the lock, so that a payload with a missing
    # key raises without the mutex being held.
    row = [now, cur["temp"], cur["hum"], cur["bat"]]
    # protect against the webserver
    Mutex.acquire()
    try:
        # add the data
        DatArr.append(row)
        Last = cur
        # cleanup old values
        while DatArr and now - DatArr[0][0] > timedelta(days=1):
            DatArr.pop(0)
    finally:
        Mutex.release()
This works, but instead of keeping the data in a Python variable I want to persist it in a file (preferably JSON). However, I cannot json.dump() a datetime value, and I cannot .LoadData() a string into a gviz DataTable. The Python gviz API also lacks an "addRow()". Any suggestions?
Much thanks in advance!
Based on the answers to this question: JSON datetime between Python and JavaScript
I found a solution and implemented it in a python module:
import json
import datetime
class DateTimeJSONEncoder(json.JSONEncoder):
    """JSON encoder that writes datetimes as ``{"nested_datetime": "<ISO string>"}``."""

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Datetimes are written with the microsecond field always present.
        A plain ``isoformat()`` drops it when it is zero, producing a string
        that a ``'%Y-%m-%dT%H:%M:%S.%f'`` parser cannot read back. Every
        other type is deferred to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, datetime.datetime):
            return dict(nested_datetime=obj.isoformat(timespec='microseconds'))
        return super(DateTimeJSONEncoder, self).default(obj)
def datetime_decoder(d):
    """``object_hook`` for ``json.load``: revive ``{"nested_datetime": ...}`` dicts.

    A dict whose only key is ``'nested_datetime'`` is converted to a
    ``datetime.datetime``. The timestamp may or may not carry fractional
    seconds, because ``datetime.isoformat()`` omits them when the
    microsecond field is zero. Any other dict is copied, with nested dict
    values decoded recursively.
    """
    if len(d) == 1 and 'nested_datetime' in d:
        stamp = d['nested_datetime']
        fmt = '%Y-%m-%dT%H:%M:%S.%f' if '.' in stamp else '%Y-%m-%dT%H:%M:%S'
        return datetime.datetime.strptime(stamp, fmt)
    return {
        prop: datetime_decoder(value) if isinstance(value, dict) else value
        for prop, value in d.items()
    }
The class and the function go as named parameters into the json.dump and json.load functions like this:
DatArr = json.load(DatFile, object_hook=djson.datetime_decoder)
and
json.dump(DatArr, DatFile, cls=djson.DateTimeJSONEncoder)
This persists the formerly global variable DatArr in the json file DatFile.
Thanks to all the posters to the above question for providing the information.

Python json dumps syntax error when appending list of dict

I have two functions that each return a list of dictionaries, and I'm trying to encode them as JSON. It works when I use only the first function, but when I add the second one I get the syntax error ": expected". I will eventually be combining a total of 7 functions that each output a list of dicts. Is there a better way of accomplishing this?
import dmidecode
import simplejson as json
def get_bios_specs():
    """Return one spec dict per DMI type-0 entry reported by ``dmidecode.bios()``.

    A fresh dict is built for every matching entry, so several entries never
    share (and overwrite) one dict object. The list is empty when no entry
    matches.
    """
    BIOSlist = []
    for v in dmidecode.bios().values():
        if isinstance(v, dict) and v['dmi_type'] == 0:
            data = v['data']
            BIOSlist.append({
                "Name": str(data['Vendor']),
                "Description": str(data['Vendor']),
                "BuildNumber": str(data['Version']),
                "SoftwareElementID": str(data['BIOS Revision']),
                "primaryBIOS": "True",
            })
    return BIOSlist
def get_board_specs():
    """Return one spec dict per DMI type-2 entry reported by ``dmidecode.baseboard()``.

    A fresh dict is built for every matching entry, so several entries never
    share (and overwrite) one dict object. The list is empty when no entry
    matches.
    """
    MOBOlist = []
    for v in dmidecode.baseboard().values():
        if isinstance(v, dict) and v['dmi_type'] == 2:
            data = v['data']
            MOBOlist.append({
                "Manufacturer": str(data['Manufacturer']),
                "Model": str(data['Product Name']),
            })
    return MOBOlist
def get_json_dumps():
    # Intended to return the BIOS and motherboard spec lists as one JSON document.
    jsonOBJ = json
    #Syntax error is here, i can't use comma to continue adding more, nor + to append.
    # The inner dict for 'HardwareSpec' is closed right after the 'BIOS' entry,
    # so the following `,{'Motherboard': ...}` is read as another item of the
    # OUTER dict with no `key:` in front of it -- hence "':' expected".
    return jsonOBJ.dumps({'HardwareSpec':{'BIOS': get_bios_specs()},{'Motherboard': get_board_specs()}})
Use multiple items within your nested dictionary.
jsonOBJ.dumps({
'HardwareSpec': {
'BIOS': get_bios_specs(),
'Motherboard': get_board_specs()
}
})
And if you want multiple BIOS items or Motherboard items, just use a list.
...
'HardwareSpec': {
'BIOS': [
get_bios_specs(),
get_uefi_specs()
]
...
}
If you want a more convenient lookup of specs, you can just embed a dict:
jsonOBJ.dumps({'HardwareSpec':{'BIOS': get_bios_specs(),
'Motherboard': get_board_specs()
}
})

Python Construct - consume data for optional field

With the Python construct library, the data I'm parsing has a field which only has meaning if a flag is set.
However, the data field is always present.
Therefore, I would like to consume the data in any case, but only set the field value based on the flag's value.
For example, if the structure is (incorrectly) defined as:
struct = Struct("struct",
Flag("flag"),
UBInt8("optional_data"),
UBInt8("mandatory")
)
For the data:
>>> struct.parse("010203".decode("hex"))
The result should be:
Container({'flag': True, 'mandatory': 3, 'optional_data': 2})
And for data:
>>> struct.parse("000203".decode("hex"))
The desired result is:
Container({'flag': False, 'mandatory': 3, 'optional_data': None})
I have tried the following:
struct = Struct("struct",
Flag("flag"),
IfThenElse("optional_data", lambda ctx: ctx.flag,
UBInt8("dummy"),
Padding(1)
),
UBInt8("mandatory")
)
However, Padding() puts the raw data in the field, like so:
>>> struct.parse("000203".decode("hex"))
Container({'flag': False, 'mandatory': 3, 'optional_data': '\x02'})
Thank you
I am not sure I understand your problem correctly. If the problem is only that the padding is not parsed as an int, then you don't need the IfThenElse. You can check the flag in the parsed container in your own code and choose to ignore the optional_data field.
struct = Struct("struct",
Flag("flag"),
UBInt8("optional_data"),
UBInt8("mandatory")
)
If your problem is that you want the name Optional Data to be used only when the flag is set but the name dummy to be used when flag is not set, then you need to define two Ifs
# Two complementary Ifs: the first applies when the flag is set, the second
# when it is not. Python's negation operator is `not`; `!ctx.flag` is a
# syntax error.
struct = Struct("struct",
    Flag("flag"),
    If("optional_data", lambda ctx: ctx.flag,
        UBInt8("useful_byte"),
    ),
    If("dummy", lambda ctx: not ctx.flag,
        UBInt8("ignore_byte"),
    ),
    UBInt8("mandatory")
)
Perhaps you could use an Adapter similar to the LengthValueAdapter for a Sequence
class LengthValueAdapter(Adapter):
    """
    Adapter over a subcon that yields a length-value pair.

    Decoding keeps only the value (the second element of the pair);
    encoding rebuilds the pair, taking the length from the value itself.
    See PrefixedArray and PascalString.

    Parameters:
    * subcon - the subcon returning a length-value pair
    """
    __slots__ = []

    def _decode(self, obj, context):
        return obj[1]

    def _encode(self, obj, context):
        return len(obj), obj
class OptionalDataAdapter(Adapter):
    """Adapter that reports the decoded value only when ``context.flag`` is set."""
    __slots__ = []

    def _decode(self, obj, context):
        # Return obj when the flag in the context is truthy, otherwise None.
        if context.flag:
            return obj
        else:
            return None
So
struct = Struct("struct",
Flag("flag"),
OptionalDataAdapter(UBInt8("optional_data")),
UBInt8("mandatory")
)

How to read/make sense of a PHP serialised data string in python

A legacy database I'm accessing via Django has a table column that stores serialised data in the following string format:
a:5:{i:1;s:4:"1869";i:2;s:4:"1859";i:3;s:4:"1715";i:4;s:1:"0";i:5;s:1:"0";}
Is there any way I can use python/python-library to change it into a list or any other friendly python data type for further processing of individual values?
Note: These values were written to the database via PHP.
phpserialize:
a port of the serialize and unserialize functions of php to python. This module implements the python serialization interface (eg: provides dumps, loads and similar functions)...
Here's a very rough and incomplete implementation:
def p(f, d): # parse until
    """Read single bytes from *f* until the delimiter *d* or end of stream.

    The delimiter is consumed but not included in the returned bytes.
    """
    chunks = []
    while True:
        byte = f.read(1)
        if not byte or byte == d:
            break
        chunks.append(byte)
    return b''.join(chunks)
def parse(f):
    """Parse one PHP-serialised value from *f* and return it as a Python object.

    *f* may be a binary file-like object, a bytes object, or a str (encoded
    as UTF-8 first). Supported types: ``N;`` (None), ``b:`` (bool), ``i:``
    (int), ``d:`` (float), ``s:`` (str) and ``a:`` (array, returned as a
    dict).

    Raises:
        AssertionError: on an unsupported type marker. It is raised
            explicitly so the check is not stripped under ``python -O``.
    """
    if not hasattr(f, 'read'):
        import io
        try:
            f = io.BytesIO(f)
        except TypeError:
            f = io.BytesIO(f.encode('utf-8'))
    typ = f.read(2)
    if typ == b'N;':
        return None
    if typ == b'b:':
        return bool(int(p(f, b';')))
    if typ == b'i:':
        return int(p(f, b';'))
    if typ == b'd:':
        return float(p(f, b';'))
    if typ == b's:':
        # s:<byte length>:"<payload>";  -- read the payload plus the two
        # quotes and the ';' in one go, then strip those three bytes.
        length = int(p(f, b':'))
        return f.read(length + 3)[1:-2].decode('utf-8')
    if typ == b'a:':
        count = int(p(f, b':'))
        f.read(1) # {
        # each entry is a key followed by a value; the tuple is evaluated
        # left to right, so the key is parsed first
        items = [(parse(f), parse(f)) for _ in range(count)]
        f.read(1) # }
        return dict(items)
    raise AssertionError(typ)

Categories

Resources