str to unicode not working python - python

I am trying to match the exact format of a response. I have:
def recreate_metrics_format(self):
MAP = {
# "measurment_name": "measurement_name_",
"ref": "measurement_ref_",
"value": "measurement_value_"
}
IDs = 3
metrics = []
for this_id in range(1, IDs+1):
this_metric = {}
for k, v in MAP.items():
key = k.encode('UTF-8')
attribute = v + str(this_id)
this_metric[key] = getattr(self, attribute)
metrics.append(this_metric)
return metrics
For some reason, even after adding the encode part, my format is off:
ipdb> expected_metrics
[{'ref': u'tenetur', 'value': 567.41}, {'ref': u'blanditiis', 'value': 1632.0}, {'ref': u'ducimus', 'value': 786.4862}]
ipdb> metrics
[{u'ref': u'tenetur', u'value': 567.41}, {u'ref': u'blanditiis', u'value': 1632.0}, {u'ref': u'ducimus', u'value': 786.4862}]
I do unicode instead of encode in shell and it works:
ipdb> unicode
<type 'unicode'>
ipdb> unicode('blah', 'UTF-8')
u'blah'
I've used encode before and it worked, what is wrong with using encode here? Thank you

Related

recursively collect string blocks in python

I have a custom data file formatted like this:
{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}
I want to collect the blocks of data, meaning string between each set of {} while maintaining a hierarhcy. This data is not a typical json format so that is not a possible solution.
My idea was to create a class object like so
class Block:
def __init__(self, header, children):
self.header = header
self.children = children
Where i would then loop through the data line by line 'somehow' collecting the necessary data so my resulting output would like something like this...
Block("data = {}", [
Block("friends = {max = 0 0,\n min = 0 0,}", []),
Block("family = {version = 1}", [...])
])
In short I'm looking for help on ways I can serialize this into useful data I can then easily manipulate. So my approach is to break into objects by using the {} as dividers.
If anyone has suggestions on ways to better approach this I'm all up for ideas. Thank you again.
So far I've just implemented the basic snippets of code
class Block:
def __init__(self, content, children):
self.content = content
self.children = children
def GetBlock(strArr=[]):
print len(strArr)
# blocks = []
blockStart = "{"
blockEnd = "}"
with open(filepath, 'r') as file:
data = file.readlines()
blocks = GetBlock(strArr=data)
You can create a to_block function that takes the lines from your file as an iterator and recursively creates a nested dictionary from those. (Of course you could also use a custom Block class, but I don't really see the benefit in doing so.)
def to_block(lines):
block = {}
for line in lines:
if line.strip().endswith(("}", "},")):
break
key, value = map(str.strip, line.split(" = "))
if value.endswith("{"):
value = to_block(lines)
block[key] = value
return block
When calling it, you have to strip the first line, though. Also, evaluating the "leafs" to e.g. numbers or strings is left as an excercise to the reader.
>>> to_block(iter(data.splitlines()[1:]))
{'data': {'family': {'version': '1,',
'cars': {'bike': '"trek",', 'car': '"ford",', 'van': '"honda",'},
'presets': {'travelers': 'False,', 'size': '10,', 'location': '"italy",'}},
'friends': {'max': '0 0,', 'min': '0 0,'}}}
Or when reading from a file:
with open("data.txt") as f:
next(f) # skip first line
res = to_block(f)
Alternatively, you can do some preprocessing to transform that string into a JSON(-ish) string and then use json.loads. However, I would not go all the way here but instead just wrap the values into "" (and replace the original " with ' before that), otherwise there is too much risk to accidentally turning a string with spaces into a list or similar. You can sort those out once you've created the JSON data.
>>> data = data.replace('"', "'")
>>> data = re.sub(r'= (.+),$', r'= "\1",', data, flags=re.M)
>>> data = re.sub(r'^\s*(\w+) = ', r'"\1": ', data, flags=re.M)
>>> data = re.sub(r',$\s*}', r'}', data, flags=re.M)
>>> json.loads(data)
{'data': {'family': {'version': '1',
'presets': {'size': '10', 'travelers': 'False', 'location': "'italy'"},
'cars': {'bike': "'trek'", 'van': "'honda'", 'car': "'ford'"}},
'friends': {'max': '0 0', 'min': '0 0'}}}
You can also do with ast or json with the help of regex substitutions.
import re
a = """{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}"""
#with ast
a = re.sub("(\w+)\s*=\s*", '"\\1":', a)
a = re.sub(":\s*((?:\d+)(?: \d+)+)", lambda x:':[' + x.group(1).replace(" ", ",") + "]", a)
import ast
print ast.literal_eval(a)
#{'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'car': 'ford', 'bike': 'trek', 'van': 'honda'}, 'presets': {'travelers': False, 'location': 'italy', 'size': 10}, 'version': 1}}}
#with json
import json
a = re.sub(",(\s*\})", "\\1", a)
a = a.replace(":True", ":true").replace(":False", ":false").replace(":None", ":null")
print json.loads(a)
#{u'data': {u'friends': {u'max': [0, 0], u'min': [0, 0]}, u'family': {u'cars': {u'car': u'ford', u'bike': u'trek', u'van': u'honda'}, u'presets': {u'travelers': False, u'location': u'italy', u'size': 10}, u'version': 1}}}

Using json.loads() on data returned from Zoho Creator - extra_data() error

I get the following return from zoho creators api:
'var zohoview6 = {"Name_Registration":[ {"Approved":"Yes","Extension":1197,"ID_Number":"","Email":"xxx#yy.com","name_first ":"Test","Updated":false,"ID":"1257609000000083070","Authorised":true,"NameID":68 }]};'
I then use str.replace to remove 'var zohoview6 = '
However, when I load the resulting str using json.loads(str) I get the following error
ValueError: Extra data: line 1 column 192 - line 1 column 193 (char 191 - 192)
Here's the return I get formatted nicely:
var zohoview6 =
{
"Name_Registration":[
{
"Approved":"Yes",
"Extension":1197,
"ID_Number":"",
"Email":"xxx#yy.com",
"name_first":"Test",
"Updated":false,
"ID":"1257609000000083070",
"Authorised":true,
"NameID":68
}]
};
What am I doing wrong?
You also need to remove the extra ; at the end of the string using, for instance, str.rstrip():
>>> import json
>>>
>>> s = 'var zohoview6 = {"Name_Registration":[ {"Approved":"Yes","Extension":1197,"ID_Number":"","Email":"xxx#yy.com","name_first ":"Test","Updated":false,"ID":"1257609000000083070","Authorised":true,"NameID":68 }]};'
>>> data = s.replace("var zohoview6 = ", "").rstrip(";")
>>> json.loads(data)
{u'Name_Registration': [{u'Updated': False, u'Extension': 1197, u'ID_Number': u'', u'Email': u'xxx#yy.com', u'Authorised': True, u'name_first ': u'Test', u'ID': u'1257609000000083070', u'NameID': 68, u'Approved': u'Yes'}]}

TypeError: string indices must be integers while parsing JSON, Python?

I'm trying to parse JSON, below is my code.
import requests
import json
yatoken = '123123sdfsdf'
listOfId = ['11111111111', '2222222222', '33333333333']
for site in listOfId:
r = requests.get('http://api.ya-bot.net/projects/' + site + '?token=' + yatoken)
parsed = json.loads(r.text)
for url in parsed['project']:
#print url
print str(url['name'])
And JSON:
{
"project":{
"id":"123123sdfsdfs",
"urls":[],
"name":"Имя",
"group":"Группа",
"comments":"",
"sengines":[],
"wordstat_template":1,
"wordstat_regions":[]
}
}
It gives this error
print str(url['name'])
TypeError: string indices must be integers
How I can fix this problem?
Thx.
The 'project' key refers to a dictionary. Looping over that dictionary gives you keys, each a string. You are not looping over the list of URLs. One of those keys will be 'name'
Your code is confusing otherwise. You appear to want to get each URL. To do that, you'd have to loop over the 'urls' key in that nested dictionary:
for url in parsed['project']['urls']:
# each url value
In your sample response that list is empty however.
If you wanted to get the 'name' key from the nested dictionary, just print it without looping:
print parsed['project']['name']
Demo:
>>> import json
>>> parsed = json.loads('''\
... {
... "project":{
... "id":"123123sdfsdfs",
... "urls":[],
... "name":"Имя",
... "group":"Группа",
... "comments":"",
... "sengines":[],
... "wordstat_template":1,
... "wordstat_regions":[]
... }
... }
... ''')
>>> parsed['project']
{u'group': u'\u0413\u0440\u0443\u043f\u043f\u0430', u'name': u'\u0418\u043c\u044f', u'wordstat_regions': [], u'comments': u'', u'urls': [], u'sengines': [], u'id': u'123123sdfsdfs', u'wordstat_template': 1}
>>> parsed['project']['name']
u'\u0418\u043c\u044f'
>>> print parsed['project']['name']
Имя
>>> print parsed['project']['urls']
[]
for url in parsed['project'] returns a dict so you are actually iterating over the keys of the dict so "id"["name"] etc.. is going to error, you can use d = parsed['project'] to get the dict then access the dict by key to get whatever value you want.
d = parsed['project']
print(d["name"])
print(d["urls"])
...
Or iterate over the items to get key and value:
for k, v in parsed['project'].items():
print(k,v)
If you print what is returned you can see exactly what is happening:
In [17]: js = {
"project":{
"id":"123123sdfsdfs",
"urls":[],
"name":"Имя",
"group":"Группа",
"comments":"",
"sengines":[],
"wordstat_template":1,
"wordstat_regions":[]
}
}
In [18]: js["project"] # dict
Out[18]:
{'comments': '',
'group': 'Группа',
'id': '123123sdfsdfs',
'name': 'Имя',
'sengines': [],
'urls': [],
'wordstat_regions': [],
'wordstat_template': 1}
In [19]: for k in js["project"]: # iterating over the keys of the dict
....: print(k)
....:
sengines # k["name"] == error
id
urls
comments
name
wordstat_regions
wordstat_template
group

Python Json Parse

I have made a mistake during my storage of json strings to a database. Accidentally I did not store the string as json but I stored it as the string formation of the Object.
I received
my_jstring['field']
and I have inserted as a string to the database.
my_jstring['field'] is not json but a python json object. Is it possible to parse again this object that is in string format?
My string is the following:
'"\'{u\'\'full_name\'\': u\'\'Dublin City\'\', u\'\'url\'\': u\'\'https://api.twitter.com/1.1/geo/id/7dde0febc9ef245b.json\'\', u\'\'country\'\': u\'\'Ireland\'\', u\'\'place_type\'\': u\'\'city\'\', u\'\'bounding_box\'\': {u\'\'type\'\': u\'\'Polygon\'\', u\'\'coordinates\'\': [[[-6.3873911, 53.2987449], [-6.3873911, 53.4110598], [-6.1078047, 53.4110598], [-6.1078047, 53.2987449]]]}, u\'\'contained_within\'\': [], u\'\'country_code\'\': u\'\'IE\'\', u\'\'attributes\'\': {}, u\'\'id\'\': u\'\'7dde0febc9ef245b\'\', u\'\'name\'\': u\'\'Dublin City\'\'}\'"'
Use ast.literal_eval() to parse Python literals back into a Python object.
You appear to have doubly qouted the value however, adding in extra single quotes. These need to be repaired too:
data = ast.literal_eval(data)
data = data[1:-1].replace("''", "'")
obj = ast.literal_eval(data)
Demo:
>>> import ast
>>> data = '"\'{u\'\'full_name\'\': u\'\'Dublin City\'\', u\'\'url\'\': u\'\'https://api.twitter.com/1.1/geo/id/7dde0febc9ef245b.json\'\', u\'\'country\'\': u\'\'Ireland\'\', u\'\'place_type\'\': u\'\'city\'\', u\'\'bounding_box\'\': {u\'\'type\'\': u\'\'Polygon\'\', u\'\'coordinates\'\': [[[-6.3873911, 53.2987449], [-6.3873911, 53.4110598], [-6.1078047, 53.4110598], [-6.1078047, 53.2987449]]]}, u\'\'contained_within\'\': [], u\'\'country_code\'\': u\'\'IE\'\', u\'\'attributes\'\': {}, u\'\'id\'\': u\'\'7dde0febc9ef245b\'\', u\'\'name\'\': u\'\'Dublin City\'\'}\'"'
>>> data = ast.literal_eval(data)
>>> data = data[1:-1].replace("''", "'")
>>> obj = ast.literal_eval(data)
>>> obj
{u'country_code': u'IE', u'url': u'https://api.twitter.com/1.1/geo/id/7dde0febc9ef245b.json', u'country': u'Ireland', u'place_type': u'city', u'bounding_box': {u'type': u'Polygon', u'coordinates': [[[-6.3873911, 53.2987449], [-6.3873911, 53.4110598], [-6.1078047, 53.4110598], [-6.1078047, 53.2987449]]]}, u'contained_within': [], u'full_name': u'Dublin City', u'attributes': {}, u'id': u'7dde0febc9ef245b', u'name': u'Dublin City'}

JSON serialize a dictionary with tuples as key

Is there a way in Python to serialize a dictionary that is using a tuple as key?
e.g.
a = {(1, 2): 'a'}
simply using json.dumps(a) raises this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/json/__init__.py", line 230, in dumps
return _default_encoder.encode(obj)
File "/usr/lib/python2.6/json/encoder.py", line 367, in encode
chunks = list(self.iterencode(o))
File "/usr/lib/python2.6/json/encoder.py", line 309, in _iterencode
for chunk in self._iterencode_dict(o, markers):
File "/usr/lib/python2.6/json/encoder.py", line 268, in _iterencode_dict
raise TypeError("key {0!r} is not a string".format(key))
TypeError: key (1, 2) is not a string
You can't serialize that as json, json has a much less flexible idea about what counts as a dict key than python.
You could transform the mapping into a sequence of key, value pairs, something like this:
import json
def remap_keys(mapping):
return [{'key':k, 'value': v} for k, v in mapping.iteritems()]
...
json.dumps(remap_keys({(1, 2): 'foo'}))
>>> '[{"value": "foo", "key": [1, 2]}]'
from json import loads, dumps
from ast import literal_eval
x = {(0, 1): 'la-la la', (0, 2): 'extricate'}
# save: convert each tuple key to a string before saving as json object
s = dumps({str(k): v for k, v in x.items()})
# load in two stages:
# (i) load json object
obj = loads(s)
# (ii) convert loaded keys from string back to tuple
d = {literal_eval(k): v for k, v in obj.items()}
See https://stackoverflow.com/a/12337657/2455413.
JSON only supports strings as keys. You'll need to choose a way to represent those tuples as strings.
You could just use str((1,2)) as key because json only expects the keys as strings but if you use this you'll have to use a[str((1,2))] to get the value.
json can only accept strings as keys for dict,
what you can do, is to replace the tuple keys with string like so
with open("file", "w") as f:
k = dic.keys()
v = dic.values()
k1 = [str(i) for i in k]
json.dump(json.dumps(dict(zip(*[k1,v]))),f)
And than when you want to read it, you can change the keys back to tuples using
with open("file", r) as f:
data = json.load(f)
dic = json.loads(data)
k = dic.keys()
v = dic.values()
k1 = [eval(i) for i in k]
return dict(zip(*[k1,v]))
This solution:
Avoids the security risk of eval().
Is short.
Is copy-pastable as save and load functions.
Keeps the structure of tuple as the key, in case you are editing the JSON by hand.
Adds ugly \" to the tuple representation, which is worse than the other str()/eval() methods here.
Can only handle tuples as keys at the first level for nested dicts (as of this writing no other solution here can do better)
def json_dumps_tuple_keys(mapping):
string_keys = {json.dumps(k): v for k, v in mapping.items()}
return json.dumps(string_keys)
def json_loads_tuple_keys(string):
mapping = json.loads(string)
return {tuple(json.loads(k)): v for k, v in mapping.items()}
m = {(0,"a"): "first", (1, "b"): [9, 8, 7]}
print(m) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
s = json_dumps_tuple_keys(m)
print(s) # {"[0, \"a\"]": "first", "[1, \"b\"]": [9, 8, 7]}
m2 = json_loads_tuple_keys(s)
print(m2) # {(0, 'a'): 'first', (1, 'b'): [9, 8, 7]}
print(m==m2) # True
Here is one way to do it. It will require the key to be json decoded after the main dictionary is decoded and the whole dictionary re-sequenced, but it is doable:
import json
def jsonEncodeTupleKeyDict(data):
ndict = dict()
# creates new dictionary with the original tuple converted to json string
for key,value in data.iteritems():
nkey = json.dumps(key)
ndict[nkey] = value
# now encode the new dictionary and return that
return json.dumps(ndict)
def main():
tdict = dict()
for i in range(10):
key = (i,"data",5*i)
tdict[key] = i*i
try:
print json.dumps(tdict)
except TypeError,e:
print "JSON Encode Failed!",e
print jsonEncodeTupleKeyDict(tdict)
if __name__ == '__main__':
main()
I make no claim to any efficiency of this method. I needed this for saving some joystick mapping data to a file. I wanted to use something that would create a semi-human readable format so it could be edited if needed.
You can actually not serialize tuples as key to json, but you can convert the tuple to a string and recover it, after you have deserialized the file.
with_tuple = {(0.1, 0.1): 3.14} ## this will work in python but is not serializable in json
{(0.1, 0.1): 3.14}
But you cannot serialize it with json. However, you can use
with_string = {str((0.1, 0.1))[1:-1]: 3.14} ## the expression [1,-1] removes the parenthesis surrounding the tuples in python.
{'0.1, 0.1': 3.14} # This is serializable
With a bit of cheating, you will recover the original tuple (after having deserialized the whole file) by treating each key (as str) separately
tuple(json.loads("["+'0.1, 0.1'+"]")) ## will recover the tuple from string
(0.1, 0.1)
It is a bit of overload to convert a string to a tuple using json.loads, but it will work. Encapsulate it and you are done.
Peace out and happy coding!
Nicolas
Here are two functions you could use to convert a dict_having_tuple_as_key into a json_array_having_key_and_value_as_keys and then de-convert it the way back
import json
def json_dumps_dict_having_tuple_as_key(dict_having_tuple_as_key):
if not isinstance(dict_having_tuple_as_key, dict):
raise Exception('Error using json_dumps_dict_having_tuple_as_key: The input variable is not a dictionary.')
list_of_dicts_having_key_and_value_as_keys = [{'key': k, 'value': v} for k, v in dict_having_tuple_as_key.items()]
json_array_having_key_and_value_as_keys = json.dumps(list_of_dicts_having_key_and_value_as_keys)
return json_array_having_key_and_value_as_keys
def json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(json_array_having_key_and_value_as_keys):
list_of_dicts_having_key_and_value_as_keys = json.loads(json_array_having_key_and_value_as_keys)
if not all(['key' in diz for diz in list_of_dicts_having_key_and_value_as_keys]) and all(['value' in diz for diz in list_of_dicts_having_key_and_value_as_keys]):
raise Exception('Error using json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps: at least one dictionary in list_of_dicts_having_key_and_value_as_keys ismissing key "key" or key "value".')
dict_having_tuple_as_key = {}
for dict_having_key_and_value_as_keys in list_of_dicts_having_key_and_value_as_keys:
dict_having_tuple_as_key[ tuple(dict_having_key_and_value_as_keys['key']) ] = dict_having_key_and_value_as_keys['value']
return dict_having_tuple_as_key
usage example:
my_dict = {
('1', '1001', '2021-12-21', '1', '484'): {"name": "Carl", "surname": "Black", "score": 0},
('1', '1001', '2021-12-22', '1', '485'): {"name": "Joe", "id_number": 134, "percentage": 11}
}
my_json = json_dumps_dict_having_tuple_as_key(my_dict)
print(my_json)
[{'key': ['1', '1001', '2021-12-21', '1', '484'], 'value': {'name': 'Carl', 'surname': 'Black', 'score': 0}},
{'key': ['1', '1001', '2021-12-22', '1', '485'], 'value': {'name': 'Joe', 'id_number': 134, 'percentage': 11}}]
my_dict_reconverted = json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(my_json)
print(my_dict_reconverted)
{('1', '1001', '2021-12-21', '1', '484'): {'name': 'Carl', 'surname': 'Black', 'score': 0},
('1', '1001', '2021-12-22', '1', '485'): {'name': 'Joe', 'id_number': 134, 'percentage': 11}}
# proof of working 1
my_dict == my_dict_reconverted
True
# proof of working 2
my_dict == json_loads_dictionary_split_into_key_and_value_as_keys_and_underwent_json_dumps(
json_dumps_dict_having_tuple_as_key(my_dict)
)
True
(Using concepts expressed by #SingleNegationElimination to answer #Kvothe comment)
Here's a complete example to encode/decode nested dictionaries with tuple keys and values into/from json. tuple key will be a string in JSON.
values of types tuple or set will be converted to list
def JSdecoded(item:dict, dict_key=False):
if isinstance(item, list):
return [ JSdecoded(e) for e in item ]
elif isinstance(item, dict):
return { literal_eval(key) : value for key, value in item.items() }
return item
def JSencoded(item, dict_key=False):
if isinstance(item, tuple):
if dict_key:
return str(item)
else:
return list(item)
elif isinstance(item, list):
return [JSencoded(e) for e in item]
elif isinstance(item, dict):
return { JSencoded(key, True) : JSencoded(value) for key, value in item.items() }
elif isinstance(item, set):
return list(item)
return item
usage
import json
pydata = [
{ ('Apple','Green') : "Tree",
('Orange','Yellow'):"Orchard",
('John Doe', 1945) : "New York" }
]
jsstr= json.dumps(JSencoded(pydata), indent='\t')
print(jsstr)
#[
# {
# "('Apple', 'Green')": "Tree",
# "('Orange', 'Yellow')": "Orchard",
# "('John Doe', 1945)": "New York"
# }
#]
data = json.loads(jsstr) #string keys
newdata = JSdecoded(data) #tuple keys
print(newdata)
#[{('Apple', 'Green'): 'Tree', ('Orange', 'Yellow'): 'Orchard', ('John Doe', 1945): 'New York'}]
def stringify_keys(d):
if isinstance(d, dict):
return {str(k): stringify_keys(v) for k, v in d.items()}
if isinstance(d, (list, tuple)):
return type(d)(stringify_keys(v) for v in d)
return d
json.dumps(stringify_keys(mydict))

Categories

Resources