How do I read this stringified JavaScript variable into Python?

I'm trying to read _pageData from https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1 into Python 2.7.11 so that I can process it, using this code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Testing _pageData processing. """
import urllib2
import re
import ast
import json
import yaml

BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1'

def main():
    """ Do the business. """
    response = urllib2.urlopen(BASE_URL, None)
    results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read())
    first_result = results[0]
    # These all fail
    data = ast.literal_eval(first_result)
    # data = yaml.load(first_result)
    # data = json.loads(first_result)

if __name__ == '__main__':
    main()
but get the following error:
Traceback (most recent call last):
File "./temp.py", line 24, in <module>
main()
File "./temp.py", line 19, in main
data = ast.literal_eval(first_result)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 49, in literal_eval
node_or_string = parse(node_or_string, mode='eval')
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 37, in parse
return compile(source, filename, mode, PyCF_ONLY_AST)
File "<unknown>", line 1
[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n
^
SyntaxError: invalid syntax
var _pageData is in this format:
"[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n"
I've tried replacing the \" and \n and decoding the \uXXXX escapes before parsing, without success. I've also tried replacing ,, with ,"", and with ,'', without success.
Thank you.

It seems there are three kinds of syntactic errors in your string:
, followed by ,
[ followed by ,
, followed by ]
Assuming those are supposed to be null elements (or ''?), you can just replace them in the original string -- exactly as you did for the ,, case, but you missed the others. Note that you have to do the ,, replacement twice, otherwise you will miss cases such as ,,,,. Then you can load the string with json.loads.
>>> s = "your messed up json string"
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r"\[\s*,", "[ null,", s)
>>> s = re.sub(r",\s*\]", ", null]", s)
>>> json.loads(s)

I started off using ast.literal_eval(...) because I was under the (mistaken?) impression that JavaScript arrays and Python lists were mutually compatible, so all I had to do was destringify _pageData.
However, I hadn't noticed that Python doesn't like the ,, and [, sequences, nor the bare true and false. Fixing them does the trick (thank you @Two-Bit Alchemist and @tobias_k).
So, the following appears to work:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Testing _pageData processing. """
import urllib2
import re
import ast
import json
import yaml

BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1'

def main():
    """ Do the business. """
    response = urllib2.urlopen(BASE_URL, None)
    results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read())
    first_result = results[0]
    first_result = first_result.replace(',,,,,,', ',None,None,None,None,None,')
    first_result = first_result.replace(',,,,,', ',None,None,None,None,')
    first_result = first_result.replace(',,,,', ',None,None,None,')
    first_result = first_result.replace(',,,', ',None,None,')
    first_result = first_result.replace(',,', ',None,')
    first_result = first_result.replace('[,', '[None,')
    first_result = first_result.replace('\\"', '\'')
    first_result = first_result.replace('\\n', '')
    first_result = first_result.replace('true', 'True')
    first_result = first_result.replace('false', 'False')
    data = ast.literal_eval(first_result)
    for entry in data:
        print entry

if __name__ == '__main__':
    main()

Related

Traceback <module> from extractdocx import * <module> from docx import opendocx, getdocumenttext from exceptions import PendingDeprecationWarning

Traceback (most recent call last):
  File "C:\xampp\htdocs\Plag\scripts\main.py", line 8, in <module>
    from extractdocx import *
  File "C:\xampp\htdocs\Plag\scripts\extractdocx.py", line 18, in <module>
    from docx import opendocx, getdocumenttext
  File "C:\Users\zeesh\AppData\Local\Programs\Python\Python39\lib\site-packages\docx.py", line 30, in <module>
    from exceptions import PendingDeprecationWarning
ModuleNotFoundError: No module named 'exceptions'
# -*- coding: utf-8 -*-
# Master script for the plagiarism-checker
# Coded by: Shashank S Rao

# import other modules
from cosineSim import *
from htmlstrip import *
from extractdocx import *

# import required modules
import codecs
import traceback
import sys
import operator
import urllib.request, urllib.parse, urllib.error
import json as simplejson

# Given a text string, remove all non-alphanumeric
# characters (using Unicode definition of alphanumeric).
def getQueries(text, n):
    import re
    sentenceEnders = re.compile('[.!?]')
    sentenceList = sentenceEnders.split(text)
    sentencesplits = []
    for sentence in sentenceList:
        x = re.compile(r'\W+', re.UNICODE).split(sentence)
        x = [ele for ele in x if ele != '']
        sentencesplits.append(x)
    finalq = []
    for sentence in sentencesplits:
        l = len(sentence)
        l = l/n
        index = 0
        for i in range(0, l):
            finalq.append(sentence[index:index+n])
            index = index + n-1
        if index != len(sentence):
            finalq.append(sentence[len(sentence)-index:len(sentence)])
    return finalq

# Search the web for the plagiarised text
# Calculate the cosineSimilarity of the given query vs matched content on google
# This is returned as 2 dictionaries
def searchWeb(text, output, c):
    try:
        text = text.encode('utf-8')
    except:
        text = text
    query = urllib.parse.quote_plus(text)
    if len(query) > 60:
        return output, c
    # using googleapis for searching web
    base_url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q='
    url = base_url + '%22' + query + '%22'
    request = urllib.request.Request(url, None, {'Referer': 'Google Chrome'})
    response = urllib.request.urlopen(request)
    results = simplejson.load(response)
    try:
        if (len(results) and 'responseData' in results and 'results' in results['responseData'] and results['responseData']['results'] != []):
            for ele in results['responseData']['results']:
                Match = results['responseData']['results'][0]
                content = Match['content']
                if Match['url'] in output:
                    # print text
                    # print strip_tags(content)
                    output[Match['url']] = output[Match['url']] + 1
                    c[Match['url']] = (c[Match['url']]*(output[Match['url']] - 1) + cosineSim(text, strip_tags(content)))/(output[Match['url']])
                else:
                    output[Match['url']] = 1
                    c[Match['url']] = cosineSim(text, strip_tags(content))
    except:
        return output, c
    return output, c

# Use the main function to scrutinize a file for
# plagiarism
def main():
    # n-grams N VALUE SET HERE
    n = 9
    if len(sys.argv) < 3:
        print("Usage: python main.py <input-filename>.txt <output-filename>.txt")
        sys.exit()
    if sys.argv[1].endswith(".docx"):
        t = docxExtract(sys.argv[1])
    else:
        t = open(sys.argv[1], 'r')
    if not t:
        print("Invalid Filename")
        print("Usage: python main.py <input-filename>.txt <output-filename>.txt")
        sys.exit()
    t = t.read()
    queries = getQueries(t, n)
    q = [' '.join(d) for d in queries]
    found = []
    # using 2 dictionaries: c and output
    # output is used to store the url as key and number of occurences of that url in different searches as value
    # c is used to store url as key and sum of all the cosine similarities of all matches as value
    output = {}
    c = {}
    i = 1
    count = len(q)
    if count > 100:
        count = 100
    for s in q[:100]:
        output, c = searchWeb(s, output, c)
        msg = "\r"+str(i)+"/"+str(count)+" completed..."
        sys.stdout.write(msg)
        sys.stdout.flush()
        i = i+1
    # print ("\n")
    f = open(sys.argv[2], "w")
    for ele in sorted(iter(c.items()), key=operator.itemgetter(1), reverse=True):
        f.write(str(ele[0])+" "+str(ele[1]*100.00))
        f.write("\n")
    f.close()
    print("\nDone!")

if __name__ == "__main__":
    try:
        main()
    except:
        # writing the error to stdout for better error detection
        error = traceback.format_exc()
        print(("\nUh Oh!\n"+"Plagiarism-Checker encountered an error!:\n"+error))
The docx package is old; its last release was in 2014. The code imports the module exceptions, which was a top-level module in Python 2.7 but was removed in Python 3:
$ python2.7 -c "import exceptions"
$ python3.7 -c "import exceptions"
Traceback (most recent call last):
File "<string>", line 1, in <module>
ModuleNotFoundError: No module named 'exceptions'
The bottom line: the package is only for Python 2. Use Python 2.7 or find a different package.
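If you need to stay on Python 3, here is a minimal sketch of one way out, using the maintained python-docx package instead (an assumption on my part that plain paragraph text is all the docxExtract step needs; install it with pip install python-docx):
from docx import Document

def docx_extract(path):
    # python-docx exposes Document(path).paragraphs, each with a .text attribute;
    # join them to get the document body as one string.
    return "\n".join(paragraph.text for paragraph in Document(path).paragraphs)

print(docx_extract("example.docx"))
Note that python-docx is also imported as docx, so uninstall the old docx package first to keep the two from shadowing each other.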

Adding a ProgressBar to a multithreaded Python script

I am trying to add a progress bar to my script, but I couldn't succeed; I think it's because the script is multi-threaded, or maybe the bar has to be added in a separate thread. I found plenty of solutions on Stack Overflow (for example the tqdm library) but I couldn't implement them, and I'm confused about where exactly the progress bar code has to go to make it work.
This is my code:
# -*- coding: utf-8 -*
from __future__ import unicode_literals
# !/usr/bin/python
import codecs
from multiprocessing.dummy import Pool

start_raw = "myfile"
threads = 10

with codecs.open(start_raw, mode='r', encoding='ascii', errors='ignore') as f:
    lists = f.read().splitlines()
lists = list((lists))

def myfunction(x):
    try:
        print x
    except:
        pass

def Main():
    try:
        pp = Pool(int(threads))
        pr = pp.map(myfunction, lists)
    except:
        pass

if __name__ == '__main__':
    Main()
I have tried this solution (https://stackoverflow.com/a/45276885/9746396):
# -*- coding: utf-8 -*
from __future__ import unicode_literals
# !/usr/bin/python
import codecs
from multiprocessing.dummy import Pool
import tqdm

start_raw = "myfile"
threads = 1

with codecs.open(start_raw, mode='r', encoding='ascii', errors='ignore') as f:
    lists = f.read().splitlines()
lists = list((lists))

def myfunction(x):
    try:
        print (x)
    except:
        pass

def Main():
    try:
        pp = Pool(int(threads))
        pr = pp.map(myfunction, lists)
    except:
        pass

if __name__ == '__main__':
    with Pool(2) as p:
        r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30))
but the code does not run and I get an exception (TypeError: 'NoneType' object is not callable):
0%| | 0/30 [00:00<?, ?it/s]Traceback (most recent call last):
  File "file.py", line 35, in <module>
    r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30))
  File "C:\mypath\Python\Python38-32\lib\site-packages\tqdm\std.py", line 1118, in __iter__
    for obj in iterable:
  File "C:\mypath\Python\Python38-32\lib\multiprocessing\pool.py", line 865, in next
    raise value
  File "C:\mypath\Python\Python38-32\lib\multiprocessing\pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
TypeError: 'NoneType' object is not callable
0%| | 0/30 [00:00<?, ?it/s]
I presume you wanted to pass myfunction instead of Main to imap, consistently with the first example.
When you pass Main() to p.imap in r = list(tqdm.tqdm(p.imap(Main(), range(30)), total=30)), Python executes Main first and passes its return value (None) as the first argument to imap.
You should remove the parentheses after Main: r = list(tqdm.tqdm(p.imap(Main, range(30)), total=30)).
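Putting both points together, here is a minimal sketch of how the tail of the second snippet could look, assuming the goal is to run myfunction over lists with a live progress bar (myfunction, lists, and threads as defined above):
if __name__ == '__main__':
    # imap yields results one at a time, so tqdm can advance the bar
    # as each item completes; total tells it how many items to expect.
    with Pool(int(threads)) as p:
        results = list(tqdm.tqdm(p.imap(myfunction, lists), total=len(lists)))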

Python crawler for IEEE paper keywords

I am trying to use a crawler to get IEEE paper keywords, but now I get an error.
How can I fix my crawler?
My code is here:
import requests
import json
import re
from bs4 import BeautifulSoup

ieee_content = requests.get("http://ieeexplore.ieee.org/document/8465981", timeout=180)
soup = BeautifulSoup(ieee_content.text, 'xml')
tag = soup.find_all('script')
for i in tag[9]:
    s = json.loads(re.findall('global.document.metadata=(.*;)', i)[0].replace("'", '"').replace(";", ''))
and the error is here:
Traceback (most recent call last):
  File "G:/github/爬蟲/redigg-leancloud/crawlers/sup_ieee_keywords.py", line 90, in <module>
    a.get_es_data(offset=0, size=1)
  File "G:/github/爬蟲/redigg-leancloud/crawlers/sup_ieee_keywords.py", line 53, in get_es_data
    self.get_data(link=ieee_link, esid=es_id)
  File "G:/github/爬蟲/redigg-leancloud/crawlers/sup_ieee_keywords.py", line 65, in get_data
    s = json.loads(re.findall('global.document.metadata=(.*;)', i)[0].replace(";", '').replace("'", '"'))
IndexError: list index out of range
Here's another answer. I don't know what you are doing with 's' after the load; in my code the replace happens after the load. The code below doesn't throw an error, but again: how are you using 's'?
import requests
import json
import re
from bs4 import BeautifulSoup

ieee_content = requests.get("http://ieeexplore.ieee.org/document/8465981", timeout=180)
soup = BeautifulSoup(ieee_content.text, 'xml')
tag = soup.find_all('script')
# i is a list
for i in tag[9]:
    metadata_format = re.compile(r'global.document.metadata=.*', re.MULTILINE)
    metadata = re.findall(metadata_format, i)
    if len(metadata) != 0:
        # convert the list
        convert_to_json = json.dumps(metadata)
        x = json.loads(convert_to_json)
        s = x[0].replace("'", '"').replace(";", '')
        ###########################################
        # I don't know what you plan to do with 's'
        ###########################################
        print (s)
Apparently, in line 65, some of the data provided in i did not suit the regex pattern you're trying to use, so re.findall returned an empty list and your [0] failed.
Solution:
x = re.findall('global.document.metadata=(.*;)', i)
if x:
    s = json.loads(x[0].replace("'", '"').replace(";", ''))

Decode binary from xmlrpc in Python

I'm new to Python and XML-RPC, and I'm stuck decoding binary data coming from a public service.
The service request is made with this code:
from xmlrpc.client import Server
import xmlrpc.client
from pprint import pprint
DEV_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
logFile = open('stat.txt', 'w')
s1 = Server('http://muovi.roma.it/ws/xml/autenticazione/1')
s2 = Server('http://muovi.roma.it/ws/xml/paline/7')
token = s1.autenticazione.Accedi(DEV_KEY, '')
res = s2.paline.GetStatPassaggi(token)
pprint(res, logFile)
and the response is:
{'id_richiesta': '257a369dbf46e41ba275f8c821c7e1e0',
'risposta': {'periodi_aggregazione': <xmlrpc.client.Binary object at 0x0000027B7D6E2588>,
'tempi_attesa_percorsi': <xmlrpc.client.Binary object at 0x0000027B7D9276D8>}}
I need to decode these two binary objects, and I'm stuck with this code:
from xmlrpc.client import Server
import xmlrpc.client
from pprint import pprint
DEV_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxx'
logFile = open('stat.txt', 'w')
s1 = Server('http://muovi.roma.it/ws/xml/autenticazione/1')
s2 = Server('http://muovi.roma.it/ws/xml/paline/7')
token = s1.autenticazione.Accedi(DEV_KEY, '')
res = s2.paline.GetStatPassaggi(token)
dat = xmlrpc.client.Binary(res)
out = xmlrpc.client.Binary.decode(dat)
pprint(out, logFile)
that ends in:
Traceback (most recent call last):
  File "stat.py", line 18, in <module>
    dat = xmlrpc.client.Binary(res)
  File "C:\Users\Leonardo\AppData\Local\Programs\Python\Python35\lib\xmlrpc\client.py", line 389, in __init__
    data.__class__.__name__)
TypeError: expected bytes or bytearray, not dict
The only doc I found for xmlrpc.client is the one at docs.python.org, but I can't figure out how to decode these binaries.
If the content of the res variable (what you get from the second (s2) server) is the response you pasted into the question, then you should modify the last 3 lines of your 2nd snippet as follows (you already have 2 Binary objects in the res dictionary):
# ... Existing code
res = s2.paline.GetStatPassaggi(token)
answer = res.get("risposta", dict())
aggregation_periods = answer.get("periodi_aggregazione", xmlrpc.client.Binary())
timeout_paths = answer.get("tempi_attesa_percorsi", xmlrpc.client.Binary())
print(aggregation_periods.data)
print(timeout_paths.data)
Notes:
According to [Python.Docs]: xmlrpc.client - Binary Objects (emphasis is mine):
Binary objects have the following methods, supported mainly for internal use by the marshalling/unmarshalling code:
I wasn't able to connect (and thus test the solution), since DEV_KEY is (obviously) fake
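As a follow-up, a minimal sketch of getting at the raw bytes, with the caveat that I'm guessing the payloads are UTF-8 text (they might just as well be compressed or some other binary format, in which case decode accordingly):
# The .data attribute of an xmlrpc.client.Binary object holds the raw bytes.
raw = aggregation_periods.data
# Assumption: the bytes are UTF-8 text; errors='replace' avoids crashing on anything else.
print(raw.decode('utf-8', errors='replace'))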

local variable 'texts' referenced before assignment

This is code to extract Unicode values from text files, but it gives me the following error.
# -*- coding: utf-8 -*-
import codecs
import os
#from urllib import urlopen
from bs4 import BeautifulSoup
import re
##import nltk

#def remove_content_li(input_document) :
#    soup = BeautifulSoup(input_document)

def extract_unicode(input):
    _ascii_letters = re.compile(r'[a-zA-Z]', flags=re.UNICODE)
    symbols = re.compile(r'[{} &+( )" =!.?.:.. / | » © : >< # « ,] 1 2 3 4 5 6 7 8 9 _ - + ; [ ] %', flags=re.UNICODE)
    soup = BeautifulSoup(open(input, 'r'), 'lxml')
    for li in soup.find_all('li'):
        li.decompose()
        texts = soup.findAll(text=True)

    def contains_unicode(text):
        try:
            str(text)
        except:
            return True
        return False

    result = ' '.join((text for text in texts if contains_unicode(texts)))
    result = _ascii_letters.sub(" ", result)
    result = symbols.sub(" ", result)
    ##print(result)
    ##result = nltk.clean_html(result)
    result.replace('*', '')
This is the error I get:
  File "e3.py", line 50, in <module>
    extract_unicode((os.path.join(dirname, filename)))
  File "e3.py", line 30, in extract_unicode
    result = ' '.join((text for text in texts if contains_unicode(texts)))
UnboundLocalError: local variable 'texts' referenced before assignment
The error is telling you exactly what the problem is: you're using the variable texts before it has been assigned. Most likely soup.find_all('li') returned an empty list; since you only set texts inside that loop, it is never defined when no li tag is found. A sketch of the fix is below.
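A minimal sketch of the fix, assuming the intent was to strip all the li tags first and then collect the remaining text nodes once (this just moves the assignment out of the loop so texts is always defined):
# Remove every <li> first...
for li in soup.find_all('li'):
    li.decompose()
# ...then collect the remaining text nodes unconditionally, so 'texts'
# exists even when the page contains no <li> at all.
texts = soup.findAll(text=True)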
