I'm trying to scrape some data using the Spotify API. The code below works and returns a lot of text when I search for the track name 'if i can't'. The beginning of the output from the API prints on my website and looks like this:
It looks like a dictionary except for the funny b' at the start. Also I can't access it like a dictionary. If I try
return raw_data['info']
it throws up an error. Similarly, if I try to find its type (so return type(raw_data) instead of return raw_data), the page comes up blank.
Is there someway to save the output from the data.read() in the form of a dictionary? Using
raw_data = ast.literal_eval(raw_data)
throws up an error.
#!/usr/local/bin/python3.2
# -*- coding: utf-8 -*-
import cherrypy
import numpy as np
import urllib.request
class Root(object):
#cherrypy.expose
def index(self):
a_query = Query()
text = a_query.search()
return '''<html>
Welcome to Spoti.py! %s
</html>''' %text
class Query():
def __init__(self):
self.qstring = '''if i can't'''
def space_to_plus(self):
'''takes the instance var qstring
replaces ' ' with '+'
-----------------------
returns nothing'''
self.qstring = self.qstring.replace(' ', '+')
def search(self):
self.space_to_plus()
url = 'http://ws.spotify.com/search/1/track.json?q=' + self.qstring
data = urllib.request.urlopen(url)
raw_data = data.read()
#return raw_data['info']
#return type(raw_data)
return raw_data
cherrypy.config.update({
'environment': 'production',
'log.screen': False,
'server.socket_host': '127.0.0.1',
'server.socket_port': 15850,
#'tools.encode.on': True,
#'tools.encode.encoding': 'utf-8',
})
cherrypy.config.update({'tools.sessions.on': True})
cherrypy.quickstart(Root())
What you have there is a JSON string. The b at the beginning indicates you are printing a a byte string literal. What you have to do is parse the JSON. Simply do this:
import json
...
info_dict = json.loads(raw_data)
Related
this is webhook
. i recive json form api with this . i need extract json tags and send to mysql database . problem is it not sent all json tags . i think it need loop at the step i tag with this step at code . thanks .
import json
import urllib.parse
import urllib.request
import mysql.connector
urls = ('/.*', 'hooks')
app = web.application(urls, globals())
class hooks:
def POST(self):
data = web.data()
print()
print('DATA RECEIVED:')
print(data)
print()
cts = data.decode('utf-8') #decode bytes to string
r1 = cts.replace(cts[:9], '')
parsed = urllib.parse.unquote_plus(r1) # ready for post
print(parsed)
print(cts)
print(type(cts))
myurl = "https://webhook.site/c0e861b0-3cc1-42c2-a0c6-54ad980b01b0"
req = urllib.request.Request(myurl)
req.add_header('Content-Type', 'application/json; charset=utf-8')
jsondata = parsed
jsondataasbytes = jsondata.encode('utf-8') # convert to be bytes
req.add_header('Content-Length', len(jsondataasbytes))
print(jsondataasbytes)
response = urllib.request.urlopen(req, jsondataasbytes)
test_dict = json.loads(parsed)[0]
print(type(test_dict))
# Extracting specific keys from dictionary <<<<<<THIS STEP>>>>>>>>>
indic_label = test_dict['indicator_label']
status = test_dict['status']
creation_date = test_dict['creation_date']
laststatus = test_dict['last_status']
base = test_dict['base_currency']
quote_currency = test_dict['quote_currency']
indic = test_dict['indicator']
prices = test_dict['prices']
mydb = mysql.connector.connect(
host="*",
user="*",
password="*",
database="*"
)
cursor = mydb.cursor()
cursor.execute("""INSERT INTO allcoins
(base,quote_currency , indic,status,laststatus,creation_date,prices,indic_label)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s)""",
(base, quote_currency, indic, status, laststatus, creation_date, prices, indic_label))
mydb.commit()
cursor.close()
mydb.close()
return 'OK'
if __name__ == '__main__':
app.run()
you can post test data to this hook by this curl
curl -d "messages=%5B%7B%22values%22%3A+%7B%22momentum%22%3A+%220.00%22%7D%2C+%22exchange%22%3A+%22binance%22%2C+%22market%22%3A+%22BNT%2FETH%22%2C+%22base_currency%22%3A+%22BNT%22%2C+%22quote_currency%22%3A+%22ETH%22%2C+%22indicator%22%3A+%22momentum%22%2C+%22indicator_number%22%3A+0%2C+%22analysis%22%3A+%7B%22config%22%3A+%7B%22enabled%22%3A+true%2C+%22alert_enabled%22%3A+true%2C+%22alert_frequency%22%3A+%22once%22%2C+%22signal%22%3A+%5B%22momentum%22%5D%2C+%22hot%22%3A+0%2C+%22cold%22%3A+0%2C+%22candle_period%22%3A+%224h%22%2C+%22period_count%22%3A+10%7D%2C+%22status%22%3A+%22hot%22%7D%2C+%22status%22%3A+%22hot%22%2C+%22last_status%22%3A+%22hot%22%2C+%22prices%22%3A+%22+Open%3A+0.000989+High%3A+0.000998+Low%3A+0.000980+Close%3A+0.000998%22%2C+%22lrsi%22%3A+%22%22%2C+%22creation_date%22%3A+%222020-05-10+16%3A16%3A23%22%2C+%22hot_cold_label%22%3A+%22%22%2C+%22indicator_label%22%3A+%22%22%2C+%22price_value%22%3A+%7B%22open%22%3A+0.000989%2C+%22high%22%3A+0.000998%2C+%22low%22%3A+0.00098%2C+%22close%22%3A+0.000998%7D%2C+%22decimal_format%22%3A+%22%25.6f%22%7D%2C+%7B%22values%22%3A+%7B%22leading_span_a%22%3A+%220.00%22%2C+%22leading_span_b%22%3A+%220.00%22%7D%2C+%22exchange%22%3A+%22binance%22%2C+%22market%22%3A+%22BNT%2FETH%22%2C+%22base_currency%22%3A+%22BNT%22%2C+%22quote_currency%22%3A+%22ETH%22%2C+%22indicator%22%3A+%22ichimoku%22%2C+%22indicator_number%22%3A+1%2C+%22analysis%22%3A+%7B%22config%22%3A+%7B%22enabled%22%3A+true%2C+%22alert_enabled%22%3A+true%2C+%22alert_frequency%22%3A+%22once%22%2C+%22signal%22%3A+%5B%22leading_span_a%22%2C+%22leading_span_b%22%5D%2C+%22hot%22%3A+true%2C+%22cold%22%3A+true%2C+%22candle_period%22%3A+%224h%22%2C+%22hot_label%22%3A+%22Bullish+Alert%22%2C+%22cold_label%22%3A+%22Bearish+Alert%22%2C+%22indicator_label%22%3A+%22ICHIMOKU+4+hr%22%2C+%22mute_cold%22%3A+false%7D%2C+%22status%22%3A+%22cold%22%7D%2C+%22status%22%3A+%22cold%22%2C+%22last_status%22%3A+%22cold%22%2C+%22prices%22%3A+%22+Open%3A+0.000989+High%3A+0.000998+Low%3A+0.000980+Close%3A+0.000998%22%2C+%22lrsi%22%3A+%22%22%2C+%22creation_date%22%3A+%222020-05-10+16%3A16%3A23%22%2C+%22hot_cold_label%22%3A+%22Bearish+Alert%22%2C+%22indicator_label%22%3A+%22ICHIMOKU+4+hr%22%2C+%22price_value%22%3A+%7B%22open%22%3A+0.000989%2C+%22high%22%3A+0.000998%2C+%22low%22%3A+0.00098%2C+%22close%22%3A+0.000998%7D%2C+%22decimal_format%22%3A+%22%25.6f%22%7D%2C+%7B%22values%22%3A+%7B%22bbp%22%3A+%220.96%22%2C+%22mfi%22%3A+%2298.05%22%7D%2C+%22exchange%22%3A+%22binance%22%2C+%22market%22%3A+%22BNT%2FETH%22%2C+%22base_currency%22%3A+%22BNT%22%2C+%22quote_currency%22%3A+%22ETH%22%2C+%22indicator%22%3A+%22bbp%22%2C+%22indicator_number%22%3A+1%2C+%22analysis%22%3A+%7B%22config%22%3A+%7B%22enabled%22%3A+true%2C+%22alert_enabled%22%3A+true%2C+%22alert_frequency%22%3A+%22once%22%2C+%22candle_period%22%3A+%224h%22%2C+%22period_count%22%3A+20%2C+%22hot%22%3A+0.09%2C+%22cold%22%3A+0.8%2C+%22std_dev%22%3A+2%2C+%22signal%22%3A+%5B%22bbp%22%2C+%22mfi%22%5D%2C+%22hot_label%22%3A+%22Lower+Band%22%2C+%22cold_label%22%3A+%22Upper+Band+BB%22%2C+%22indicator_label%22%3A+%22Bollinger+4+hr%22%2C+%22mute_cold%22%3A+false%7D%2C+%22status%22%3A+%22cold%22%7D%2C+%22status%22%3A+%22cold%22%2C+%22last_status%22%3A+%22cold%22%2C+%22prices%22%3A+%22+Open%3A+0.000989+High%3A+0.000998+Low%3A+0.000980+Close%3A+0.000998%22%2C+%22lrsi%22%3A+%22%22%2C+%22creation_date%22%3A+%222020-05-10+16%3A16%3A23%22%2C+%22hot_cold_label%22%3A+%22Upper+Band+BB%22%2C+%22indicator_label%22%3A+%22Bollinger+4+hr%22%2C+%22price_value%22%3A+%7B%22open%22%3A+0.000989%2C+%22high%22%3A+0.000998%2C+%22low%22%3A+0.00098%2C+%22close%22%3A+0.000998%7D%2C+%22decimal_format%22%3A+%22%25.6f%22%7D%5D" -X POST http://192.168.30.1
Perhaps define all variables upfront because you still need them to write to the db, then check if they are in the response and update:
indic_label = ''
status = ''
creation_date = ''
laststatus = ''
base = ''
quote_currency = ''
indic =''
prices = ''
if test_dict['indicator_label']:
indic_label = test_dict['indicator_label']
if test_dict['status']:
status = test_dict['status']
...
...
If it's an all or none situation, you can check for one variable then exit, otherwise check for each of them.
I am currently working on shopware API When I am parsing the URL of
like
http://192.168.0.100/shopware531/api
and give me an error that:~
connection_type = SCHEME_TO_CONNECTION[scheme]
KeyError: u' http'
Using the
def buildHttpQuery(self, taxonomy, parameters):
if taxonomy.startswith('/'):
taxonomy = taxonomy[1:]
if not self.baseurl.endswith('/'):
self.baseurl += '/'
url = urljoin(self.baseurl, taxonomy)
url_parts = list(urlparse(url))
query = dict(parse_qsl(url_parts[4]))
query.update(parameters)
url_parts[4] = urlencode(query)
url = urlunparse(url_parts)
return url
and url return is :~ http://192.168.0.100/shopware531/api
I have the similiar problem but with bytes. I have link like: b"https://google.com" and I use httplib2.request(str(link)) because this request wants string instead of bytes. Later on debugger I saw that str function transforms b'https://google.com' into b'https://google.com' and cause KeyError. So after using b'https://google.com'.decode('utf-8') it works.
Hello this is what I want to do; I simply want to switch a word,"with" here with non-english, translation of the word "with".
return "<a href='%(verify_read)s?next=%(target_url)s'>%(sender)s %(verb)s %(target)s with %(action)s</a>" %context
I'm unable to use other non-english in the return. I don't know why...I have python file, I'm using django, and have put # -- coding: utf-8 --
at the top of my python file
This is my full code
def __unicode__(self):
target_url = self.target_object.get_absolute_url()
context = {
"sender":self.sender_object,
"verb":self.verb,
"action":self.action_object,
"target":self.target_object,
"verify_read": reverse("notifications_read", kwargs={"id": self.id}),
"target_url":target_url,
}
if self.target_object:
if self.action_object and target_url:
return "%(sender)s %(verb)s <a href='%(verify_read)s?next=%(target_url)s'>%(target)s</a> with %(action)s" %context
if self.action_object and not target_url:
return "%(sender)s %(verb)s %(target)s with %(action)s" %context
return "%(sender)s %(verb)s %(target)s" %context
return "%(sender)s %(verb)s" %context
#property
def get_link(self):
try:
target_url = self.target_object.get_absolute_url()
except:
target_url = reverse("notifications_all")
context = {
"sender": self.sender_object,
"verb": self.verb,
"action": self.action_object,
"target": self.target_object,
"verify_read": reverse("notifications_read", kwargs={"id": self.id}),
"target_url": target_url,
}
if self.target_object:
return "<a href='%(verify_read)s?next=%(target_url)s'>%(sender)s %(verb)s %(target)s with %(action)s</a>" %context
else:
return "<a href='%(verify_read)s?next=%(target_url)s'>%(sender)s %(verb)s</a>" %context
Edit:I had similar problem, and I solved it by removing string and encode
#login_required
def get_notifications_ajax(request):
if request.is_ajax() and request.method == "POST":
notifications = Notification.objects.all_for_user(MyProfile.objects.get(user=request.user)).recent()
count = notifications.count()
notes = []
for note in notifications:
notes.append(note.get_link.encode('utf-8'))
data = {
"notifications": notes,
"count": count,
}
print data
json_data = json.dumps(data)
print json_data
return HttpResponse(json_data, content_type='application/json')
else:
raise Http404
I encoded this line;
notes.append(note.get_link.encode('utf-8'))
I think I need to do similar thing but don't know
The __unicode__() magic method MUST return a unicode string (an instance of the unicode type), but you are returning a byte string (instance of the str type).
Adding the "# coding" mark on top of you code won't turn byte strings into unicode ones, it will only tells Python that your byte strings litterals are utf-8 encoded - but they are still byte strings.
The solution is dead simple: make sure you return a unicode string. First make sure each and every string in your context is unicode, then make all your litteral strings unicode too by prefixing them with a u, ie:
return u"%(sender)s %(verb)s <a href='%(verify_read)s?next=%(target_url)s'>%(target)s</a> with %(action)s" % context
# ...
return u"%(sender)s %(verb)s %(target)s with %(action)s" % context
# ...
return u"%(sender)s %(verb)s %(target)s" % context
# ...
return u"%(sender)s %(verb)s" % context
If you don't grasp the difference between a unicode string and a utf-8 encoded byte string, you definitly want to read this : http://www.joelonsoftware.com/articles/Unicode.html
I am trying to write a python web app that will take some sql and a bunch of other things and return a Json file, the latter part is not the issue and I have not even put it in the script yet, the issue is that the url being passed is being utf-8 encoded and then url encoded
turning our example
query :SELECT + ;
test: 2
into
test=2&query=SELECT+%2B+%3B
This seems to be ok
but the receiving get seems to think that it can expand the codes back into chars
and it receives
test=2&query=SELECT+++;
then this is url decoded and it chops off the semicolon, and i want to keep the semicolon!
it also turns the +'s which are rightly spaces into spaces but the previous bug made the real plus code into a literal plus which turns it into a space!
{'test': '2', 'query': 'SELECT '}
code is as follows:
#!/usr/bin/python
import web
import psycopg2
import re
import urllib
import urlparse
urls = (
'/query', 'query',
'/data/(.*)', 'data'
)
app = web.application(urls, globals())
render = web.template.render('templates/')
class query:
def GET(self):
return render.query()
def POST(self):
i = web.input()
data = {}
data['query'] = i.sql.encode('utf-8')
data['test'] = '2'
murl = urllib.urlencode(data)
return "go!"
class data:
def GET(self, urlEncodedDict):
print "raw type:", type(urlEncodedDict)
print "raw:", urlEncodedDict
urlEncodedDict = urlEncodedDict.encode('ascii', 'ignore')
print "ascii type:", type(urlEncodedDict)
print "ascii:", urlEncodedDict
data = dict(urlparse.parse_qsl(urlEncodedDict, 1)) #bad bit
print "dict:", data
print "element:", data['query']
if ( re.match('SELECT [^;]+ ;', data['query'])):
return 'good::'+data['query']
else:
return 'Bad::'+data['query']
if __name__ == "__main__":
app.run()
Url generated from my test form is:
http://localhost:8080/data/test=2&query=SELECT+%2B+%3B
Output is as follows:
raw type: <type 'unicode'>
raw: test=2&query=SELECT+++;
ascii type: <type 'str'>
ascii: test=2&query=SELECT+++;
dict: {'test': '2', 'query': 'SELECT '}
element: SELECT
127.0.0.1:53272 - - [16/Nov/2012 11:05:44] "HTTP/1.1 GET /data/test=2&query=SELECT+++;" - 200 OK
127.0.0.1:53272 - - [16/Nov/2012 11:05:44] "HTTP/1.1 GET /favicon.ico" - 404 Not Found
I wish to get the same dict out of the get that i encode in the first place.
If you want to pass data into a GET request, you need to use the query string syntax using the question mark character [?] as a delimiter.
The URL should be:
http://localhost:8080/data/?test=2&query=SELECT+%2B+%3B
After that, you just have to use web.input() to get a dictionary with all arguments already decoded.
urls = (
'/query', 'query',
'/data/', 'data'
)
[...]
class data:
def GET(self):
data = web.input()
print "dict:", data
print "element:", data['query']
if ( re.match('SELECT [^;]+ ;', data['query'])):
return 'good::'+data['query']
else:
return 'Bad::'+data['query']
Result:
dict: <Storage {'test': u'2', 'query': u'SELECT + ;'}>
element: SELECT + ;
127.0.0.1:44761 - - [16/Nov/2012 15:06:06] "HTTP/1.1 GET /data/" - 200 OK
I have a small problem here. So, I am writing some calls for a well known REST API. Everything is going well, except the fact that I want all the response to be displayed as a list(which is better for me to manipulate). My function is this:
import sys, httplib
HOST = "api.sugarsync.com"
API_URL = "https://api.sugarsync.com"
def do_request(xml_location):
request = open(xml_location,"r").read()
webservice = httplib.HTTPS(HOST)
webservice.putrequest("POST", "authorization", API_URL)
webservice.putheader("Host", HOST)
webservice.putheader("User-Agent","Python post")
webservice.putheader("Content-type", "application/xml")
webservice.putheader("Content-type", "application/xml")
webservice.putheader("Accept", "*/*")
webservice.putheader("Content-length", "%d" % len(request))
webservice.endheaders()
webservice.send(request)
statuscode, statusmessage, header = webservice.getreply()
result = webservice.getfile().read()
return statuscode, statusmessage, header
return result
do_request('C://Users/my_user/Documents/auth.xml')
I am used to use split() but in this case the result is this:
[201, 'Created', <httplib.HTTPMessage instance at 0x0000000001F68AC8>]
Well, I need also the third object(httplib.HTTPMessage instance at 0x0000000001F68AC8>), to be displayed as list, to extract some of the data in there.
Thanks in advance!
httplib.HTTPMessage is something like dict, here is a sample:
import httplib
from cStringIO import StringIO
h = httplib.HTTPMessage(StringIO(""))
h["Content-Type"] = "text/plain"
h["Content-Length"] = "1234"
print h.items()
you just call it's function items(), it will return a list of headers