Flask server could not handle non-ASCII characters - Python

I have written a simple application using Flask. Its main objective is to expose CLD2 (a language detector) via POST and GET methods. It works well for English, but for any other language, such as Urdu or Arabic, it gives invalid results.
Following is the corresponding script:
# http://127.0.0.1:5000/cld2?text="Your input text string"
# OUTPUT (it gives output as we did in CC)
# "585&URDU-99-1155"
from flask import Flask, abort, jsonify, request
from flask_restful import Resource, Api, reqparse
import cld2
from bs4 import BeautifulSoup
import sys
import urllib2, urllib
import re

reload(sys)
sys.setdefaultencoding('utf8')

app = Flask(__name__)
api = Api(app)


class HelloWorld(Resource):
    def cld2_states(self, txt):
        txt = txt.encode("utf8")
        isReliable, textBytesFound, details = cld2.detect(txt)
        outstr = str(textBytesFound)
        for item in details:  # Iterate over the 3 detected languages
            if item[0] != "Unknown":
                outstr += '&' + item[0] + '-' + str(item[2]) + '-' + str(int(item[3]))
        return outstr

    def get(self):
        parser = reqparse.RequestParser()
        parser.add_argument('text', type=str)
        parser.add_argument('url', type=str)
        _dict = dict(parser.parse_args())
        if _dict["text"] is not None:
            value = _dict["text"]
            print type(value)
            return self.cld2_states(value)
        return None

    def post(self):
        data = request.get_json(force=True)
        # print data
        predict_request = [data['content']][1]
        out = self.cld2_states(predict_request)
        return jsonify(score=out)


api.add_resource(HelloWorld, '/cld2')

if __name__ == '__main__':
    app.run(debug=True, port=6161, host='0.0.0.0')
If I send a query via the GET method, it gives correct results, but the same query via the POST method returns just a number. If the text is in English, however, POST also gives a correct result.
My client is a simple Java application that iterates over files and detects their language one by one.

The problem might be with this line:
outstr = str(textBytesFound)
Instead of using str to convert from bytes to str, use str.decode(), like this:
outstr = textBytesFound.decode("utf-8")
(obviously if your text is not encoded with UTF-8, you need to tell Python the correct encoding to use)
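As a general illustration of the difference (shown in Python 3 for brevity; in the question's Python 2 code the same idea applies to str vs unicode), calling str() on a bytes object gives you an escaped repr rather than the decoded text:

raw = "اردو".encode("utf-8")   # bytes containing Urdu text
print(str(raw))                # b'\xd8\xa7\xd8\xb1\xd8\xaf\xd9\x88' - the repr, not the text
print(raw.decode("utf-8"))     # اردو - the actual characters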


How to validate data received via Telegram's Web App

I'm trying to validate WebApp data but the result is not what I wanted.
Telegram documentation:
data_check_string = ...
secret_key = HMAC_SHA256(<bot_token>, "WebAppData")
if (hex(HMAC_SHA256(data_check_string, secret_key)) == hash) {
    // data is from Telegram
}
My code:
import hmac
import hashlib

BOT_TOKEN = '5139539316:AAGVhDje2A3mB9yA_7l8-TV8xikC7KcudNk'

data_check_string = 'query_id=AAGcqlFKAAAAAJyqUUp6-Y62&user=%7B%22id%22%3A1246866076%2C%22first_name%22%3A%22Dante%22%2C%22last_name%22%3A%22%22%2C%22username%22%3A%22S_User%22%2C%22language_code%22%3A%22en%22%7D&auth_date=1651689536&hash=de7f6b26aadbd667a36d76d91969ecf6ffec70ffaa40b3e98d20555e2406bfbb'
data_check_arr = data_check_string.split('&')
needle = 'hash='
hash_item = ''
telegram_hash = ''
for item in data_check_arr:
    if item[0:len(needle)] == needle:
        telegram_hash = item[len(needle):]
        hash_item = item
data_check_arr.remove(hash_item)
data_check_arr.sort()
data_check_string = "\n".join(data_check_arr)
secret_key = hmac.new("WebAppData".encode(), BOT_TOKEN.encode(), hashlib.sha256).digest()
calculated_hash = hmac.new(data_check_string.encode(), secret_key, hashlib.sha256).hexdigest()
print(calculated_hash == telegram_hash)  # prints False
I'm trying to validate Web App data in Python, but my code doesn't give the intended result; the hash my code produces is different from Telegram's.
UPDATE: valid data added, and the bot token has been changed.
You need to unquote data_check_string
from urllib.parse import unquote
data_check_string = unquote('query_id=AAGcqlFKAAAAAJyqUUp6-Y62&user=%7B%22id%22%3A1246866076%2C%22first_name%22%3A%22Dante%22%2C%22last_name%22%3A%22%22%2C%22username%22%3A%22S_User%22%2C%22language_code%22%3A%22en%22%7D&auth_date=1651689536&hash=de7f6b26aadbd667a36d76d91969ecf6ffec70ffaa40b3e98d20555e2406bfbb')
And swap the arguments
calculated_hash = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()
You can replace the for-loops with a couple of lines (already incorporates kurdyukovpv's suggestion to unquote the query string):
data_check_string = sorted([chunk.split("=") for chunk in unquote(data_check_string).split("&")
                            if chunk[:len("hash=")] != "hash="],
                           key=lambda x: x[0])
data_check_string = "\n".join([f"{rec[0]}={rec[1]}" for rec in data_check_string])
EDIT: Figured I might as well just post the entire working function I got out of this thread:
import hmac
import hashlib
from urllib.parse import unquote


def validate(hash_str, init_data, token, c_str="WebAppData"):
    """
    Validates the data received from the Telegram web app, using the
    method documented here:
    https://core.telegram.org/bots/webapps#validating-data-received-via-the-web-app

    hash_str - the hash string passed by the web app
    init_data - the query string passed by the web app
    token - Telegram bot's token
    c_str - constant string (default = "WebAppData")
    """
    init_data = sorted([chunk.split("=")
                        for chunk in unquote(init_data).split("&")
                        if chunk[:len("hash=")] != "hash="],
                       key=lambda x: x[0])
    init_data = "\n".join([f"{rec[0]}={rec[1]}" for rec in init_data])

    secret_key = hmac.new(c_str.encode(), token.encode(), hashlib.sha256).digest()
    data_check = hmac.new(secret_key, init_data.encode(), hashlib.sha256)

    return data_check.hexdigest() == hash_str
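A quick usage sketch, reusing BOT_TOKEN and the sample query string and hash from the question (whether it prints True depends on that token really belonging to the Web App):

raw_init_data = 'query_id=AAGcqlFKAAAAAJyqUUp6-Y62&user=%7B%22id%22%3A1246866076%2C%22first_name%22%3A%22Dante%22%2C%22last_name%22%3A%22%22%2C%22username%22%3A%22S_User%22%2C%22language_code%22%3A%22en%22%7D&auth_date=1651689536&hash=de7f6b26aadbd667a36d76d91969ecf6ffec70ffaa40b3e98d20555e2406bfbb'
telegram_hash = 'de7f6b26aadbd667a36d76d91969ecf6ffec70ffaa40b3e98d20555e2406bfbb'
print(validate(telegram_hash, raw_init_data, BOT_TOKEN))  # True only for a genuine payload/token pair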

How to read information about an ontology using owlready2

I am working on a Flask app and want to load an ontology and print how many classes and how many individuals are in it.
Here is my code; it does not work:
import flask
from owlready2 import *

app = flask.Flask(__name__)
app.config["DEBUG"] = True


@app.route('/', methods=['GET'])
def start():
    onto_path.append("pizza.owl")
    onto = get_ontology("pizza.owl")
    onto.load()
    print(onto.classes())
    print(list(onto.individuals()))
    html = ''
    html += '<h2>clases: ' + onto.classes() + '</br>'
    html += '<h3>individuals: ' + onto.individuals()
    return html
    # return "<h1>Distant Reading Archive</h1><p>This site is a prototype API for distant reading of science fiction novels.</p>"


app.run()
The methods classes() and individuals() return a generator, so you should convert the generator into a list and take the length of that list:
n_classes = len(list(onto.classes()))
n_individuals = len(list(onto.individuals()))
After that, you will have the numbers in your variables and can concatenate them into your HTML.
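Applied to the route from the question, a minimal sketch (assuming the same pizza.owl file next to the app) could look like this:

@app.route('/', methods=['GET'])
def start():
    onto = get_ontology("pizza.owl").load()
    n_classes = len(list(onto.classes()))
    n_individuals = len(list(onto.individuals()))
    # cast the counts to str before concatenating them into the HTML
    return ('<h2>classes: ' + str(n_classes) + '</h2>'
            '<h3>individuals: ' + str(n_individuals) + '</h3>')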

In the controllers.py file of Odoo, how do I get the JSON string from the POST request body?

I have defined a class in the controllers.py file to receive HTTP requests. The remote server sends a POST request and the data in the request body is a JSON string.
I can get the data from the request body as a dictionary via http.request.jsonrequest, but now I need the original JSON string from the request body, not a dictionary, in order to verify a signature.
Re-serialising with json.dumps() cannot be used, because the string obtained this way is not the same as the JSON string in the original request body, which leads to a failure when verifying the signature.
What should I do about it? Please help me. Thank you.
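For example, re-serialising the parsed dictionary generally does not reproduce the original body byte for byte (plain Python, with a made-up payload):

import json

raw_body = '{"flowId": "123",  "action":"SIGN_FLOW_UPDATE"}'  # as sent by the remote server
parsed = json.loads(raw_body)
print(json.dumps(parsed) == raw_body)  # False: whitespace differs, so the signature check fails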
This is my controllers.py:
# -*- coding: utf-8 -*-
from odoo import http


class CallbackNotification(http.Controller):

    def safety_judgement(self):
        """
        :return:
        """
        header = http.request.httprequest.headers.environ
        signature = header['HTTP_X_TSIGN_OPEN_SIGNATURE']
        time_stamp = header['HTTP_X_TSIGN_OPEN_TIMESTAMP']
        remote_addr = http.request.httprequest.remote_addr
        if remote_addr != '47.99.80.224':
            return

    @http.route('/signature/process/my_odoo', type='json', auth='none')
    def receive_institution_auth(self, **kw):
        """
        :param kw:
        :return:
        """
        self.safety_judgement()
        request_body = http.request.jsonrequest
        action = request_body['action']
        flow_num = request_body['flowId']
        http_env = http.request.env
        sign_process_id = http_env['sign.process'].sudo().search([('flow_num', '=', flow_num)]).id
        if action == 'SIGN_FLOW_UPDATE':
            third_order = request_body['thirdOrderNo']
            name_id_user_list = third_order.split(',')
            model = name_id_user_list[0]
            record_id = name_id_user_list[1]
            approve_user_id = name_id_user_list[2]
            if approve_user_id != 'p':
                record_obj = http_env[model].sudo(user=int(approve_user_id)).browse(int(record_id))
            sign_result = request_body['signResult']
            result_description = request_body['resultDescription']
            account_num = request_body['accountId']
            org_or_account_num = request_body['authorizedAccountId']
            sign_user_id = http_env['sign.users'].sudo().search([('account_num', '=', account_num)]).id
            http_manual_env = http_env['manual.sign'].sudo()
            if account_num == org_or_account_num:
                manual_id = http_manual_env.search([('sign_process_id', '=', sign_process_id),
                                                    ('sign_user_id', '=', sign_user_id)]).id
            else:
                institution_id = http_env['institution.account'].sudo().search([('org_num', '=', org_or_account_num)]).id
                manual_id = http_manual_env.search([('sign_process_id', '=', sign_process_id),
                                                    ('sign_user_id', '=', sign_user_id),
                                                    ('institution_id', '=', institution_id)]).id
            if sign_result == 2:
                http_manual_env.browse(manual_id).write({'sign_result': 'success'})
                http.request._cr.commit()
                if approve_user_id != 'p':
                    record_obj.approve_action('approved', '')
                else:
                    http_env[model].sudo().browse(int(record_id)).write({'partner_sign_state': 'success'})
            elif sign_result == 3:
                http_manual_env.browse(manual_id).write({'sign_result': 'failed'})
                if approve_user_id == 'p':
                    http_env[model].sudo().browse(int(record_id)).write({'partner_sign_state': 'failed'})
            elif sign_result == 4:
                http_manual_env.browse(manual_id).write({'sign_result': 'reject'})
                http.request._cr.commit()
                if approve_user_id != 'p':
                    record_obj.approve_action('reject', result_description)
                else:
                    http_env[model].sudo().browse(int(record_id)).write({'partner_sign_state': 'reject', 'partner_reject_reason': result_description})
To obtain the raw body data, you can use the following code:
raw_body_data = http.request.httprequest.data
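If that attribute holds the original bytes as expected, a verification sketch could look like this (the shared secret, route path and HMAC-SHA256 scheme are assumptions for illustration; the e-sign provider's real algorithm may differ):

import hashlib
import hmac

from odoo import http


class CallbackSignatureDemo(http.Controller):

    @http.route('/signature/process/my_odoo_demo', type='json', auth='none')
    def verify_callback(self, **kw):
        raw_body = http.request.httprequest.data  # original JSON body, as bytes
        header = http.request.httprequest.headers.environ
        received = header.get('HTTP_X_TSIGN_OPEN_SIGNATURE', '')
        expected = hmac.new(b'shared-secret', raw_body, hashlib.sha256).hexdigest()
        if not hmac.compare_digest(expected, received):
            return  # signature mismatch: ignore the callback
        # signature is valid, continue with http.request.jsonrequest as usual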
You can send the value via JSON-RPC to your JSON controller.
JS file:
Pass the value when calling the controller, like this:
var ajax = require('web.ajax');
ajax.jsonRpc("/custom/url", 'call', {'Your Key': Your Value}).then(function (data) {
    if (data) {
        // Code
    } else {
        // Code
    }
});
Py file:
Get the data from the POST like this:
@http.route(['/custom/url'], type='json', auth="public", website=True)
def custom_cotroller(self, **post):
    get_data = post.get('Your Key')
    # Your customised code
Thanks
@Dipen Shah @CoolFlash95 @Charif DZ
Hello everyone, I've found a solution to this problem. But as I lay out the solution, I hope we can understand the root cause of the problem, so let's examine Odoo's source code:
from odoo.http import JsonRequest -- Odoo version 10.0 -- line 598
from odoo.http import JsonRequest -- Odoo version 11.0 -- line 609
In Odoo 10:
request = self.httprequest.stream.read(), then self.jsonrequest = json.loads(request)
In Odoo 11:
request = self.httprequest.get_data().decode(self.httprequest.charset), then self.jsonrequest = json.loads(request)
We find that the self object of JsonRequest has the attribute jsonrequest, whose type is dict. Unfortunately, the source code does not give self another attribute containing the original string from the request body. However, it is very easy to add such an attribute, so why not?
We can use setattr to dynamically replace methods from the source code. Let's change the __init__ method of JsonRequest and add another attribute named stream_str.
E.g. Odoo version 10, Python version 2.7:
# -*- coding: utf-8 -*-
import logging
from odoo.http import JsonRequest
import werkzeug
import json

_logger = logging.getLogger(__name__)


def __init__(self, *args):
    """
    We have copied the method __init__ directly from the source code and added
    only one line of code to it
    """
    super(JsonRequest, self).__init__(*args)
    self.jsonp_handler = None

    args = self.httprequest.args
    jsonp = args.get('jsonp')
    self.jsonp = jsonp
    request = None
    request_id = args.get('id')

    if jsonp and self.httprequest.method == 'POST':
        # jsonp 2 steps step1 POST: save call
        def handler():
            self.session['jsonp_request_%s' % (request_id,)] = self.httprequest.form['r']
            self.session.modified = True
            headers = [('Content-Type', 'text/plain; charset=utf-8')]
            r = werkzeug.wrappers.Response(request_id, headers=headers)
            return r
        self.jsonp_handler = handler
        return
    elif jsonp and args.get('r'):
        # jsonp method GET
        request = args.get('r')
    elif jsonp and request_id:
        # jsonp 2 steps step2 GET: run and return result
        request = self.session.pop('jsonp_request_%s' % (request_id,), '{}')
    else:
        # regular jsonrpc2
        request = self.httprequest.stream.read()

    # We added this line of code: a new attribute named stream_str contains the
    # original string from the request body when the request type is json.
    self.stream_str = request

    # Read POST content or POST Form Data named "request"
    try:
        self.jsonrequest = json.loads(request)
    except ValueError:
        msg = 'Invalid JSON data: %r' % (request,)
        _logger.info('%s: %s', self.httprequest.path, msg)
        raise werkzeug.exceptions.BadRequest(msg)

    self.params = dict(self.jsonrequest.get("params", {}))
    self.context = self.params.pop('context', dict(self.session.context))


# Replace the __init__ method in the source code with the new __init__ method,
# without changing the source code itself
setattr(JsonRequest, '__init__', __init__)
In the definition of the routing function, we can then do this:
# -*- coding: utf-8 -*-
from odoo.http import Controller, route, request


class CallbackNotification(Controller):

    @route('/signature/process/my_odoo', type='json', auth='none')
    def receive_institution_auth(self, **kw):
        # When type='json', the request is a JsonRequest object, so we can get
        # the new attribute stream_str very easily!
        stream_str = request.stream_str
Now the problem has been solved.

ValueError: stat: path too long for Windows on Jupyter Notebook parsing a URL request

I am trying to parse my company's OData feed to compute the proportion of late suppliers, using the CompanyName and LateDays fields.
I opened the resource and converted it into a string, following a really helpful post on requesting URLs with authentication, and obtained a string containing the whole report. The report is in XML, and I am using Python 3.7 in a Jupyter Notebook to handle it.
I found another post that parses an XML file similar to mine using a handler class, but my output is the error ValueError: stat: path too long for Windows.
How can I fix this?
Thanks!
import requests
import pandas as pd
import numpy as np
import base64
import urllib.request

request = urllib.request.Request('https://myUrl_OData')
base64string = base64.b64encode(bytes('%s:%s' % ('Myusername', 'Mypassword'), 'ascii'))
request.add_header("Authorization", "Basic %s" % base64string.decode('utf-8'))
result = urllib.request.urlopen(request)
resulttext = result.read()
text = resulttext.decode(encoding='utf-8', errors='ignore')

from xml.sax import parse
from xml.sax.handler import ContentHandler


class properties(ContentHandler):
    def __init__(self):
        self.elements = []        # stack of elements
        self.char_data = u''      # string buffer
        self.current_vendor = u''
        self.current_latedays = u''

    def startElement(self, name, attrs):
        if companyname == u'CompanyName':
            self.elements.append(u'CompanyName')
        if latedays == u'LateDays':
            self.elements.append(u'LateDays')

    def characters(self, chars):
        if len(self.elements) > 0 and self.elements[-1] in [u'CompanyName', u'LateDays']:
            self.char_data += chars

    def endElement(self, name):
        self.elements.pop() if len(self.elements) > 0 else None
        if companyname == u'CompanyName':
            self.current_vendor = self.char_data
            self.char_data = ''
        if latedays == u'LateDays':
            self.current_latedays = self.char_data
            self.char_data = ''
        if companyname == 'CompanyName':
            if self.current_latedays == u'LateDays':
                print('Found:', self.current_customer)
                # clear the buffers now that it is finished
                self.current_year = u''
                self.current_customer = u''
                self.char_data = u''


parse(r"\\\\?\\" + text, properties())
Your error doesn't seem to be related to XML parsing, but to your OS's limitations.
On a Windows-based OS, the path of a file cannot be longer than ~260 characters (ref).
Try to reduce the length of your filename, or reduce the number of nested folders leading to your data.
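Note also that xml.sax.parse() expects a filename or file-like object, so prepending a path prefix to the downloaded XML text makes Windows treat the whole document as a path. Since the report is already in memory, a sketch using parseString on the downloaded bytes sidesteps the path limit entirely (it still assumes the handler class is fixed so that its startElement/endElement checks use the name argument):

from xml.sax import parseString

# resulttext holds the raw bytes already read from the OData response above
parseString(resulttext, properties())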

Fetching language detection from the Google API

I have a CSV with keywords in one column and the number of impressions in a second column.
I'd like to provide the keywords in a URL (while looping) and have the Google language API return which language each keyword is in.
I have it working manually. If I enter (with the correct API key):
http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&key=myapikey&q=merde
I get:
{"responseData": {"language":"fr","isReliable":false,"confidence":6.213709E-4}, "responseDetails": null, "responseStatus": 200}
which is correct; 'merde' is French.
So far I have this code, but I keep getting server-unreachable errors:
import time
import csv
from operator import itemgetter
import sys
import fileinput
import urllib2
import json

E_OPERATION_ERROR = 1
E_INVALID_PARAMS = 2


# not working
def parse_result(result):
    """Parse a JSONP result string and return a list of terms"""
    # Deserialize JSON to Python objects
    result_object = json.loads(result)
    # Get the rows in the table, then get the second column's value
    # for each row
    return row in result_object


# not working
def retrieve_terms(seedterm):
    print(seedterm)
    """Retrieves and parses data and returns a list of terms"""
    url_template = 'http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&key=myapikey&q=%(seed)s'
    url = url_template % {"seed": seedterm}
    try:
        with urllib2.urlopen(url) as data:
            data = perform_request(seedterm)
            result = data.read()
    except:
        sys.stderr.write('%s\n' % 'Could not request data from server')
        exit(E_OPERATION_ERROR)
    #terms = parse_result(result)
    #print terms
    print result


def main(argv):
    filename = argv[1]
    csvfile = open(filename, 'r')
    csvreader = csv.DictReader(csvfile)
    rows = []
    for row in csvreader:
        rows.append(row)
    sortedrows = sorted(rows, key=itemgetter('impressions'), reverse=True)
    keys = sortedrows[0].keys()
    for item in sortedrows:
        retrieve_terms(item['keywords'])
    try:
        outputfile = open('Output_%s.csv' % (filename), 'w')
    except IOError:
        print("The file is active in another program - close it first!")
        sys.exit()
    dict_writer = csv.DictWriter(outputfile, keys, lineterminator='\n')
    dict_writer.writer.writerow(keys)
    dict_writer.writerows(sortedrows)
    outputfile.close()
    print("File is Done!! Check your folder")


if __name__ == '__main__':
    start_time = time.clock()
    main(sys.argv)
    print("\n")
    print time.clock() - start_time, "seconds for script time"
Any idea how to finish the code so that it will work? Thank you!
Try to add referrer, userip as described in the docs:
An area to pay special attention to relates to correctly identifying yourself in your requests. Applications MUST always include a valid and accurate http referer header in their requests. In addition, we ask, but do not require, that each request contains a valid API Key. By providing a key, your application provides us with a secondary identification mechanism that is useful should we need to contact you in order to correct any problems. Read more about the usefulness of having an API key.

Developers are also encouraged to make use of the userip parameter (see below) to supply the IP address of the end-user on whose behalf you are making the API request. Doing so will help distinguish this legitimate server-side traffic from traffic which doesn't come from an end-user.
Here's an example based on the answer to the question "access to google with python":
#!/usr/bin/python
# -*- coding: utf-8 -*-
import json
import urllib, urllib2
from pprint import pprint

api_key, userip = None, None
query = {'q': 'матрёшка'}
referrer = "https://stackoverflow.com/q/4309599/4279"

if userip:
    query.update(userip=userip)
if api_key:
    query.update(key=api_key)

url = 'http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&%s' % (
    urllib.urlencode(query))
request = urllib2.Request(url, headers=dict(Referer=referrer))
json_data = json.load(urllib2.urlopen(request))
pprint(json_data['responseData'])
Output
{u'confidence': 0.070496580000000003, u'isReliable': False, u'language': u'ru'}
Another issue might be that seedterm is not properly quoted:
if isinstance(seedterm, unicode):
    value = seedterm
else:  # bytes
    value = seedterm.decode(put_encoding_here)

url = 'http://...q=%s' % urllib.quote_plus(value.encode('utf-8'))
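Putting the two points together, the questioner's retrieve_terms could be reworked roughly as follows (a Python 2 sketch; the UTF-8 assumption for the CSV and the missing error handling are deliberate simplifications):

import json
import urllib
import urllib2

def retrieve_terms(seedterm, api_key=None, referrer="https://stackoverflow.com/q/4309599/4279"):
    """Detect the language of seedterm via the Google AJAX language API."""
    if isinstance(seedterm, unicode):
        value = seedterm
    else:  # bytes from the CSV; adjust the encoding if it is not UTF-8
        value = seedterm.decode('utf-8')
    query = {'v': '1.0', 'q': value.encode('utf-8')}
    if api_key:
        query['key'] = api_key
    url = 'http://ajax.googleapis.com/ajax/services/language/detect?%s' % urllib.urlencode(query)
    request = urllib2.Request(url, headers={'Referer': referrer})
    return json.load(urllib2.urlopen(request))['responseData']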
