cgi.FieldStorage does not read JSON data from requests.post

cgi.FieldStorage does not read JSON data from requests.post - python

I've set up a simple server as described in the Python Cookbook (ch. 11):
# server.py
import cgi
def notfound_404(environ, start_response):
    """WSGI fallback handler that answers any request with a 404.

    Args:
        environ: The WSGI environment dict (not inspected).
        start_response: The WSGI ``start_response`` callable.

    Returns:
        A one-element list holding the response body as bytes.
    """
    # 'text/plain' is the registered media type; 'text-plain' is not a
    # valid Content-type value.
    start_response('404 Not found', [('Content-type', 'text/plain')])
    return [b'Not found']
class PathDispatcher:
    """WSGI application that routes requests by (HTTP method, path).

    Handlers are plain WSGI callables added through ``register``; requests
    that match no registered pair are passed to ``notfound_404``.
    """

    def __init__(self):
        # Maps (lowercased method, path) -> handler callable.
        self.pathmap = {}

    def __call__(self, environ, start_response):
        """Parse the request body, store it in ``environ['params']``, dispatch.

        Args:
            environ: The WSGI environment dict; gains a ``'params'`` key.
            start_response: The WSGI ``start_response`` callable.

        Returns:
            Whatever the selected handler returns.
        """
        path = environ['PATH_INFO']

        # Parse against a copy whose QUERY_STRING is blanked, so the
        # caller's environ is only changed by the 'params' key added below.
        form_environ = environ.copy()
        form_environ['QUERY_STRING'] = ''
        form = cgi.FieldStorage(
            fp=environ['wsgi.input'],
            environ=form_environ,
            keep_blank_values=True,
        )

        parsed = {}
        for name in form:
            parsed[name] = form.getvalue(name)
        environ['params'] = parsed

        route = (environ['REQUEST_METHOD'].lower(), path)
        handler = self.pathmap.get(route, notfound_404)
        return handler(environ, start_response)

    def register(self, method, path, function):
        """Associate ``function`` with ``method`` + ``path`` and return it."""
        route = (method.lower(), path)
        self.pathmap[route] = function
        return function
and
# app.py
def send_json(environ, start_response):
    """WSGI handler that echoes the parsed request parameters as plain text.

    Args:
        environ: The WSGI environment; ``environ['params']`` must be a
            mapping of parameter names to values.
        start_response: The WSGI ``start_response`` callable.

    Yields:
        A single UTF-8 encoded chunk with one ``key :: value`` line per
        parameter (an empty chunk when there are no parameters).
    """
    start_response('200 OK', [('Content-type', 'text/plain')])
    params = environ['params']
    # items() exists on both Python 2 and Python 3 dicts, whereas
    # iteritems() raises AttributeError on Python 3.
    result = ''.join(
        str(key) + ' :: ' + str(param) + '\n'
        for key, param in params.items()
    )
    yield result.encode('utf-8')
if __name__ == '__main__':
from server import PathDispatcher
from wsgiref.simple_server import make_server
dispatcher = PathDispatcher()
dispatcher.register('POST', '/send-json', send_json)
httpd = make_server('', 8080, dispatcher)
print('Listening on 8080...')
httpd.handle_request()
A simple agent sends some JSON data using the Python requests library:
# agent.py
import requests
import json
json_data = {'some': 'data', 'moredata':[{1: 'one'}, {2: 'two'}]}
url = "http://localhost:8080/send-json"
headers = {'Content-Type': 'application/json'}
r = requests.post(url=url, data=json.dumps(json_data), headers=headers)
print r.text
Unfortunately, it produces errors like this
Traceback (most recent call last):
File "/usr/lib/python2.7/wsgiref/handlers.py", line 85, in run
self.result = application(self.environ, self.start_response)
File "/home/phux/PycharmProjects/printoscope_sql_injection/server.py", line 24, in __call__
environ['params'] = {key: params.getvalue(key) for key in params}
File "/usr/lib/python2.7/cgi.py", line 517, in __iter__
return iter(self.keys())
File "/usr/lib/python2.7/cgi.py", line 582, in keys
raise TypeError, "not indexable"
TypeError: not indexable
127.0.0.1 - - [23/Sep/2015 12:25:17] "POST /initial-scan HTTP/1.1" 500 59
The application cannot iterate over the received data, and the cgi.FieldStorage object doesn't contain MiniFieldStorage fields, just the raw JSON data:
FieldStorage(None, None, '{"moredata": [{"1": "one"}, {"2": "two"}], "some": "data"}')
If I try to send data like this
r = requests.post(url=url, data=json_data)
everything works fine and FieldStorage looks fine
FieldStorage(None, None, [MiniFieldStorage('moredata', '1'), MiniFieldStorage('moredata', '2'), MiniFieldStorage('some', 'data')])
BUT I need to receive json data in the final application, so ...
Thanks in advance
Phux

--------------SOLUTION-------------
Just replace these lines in server.py
post_env = environ.copy()
post_env['QUERY_STRING'] = ''
params = cgi.FieldStorage(fp=environ['wsgi.input'], environ=post_env, keep_blank_values=True)
environ['params'] = {key: params.getvalue(key) for key in params}
With this
try:
request_body_size = int(environ.get('CONTENT_LENGTH', 0))
except ValueError:
request_body_size = 0
request_body = environ['wsgi.input'].read(request_body_size)
params = json.loads(request_body)
environ['params'] = {key: params[key] for key in params}
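cgi.FieldStorage expects form-encoded data and I am not sending a form; that is the root of the problem. Note that server.py also needs `import json` at the top for the replacement code to work. A slight modification to app.py is needed as well, but that is outside the scope of this question and can be easily adjusted.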

Related

Flask Sessions seem to be not working, can someone help me?

So I have been working on Flask and Stripe for a week now, and I have made some progress (I want to thank Stack Overflow). I really need help with sessions, though. I keep receiving a KeyError when using the Flask session and I'm not sure why. I have added a comment to each piece of code I'm referring to.
The comment is: #THIS IS THE CODE IM REFERRING TOO
Any help would be appreciated, I assume this might be due to the conflicting variable names that stripe uses and flask uses(since they both use session) but wouldn't I get an error for that specifically? Instead I just get a KeyError.
Thank you.
from flask import Flask, render_template, url_for, request, abort, session
import random
import subprocess
import stripe
app = Flask(__name__)
app.config['STRIPE_PUBLIC_KEY'] = 'PUBLIC KEY'
app.config['STRIPE_SECRET_KEY'] = 'SECRET KEY'
stripe.api_key = app.config['STRIPE_SECRET_KEY']
app.secret_key="SECRETFORSESSIONS"
@app.route('/', methods=["GET", "POST"])
def home():
    """Render the home page; on POST, remember the submitted form choices.

    For a POST request the ``location`` and ``industry`` form fields are
    copied into the Flask session before the page is rendered.
    """
    if request.method == "POST":
        location = request.form["location"] #THIS IS THE CODE IM REFERRING TOO
        industry = request.form["industry"] #THIS IS THE CODE IM REFERRING TOO
        session['location'] = location #THIS IS THE CODE IM REFERRING TOO
        session['industry'] = industry #THIS IS THE CODE IM REFERRING TOO
    # Every branch renders the same template, so a single return suffices.
    return render_template("home.html")
@app.route('/initialpay')
def index():
    """Render the initial payment page."""
    return render_template("index.html")
@app.route('/stripe_pay')
def stripe_pay():
    """Create a Stripe Checkout session for a single card payment.

    Returns:
        A dict with the new checkout session id and the configured Stripe
        public key.
    """
    # Named checkout_session so the local does not reuse the name of the
    # Flask ``session`` object imported at module level.
    checkout_session = stripe.checkout.Session.create(
        payment_method_types=['card'],
        line_items=[{
            'price': 'priceofanitem',
            'quantity': 1,
        }],
        mode='payment',
        success_url=url_for('thanks', _external=True) + '?session_id={CHECKOUT_SESSION_ID}',
        cancel_url=url_for('index', _external=True),
    )
    return {
        'checkout_session_id': checkout_session['id'],
        'checkout_public_key': app.config['STRIPE_PUBLIC_KEY']
    }
@app.route('/thanks')
def thanks():
    """Render the thank-you page that Checkout redirects to on success."""
    return render_template("thanks.html")
@app.route('/stripe_webhook', methods=['POST'])
def stripe_webhook():
    """Receive a Stripe webhook call and handle completed checkouts.

    Returns:
        An empty JSON object, paired with status 400 when the payload or
        its signature cannot be verified.
    """
    print('WEBHOOK CALLED')
    # content_length is None when the request carries no Content-Length
    # header; treat that as zero rather than comparing None with an int.
    if (request.content_length or 0) > 1024 * 1024:
        print('REQUEST TOO BIG')
        abort(400)
    payload = request.get_data()
    sig_header = request.environ.get('HTTP_STRIPE_SIGNATURE')
    endpoint_secret = 'ENDPOINT_SECRET'
    event = None
    try:
        event = stripe.Webhook.construct_event(
            payload, sig_header, endpoint_secret
        )
    except ValueError as e:
        # Invalid payload
        print('INVALID PAYLOAD')
        return {}, 400
    except stripe.error.SignatureVerificationError as e:
        # Invalid signatures
        print('INVALID SIGNATURE')
        return {}, 400
    # Handle the checkout.session.completed event
    if event['type'] == 'checkout.session.completed':
        # NOTE(review): this assignment makes `session` a local name for the
        # whole function, so the lookups below read the event's object and
        # not the Flask session imported at module level.
        session = event['data']['object']
        print(session)
        line_items = stripe.checkout.Session.list_line_items(session['id'], limit=1)
        print(line_items['data'][0]['description'])
        location = session["location"] #THIS IS THE CODE IM REFERRING TOO
        industry = session["industry"] #THIS IS THE CODE IM REFERRING TOO
        print(location)
        print(industry)
        emaildata = session.customer_details.email
        sessionParam = str(random.randint(10000, 90000))
        # NOTE(review): `checkout_session` is not assigned anywhere in this
        # function - confirm which object was intended here.
        if checkout_session.payment_status == 'paid':
            #DO TASKS
            pass
    return {}
if __name__ == "__main__":
app.run(threaded=True)
Here is the error:
[2022-11-05 07:11:51,884] ERROR in app: Exception on /stripe_webhook [POST]
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 2525, in wsgi_app
response = self.full_dispatch_request()
File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1822, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1820, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python3.10/dist-packages/flask/app.py", line 1796, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "/mainapp/__init__.py", line 85, in stripe_webhook
location = s["location"]
File "/usr/local/lib/python3.10/dist-packages/flask/sessions.py", line 80, in __getitem__
return super().__getitem__(key)
KeyError: 'location'

How Do You Write Files to IPFS via the HTTP API in Python

Could someone demonstrate writing a file to IPFS via the HTTP API (/files/write) and Python?
My code is getting messier every time I modify it.
https://pastebin.com/W9eNz1Pb
def api(*argv, **kwargs):
    """Call an endpoint of the local IPFS HTTP API and return the raw reply.

    Args:
        *argv: Path segments of the endpoint, e.g. ``"files", "read"``.
            Spaces inside a segment are turned into path separators.
        **kwargs: Query-string parameters given as strings. The special key
            ``post`` is not put in the query string; its mapping is
            form-encoded and sent as the request body instead.

    Returns:
        The response body as bytes, or a fixed JSON error document (bytes)
        when the request fails.
    """
    url = "http://127.0.0.1:5001/api/v0/"
    for arg in argv:
        arg = arg.replace(" ", "/")
        # endswith() tests the last character; the previous `arg[:-1]`
        # slice compared everything *except* the last character.
        if not arg.endswith("/"):
            arg += "/"
        url += arg
    url = url[0:-1]

    query = [
        # Escape each value so spaces, '&' or '=' cannot corrupt the URL;
        # '/' stays literal so MFS paths remain readable.
        name + "=" + urllib.parse.quote(kwargs[name], safe="/")
        for name in kwargs
        if name != "post"
    ]
    if query:
        url += "?" + "&".join(query)
    print(url)

    try:
        if "post" in kwargs:
            print("POST DATA")
            body = urllib.parse.urlencode(kwargs["post"]).encode("ascii")
            # Same timeout as the GET branch so a stalled node cannot hang
            # the caller indefinitely.
            with urllib.request.urlopen(url=url, data=body, timeout=300) as response:
                return response.read()
        else:
            with urllib.request.urlopen(url, timeout=300) as response:
                return response.read()
    except Exception:
        # Best-effort by design: any request failure is reported through
        # the error document. Unlike a bare `except:`, this no longer
        # swallows KeyboardInterrupt or SystemExit.
        return b"""{"ERROR": "CANNOT CONNECT TO IPFS!"}"""
class file(object):
    """Handle for one path, read and written through ``api``."""

    def __init__(self, p):
        """Remember ``p``, prefixing it with "/" when it has no leading one."""
        self.p = p
        if not self.p[0] == "/":
            self.p = "/" + self.p

    def read(self):
        """Return the decoded result of the ``files/read`` call for this path."""
        raw = api("files", "read", arg=self.p)
        return raw.decode()

    def write(self, s, *argv):
        """Send ``s`` through ``files/write`` and return the raw reply.

        Args:
            s: The data to send as the ``Data`` form field.
            *argv: Optional; when given, the first item is used as the
                write offset. Without it the ``truncate`` flag is sent.
        """
        # Either an explicit offset or the truncate flag, never both.
        if argv:
            mode = ("offset", str(argv[0]))
        else:
            mode = ("truncate", "True")
        # Built as ordered pairs so the keyword order matches the call
        # order: arg, offset/truncate, create, parents, post.
        options = dict([
            ("arg", self.p),
            mode,
            ("create", "True"),
            ("parents", "True"),
            ("post", {"Data": s}),
        ])
        return api("files", "write", **options)
file.read() works perfectly. But file.write() is being a pain in the rear.

Here's a minimal example to write a file via the /files/write HTTP API in Python:
import requests, urllib
NODE = "http://localhost:5001"
FILE_PATH = "./example" # path to file you're trying to add
MFS_PATH = "/example" # mfs path you're trying to write to
response = requests.post(NODE+"/api/v0/files/write?arg=%s&create=true" % urllib.parse.quote(MFS_PATH), files={FILE_PATH:open(FILE_PATH, 'rb')})

make sure ipfs daemon is running
ipfs init
ipfs daemon
Your URL endpoint is wrong. If you check the documentation, the URL for adding a file should be
url = "http://127.0.0.1:5001/api/v0/add"
Create a function for the upload so you can reuse this logic in other parts of your project:
def add_to_ipfs(filepath):
    """Upload a local file to the local IPFS node and return a gateway URL.

    Args:
        filepath: Path of the file to upload, as a "/"-separated string.

    Returns:
        A ``https://ipfs.io/ipfs/<hash>?filename=<name>`` URL built from the
        ``Hash`` field of the node's JSON reply.
    """
    from pathlib import Path
    import requests

    # Read the whole file as bytes before contacting the node.
    image_binary = Path(filepath).read_bytes()

    # POST the bytes to the add endpoint as a multipart "file" field.
    endpoint = "http://127.0.0.1:5001/api/v0/add"
    reply = requests.post(endpoint, files={"file": image_binary})
    ipfs_hash = reply.json()["Hash"]

    # "./img/myImage.png" -> "myImage.png": keep only the last component.
    filename = filepath.rsplit("/", 1)[-1]
    image_uri = f"https://ipfs.io/ipfs/{ipfs_hash}?filename={filename}"
    print("image uri on ipfs", image_uri)
    return image_uri
this is the Response type from ipfs
{
"Bytes": "<int64>",
"Hash": "<string>",
"Name": "<string>",
"Size": "<string>"
}

Python Graphene Subscription Server

I try to subscribe from my react frontend using ApolloClient to a Python GraphQL server I implemented according to https://github.com/graphql-python/graphql-ws/blob/master/examples/flask_gevent/app.py.
My frontend subscription looks like this
client.subscribe({
query: gql`
subscription{
count_seconds
}
`
}).subscribe({
next(data) {
console.log("New data received from subscription");
}
});
but my server complains that
Traceback (most recent call last):
File "C:\Users\Adrian\Miniconda3\lib\site-packages\gevent\pywsgi.py", line 976, in handle_one_response
self.run_application()
File "C:\Users\Adrian\Miniconda3\lib\site-packages\geventwebsocket\handler.py", line 75, in run_application
self.run_websocket()
File "C:\Users\Adrian\Miniconda3\lib\site-packages\geventwebsocket\handler.py", line 52, in run_websocket
list(self.application(self.environ, lambda s, h, e=None: []))
File "C:\Users\Adrian\Miniconda3\lib\site-packages\flask\app.py", line 2463, in __call__
return self.wsgi_app(environ, start_response)
File "C:\Users\Adrian\Miniconda3\lib\site-packages\flask_sockets.py", line 45, in __call__
handler(environment, **values)
File "C:\Users\Adrian\Miniconda3\lib\site-packages\flask_cors\decorator.py", line 128, in wrapped_function
resp = make_response(f(*args, **kwargs))
File "C:\Users\Adrian\Miniconda3\lib\site-packages\flask\helpers.py", line 223, in make_response
return current_app.make_response(args)
File "C:\Users\Adrian\Miniconda3\lib\site-packages\flask\app.py", line 2130, in make_response
" {rv.__class__.__name__}.".format(rv=rv)
TypeError: The view function did not return a valid response. The return type must be a string, dict, tuple, Response instance, or WSGI callable, but it was a list.
2020-03-27T21:16:07Z {'REMOTE_ADDR': '::1', 'REMOTE_PORT': '55650', 'HTTP_HOST': 'localhost:5000', (hidden keys: 32)} failed with TypeError
My server implementation
from flask import Flask, make_response, request
from flask_sockets import Sockets
from flask_cors import CORS, cross_origin
from flask_graphql import GraphQLView
from graphql_ws.gevent import GeventSubscriptionServer
from schema import schema
from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler
from graphql.backend import GraphQLCoreBackend
app = Flask(__name__)
app.debug = False
cors = CORS(app, resources={r"/graphql/*": {"origins": "*"}})
app.config['CORS_HEADERS'] = 'Content-Type'
sub_server = GeventSubscriptionServer(schema)
sockets = Sockets(app)
class Server:
def __init__(self):
self.server = pywsgi.WSGIServer(('', 5000), app, handler_class=WebSocketHandler)
app.add_url_rule('/graphql', view_func=GraphQLView.as_view('graphql', schema=schema, graphiql=False))
app.app_protocol = lambda environ_path_info: 'graphql-ws'
self.server.serve_forever()
@sockets.route('/subscriptions')
@cross_origin()
def echo_socket(ws):
    """Hand the incoming websocket over to the subscription server.

    Args:
        ws: The websocket object supplied by the ``sockets`` route.

    Returns:
        An empty list.
    """
    sub_server.handle(ws)
    return []
I couldn't figure out what the root cause for this is,... where exactly is a list returned where I should have something else?
Maybe the schema is also relevant
import graphene
from rx import Observable
import random
class Query(graphene.ObjectType):
hello = graphene.String(name=graphene.String(default_value="World"))
def resolve_hello(self, info, name):
print("answer query")
return 'Hello ' + name
class RandomType(graphene.ObjectType):
seconds = graphene.Int()
random_int = graphene.Int()
class Subscription(graphene.ObjectType):
count_seconds = graphene.Int(up_to=graphene.Int())
random_int = graphene.Field(RandomType)
def resolve_count_seconds(self, info, up_to=5):
print("new subscription")
return Observable.interval(1000) \
.map(lambda i: "{0}".format(i)) \
.take_while(lambda i: int(i) <= up_to)
def resolve_random_int(self, info):
return Observable.interval(1000).map(lambda i: RandomType(seconds=i, random_int=random.randint(0, 500)))
schema = graphene.Schema(query=Query, subscription=Subscription)

How do you send many documents to a Scout Server in Python using the Python Scout Client?

I'm trying to index PDF text with a Python library called Scout. I have tried doing the same thing with Elasticsearch too. In both cases I can't figure out how to post text to an index in bulk using Python.
After a lot of research, I believe I need to use async HTTP requests. The only problem is that I don't understand async calls, nor do I understand what a Scout Python 'client' really is. I'm a self-taught programmer and there are still many things I don't understand. My thought is that the client can't stay open for a loop to keep using the connection. I have seen coding concepts like "await" and "sessions" in many books on programming. However, I don't know how to implement these concepts. Can someone help me write some Python code that will successfully post new documents to a running Scout server and explain how it's done?
Here is My attempt:
from scout_client import Scout
# import libraries to help read and create PDF
import PyPDF2
from fpdf import FPDF
import base64
import os
from flask import Flask, jsonify, request, render_template, json
# before you start, Run the Server.py file and create a Sqlite DB
# Step one loop though PDF in 'books' folder
for k in range(14,15):
# open the pdf file
read_pdf = PyPDF2.PdfFileReader("books/%s.pdf"%(k))
# Test to see if Step one is complete and succesful
#print (read_pdf)
# Step Two Gain intel on how many Pages are in the Document
# get the page numbers
num = read_pdf.getNumPages()
print ("PDF pages:", num)
# Step Three understand the data by page
# create a dictionary object for page data
all_pages = []
# Step For Create a new index in Scout Server
# client.create_index('test3')
# iterate the page numbers
for page in range(num):
data = read_pdf.getPage(page)
#page_mode = read_pdf.getPageMode()
# extract the page's text
page_text = data.extractText()
# put the text data into the dict
all_pages.append(page_text)
# initiate the Client from scout_client.py
client = Scout('http://localhost:8000')
# THe issue: I tryed for loops, and while loops but cant get past: urllib.error.HTTPError: HTTP Error 400: BAD REQUEST
i = 1
while i <= num:
client.create_document(all_pages[i],['test3'])
print(i,"....done")
i += 1
I get an error:
Traceback (most recent call last):
File "test.py", line 37, in <module>
client.create_document(all_pages[i],['test3'])
File "../Searchtest4/scout/scout_client.py", line 149, in create_document
return self.post('/documents/', post_data, attachments)
File "../Searchtest4/scout/scout_client.py", line 53, in post
return self.post_json(url, data)
File "../Searchtest4/scout/scout_client.py", line 63, in post_json
return json.loads(urlopen(request).read().decode('utf8'))
File "../lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "../lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "../lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "../lib/python3.7/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "../lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "../lib/python3.7/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
**urllib.error.HTTPError: HTTP Error 400: BAD REQUEST**
Here is the server that runs fine (server.py):
import logging
import optparse
import os
import sys
from flask import Flask
from werkzeug.serving import run_simple
from scout.exceptions import InvalidRequestException
from scout.models import database
from scout.models import Attachment
from scout.models import BlobData
from scout.models import Document
from scout.models import Index
from scout.models import IndexDocument
from scout.models import Metadata
from scout.views import register_views
logger = logging.getLogger('scout')
def create_server(config=None, config_file=None):
    """Build and configure the Flask application.

    Args:
        config: Optional mapping of settings; applied after ``config_file``
            so its values take precedence.
        config_file: Optional path of a Python config file to load first.

    Returns:
        The configured ``Flask`` application.
    """
    app = Flask(__name__)

    # Configure application using a config file.
    if config_file is not None:
        app.config.from_pyfile(config_file)

    # (Re-)Configure application using command-line switches/environment flags.
    if config is not None:
        app.config.update(config)

    # Initialize the SQLite database.
    initialize_database(app.config.get('DATABASE') or 'scout.db',
                        pragmas=app.config.get('SQLITE_PRAGMAS') or None)
    register_views(app)

    @app.errorhandler(InvalidRequestException)
    def handle_invalid_request(exc):
        return exc.response()

    @app.before_request
    def connect_database():
        # No per-request connect/close is done for an in-memory database.
        if database.database != ':memory:':
            database.connect()

    @app.teardown_request
    def close_database(exc):
        if database.database != ':memory:' and not database.is_closed():
            database.close()

    return app
def initialize_database(database_file, pragmas=None):
    """Point the shared database at ``database_file`` and create its tables.

    Args:
        database_file: Path handed to ``database.init``.
        pragmas: Optional pragmas forwarded to ``database.init``.
    """
    database.init(database_file, pragmas=pragmas)

    # Use database.execution_context when the attribute exists, otherwise
    # fall back to the database object itself as the context manager.
    context = getattr(database, 'execution_context', database)

    models = [
        Attachment,
        BlobData,
        Document,
        Index,
        IndexDocument,
        Metadata]
    with context:
        database.create_tables(models)
def run(app):
    """Serve ``app`` on the host and port found in its configuration.

    With ``DEBUG`` set the application's own ``run`` method is used in debug
    mode; otherwise ``run_simple`` serves it with threading enabled.
    """
    settings = app.config
    if not settings['DEBUG']:
        run_simple(
            hostname=settings['HOST'],
            port=settings['PORT'],
            application=app,
            threaded=True)
        return
    app.run(host=settings['HOST'], port=settings['PORT'], debug=True)
def panic(s, exit_code=1):
    """Write ``s`` to stderr between ANSI colour escapes, then exit.

    Args:
        s: The message to report.
        exit_code: Process exit status; defaults to 1.

    Raises:
        SystemExit: Always, carrying ``exit_code``.
    """
    message = '\033[91m%s\033[0m\n' % s
    stream = sys.stderr
    stream.write(message)
    stream.flush()
    raise SystemExit(exit_code)
def get_option_parser():
    """Build the command-line option parser for the Scout server.

    Returns:
        An ``optparse.OptionParser`` with every server option registered.
    """
    # (flags, keyword settings) pairs, registered in exactly this order.
    option_specs = (
        (('-H', '--host'), {
            'default': '127.0.0.1',
            'dest': 'host',
            'help': 'The hostname to listen on. Defaults to 127.0.0.1.'}),
        (('-p', '--port'), {
            'default': 8000,
            'dest': 'port',
            'help': 'The port to listen on. Defaults to 8000.',
            'type': 'int'}),
        (('-u', '--url-prefix'), {
            'dest': 'url_prefix',
            'help': 'URL path to prefix Scout API.'}),
        (('-s', '--stem'), {
            'dest': 'stem',
            'help': 'Specify stemming algorithm for content.'}),
        (('-d', '--debug'), {
            'action': 'store_true',
            'dest': 'debug',
            'help': 'Run Flask app in debug mode.'}),
        (('-c', '--config'), {
            'dest': 'config',
            'help': 'Configuration module (python file).'}),
        (('--paginate-by',), {
            'default': 50,
            'dest': 'paginate_by',
            'help': 'Number of documents displayed per page of results, default=50',
            'type': 'int'}),
        (('-k', '--api-key'), {
            'dest': 'api_key',
            'help': 'Set the API key required to access Scout.'}),
        (('-C', '--cache-size'), {
            'default': 64,
            'dest': 'cache_size',
            'help': 'SQLite page-cache size (MB). Defaults to 64MB.',
            'type': 'int'}),
        (('-f', '--fsync'), {
            'action': 'store_true',
            'dest': 'fsync',
            'help': 'Synchronize database to disk on every write.'}),
        (('-j', '--journal-mode'), {
            'default': 'wal',
            'dest': 'journal_mode',
            'help': 'SQLite journal mode. Defaults to WAL (recommended).'}),
        (('-l', '--logfile'), {
            'dest': 'logfile',
            'help': 'Log file'}),
    )

    parser = optparse.OptionParser()
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    return parser
def parse_options():
    """Turn command-line options and environment variables into an app.

    Reads ``SCOUT_CONFIG`` and ``SCOUT_DATABASE`` from the environment,
    validates the parsed options (calling ``panic`` on invalid input) and
    passes the assembled settings to ``create_server``.

    Returns:
        Whatever ``create_server`` returns for the assembled settings.
    """
    options, args = get_option_parser().parse_args()

    if options.logfile:
        logger.addHandler(logging.FileHandler(options.logfile))

    config_file = os.environ.get('SCOUT_CONFIG') or options.config
    config = {'DATABASE': os.environ.get('SCOUT_DATABASE')}

    # At most one positional argument (the database path) is accepted; it
    # may be left out only when SCOUT_DATABASE already names the database.
    if not args and not config['DATABASE']:
        panic('Error: missing required path to database file.')
    elif len(args) > 1:
        panic('Error: [%s] only accepts one argument, which is the path '
              'to the database file.' % __file__)
    elif args:
        config['DATABASE'] = args[0]

    sqlite_pragmas = [('journal_mode', options.journal_mode)]
    if options.cache_size:
        sqlite_pragmas.append(('cache_size', -1024 * options.cache_size))
    if not options.fsync:
        sqlite_pragmas.append(('synchronous', 0))
    config['SQLITE_PRAGMAS'] = sqlite_pragmas

    # Handle command-line options. These values will override any values
    # that may have been specified in the config file.
    if options.api_key:
        config['AUTHENTICATION'] = options.api_key
    if options.debug:
        config['DEBUG'] = True
    config['HOST'] = options.host or '127.0.0.1'
    config['PORT'] = options.port or 8000
    config['URL_PREFIX'] = options.url_prefix or ''

    page_size = options.paginate_by
    if page_size:
        if not 1 <= page_size <= 1000:
            panic('paginate-by must be between 1 and 1000')
        config['PAGINATE_BY'] = page_size

    stemmer = options.stem
    if stemmer:
        if stemmer not in ('simple', 'porter'):
            panic('Unrecognized stemmer. Must be "porter" or "simple".')
        config['STEM'] = stemmer

    return create_server(config, config_file)
def main():
    """Script entry point: build the app from the options and serve it."""
    run(parse_options())
if __name__ == '__main__':
main()
and the so-called client (scout_client.py):
import base64
import json
try:
from email.generator import _make_boundary as choose_boundary
except ImportError:
from mimetools import choose_boundary
import mimetypes
import os
try:
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
try:
from urllib.request import Request
from urllib.request import urlopen
except ImportError:
from urllib2 import Request
from urllib2 import urlopen
import zlib
ENDPOINT = None
KEY = None
class Scout(object):
def __init__(self, endpoint=ENDPOINT, key=KEY):
self.endpoint = endpoint.rstrip('/')
self.key = key
def get_full_url(self, url):
return self.endpoint + url
def get_raw(self, url, **kwargs):
headers = {'Content-Type': 'application/json'}
if self.key:
headers['key'] = self.key
if kwargs:
if '?' not in url:
url += '?'
url += urlencode(kwargs, True)
request = Request(self.get_full_url(url), headers=headers)
fh = urlopen(request)
return fh.read()
def get(self, url, **kwargs):
return json.loads(self.get_raw(url, **kwargs))
def post(self, url, data=None, files=None):
if files:
return self.post_files(url, data, files)
else:
return self.post_json(url, data)
def post_json(self, url, data=None):
headers = {'Content-Type': 'application/json'}
if self.key:
headers['key'] = self.key
data = json.dumps(data or {})
if not isinstance(data, bytes):
data = data.encode('utf-8')
request = Request(self.get_full_url(url), data=data, headers=headers)
return json.loads(urlopen(request).read().decode('utf8'))
def post_files(self, url, json_data, files=None):
if not files or not isinstance(files, dict):
raise ValueError('One or more files is required. Files should be '
'passed as a dictionary of filename: file-like-'
'object.')
boundary = choose_boundary()
form_files = []
for i, (filename, file_obj) in enumerate(files.items()):
try:
data = file_obj.read()
except AttributeError:
data = bytes(file_obj)
mimetype = mimetypes.guess_type(filename)[0]
form_files.append((
'file_%s' % i,
filename,
mimetype or 'application/octet-stream',
data))
part_boundary = '--' + boundary
parts = [
part_boundary,
'Content-Disposition: form-data; name="data"',
'',
json.dumps(json_data)]
for field_name, filename, mimetype, data in form_files:
parts.extend((
part_boundary,
'Content-Disposition: file; name="%s"; filename="%s"' % (
field_name, filename),
'Content-Type: %s' % mimetype,
'',
data))
parts.append('--' + boundary + '--')
parts.append('')
headers = {'Content-Type': 'multipart/form-data; boundary=%s' %
boundary}
if self.key:
headers['key'] = self.key
data = '\r\n'.join(parts)
if not isinstance(data, bytes):
data = data.encode('utf-8')
request = Request(self.get_full_url(url), data=data, headers=headers)
return json.loads(urlopen(request).read())
def delete(self, url):
headers = {}
if self.key:
headers['key'] = self.key
request = Request(self.get_full_url(url), headers=headers)
request.get_method = lambda: 'DELETE'
fh = urlopen(request)
return json.loads(fh.read())
def get_indexes(self, **kwargs):
return self.get('/', **kwargs)['indexes']
def create_index(self, name):
return self.post('/', {'name': name})
def rename_index(self, old_name, new_name):
return self.post('/%s/' % old_name, {'name': new_name})
def delete_index(self, name):
return self.delete('/%s/' % name)
def get_index(self, name, **kwargs):
return self.get('/%s/' % name, **kwargs)
def get_documents(self, **kwargs):
return self.get('/documents/', **kwargs)
def create_document(self, content, indexes, identifier=None,
attachments=None, **metadata):
if not isinstance(indexes, (list, tuple)):
indexes = [indexes]
post_data = {
'content': content,
'identifier': identifier,
'indexes': indexes,
'metadata': metadata}
return self.post('/documents/', post_data, attachments)
def update_document(self, document_id=None, content=None, indexes=None,
metadata=None, identifier=None, attachments=None):
if not document_id and not identifier:
raise ValueError('`document_id` must be provided.')
data = {}
if content is not None:
data['content'] = content
if indexes is not None:
if not isinstance(indexes, (list, tuple)):
indexes = [indexes]
data['indexes'] = indexes
if metadata is not None:
data['metadata'] = metadata
if not data and not attachments:
raise ValueError('Nothing to update.')
return self.post('/documents/%s/' % document_id, data, attachments)
def delete_document(self, document_id=None):
if not document_id:
raise ValueError('`document_id` must be provided.')
return self.delete('/documents/%s/' % document_id)
def get_document(self, document_id=None):
if not document_id:
raise ValueError('`document_id` must be provided.')
return self.get('/documents/%s/' % document_id)
def attach_files(self, document_id, attachments):
return self.post_files('/documents/%s/attachments/' % document_id,
{}, attachments)
def detach_file(self, document_id, filename):
return self.delete('/documents/%s/attachments/%s/' %
(document_id, filename))
def update_file(self, document_id, filename, file_object):
return self.post_files('/documents/%s/attachments/%s/' %
(document_id, filename),
{}, {filename: file_object})
def get_attachments(self, document_id, **kwargs):
return self.get('/documents/%s/attachments/' % document_id, **kwargs)
def get_attachment(self, document_id, filename):
return self.get('/documents/%s/attachments/%s/' %
(document_id, filename))
def download_attachment(self, document_id, filename):
return self.get_raw('/documents/%s/attachments/%s/download/' %
(document_id, filename))
def search_attachments(self, **kwargs):
return self.get('/documents/attachments/', **kwargs)
class SearchProvider(object):
    """Base class describing how an object is turned into a document.

    Every hook raises ``NotImplementedError`` here; subclasses override the
    ones they support.
    """

    def content(self, obj):
        """Return the content to index for ``obj``."""
        raise NotImplementedError()

    def identifier(self, obj):
        """Return the identifier to store for ``obj``."""
        raise NotImplementedError()

    def metadata(self, obj):
        """Return the metadata mapping to store for ``obj``."""
        raise NotImplementedError()
class SearchSite(object):
    """Registry tying model classes to the providers that index them.

    Args:
        client: Object offering ``create_document`` and ``delete_document``.
        index: Index passed along with every created document.
    """

    def __init__(self, client, index):
        self.client = client
        self.index = index
        # Maps a model class to the list of provider instances for it.
        self.registry = {}

    def register(self, model_class, search_provider):
        """Instantiate ``search_provider`` and attach it to ``model_class``."""
        self.registry.setdefault(model_class, []).append(search_provider())

    def unregister(self, model_class, search_provider=None):
        """Drop providers registered for ``model_class``.

        Without ``search_provider`` the whole entry is removed; otherwise
        only instances of that provider class are dropped.
        """
        if search_provider is None:
            self.registry.pop(model_class, None)
            return
        if model_class not in self.registry:
            return
        remaining = []
        for provider in self.registry[model_class]:
            if not isinstance(provider, search_provider):
                remaining.append(provider)
        self.registry[model_class] = remaining

    def store(self, obj):
        """Create one document per provider registered for ``type(obj)``.

        Returns:
            False when the exact type of ``obj`` has no registry entry,
            True otherwise.
        """
        model_class = type(obj)
        if model_class not in self.registry:
            return False

        for provider in self.registry[model_class]:
            content = provider.content(obj)

            # metadata() and identifier() are optional hooks: a provider
            # that raises NotImplementedError simply contributes nothing.
            try:
                metadata = provider.metadata(obj)
            except NotImplementedError:
                metadata = {}

            try:
                identifier = provider.identifier(obj)
            except NotImplementedError:
                pass
            else:
                metadata['identifier'] = identifier

            self.client.create_document(content, self.index, **metadata)
        return True

    def remove(self, obj):
        """Delete the document of every provider registered for ``type(obj)``.

        Returns:
            False when the exact type of ``obj`` has no registry entry,
            True otherwise.
        """
        model_class = type(obj)
        if model_class not in self.registry:
            return False

        for provider in self.registry[model_class]:
            self.client.delete_document(provider.identifier(obj))
        return True
Finally the Documentation for Scout:
https://scout.readthedocs.io/en/latest/server.html#index-detail-index-name
https://charlesleifer.com/blog/meet-scout-a-search-server-powered-by-sqlite/
Any Detailed Help is much appreciated:)

So I looked further into the library called Scout and... got it to work!
from scout_client import Scout
# import libraries to help read and create PDF
import PyPDF2
from fpdf import FPDF
import base64
import os
from flask import Flask, jsonify, request, render_template, json
client = Scout('http://localhost:8000')
for k in range(7,18):
read_pdf = PyPDF2.PdfFileReader("books/%s.pdf"%(k))
num = read_pdf.getNumPages()
print ("PDF pages:", num)
all_pages = []
for page in range(num):
data = read_pdf.getPage(page)
page_text = data.extractText()
all_pages.append(page_text)
import requests
for z in all_pages:
url = 'http://localhost:8000/documents/'
data = {'content': z, 'indexes': ['test13']}
headers = {
'Content-Type': 'application/json',
}
response = requests.post(url, data=json.dumps(data), headers=headers)
print(response)
I can now loop through as many PDFs as I want locally,
post them to the server for indexing,
and search for keywords.
Now I just need help with making a basic front end with a search bar that calls data from a JSON response in Python and Flask.

How to get content from tornado future object

I'm really confused by the Tornado framework and the 'future' object.
I want to get an async response by making an HTTP call.
The code is:
class TestAsyncHttp(object):
    """Small asynchronous HTTP client built on ``httpclient.AsyncHTTPClient``.

    NOTE(review): ``_make_url`` and ``_load_headers`` are called below but
    are not part of this snippet - they are assumed to be defined elsewhere.
    """

    def __init__(self):
        self._http_client = httpclient.AsyncHTTPClient()

    @gen.coroutine
    def get_response(self, params):
        """POST ``params`` to the ``test`` endpoint and return the response."""
        response = yield self._request(
            method='POST',
            endpoint='test',
            data=params
        )
        raise gen.Return(response)

    @gen.coroutine
    def _request(self, method, endpoint, data):
        """Send ``data`` as a JSON body to ``endpoint`` and return the response."""
        url = self._make_url(endpoint) #this includes the port..
        headers = self._load_headers()
        request = httpclient.HTTPRequest(
            url,
            method=method,
            headers=headers,
            # json.dumps returns the serialized string; json.dump would
            # require a file object as its second argument.
            body=json.dumps(data)
        )
        response = yield self._http_client.fetch(request)
        raise gen.Return(response)
The thing is, after I finished this, how can I test it?
I tried to write a script which contains:
import json
with open('test/request.json') as json_file:
request_json = json.loads(json_file.read())
def get_response():
x = TestAsyncHttp()
ret = yield x.get_response(request_json)
body = ret.body
print body['value']
get_response
But when I run 'python "path-to-the-script"',
nothing is output.
If I step through it in the interactive "python" interpreter instead, I get an error that the "future" object doesn't have __getitem__.
How can I get the content from a future?
Thanks!

Use run_sync to run an async coroutine in a synchronous fashion:
def get_response():
x = TestAsyncHttp()
ret = IOLoop.current().run_sync(lambda: x.get_response(request_json))
body = ret.body
print body['value']
The lambda is required here simply to pass the request_json parameter. If get_response took no arguments, you could instead do:
ret = IOLoop.current().run_sync(x.get_response)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

cgi.FieldStorage does not read JSON data from requests.post - python

Related

Flask Sessions seem to be not working, can someone help me?

How Do You Write Files to IPFS via the HTTP API in Python

Python Graphene Subscription Server

How do you send many documents to a Scout Server in Python using the Python Scout Client?

How to get content from tornado future object

Categories

Resources