How to debug/log wsgi python app? - python

I tried this:
#!/usr/bin/python
from wsgiref.simple_server import make_server
from cgi import parse_qs, escape
import logging
import os
import sys
html = """
<html>
<body>
<form method="post" action="parsing_post.wsgi">
<p>
Age: <input type="text" name="age">
</p>
<p>
Hobbies:
<input name="hobbies" type="checkbox" value="software"> Software
<input name="hobbies" type="checkbox" value="tunning"> Auto Tunning
</p>
<p>
<input type="submit" value="Submit">
</p>
</form>
<p>
Age: %s<br>
Hobbies: %s
</p>
</body>
</html>
"""
def application(environ, start_response):
    """WSGI entry point: echo the posted ``age`` and ``hobbies`` form fields.

    Reads the request body from ``environ['wsgi.input']``, parses it as a
    query string, HTML-escapes the values and substitutes them into the
    module-level ``html`` template.

    Returns a one-element list holding the rendered page.
    """
    # Kept from the original: a logger is obtained here but nothing is
    # ever logged through it.
    logger = logging.getLogger(__name__)

    # CONTENT_LENGTH may be missing or empty; both cases mean "no body".
    try:
        body_length = int(environ.get('CONTENT_LENGTH', 0))
    except ValueError:
        body_length = 0

    # For a POST the form data arrives in the request body, which the WSGI
    # server exposes as the file-like object wsgi.input.
    form = parse_qs(environ['wsgi.input'].read(body_length))

    # Escape everything that came from the user to avoid script injection.
    age_text = escape(form.get('age', [''])[0])  # first age value only
    hobby_texts = [escape(item) for item in form.get('hobbies', [])]

    if not age_text:
        age_text = 'Empty'
    if not hobby_texts:
        hobby_texts = ['No Hobbies']
    page = html % (age_text, ', '.join(hobby_texts))

    start_response('200 OK', [
        ('Content-Type', 'text/html'),
        ('Content-Length', str(len(page))),
    ])
    return [page]
But I don't know where it logs. I'm trying to display/log the value on the webpage or in a file, /var/log/apache2/myapp.log.
What's the best way to do this?
Any answer will be highly appreciated.

Note that the above code won't actually produce any log output whatsoever, since you're not calling any of the logger.log() variants - but I guess that's not the point.
If you're running your code with apache/mod_wsgi, the simplest solution is to configure your logger(s) to log to sys.stderr using a StreamHandler (cf http://docs.python.org/howto/logging.html#configuring-logging), and define the error log path, name and level in your apache conf (beware, the default apache behaviour is to only log "error level" message).

Related

Why can't i get the text value using request.form in flask to perform sentimental analysis on the text?

I am quite inexperienced in HTML, JS and Flask, but I am working on a chatbot that is able to perform sentiment analysis on the sender's messages.
My HTML code:
<div class="bottom_wrapper clearfix">
<div class="message_input_wrapper">
<form action = "{{ url_for('reply') }}" method = "POST">
<input
class="message_input"
id="text_message"
name = "sentimental_name"
placeholder="Tell me how you feel today..."
onkeydown="if (event.keyCode == 13)document.getElementById('send').click()">
</div>
<!--div class = "send_message1" id = 'audio' onclick = "start_dictation()">
<span style="font-size: 32px; color:black;">
<i class="fas fa-microphone"></i>
</span>
</div-->
<div class="send_message" id="send" onclick="get_message()">
<!--<div class="icon"></div>-->
<div class="text">Send</div>
</div>
</form>
</div>
This is my python-flask code:
# NOTE(review): the leading '#' on the next line looks like a mangled '@'
# (i.e. the decorator @app.route); as written it is only a comment, so
# confirm against the real file.
#app.route('/senti', methods = ['POST'])
def reply():
if request.method == 'POST':
# NOTE(review): the form's <input> has id="text_message" but
# name="sentimental_name"; request.form is presumably keyed by the name
# attribute, so this lookup would fail - verify.
message = request.form['text_message']
# Polarity and subjectivity scores reported by TextBlob for the message.
a = TextBlob(message).sentiment.polarity
b = TextBlob(message).sentiment.subjectivity
My JS function that is called by the onclick handler:
// Click handler: reads the text typed into #text_message and passes it on
// to insert_chat() and interact().
function get_message(){
var message = document.getElementById("text_message").value;
// The message is wrapped in an object under the key "msg" and serialised
// to a JSON string before being handed to interact().
var json_data = {"msg":message}
var sender = JSON.stringify(json_data)
console.log(sender)
console.log(message);
// Presumably renders the message in the chat window as sent by 'me'.
insert_chat('me',message);
// interact() is not defined in this snippet; per the console trace it
// lives in chat.js and issues the jQuery ajax POST.
interact(sender);
}
Console log:
POST http://127.0.0.1:5000/senti 500 (INTERNAL SERVER ERROR)
send # jquery-3.4.1.js:9837
ajax # jquery-3.4.1.js:9434
interact # chat.js:34
get_message # chat.js:55
onclick # chat:58
It seems really simple but it is like I miss something. Thank you so much!
You would have to use "sentimental_name" in
request.form["sentimental_name"]
because you have name="sentimental_name" (not name="text_message") on the <input> element.
But it uses JavaScript function get_message() to get data when you click ENTER
<input ... onkeydown="if (event.keyCode == 13)document.getElementById('send').click()">
<div class="send_message" id="send" onclick="get_message()">
and converts to JSON with field "msg" so it sends it as data or json, not form.
function get_message(){
var message = document.getElementById("text_message").value;
var json_data = {"msg":message}
var sender = JSON.stringify(json_data)
console.log(sender)
console.log(message);
insert_chat('me',message);
interact(sender);
In flask reply() you can check this using:
print(request.args)
print(request.data)
print(request.form)
print(request.json)
JavaScript may expect that reply() returns also JSON - ie.
return jsonify(list_or_dictionary).
In JavaScript I see interact(sender); so you would have to find this function and see what it sends and what result it may expect.
BTW: you can also use request.json.get("msg") and request.form.get("msg") instead of ["msg"], because .get() returns None when it can't find "msg" and you can use if not message: to catch this problem, whereas ["msg"] raises an error when there is no "msg" and you would have to use try:/except: to catch it.

python - Scrape many URL's with Login in reasonable time

I'm trying to scrape some data from a website where I need to be logged in to see the actual content. It all works fine but takes about 5 seconds per request, which is way too slow for my needs (>5000 URLs to scrape). It seems there are faster ways, such as the asyncio and aiohttp modules.
However all examples I found on the web did not show how to login to a site and then use these tools.
So I basically need an easy to follow example how to do such a thing.
I tried to rebuild this example:
https://realpython.com/python-concurrency/#what-is-concurrency
with my code, which did not work. I also tried AsyncHTMLSession() from requests_html which returned something but did not seem to remember the login.
This is my code so far:
import requests
from bs4 import BeautifulSoup
# Form fields posted to the login URL below.
payload = {
"name" : "username",
"password" : "example_pass",
"destination" : "MAS_Management_UserConsole",
"loginType" : ""
}
# Placeholder from the question: the real list of page URLs goes here.
links = [several urls]
### stuff with requests
# The same Session object `c` performs the login and every later page request.
# NOTE(review): indentation was lost in this listing; presumably everything
# below sits inside the `with` block - confirm.
with requests.Session() as c:
c.get('http://boldsystems.org/')
c.post('http://boldsystems.org/index.php/Login', data = payload)
# Fetch one page through the session and return the text of the element
# whose id is "processidLC".
def return_id(link):
page = c.get(link).content
soup = BeautifulSoup(page, 'html.parser')
return soup.find(id = 'processidLC').text
# Pages are requested one after another, each call waiting for the previous one.
for link in links:
print(return_id(link))
It looks like you're already using requests so you can try requests-async. The example below should help you with "in reasonable time" part of your question, just adjust parse_html function accordingly to search for your HTML tag. By default it will run 50 requests in parallel (MAX_REQUESTS) to not exhaust resources on your system (file descriptors etc.).
Example:
import asyncio
import requests_async as requests
import time
from bs4 import BeautifulSoup
from requests_async.exceptions import HTTPError, RequestException, Timeout
# Upper bound on requests in flight at once; main() uses it to size the
# semaphore that every run() call must acquire.
MAX_REQUESTS = 50
# Sample sites fetched by main(); replace with the pages to scrape.
URLS = [
'http://envato.com',
'http://amazon.co.uk',
'http://amazon.com',
'http://facebook.com',
'http://google.com',
'http://google.fr',
'http://google.es',
'http://google.co.uk',
'http://internet.org',
'http://gmail.com',
'http://stackoverflow.com',
'http://github.com',
'http://heroku.com',
'http://djangoproject.com',
'http://rubyonrails.org',
'http://basecamp.com',
'http://trello.com',
'http://yiiframework.com',
'http://shopify.com',
'http://airbnb.com',
'http://instagram.com',
'http://snapchat.com',
'http://youtube.com',
'http://baidu.com',
'http://yahoo.com',
'http://live.com',
'http://linkedin.com',
'http://yandex.ru',
'http://netflix.com',
'http://wordpress.com',
'http://bing.com',
]
class ScrapeError(Exception):
    """Root of this script's own exception hierarchy.

    Deliberately not called ``BaseException``: defining a class with that
    name at module level shadows the builtin, so any later
    ``except BaseException`` in the module would silently stop matching
    ``KeyboardInterrupt``, ``SystemExit`` and every other builtin error.
    """


class HTTPRequestFailed(ScrapeError):
    """Raised when a page could not be fetched.

    The message says why: an HTTP error status, a timeout, or another
    request-level failure.
    """
async def fetch(url, timeout=5, session=None):
    """Fetch *url* and return the response, or raise ``HTTPRequestFailed``.

    Args:
        url: Address to request.
        timeout: Timeout handed to ``session.get``.
        session: Optional, already-open session to send the request on,
            for example one that has logged in, so that the login is
            shared by all requests. When omitted, a throwaway session is
            opened for this one request (the original behaviour).

    Returns:
        The response, after ``raise_for_status()`` accepted its status.

    Raises:
        HTTPRequestFailed: On an HTTP error status, a timeout or any other
            request exception; the original exception is chained as the
            cause.
    """
    if session is not None:
        return await _get_checked(session, url, timeout)
    async with requests.Session() as own_session:
        return await _get_checked(own_session, url, timeout)


async def _get_checked(session, url, timeout):
    """GET *url* on *session*, translating request errors to HTTPRequestFailed."""
    try:
        resp = await session.get(url, timeout=timeout)
        resp.raise_for_status()
    except HTTPError as err:
        raise HTTPRequestFailed(f'Skipped: {resp.url} ({resp.status_code})') from err
    except Timeout as err:
        raise HTTPRequestFailed(f'Timeout: {url}') from err
    except RequestException as err:
        raise HTTPRequestFailed(err) from err
    return resp
async def parse_html(html):
    """Return the stripped text of the page's ``<title>``, or ``"Unknown"``.

    ``"Unknown"`` is returned both when the title is empty and when the
    document has no ``<title>`` element at all (``bs.title`` is ``None``
    then, which previously raised ``AttributeError``).
    """
    bs = BeautifulSoup(html, 'html.parser')
    if bs.title is None:
        return "Unknown"
    title = bs.title.text.strip()
    return title or "Unknown"
async def run(sem, url):
    """Fetch one URL under the semaphore, print a timing line, return the response.

    ``sem`` limits how many of these coroutines are fetching at once.
    The printed line shows the page title, the wall-clock time spent on
    fetching plus parsing, and the time reported by the response itself.
    """
    async with sem:
        started_at = time.time()
        response = await fetch(url)
        title = await parse_html(response.text)
        elapsed_t = time.time() - started_at
        r_time = response.elapsed.total_seconds()
        print(f'{url}, title: "{title}" (total: {elapsed_t:.2f}s, request: {r_time:.2f}s)')
        return response
async def main():
    """Start one task per entry in URLS and report failures as they occur.

    All tasks share a semaphore sized by MAX_REQUESTS. Results are consumed
    in completion order; an exception raised by a task is printed and the
    remaining tasks carry on.
    """
    limiter = asyncio.Semaphore(MAX_REQUESTS)
    pending = []
    for url in URLS:
        pending.append(asyncio.create_task(run(limiter, url)))

    for finished in asyncio.as_completed(pending):
        try:
            await finished
        except Exception as e:
            print(e)


if __name__ == '__main__':
    asyncio.run(main())
Output:
# time python req.py
http://google.com, title: "Google" (total: 0.69s, request: 0.58s)
http://yandex.ru, title: "Яндекс" (total: 2.01s, request: 1.65s)
http://github.com, title: "The world’s leading software development platform · GitHub" (total: 2.12s, request: 1.90s)
Timeout: http://yahoo.com
...
real 0m6.868s
user 0m3.723s
sys 0m0.524s
Now, this may still not help you with your login issue. The HTML tag that you're looking for (or the entire web page) could be generated by JavaScript, so you'll need a tool like requests-html, which uses a headless browser to read content rendered by JavaScript.
It's also possible that your login form is using CSRF protection, example with login to Django admin backend:
>>> import requests
>>> s = requests.Session()
>>> get = s.get('http://localhost/admin/')
>>> csrftoken = get.cookies.get('csrftoken')
>>> payload = {'username': 'admin', 'password': 'abc123', 'csrfmiddlewaretoken': csrftoken, 'next': '/admin/'}
>>> post = s.post('http://localhost/admin/login/?next=/admin/', data=payload)
>>> post.status_code
200
We use session to perform a get request first, to get the token from the csrftoken cookie and then we login with two hidden form fields:
<form action="/admin/login/?next=/admin/" method="post" id="login-form">
<input type="hidden" name="csrfmiddlewaretoken" value="uqX4NIOkQRFkvQJ63oBr3oihhHwIEoCS9350fVRsQWyCrRub5llEqu1iMxIDWEem">
<div class="form-row">
<label class="required" for="id_username">Username:</label>
<input type="text" name="username" autofocus="" required="" id="id_username">
</div>
<div class="form-row">
<label class="required" for="id_password">Password:</label> <input type="password" name="password" required="" id="id_password">
<input type="hidden" name="next" value="/admin/">
</div>
<div class="submit-row">
<label> </label>
<input type="submit" value="Log in">
</div>
</form>
Note: examples are using Python 3.7+
Look at asyncio and using the asyncio.gather function.
Wrap everything below this "links = [several urls]" line in a method.
Be careful this is not thread safe, so don't change variables within the method.
Also, since all of the tasks run concurrently, it could be useful to call asyncio.sleep(randint(0,2)) to delay some of them, so they are not all firing at the same time.
Then using asyncio call the below method with a new url like so
async def gather_results(urls):
    """Run ``wrapped_method`` for every URL concurrently and return the results.

    ``asyncio.gather`` only yields the results once it is awaited, and it
    can only be awaited from inside a coroutine, hence this wrapper.
    """
    tasks = [wrapped_method(url) for url in urls]
    return await asyncio.gather(*tasks)


# asyncio.run() drives the coroutine to completion and returns its result
# list, ordered like `urls`.
results = asyncio.run(gather_results(urls))
Hope that helps.
Otherwise look at https://github.com/jreese/aiomultiprocess

Python Flask AWS S3 EU Central | Bad Request The authorization mechanism you have provided is not supported. Please use AWS4-HMAC-SHA256

First of all, I know there are many similar threads; I read all of them and the S3 documentation (please don't close this thread). The fix is the same everywhere:
Simply change the signature_version to v4, because eu-central was created after 2014 and does not support v2 anymore.
I have tried every syntax now and I am still getting the error.
# Session bound to the eu-central-1 region, with credentials taken from the
# Flask app config.
session = boto3.Session(
aws_access_key_id=app.config['MY_AWS_ID'],
aws_secret_access_key=app.config['MY_AWS_SECRET'],
region_name='eu-central-1'
)
# S3 client explicitly configured for the 's3v4' signature version.
s3 = session.client('s3', config=Config(signature_version='s3v4'))
# Presigned POST for an object under videos/. The same acl and Content-Type
# values appear both in Fields (pre-filled form values) and in Conditions.
presigned_post = s3.generate_presigned_post(
Bucket = 'mybucket',
Key = 'videos/' + file_name,
Fields = {"acl": "public-read", "Content-Type": file_type},
Conditions = [
{"acl": "public-read"},
{"Content-Type": file_type}
],
# NOTE(review): presumably seconds (i.e. one hour) - confirm in the boto3 docs.
ExpiresIn = 3600
)
I have tried changing it everywhere. I also downgraded my boto3 installation to versions 1.6.6 and 1.4.4, which did not work either. I upgraded it back to the newest version, which is boto3==1.7.26.
The Error:
InvalidRequest
The authorization mechanism you have provided is not supported. Please use AWS4-HMAC-SHA256.
Every thread suggests the same fix, probably it does not work because I use Python / Flask. Something has to be done in a different way?
I am trying to upload huge video files via clientside directly to S3, therefore I need to sign the request.
EDIT
I thought maybe there is an SSL issue. I am testing everything on localhost and the default option for use_ssl is true.
I tried to upload this version to the live site (there is SSL enabled). Did not work, still the same error.
I also tried to use use_ssl = False on localhost, still the same error.
The problem was in the HTML and how I named the input fields. I took an example from an older tutorial, but you have to build your form the way it is explained by Amazon in their documentation.
I have used every input they provided. I checked the response generated by my sign_s3 function and populated all the corresponding fields in the form.
Here is my sign function:
# Sign request for direct file upload through client for video
@app.route('/sign_s3/<path:file_name_data>/<path:file_type_data>/<up_type>', methods=["GET", "POST"])
@login_required
@check_confirmed
def sign_s3(file_name_data, file_type_data, up_type):
    """Create a presigned S3 POST so the browser can upload a video directly.

    Args:
        file_name_data: Name of the file being uploaded; only the text after
            its last "." is reused, as the extension of the stored object.
        file_type_data: Content type bound into the presigned policy.
        up_type: ``"profile_vid"`` for a profile video; any other value is
            treated as a post video.

    Returns:
        A JSON string with the presigned POST (``data``), the URL the object
        will have (``url``) and the generated object name
        (``created_file_name``).
    """
    is_profile_video = up_type == "profile_vid"
    extension = file_name_data.split(".")[-1]
    # Objects created through a localhost server get a "local" marker in
    # their name; the rest of the name is built the same way in both cases.
    origin_tag = "local" if "localhost" in request.url_root else ""
    if is_profile_video:
        file_name = str(current_user.id) + get_random_code(5) + origin_tag + "-profil-video." + extension
    else:
        file_name = str(current_user.id) + str(randint(1, 100)) + origin_tag + "-post-video-temp." + extension
    file_type = file_type_data

    session = boto3.Session(
        aws_access_key_id=app.config['MY_AWS_ID'],
        aws_secret_access_key=app.config['MY_AWS_SECRET'],
        region_name='eu-central-1'
    )
    # eu-central-1 only accepts signature version 4 requests.
    s3 = session.client('s3', config=Config(signature_version='s3v4'))
    presigned_post = s3.generate_presigned_post(
        Bucket='mybucket',
        Key='videos/' + file_name,
        Fields={"acl": "public-read", "Content-Type": file_type},
        Conditions=[
            {"acl": "public-read"},
            {"Content-Type": file_type}
        ],
        ExpiresIn=3600
    )

    if is_profile_video:
        # Remove the previous profile video, if any, before pointing the
        # user at the new object name.
        if current_user.profile_video is not None:
            delete_file_from_aws("videos/", current_user.profile_video)
        setattr(current_user, "profile_video", file_name)
    else:
        print('post video has been uploaded, no need to delete or set here')
    db_session.commit()
    return json.dumps({'data': presigned_post, 'url': 'https://s3.eu-central-1.amazonaws.com/mybucket/' + 'videos/' + file_name, 'created_file_name' : file_name})
I looked at the generated response in the dev console, there I had these values:
The HTML form I used is here; all the input fields which are commented out have not been used by me. I simply include them because Amazon shows them all in their example:
<form id="direct_s3_profile_video_form" class="form-horizontal" role="form" method="POST" enctype="multipart/form-data">
<!-- Content-Type: -->
<input type="hidden" name="Content-Type">
<!-- <input type="hidden" name="x-amz-meta-uuid"> -->
<!-- <input type="hidden" name="x-amz-server-side-encryption"> -->
<input type="hidden" name="X-Amz-Credential">
<input type="hidden" name="X-Amz-Algorithm">
<input type="hidden" name="X-Amz-Date">
<!-- Tags for File: -->
<!-- <input type="hidden" name="x-amz-meta-tag"> -->
<input type="hidden" name="Policy">
<input type="hidden" name="X-Amz-Signature">
<input id="NEW_fileupload_video" type="file" name="file" accept="video/*">
<button type="submit"> Upload </button>
</form>
Also note here that the file input must be at the bottom because:
elements after this will be ignored
In my case the values for the form were dynamically created, so I populated the form with JS:
$('#direct_s3_profile_video_form').find('input[name="key"]').val(response_json_data.data.fields['key']);
$('#direct_s3_profile_video_form').find('input[name="acl"]').val(response_json_data.data.fields['acl']);
$('#direct_s3_profile_video_form').find('input[name="Content-Type"]').val(response_json_data.data.fields['Content-Type']);
$('#direct_s3_profile_video_form').find('input[name="X-Amz-Credential"]').val(response_json_data.data.fields['x-amz-credential']);
$('#direct_s3_profile_video_form').find('input[name="X-Amz-Algorithm"]').val(response_json_data.data.fields['x-amz-algorithm']);
$('#direct_s3_profile_video_form').find('input[name="X-Amz-Date"]').val(response_json_data.data.fields['x-amz-date']);
$('#direct_s3_profile_video_form').find('input[name="Policy"]').val(response_json_data.data.fields['policy']);
$('#direct_s3_profile_video_form').find('input[name="X-Amz-Signature"]').val(response_json_data.data.fields['x-amz-signature']);
$('#direct_s3_profile_video_form').attr('action', 'https://mybucket.s3.amazonaws.com');

Have IP client address

I'm building pages in HTML and Python (I'm a novice in Python) and I would like to get the client's IP address, but I don't know if it is possible. I saw that it is possible with PHP.
So, I execute my code in command line (with Linux) like that:
./code.py client_server app_name app_version
infos.py
def main(client_server, app_name, app_version):
    """Render the info page from its template and open it in the web browser.

    The template is read from ``infoHTML.py`` and filled in with the
    application name and version plus the module-level ``user`` and
    ``login`` values, which the ``__main__`` block sets before calling
    this function.

    Args:
        client_server: Accepted for the command-line interface; not used
            when rendering.
        app_name: Value substituted for ``${app_name}``.
        app_version: Value substituted for ``${app_version}``.
    """
    with open('infoHTML.py') as template_file:
        template = template_file.read()
    c = string.Template(template).substitute(
        app_name=app_name,
        app_version=app_version,
        os=user,
        user=login)
    # delete=False keeps the file on disk after it is closed, so the browser
    # can still read it; the ".html" suffix lets the browser treat it as HTML.
    with tempfile.NamedTemporaryFile(prefix='info', suffix='.html', mode='w', delete=False) as f:
        # Write the rendered page (the original wrote an undefined name).
        f.write(c)
    webbrowser.open(f.name)


if __name__ == "__main__":
    client_server = sys.argv[1]
    app_name = sys.argv[2]
    app_version = sys.argv[3]
    # `user` and `login` are read by main() as module globals.
    user = sys.platform
    # Kept from the original: both values are also appended to sys.argv,
    # although nothing in this script reads them back from there.
    sys.argv.append(user)
    login = getpass.getuser()
    sys.argv.append(login)
    main(client_server, app_name, app_version)
I have an html code into python code here: infoHTML.py
<html>
App: ${app_name}<br/><br/>
Version: ${app_version}<br/><br/>
User: ${user}<br/><br/>
<form name="sendData" method="get" action="http://localhost:8000/cgi/display.py">
Project: <input type="text" name="pro"><br/><br/>
Number: <input type="text" name="num"/><br/><br/>
<input type="submit" value="OK"/>
</form>
</body>
</html>
It's possible. You need to do it either by rendering the address on the response body or by requesting it with ajax after the response has already been rendered.
It would be hard to give you a code solution without seeing what web server you are using, but here are a couple of pointers for the first approach. To obtain the address, on the server side (python handler):
import socket
ip = socket.gethostbyname(socket.gethostname())
or if you are using something like Google App Engine:
ip = self.request.remote_addr
you should then write the IP to the response. For example, if you are using a templating engine to render your HTML, your HTML can look like something similar to this:
<html>
<script>
var ip = {{ip}}
</script>
and on the python code that renders the template you should do something like that:
htmlContent = template.render(ip=ip)
self.response.write(htmlContent)

twisted NOT_DONE_YET does not work with firefox

I run a Twisted server and it works fine with Chrome, but in Firefox I can't push anything to the client. I can post data, but when I call request.write nothing happens in Firefox!
the code is:
from twisted.web import resource, server, http
from twisted.internet import reactor
import random
# iframe closing tags are apparently necessary.
tophtml = """<html><head><title>Chat channel</title>
<style>
iframe { border: 0 }
</style>
</head><body>
<h1>HTML chat demo</h1>
<p>Inspired by Ka-Ping Yee's awesome <a href="http://zesty.ca/chat/";
>libdynim GIF chat app</a>.</p>
<!-- Like it, this uses no Java, DHTML, or reloading; unlike it,
this uses frames and no JavaScript (Ping's demo works with or without
JavaScript, but without JavaScript, it reloads the page when you speak.) -->
<iframe width="100%%" height="50" src="?frame=form;sess_id=%(sess_id)s">
</iframe>
<iframe width="100%%" height="300" src="?frame=output;sess_id=%(sess_id)s">
</iframe>
</body></html>
"""
formhtml = """<html><head><title>Chat post form</title></head><body>
<form method="POST">
<input name="sess_id" type="hidden" value="%(sess_id)s" />
to: <input name="destination" />
say: <input name="chatline" size="80" />
<input type="submit" value="send" />
</form>
</html>
"""
def _escape_html(text):
    "Return text with &, < and > replaced by HTML entities."
    return str(text).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


class ChatSession:
    "A persistent connection to a user's browser."

    def __init__(self, channel, request, sess_id):
        (self.channel, self.request, self.sess_id) = (channel, request, sess_id)
        # stop() runs whether the request finishes normally or with an error.
        self.deferred = request.notifyFinish()
        self.deferred.addCallback(self.stop)
        self.deferred.addErrback(self.stop)

    def stop(self, reason):
        "Called when the request finishes to close the channel."
        # Single-argument call form: prints the same line on Python 2 and 3.
        print("%s stopping: %s" % (self.sess_id, reason))
        self.channel.delete_session(self.sess_id)

    def sendmsg(self, origin, msg):
        """Display a chat message to the user as "<origin> msg".

        Both values are HTML-escaped before being written: the angle
        brackets around the origin must reach the browser as text (a raw
        "<origin>" is parsed as an unknown tag and never shown), and the
        message comes from other users and must not inject markup.
        """
        self.request.write("""<div>
&lt;%(origin)s&gt; %(msg)s </div>
""" % {'origin': _escape_html(origin), 'msg': _escape_html(msg)})
class ChatChannel(resource.Resource):
    "A resource representing a chat room, plus private messages."
    isLeaf = True

    def __init__(self):
        resource.Resource.__init__(self)  # ??? necessary??
        # Maps session id -> the ChatSession holding that user's open
        # output request.
        self.sessions = {}

    def render_GET(self, request):
        "Handle HTTP GET requests by dispatching on 'frame' arg."
        if 'frame' in request.args:
            frame = request.args['frame'][0]
            if frame == 'form':
                return self.render_form(request)
            elif frame == 'output':
                return self.do_chat_output(request)
        # No (or an unrecognised) frame: serve the top page with a new id.
        sess_id = random.randrange(1000)  # not secure, like everything here
        return tophtml % {'sess_id': sess_id}

    def render_form(self, request):
        "The form used for posting."
        return formhtml % {'sess_id': request.args['sess_id'][0]}

    def do_chat_output(self, request):
        "Open a persistent ChatSession."
        sess_id = request.args['sess_id'][0]
        # Note that this may remove a previous ChatSession from
        # self.sessions:
        self.sessions[sess_id] = ChatSession(self, request, sess_id)
        # The next line is a hack due to Donovan Preston: increases
        # browsers' per-server connection limit, which is normally 2
        # if the server seems to support HTTP 1.1 connection
        # keepalive, to 8.
        request.setHeader('connection', 'close')
        request.write("<html><head><title>session %s</title><body>\n" % sess_id)
        # The response stays open; later chat lines are written to it.
        return server.NOT_DONE_YET

    def render_POST(self, request):
        "Send back 204 No Content to an utterance of a chat line."
        def arg(name):
            return request.args[name][0]
        self.handle_chatline(arg('destination'), arg('sess_id'),
                             arg('chatline'))
        request.setResponseCode(http.NO_CONTENT)
        return ""

    def handle_chatline(self, dest, sess_id, chatline):
        """Send a chat line from a source to a destination, '' meaning 'all'.

        A failure while delivering is reported back to the sender's own
        session. If the sender has no open session there is nowhere to
        report to, so the failure is dropped instead of raising a second
        error from inside the handler.
        """
        try:
            if dest:
                self.sessions[dest].sendmsg(sess_id, '(private) ' + chatline)
                self.sessions[sess_id].sendmsg('server', 'private message sent')
            else:
                # Iterate over a snapshot, in case a session is removed
                # from self.sessions while the line is being delivered.
                for session in list(self.sessions.values()):
                    session.sendmsg(sess_id, chatline)
        except Exception as e:
            sender = self.sessions.get(sess_id)
            if sender is not None:
                sender.sendmsg('error', str(e))

    def delete_session(self, sess_id):
        "Delete a session by ID --- if it's there."
        try:
            del self.sessions[sess_id]
        except KeyError:
            pass
if __name__ == '__main__':
    # Serve the chat channel over TCP and hand control to the reactor loop.
    port = 8086
    site = server.Site(ChatChannel())
    reactor.listenTCP(port, site)
    print(" ".join(["ok, running on port", str(port)]))
    reactor.run()
what is the problem?

Categories

Resources