This is my block of code and the messages don't send.
First of all, I moved the main function with the message out to another file and called it from there, but that didn't work either. I also tried several different libraries and different approaches, and I don't know at all why it doesn't work, since there are no errors.
import asyncio
from aiogram import Bot, Dispatcher, types, executor
from db import Datebase
from config import BOT_TOKEN, API_KEY
from apscheduler.schedulers.asyncio import AsyncIOScheduler
import requests
import datetime
from bs4 import BeautifulSoup
import lxml
import random
bot = Bot(BOT_TOKEN)
dp = Dispatcher(bot)
db = Datebase('database.db')
@dp.message_handler(commands=['start'])
async def start(msg: types.Message) -> None:
    if msg.chat.type == 'private':
        if not db.user_exists(msg.from_user.id):
            db.add_user(msg.from_user.id)
        await msg.answer('Добро пожаловать!')
@dp.message_handler()
async def send_info(msg: types.Message) -> None:
    if msg.chat.type == 'private':
        try:
            URL = f'https://api.openweathermap.org/data/2.5/weather?lat=53.9&lon=27.5667&appid={API_KEY}&units=metric&lang=ru'
            r = requests.get(url=URL)
            data = r.json()
            city = data['name']
            cur_weath = round(float(data['main']['temp']))
            feel_weth = round(float(data['main']['feels_like']))
            humidity = data['main']['humidity']
            wind = data['wind']['speed']
            description = data['weather'][0]['description']
            sunrise = datetime.datetime.fromtimestamp(data['sys']['sunrise']).strftime('%H:%M')
            sunset = datetime.datetime.fromtimestamp(data['sys']['sunset']).strftime('%H:%M')
            rub = requests.get('https://www.nbrb.by/api/exrates/rates/RUB?parammode=2')
            data1 = rub.json()
            price_rub = round(float(data1['Cur_OfficialRate']), 2)
            usd = requests.get('https://www.nbrb.by/api/exrates/rates/USD?parammode=2')
            data2 = usd.json()
            price_usd = round(float(data2['Cur_OfficialRate']), 2)
            eur = requests.get('https://www.nbrb.by/api/exrates/rates/EUR?parammode=2')
            data3 = eur.json()
            price_eur = round(float(data3['Cur_OfficialRate']), 2)
            headers = {
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
            }
            req = requests.get('https://wikiphile.ru/570-fraz-o-motivacii/?ysclid=le2s2e8ouu459711332', headers=headers)
            soup = BeautifulSoup(req.text, 'lxml')
            lst = soup.find('ol').find_all('li')
            random_index = random.randint(0, len(lst) - 1)
            await msg.answer(
                f'---------------{datetime.datetime.now().strftime("%d-%m-%Y")}---------------\n'
                '\n'
                '-----Погода-----\n'
                f'Город: {city}\n'
                f'Описание: {description}\n'
                f'Текущая погода: {cur_weath}°\n'
                f'Ощущается как: {feel_weth}°\n'
                f'Скорость ветра: {wind} м/с \n'
                f'Влажность: {humidity}\n'
                f'Рассвет: {sunrise}\n'
                f'Закат: {sunset}\n'
                '\n'
                '-----Курсы валют-----\n'
                'Доллар\n'
                f'Курс по НБРБ: {price_usd} BYN\n'
                'Евро\n'
                f'Курс по НБРБ: {price_eur} BYN\n'
                'Российский рубль (за 100р)\n'
                f'Курс по НБРБ: {price_rub} BYN\n'
                '\n'
                '-----Рандомная цитата-----\n'
                f'{lst[random_index].text}'
            )
        except Exception as e:
            # a bare "except:" swallows every error silently, which is why there are "no errors"
            await msg.answer(f'Ошибка: {e}')
async def sms() -> None:
    scheduler = AsyncIOScheduler(timezone='Europe/Minsk')
    # kwargs passes the *class* types.Message, not an actual incoming message,
    # and minute + 1 is invalid whenever the current minute is 59
    scheduler.add_job(send_info, trigger='cron', hour=datetime.datetime.now().hour, minute=datetime.datetime.now().minute + 1, start_date=datetime.datetime.now(), kwargs={'msg': types.Message})
    scheduler.start()

if __name__ == '__main__':
    # sms() is never called here, so the scheduler never starts
    executor.start_polling(dp, skip_updates=True)
I have tried many kinds of scheduler modules and my code doesn't work.
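For reference, here is a minimal sketch of how an APScheduler job is usually wired into aiogram's startup. The chat id and the send_daily name are illustrative assumptions; in practice you would loop over the ids stored in the database, since a scheduled job has no incoming message to answer:

import datetime
from aiogram import Bot, Dispatcher, types, executor
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from config import BOT_TOKEN

bot = Bot(BOT_TOKEN)
dp = Dispatcher(bot)

async def send_daily(chat_id: int) -> None:
    # a scheduled job must address a concrete chat id, not the types.Message class
    await bot.send_message(chat_id, 'Доброе утро!')

async def on_startup(dp: Dispatcher) -> None:
    scheduler = AsyncIOScheduler(timezone='Europe/Minsk')
    # hypothetical chat id; replace with the ids from your database
    scheduler.add_job(send_daily, trigger='cron', hour=8, minute=0, args=[123456789])
    scheduler.start()  # started inside the running event loop, so jobs actually fire

if __name__ == '__main__':
    executor.start_polling(dp, skip_updates=True, on_startup=on_startup)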
I can't wrap my head around how to accomplish the following scenario:
On my main.py and request.py I have a reference to a config.py which contains some configuration variables.
In this sample, an offline variable that is either True or False.
What I would like to do is:
If, e.g., in my request.py I set config.offline = True, then I would like to do something in my main.py.
But main.py also references request.py, so I cannot call any functions from request.py.
Any ideas on how I can do this?
I have over 1000 lines of code, so I cannot show everything, but I have tried to show the most important parts:
main.py:
import config as cfg
import request as req

def doStuffWhenOfflineVarChanges(newState):
    print(newState)
config.py:
offline = True
request.py:
import requests
from datetime import datetime
import config as cfg

def logEntrance(barCode, noOfGuests, dt=None):
    if dt is None:  # a datetime.now() default would be evaluated only once, at import time
        dt = datetime.now()
    date = dt.strftime("%Y-%m-%d")
    time = dt.strftime("%H:%M:%S")
    headers = {'Content-type': 'application/json', 'Authorization': cfg.auth}
    url = 'https://xxxxx.xxxxxxxx.xxx/' + cfg.customerId + '/api.ashx'
    params = {"inputtype": "logentrances"}
    pl = [{"Barcode": barCode, "PoeID": cfg.poeId, "UserID": cfg.userId, "EntranceDate": date, "EntranceTime": time, "NoOfGuests": str(noOfGuests), "OfflineMode": cfg.offline}]
    #print(url)
    print(pl)
    r = None  # otherwise r is unbound when the request raises
    try:
        r = requests.post(url, json=pl, params=params, headers=headers)
        print(r.status_code)
    except requests.exceptions.RequestException:
        cfg.offline = True
    return r
You need a callback function to handle the change in your config.py file!
# config.py
offline = False

def doStuffWhenOfflineVarChanges(newState):
    print(newState)
# request.py
import requests
from datetime import datetime
import config as cfg

class OfflineState:
    def __init__(self, callback):
        self._callback = callback
        self._offline = cfg.offline

    @property
    def offline(self):
        return self._offline

    @offline.setter
    def offline(self, value):
        self._offline = value
        self._callback(value)  # every assignment to .offline fires the callback

offline_state = OfflineState(cfg.doStuffWhenOfflineVarChanges)

def logEntrance(barCode, noOfGuests, dt=None):
    if dt is None:
        dt = datetime.now()
    date = dt.strftime("%Y-%m-%d")
    time = dt.strftime("%H:%M:%S")
    headers = {'Content-type': 'application/json', 'Authorization': cfg.auth}
    url = 'https://xxxxx.xxxxxxxx.xxx/' + cfg.customerId + '/api.ashx'
    params = {"inputtype": "logentrances"}
    pl = [{"Barcode": barCode, "PoeID": cfg.poeId, "UserID": cfg.userId, "EntranceDate": date, "EntranceTime": time, "NoOfGuests": str(noOfGuests), "OfflineMode": offline_state.offline}]
    r = None
    try:
        r = requests.post(url, json=pl, params=params, headers=headers)
    except requests.exceptions.RequestException:
        offline_state.offline = True
    return r
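As a quick usage illustration, assuming the three files above (the barcode and guest count are made up): importing request and assigning to offline_state.offline is all it takes to trigger the callback.

# main.py
import request as req

req.offline_state.offline = True   # prints: True (doStuffWhenOfflineVarChanges runs)
req.logEntrance('ABC123', 2)       # a failed POST flips the state the same way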
How can I improve the multithreading speed in my code?
My code takes 130 seconds with 100 threads to make 700 requests, which is really slow and frustrating considering that I use 100 threads.
My code edits the parameter values of a URL and makes a request to the edited URL as well as to the original (unedited) one. The URLs are read from a file (urls.txt).
Let me show you an example.
Let's consider the following url:
https://www.test.com/index.php?parameter=value1&parameter2=value2
The url contains 2 parameters so my code will make 3 requests.
1 request to the original url:
https://www.test.com/index.php?parameter=value1&parameter2=value2
1 request to the first modified value:
https://www.test.com/index.php?parameter=replaced_value&parameter2=value2
1 request to the second modified value:
https://www.test.com/index.php?parameter=value1&parameter2=replaced_value
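(For reference, a small sketch of deriving those modified URLs with urllib.parse; this helper is illustrative and not part of my script:)

from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

def modified_urls(url, replacement='replaced_value'):
    parts = urlparse(url)
    pairs = parse_qsl(parts.query)
    # replace one parameter value at a time, keeping the others intact
    for i in range(len(pairs)):
        edited = list(pairs)
        edited[i] = (edited[i][0], replacement)
        yield urlunparse(parts._replace(query=urlencode(edited)))

print(list(modified_urls('https://www.test.com/index.php?parameter=value1&parameter2=value2')))
# ['https://www.test.com/index.php?parameter=replaced_value&parameter2=value2',
#  'https://www.test.com/index.php?parameter=value1&parameter2=replaced_value']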
I have tried using asyncio for this, but I had more success with concurrent.futures.
I even tried increasing the number of threads, which I thought was the issue at first, but it wasn't: if I increased the thread count considerably, the script would freeze at start for 30-50 seconds, and it really didn't increase the speed as I expected.
I assume this is an issue with how I build up the multithreading in my code, because I've seen other people achieve incredible speeds with concurrent.futures.
import requests
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
start = time.time()
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
def make_request(url2):
    try:
        if '?' in url2 and '=' in url2:  # only process urls that carry a query string
            request_1 = requests.get(url2, headers=headers, timeout=10)
            url2_modified = url2.split("?")[1]
            times = url2_modified.count("&") + 1
            for x in range(0, times):
                split1 = url2_modified.split("&")[x]
                value = split1.split("=")[1]
                parameter = split1.split("=")[0]
                url = url2.replace('=' + value, '=1')
                request_2 = requests.get(url, stream=True, headers=headers, timeout=10)
                html_1 = request_1.text
                html_2 = request_2.text
                print(str(request_1.status_code) + ' - ' + url2)  # status_code is an int, convert before concatenating
                print(str(request_2.status_code) + ' - ' + url)
    except requests.exceptions.RequestException as e:
        return e

def runner():
    threads = []
    with ThreadPoolExecutor(max_workers=100) as executor:
        file1 = open('urls.txt', 'r', errors='ignore')
        Lines = file1.readlines()
        count = 0
        for line in Lines:
            count += 1
            threads.append(executor.submit(make_request, line.strip()))

runner()
end = time.time()
print(end - start)
Inside the loop in make_request you run a plain requests.get, and nothing runs it in a separate thread (or with any other method) to make it faster, so it has to wait for the end of the previous request before running the next one.
In make_request I use another ThreadPoolExecutor to run every requests.get (created in the loop) in a separate thread:
executor.submit(make_modified_request, modified_url)
and it gives me a time of ~1.2s.
If I use a normal
make_modified_request(modified_url)
then it gives me a time of ~3.2s.
Minimal working example:
I use real urls https://httpbin.org/get so everyone can simply copy and run it.
from concurrent.futures import ThreadPoolExecutor
import requests
import time
#import urllib.parse
# --- constants --- (PEP8: UPPER_CASE_NAMES)
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
# --- functions ---
def make_modified_request(url):
    """Send modified url."""
    print('send:', url)
    response = requests.get(url, stream=True, headers=HEADERS)
    print(response.status_code, '-', url)
    html = response.text  # ???
    # ... code to process HTML ...

def make_request(url):
    """Send normal url and create threads with modified urls."""
    threads = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        print('send:', url)
        # send base url
        response = requests.get(url, headers=HEADERS)
        print(response.status_code, '-', url)
        html = response.text  # ???

        #parts = urllib.parse.urlparse(url)
        #print('query:', parts.query)
        #arguments = urllib.parse.parse_qs(parts.query)
        #print('arguments:', arguments)  # dict {'a': ['A'], 'b': ['B'], 'c': ['C'], 'd': ['D'], 'e': ['E']}

        arguments = url.split("?")[1]
        arguments = arguments.split("&")
        arguments = [arg.split("=") for arg in arguments]
        print('arguments:', arguments)  # list [['a', 'A'], ['b', 'B'], ['c', 'C'], ['d', 'D'], ['e', 'E']]

        for name, value in arguments:
            modified_url = url.replace('='+value, '=1')
            print('modified_url:', modified_url)
            # run thread with modified url
            threads.append(executor.submit(make_modified_request, modified_url))
            # run normal function with modified url
            #make_modified_request(modified_url)

    print('[make_request] len(threads):', len(threads))

def runner():
    threads = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        #fh = open('urls.txt', errors='ignore')
        fh = [
            'https://httpbin.org/get?a=A&b=B&c=C&d=D&e=E',
            'https://httpbin.org/get?f=F&g=G&h=H&i=I&j=J',
            'https://httpbin.org/get?k=K&l=L&m=M&n=N&o=O',
            'https://httpbin.org/get?a=A&b=B&c=C&d=D&e=E',
            'https://httpbin.org/get?f=F&g=G&h=H&i=I&j=J',
            'https://httpbin.org/get?k=K&l=L&m=M&n=N&o=O',
        ]
        for line in fh:
            url = line.strip()
            # create thread with url
            threads.append(executor.submit(make_request, url))

    print('[runner] len(threads):', len(threads))
# --- main ---
start = time.time()
runner()
end = time.time()
print('time:', end - start)
BTW:
I was thinking of using a single
executor = ThreadPoolExecutor(max_workers=10)
and later using the same executor in all functions; maybe it would run a little faster, but at the moment I don't have working code.
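For completeness, a rough sketch of that single-executor idea; treat the structure as an assumption rather than tested code. The trick is to return the nested futures so the main thread can wait on them before shutting the pool down (URLs are assumed to carry a query string):

from concurrent.futures import ThreadPoolExecutor, as_completed
import requests

HEADERS = {'User-Agent': 'Mozilla/5.0'}
executor = ThreadPoolExecutor(max_workers=20)  # one shared pool for everything

def make_modified_request(url):
    response = requests.get(url, headers=HEADERS, timeout=10)
    print(response.status_code, '-', url)

def make_request(url):
    response = requests.get(url, headers=HEADERS, timeout=10)
    print(response.status_code, '-', url)
    arguments = [arg.split("=") for arg in url.split("?")[1].split("&")]
    # submit the modified urls to the same shared pool and hand the
    # futures back so the caller can wait on them
    return [executor.submit(make_modified_request, url.replace('=' + value, '=1'))
            for name, value in arguments]

urls = ['https://httpbin.org/get?a=A&b=B', 'https://httpbin.org/get?c=C&d=D']
top_level = [executor.submit(make_request, u) for u in urls]
nested = [f for t in as_completed(top_level) for f in t.result()]
for f in as_completed(nested):
    f.result()  # propagate any exception
executor.shutdown()  # only after every nested future has finished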
I am trying to download an Excel file from a specific website. On my local computer it works perfectly:
>>> r = requests.get('http://www.health.gov.il/PublicationsFiles/IWER01_2004.xls')
>>> r.status_code
200
>>> r.content
b'\xd0\xcf\x11\xe0\xa1\xb1...\x00\x00' # Long binary string
But when I connect from a remote Ubuntu server, I get a page about enabling cookies/JavaScript instead:
>>> r = requests.get('http://www.health.gov.il/PublicationsFiles/IWER01_2004.xls')
>>> r.status_code
200
>>> r.content
b'<HTML>\n<head>\n<script>\nChallenge=141020;\nChallengeId=120854618;\nGenericErrorMessageCookies="Cookies must be enabled in order to view this page.";\n</script>\n<script>\nfunction test(var1)\n{\n\tvar var_str=""+Challenge;\n\tvar var_arr=var_str.split("");\n\tvar LastDig=var_arr.reverse()[0];\n\tvar minDig=var_arr.sort()[0];\n\tvar subvar1 = (2 * (var_arr[2]))+(var_arr[1]*1);\n\tvar subvar2 = (2 * var_arr[2])+var_arr[1];\n\tvar my_pow=Math.pow(((var_arr[0]*1)+2),var_arr[1]);\n\tvar x=(var1*3+subvar1)*1;\n\tvar y=Math.cos(Math.PI*subvar2);\n\tvar answer=x*y;\n\tanswer-=my_pow*1;\n\tanswer+=(minDig*1)-(LastDig*1);\n\tanswer=answer+subvar2;\n\treturn answer;\n}\n</script>\n<script>\nclient = null;\nif (window.XMLHttpRequest)\n{\n\tvar client=new XMLHttpRequest();\n}\nelse\n{\n\tif (window.ActiveXObject)\n\t{\n\t\tclient = new ActiveXObject(\'MSXML2.XMLHTTP.3.0\');\n\t};\n}\nif (!((!!client)&&(!!Math.pow)&&(!!Math.cos)&&(!![].sort)&&(!![].reverse)))\n{\n\tdocument.write("Not all needed JavaScript methods are supported.<BR>");\n\n}\nelse\n{\n\tclient.onreadystatechange = function()\n\t{\n\t\tif(client.readyState == 4)\n\t\t{\n\t\t\tvar MyCookie=client.getResponseHeader("X-AA-Cookie-Value");\n\t\t\tif ((MyCookie == null) || (MyCookie==""))\n\t\t\t{\n\t\t\t\tdocument.write(client.responseText);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\t\n\t\t\tvar cookieName = MyCookie.split(\'=\')[0];\n\t\t\tif (document.cookie.indexOf(cookieName)==-1)\n\t\t\t{\n\t\t\t\tdocument.write(GenericErrorMessageCookies);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\twindow.location.reload(true);\n\t\t}\n\t};\n\ty=test(Challenge);\n\tclient.open("POST",window.location,true);\n\tclient.setRequestHeader(\'X-AA-Challenge-ID\', ChallengeId);\n\tclient.setRequestHeader(\'X-AA-Challenge-Result\',y);\n\tclient.setRequestHeader(\'X-AA-Challenge\',Challenge);\n\tclient.setRequestHeader(\'Content-Type\' , \'text/plain\');\n\tclient.send();\n}\n</script>\n</head>\n<body>\n<noscript>JavaScript must be enabled in order to view this page.</noscript>\n</body>\n</HTML>'
Locally I run macOS, which has Chrome installed (I'm not actively using it for the script, but maybe it's related?); remotely I run Ubuntu on DigitalOcean without any GUI browser installed.
The behavior of requests has nothing to do with what browsers are installed on the system; it does not depend on or interact with them in any way.
The problem here is that the resource you are requesting has some kind of "bot mitigation" mechanism enabled to prevent just this kind of access. It returns some javascript with logic that needs to be evaluated, and the results of that logic are then used for an additional request to "prove" you're not a bot.
Luckily, it appears that this specific mitigation mechanism has been solved before, and I was able to quickly get this request working utilizing the challenge-solving functions from that code:
from math import cos, pi, floor
import requests
URL = 'http://www.health.gov.il/PublicationsFiles/IWER01_2004.xls'
def parse_challenge(page):
    """
    Parse a challenge given by mmi and mavat's web servers, forcing us to solve
    some math stuff and send the result as a header to actually get the page.
    This logic is pretty much copied from https://github.com/R3dy/jigsaw-rails/blob/master/lib/breakbot.rb
    """
    top = page.split('<script>')[1].split('\n')
    challenge = top[1].split(';')[0].split('=')[1]
    challenge_id = top[2].split(';')[0].split('=')[1]
    return {'challenge': challenge, 'challenge_id': challenge_id, 'challenge_result': get_challenge_answer(challenge)}

def get_challenge_answer(challenge):
    """
    Solve the math part of the challenge and get the result
    """
    arr = list(challenge)
    last_digit = int(arr[-1])
    arr.sort()
    min_digit = int(arr[0])
    subvar1 = (2 * int(arr[2])) + int(arr[1])
    subvar2 = str(2 * int(arr[2])) + arr[1]
    power = ((int(arr[0]) * 1) + 2) ** int(arr[1])
    x = (int(challenge) * 3 + subvar1)
    y = cos(pi * subvar1)
    answer = x * y
    answer -= power
    answer += (min_digit - last_digit)
    answer = str(int(floor(answer))) + subvar2
    return answer

def main():
    s = requests.Session()
    r = s.get(URL)
    if 'X-AA-Challenge' in r.text:
        challenge = parse_challenge(r.text)
        r = s.get(URL, headers={
            'X-AA-Challenge': challenge['challenge'],
            'X-AA-Challenge-ID': challenge['challenge_id'],
            'X-AA-Challenge-Result': challenge['challenge_result']
        })
        yum = r.cookies
        r = s.get(URL, cookies=yum)
    print(r.content)

if __name__ == '__main__':
    main()
You can use this code to avoid the block; it relies on the requests_html package:

from requests_html import HTMLSession

url = 'your url comes here'
s = HTMLSession()
s.headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
r = s.get(url)
r.html.render(timeout=8000)
print(r.status_code)
print(r.content)
Full disclosure: Python rookie. I'm trying to use code shared by other people to scrape a web page (a real estate listing) by tweaking the arguments. My questions are probably very rudimentary, so if you can provide some links for further reading and study, that would be great too. I just can't seem to figure out how to proceed from this step. Thanks for your time in advance.
from lxml import html
import requests
import unicodecsv as csv
import argparse
def parse(zipcode, filter=None):
    if filter == "newest":
        url = "https://www.zillow.com/homes/for_sale/{0}/0_singlestory/days_sort".format(zipcode)
    elif filter == "cheapest":
        url = "https://www.zillow.com/homes/for_sale/{0}/0_singlestory/pricea_sort/".format(zipcode)
    else:
        url = "https://www.zillow.com/homes/for_sale/{0}_rb/?fromHomePage=true&shouldFireSellPageImplicitClaimGA=false&fromHomePageTab=buy".format(zipcode)
    for i in range(5):
        # try:
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, sdch, br',
            'accept-language': 'en-GB,en;q=0.8,en-US;q=0.6,ml;q=0.4',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
        }
        response = requests.get(url, headers=headers)
        print(response.status_code)
        parser = html.fromstring(response.text)
        search_results = parser.xpath("//div[@id='search-results']//article")
        properties_list = []
        for properties in search_results:
            raw_address = properties.xpath(".//span[@itemprop='address']//span[@itemprop='streetAddress']//text()")
            raw_city = properties.xpath(".//span[@itemprop='address']//span[@itemprop='addressLocality']//text()")
            raw_state = properties.xpath(".//span[@itemprop='address']//span[@itemprop='addressRegion']//text()")
            raw_postal_code = properties.xpath(".//span[@itemprop='address']//span[@itemprop='postalCode']//text()")
            raw_price = properties.xpath(".//span[@class='zsg-photo-card-price']//text()")
            raw_info = properties.xpath(".//span[@class='zsg-photo-card-info']//text()")
            raw_broker_name = properties.xpath(".//span[@class='zsg-photo-card-broker-name']//text()")
            url = properties.xpath(".//a[contains(@class,'overlay-link')]/@href")
            raw_title = properties.xpath(".//h4//text()")
            address = ' '.join(' '.join(raw_address).split()) if raw_address else None
            city = ''.join(raw_city).strip() if raw_city else None
            state = ''.join(raw_state).strip() if raw_state else None
            postal_code = ''.join(raw_postal_code).strip() if raw_postal_code else None
            price = ''.join(raw_price).strip() if raw_price else None
            info = ' '.join(' '.join(raw_info).split()).replace(u"\xb7", ',')
            broker = ''.join(raw_broker_name).strip() if raw_broker_name else None
            title = ''.join(raw_title) if raw_title else None
            property_url = "https://www.zillow.com" + url[0] if url else None
            is_forsale = properties.xpath('.//span[@class="zsg-icon-for-sale"]')
            properties = {
                'address': address,
                'city': city,
                'state': state,
                'postal_code': postal_code,
                'price': price,
                'facts and features': info,
                'real estate provider': broker,
                'url': property_url,
                'title': title
            }
            if is_forsale:
                properties_list.append(properties)
        return properties_list
        # except:
        #     print("Failed to process the page", url)
if __name__ == "__main__":
    argparser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('zipcode', help='')
    sortorder_help = """
    available sort orders are :
    newest : Latest property details,
    cheapest : Properties with cheapest price
    """
    argparser.add_argument('sort', nargs='?', help=sortorder_help, default='Homes For You')
    args = argparser.parse_args()
    zipcode = args.zipcode
    sort = args.sort
    print("Fetching data for %s" % (zipcode))
    scraped_data = parse(zipcode, sort)
    print("Writing data to output file")
    with open("properties-%s.csv" % (zipcode), 'wb') as csvfile:
        fieldnames = ['title', 'address', 'city', 'state', 'postal_code', 'price', 'facts and features', 'real estate provider', 'url']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in scraped_data:
            writer.writerow(row)
I usually don't expect code to work from the get-go, so I was ready to tweak it. After I ran it, an error appeared:
I know the first message told me to run "%tb", but I don't know what to do about the second one; where shall I run 'exit' or 'quit'?
After I ran "%tb", the following messages appeared:
SystemExit Traceback (most recent call last)
<ipython-input-29-dcd1916da548> in <module>()
76 """
77 argparser.add_argument('sort',nargs='?',help = sortorder_help,default ='Homes For You')
---> 78 args = argparser.parse_args()
79 zipcode = args.zipcode
80 sort = args.sort
C:\Users\AppData\Local\Continuum\Anaconda3\lib\argparse.py in parse_args(self, args, namespace)
1731 if argv:
1732 msg = _('unrecognized arguments: %s')
-> 1733 self.error(msg % ' '.join(argv))
1734 return args
1735
C:\Users\AppData\Local\Continuum\Anaconda3\lib\argparse.py in error(self, message)
2387 self.print_usage(_sys.stderr)
2388 args = {'prog': self.prog, 'message': message}
-> 2389 self.exit(2, _('%(prog)s: error: %(message)s\n') % args)
C:\Users\AppData\Local\Continuum\Anaconda3\lib\argparse.py in exit(self, status, message)
2374 if message:
2375 self._print_message(message, _sys.stderr)
-> 2376 _sys.exit(status)
2377
2378 def error(self, message):
SystemExit: 2
What shall I do with this? Is there something I should do on the command line to fix it?
Thanks again.
I had some issues with the indentation which I had to fix, but I'm going to assume those came from copying the code over from Stack Overflow.
Other than that, everything seems to be working.
Just run the program from the command line with the command:
python YourFileName.py 12345
where 12345 is the zip code you want to query.
Make sure you are running it with Python 3, not IPython, as mentioned in the comments.
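If you do want to stay inside a notebook or IPython, argparse can also be handed its arguments explicitly instead of reading them from sys.argv; a small illustration (the zip code is made up):

args = argparser.parse_args(['12345'])            # as if run as: python YourFileName.py 12345
# or, while experimenting, ignore any extra flags the notebook injects:
args, _unknown = argparser.parse_known_args(['12345', 'newest'])

parse_args raising SystemExit on bad input is also why the notebook showed "SystemExit: 2" instead of a normal error message.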
I was wondering how I would go about checking HTTP headers to determine whether the request is valid or malformed. How can I do this in Python, more specifically, how can I do this in GAE?
For debugging and viewing the request together with its headers, I use the following DDTHandler class.
import cgi
import wsgiref.handlers
import webapp2
class DDTHandler(webapp2.RequestHandler):
    def __start_display(self):
        self.response.out.write("<!--\n")

    def __end_display(self):
        self.response.out.write("-->\n")

    def __show_dictionary_items(self, dictionary, title):
        if (len(dictionary) > 0):
            request = self.request
            out = self.response.out
            out.write("\n" + title + ":\n")
            for key, value in dictionary.iteritems():
                out.write(key + " = " + value + "\n")

    def __show_request_members(self):
        request = self.request
        out = self.response.out
        out.write(request.url + "\n")
        out.write("Query = " + request.query_string + "\n")
        out.write("Remote = " + request.remote_addr + "\n")
        out.write("Path = " + request.path + "\n\n")
        out.write("Request payload:\n")
        if (len(request.arguments()) > 0):
            for argument in request.arguments():
                value = cgi.escape(request.get(argument))
                out.write(argument + " = " + value + "\n")
        else:
            out.write("Empty\n")
        self.__show_dictionary_items(request.headers, "Headers")
        self.__show_dictionary_items(request.cookies, "Cookies")

    def view_request(self):
        self.__start_display()
        self.__show_request_members()
        self.__end_display()

    def view(self, aString):
        self.__start_display()
        self.response.out.write(aString + "\n")
        self.__end_display()
Example:
class RootPage(DDTHandler):
    def get(self):
        self.view_request()

This will output the request, including the headers.
So check the code and take what you need. Though, as said, a malformed "invalid" request probably won't even hit your app.
<!--
http://localhost:8081/
Query =
Remote = 127.0.0.1
Path = /
Request payload:
Empty
Headers:
Referer = http://localhost:8081/_ah/login?continue=http%3A//localhost%3A8081/
Accept-Charset = ISO-8859-7,utf-8;q=0.7,*;q=0.3
Cookie = hl=en_US; dev_appserver_login="test@example.com:False:185804764220139124118"
User-Agent = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17
Host = localhost:8081
Accept = text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language = en-US,en;q=0.8,el;q=0.6
Cookies:
dev_appserver_login = test@example.com:False:185804764220139124118
hl = en_US
-->
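And a minimal sketch of actually acting on a header once you can see it; the check itself is a made-up example, only request.headers and abort() come from webapp2:

class ValidatingHandler(webapp2.RequestHandler):
    def get(self):
        # WebOb-style header access; returns the default when the header is absent
        user_agent = self.request.headers.get('User-Agent', '')
        if not user_agent:
            self.abort(400)  # reject the request as malformed
        self.response.out.write('OK')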