I have a little problem with caching images in the browser for my App Engine application.
I'm sending Last-Modified, Expires, and Cache-Control headers, but the image is loaded from the server every time.
Here is the header part of the code:
response['Content-Type'] = 'image/jpeg'  # the registered MIME type is image/jpeg, not image/jpg
response['Last-Modified'] = current_time.strftime('%a, %d %b %Y %H:%M:%S GMT')
# Expires must be a formatted HTTP date string, not a datetime object
response['Expires'] = (current_time + timedelta(days=30)).strftime('%a, %d %b %Y %H:%M:%S GMT')
response['Cache-Control'] = 'public, max-age=2592000'
Here is the code for my fix (a copy is on dpaste):
from datetime import datetime, timedelta

from django.http import HttpResponse
from google.appengine.api import memcache

def view_image(request, key):
    data = memcache.get(key)
    if data is not None:
        # Compare the raw header strings; lexicographic >= on HTTP dates is
        # unreliable, so send 304 only on an exact match.
        if request.META.get('HTTP_IF_MODIFIED_SINCE') == data['Last-Modified']:
            data.status_code = 304
        return data
    else:
        image_content_blob = get_image_from_datastore(key)  # placeholder: fetch the image bytes from the datastore
        current_time = datetime.utcnow()
        response = HttpResponse()
        last_modified = current_time - timedelta(days=1)
        response['Content-Type'] = 'image/jpeg'
        response['Last-Modified'] = last_modified.strftime('%a, %d %b %Y %H:%M:%S GMT')
        response['Expires'] = (current_time + timedelta(days=30)).strftime('%a, %d %b %Y %H:%M:%S GMT')
        response['Cache-Control'] = 'public, max-age=315360000'
        response['Date'] = current_time.strftime('%a, %d %b %Y %H:%M:%S GMT')
        response.content = image_content_blob
        memcache.add(key, response, 86400)  # was image_key, which is undefined here
        return response
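As a side note, comparing RFC 1123 date strings with >= compares them alphabetically, which is why the fix above switches to a plain equality check. A more robust variant parses both dates before comparing. A minimal sketch using only the standard library (not_modified is an illustrative helper, not part of the original code):

import time
from email.utils import parsedate

def not_modified(request, last_modified_str):
    # True when the client's cached copy is at least as new as ours.
    ims = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if not ims:
        return False
    ims_t = parsedate(ims)          # 9-tuple, or None if unparseable
    ours_t = parsedate(last_modified_str)
    return (ims_t is not None and ours_t is not None
            and time.mktime(ims_t) >= time.mktime(ours_t))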
I have a program that calls an API every 60 seconds and stores the data. The program runs on a cellular modem that uses Python 2.6. What I'm trying to do is have the variables StartTimeConv and EndTimeConv from the try statement stored so that, if the try statement fails, the except statement can reference them. I've declared them outside of the try statement, but that generated a "referenced before assignment" error. What I'm ultimately trying to accomplish is that, if there's a cell-signal issue or the API service isn't reachable, the start and stop times can still be referenced and the digital I/O triggers can still function. See the sketch after the code for one way to do this.
def Client():
    threading.Timer(60, Client).start()
    # Request Session ID
    request = urllib2.Request(url)
    b64auth = base64.standard_b64encode("%s:%s" % (username, password))
    request.add_header("Authorization", "Basic %s" % b64auth)
    result = urllib2.urlopen(request)
    # Parse and store Session ID
    tree = ET.parse(result)
    xml_data = tree.getroot()
    sessionid = xml_data[1].text
    # Dispatch Event Request
    url1 = "SiteURL".format(sessionid)
    request1 = urllib2.Request(url1)
    result1 = urllib2.urlopen(request1)
    # Read and store sys time
    sys_time = time.localtime()
    # Convert sys time to datetime object
    dt = datetime.fromtimestamp(mktime(sys_time))
    # Parse and store Dispatch Event, start and stop time
    try:
        tree1 = ET.parse(result1)
        xml_data1 = tree1.getroot()
        dispatchEvent = xml_data1[0][0][2].text
        EventStartTime = xml_data1[0][0][14].text
        EventEndTime = xml_data1[0][0][1].text
        # Convert string time to datetime object
        StartTimeConv = datetime.strptime(xml_data1[0][0][14].text, "%a %B %d, %Y %H:%M")
        EndTimeConv = datetime.strptime(xml_data1[0][0][1].text, "%a %B %d, %Y %H:%M")
        print(dispatchEvent)
        print(StartTimeConv)
        print(EndTimeConv)
        print(dt)
    except:
        print("No Event")
        pass
    else:
        if dispatchEvent is not None and dt >= StartTimeConv:
            set_digital_io('D0', 'on')
        elif dispatchEvent is not None and dt <= EndTimeConv:
            set_digital_io('D0', 'off')
        else:
            set_digital_io('D0', 'off')
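One way to get the behavior described above is to keep the last successfully parsed times in module-level variables and fall back to them when the request or parse fails. A minimal sketch of that pattern (last_start, last_end, and fetch_and_parse are illustrative names, not part of the original program):

from datetime import datetime

# Module-level "last known good" values; None until the first successful parse.
last_start = None
last_end = None

def update_times():
    global last_start, last_end
    try:
        # fetch_and_parse() is a hypothetical stand-in for the urllib2/ElementTree
        # code above; it returns the start and end time strings from the XML.
        start_text, end_text = fetch_and_parse()
        last_start = datetime.strptime(start_text, "%a %B %d, %Y %H:%M")
        last_end = datetime.strptime(end_text, "%a %B %d, %Y %H:%M")
    except Exception:
        # Cell-signal or API failure: keep the previous values so the
        # digital I/O logic below can still compare against them.
        pass
    if last_start is not None and last_end is not None:
        now = datetime.now()
        if last_start <= now <= last_end:
            set_digital_io('D0', 'on')
        else:
            set_digital_io('D0', 'off')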
I am completing the 'Python for Everybody' course on Coursera. I am stuck on the 'Mailing List Data - Part I' assignment.
I have the following code:
import sys
import sqlite3
import time
import ssl
from urllib import request
from urllib.parse import urljoin
from urllib.parse import urlparse
import re
from datetime import datetime, timedelta

# Not all systems have this so conditionally define parser
try:
    import dateutil.parser as parser
except:
    pass

def parsemaildate(md):
    # See if we have dateutil
    try:
        pdate = parser.parse(md)  # was parser.parse(tdate); tdate is undefined
        test_at = pdate.isoformat()
        return test_at
    except:
        pass

    # Non-dateutil version - we try our best
    pieces = md.split()
    notz = " ".join(pieces[:4]).strip()

    # Try a bunch of format variations - strptime() is *lame*
    dnotz = None
    for form in ['%d %b %Y %H:%M:%S', '%d %b %Y %H:%M:%S',
                 '%d %b %Y %H:%M', '%d %b %Y %H:%M', '%d %b %y %H:%M:%S',
                 '%d %b %y %H:%M:%S', '%d %b %y %H:%M', '%d %b %y %H:%M']:
        try:
            dnotz = datetime.strptime(notz, form)
            break
        except:
            continue

    if dnotz is None:
        # print('Bad Date:', md)
        return None

    iso = dnotz.isoformat()

    tz = "+0000"
    try:
        tz = pieces[4]
        ival = int(tz)  # Only want numeric timezone values
        if tz == '-0000': tz = '+0000'
        tzh = tz[:3]
        tzm = tz[3:]
        tz = tzh + ":" + tzm
    except:
        pass

    return iso + tz

conn = sqlite3.connect('emreyavuzher.sqlite')
cur = conn.cursor()
conn.text_factory = str

baseurl = "http://mbox.dr-chuck.net/sakai.devel/"

cur.execute('''CREATE TABLE IF NOT EXISTS Messages
    (id INTEGER UNIQUE, email TEXT, sent_at TEXT,
     subject TEXT, headers TEXT, body TEXT)''')

start = 0
cur.execute('SELECT max(id) FROM Messages')
try:
    row = cur.fetchone()
    if row[0] is not None:
        start = row[0]
except:
    start = 0
    row = None

print(start)
many = 0

# Skip up to five messages
skip = 5
while True:
    if (many < 1):
        sval = input('How many messages:')
        if (len(sval) < 1): break
        many = int(sval)

    start = start + 1
    cur.execute('SELECT id FROM Messages WHERE id=?', (start,))
    try:
        row = cur.fetchone()
        if row is not None: continue
    except:
        row = None

    many = many - 1
    url = baseurl + str(start) + '/' + str(start + 1)

    try:
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        document = request.urlopen(url)
        text = document.read()
        if document.getcode() != 200:
            print("Error code=", document.getcode(), url)
            break
    except KeyboardInterrupt:
        print('')
        print('Program interrupted by user...')
        break
    except:
        print("Unable to retrieve or parse page", url)
        print(sys.exc_info()[0])
        break

    print(url, len(text))
    if not text.startswith('From '):
        if skip < 1:
            print(text)
            print("End of mail stream reached...")
            quit()
        print("Skipping badly formed message")
        skip = skip - 1
        continue
However, the code keeps giving me this error:

Traceback (most recent call last):
  File "", line 128, in
TypeError: startswith first arg must be bytes or a tuple of bytes, not str

Would anybody be able to give me a helping hand?
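For context on the error itself: in Python 3, request.urlopen(url).read() returns bytes, so comparing against the str 'From ' raises that TypeError. A minimal sketch of the usual fix, decoding the body right after reading it (utf-8 is an assumption about the archive's encoding):

from urllib import request

document = request.urlopen(url)
text = document.read().decode('utf-8', errors='replace')  # bytes -> str

# Now string comparisons work as intended:
if not text.startswith('From '):
    print("Skipping badly formed message")

Alternatively, keep text as bytes and compare against a bytes literal instead: text.startswith(b'From ').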
I just finished creating my first news web-scraping script and I am quite content with it, even though the code does not look nice at all. I was wondering how I should go about emailing the script's output to myself (a Gmail address) when I run it. I tried smtplib, but it's not working for me.
Here is my current code:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
from datetime import date
from dateutil import parser
import smtplib
from email.mime.text import MIMEText

my_url1 = "https://www.coindesk.com/category/business-news/legal"
my_url2 = "https://cointelegraph.com/tags/bitcoin-regulation"
my_url3 = "https://techcrunch.com/tag/blockchain/"

# Opening up the website, grabbing the page
uFeedOne = uReq(my_url1, timeout=5)
page_one = uFeedOne.read()
uFeedOne.close()

# html parser
page_soup1 = soup(page_one, "html.parser")

# grabs each publication block
containers_one = page_soup1.findAll("a", {"class": "stream-article"})

for container_one in containers_one:
    ## get today's date.
    ## I have taken an offset as the site has older articles than today.
    today = date.today().strftime("%Y, %m, %d")
    ## The actual datetime string is in the datetime attribute of the time tag.
    date_time1 = container_one.time['datetime']
    ## The dateutil package parses the ISO-formatted date and returns a condensed version.
    date1 = parser.parse(date_time1)
    dt1 = date1.strftime("%Y, %m, %d")
    ## Simple comparison
    if dt1 == today:
        link1 = container_one.attrs['href']
        publication_date1 = "published on " + container_one.time.text
        title1 = container_one.h3.text
        description1 = "(CoinDesk)-- " + container_one.p.text
        print("link: " + link1)
        print("publication_date: " + publication_date1)
        print("title: ", title1)
        print("description: " + description1 + " \n")

uFeedTwo = uReq(my_url2, timeout=5)
page_two = uFeedTwo.read()
uFeedTwo.close()

page_soup2 = soup(page_two, "html.parser")
containers_two = page_soup2.findAll("div", {"class": "post-preview-item-inline__content"})

for container_two in containers_two:
    today = date.today().strftime("%Y, %m, %d")
    date_time2 = container_two.time['datetime']
    date2 = parser.parse(date_time2)
    dt2 = date2.strftime("%Y, %m, %d")
    title_container2 = container_two.find("span", {"class": "post-preview-item-inline__title"})
    description_container2 = container_two.find("p", {"class": "post-preview-item-inline__text"}).text
    if dt2 == today:
        link2 = container_two.div.a.attrs['href']
        publication_date2 = "published on " + date2.strftime("%b %d, %Y")
        title2 = title_container2.text
        description2 = "(CoinTelegraph)-- " + description_container2
        print("link: " + link2)
        print("publication_date: " + publication_date2)
        print("title: ", title2)  # was title1, a copy-paste slip
        print("description: " + description2 + " \n")

uFeedThree = uReq(my_url3, timeout=5)
page_three = uFeedThree.read()
uFeedThree.close()

# html parser
page_soup3 = soup(page_three, "html.parser")

# grabs each publication block
containers_three = page_soup3.findAll("div", {"class": "post-block post-block--image post-block--unread"})

for container_three in containers_three:
    today = date.today().strftime("%Y, %m, %d")
    date_time3 = container_three.time['datetime']
    date3 = parser.parse(date_time3)
    dt3 = date3.strftime("%Y, %m, %d")
    keyword1 = "law"
    keyword2 = "legal"
    description_container3 = container_three.find("div", {"class": "post-block__content"}).text.strip()
    # Parentheses added: without them, `and` binds tighter than `or` and the
    # date check only applies to the first keyword.
    if dt3 == today and ((keyword2 in description_container3) or (keyword1 in description_container3)):
        link3 = container_three.header.h2.a.attrs['href']
        publication_date3 = "published on " + date3.strftime("%b %d, %Y")
        title3 = container_three.header.h2.a.text.strip()
        description3 = "(TechCrunch)-- " + description_container3
        print("link: " + link3)
        print("publication_date: " + publication_date3)
        print("title: ", title3)
        print("description: " + description3 + " \n")
I understand that I am supposed to do a variation of this:
# Open a plain text file for reading. For this example, assume that
# the text file contains only ASCII characters.
with open(textfile) as fp:
    # Create a text/plain message
    msg = MIMEText(fp.read())

# me == the sender's email address
# you == the recipient's email address
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server.
s = smtplib.SMTP('localhost')
s.send_message(msg)
s.quit()
This is a code snippet to send mail to anyone using SMTP. The code below is configured for Gmail's SMTP server; any other provider can be configured the same way.
import smtplib
from email.mime.multipart import MIMEMultipart  # Python 3 module paths
from email.mime.text import MIMEText

msg = MIMEMultipart()
msg['From'] = 'me@gmail.com'
msg['To'] = 'you@gmail.com'
msg['Subject'] = 'Enter subject of msg here'

message = 'here is the email'
msg.attach(MIMEText(message))

# GMAIL_SMTP_HOST = 'smtp.gmail.com'
# GMAIL_SMTP_PORT = '587'
mailserver = smtplib.SMTP('smtp.gmail.com', 587)
# secure our email with tls encryption
mailserver.starttls()
# Gmail requires authentication (an app password if 2FA is enabled)
mailserver.login('me@gmail.com', 'password')
mailserver.sendmail('me@gmail.com', 'you@gmail.com', msg.as_string())
mailserver.quit()
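To connect this to the scraper above, one approach is to collect the lines the script currently prints into a list and send them as one message body. A minimal sketch, assuming the same Gmail setup as the snippet above (the addresses and app-password are placeholders):

import smtplib
from email.mime.text import MIMEText

output_lines = []

# ...inside each scraping loop, replace the print() calls with, e.g.:
# output_lines.append("link: " + link1)
# output_lines.append("title: " + title1)

body = "\n".join(output_lines) if output_lines else "No matching articles today."
msg = MIMEText(body)
msg['Subject'] = 'Daily news digest'
msg['From'] = 'me@gmail.com'   # placeholder sender
msg['To'] = 'you@gmail.com'    # placeholder recipient

server = smtplib.SMTP('smtp.gmail.com', 587)
server.starttls()
server.login('me@gmail.com', 'app-password')  # placeholder credentials
server.send_message(msg)
server.quit()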
A little bit of background: I'm using Python 2.7.12 on a Windows 10 computer.
This is by far one of the oddest problems I have ever encountered with Python.
I have written a script that makes a GET request to an API, with the correct headers, and gets some XML data back. For the record, when I paste the script like this in a python file and run it via CMD, it works perfectly fine.
But...
It stops working as soon as I wrap this inside a function. Nothing
else, just wrap it inside a function, and use
if __name__ == '__main__':
    my_new_function()
to run it from CMD, and it won't work anymore. The script still runs, but the API says I have the wrong auth credentials, and thus I don't get any data back.
I went over every string in this code, and it's all ASCII. I also checked the timestamps, and they are all correct.
This is my script:
import time
import hmac
import hashlib
import base64
import requests

SECRET_KEY = 'YYY'
PUBLIC_KEY = 'XXX'
content_type = 'application/xml'
date = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
method = 'GET'
uri = '/uri'

msg = """{method}
{content_type}
{date}
x-bol-date:{date}
{uri}""".format(content_type=content_type,
                date=date,
                method=method,
                uri=uri)

h = hmac.new(
    SECRET_KEY,
    msg, hashlib.sha256)
b64 = base64.b64encode(h.digest())

signature = PUBLIC_KEY + b':' + b64

headers = {'Content-Type': content_type,
           'X-BOL-Date': date,
           'X-BOL-Authorization': signature}

r = requests.get('example.com/uri', headers=headers)
The same code inside a function:
def get_orders():
    SECRET_KEY = 'XXX'
    PUBLIC_KEY = 'YYY'
    content_type = 'application/xml'
    date = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
    method = 'GET'
    uri = '/uri'

    msg = """{method}
    {content_type}
    {date}
    x-bol-date:{date}
    {uri}""".format(content_type=content_type,
                    date=date,
                    method=method,
                    uri=uri)

    h = hmac.new(
        SECRET_KEY,
        msg, hashlib.sha256)
    b64 = base64.b64encode(h.digest())

    signature = PUBLIC_KEY + b':' + b64

    headers = {'Content-Type': content_type,
               'X-BOL-Date': date,
               'X-BOL-Authorization': signature}

    r = requests.get('example.com/uri', headers=headers)
if __name__ == '__main__':
    get_orders()
I think your multi-line string is getting spaces in it when you indent it in a function. Concatenate it on each line instead and it should work.
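To make that concrete: inside the function, each continuation line of the triple-quoted string picks up the leading spaces of the indentation, so the string being signed no longer matches what the server computes. A minimal sketch of two indentation-safe alternatives (build_msg and build_msg_dedent are illustrative helpers; the header layout mirrors the question):

import textwrap

def build_msg(method, content_type, date, uri):
    # Option 1: join the lines explicitly, so indentation never leaks in.
    return '\n'.join([method,
                      content_type,
                      date,
                      'x-bol-date:' + date,
                      uri])

def build_msg_dedent(method, content_type, date, uri):
    # Option 2: keep the triple-quoted string, but strip the common
    # leading whitespace with textwrap.dedent before formatting.
    template = textwrap.dedent("""\
        {method}
        {content_type}
        {date}
        x-bol-date:{date}
        {uri}""")
    return template.format(method=method, content_type=content_type,
                           date=date, uri=uri)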
I'm trying to implement a Python script that will compare the last-modified dates of a local and a remotely hosted file.
If the remote file is newer, it should:
- delete the local file
- download the remote file with the last-modified date intact
The closest answer I've found to this is "Last Modified of file downloaded does not match its HTTP header", but I believe that approach downloads the whole file, so it doesn't save much time or bandwidth.
What I'd like to do is inspect just the remote file's headers rather than download the whole file, which should be quicker.
Here's my current code, which is very messy and noobish (see the string replace, etc.). I'm sure there's a better/quicker way; what can you suggest?
remote_source = 'http://example.com/somefile.xml'
local_source = 'path/to/myfile.xml'

if path.exists(local_source):
    local_source_last_modified = os.path.getmtime(local_source)
    local_source_last_modified = datetime.datetime.fromtimestamp(local_source_last_modified).strftime('(%Y, %m, %d, %H, %M, %S)')
    conn = urllib.urlopen(remote_source)
    remote_source_last_modified = conn.info().getdate('last-modified')
    remote_source_last_modified = str(remote_source_last_modified)
    remote_source_last_modified = remote_source_last_modified.replace(", 0, 1, 0)", ")")
    if local_source_last_modified < remote_source_last_modified:
        pass
    else:
        headers = urlretrieve(remote_source, local_source)[1]
        lmStr = headers.getheader("Last-Modified")
        remote_source_last_modified = mktime(strptime(lmStr, "%a, %d %b %Y %H:%M:%S GMT"))
        os.utime(local_source, (remote_source_last_modified, remote_source_last_modified))
else:
    headers = urlretrieve(remote_source, local_source)[1]
    lmStr = headers.getheader("Last-Modified")
    remote_source_last_modified = mktime(strptime(lmStr, "%a, %d %b %Y %H:%M:%S GMT"))
    os.utime(local_source, (remote_source_last_modified, remote_source_last_modified))
Just in case anybody reads this, here's what I ended up with:
import os
import time
import datetime
import requests
from urllib import urlretrieve
from urllib2 import HTTPError, URLError

def syncCheck(file_path):
    remote_source = 'http://example.com/' + os.path.basename(file_path)
    local_source = file_path
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
    # HEAD request: fetch only the headers, not the file body
    response = requests.head(remote_source, headers=headers)
    remote_source_last_modified = response.headers["last-modified"]
    remote_source_last_modified = time.mktime(
        datetime.datetime.strptime(remote_source_last_modified[:-4],
                                   "%a, %d %b %Y %H:%M:%S").timetuple())
    try:
        if os.path.exists(local_source):
            local_source_last_modified = os.path.getmtime(local_source)
            if local_source_last_modified == remote_source_last_modified:
                return  # already up to date ('break' is a syntax error outside a loop)
            else:
                try:
                    os.remove(local_source)
                except OSError:
                    return
                urlretrieve(remote_source, local_source)
                os.utime(local_source, (remote_source_last_modified, remote_source_last_modified))
        else:
            urlretrieve(remote_source, local_source)
            os.utime(local_source, (remote_source_last_modified, remote_source_last_modified))
    except HTTPError, e:
        print("HTTP Error: " + str(e.fp.read()))
    except URLError, e:
        print("URL Error: " + str(e.reason))