I have made an API on a remote server using Bottle. When I start hosting and then try to access the API from a browser, the request gets no response; in fact, the request is not reaching the server at all. However, when I use the same URL with wget, I get the required response. Here is the code:
from pymongo import MongoClient
import json
from twython import Twython
from bottle import route, request, response, run
client = MongoClient()
db = client.PWSocial
tweets = db.tweets
follower_count = db.follower_count
APP_KEY = 'XXXX'
APP_SECRET = 'XXXX'
OAUTH_TOKEN = 'XXXX'
OAUTH_TOKEN_SECRET = 'XXXX'
twitter = Twython(APP_KEY, APP_SECRET,OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
id_list = [57947109, 183093247, 89443197, 431336956]
@route('/')
def hello():
    dict1 = {'me': 'hello'}
    result = json.dumps(dict1)
    return result

@route('/api/follower')
def disp_follower():
    ac_id = request.query.id
    fc = list(follower_count.find({'id': ac_id}))
    mydict = fc[0]
    del mydict['_id']
    result = json.dumps(mydict)
    return result

@route('/api/retweet')
def rt():
    ac_id = request.query.id
    retweets = db.retweets
    rt = list(retweets.find({'usr_id': ac_id}))
    result = json.dumps(rt)
    return result

@route('/api/favorite')
def fav():
    ac_id = request.query.id
    retweets = db.retweets
    rt = list(retweets.find({'id': ac_id}, sort=[('rt_count', -1)], limit=100))
    mydict = {}
    for i in rt:
        a = i.get('id')
        status = twitter.show_status(id=a)
        b = status.get('favorite_count')
        mydict[a] = b
    result = json.dumps(mydict)
    return result

@route('/api/max_rt')
def most_rt():
    ac_id = request.query.id
    retweets = db.retweets
    rt = list(retweets.find({'usr_id': ac_id}, {'usr_id': 57947109}, sort=[('rt_count', -1)]))
    result = json.dumps(rt)
    return result

run(host='180.179.212.200', debug=True)
Can anyone suggest a reason and a solution for this?
Found the answer; it's actually a simple firewall issue. A hole has to be created in the firewall for any such API to run.
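If it helps anyone else diagnose the same thing: a quick way to check from a different machine whether the port is reachable at all is a small socket probe like the one below. This is only a sketch; it assumes Bottle's default port 8080, since run() above does not set one.

import socket

# Probe the Bottle host/port from an outside machine. If this times out or is
# refused while wget on the server itself works, the firewall is the likely culprit.
HOST, PORT = '180.179.212.200', 8080

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(5)
try:
    sock.connect((HOST, PORT))
    print('Port is open - requests can reach the server')
except (socket.timeout, socket.error) as exc:
    print('Port looks blocked or filtered: %s' % exc)
finally:
    sock.close()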
So I've been trying to write code to print my Gmail inbox onto a website. I want to further develop this to keep only certain data from the email and write it to a database. However, it does not seem to me that there is anything wrong with the code, but localhost:8080 (the port I'm using) does not load at all. The browser shows the loading icon when trying to access the page, but it never loads, even after hours. The command line does not respond with any errors. I also have the Gmail IMAP settings correct, and I have tried it with Outlook's email as well. Here is the code:
import webapp2
import smtplib
import time
import imaplib
import email
class ReadMail(webapp2.RequestHandler):
    def get(self):
        mail = imaplib.IMAP4('xxx@gmail.com', 993)
        mail.login('email@gmail.com', 'password')
        type, data = mail.search(None, 'ALL')
        mail_ids = data[0]
        id_list = mail_ids.split()
        first_email_id = int(id_list[0])
        latest_email_id = int(id_list[-1])
        for i in range(latest_email_id, first_email_id, -1):
            typ, data = mail.fetch(i, '(RFC822)')
            for response_part in data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_string(response_part[1])
                    email_subject = msg['subject']
                    email_from = msg['from']
                    self.response.headers["Content-Type"] = "text/plain"
                    self.response.write("From:" + email_from)
                    self.response.write("Subject:" + email_subject)

routes = [('/', ReadMail),]
app = webapp2.WSGIApplication(routes, debug=True)
app.yaml is correctly set up as well. This code works with something really simple, such as only containing a print "this". Hopefully someone can help with my problem, thanks in advance!
So after a while I got it working by making my own WSGI application file instead of using webapp2. There are still some issues, such as the message being formatted wrong, but this is my code now:
from pyramid.config import Configurator
from pyramid.response import Response
import email, getpass, imaplib, os, re
import sys
detach_dir = "C:\OTHERS\CS\PYTHONPROJECTS"
def imaptest(request):
    m = imaplib.IMAP4_SSL("imap.gmail.com")
    m.login("testi.protokolla@gmail.com", "testiprotokolla221")
    m.select("INBOX")
    resp, items = m.search(None, '(FROM "vallu.toivonen96@gmail.com")')
    items = items[0].split()
    my_msg = []
    msg_cnt = 0
    break_ = False
    for emailid in items[::1]:
        resp, data = m.fetch(emailid, "(RFC822)")
        if break_:
            break
        for response_part in data:
            if isinstance(response_part, tuple):
                msg = email.message_from_string(response_part[1])
                varSubject = msg['subject']
                varDate = msg['date']
                if varSubject[0] == '$':
                    r, d = m.fetch(emailid, "(UID BODY[TEXT])")
                    ymd = email.utils.parsedate(varDate)[0:3]
                    my_msg.append([email.message_from_string(d[0][1]), ymd])
                    msg_cnt += 1
    # Print as HTML
    return Response("Your latest Email:" + str(msg), content_type='text/html')
config = Configurator()
config.add_route('imaptest', '/imaptest')
config.add_view(imaptest, route_name='imaptest')
app = config.make_wsgi_app()
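For completeness: the snippet above only builds the WSGI app and never serves it. A minimal way to run it locally (wsgiref and port 8080 are my own choices here, not part of the original code) is:

from wsgiref.simple_server import make_server

# Serve the Pyramid app built above on all interfaces, port 8080.
server = make_server('0.0.0.0', 8080, app)
server.serve_forever()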
I am trying to get started with the Plaid API. I created my account to get the API keys and I have the quickstart project. I put my keys in the code (they are not included in the code below), and when I run it I use the sandbox credentials. Unfortunately, after the login succeeds I always receive the same error when trying to receive the access token:
HTTP500: SERVER ERROR - The server encountered an unexpected condition
that prevented it from fulfilling the request.(XHR)POST -
http://127.0.0.1:5000/get_access_token
Here is the code:
import os
import datetime
import plaid
from flask import Flask
from flask import render_template
from flask import request
from flask import jsonify
app = Flask(__name__)
# Fill in your Plaid API keys - https://dashboard.plaid.com/account/keys
PLAID_CLIENT_ID = os.getenv('PLAID_CLIENT_ID')
PLAID_SECRET = os.getenv('PLAID_SECRET')
PLAID_PUBLIC_KEY = os.getenv('PLAID_PUBLIC_KEY')
# Use 'sandbox' to test with Plaid's Sandbox environment
# (username: user_good, password: pass_good).
# Use `development` to test with live users and credentials and `production`
# to go live.
PLAID_ENV = os.getenv('PLAID_ENV', 'sandbox')

client = plaid.Client(client_id=PLAID_CLIENT_ID, secret=PLAID_SECRET,
                      public_key=PLAID_PUBLIC_KEY, environment=PLAID_ENV)

@app.route("/")
def index():
    return render_template('index.ejs', plaid_public_key=PLAID_PUBLIC_KEY,
                           plaid_environment=PLAID_ENV)

access_token = None
public_token = None

@app.route("/get_access_token", methods=['POST'])
def get_access_token():
    global access_token
    public_token = request.form['public_token']
    exchange_response = client.Item.public_token.exchange(public_token)
    print('public token: ' + public_token)
    print('access token: ' + exchange_response['access_token'])
    print('item ID: ' + exchange_response['item_id'])
    access_token = exchange_response['access_token']
    return jsonify(exchange_response)

@app.route("/accounts", methods=['GET'])
def accounts():
    global access_token
    accounts = client.Auth.get(access_token)
    return jsonify(accounts)

@app.route("/item", methods=['GET', 'POST'])
def item():
    global access_token
    item_response = client.Item.get(access_token)
    institution_response = client.Institutions.get_by_id(
        item_response['item']['institution_id'])
    return jsonify({'item': item_response['item'],
                    'institution': institution_response['institution']})

@app.route("/transactions", methods=['GET', 'POST'])
def transactions():
    global access_token
    # Pull transactions for the last 30 days
    start_date = "{:%Y-%m-%d}".format(datetime.datetime.now() +
                                      datetime.timedelta(-30))
    end_date = "{:%Y-%m-%d}".format(datetime.datetime.now())
    try:
        response = client.Transactions.get(access_token, start_date, end_date)
        return jsonify(response)
    except plaid.errors.PlaidError as e:
        return jsonify({'error': {'error_code': e.code,
                                  'error_message': str(e)}})

@app.route("/create_public_token", methods=['GET'])
def create_public_token():
    global access_token
    # Create a one-time use public_token for the Item. This public_token can
    # be used to initialize Link in update mode for the user.
    response = client.Item.public_token.create(access_token)
    return jsonify(response)

if __name__ == "__main__":
    app.run(port=os.getenv('PORT', 5000))
Update your code like this:
PLAID_CLIENT_ID = 'client_id'
PLAID_SECRET = 'secret'
PLAID_PUBLIC_KEY = 'key'
PLAID_ENV = 'sandbox'
The problem was solved by putting the credentials into the client.py file, which is created when you install plaid.
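Along the same lines, since the quickstart reads the keys with os.getenv, a small sanity check before constructing the client makes a missing credential fail loudly at startup instead of surfacing later as an HTTP 500 on /get_access_token. This is just a sketch using the variable names from the quickstart:

import os
import sys

# Fail fast if any of the expected Plaid credentials is unset.
required = ('PLAID_CLIENT_ID', 'PLAID_SECRET', 'PLAID_PUBLIC_KEY')
missing = [name for name in required if not os.getenv(name)]
if missing:
    sys.exit('Missing Plaid credentials: ' + ', '.join(missing))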
I am using Python Twisted to get streaming data from the Twitter streaming API. In short, there are two steps: 1) get an access_token, 2) use the access_token to request the data.
Step 1 works completely fine, but at step 2 I am getting this bad request error, status 400. Why is that? I think it is because Twitter uses HTTP/1.1 and Twisted uses HTTP/1.0 by default. If so, how do I upgrade the connection to HTTP/1.1?
EDIT: Here is my error message
HTTP/1.0 400 Bad Request
content-length: 0
date: Sun, 12 Mar 2017 14:57:13 GMT
server: tsa
x-connection-hash: dca361a2b4214ad66203e9912b05cf7f
[Failure instance: Traceback (failure with no frames): <class 'twisted.internet.error.ConnectionDone'>: Connection was closed cleanly.
#!/usr/bin/python
import oauth2 as oauth
import urlparse
import time
import webbrowser
from twisted.internet import reactor, protocol, ssl
from twisted.web import http
CONSUMER_KEY = 'xxxx'
CONSUMER_SECRET = 'xxxx'
CONSUMER = oauth.Consumer(CONSUMER_KEY, CONSUMER_SECRET)
ACCESS_TOKEN_FILE = 'OAUTH_ACCESS_TOKEN'
TWITTER_REQUEST_TOKEN_URL = 'https://twitter.com/oauth/request_token'
TWITTER_ACCESS_TOKEN_URL = 'https://twitter.com/oauth/access_token'
TWITTER_AUTHORIZE_URL = 'https://twitter.com/oauth/authorize'
TWITTER_STREAM_API_HOST = 'stream.twitter.com'
TWITTER_STREAM_API_PATH = '/1.1/statuses/sample.json'
class TwitterStreamer(http.HTTPClient):
    def connectionMade(self):
        self.sendCommand('GET', self.factory.url)
        self.sendHeader('Host', self.factory.host)
        self.sendHeader('User-Agent', self.factory.agent)
        self.sendHeader('Authorization', self.factory.oauth_header)
        self.endHeaders()

    def handleStatus(self, version, status, message):
        if status != '200':
            self.factory.tweetError(ValueError("bad status"))

    def lineReceived(self, line):
        self.factory.tweetReceived(line)

    def connectionLost(self, reason):
        self.factory.tweetError(reason)

class TwitterStreamerFactory(protocol.ClientFactory):
    protocol = TwitterStreamer

    def __init__(self, oauth_header):
        self.url = TWITTER_STREAM_API_PATH
        self.agent = 'Twisted/TwitterStreamer'
        self.host = TWITTER_STREAM_API_HOST
        self.oauth_header = oauth_header

    def clientConnectionFailed(self, _, reason):
        self.tweetError(reason)

    def tweetReceived(self, tweet):
        print tweet

    def tweetError(self, error):
        print error

def save_access_token(key, secret):
    with open(ACCESS_TOKEN_FILE, 'w') as f:
        f.write("ACCESS_KEY=%s\n" % key)
        f.write("ACCESS_SECRET=%s\n" % secret)

def load_access_token():
    with open(ACCESS_TOKEN_FILE) as f:
        lines = f.readlines()
        str_key = lines[0].strip().split('=')[1]
        str_secret = lines[1].strip().split('=')[1]
        return oauth.Token(key=str_key, secret=str_secret)

def fetch_access_token():
    CONSUMER_KEY = 'xxxxxxxx'
    CONSUMER_SECRET = 'xxxxxxxxx'
    ACCESS_KEY = "xxxxxxx"
    ACCESS_SECRET = "xxxxxxxxx"
    consumer = oauth.Consumer(key=CONSUMER_KEY, secret=CONSUMER_SECRET)
    access_token = oauth.Token(key=ACCESS_KEY, secret=ACCESS_SECRET)
    return (access_token.key, access_token.secret)

def build_authorization_header(access_token):
    url = "https://%s%s" % (TWITTER_STREAM_API_HOST, TWITTER_STREAM_API_PATH)
    params = {
        'oauth_version': "1.0",
        'oauth_nonce': oauth.generate_nonce(),
        'oauth_timestamp': str(int(time.time())),
        'oauth_token': access_token.key,
        'oauth_consumer_key': CONSUMER.key
    }
    # Sign the request.
    # For some messed up reason, we need to specify is_form_encoded to prevent
    # the oauth2 library from setting oauth_body_hash which Twitter doesn't like.
    req = oauth.Request(method="GET", url=url, parameters=params, is_form_encoded=True)
    req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), CONSUMER, access_token)
    # Grab the Authorization header
    header = req.to_header()['Authorization'].encode('utf-8')
    print "Authorization header:"
    print "  header = %s" % header
    return header

if __name__ == '__main__':
    # Check if we have saved an access token before.
    try:
        f = open(ACCESS_TOKEN_FILE)
    except IOError:
        # No saved access token. Do the 3-legged OAuth dance and fetch one.
        (access_token_key, access_token_secret) = fetch_access_token()
        # Save the access token for next time.
        save_access_token(access_token_key, access_token_secret)
    # Load access token from disk.
    access_token = load_access_token()
    # Build Authorization header from the access_token.
    auth_header = build_authorization_header(access_token)
    # Twitter stream using the Authorization header.
    twsf = TwitterStreamerFactory(auth_header)
    reactor.connectSSL(TWITTER_STREAM_API_HOST, 443, twsf, ssl.ClientContextFactory())
    reactor.run()
UPDATE: Working code:
import base64, urllib
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.protocols import basic
from twisted.python.failure import DefaultException
from twisted.web.client import Agent
from twisted.web.http_headers import Headers
import json
import oauth2 as oauth
import time
from twisted.web import server,resource
from twisted.internet import endpoints
from twisted.web.server import Site
CONSUMER_KEY = 'xxxxxxxxxxxx'
CONSUMER_SECRET = 'xxxxxxxxxxxxxx'
TWITTER_STREAM_API_HOST = 'stream.twitter.com'
TWITTER_STREAM_API_PATH = '/1.1/statuses/sample.json'
ACCESS_TOKEN_FILE = 'OAUTH_ACCESS_TOKEN'
CONSUMER = oauth.Consumer(CONSUMER_KEY, CONSUMER_SECRET)
def callback(result):
    print result

def errback(error):
    print error

class StreamingParser(basic.LineReceiver):
    delimiter = '\r\n'

    def __init__(self, user_callback, user_errback):
        self.user_callback = user_callback
        self.user_errback = user_errback

    def lineReceived(self, line):
        d = Deferred()
        d.addCallback(self.user_callback)
        d.addErrback(self.user_errback)
        line = line.strip()
        print line, '........'
        try:
            d.callback(json.loads(line))
        except ValueError, e:
            if self.user_errback:
                d.errback(e)

    def connectionLost(self, reason):
        if self.user_errback:
            d = Deferred()
            d.addErrback(self.user_errback)
            d.errback(DefaultException(reason.getErrorMessage()))

def _get_response(response, callback, errback):
    print 'got response......'
    response.deliverBody(StreamingParser(callback, errback))
    return Deferred()

def _shutdown(reason, errback):
    d = Deferred()
    d.addErrback(errback)
    d.errback(reason)
    if reactor.running:
        reactor.stop()

def save_access_token(key, secret):
    with open(ACCESS_TOKEN_FILE, 'w') as f:
        f.write("ACCESS_KEY=%s\n" % key)
        f.write("ACCESS_SECRET=%s\n" % secret)

def load_access_token():
    with open(ACCESS_TOKEN_FILE) as f:
        lines = f.readlines()
        str_key = lines[0].strip().split('=')[1]
        str_secret = lines[1].strip().split('=')[1]
        return oauth.Token(key=str_key, secret=str_secret)

def fetch_access_token():
    ACCESS_KEY = "xxxxx-xxxx"
    ACCESS_SECRET = "xxxxxxxxxxxx"
    access_token = oauth.Token(key=ACCESS_KEY, secret=ACCESS_SECRET)
    return (access_token.key, access_token.secret)

def make_header(access_token):
    url = "https://%s%s" % (TWITTER_STREAM_API_HOST, TWITTER_STREAM_API_PATH)
    params = {
        # "Authorization": "Oauth %s" % auth,
        "oauth_version": "1.0",
        "oauth_nonce": oauth.generate_nonce(),
        "oauth_timestamp": str(int(time.time())),
        "oauth_token": access_token.key,
        "oauth_consumer_key": CONSUMER.key
    }
    req = oauth.Request(method="GET", url=url, parameters=params, is_form_encoded=True)
    req.sign_request(oauth.SignatureMethod_HMAC_SHA1(), CONSUMER, access_token)
    header = req.to_header()['Authorization'].encode('utf-8')
    print "Authorization header:"
    print "  header = %s" % header
    return header

def start_streaming():
    print 'streaming started...........'
    try:
        f = open(ACCESS_TOKEN_FILE)
    except IOError:
        access_token_key, access_token_secret = fetch_access_token()
        save_access_token(access_token_key, access_token_secret)
    access_token = load_access_token()
    auth_header = make_header(access_token)
    url = 'https://stream.twitter.com/1.1/statuses/sample.json'
    headers = Headers({
        'User-Agent': ['TwistedSTreamReciever'],
        'Authorization': [auth_header]})
    agent = Agent(reactor)
    d = agent.request('GET', url, headers, None)
    d.addCallback(_get_response, callback, errback)
    d.addBoth(_shutdown, errback)
    # reactor.run()

class _Stream(resource.Resource):
    isLeaf = True

    def render_GET(self, request):
        start_streaming()  # Streaming started here.......
        time.sleep(8)  # wait for 8 seconds...
        ########.........??? stop streaming here??
        return "<html>streaming started...........%s</html>" % (time.ctime(),)

if __name__ == "__main__":
    resource = _Stream()
    factory = Site(resource)
    endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880)
    endpoint.listen(factory)
    reactor.run()
To give up on reading a particular streaming response (which it seems may be necessary - I'm guessing these Twitter streams never end on their own) and close the connection associated with that request/response (because HTTP has no other way to give up on a response), use the body delivery protocol's transport.loseConnection method. So, for example:
def _get_response(response, callback, errback):
    print 'got response......'
    proto = StreamingParser(callback, errback)
    save_stream_by_name(stream_name, proto)
    response.deliverBody(proto)
    return Deferred()
When you're done with that stream:
pop_stream_by_name(stream_name).transport.loseConnection()
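save_stream_by_name and pop_stream_by_name are not Twisted APIs; they stand in for whatever bookkeeping you use to find the protocol again later. A minimal dictionary-backed sketch would be:

# Module-level registry so a later request (or a timer) can look a
# streaming protocol up by name and drop its connection.
_streams = {}

def save_stream_by_name(stream_name, proto):
    _streams[stream_name] = proto

def pop_stream_by_name(stream_name):
    return _streams.pop(stream_name)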
I am a beginner writing a small Twitter tool for scheduled tweets and automatic retweets in Python/Flask.
I got stuck with issues of processes running in the background.
I want scheduled tweets and retweets to work simultaneously in the background for a given user.
I want to be able to terminate these background processes running retweets/scheduled tweets separately from each other.
How would you change the code below to achieve this?
If you look at the code below, it works, but a user cannot run scheduled tweets and retweets simultaneously. Also, if the user decides to terminate one of the processes, say retweets, the other process (scheduled tweets) terminates as well, and vice versa.
I thought about putting the identification data for a given process into a database and recalling it from the database when there is a need to terminate the process, instead of using the cookie session, but I do not know how to implement this idea in code.
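Roughly the direction I was imagining, though I am not sure it is the right way to do it (background_jobs is a made-up table with columns username, job_type and pid, and job_type would be 'schedule' or 'retweet'):

import psutil

def remember_job(con, username, job_type, pid):
    # Persist which PID runs which kind of job for which user.
    cursor = con.cursor()
    cursor.execute(
        "INSERT INTO background_jobs (username, job_type, pid) VALUES (%s, %s, %s)",
        (username, job_type, pid))
    con.commit()

def stop_job(con, username, job_type):
    # Look the PID up again and terminate only that job.
    cursor = con.cursor()
    cursor.execute(
        "SELECT pid FROM background_jobs WHERE username = %s AND job_type = %s",
        (username, job_type))
    row = cursor.fetchone()
    if row:
        try:
            psutil.Process(row[0]).terminate()
        except psutil.NoSuchProcess:
            pass
        cursor.execute(
            "DELETE FROM background_jobs WHERE username = %s AND job_type = %s",
            (username, job_type))
        con.commit()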
import ........
mysql = MySQL()
app = Flask(__name__)
app.secret_key = 'xxx'
app.config['MYSQL_DATABASE_USER'] = 'xxx'
app.config['MYSQL_DATABASE_PASSWORD'] = 'xxx'
app.config['MYSQL_DATABASE_DB'] = 'xxx'
app.config['MYSQL_DATABASE_HOST'] = '0.0.0.0'
mysql.init_app(app)

@app.route('/showSignin')
def showSignin():
    if session.get('user'):
        return redirect('/userHome')
    else:
        return render_template('signin.html')

@app.route('/showscheduletweets')
def showscheduletweets():
    if session.get('user'):
        return render_template('scheduletweets.html')
    else:
        return render_template('signin.html')

@app.route('/validateLogin', methods=['POST'])
def validateLogin():
    try:
        _username = request.form['inputEmail']
        _password = request.form['inputPassword']
        # connect to mysql
        con = mysql.connect()
        cursor = con.cursor()
        cursor.callproc('sp_validateLogin', (_username,))
        data = cursor.fetchall()
        if len(data) > 0:
            if check_password_hash(str(data[0][3]), _password):
                session['user'] = data[0][0]
                consumerkey = data[0][4]
                consumersecret = data[0][5]
                accesstoken = data[0][6]
                tokensecret = data[0][7]
                twitter = Twython(consumerkey, consumersecret, accesstoken, tokensecret)
                twitter.update_status(status="xxx says hello.")
                return render_template('userHome.html')
            else:
                return render_template('error.html', error='Wrong Email address or Password.')
        else:
            return render_template('error.html', error='Wrong Email address or Password.')
    except Exception as e:
        return render_template('error.html', error=str(e))
    finally:
        cursor.close()
        con.close()

# schedule tweets
@app.route('/scheduletweets', methods=['POST'])
def scheduletweets():
    if session.get('user'):
        _username = request.form['inputEmail']
        con = mysql.connect()
        cursor = con.cursor()
        cursor.callproc('sp_GetTwitter', (_username,))
        data = cursor.fetchall()
        session['user'] = data[0][0]
        consumerkey = data[0][4]
        consumersecret = data[0][5]
        accesstoken = data[0][6]
        tokensecret = data[0][7]
        twitter = Twython(consumerkey, consumersecret, accesstoken, tokensecret)
        tweet1 = request.form['inputTweet1']
        tweet2 = request.form['inputTweet2']
        tweet3 = request.form['inputTweet3']
        tweet4 = request.form['inputTweet4']
        tweet5 = request.form['inputTweet5']
        tweet6 = request.form['inputTweet6']
        Hash1 = request.form['inputHash1']
        Hash2 = request.form['inputHash2']
        Hash3 = request.form['inputHash3']
        Hash4 = request.form['inputHash4']
        fruits = [Hash1, Hash2, Hash3, Hash4]
        list = [tweet1, tweet2, tweet3, tweet4, tweet5, tweet6]

        def workit():
            while True:
                try:
                    if len(list) > 0:
                        z = random.randint(1, len(fruits))
                        a = random.sample(fruits, z)
                        b = " ".join(str(x) for x in a)
                        toTweet = list[random.randint(0, len(list)) - 1] + " " + b
                        twitter.update_status(status=toTweet)
                        time.sleep(10)
                    else:
                        twitter.update_status(status="Oh dear... I'm afraid I'm rather empty =(")
                        break
                except TwythonError as e:
                    print(e)

        if 'work_process' not in session:
            process = Process(target=workit)
            process.start()
            pid = process.pid
            parent_pid = psutil.Process(process.pid).parent().pid
            session['work_process'] = (parent_pid, pid)
        return redirect('/showscheduletweets')

# retweets
@app.route('/retweet', methods=['POST'])
def retweet():
    if session.get('user'):
        _username = request.form['inputEmail']
        con = mysql.connect()
        cursor = con.cursor()
        cursor.callproc('sp_GetTwitter', (_username,))
        data = cursor.fetchall()
        session['user'] = data[0][0]
        consumerkey = data[0][4]
        consumersecret = data[0][5]
        accesstoken = data[0][6]
        tokensecret = data[0][7]
        Retweet1 = request.form['inputRetweet1']
        Retweet2 = request.form['inputRetweet2']
        Retweet3 = request.form['inputRetweet3']
        Retweet4 = request.form['inputRetweet4']
        Exclude1 = request.form['inputExclude1']
        Exclude2 = request.form['inputExclude2']

        def work():
            twitter = Twython(consumerkey, consumersecret, accesstoken, tokensecret)
            naughty_words = [Exclude1, Exclude2]
            good_words = [Retweet1, Retweet2, Retweet3, Retweet4]
            filter = " OR ".join(good_words)
            blacklist = " -".join(naughty_words)
            keywords = filter + " -" + blacklist
            print(keywords)
            while True:
                search_results = twitter.search(q=keywords, count=10)
                try:
                    for tweet in search_results["statuses"]:
                        try:
                            twitter.retweet(id=tweet["id_str"])
                            time.sleep(60)
                        except TwythonError as e:
                            print(e)
                except TwythonError as e:
                    print(e)

        if 'work_process' not in session:
            process = Process(target=work)
            process.start()
            pid = process.pid
            parent_pid = psutil.Process(process.pid).parent().pid
            session['work_process'] = (parent_pid, pid)
        return redirect('/showretweet')

# terminating scheduled tweets and retweets
@app.route('/stoptweet', methods=['POST'])
def stoptweet():
    if 'work_process' in session:
        parent_pid, pid = session['work_process']
        try:
            process = psutil.Process(pid)
            if process.parent().pid == parent_pid:
                process.terminate()
        except psutil.NoSuchProcess:
            pass
        session.pop('work_process')
        return render_template('index.html')
    else:
        return render_template('index.html')

if __name__ == '__main__':
    app.run(host=os.getenv('IP', '0.0.0.0'), port=int(os.getenv('PORT', xxx)))
You might want to use the Celery Python module and move the scheduled tweets and retweets to background jobs.
For further info, see the doc: http://flask.pocoo.org/docs/0.11/patterns/celery/
You will decorate the functions related to Celery, rather than Flask.
As an example:
In your script:
import my_schedule_module
and then in my_schedule_module.py:
from celery import Celery, Task
from celery.result import AsyncResult
from celery.task.base import periodic_task
import sqlite3  # Here I use sqlite, can be sql
import redis  # Here I am using redis, you can use another db as well > check documentation
from datetime import timedelta  # used to schedule your background jobs, see in configuration below

app_schedule = Celery('my_schedule_module')

'''
Celery Configuration
'''
# a mockup configuration of your background jobs, as example use retweet each 60s
app_schedule.conf.update(
    CELERY_ACCEPT_CONTENT=['application/json'],
    CELERY_TASK_SERIALIZER='json',
    # CELERY_ACCEPT_CONTENT=['json'],  # Ignore other content
    CELERY_RESULT_SERIALIZER='json',
    # CELERY_TIMEZONE='Europe/Oslo',
    # CELERY_ENABLE_UTC=True,
    CELERYD_TASK_TIME_LIMIT=600,
    CELERYD_TASK_SOFT_TIME_LIMIT=600,
    CELERYD_MAX_TASKS_PER_CHILD=1000,
    CELERYD_OPTS="--time-limit=600 --concurrency=4",
    BROKER_URL='redis://localhost:6379/0',
    CELERY_RESULT_BACKEND='redis://localhost',
    CELERYBEAT_SCHEDULE={
        'add-every-60-seconds': {
            'task': 'my_schedule_module.retweet',
            'schedule': timedelta(seconds=60)
        },
    }
)

@app_schedule.task()
def retweet(tweet):
    # your tweet function

@app_schedule.task()
def scheduletweets():
    # your background job
    # pseudo code
    tweets = get_tweets()
    process_tweet_list = []
    for tweet in tweets:
        process_tweet_list.append(retweet.s(tweet))
    job = group(process_tweet_list)  # group is celery.group, see documentation
    result = job.apply_async()  # process job list async
    print 'result', result.ready(), result.successful()
You can also use callback functions - as an example, you might want to update a datetime field in your db recording when a tweet was retweeted.
In this case, you would have a syntax like:
result = my_schedule_module.retweet.apply_async( (tweet,) , link=my_schedule_module.callback_to_store_results_of_retweet.s())
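To address the "terminate separately" part of the question: once the work runs as Celery tasks, each task has an id you can store per user and revoke on its own. A sketch (the task id would come from wherever you saved it when the job was started):

from celery.result import AsyncResult
from my_schedule_module import app_schedule

def stop_background_job(task_id):
    # terminate=True asks the worker to kill the task that is currently running.
    AsyncResult(task_id, app=app_schedule).revoke(terminate=True)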
I have a Python script to get all the followers and friends on Twitter. I use supervisor to manage the process of the script. One thing I notice is that supervisor will restart the script while it sleeps, waiting for the Twitter rate limit to clear. How do I stop that?
This is my script.
#!/usr/bin/env python
import pymongo
import tweepy
from pymongo import MongoClient
from sweepy.get_config import get_config
config = get_config()
consumer_key = config.get('PROCESS_TWITTER_CONSUMER_KEY')
consumer_secret = config.get('PROCESS_TWITTER_CONSUMER_SECRET')
access_token = config.get('PROCESS_TWITTER_ACCESS_TOKEN')
access_token_secret = config.get('PROCESS_TWITTER_ACCESS_TOKEN_SECRET')
MONGO_URL = config.get('MONGO_URL')
MONGO_PORT = config.get('MONGO_PORT')
MONGO_USERNAME = config.get('MONGO_USERNAME')
MONGO_PASSWORD = config.get('MONGO_PASSWORD')
client = MongoClient(MONGO_URL, int(MONGO_PORT))
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, retry_count=3)
db = client.tweets
db.authenticate(MONGO_USERNAME, MONGO_PASSWORD)
raw_tweets = db.raw_tweets
users = db.users
def is_user_in_db(user_id):
    return get_user_from_db(user_id) is None

def get_user_from_db(user_id):
    return users.find_one({'user.id': user_id})

def get_user_from_twitter(user_id):
    return api.get_user(user_id)

def get_followers(user_id):
    users = []
    for i, page in enumerate(tweepy.Cursor(api.followers, id=user_id, count=200).pages()):
        print 'Getting page {} for followers'.format(i)
        users += page
    return users

def get_friends(user_id):
    users = []
    for i, page in enumerate(tweepy.Cursor(api.friends, id=user_id, count=200).pages()):
        print 'Getting page {} for friends'.format(i)
        users += page
    return users

def get_followers_ids(user_id):
    ids = []
    for i, page in enumerate(tweepy.Cursor(api.followers_ids, id=user_id, count=5000).pages()):
        print 'Getting page {} for followers ids'.format(i)
        ids += page
    return ids

def get_friends_ids(user_id):
    ids = []
    for i, page in enumerate(tweepy.Cursor(api.friends_ids, id=user_id, count=5000).pages()):
        print 'Getting page {} for friends ids'.format(i)
        ids += page
    return ids

def process_user(user):
    user_id = user['id']
    screen_name = user['screen_name']
    print 'Processing user : {}'.format(screen_name)
    the_user = get_user_from_db(user_id)
    if the_user is None:
        user['followers_ids'] = get_followers_ids(screen_name)
        user['friends_ids'] = get_friends_ids(screen_name)
        users.insert_one(user)

if __name__ == "__main__":
    for doc in raw_tweets.find({'processed': {'$exists': False}}):
        print 'Start processing'
        try:
            process_user(doc['user'])
        except KeyError:
            pass
        try:
            process_user(doc['retweeted_status']['user'])
        except KeyError:
            pass
        raw_tweets.update_one({'_id': doc['_id']}, {'$set': {'processed': True}})
When the rate limit is hit, Tweepy will sleep and wait for it to clear. I get this message:
Rate limit reached. Sleeping for: 896
However, supervisor somehow restarts the script and runs it again, so the script never finishes. How do I stop that?
This is my supervisor configuration.
[program:twitter_processer]
command=/usr/bin/python -u /home/ubuntu/processer.py
directory=/home/ubuntu
autostart=true
autorestart=true
startretries=3
stderr_logfile=/home/ubuntu/processer.err.log
stdout_logfile=/home/ubuntu/processer.out.log
user=ubuntu