twisted twitter streaming api bad request error - python
I am using Python Twisted to get streaming data from the Twitter streaming API. In short, there are two steps: 1) get an access_token; 2) use the access_token to request the data.
Step 1 works completely fine, but at step 2 I am getting a bad request error (status 400). Why is that? I think it's because Twitter uses HTTP/1.1 and Twisted is using HTTP/1.0 by default. If so, how do I upgrade the connection to HTTP/1.1?
EDIT: Here is my error message
HTTP/1.0 400 Bad Request
content-length: 0
date: Sun, 12 Mar 2017 14:57:13 GMT
server: tsa
x-connection-hash: dca361a2b4214ad66203e9912b05cf7f
[Failure instance: Traceback (failure with no frames): <class 'twisted.internet.error.ConnectionDone'>: Connection was closed cleanly.
.
#!/usr/bin/python
import oauth2 as oauth
import urlparse
import time
import webbrowser
from twisted.internet import reactor, protocol, ssl
from twisted.web import http
# OAuth consumer (application) credentials; the 'xxxx' values are placeholders.
CONSUMER_KEY = 'xxxx'
CONSUMER_SECRET = 'xxxx'
# Consumer object used when signing the streaming request.
CONSUMER = oauth.Consumer(CONSUMER_KEY, CONSUMER_SECRET)
# Local file in which the access token key/secret are cached between runs.
ACCESS_TOKEN_FILE = 'OAUTH_ACCESS_TOKEN'
# OAuth endpoint URLs; nothing else in the visible script references them.
TWITTER_REQUEST_TOKEN_URL = 'https://twitter.com/oauth/request_token'
TWITTER_ACCESS_TOKEN_URL = 'https://twitter.com/oauth/access_token'
TWITTER_AUTHORIZE_URL = 'https://twitter.com/oauth/authorize'
# Host and path of the streaming endpoint; used both for the connection and
# for the URL that gets signed.
TWITTER_STREAM_API_HOST = 'stream.twitter.com'
TWITTER_STREAM_API_PATH = '/1.1/statuses/sample.json'
class TwitterStreamer(http.HTTPClient):
    """HTTP client protocol that requests the stream and relays every line.

    The request path and the Host, User-Agent and Authorization header values
    are all read from the owning factory; received lines and errors are
    reported back to that same factory.
    """

    def connectionMade(self):
        """Send the GET request line followed by the three headers."""
        settings = self.factory
        # NOTE(review): HTTPClient.sendCommand appears to emit an HTTP/1.0
        # request line, which may be why the server answers 400 -- confirm.
        self.sendCommand('GET', settings.url)
        header_pairs = (
            ('Host', settings.host),
            ('User-Agent', settings.agent),
            ('Authorization', settings.oauth_header),
        )
        for header_name, header_value in header_pairs:
            self.sendHeader(header_name, header_value)
        self.endHeaders()

    def handleStatus(self, version, status, message):
        """Report any status other than '200' to the factory as an error."""
        if status == '200':
            return
        self.factory.tweetError(ValueError("bad status"))

    def lineReceived(self, line):
        """Forward one received line to the factory."""
        self.factory.tweetReceived(line)

    def connectionLost(self, reason):
        """Forward the disconnect reason to the factory's error handler."""
        self.factory.tweetError(reason)
class TwitterStreamerFactory(protocol.ClientFactory):
    """Client factory holding the request settings and the output handlers.

    Attributes set in __init__ (url, agent, host, oauth_header) are read by
    the TwitterStreamer protocol when it builds its request.
    """

    protocol = TwitterStreamer

    def __init__(self, oauth_header):
        """Store the Authorization header value plus the fixed request settings."""
        self.oauth_header = oauth_header
        self.host = TWITTER_STREAM_API_HOST
        self.url = TWITTER_STREAM_API_PATH
        self.agent = 'Twisted/TwitterStreamer'

    def clientConnectionFailed(self, _, reason):
        """Route a failed connection attempt to the error handler."""
        self.tweetError(reason)

    def tweetReceived(self, tweet):
        """Print one received line to stdout."""
        print(tweet)

    def tweetError(self, error):
        """Print an error (exception or failure) to stdout."""
        print(error)
def save_access_token(key, secret):
    """Write the token key and secret to ACCESS_TOKEN_FILE, one per line.

    The file is overwritten; each line has the form ``NAME=value``.
    """
    with open(ACCESS_TOKEN_FILE, 'w') as token_file:
        token_file.writelines([
            "ACCESS_KEY=%s\n" % key,
            "ACCESS_SECRET=%s\n" % secret,
        ])
def load_access_token():
    """Read the cached access token from ACCESS_TOKEN_FILE.

    The file is expected to hold ``ACCESS_KEY=<key>`` on its first line and
    ``ACCESS_SECRET=<secret>`` on its second.

    Returns:
        An oauth.Token built from the stored key and secret.

    Raises:
        IOError: the file cannot be opened.
        IndexError: the file has fewer than two lines, or a line has no '='.
    """
    with open(ACCESS_TOKEN_FILE) as f:
        lines = f.readlines()
    # Split on the first '=' only, so a value that itself contains '='
    # is returned whole instead of being cut at its first '='.
    str_key = lines[0].strip().split('=', 1)[1]
    str_secret = lines[1].strip().split('=', 1)[1]
    return oauth.Token(key=str_key, secret=str_secret)
def fetch_access_token():
    """Return the pre-issued access token as a ``(key, secret)`` tuple.

    No request is made: the values are the hard-coded placeholders below and
    must be replaced with a real token before use.

    Returns:
        tuple: ``(access_key, access_secret)`` as strings.
    """
    access_key = "xxxxxxx"
    access_secret = "xxxxxxxxx"
    return (access_key, access_secret)
def build_authorization_header(access_token):
    """Build and return the signed OAuth Authorization header value.

    The request is a GET for the streaming endpoint URL, signed with
    HMAC-SHA1 using the module-level CONSUMER and the given access token.
    The resulting header is also printed to stdout.
    """
    stream_url = "https://%s%s" % (TWITTER_STREAM_API_HOST, TWITTER_STREAM_API_PATH)
    oauth_params = dict(
        oauth_version="1.0",
        oauth_nonce=oauth.generate_nonce(),
        oauth_timestamp=str(int(time.time())),
        oauth_token=access_token.key,
        oauth_consumer_key=CONSUMER.key,
    )

    # is_form_encoded is set so that the oauth2 library does not add
    # oauth_body_hash, which Twitter doesn't like.
    signed_request = oauth.Request(
        method="GET",
        url=stream_url,
        parameters=oauth_params,
        is_form_encoded=True,
    )
    signer = oauth.SignatureMethod_HMAC_SHA1()
    signed_request.sign_request(signer, CONSUMER, access_token)

    # Pull out only the Authorization header value.
    all_headers = signed_request.to_header()
    header = all_headers['Authorization'].encode('utf-8')
    print("Authorization header:")
    print(" header = %s" % header)
    return header
if __name__ == '__main__':
    # Check if we have saved an access token before.
    try:
        # Opened only to probe that the file exists and is readable; the
        # `with` closes the handle straight away.
        with open(ACCESS_TOKEN_FILE):
            pass
    except IOError:
        # No saved access token: obtain one from fetch_access_token().
        (access_token_key, access_token_secret) = fetch_access_token()
        # Save the access token for next time.
        save_access_token(access_token_key, access_token_secret)
    # Load access token from disk.
    access_token = load_access_token()
    # Build Authorization header from the access_token.
    auth_header = build_authorization_header(access_token)
    # Twitter stream using the Authorization header.
    twsf = TwitterStreamerFactory(auth_header)
    # NOTE(review): ssl.ClientContextFactory presumably does not verify the
    # server certificate -- confirm against the Twisted SSL documentation.
    reactor.connectSSL(TWITTER_STREAM_API_HOST, 443, twsf, ssl.ClientContextFactory())
    reactor.run()
UPDATE: Working code:
import base64, urllib
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.protocols import basic
from twisted.python.failure import DefaultException
from twisted.web.client import Agent
from twisted.web.http_headers import Headers
import json
import oauth2 as oauth
import time
from twisted.web import server,resource
from twisted.internet import endpoints
from twisted.web.server import Site
# OAuth consumer (application) credentials; the 'x' strings are placeholders.
CONSUMER_KEY = 'xxxxxxxxxxxx'
CONSUMER_SECRET = 'xxxxxxxxxxxxxx'
# Host and path of the streaming endpoint; combined into the URL that gets signed.
TWITTER_STREAM_API_HOST = 'stream.twitter.com'
TWITTER_STREAM_API_PATH = '/1.1/statuses/sample.json'
# Local file in which the access token key/secret are cached between runs.
ACCESS_TOKEN_FILE = 'OAUTH_ACCESS_TOKEN'
# Consumer object used when signing the streaming request.
CONSUMER = oauth.Consumer(CONSUMER_KEY, CONSUMER_SECRET)
def callback(result):
    """Default success handler: print the decoded message to stdout."""
    print(result)
def errback(error):
    """Default error handler: print the error to stdout."""
    print(error)
class StreamingParser(basic.LineReceiver):
    """Body protocol that decodes a CRLF-delimited stream of JSON messages.

    Every non-blank line is parsed with json.loads and passed to
    ``user_callback``. Parse errors, exceptions raised by ``user_callback``
    and the connection-lost reason are passed to ``user_errback`` when one
    was supplied.
    """

    delimiter = '\r\n'

    def __init__(self, user_callback, user_errback):
        """Remember the handlers.

        Args:
            user_callback: callable receiving each decoded JSON message.
            user_errback: callable receiving failures, or a falsy value to
                leave errors unreported.
        """
        self.user_callback = user_callback
        self.user_errback = user_errback

    def lineReceived(self, line):
        """Decode one line and dispatch it to the user handlers."""
        line = line.strip()
        if not line:
            # The stream may carry blank keep-alive lines; they are not
            # messages, so don't feed them to json.loads (which would raise
            # ValueError and trigger the errback on every heartbeat).
            return
        print("%s ........" % (line,))

        # A one-shot Deferred routes the result through the callback and
        # sends anything the callback raises on to the errback.
        d = Deferred()
        d.addCallback(self.user_callback)
        if self.user_errback:
            d.addErrback(self.user_errback)
        try:
            message = json.loads(line)
        except ValueError as e:
            if self.user_errback:
                d.errback(e)
            return
        d.callback(message)

    def connectionLost(self, reason):
        """Report the end of the stream to ``user_errback`` if one is set."""
        if self.user_errback:
            d = Deferred()
            d.addErrback(self.user_errback)
            d.errback(DefaultException(reason.getErrorMessage()))
def _get_response(response, callback, errback):
    """Attach a StreamingParser to the response body and return a new Deferred.

    Nothing in this function ever fires the returned Deferred.
    """
    print('got response......')
    body_parser = StreamingParser(callback, errback)
    response.deliverBody(body_parser)
    pending = Deferred()
    return pending
def _shutdown(reason, errback):
    """Pass ``reason`` to ``errback`` and stop the reactor if it is running."""
    notifier = Deferred()
    notifier.addErrback(errback)
    notifier.errback(reason)
    if not reactor.running:
        return
    reactor.stop()
def save_access_token(key, secret):
    """Overwrite ACCESS_TOKEN_FILE with the key and secret as NAME=value lines."""
    with open(ACCESS_TOKEN_FILE, 'w') as out:
        for template, value in (("ACCESS_KEY=%s\n", key),
                                ("ACCESS_SECRET=%s\n", secret)):
            out.write(template % value)
def load_access_token():
    """Read the cached access token from ACCESS_TOKEN_FILE.

    The file is expected to hold ``ACCESS_KEY=<key>`` on its first line and
    ``ACCESS_SECRET=<secret>`` on its second.

    Returns:
        An oauth.Token built from the stored key and secret.

    Raises:
        IOError: the file cannot be opened.
        IndexError: the file has fewer than two lines, or a line has no '='.
    """
    with open(ACCESS_TOKEN_FILE) as f:
        lines = f.readlines()
    # Split on the first '=' only, so a value that itself contains '='
    # is returned whole instead of being cut at its first '='.
    str_key = lines[0].strip().split('=', 1)[1]
    str_secret = lines[1].strip().split('=', 1)[1]
    return oauth.Token(key=str_key, secret=str_secret)
def fetch_access_token():
    """Return the pre-issued access token as a ``(key, secret)`` tuple.

    No request is made: the values are the hard-coded placeholders below and
    must be replaced with a real token before use.

    Returns:
        tuple: ``(access_key, access_secret)`` as strings.
    """
    access_key = "xxxxx-xxxx"
    access_secret = "xxxxxxxxxxxx"
    return (access_key, access_secret)
def make_header(access_token):
    """Return the signed OAuth Authorization header value for the stream URL.

    A GET request for the streaming endpoint is signed with HMAC-SHA1 using
    the module-level CONSUMER and the given access token. The header is also
    printed to stdout.
    """
    endpoint = "https://%s%s" % (TWITTER_STREAM_API_HOST, TWITTER_STREAM_API_PATH)
    nonce = oauth.generate_nonce()
    timestamp = str(int(time.time()))
    oauth_fields = {
        "oauth_version": "1.0",
        "oauth_nonce": nonce,
        "oauth_timestamp": timestamp,
        "oauth_token": access_token.key,
        "oauth_consumer_key": CONSUMER.key,
    }
    request = oauth.Request(
        method="GET",
        url=endpoint,
        parameters=oauth_fields,
        is_form_encoded=True,
    )
    request.sign_request(oauth.SignatureMethod_HMAC_SHA1(), CONSUMER, access_token)
    header = request.to_header()['Authorization'].encode('utf-8')
    print("Authorization header:")
    print(" header = %s" % header)
    return header
def start_streaming():
    """Issue the signed streaming request and hook up the body handlers.

    Ensures an access token is cached on disk, builds the Authorization
    header from it, then sends a GET for the streaming endpoint through a
    twisted.web.client.Agent. The response is handed to _get_response and
    any outcome of the chain to _shutdown. The caller must run the reactor.
    """
    print('streaming started...........')
    try:
        # Opened only to probe that the file exists and is readable; the
        # `with` closes the handle straight away.
        with open(ACCESS_TOKEN_FILE):
            pass
    except IOError:
        access_token_key, access_token_secret = fetch_access_token()
        save_access_token(access_token_key, access_token_secret)
    access_token = load_access_token()
    auth_header = make_header(access_token)
    # Built from the same constants make_header signs, so the requested URL
    # and the signed URL cannot drift apart.
    url = "https://%s%s" % (TWITTER_STREAM_API_HOST, TWITTER_STREAM_API_PATH)
    headers = Headers({
        'User-Agent': ['TwistedSTreamReciever'],
        'Authorization': [auth_header]})
    agent = Agent(reactor)
    d = agent.request('GET', url, headers, None)
    d.addCallback(_get_response, callback, errback)
    d.addBoth(_shutdown, errback)
class _Stream(resource.Resource):
    """Leaf resource whose GET starts the stream and answers 8 seconds later."""

    isLeaf = True

    def render_GET(self, request):
        """Start streaming and send the HTML reply after a non-blocking delay.

        The delay is scheduled on the reactor instead of using time.sleep():
        sleeping here would block the reactor thread, so neither the stream
        nor any other request could make progress during the wait.

        Returns:
            server.NOT_DONE_YET; the body is written by the delayed call.
        """
        start_streaming()  # Streaming started here.

        def _reply():
            # TODO: stop streaming here.
            request.write(
                "<html>streaming started...........%s</html>" % (time.ctime(),))
            request.finish()

        delayed = reactor.callLater(8, _reply)  # wait for 8 seconds

        def _client_gone(_failure):
            # The client disconnected before the reply was due; writing to
            # the finished request would fail, so drop the pending call.
            if delayed.active():
                delayed.cancel()

        request.notifyFinish().addErrback(_client_gone)
        return server.NOT_DONE_YET
if __name__ == "__main__":
    # Named `root` rather than `resource` so the imported twisted.web
    # `resource` module is not rebound at module level.
    root = _Stream()
    site = Site(root)
    # Serve the resource over plain TCP on port 8880.
    endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880)
    endpoint.listen(site)
    reactor.run()
To give up on reading a particular streaming response (which it seems may be necessary - I'm guessing these Twitter streams never end on their own) and close the connection associated with that request/response (because HTTP has no other way to give up on a response), use the body delivery protocol's transport.loseConnection method. So, for example:
def _get_response(response, callback, errback):
    """Attach a StreamingParser to the response body and register it by name.

    The parser is stored via ``save_stream_by_name`` so that other code can
    look it up later and close its transport to stop the stream.

    NOTE(review): ``stream_name`` is not defined in this snippet; it has to
    come from the enclosing scope -- confirm where it is set.
    """
    print('got response......')
    body_protocol = StreamingParser(callback, errback)
    save_stream_by_name(stream_name, body_protocol)
    response.deliverBody(body_protocol)
    # Hand back a fresh Deferred; nothing in this function ever fires it.
    return Deferred()
When you're done with that stream:
pop_stream_by_name(stream_name).transport.loseConnection()
Related
How do you send many documents to a Scout Server in Python using the Python Scout Client?
Im trying to index PDF text to a python lib called Scout. I have tried doing the same thing with elasticsearch too. In both cases I can't figure out how to post text to an index in bulk, using python. After a lot of research, I believe I need to use async http request. The only problem is, I don't understand async calls nor do I understand what a Scout python 'client' really is. I'm a self-taught programmer and still have many things I don't understand. my thought is the client cant stay open for a loop to keep using the connection. I have seen coding concepts like "await" and "sessions" in many books on programming. However, I don't know how to implement these concepts. Can someone help me write some python code that will successfully post new documents to a running scout server and explain how it's done? Here is My attempt: from scout_client import Scout # import libraries to help read and create PDF import PyPDF2 from fpdf import FPDF import base64 import os from flask import Flask, jsonify, request, render_template, json # before you start, Run the Server.py file and create a Sqlite DB # Step one loop though PDF in 'books' folder for k in range(14,15): # open the pdf file read_pdf = PyPDF2.PdfFileReader("books/%s.pdf"%(k)) # Test to see if Step one is complete and succesful #print (read_pdf) # Step Two Gain intel on how many Pages are in the Document # get the page numbers num = read_pdf.getNumPages() print ("PDF pages:", num) # Step Three understand the data by page # create a dictionary object for page data all_pages = [] # Step For Create a new index in Scout Server # client.create_index('test3') # iterate the page numbers for page in range(num): data = read_pdf.getPage(page) #page_mode = read_pdf.getPageMode() # extract the page's text page_text = data.extractText() # put the text data into the dict all_pages.append(page_text) # initiate the Client from scout_client.py client = Scout('http://localhost:8000') # THe issue: I tryed for loops, and while loops 
but cant get past: urllib.error.HTTPError: HTTP Error 400: BAD REQUEST i = 1 while i <= num: client.create_document(all_pages[i],['test3']) print(i,"....done") i += 1 I get an error: Traceback (most recent call last): File "test.py", line 37, in <module> client.create_document(all_pages[i],['test3']) File "../Searchtest4/scout/scout_client.py", line 149, in create_document return self.post('/documents/', post_data, attachments) File "../Searchtest4/scout/scout_client.py", line 53, in post return self.post_json(url, data) File "../Searchtest4/scout/scout_client.py", line 63, in post_json return json.loads(urlopen(request).read().decode('utf8')) File "../lib/python3.7/urllib/request.py", line 222, in urlopen return opener.open(url, data, timeout) File "../lib/python3.7/urllib/request.py", line 531, in open response = meth(req, response) File "../lib/python3.7/urllib/request.py", line 641, in http_response 'http', request, response, code, msg, hdrs) File "../lib/python3.7/urllib/request.py", line 569, in error return self._call_chain(*args) File "../lib/python3.7/urllib/request.py", line 503, in _call_chain result = func(*args) File "../lib/python3.7/urllib/request.py", line 649, in http_error_default raise HTTPError(req.full_url, code, msg, hdrs, fp) **urllib.error.HTTPError: HTTP Error 400: BAD REQUEST** Here is the server that runs fine (server.py): import logging import optparse import os import sys from flask import Flask from werkzeug.serving import run_simple from scout.exceptions import InvalidRequestException from scout.models import database from scout.models import Attachment from scout.models import BlobData from scout.models import Document from scout.models import Index from scout.models import IndexDocument from scout.models import Metadata from scout.views import register_views logger = logging.getLogger('scout') def create_server(config=None, config_file=None): app = Flask(__name__) # Configure application using a config file. 
if config_file is not None: app.config.from_pyfile(config_file) # (Re-)Configure application using command-line switches/environment flags. if config is not None: app.config.update(config) # Initialize the SQLite database. initialize_database(app.config.get('DATABASE') or 'scout.db', pragmas=app.config.get('SQLITE_PRAGMAS') or None) register_views(app) #app.errorhandler(InvalidRequestException) def handle_invalid_request(exc): return exc.response() #app.before_request def connect_database(): if database.database != ':memory:': database.connect() #app.teardown_request def close_database(exc): if database.database != ':memory:' and not database.is_closed(): database.close() return app def initialize_database(database_file, pragmas=None): database.init(database_file, pragmas=pragmas) try: meth = database.execution_context except AttributeError: meth = database with meth: database.create_tables([ Attachment, BlobData, Document, Index, IndexDocument, Metadata]) def run(app): if app.config['DEBUG']: app.run(host=app.config['HOST'], port=app.config['PORT'], debug=True) else: run_simple( hostname=app.config['HOST'], port=app.config['PORT'], application=app, threaded=True) def panic(s, exit_code=1): sys.stderr.write('\033[91m%s\033[0m\n' % s) sys.stderr.flush() sys.exit(exit_code) def get_option_parser(): parser = optparse.OptionParser() parser.add_option( '-H', '--host', default='127.0.0.1', dest='host', help='The hostname to listen on. Defaults to 127.0.0.1.') parser.add_option( '-p', '--port', default=8000, dest='port', help='The port to listen on. 
Defaults to 8000.', type='int') parser.add_option( '-u', '--url-prefix', dest='url_prefix', help='URL path to prefix Scout API.') parser.add_option( '-s', '--stem', dest='stem', help='Specify stemming algorithm for content.') parser.add_option( '-d', '--debug', action='store_true', dest='debug', help='Run Flask app in debug mode.') parser.add_option( '-c', '--config', dest='config', help='Configuration module (python file).') parser.add_option( '--paginate-by', default=50, dest='paginate_by', help='Number of documents displayed per page of results, default=50', type='int') parser.add_option( '-k', '--api-key', dest='api_key', help='Set the API key required to access Scout.') parser.add_option( '-C', '--cache-size', default=64, dest='cache_size', help='SQLite page-cache size (MB). Defaults to 64MB.', type='int') parser.add_option( '-f', '--fsync', action='store_true', dest='fsync', help='Synchronize database to disk on every write.') parser.add_option( '-j', '--journal-mode', default='wal', dest='journal_mode', help='SQLite journal mode. Defaults to WAL (recommended).') parser.add_option( '-l', '--logfile', dest='logfile', help='Log file') return parser def parse_options(): option_parser = get_option_parser() options, args = option_parser.parse_args() if options.logfile: handler = logging.FileHandler(options.logfile) logger.addHandler(handler) config_file = os.environ.get('SCOUT_CONFIG') or options.config config = {'DATABASE': os.environ.get('SCOUT_DATABASE')} if len(args) == 0 and not config['DATABASE']: panic('Error: missing required path to database file.') elif len(args) > 1: panic('Error: [%s] only accepts one argument, which is the path ' 'to the database file.' 
% __file__) elif args: config['DATABASE'] = args[0] pragmas = [('journal_mode', options.journal_mode)] if options.cache_size: pragmas.append(('cache_size', -1024 * options.cache_size)) if not options.fsync: pragmas.append(('synchronous', 0)) config['SQLITE_PRAGMAS'] = pragmas # Handle command-line options. These values will override any values # that may have been specified in the config file. if options.api_key: config['AUTHENTICATION'] = options.api_key if options.debug: config['DEBUG'] = True config['HOST'] = options.host or '127.0.0.1' config['PORT'] = options.port or 8000 config['URL_PREFIX'] = options.url_prefix or '' if options.paginate_by: if options.paginate_by < 1 or options.paginate_by > 1000: panic('paginate-by must be between 1 and 1000') config['PAGINATE_BY'] = options.paginate_by if options.stem: if options.stem not in ('simple', 'porter'): panic('Unrecognized stemmer. Must be "porter" or "simple".') config['STEM'] = options.stem return create_server(config, config_file) def main(): app = parse_options() run(app) if __name__ == '__main__': main() and the so-called client (scout_client.py): import base64 import json try: from email.generator import _make_boundary as choose_boundary except ImportError: from mimetools import choose_boundary import mimetypes import os try: from urllib.parse import urlencode except ImportError: from urllib import urlencode try: from urllib.request import Request from urllib.request import urlopen except ImportError: from urllib2 import Request from urllib2 import urlopen import zlib ENDPOINT = None KEY = None class Scout(object): def __init__(self, endpoint=ENDPOINT, key=KEY): self.endpoint = endpoint.rstrip('/') self.key = key def get_full_url(self, url): return self.endpoint + url def get_raw(self, url, **kwargs): headers = {'Content-Type': 'application/json'} if self.key: headers['key'] = self.key if kwargs: if '?' not in url: url += '?' 
url += urlencode(kwargs, True) request = Request(self.get_full_url(url), headers=headers) fh = urlopen(request) return fh.read() def get(self, url, **kwargs): return json.loads(self.get_raw(url, **kwargs)) def post(self, url, data=None, files=None): if files: return self.post_files(url, data, files) else: return self.post_json(url, data) def post_json(self, url, data=None): headers = {'Content-Type': 'application/json'} if self.key: headers['key'] = self.key data = json.dumps(data or {}) if not isinstance(data, bytes): data = data.encode('utf-8') request = Request(self.get_full_url(url), data=data, headers=headers) return json.loads(urlopen(request).read().decode('utf8')) def post_files(self, url, json_data, files=None): if not files or not isinstance(files, dict): raise ValueError('One or more files is required. Files should be ' 'passed as a dictionary of filename: file-like-' 'object.') boundary = choose_boundary() form_files = [] for i, (filename, file_obj) in enumerate(files.items()): try: data = file_obj.read() except AttributeError: data = bytes(file_obj) mimetype = mimetypes.guess_type(filename)[0] form_files.append(( 'file_%s' % i, filename, mimetype or 'application/octet-stream', data)) part_boundary = '--' + boundary parts = [ part_boundary, 'Content-Disposition: form-data; name="data"', '', json.dumps(json_data)] for field_name, filename, mimetype, data in form_files: parts.extend(( part_boundary, 'Content-Disposition: file; name="%s"; filename="%s"' % ( field_name, filename), 'Content-Type: %s' % mimetype, '', data)) parts.append('--' + boundary + '--') parts.append('') headers = {'Content-Type': 'multipart/form-data; boundary=%s' % boundary} if self.key: headers['key'] = self.key data = '\r\n'.join(parts) if not isinstance(data, bytes): data = data.encode('utf-8') request = Request(self.get_full_url(url), data=data, headers=headers) return json.loads(urlopen(request).read()) def delete(self, url): headers = {} if self.key: headers['key'] = self.key 
request = Request(self.get_full_url(url), headers=headers) request.get_method = lambda: 'DELETE' fh = urlopen(request) return json.loads(fh.read()) def get_indexes(self, **kwargs): return self.get('/', **kwargs)['indexes'] def create_index(self, name): return self.post('/', {'name': name}) def rename_index(self, old_name, new_name): return self.post('/%s/' % old_name, {'name': new_name}) def delete_index(self, name): return self.delete('/%s/' % name) def get_index(self, name, **kwargs): return self.get('/%s/' % name, **kwargs) def get_documents(self, **kwargs): return self.get('/documents/', **kwargs) def create_document(self, content, indexes, identifier=None, attachments=None, **metadata): if not isinstance(indexes, (list, tuple)): indexes = [indexes] post_data = { 'content': content, 'identifier': identifier, 'indexes': indexes, 'metadata': metadata} return self.post('/documents/', post_data, attachments) def update_document(self, document_id=None, content=None, indexes=None, metadata=None, identifier=None, attachments=None): if not document_id and not identifier: raise ValueError('`document_id` must be provided.') data = {} if content is not None: data['content'] = content if indexes is not None: if not isinstance(indexes, (list, tuple)): indexes = [indexes] data['indexes'] = indexes if metadata is not None: data['metadata'] = metadata if not data and not attachments: raise ValueError('Nothing to update.') return self.post('/documents/%s/' % document_id, data, attachments) def delete_document(self, document_id=None): if not document_id: raise ValueError('`document_id` must be provided.') return self.delete('/documents/%s/' % document_id) def get_document(self, document_id=None): if not document_id: raise ValueError('`document_id` must be provided.') return self.get('/documents/%s/' % document_id) def attach_files(self, document_id, attachments): return self.post_files('/documents/%s/attachments/' % document_id, {}, attachments) def detach_file(self, 
document_id, filename): return self.delete('/documents/%s/attachments/%s/' % (document_id, filename)) def update_file(self, document_id, filename, file_object): return self.post_files('/documents/%s/attachments/%s/' % (document_id, filename), {}, {filename: file_object}) def get_attachments(self, document_id, **kwargs): return self.get('/documents/%s/attachments/' % document_id, **kwargs) def get_attachment(self, document_id, filename): return self.get('/documents/%s/attachments/%s/' % (document_id, filename)) def download_attachment(self, document_id, filename): return self.get_raw('/documents/%s/attachments/%s/download/' % (document_id, filename)) def search_attachments(self, **kwargs): return self.get('/documents/attachments/', **kwargs) class SearchProvider(object): def content(self, obj): raise NotImplementedError def identifier(self, obj): raise NotImplementedError def metadata(self, obj): raise NotImplementedError class SearchSite(object): def __init__(self, client, index): self.client = client self.index = index self.registry = {} def register(self, model_class, search_provider): self.registry.setdefault(model_class, []) self.registry[model_class].append(search_provider()) def unregister(self, model_class, search_provider=None): if search_provider is None: self.registry.pop(model_class, None) elif model_class in self.registry: self.registry[model_class] = [ sp for sp in self.registry[model_class] if not isinstance(sp, search_provider)] def store(self, obj): if type(obj) not in self.registry: return False for provider in self.registry[type(obj)]: content = provider.content(obj) try: metadata = provider.metadata(obj) except NotImplementedError: metadata = {} try: identifier = provider.identifier(obj) except NotImplementedError: pass else: metadata['identifier'] = identifier self.client.create_document(content, self.index, **metadata) return True def remove(self, obj): if type(obj) not in self.registry: return False for provider in self.registry[type(obj)]: 
self.client.delete_document(provider.identifier(obj)) return True Finally the Documentation for Scout: https://scout.readthedocs.io/en/latest/server.html#index-detail-index-name https://charlesleifer.com/blog/meet-scout-a-search-server-powered-by-sqlite/ Any Detailed Help is much appreciated:)
So i find a lib called scout and...got it to work! from scout_client import Scout # import libraries to help read and create PDF import PyPDF2 from fpdf import FPDF import base64 import os from flask import Flask, jsonify, request, render_template, json client = Scout('http://localhost:8000') for k in range(7,18): read_pdf = PyPDF2.PdfFileReader("books/%s.pdf"%(k)) num = read_pdf.getNumPages() print ("PDF pages:", num) all_pages = [] for page in range(num): data = read_pdf.getPage(page) page_text = data.extractText() all_pages.append(page_text) import requests for z in all_pages: url = 'http://localhost:8000/documents/' data = {'content': z, 'indexes': ['test13']} headers = { 'Content-Type': 'application/json', } response = requests.post(url, data=json.dumps(data), headers=headers) print(response) I can now loop though as many PDF's as I want locally Post to the server for indexing and search for keywords Now I just need help with Making a basic front end with a search bar that calls data from a JSON response in python and flask.
Keep-alive messages not being sent when using the Python requests module
I am observing that with python requests module, HTTP keep-alive is not being honored. I dont see Acks for keep-alive being sent from the host where i am running the python script. Please let me know how it can be fixed.Following is my code: import json import requests import logging import sys import time from threading import Thread logging.basicConfig(level=logging.DEBUG) class NSNitro: def __init__(self,*args): if len(args) > 2: self.ip = args[0] self.username = args[1] self.password = args[2] self.session_id = None url = 'http://'+self.ip+'/nitro/v1/config/login' payload = { "login": { "username":"nsroot", "password":"nsroot" }} headers = {"Content-type": "application/json", 'Connection': 'keep-alive'} try: r = requests.post(url=url,headers=headers,data=json.dumps(payload),timeout=5) logging.info(r.json()["sessionid"]) if(r.json()["sessionid"] != None): self.session_id = r.json()["sessionid"] except requests.exceptions.RequestException: logging.critical("Some error occurred during connection") else: logging.error("Not sufficient parameters provided.Required : ipaddress , username , password") def install_build(self,build_url): url = 'http://ip/nitro/v1/config/install' headers = {"Content-type": "application/json","Connection": "keep-alive"} payload = {"install": {"url": build_url}} try: cookie = {"NITRO_AUTH_TOKEN": self.session_id} r = requests.post(timeout=5, url=url, data=json.dumps(payload), headers=headers,cookies=cookie) except requests.exceptions.RequestException: print("Connection Error occurred") raise '''this will give details of exception''' else: assert r.status_code == 201, "Status code seen: " + str(r.status_code) + "\n" + "Error message from system: " + \ r.json()["message"] print("Successfully triggered job on device to install build") def __del__(self): logging.debug("Deleted the object") if __name__ == '__main__': ns_session = NSNitro(ip,username,password) url_i = 'https://myupload-server.net/build-13.0-480.16.tgz' t1 = 
Thread(target=ns_session.install_build,args=(url_i,)) t1.start() ''' while t1.is_alive(): t2 = Thread(target=ns_session.get_installed_version,) t2.start() t2.join()''' time.sleep(100) logging.info("Install thread completed") t1.join() ns_session.logout() When the request is posted using curl command, the acks are sent in specified keep-alive intervals. Without ack being sent , server is resetting the connection.
The JMS server can't recognize message.body and raises an exception: "Protocol message contained an invalid tag (zero)." It works fine with the STOMP protocol
Request from python-qpid-proton client to JMS server through ActiveMQ. message.body contains a Protobuf. JMS server can't recognize message.body and raise an exception: "Protocol message contained an invalid tag (zero)." It works fine with Stomp protocol (but it's another story). code: import sys import socket import uuid from time import time from proton import Message from proton.handlers import MessagingHandler from proton.reactor import Container from proto import common def make_message(message_body): message = Message() message.body = message_body message.correlation_id = 1 message.properties = { 'type': 'ru.messages.CommonProtos$LoginRequest', 'content-length': len(message_body), 'messageId': str(uuid.uuid4()), 'errorMsg': '', 'timestamp': str(int(time())), 'user': 'vasja' } return message def make_login_message(): login_request = common.LoginRequest() login_request.ip.append(socket.gethostbyname(socket.gethostname())) login_request.username = "vasja" login_request.password = "123" login_request.clientVersion = "1.1.1" login_request.subscriber = "test" login_request.clientTime = int(time()) login_request.pkName = "test" login_request.hostName = "vasja" return make_message(login_request.SerializeToString()) class AMQPClient(MessagingHandler): def __init__(self, server, request, receive=None): super(AMQPClient, self).__init__() self.server = server self.request = request self.receive = ( request + "receive") if receive is None else (request + receive) def on_start(self, event): conn = event.container.connect(self.server) event.container.create_receiver(conn, self.receive) event.container.create_sender(conn, self.request) def on_sendable(self, event): message = make_login_message() event.sender.send(message) event.sender.close() def on_message(self, event): print(event.message.body) event.connection.close() def main(): Container(AMQPClient(server="192.168.77.100:5672", request="test")).run() if __name__ == "__main__": main()
Stomp Consumer using deferred.inlinecallback
I am implementing stomp consumer as a library. By calling this library in other application i should be able to get the data in ActiveMQ. I am implementing it as below, but I have a problem in returning the frame.body. I am not able to retrieve the data from outside the class. from twisted.internet import defer from stompest.async import Stomp from stompest.async.listener import SubscriptionListener from stompest.config import StompConfig from socket import gethostname from uuid import uuid1 import json class Consumer(object): def __init__(self, amq_uri): self.amq_uri = amq_uri self.hostname = gethostname() self.config = StompConfig(uri=self.amq_uri) #defer.inlineCallbacks def run(self, in_queue): client = yield Stomp(self.config) headers = { StompSpec.ACK_HEADER: StompSpec.ACK_CLIENT_INDIVIDUAL, StompSpec.ID_HEADER: self.hostname, 'activemq.prefetchSize': '1000', } yield client.connect(headers=self._return_client_id()) client.subscribe( in_queue, headers, listener=SubscriptionListener(self.consume) ) try: client = yield client.disconnected except StompConnectionError: yield client.connect(headers=self._return_client_id()) client.subscribe( in_queue, headers, listener=SubscriptionListener(self.consume) ) while True: try: yield client.disconnected except StompProtocolError: pass except StompConnectionError: yield client.connect(headers=self._return_client_id()) client.subscribe( in_queue, headers, listener=SubscriptionListener(self.consume) ) def _return_client_id(self): client_id = {} client_id['client-id'] = gethostname() + '-' + str(uuid1()) return client_id def consume(self, client, frame): data = json.loads(frame.body) print 'Received Message Type {}'.format(type(data)) print 'Received Message {}'.format(data) ## I want to return data here. I am able to print the frame.body here. 
# Call from another application import Queue from twisted.internet import reactor amq_uri = 'tcp://localhost:61613' in_queue = '/queue/test_queue' c = Consumer(amq_uri) c.run(in_queue) print "data is from outside function", data # Should be able to get the data which is returned by consume here reactor.run() Can someone please let me know how can i achieve this. Thanks
I found a solution to my problem. Instead of using async stomp library, i used sync stomp library. Implemented it as below, class Consumer(object): def __init__(self, amq_uri): self.amq_uri = amq_uri self.hostname = gethostname() self.config = StompConfig(uri=self.amq_uri) def run(self, in_queue, return_dict): client = Stomp(self.config) headers = { StompSpec.ACK_HEADER: StompSpec.ACK_CLIENT_INDIVIDUAL, StompSpec.ID_HEADER: self.hostname } client.connect() client.subscribe(in_queue, headers) try: frame = client.receiveFrame() data = json.dumps(frame.body) except Exception as exc: print exc client.ack(frame) client.disconnect() return_dict['data'] = data return data
Python script to harvest tweets to a MongoDb works with users but not hashtags. Any ideas why not?
I'm playing around the Twitter API and am in the process of developing a script to pull all Tweets with a certain hashtag down to a local mongoDB. I have it working fine when I'm downloading tweets from users, but when downloading tweets from a hashtag I get: return loads(fp.read(), AttributeError: 'int' object has no attribute 'read' Can anyone offer their infinite wisdom into how I could get this script to work? To run, save it as a .py file, cd to the folder and run: python twitter.py Code: __author__ = 'Tom Cusack' import pymongo import oauth2 as oauth import urllib2, json import sys, argparse, time def oauth_header(url, consumer, token): params = {'oauth_version': '1.0', 'oauth_nonce': oauth.generate_nonce(), 'oauth_timestamp': int(time.time()), } req = oauth.Request(method = 'GET',url = url, parameters = params) req.sign_request(oauth.SignatureMethod_HMAC_SHA1(),consumer, token) return req.to_header()['Authorization'].encode('utf-8') def main(): ### Twitter Settings numtweets = '32000' verbose = 'store_true' retweet = 'store_false' CONSUMER_KEY = 'M7Xu9Wte0eIZvqhb4G9HnIn3G' CONSUMER_SECRET = 'c8hB4Qwps2aODQUx7UsyzQuCRifEp3PKu6hPQll8wnJGIhbKgZ' ACCESS_TOKEN = '3213221313-APuXuNjVMbRbZpu6sVbETbgqkponGsZJVT53QmG' ACCESS_SECRET = 'BJHrqWC9ed3pA5oDstSMCYcUcz2pYF3DmJ7jcuDe7yxvi' base_url = url = 'https://api.twitter.com/1.1/search/tweets.json?include_entities=true&count=200&q=#mongodb&include_rts=%s' % (retweet) oauth_consumer = oauth.Consumer(key = CONSUMER_KEY, secret = CONSUMER_SECRET) oauth_token = oauth.Token(key = ACCESS_TOKEN, secret = ACCESS_SECRET) ### Mongodb Settings uri = 'mongodb://127.0.0.1:27017/SARKY' if uri != None: try: conn = pymongo.MongoClient(uri) print 'Pulling Tweets..' except: print 'Error: Unable to connect to DB. Check uri variable.' 
return uri_parts = pymongo.uri_parser.parse_uri(uri) db = conn[uri_parts['database']] db['twitter-harvest'].ensure_index('id_str') ### Helper Variables for Harvest max_id = -1 tweet_count = 0 stream = 0 ### Begin Harvesting while True: auth = oauth_header(url, oauth_consumer, oauth_token) headers = {"Authorization": auth} request = urllib2.Request(url, headers = headers) try: stream = urllib2.urlopen(request) except urllib2.HTTPError, err: if err.code == 404: print 'Error: Unknown user. Check --user arg' return if err.code == 401: print 'Error: Unauthorized. Check Twitter credentials' return tweet_list = json.load(stream) if len(tweet_list) == 0: print 'No tweets to harvest!' return if 'errors' in tweet_list: print 'Hit rate limit, code: %s, message: %s' % (tweets['errors']['code'], tweets['errors']['message']) return if max_id == -1: tweets = tweet_list else: tweets = tweet_list[1:] if len(tweets) == 0: print 'Finished Harvest!' return for tweet in tweets: max_id = id_str = tweet['id_str'] try: if tweet_count == numtweets: print 'Finished Harvest- hit numtweets!' return if uri != None: db[user].update({'id_str':id_str},tweet,upsert = True) else: print tweet['text'] tweet_count+=1 if verbose == True and uri != None: print tweet['text'] except Exception, err: print 'Unexpected error encountered: %s' %(err) return url = base_url + '&max_id=' + max_id if __name__ == '__main__': try: main() except SystemExit as e: if e.code == 0: pass
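You initially set stream = 0. When your try...except block catches an HTTP error response with a code that isn't 404 or 401, neither branch returns, so the except block doesn't break out of the function and stream is still equal to 0. json.load(stream) then calls .read() on that integer, which is exactly the AttributeError you are seeing. I'd look more closely at what this response says (for example, print err.code and the response body), and return or re-raise from the except block for any other status code.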