Setting tweepy useragent universally? - python

I've been playing with tweepy for a while, but I keep having rate-limiting issues, getting 429 errors. I know you can set the headers on individual calls like
api.get_user('twitter', headers={'User-Agent': 'MyUserAgent'})
but is there a way to set the header in one place and not have to do it on every api call?

Hacky way:
import functools
class NewAPI(object):
    """Proxy that injects a fixed User-Agent header into every API call.

    Attribute access is forwarded to the wrapped ``api`` object. Callable
    attributes are returned wrapped so that each call receives a ``headers``
    keyword argument containing the configured User-Agent; non-callable
    attributes are returned unchanged.

    Args:
        api: The object whose methods accept a ``headers`` keyword argument.
        user_agent: Value sent as the ``User-Agent`` header on every call.
    """

    def __init__(self, api, user_agent='MyUserAgent'):
        self.api = api
        self.user_agent = user_agent

    def __getattr__(self, key):
        # __getattr__ only runs when normal lookup fails. If our own
        # attributes are missing (e.g. while copying/unpickling, before
        # __init__ has run) looking up self.api would recurse forever.
        if key in ('api', 'user_agent'):
            raise AttributeError(key)

        call = getattr(self.api, key)
        if not callable(call):
            # Plain data attributes must not be turned into functions.
            return call

        @functools.wraps(call)
        def wrapped_call(*args, **kwargs):
            # Copy so a dict supplied by the caller is never mutated.
            headers = dict(kwargs.pop('headers', None) or {})
            headers['User-Agent'] = self.user_agent
            kwargs['headers'] = headers
            return call(*args, **kwargs)

        return wrapped_call
api = NewAPI(api)
print(api.get_user('twitter'))
Disclaimer: untested as I don't have tweepy.

Related

requests.auth.AuthBase TypeError on call

From the docs at https://docs.python-requests.org/en/master/user/authentication/
I gathered that the __call__ method in my own auth class should take the r argument.
However, when I go to use this class in requests.get(auth=MyClass), I get the error TypeError: __call__() missing 1 required positional argument: 'r'
The code for my class can be found here https://pastebin.com/YDZ2DeaT
import requests
import time
import base64
from requests.auth import AuthBase
class TokenAuth(AuthBase):
    """Refreshes SkyKick token, for use with all Skykick requests.

    An instance is meant to be passed as ``auth=`` to ``requests``; it is
    then called with the outgoing request and adds a bearer token to it,
    fetching a new token first when the stored one has expired.
    """

    # Seconds a freshly issued token is treated as valid (23 hours).
    TOKEN_LIFETIME = 82800

    def __init__(self, Username: str, SubKey: str):
        self.Username = Username
        self.SubKey = SubKey
        # Initialise with no token and instant expiry
        self.Token = None
        self.TokenExpiry = time.time()
        self.Headers = {
            # Request headers
            'Content-Type': 'application/x-www-form-urlencoded',
            'Ocp-Apim-Subscription-Key': self.SubKey,
        }
        self.Body = {
            # Request body
            'grant_type': 'client_credentials',
            'scope': 'Partner',
        }

    def regenToken(self):
        """Request a new token and store it together with its expiry time.

        Raises:
            Exception: if the request cannot be sent, the reply is not
                JSON, or the reply contains an error indicator.
        """
        try:
            # Get key from API
            response = requests.post(
                "https://apis.skykick.com/auth/token",
                headers=self.Headers,
                auth=(self.Username, self.SubKey),
                data=self.Body,
            ).json()
        except (requests.RequestException, ValueError) as err:
            # Only transport and JSON-decoding failures are translated; a
            # bare ``except`` would also trap KeyboardInterrupt/SystemExit.
            raise Exception("Sending request failed, check connection.") from err
        # API errors are inconsistent, easiest way to catch them
        if "error" in response or "statusCode" in response:
            raise Exception(
                "Token requesting failed, cannot proceed with any Skykick actions, exiting.\n"
                f"Error raised was {response}")
        # Get token from response and set expiry
        self.Token = response["access_token"]
        self.TokenExpiry = time.time() + self.TOKEN_LIFETIME

    def __call__(self, r):
        """Attach the bearer token to request ``r`` and return it."""
        # If token expiry is now or in past, call regenToken
        if self.TokenExpiry <= time.time():
            self.regenToken()
        # Set headers and return complete requests.Request object
        r.headers["Authorization"] = f"Bearer {self.Token}"
        return r
# Initialise our token class, so it is ready to call
TokenClass = TokenAuth("test", "1234")
#Send request with class as auth method.
requests.get("https://apis.skykick.com/whoami", auth=TokenClass())
I've tried using the example code, which works, but I can't figure out why mine won't work.
python-requests version is 2.25.1
I think I know what is going on.
This line instantiates an object, called TokenClass
TokenClass = TokenAuth("test", "1234")
then here,
requests.get("https://apis.skykick.com/whoami", auth=TokenClass())
you are calling that object like a function
when you call an object like a function, python looks for the __call__ method of the object.
And you are not passing in any arguments here. What you have is roughly the same as this, I think:
requests.get("https://apis.skykick.com/whoami", auth=TokenClass.__call__())
and so it complains that you are missing the r argument
This is their example:
import requests
class MyAuth(requests.auth.AuthBase):
def __call__(self, r):
# Implement my authentication
return r
url = 'https://httpbin.org/get'
requests.get(url, auth=MyAuth())
MyAuth is a class that they define, and then MyAuth() creates an instance of it that they pass in to get.
Yours is more like this
import requests
class MyAuth(requests.auth.AuthBase):
def __call__(self, r):
# Implement my authentication
return r
url = 'https://httpbin.org/get'
myAuth = MyAuth() # create an instance of the class
requests.get(url, auth=myAuth()) # call the instance and pass in result
It could also be written like this
import requests
class MyAuth(requests.auth.AuthBase):
def __call__(self, r):
# Implement my authentication
return r
url = 'https://httpbin.org/get'
requests.get(url, auth=MyAuth()())
This program will produce the same error you are getting:
import requests
class MyAuth(requests.auth.AuthBase):
def __call__(self, r):
# Implement my authentication
return r
url = 'https://httpbin.org/get'
MyAuth()()
because when you put () after a class, you get an instance, and when you put () after an instance, you call the __call__ method

Converting from requests to aiohttp

I currently have this class for making requests to an API and caching the JSON response:
import os
import pathlib
import json
import hashlib
import time
import requests
class NoJSONResponseError(Exception):
    """Signals that a response body could not be decoded as JSON."""
class JSONRequestCacher(object):
    """Manage a JSON object through the cache.

    Download the associated resource from the provided URL
    when need be and retrieve the JSON from a cached file
    if possible.

    Args:
        duration: Maximum age in seconds for a cached file to be reused.
        cachedir: Cache directory (``str`` or ``pathlib.Path``). When
            ``None`` the ``CUSTOM_CACHEDIR`` environment variable is used,
            falling back to ``~/.custom_cache/``.
    """

    def __init__(self, duration=86400, cachedir=None):
        self.duration = duration
        self.cachedir = self._get_cachedir(cachedir)
        self._validate_cache()

    def _get_cachedir(self, cachedir):
        """Resolve the cache directory and return it as a ``pathlib.Path``."""
        if cachedir is None:
            cachedir = os.environ.get(
                'CUSTOM_CACHEDIR',
                pathlib.Path(pathlib.Path.home(), '.custom_cache/')
            )
        # The environment variable and caller-supplied values may be plain
        # strings; the rest of the class relies on Path methods
        # (mkdir, joinpath), so always normalise here.
        return pathlib.Path(cachedir)

    def _validate_cache(self):
        """Create the cache directory if it doesn't exist"""
        self.cachedir.mkdir(parents=True, exist_ok=True)

    def _request(self, url):
        """Perform the retrieval of the requested JSON data"""
        return requests.get(url)

    def save(self, raw, cachefile):
        """Save the provided raw JSON data into the cached file"""
        with open(cachefile, 'w') as out:
            json.dump(raw, out)

    def load(self, cachefile):
        """Retrieve the saved JSON data from the cached file"""
        with open(cachefile) as cached:
            return json.load(cached)

    def cache_is_valid(self, cachefile):
        """Check if cache exists and is more recent than the cutoff"""
        if cachefile.is_file():
            cache_age = time.time() - cachefile.stat().st_mtime
            return cache_age < self.duration
        return False

    def request(self, url, refresh=False):
        """The JSON data associated to the given URL.

        Either read from the cache or fetch from the web.

        Args:
            url: Resource to fetch; its MD5 hex digest names the cache file.
            refresh: When true, ignore any cached copy and fetch again.

        Returns:
            A ``(data, from_cache, seconds)`` tuple: the decoded JSON,
            whether it came from the cache, and the time spent (local
            load time for cache hits, the response's elapsed time
            otherwise).

        Raises:
            NoJSONResponseError: if the response body is not valid JSON.
        """
        urlhash = hashlib.md5(url.encode()).hexdigest()
        cachefile = self.cachedir.joinpath(urlhash)
        start = time.time()
        if not refresh and self.cache_is_valid(cachefile):
            return self.load(cachefile), True, time.time() - start
        resp = self._request(url)
        resp.raise_for_status()
        try:
            raw = resp.json()
        except ValueError as err:
            # Keep the decoding error attached as the cause for debugging.
            raise NoJSONResponseError() from err
        self.save(raw, cachefile)
        return raw, False, resp.elapsed.total_seconds()
I then have other classes and code which call the request method of this code like so:
class APIHelper():
    """Thin helper that fetches values through a ``JSONRequestCacher``."""

    def __init__(self):
        self.cache = JSONRequestCacher()

    def fetch(self, val):
        """Request the URL built from ``val`` and return the cacher's result."""
        url = 'my/url/{}'.format(val)
        return self.cache.request(url)

    def fetchall(self, vals):
        """Fetch every value in ``vals`` sequentially; return the results as a list."""
        return [self.fetch(val) for val in vals]
For a small number of vals this is fine and it's really no big deal to wait 10 mins. However I am now looking at making 30,000+ hits to this endpoint. In the past I have used threadpools (multiprocessing.dummy.Pool) to achieve some parallelism, however from my reading it seems like async/await and aiohttp is a better way to go. Unfortunately, try as I might, I cannot wrap my head around how to translate that to this code. I am using Python 3.8.
EDIT
I tried making this change:
class JSONRequestCacher():
    def __init__(self):
        # Session object used for every request issued by this instance.
        self.http = aiohttp.ClientSession()

    async def _request(self, url):
        """Fetch ``url`` and return the result of ``response.read()``.

        This is a coroutine function: calling it without ``await`` yields
        a coroutine object, not the response data.
        """
        async with self.http.get(url) as response:
            return await response.read()
Got the error: AttributeError: 'coroutine' object has no attribute 'json' from my raw = resp.json() line
Tried then adding resp = await self._request(url) but that is SyntaxError: 'await' outside async function. Then if I make request an async function then calling it just seems to return me a coroutine object that doesn't give me the expected response.
And this is just trying to make the _request call async. I can't even start to understand how I am meant to make multiple calls to it via another class (APIHelper).

What is the best way to force a keyword while using **kwargs?

I'm not sure if I have used the correct terminology in the question.
Currently, I am trying to make a wrapper/interface around Google's Blogger API (Blog service).
[I know it has been done already, but I am using this as a project to learn OOP/python.]
I have made a method that gets a set of 25 posts from a blog:
def get_posts(self, **kwargs):
    """Request this blog's posts endpoint.

    Keyword arguments are handed to ``_send_request`` as the request
    parameters; its return value is passed straight back to the caller.
    """
    return self._send_request('/blogs/{id}/posts'.format(id=self.id), kwargs)
def _send_request(self, url, parameters=None):
    """Send an HTTP GET request to the Blogger API.

    Args:
        url: Path appended to ``self.base``.
        parameters: Optional dict of query parameters.

    Returns:
        The result of ``self._jload`` applied to the response text, or
        ``None`` when the request could not be sent.
    """
    # A ``{}`` default would be one dict shared by every call.
    if parameters is None:
        parameters = {}
    url = '{base}{url}?'.format(base=self.base, url=url)
    # Requests formats the parameters into the URL for me
    try:
        r = requests.get(url, params=parameters)
    except requests.RequestException:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("** Could not reach url:\n%s" % url)
        return
    api_response = r.text
    return self._jload(api_response)
The problem is, I have to specify the API key every time I call the get_posts function:
someblog = BloggerClient(url='http://someblog.blogger.com', key='0123')
someblog.get_posts(key=self.key)
Every API call requires that the key be sent as a parameter on the URL.
Then, what is the best way to do that?
I'm thinking a possible way (but probably not the best way?), is to add the key to the kwargs dictionary in the _send_request():
def _send_request(self, url, parameters=None):
    """Send an HTTP GET request to the Blogger API, always including the key.

    Args:
        url: Path appended to ``self.base``.
        parameters: Optional dict of query parameters; it is not modified.

    Returns:
        The result of ``self._jload`` applied to the response text, or
        ``None`` when the request could not be sent.
    """
    # Format the full API URL:
    url = '{base}{url}?'.format(base=self.base, url=url)
    # The API key is always added. Work on a copy so neither the caller's
    # dict nor a shared default is altered.
    params = dict(parameters or {})
    params['key'] = self.key
    try:
        r = requests.get(url, params=params)
    except requests.RequestException:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("** Could not reach url:\n%s" % url)
        return
    api_response = r.text
    return self._jload(api_response)
I can't really get my head around what is the best way (or most pythonic way) to do it.
You could store it in a named constant.
If this code doesn't need to be secure, simply
API_KEY = '1ih3f2ihf2f'
If it's going to be hosted on a server somewhere or needs to be more secure, you could store the value in an environment variable
In your terminal:
export API_KEY='1ih3f2ihf2f'
then in your python script:
import os
API_KEY = os.environ.get('API_KEY')
The problem is, I have to specify the API key every time I call the get_posts function:
If it really is just this one method, the obvious idea is to write a wrapper:
def get_posts(blog, *args, **kwargs):
    """Call ``blog.get_posts`` with the API key added to the keyword arguments."""
    # NOTE(review): ``key`` is not defined in this snippet; it is assumed
    # to be a module-level name holding the API key.
    return blog.get_posts(*args, key=key, **kwargs)
Or, better, wrap up the class to do it for you:
class KeyRememberingBloggerClient(BloggerClient):
    """BloggerClient variant that stores the API key and supplies it to get_posts."""

    def __init__(self, *args, **kwargs):
        # The required ``key`` keyword is removed before delegating upward.
        api_key = kwargs.pop('key')
        self.key = api_key
        super(KeyRememberingBloggerClient, self).__init__(*args, **kwargs)

    def get_posts(self, *args, **kwargs):
        """Delegate to the parent ``get_posts`` with ``key=self.key`` added."""
        parent = super(KeyRememberingBloggerClient, self)
        return parent.get_posts(*args, key=self.key, **kwargs)
So now:
someblog = KeyRememberingBloggerClient(url='http://someblog.blogger.com', key='0123')
someblog.get_posts()
Yes, you can override or monkeypatch the _send_request method that all of the other methods use, but if there's only 1 or 2 methods that need to be fixed, why delve into the undocumented internals of the class, and fork the body of one of those methods just so you can change it in a way you clearly weren't expected to, instead of doing it cleanly?
Of course if there are 90 different methods scattered across 4 different classes, you might want to consider building these wrappers programmatically (and/or monkeypatching the classes)… or just patching the one private method, as you're doing. That seems reasonable.

using cookies with twisted.web.client

I'm trying to make a web client application using twisted but having some trouble with cookies. Does anyone have an example I can look at?
While it's true that getPage doesn't easily allow direct access to the request or response headers (just one example of how getPage isn't a super awesome API), cookies are actually supported.
# NOTE(review): the key `cookies` and value `tosend` inside the braces look
# like placeholders for a real cookie name and value -- confirm before use.
cookies = {cookies: tosend}
# The same dict object is handed to getPage and read again in the callback.
d = getPage(url, cookies=cookies)
def cbPage(result):
    # Runs when the deferred fires; shows the cookie dict at that point.
    print 'Look at my cookies:', cookies
d.addCallback(cbPage)
Any cookies in the dictionary when it is passed to getPage will be sent. Any new cookies the server sets in response to the request will be added to the dictionary.
You might have missed this feature when looking at getPage because the getPage signature doesn't have a cookies parameter anywhere in it! However, it does take **kwargs, and this is how cookies is supported: any extra arguments passed to getPage that it doesn't know about itself, it passes on to HTTPClientFactory.__init__. Take a look at that method's signature to see all of the things you can pass to getPage.
Turns out there is no easy way afaict
The headers are stored in twisted.web.client.HTTPClientFactory but not available from twisted.web.client.getPage() which is the function designed for pulling back a web page. I ended up rewriting the function:
from twisted.web import client
def getPage(url, contextFactory=None, *args, **kwargs):
    """Variant of ``getPage`` that also delivers the response headers.

    Builds a getter factory for ``url`` and returns the result of adding a
    callback to its deferred; that callback maps the page data to a
    ``(data, fact.response_headers)`` tuple.
    """
    fact = client._makeGetterFactory(
        url,
        # Qualified: only the ``client`` module itself is imported here.
        client.HTTPClientFactory,
        contextFactory=contextFactory,
        *args, **kwargs)
    return fact.deferred.addCallback(lambda data: (data, fact.response_headers))
from twisted.internet import reactor
from twisted.web import client
def getPage(url, contextFactory=None, *args, **kwargs):
    """Build a getter factory using CustomHTTPClientFactory and return its deferred."""
    factory = client._makeGetterFactory(
        url,
        CustomHTTPClientFactory,
        contextFactory=contextFactory,
        *args, **kwargs)
    return factory.deferred
class CustomHTTPClientFactory(client.HTTPClientFactory):
    """HTTPClientFactory whose deferred fires with page, headers and cookies."""

    def __init__(self, url, method='GET', postdata=None, headers=None,
                 agent="Twisted PageGetter", timeout=0, cookies=None,
                 followRedirect=1, redirectLimit=20):
        # Every argument is forwarded positionally to the parent class.
        client.HTTPClientFactory.__init__(
            self, url, method, postdata, headers, agent, timeout,
            cookies, followRedirect, redirectLimit)

    def page(self, page):
        """Fire the deferred with a dict bundling the body, headers and cookies."""
        if not self.waiting:
            return
        self.waiting = 0
        self.deferred.callback({
            'page': page,
            'headers': self.response_headers,
            'cookies': self.cookies,
        })
if __name__ == '__main__':
    # Demo: request a page with a custom agent and one cookie, print the
    # outcome, then stop the reactor.
    def cback(result):
        # Print each key of the result together with its value.
        for k in result:
            print k, '==>', result[k]
        reactor.stop()
    def eback(error):
        # Print the traceback text obtained from the error object.
        print error.getTraceback()
        reactor.stop()
    d = getPage('http://example.com', agent='example web client',
    cookies={ 'some' : 'cookie' } )
    d.addCallback(cback)
    d.addErrback(eback)
    # Start the reactor; both handlers above call reactor.stop().
    reactor.run()

How to "keep-alive" with cookielib and httplib in python?

In python, I'm using httplib because it "keep-alive" the http connection (as opposed to urllib(2)). Now, I want to use cookielib with httplib but they seem to hate each other!! (no way to interface them together).
Does anyone know of a solution to that problem?
HTTP handler for urllib2 that supports keep-alive
You should consider using the Requests library instead at the earliest chance you have to refactor your code. In the mean time;
HACK ALERT! :)
I'd go the other suggested way, but I've done a hack (done for different reasons though), which does create an interface between httplib and cookielib.
What I did was creating a fake HTTPRequest with minimal required set of methods, so that CookieJar would recognize it and process cookies as needed. I've used that fake request object, setting all the data needed for cookielib.
Here is the code of the class:
class HTTPRequest(object):
    """
    Data container for HTTP request (used for cookie processing).

    Args:
        host: Pair indexed as ``host[0]`` (host name) and ``host[1]`` (port).
        url: Path part of the request, appended after host and port.
        headers: Optional dict of initial headers.
        secure: True for an ``https`` request, False for ``http``.
    """

    def __init__(self, host, url, headers=None, secure=False):
        self._host = host
        self._url = url
        self._secure = secure
        self._headers = {}
        for key, value in (headers or {}).items():
            self.add_header(key, value)

    def has_header(self, name):
        """Return True if ``name`` is a stored header key (exact match)."""
        return name in self._headers

    def add_header(self, key, val):
        """Store a header under the capitalized form of ``key``."""
        self._headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header exactly like ``add_header``."""
        self._headers[key.capitalize()] = val

    def is_unverifiable(self):
        """Always report the request as unverifiable."""
        return True

    def get_type(self):
        """Return the URL scheme: ``'https'`` when secure, else ``'http'``."""
        return 'https' if self._secure else 'http'

    def get_full_url(self):
        """Return scheme://host[:port]url, omitting the scheme's default port."""
        # Compare as strings: the port may be given as an int or a str, and
        # comparing a str against the ints 443/80 would never match.
        port = str(self._host[1])
        default_port = '443' if self._secure else '80'
        port_str = '' if port == default_port else ':' + port
        return self.get_type() + '://' + self._host[0] + port_str + self._url

    def get_header(self, header_name, default=None):
        """Return the stored header value, or ``default`` if absent."""
        return self._headers.get(header_name, default)

    def get_host(self):
        """Return the host name (first element of the host pair)."""
        return self._host[0]

    # Same implementation exposed under a second name.
    get_origin_req_host = get_host

    def get_headers(self):
        """Return the internal header dict (not a copy)."""
        return self._headers
Please note, the class has support for HTTPS protocol only (all I needed at the moment).
The code, which used this class was (please note another hack to make response compatible with cookielib):
# Jar that holds cookies between requests.
cookies = CookieJar()
headers = {
# headers that you wish to set
}
# construct fake request
fake_request = HTTPRequest( host, request_url, headers )
# add cookies to fake request
cookies.add_cookie_header(fake_request)
# issue an httplib.HTTPConnection based request using cookies and headers from the fake request
http_connection.request(type, request_url, body, fake_request.get_headers())
response = http_connection.getresponse()
if response.status == httplib.OK:
    # HACK: pretend we're urllib2 response
    # (gives the response an info() callable that returns response.msg)
    response.info = lambda : response.msg
    # read and store cookies from response
    cookies.extract_cookies(response, fake_request)
# process response...

Categories

Resources