Get Latest Commit URL from PyGithub Efficiently - python

I'm using this function to get the latest commit url using PyGithub:
from github import Github

def getLastCommitURL():
    encrypted = 'mypassword'
    # naiveDecrypt defined elsewhere
    g = Github('myusername', naiveDecrypt(encrypted))
    org = g.get_organization('mycompany')
    code = org.get_repo('therepo')
    commits = code.get_commits()
    last = commits[0]
    return last.html_url
It works, but it seems to make GitHub unhappy with my IP address and gives me a slow response for the resulting URL. Is there a more efficient way for me to do this?

This wouldn't work if you had no commits in the past 24 hours. But if you do, it seems to return faster and requests fewer commits, according to the GitHub API documentation:
from datetime import datetime, timedelta
from github import Github

def getLastCommitURL():
    encrypted = 'mypassword'
    # naiveDecrypt defined elsewhere, as in the question
    g = Github('myusername', naiveDecrypt(encrypted))
    org = g.get_organization('mycompany')
    code = org.get_repo('therepo')
    # limit to commits in the past 24 hours
    since = datetime.now() - timedelta(days=1)
    commits = code.get_commits(since=since)
    last = commits[0]
    return last.html_url
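If the repo might sit idle for a day, a hedged variant could widen the window until something comes back. The 1/7/30-day retry ladder below is my own assumption, not part of the original answer, and checking totalCount may cost an extra API call:
from datetime import datetime, timedelta
from github import Github

def getLastCommitURLWithFallback():
    # naiveDecrypt and the credentials follow the question's setup
    g = Github('myusername', naiveDecrypt('mypassword'))
    repo = g.get_organization('mycompany').get_repo('therepo')
    # Try progressively wider windows so an idle repo still resolves
    for days in (1, 7, 30):
        commits = repo.get_commits(since=datetime.now() - timedelta(days=days))
        if commits.totalCount > 0:
            return commits[0].html_url
    # Last resort: the full listing, as in the question
    return repo.get_commits()[0].html_url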

You could make a request directly to the API:
from urllib.request import urlopen
import json

def get_latest_commit(owner, repo):
    url = 'https://api.github.com/repos/{owner}/{repo}/commits?per_page=1'.format(owner=owner, repo=repo)
    response = urlopen(url).read()
    data = json.loads(response.decode())
    return data[0]

if __name__ == '__main__':
    commit = get_latest_commit('mycompany', 'therepo')
    print(commit['html_url'])
In this case you would only be making one request to the API instead of 3, and you are only getting the last commit instead of all of them, so it should be faster as well.
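One likely reason GitHub was "unhappy" with the IP is the unauthenticated rate limit (60 requests per hour per IP, versus 5,000 when authenticated, at the time of writing). A hedged sketch of the same single-request approach with a personal access token; the token value is a placeholder:
from urllib.request import urlopen, Request
import json

def get_latest_commit_authed(owner, repo, token):
    # Same per_page=1 trick, but authenticated for the higher rate limit
    url = 'https://api.github.com/repos/{owner}/{repo}/commits?per_page=1'.format(owner=owner, repo=repo)
    req = Request(url, headers={'Authorization': 'token ' + token})
    data = json.loads(urlopen(req).read().decode())
    return data[0]['html_url']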

Related

Unable to extract the table from API using python

I am trying to extract a table using an API, but I am unable to do so. I am pretty sure that I am not using it correctly, and any help would be appreciated. I want to extract the Latest_full_data table from the Quandl metadata API.
This is my code to get the table, but I am getting an error:
import urllib
import urllib.request
import requests
import json

locu_api = 'api_Key'

def locu_search(query):
    api_key = locu_api
    url = 'https://www.quandl.com/api/v3/databases/WIKI/metadata?api_key=' + api_key
    response = urllib.request.urlopen(url).read()
    json_obj = str(response, 'utf-8')
    datanew = json.loads(json_obj)
    return datanew
When I do print(datanew), I get the error below. Update: even if I change it to return datanew, the error is still the same:
name 'datanew' is not defined
I had the same issues with urllib before. If possible, try to use requests; it's a better-designed library, in my opinion. Also, it can read JSON with a single function, so there's no need to run it through multiple lines. Sample code here:
import requests

locu_api = 'api_Key'

def locu_search():
    # use the module-level key; the original referenced an undefined api_key here
    url = 'https://www.quandl.com/api/v3/databases/WIKI/metadata?api_key=' + locu_api
    return requests.get(url).json()

locu_search()
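For what it's worth, the NameError in the question most likely comes from referring to datanew outside locu_search, where it is only a local variable; binding the function's return value avoids it:
# datanew only exists inside locu_search; bind the returned value instead
result = locu_search()
print(result)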
Edit:
The endpoint that you are calling might not be the correct one. I think you are looking for the following one:
import requests

api_key = 'your_api_key_here'

def locu_search(dataset_code):
    url = f'https://www.quandl.com/api/v3/datasets/WIKI/{dataset_code}/metadata.json?api_key={api_key}'
    req = requests.get(url)
    return req.json()

data = locu_search("FB")
This will return all the metadata for a company, in this case Facebook.
Maybe it doesn't apply to your specific problem, but what I normally do is the following:
import requests
import json

def get_values(url):
    response = requests.get(url).text
    values = json.loads(response)
    return values

Google Indexing API - Invalid attribute. 'url' is not in standard URL format - But my URL is Correct

I am currently using Indexing API v3.
When I use this API in a loop, I get this error:
Invalid attribute. 'url' is not in standard URL format
But I am pretty sure that my URL is correct, because it was downloaded from Google Search Console.
Here is the code:
from oauth2client.service_account import ServiceAccountCredentials
import httplib2
import json
import pandas as pd

JSON_KEY_FILE = "key.json"
SCOPES = ["https://www.googleapis.com/auth/indexing"]

credentials = ServiceAccountCredentials.from_json_keyfile_name(JSON_KEY_FILE, scopes=SCOPES)
http = credentials.authorize(httplib2.Http())

def indexURL(url, http):
    ENDPOINT = "https://indexing.googleapis.com/v3/urlNotifications:publish"
    content = {}
    content['url'] = url
    content['type'] = "URL_UPDATED"
    json_ctn = json.dumps(content)
    response, content = http.request(ENDPOINT, method="POST", body=json_ctn)
    result = json.loads(content.decode())
    if "error" in result:
        print("Error({} - {}): {}".format(result["error"]["code"], result["error"]["status"], result["error"]["message"]))
    else:
        print("urlNotificationMetadata.url: {}".format(result["urlNotificationMetadata"]["url"]))
        print("urlNotificationMetadata.latestUpdate.url: {}".format(result["urlNotificationMetadata"]["latestUpdate"]["url"]))
        print("urlNotificationMetadata.latestUpdate.type: {}".format(result["urlNotificationMetadata"]["latestUpdate"]["type"]))
        print("urlNotificationMetadata.latestUpdate.notifyTime: {}".format(result["urlNotificationMetadata"]["latestUpdate"]["notifyTime"]))

# This file contains 2 columns, URL and date
csv = pd.read_csv("my_data.csv")
csv[["URL"]][0:10].apply(lambda x: indexURL(x.to_string(), http), axis=1)
Can anyone please tell me what's wrong with my code?
Thank you very much in advance for all your help.
It seems that even if I apply .strip() to each row, there is still a \n at the end of each URL.
So instead of feeding rows one by one to the lambda, I pass the whole series to the lambda and use a for loop to handle it.
The whole working example is here:
Google Indexing API v3 Working Example with Python 3
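A minimal sketch of that fix, assuming the stray newline came from converting one-row selections with to_string() (the file and column names follow the question; indexURL and http are defined above):
import pandas as pd

# Iterate over the column's values directly instead of calling
# to_string() on one-row selections, which added the stray text
csv = pd.read_csv("my_data.csv")
for url in csv["URL"][0:10]:
    indexURL(url.strip(), http)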

Optimise python function fetching multi-level json attributes

I have a 3-level JSON file. I am fetching the values of some of the attributes from each of the 3 levels of JSON. At the moment, the execution time of my code is pathetic, as it takes about 2-3 minutes to get the results on my web page. I will have a much larger JSON file to deal with in production.
I am new to Python and Flask and haven't done much web programming. Please suggest ways I could optimise the code below! Thanks for the help, much appreciated.
import json
import urllib2
import flask
from flask import request

def Backend():
    url = 'http://localhost:8080/surveillance/api/v1/cameras/'
    response = urllib2.urlopen(url).read()
    response = json.loads(response)
    components = list(response['children'])
    urlComponentChild = []
    for component in components:
        urlComponent = str(url + component + '/')
        responseChild = urllib2.urlopen(urlComponent).read()
        responseChild = json.loads(responseChild)
        camID = str(responseChild['id'])
        camName = str(responseChild['name'])
        compChildren = responseChild['children']
        compChildrenName = list(compChildren)
        for compChild in compChildrenName:
            href = str(compChildren[compChild]['href'])
            ID = str(compChildren[compChild]['id'])
            urlComponentChild.append([href, ID])
    myList = []
    for each in urlComponentChild:
        response = urllib2.urlopen(each[0]).read()
        response = json.loads(response)
        url = each[0] + '/recorder'
        responseRecorder = urllib2.urlopen(url).read()
        responseRecorder = json.loads(responseRecorder)
        username = str(response['subItems']['surveillance:config']['properties']['username'])
        password = str(response['subItems']['surveillance:config']['properties']['password'])
        manufacturer = str(response['properties']['Manufacturer'])
        model = str(response['properties']['Model'])
        status = responseRecorder['recording']
        myList.append([each[1], username, password, manufacturer, model, status])
    return myList

APP = flask.Flask(__name__)

@APP.route('/', methods=['GET', 'POST'])
def index():
    """ Displays the index page accessible at '/'
    """
    if request.method == 'GET':
        return flask.render_template('index.html', response=Backend())

if __name__ == '__main__':
    APP.debug = True
    APP.run(port=62000)
Ok, caching. So what we're going to do is start returning values to the user instantly based on data we already have, rather than generating new data every time. This means that the user might get slightly less up to date data than is theoretically possible to get, but it means that the data they do receive they receive as quickly as is possible given the system you're using.
So we'll keep your backend function as it is. Like I said, you could certainly speed it up with multithreading (If you're still interested in that, the 10 second version is that I would use grequests to asynchronously get data from a list of urls).
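For reference, a hedged sketch of that multithreading idea using only the standard library (a thread pool rather than grequests); the list of URLs would come from the question's Backend logic:
from multiprocessing.pool import ThreadPool
import json
import urllib2

def fetch_json(url):
    # One blocking fetch; the pool runs many of these concurrently
    return json.loads(urllib2.urlopen(url).read())

def fetch_all(urls, workers=10):
    pool = ThreadPool(workers)
    try:
        return pool.map(fetch_json, urls)  # results in the same order as urls
    finally:
        pool.close()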
But, rather than call it in response to the user every time a user requests data, we'll just call it routinely every once in a while. This is almost certainly something you'd want to do eventually anyway, because it means you don't have to generate brand new data for each user, which is extremely wasteful. We'll just keep some data on hand in a variable, update that variable as often as we can, and return whatever's in that variable every time we get a new request.
from threading import Thread
from time import sleep

data = None

def Backend():
    .....

def main_loop():
    global data
    while True:
        sleep(LOOP_DELAY_TIME_SECONDS)  # choose a refresh interval
        data = Backend()

APP = flask.Flask(__name__)

@APP.route('/', methods=['GET', 'POST'])
def index():
    """ Displays the index page accessible at '/'
    """
    if request.method == 'GET':
        # Return whatever data we currently have cached
        return flask.render_template('index.html', response=data)

if __name__ == '__main__':
    data = Backend()  # Grab data before starting the server so we never return None to the user
    Thread(target=main_loop).start()  # Loop and grab new data on every iteration
    APP.debug = True
    APP.run(port=62000)
DISCLAIMER: I've used Flask and threading before for a few projects, but I am by no means an expert on them, or on web development at all. Test this code before using it for anything important (or better yet, find someone who knows what they're doing before using it for anything important).
Edit: data will have to be a global, sorry about that - hence the disclaimer

Amazon Product Advertising API (ItemSearch with ItemPage)

I wrote the following code:
from hashlib import sha256
from base64 import b64encode
import hmac
import urllib
from time import strftime, gmtime
url = 'http://ecs.amazonaws.com/onca/xml'
AWSAccessKeyId = amazon_settings.amazon_access_key_id
AssociateTag = amazon_settings.amazon_associate_tag
Keywords = urllib.quote_plus('Potter')
Operation = 'ItemSearch'
SearchIndex = 'Books'
Service = 'AWSECommerceService'
Timestamp = urllib.quote_plus(strftime("%Y-%m-%dT%H:%M:%S.000Z", gmtime()))
Version = '2011-08-01'
sign_to = 'GET\necs.amazonaws.com\n/onca/xml\nAWSAccessKeyId=%s&AssociateTag=%s&Keywords=%s&Operation=%s&SearchIndex=%s&Service=%s&Timestamp=%s&Version=%s' % (AWSAccessKeyId, AssociateTag, Keywords, Operation, SearchIndex, Service, Timestamp, Version)
Signature = urllib.quote_plus(b64encode(hmac.new(str(amazon_settings.amazon_secret_access_key), str(sign_to), sha256).digest()))
request = '%s?AWSAccessKeyId=%s&AssociateTag=%s&Keywords=%s&Operation=%s&SearchIndex=%s&Service=%s&Timestamp=%s&Version=%s&Signature=%s' % (url, AWSAccessKeyId, AssociateTag, Keywords, Operation, SearchIndex, Service, Timestamp, Version, Signature)
print request
When I use this code, everything works fine. But if I try to add the ItemPage param to the sign_to variable and to the request variable, I get a SignatureDoesNotMatch error. Help me please.
It's actually not an answer to your question, but I recommend you take a look at an excellent Python wrapper for the Amazon Product Advertising API - python-amazon-product-api
It's hard to find in the documentation, but you have to make sure that your query parameters are in alphabetical order, or else you get a SignatureDoesNotMatch error.
For example, ItemPage must go between AssociateTag and Keywords to be valid (see the sketch after this list):
AWSAccessKeyId
AssociateTag
ItemPage
Keywords
Operation
ResponseGroup
SearchIndex
Service
SignatureVersion
Timestamp
Version
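To make the ordering mechanical rather than manual, here is a hedged sketch that rebuilds the question's sign_to string from a dict sorted by key; the ItemPage value is a placeholder and the other variables are the question's (already URL-quoted where needed):
params = {
    'AWSAccessKeyId': AWSAccessKeyId,
    'AssociateTag': AssociateTag,
    'ItemPage': '2',  # placeholder page number
    'Keywords': Keywords,
    'Operation': Operation,
    'SearchIndex': SearchIndex,
    'Service': Service,
    'Timestamp': Timestamp,
    'Version': Version,
}
# Sorting by key keeps the canonical string in the order the signature expects
canonical = '&'.join('%s=%s' % (k, params[k]) for k in sorted(params))
sign_to = 'GET\necs.amazonaws.com\n/onca/xml\n' + canonical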

Python Getting date online?

How can I get the current date, month & year online using Python? By this I mean: rather than getting it from the computer's clock, visit a website and get it, so that it doesn't rely on the computer's settings.
So, thinking about the "would be so trivial" part (from the answer further down), I went ahead and just made a Google App Engine web app -- when you visit it, it returns a simple response claiming to be HTML but actually just a string such as 2009-05-26 02:01:12 UTC\n. Any feature requests?-)
Usage example with Python's urllib module:
Python 2.7
>>> from urllib2 import urlopen
>>> res = urlopen('http://just-the-time.appspot.com/')
>>> time_str = res.read().strip()
>>> time_str
'2017-07-28 04:55:48'
Python 3.x+
>>> from urllib.request import urlopen
>>> res = urlopen('http://just-the-time.appspot.com/')
>>> result = res.read().strip()
>>> result
b'2017-07-28 04:53:46'
>>> result_str = result.decode('utf-8')
>>> result_str
'2017-07-28 04:53:46'
If you can't use NTP but rather want to stick with HTTP, you could fetch http://developer.yahooapis.com/TimeService/V1/getTime (e.g. with urllib.urlopen) and parse the results:
<?xml version="1.0" encoding="UTF-8"?>
<Error xmlns="urn:yahoo:api">
The following errors were detected:
<Message>Appid missing or other error </Message>
</Error>
<!-- p6.ydn.sp1.yahoo.com uncompressed/chunked Mon May 25 18:42:11 PDT 2009 -->
Note that the datetime (in PDT) is in the final comment (the error message is due to lack of APP ID). There probably are more suitable web services to get the current date and time in HTTP (without requiring registration &c), since e.g. making such a service freely available on Google App Engine would be so trivial, but I don't know of one offhand.
For this, an NTP server can be used:
import ntplib
import datetime

print('Make sure you have an internet connection.')
try:
    client = ntplib.NTPClient()
    response = client.request('pool.ntp.org')
    Internet_date_and_time = datetime.datetime.fromtimestamp(response.tx_time)
    print('\n')
    print('Internet date and time as reported by NTP server: ', Internet_date_and_time)
except OSError:
    print('\n')
    print('Internet date and time could not be reported by server.')
    print('There is no internet connection.')
In order to utilise an online time string, e.g. derived from an online service (http://just-the-time.appspot.com/), it can be read and converted into a datetime.datetime format using urllib2 and datetime.datetime:
import urllib2
from datetime import datetime

def getOnlineUTCTime():
    webpage = urllib2.urlopen("http://just-the-time.appspot.com/")
    internettime = webpage.read()
    OnlineUTCTime = datetime.strptime(internettime.strip(), '%Y-%m-%d %H:%M:%S')
    return OnlineUTCTime
or, more compactly (less readable):
OnlineUTCTime = datetime.strptime(urllib2.urlopen("http://just-the-time.appspot.com/").read().strip(),
                                  '%Y-%m-%d %H:%M:%S')
A little exercise: comparing your own UTC time with the online time:
print(datetime.utcnow() - getOnlineUTCTime())
# 0:00:00.118403
# if the difference is negative, the result will be something like: -1 day, 23:59:59.033398
(bear in mind that processing time is included as well)
Go to timezonedb.com and create an account. You will receive an API key by email; use that key in the following code:
from urllib import request
from datetime import datetime
import json

def GetTime(zone):
    ApiKey = "YOUR API KEY"
    webpage = request.urlopen("http://api.timezonedb.com/v2/get-time-zone?key=" + ApiKey + "&format=json&by=zone&zone=" + zone)
    internettime = json.loads(webpage.read().decode("UTF-8"))
    OnlineTime = datetime.strptime(internettime["formatted"].strip(), '%Y-%m-%d %H:%M:%S')
    return OnlineTime

print(GetTime("Asia/Kolkata"))  # you can pass any zone region name, for example: America/Chicago
This works really well for me, no account required:
import requests
import logging
from datetime import datetime

logger = logging.getLogger(__name__)

def get_internet_datetime(time_zone: str = "etc/utc") -> datetime:
    """
    Get the current internet time from:
    'https://www.timeapi.io/api/Time/current/zone?timeZone=etc/utc'
    """
    timeapi_url = "https://www.timeapi.io/api/Time/current/zone"
    headers = {
        "Accept": "application/json",
    }
    params = {"timeZone": time_zone}
    dt = None
    try:
        request = requests.get(timeapi_url, headers=headers, params=params)
        r_dict = request.json()
        dt = datetime(
            year=r_dict["year"],
            month=r_dict["month"],
            day=r_dict["day"],
            hour=r_dict["hour"],
            minute=r_dict["minute"],
            second=r_dict["seconds"],
            microsecond=r_dict["milliSeconds"] * 1000,
        )
    except Exception:
        logger.exception("ERROR getting datetime from internet...")
        return None
    return dt
Here is a Python module for hitting NIST online: http://freshmeat.net/projects/mxdatetime.
Perhaps you mean the NTP protocol? This project may help: http://pypi.python.org/pypi/ntplib/0.1.3
Here is the code I made for myself. On Linux I had the problem that the date and time changed each time I switched on my PC, and setting it again and again was tedious since the internet requires a proper date. So I made this script, which the date command uses through an alias to set the date and time automatically.
import requests

resp = requests.get('https://www.timeapi.io/api/Time/current/zone?timeZone=etc/utc')
resp = str(resp.text)

# pull the date field out of the raw JSON text (the end index is
# hard-coded to match the response length)
first = resp.find('"date":"') + 8
rp = ''
for i in range(first, 145):
    rp = resp[i] + rp
print(rp[::-1] + "\n")

# pull the time field out of the raw JSON text
second = resp.find('"time":"') + 8
rp_2 = ''
for i in range(second, 160):
    rp_2 = resp[i] + rp_2
print(rp_2[::-1] + "\n")
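As a hedged aside, the fixed offsets 145 and 160 depend on the exact length of the response, so this can break whenever the payload changes; letting requests parse the JSON is more robust. A minimal sketch, assuming the date and time fields the slicing above already relies on:
import requests

# Parse the JSON body instead of slicing the raw text at fixed offsets
resp = requests.get('https://www.timeapi.io/api/Time/current/zone?timeZone=etc/utc')
payload = resp.json()
print(payload["date"])
print(payload["time"])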
