Connect API to Sheets using Python

I have managed to read from the sheets and to write to them separately, and I have access to the API; I'm just trying to put it all together (and learn Python along the way). Could someone help me?
I can't get the API response written into the sheet. It doesn't give me any error; it only tells me that it has connected, but that it hasn't updated any column or cell.
from __future__ import print_function

import os.path
import json

import requests
from google.auth.transport.requests import Request
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# -------------------------------------------------------------------
def temilla():
    BASE_URL = "https://company.com"
    token = 'RYE_'
    # headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Authorization': "Bearer {}".format(token)}
    PARAMS = {
        "employee_ids": "1007",
        # "employee_ids": "1388",
        # "business_unit_ids": "13",
        "start_date": "08/07/2022",
        "end_date": "09/07/2022",
    }
    headers = {"Content-Type": "application/json", 'Authorization': "Bearer {}".format(token), "Api-version": "1.0"}
    response = requests.get(BASE_URL, params=PARAMS, headers=headers)
    result = [response.content]
    print(response.content)
    # Note: no return statement, so this function returns None.

# --------------------------------------------------------------------------
SERVICE_ACCOUNT_FILE = 'keys.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
creds = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES)
# --------------------------------------------------------------------------
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
SAMPLE_SPREADSHEET_ID = '146456464646464'
service = build('sheets', 'v4', credentials=creds)
sheet = service.spreadsheets()
result = sheet.values().get(spreadsheetId=SAMPLE_SPREADSHEET_ID,
                            range="Horarios!A1:J20").execute()
print(result)
values = result.get('values', [])

temilla()
Cosilla = temilla()  # temilla() prints the response but returns None
# Cosilla = [["DIA", 1], ["AÑO", 2022], ["MES", 9], ["EMPLEADO", "MARIANO"], ["ACTIVO", True]]
request = sheet.values().update(spreadsheetId=SAMPLE_SPREADSHEET_ID,
                                range="Prueba-Escritura!a1:xa", valueInputOption="USER_ENTERED",
                                body={"values": Cosilla}).execute()
# print(result)
# print(values)
print(request)
The response that the API gives me is similar to the following:
b'[{"shift_id":2400298,"business_unit_id":10,"business_unit":"alguno","employee_id":1007,"employee_code":"11111111","entry":"2022-08-07T11:30:00","exit":"2022-08-07T15:30:00"},{"shift_id":2400299,"business_unit_id":10,"business_unit":"alguno","employee_id":1007,"employee_code":"1111111","entry":"2022-08-07T19:00:00","exit":"2022-08-07T23:00:00"}]'

I believe your goal is as follows.
- You want to put the values from response = requests.get(BASE_URL, params=PARAMS, headers=headers) into a Google Spreadsheet. A sample value is [{"shift_id":2400298,"business_unit_id":10,"business_unit":"alguno","employee_id":1007,"employee_code":"11111111","entry":"2022-08-07T11:30:00","exit":"2022-08-07T15:30:00"},{"shift_id":2400299,"business_unit_id":10,"business_unit":"alguno","employee_id":1007,"employee_code":"1111111","entry":"2022-08-07T19:00:00","exit":"2022-08-07T23:00:00"}].
- You want to achieve this using googleapis for Python.
- You have already been able to put and get values in the Spreadsheet using the Sheets API.
In this case, how about the following modification?
Modified script:
# Retrieve values from requests as JSON.
response = requests.get(BASE_URL, params=PARAMS, headers=headers)  # This is from your script.
obj = response.json()

# Convert the JSON to a 2-dimensional array.
keys = obj[0].keys()
ar = []
for o in obj:
    temp = []
    for k in keys:
        temp.append(o[k] if k in o else "")
    ar.append(temp)

# Put the values into the Spreadsheet using googleapis.
SAMPLE_SPREADSHEET_ID = "###"  # <--- Please set your Spreadsheet ID.
request = service.spreadsheets().values().update(
    spreadsheetId=SAMPLE_SPREADSHEET_ID,
    range="Prueba-Escritura!a1",
    valueInputOption="USER_ENTERED",
    body={"values": [list(keys), *ar]},
).execute()
print(request)
In this modification:
- The value from response = requests.get(BASE_URL, params=PARAMS, headers=headers) is parsed as JSON, and from this object a 2-dimensional array for putting into the Spreadsheet is created.
- Values are put starting at cell "A1" of the "Prueba-Escritura" sheet. If you want to change this, modify range="Prueba-Escritura!a1".
- The header values come from keys = obj[0].keys(). If you want a specific order, set the header row explicitly, like keys = ['shift_id','business_unit_id','business_unit',,,].
- This script puts the values including the header row. If you don't want to include the header row, change [list(keys), *ar] to ar.
Reference:
Method: spreadsheets.values.update
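One further point about the question's script: temilla() has no return statement, so Cosilla = temilla() is always None and the update call has nothing to write. A minimal sketch of a version that returns the 2-dimensional array built in the answer above (same placeholder BASE_URL, token, and PARAMS as in the question):

import requests

def temilla():
    # Fetch shifts from the API and return them as a 2-D array for Sheets.
    BASE_URL = "https://company.com"  # placeholder from the question
    token = 'RYE_'                    # placeholder from the question
    PARAMS = {"employee_ids": "1007",
              "start_date": "08/07/2022",
              "end_date": "09/07/2022"}
    headers = {"Content-Type": "application/json",
               "Authorization": "Bearer {}".format(token),
               "Api-version": "1.0"}
    response = requests.get(BASE_URL, params=PARAMS, headers=headers)
    obj = response.json()
    keys = list(obj[0].keys())
    rows = [[o.get(k, "") for k in keys] for o in obj]
    return [keys, *rows]  # header row followed by data rows

# Cosilla = temilla() now holds a 2-D array that values().update() can write.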

Related

Passing data from an HTTP API to BigQuery using a Google Cloud Function in Python

I am new to Google Cloud Functions and Python, but I managed to look online and write the code below in the main.py file. I am unable to get the data into BigQuery, though.
import json

import pandas as pd
import pandas_gbq
import requests
from requests.structures import CaseInsensitiveDict


def validate_http(request):
    request_json = request.get_json()
    if request.args:
        get_api_data()
        return f'Data pull complete'
    elif request_json:
        get_api_data()
        return f'Data pull complete'
    else:
        get_api_data()
        return f'Data pull complete'


def get_api_data():
    # my_client_id, my_client_secret, and my_scope are placeholders.
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = f'client_id={my_client_id}&client_secret={my_client_secret}&grant_type=client_credentials&scope={my_scope}'
    response = requests.post('https://login.microsoftonline.com/4fa9c138-d3e7-4bc3-8bab-a74bde6b7584/oauth2/v2.0/token', headers=headers, data=data)
    json_response = response.json()
    access_token = json_response["access_token"]

    url = "my_api_url"
    headers = CaseInsensitiveDict()
    headers["Accept"] = "application/json"
    headers["Authorization"] = f"Bearer {access_token}"
    resp = requests.get(url, headers=headers)

    new_json_response = resp.json()
    new_json_response2 = new_json_response["value"]
    j_data = json.dumps(new_json_response2)
    input_data = j_data
    data = json.loads(input_data)
    result = [json.dumps(record) for record in data]
    ndjson = "\n".join(result)
    df = pd.DataFrame.from_records(ndjson)  # note: ndjson is a string here, not a list of records
    bq_load('TABLE_NAME', df)


def bq_load(key, value):
    project_name = 'PROJECT_ID'
    dataset_name = 'DATASET_NAME'
    table_name = key
    value.to_gbq(destination_table='{}.{}'.format(dataset_name, table_name), project_id=project_name, if_exists='replace')
Can anyone help with what the issue is, or is there another way to get my JSON data into BigQuery with Python and a Google Cloud Function?
I have also created a table in BigQuery from the dataframe, as per the screenshot below.
[screenshot: pandas dataframe]
The error message is below.
[screenshot: error message in the logs]
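No answer is attached to this one, but one likely issue stands out in get_api_data(): pd.DataFrame.from_records() is handed the newline-joined string ndjson rather than a list of records, so the resulting dataframe is not what to_gbq() expects. A minimal sketch of that step, under the assumption that resp.json()["value"] is a list of flat JSON objects:

import pandas as pd

# Suppose resp.json()["value"] yielded a list of dicts like this:
records = [
    {"id": 1, "name": "alpha"},
    {"id": 2, "name": "beta"},
]

# Build the dataframe directly from the list of dicts --
# no need to round-trip through json.dumps()/json.loads() or NDJSON.
df = pd.DataFrame.from_records(records)
print(df)
#    id   name
# 0   1  alpha
# 1   2   beta

# df.to_gbq(...) can then be called as in the question's bq_load().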

API call using Python and token_auth

"""
#Collects basic metrics from Matomo installation and returns a pandas dataframe
"""
token = os.getenv("token")
# Build url string
base_url = 'https://matomo.___.com/index.php?module=API'
site_num = '&idSite=1'
return_format = '&format=json'
period = '&period=day'
date_range = '&date=last30'
method = '&method=VisitsSummary.get'
token_string = "&token_auth=" + token
my_url = base_url + site_num + return_format + period + date_range + method + token_string
# send request for report
r = requests.get(my_url)
# parse and tidy collected data
data = pd.DataFrame(r.json()).T
data = data.reset_index()
data.columns = [
"date",
"uniq_visitors",
"users",
"visits",
"actions",
"visits_converted",
"bounces",
"sum_visit_length",
"max_actions",
"bounce_rate",
"actions_per_visit",
"avg_time_on_site",
]
return data
I am trying to get data from the Matomo API using an auth token and parameters with the code above, but I am not able to access it; the URL is not accepting the token. Does anyone have an idea how I can solve this?
Given that you are using the requests library, passing parameters and headers can be done using the params argument in your GET call:
r = requests.get(my_url, params=payload)
In the same way, an auth token is usually passed within headers:
r = requests.get(my_url, params=payload, headers=headers)
Using this format you can simply create a headers object which contains your token_auth and directly pass your parameters in a payload object:
headers = {'token_auth': token}
payload = {'module':'API', 'idSite':1, 'format':'json', 'period':'day', 'date':'last30', 'method':'VisitsSummary.get'}
Since you are now passing your parameters in your GET request, there is no need to append them to the end of your URL; the URL can stay as https://matomo.___.com/index.php. These can then be used within your params and headers respectively. Please note that this assumes the Matomo API accepts the token_auth in its headers, as most APIs do. If this is not the case, you could pass it directly within the params payload.
Here is a global overview:
token = os.getenv("token")
# Get url, headers and params
my_url = 'https://matomo.___.com/index.php'
payload = {'module':'API', 'idSite':1, 'format':'json', 'period':'day', 'date':'last30', 'method':'VisitsSummary.get'}
headers = {'token_auth': token}
# send request for report
r = requests.get(my_url, params=payload, headers=headers)
Note this answers your question specifically regarding the API call and not the processing after.
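As a concrete illustration of that last fallback (token_auth inside params rather than headers; check the Matomo docs for which one applies):

import os
import requests

token = os.getenv("token")
my_url = 'https://matomo.___.com/index.php'
payload = {'module': 'API', 'idSite': 1, 'format': 'json', 'period': 'day',
           'date': 'last30', 'method': 'VisitsSummary.get',
           'token_auth': token}  # token moved from headers into the query parameters
r = requests.get(my_url, params=payload)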

Python combine multiple similar functions

I am working with an API to pull back data using Python. My functions work fine, but I feel like I am repeating myself over and over, and there is probably something I should do to make this more efficient.
Each function first gets the number of results, then hits the API again to bring back exactly that number of records.
First function:
def get_categories():
    headers = {"Authorization": "Bearer " + access_token}  # auth plus token
    response = requests.get("https://api.destination.com/categories", headers=headers)  # response
    data = json.loads(response.text)  # load the json data
    records = str(data['totalResults'])  # get number of results for next call
    response = requests.get("https://api.destination.com/categories?$skip=0&$top=" + records, headers=headers)
    all_data = json.loads(response.text)  # load the json data
    list_of_dict = all_data['resources']  # keep only the list of dictionaries
    df = pd.DataFrame.from_records(list_of_dict)  # create dataframe
    df['links'] = df['links'].str[0].str['href']  # just grab the links (href) items
    return df  # return the final dataframe
Second function:
def get_groups():
    headers = {"Authorization": "Bearer " + access_token}  # auth plus token
    response = requests.get("https://api.destination.com/groups", headers=headers)  # response
    data = json.loads(response.text)  # load the json data
    records = str(data['totalResults'])  # get number of results
    response = requests.get("https://api.destination.com/groups?$skip=0&$top=" + records, headers=headers)
    all_data = json.loads(response.text)  # load the json data
    list_of_dict = all_data['resources']  # keep only the list of dictionaries
    df = pd.DataFrame.from_records(list_of_dict)  # create dataframe
    df['links'] = df['links'].str[0].str['href']  # just grab the links (href) items
    return df  # return the final dataframe
And there are 3 more functions, like users, that do the same thing. The only difference between them, as you can see, is the GET URL (https://api.destination.com/categories vs. https://api.destination.com/groups), and the number of records returned for each will be different. Is there a way to combine these and call the result in a certain way?
Looks like you already know how to make functions; just extend that one step further and abstract away everything that is common amongst the functions.
BASE_URL = "https://api.destination.com/{}"

def make_headers():
    headers = {"Authorization": "Bearer " + access_token}
    return headers

def make_params(recs):
    params = {'$skip': 0, '$top': recs}
    return params

def make_df(data):
    list_of_dict = data['resources']
    df = pd.DataFrame.from_records(list_of_dict)
    df['links'] = df['links'].str[0].str['href']
    return df

def process(endpoint):  # parameter renamed from process to avoid shadowing the function name
    headers = make_headers()
    url = BASE_URL.format(endpoint)
    resp = requests.get(url, headers=headers)
    data = resp.json()
    records = data['totalResults']
    params = make_params(records)
    resp = requests.get(url, headers=headers, params=params)
    all_data = resp.json()
    return make_df(all_data)
Then you can call it like the following:
process('groups')
process('categories')
You can break it up further, but you get the idea.
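As one possible further step (a hypothetical convenience, not part of the answer), functools.partial can recreate the original named functions on top of the generic process():

from functools import partial

# Named wrappers that bind each endpoint to the generic process() above.
get_categories = partial(process, 'categories')
get_groups = partial(process, 'groups')

df = get_categories()  # equivalent to process('categories')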
You can just add a parameter to this function.
As an example:
def get_categories():
    headers = {"Authorization": "Bearer " + access_token}  # auth plus token
    response = requests.get("https://api.destination.com/categories", headers=headers)  # response
    data = json.loads(response.text)  # load the json data
    records = str(data['totalResults'])  # get number of results for next call
    response = requests.get("https://api.destination.com/categories?$skip=0&$top=" + records, headers=headers)
    all_data = json.loads(response.text)  # load the json data
    list_of_dict = all_data['resources']  # keep only the list of dictionaries
    df = pd.DataFrame.from_records(list_of_dict)  # create dataframe
    df['links'] = df['links'].str[0].str['href']  # just grab the links (href) items
    return df  # return the final dataframe
You can just refactor to:
def get_elements(element):
    if element is None:
        return 'not found'  # defaults to a 404-style message
    headers = {"Authorization": "Bearer " + access_token}  # auth plus token
    response = requests.get("https://api.destination.com/{}".format(element), headers=headers)  # response
    data = json.loads(response.text)  # load the json data
    records = str(data['totalResults'])  # get number of results for next call
    response = requests.get("https://api.destination.com/{}?$skip=0&$top={}".format(element, records), headers=headers)
    all_data = json.loads(response.text)  # load the json data
    list_of_dict = all_data['resources']  # keep only the list of dictionaries
    df = pd.DataFrame.from_records(list_of_dict)  # create dataframe
    df['links'] = df['links'].str[0].str['href']  # just grab the links (href) items
    return df  # return the final dataframe
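Usage then mirrors the first answer: the endpoint name becomes the argument, e.g.:

categories_df = get_elements('categories')
groups_df = get_elements('groups')
users_df = get_elements('users')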

How to read specific columns from each URL and iterate over multiple URLs

I have multiple URLs in column A, as shown in the picture.
URL information:
- Not all the URLs have the same schema.
- All the URLs have "Sheet1" as the name of the first sheet.
How can I read only 4 specific columns from each URL and append the data to one data frame in Python?
I have tried the code below, but I am only able to read the cell values, not follow the URLs.
import gspread
import gspread_dataframe as gd
from oauth2client.service_account import ServiceAccountCredentials  # assumed import; not shown in the post

scope = ['https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('fdfaffff.json', scope)
# service = discovery.build('sheets', 'v4', credentials=credentials)
client = gspread.authorize(credentials)
ws = client.open_by_url("https://docs.google.com/spreadsheets/d/1sPYbuEmbY8EHHMhr5wQy3Eti_cYqNI6b3359NAuuWyE/edit#gid=0")
name = ws.worksheet("Sheet1")
df = gd.get_as_dataframe(name, evaluate_formulas=True, skiprows=0, has_header=True)
df1 = df[['Links']]
for i in df1.index:  # loop using df.index
    print(df1.iloc[i])
I am getting output like this:
[screenshot: printed cell values]
Ignore the URL links if they are not the same; I have given sample URLs here.
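Although no answer is attached here, a rough sketch of the loop being asked about could look like the following: read the 'Links' column from the master sheet, open each linked spreadsheet's "Sheet1", keep the 4 wanted columns, and concatenate everything into one dataframe. The column names col_a..col_d are hypothetical placeholders; the 'Links' header and credentials file come from the question.

import gspread
import gspread_dataframe as gd
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials

scope = ['https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('fdfaffff.json', scope)
client = gspread.authorize(credentials)

# Master sheet whose column A ('Links') holds the URLs to visit.
master = client.open_by_url("https://docs.google.com/spreadsheets/d/1sPYbuEmbY8EHHMhr5wQy3Eti_cYqNI6b3359NAuuWyE/edit#gid=0")
links = gd.get_as_dataframe(master.worksheet("Sheet1"), evaluate_formulas=True, has_header=True)['Links'].dropna()

wanted = ['col_a', 'col_b', 'col_c', 'col_d']  # hypothetical: the 4 columns to keep
frames = []
for url in links:
    sheet = client.open_by_url(url).worksheet("Sheet1")
    df = gd.get_as_dataframe(sheet, evaluate_formulas=True, has_header=True)
    frames.append(df.reindex(columns=wanted))  # reindex tolerates sheets missing a column

combined = pd.concat(frames, ignore_index=True)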

How can I access these JSON objects in Python

I'm making some data visualizations from a movie database API. I can already access the data in the normal way, but when I load the JSON data and loop over it to print, what comes out is just the column names; I need to access the objects inside.
url = ("https://api.themoviedb.org/3/discover/movie?api_key=" + api_key
       + "&language=en-US&sort_by=popularity.desc&include_adult=false&include_video=false&page=1")  # api url
response = urllib.request.urlopen(url)
raw_json = response.read().decode("utf-8")
data = json.loads(raw_json)
for j in data:
    print(j)
I expect the output to be
[{'popularity': 15,
'id': 611,
'video': False,
'vote_count': 1403,
'vote_average': 8.9,
'title': 'lalalalo'},{....}]
but the actual output is
page
total_results
total_pages
results
The results are one level down. You are looping through the metadata.
Try changing your code to
import json
import urllib.request

api_key = "your api code"
url = ("https://api.themoviedb.org/3/discover/movie?api_key=" + api_key
       + "&language=en-US&sort_by=popularity.desc&include_adult=false&include_video=false&page=1")  # api url
response = urllib.request.urlopen(url)
raw_json = response.read().decode("utf-8")
data = json.loads(raw_json)
for j in data['results']:
    print(j)
That is, you need to change data to data['results'].
You can simply use the requests module:

import requests
import json

your_link = " "
r = requests.get(your_link)
data = json.loads(r.content)

That loads the JSON for you; then use the "results" key (data["results"]) and loop through the data you got.
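For illustration, looping over the parsed objects might look like this (field names taken from the expected output in the question):

for movie in data["results"]:
    # each item is a dict such as {'id': 611, 'title': 'lalalalo', 'vote_average': 8.9, ...}
    print(movie["title"], movie["vote_average"])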
