I have used the following code for the Google Analytics API (which worked so far) - I just copied it, enabled the API and put in the credentials.
Everything looks fine for the generated output.
Here is the code I used
"""Hello Analytics Reporting API V4."""
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import csv
SCOPES = ['https://www.googleapis.....']
KEY_FILE_LOCATION = '/path/to/file/test.json'
VIEW_ID = '1234567'
def initialize_analyticsreporting():
"""Initializes an Analytics Reporting API V4 service object.
Returns:
An authorized Analytics Reporting API V4 service object.
"""
credentials = ServiceAccountCredentials.from_json_keyfile_name(
KEY_FILE_LOCATION, SCOPES)
# Build the service object.
analytics = build('analyticsreporting', 'v4', credentials=credentials)
return analytics
def get_report(analytics):
"""Queries the Analytics Reporting API V4.
Args:
analytics: An authorized Analytics Reporting API V4 service object.
Returns:
The Analytics Reporting API V4 response.
"""
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [{'name': 'ga:country'}]
}]
}
).execute()
def print_response(response):
"""Parses and prints the Analytics Reporting API V4 response.
Args:
response: An Analytics Reporting API V4 response.
"""
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
for row in report.get('data', {}).get('rows', []):
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
for header, dimension in zip(dimensionHeaders, dimensions):
print(header + ': ', dimension)
for i, values in enumerate(dateRangeValues):
print('Date range:', str(i))
for metricHeader, value in zip(metricHeaders, values.get('values')):
print(metricHeader.get('name') + ':', value)
with open('/Users/path/document.csv','a') as f:
writer = csv.writer(f)
writer.writerow(dimensions)
writer.writerow(str(i))
writer.writerow(value)
def main():
analytics = initialize_analyticsreporting()
response = get_report(analytics)
print_response(response)
if __name__ == '__main__':
main()
Now I was wondering how I can save the generated output as a csv file (or something else if csv is not possible).
I was looking around here in the forum and on other pages too, but didn't find anything so far.
My output looks like this
Vietnam
5
6
1 2 6
The problem is that each value should be in a new column like this (maybe even with the headers
Vietnam 5 6 126
I would be really thankful if you could help me out on that.
Thank you!
Related
I have been using a GA Reporting API V4 Python script to authenticate report pulls but I am trying to re-use the script to authenticate GA4 API now but I am getting the following error and I am not able to find the root cause.
Error: Authorized user info was not in the expected format, missing fields refresh_token, client_secret.
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import openpyxl
import csv
from pandas.io.json import json_normalize
import ast
from datetime import date
from datetime import timedelta
from google.oauth2.credentials import Credentials
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
KEY_FILE_LOCATION = 'client_secrets_services.json'
def initialize_analyticsreporting():
"""Initializes an Analytics Reporting API V4 service object.
Returns:
An authorized Analytics Reporting API V4 service object.
"""
credentials = Credentials.from_authorized_user_file(KEY_FILE_LOCATION, SCOPES)
# Build the service object.
analytics = build('analyticsdata', 'v1beta', credentials=credentials)
return analytics
def get_report(analytics, start_date, end_date_delta):
"""Queries the Analytics Reporting API V4.
Args:
analytics: An authorized Analytics Reporting API V4 service object.
Returns:
The Analytics Reporting API V4 response.
"""
return analytics.properties().batchRunReports(property='properties/3220*****',
body={
'requests': [
{
'limit': 100000,
'dateRanges': [{'startDate': start_date.strftime("%Y-%m-%d") , 'endDate': (start_date+end_date_delta).strftime("%Y-%m-%d")}],
'metrics': [ {'name': 'screenPageViews'} ],
'dimensions': [{'name':'date'}, {'name':'sessionSourceMedium'}, {'name':'pagePath'} ],
'dimensionFilter': [
{"filters": [ {"dimensionName": "pagePath",
"operator": "REGEXP",
"expressions": "/pharmacysuccess"}]
}]
}]
}
).execute()
def print_response(response):
"""Parses and prints the Analytics Reporting API V4 response.
Args:
response: An Analytics Reporting API V4 response.
"""
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
for row in report.get('data', {}).get('rows', []):
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
for header, dimension in zip(dimensionHeaders, dimensions):
print(header + ': ', dimension)
for i, values in enumerate(dateRangeValues):
print('Date range:', str(i))
for metricHeader, value in zip(metricHeaders, values.get('values')):
print(metricHeader.get('name') + ':', value)
def main():
final_data = pd.DataFrame()
start_date = date(2023, 1, 1)
end_date = date(2023, 2, 1)
delta = timedelta(days=2)
end_date_delta = timedelta(days=1)
analytics = initialize_analyticsreporting()
while start_date < end_date:
response = get_report(analytics,start_date, end_date_delta)
columns = []
for s in range(len(response['reports'][0]['columnHeader']['dimensions'])):
columns.append(response['reports'][0]['columnHeader']['dimensions'][s])
for s in range(len(response['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries'])):
columns.append(response['reports'][0]['columnHeader']['metricHeader']['metricHeaderEntries'][s]['name'])
while response['reports'][0]['data'].get("rows") is not None:
dimensions = []
for s in range(len(response['reports'][0]['data']['rows'])):
dimensions.append(response['reports'][0]['data']['rows'][s]['dimensions'] )
metrics = []
for s in range(len(response['reports'][0]['data']['rows'])):
metrics.append(response['reports'][0]['data']['rows'][s]['metrics'][0]['values'])
rows=[]
for i in range(len(dimensions)):
rows.append(dimensions[i] + metrics[i])
response_final = pd.DataFrame(rows, columns=columns)
final_data=final_data.append(response_final,ignore_index=True)
break
start_date+=delta
print(final_data)
if __name__ == '__main__':
main()
I have the following code to get to the data of my google analytics api.
"""Hello Analytics Reporting API V4."""
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import csv
import pandas as pd
SCOPES = ['https://w......']
KEY_FILE_LOCATION = '/Users/,,,,test.json'
VIEW_ID = 'xxxxx'
def initialize_analyticsreporting():
"""Initializes an Analytics Reporting API V4 service object.
Returns:
An authorized Analytics Reporting API V4 service object.
"""
credentials = ServiceAccountCredentials.from_json_keyfile_name(
KEY_FILE_LOCATION, SCOPES)
# Build the service object.
analytics = build('analyticsreporting', 'v4', credentials=credentials)
return analytics
def get_report(analytics):
"""Queries the Analytics Reporting API V4.
Args:
analytics: An authorized Analytics Reporting API V4 service object.
Returns:
The Analytics Reporting API V4 response.
"""
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [{'name': 'ga:country'}]
}]
}
).execute()
def print_response(response):
"""Parses and prints the Analytics Reporting API V4 response.
Args:
response: An Analytics Reporting API V4 response.
"""
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
ofile = open('/Users/...csv', 'w', newline='')
writer = csv.writer(ofile)
dime= []
item = []
val = []
for row in report.get('data', {}).get('rows', []):
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
for header, dimension in zip(dimensionHeaders, dimensions):
print(header + ': ', dimension)
writer.writerow(dimensions)
dime.append(dimension)
for i, values in enumerate(dateRangeValues):
print('Date range:', str(i))
print(str(i))
print('range')
print(dateRangeValues[0])
writer.writerow(str(i))
item.append(str(i))
for metricHeader, value in zip(metricHeaders, values.get('values')):
print(metricHeader.get('name') + ':', value)
print(value)
writer.writerows(value)
val.append(value)
#open the file in the write mode
with open('/Users/...csv', 'w') as f:
write = csv.writer(f)
write.writerow(dime)
write.writerows(val)
df = pd.DataFrame(
{'Country': dime,
'Value': val,
'Date Range': item
}
)
df.to_csv('/Users/.../pd.csv') ##It will include index also
'''with open('/Users/...document.csv','a') as f:
writer = csv.writer(f)
writer.writerow(dimensions)
writer.writerow(str(i))
writer.writerow(value)'''
'''with open('/Users....csv','a') as fd:
fd.writero(dimension)
fd.write(str(i))
fd.write(value)'''
'''df = pd.DataFrame()
df["Sessions"]=val
df["pagePath"]=dim
df=df[["pagePath","Sessions"]]
#Export to CSV
df.to_csv("page_by_session.csv")'''
def main():
analytics = initialize_analyticsreporting()
response = get_report(analytics)
print_response(response)
'''with open('/Users.../document.csv', 'a') as f:
for row in response:
f.write(f)'''
# open the file in the write mode
#with open('/Users/....', 'w', encoding='UTF8') as f:
# create the csv writer
# writer = csv.writer(f)
# write a row to the csv file
#writer.writerow(response)
if __name__ == '__main__':
main()
My problem now is that when I print the results and store them into my DataFrame I got this result
But I don't want to have the 0 for DateRange
I want to have the actual Range of Dates into this column
I know that I have specified the DateRanges with the different input parameters like "StartDate" , "EndDate", "7DaysAgo" and "today"...
but how can I use those to iterate over them and include them into my pandas data frame?
So how can I add the startDate , endDate and the daterange
to my csv file?
Any help?
Thanks
TLDR: How can I transform the json output from Google Analytics into a Pandas dataframe in Python?
I'm using the official documentation to get results out of Google Analytics. This works, but I would like to have the output in a pandas dataframe for further data analysis instead of in json format.
Remarks:
When you look at the code below, you'll see that I've commented out '''print(response)''' because that outputs the results in json format.
I've used the code from this article to write the pandas part.
I've searched extensively on Stackoverflow to find a solution but to no avail.
Thank you in advance.
def get_report(analytics):
"""Queries the Analytics Reporting API V4.
Args:
analytics: An authorized Analytics Reporting API V4 service object.
Returns:
The Analytics Reporting API V4 response.
"""
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [{'name': 'ga:adMatchedQuery'}]
}]
}
).execute()
response = get_report(initialize_analyticsreporting())
'''print(response)'''
def print_response(response):
list = []
# get report data
for report in response.get('reports', []):
# set column headers
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
rows = report.get('data', {}).get('rows', [])
for row in rows:
# create dict for each row
dict = {}
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
# fill dict with dimension header (key) and dimension value (value)
for header, dimension in zip(dimensionHeaders, dimensions):
dict[header] = dimension
# fill dict with metric header (key) and metric value (value)
for i, values in enumerate(dateRangeValues):
for metric, value in zip(metricHeaders, values.get('values')):
#set int as int, float a float
if ',' in value or '.' in value:
dict[metric.get('name')] = float(value)
else:
dict[metric.get('name')] = int(value)
list.append(dict)
df = pd.DataFrame(list)
return df
I've built a Google Analytics API that pulls a number or dimensions and metrics from multiple GA views within separate accounts. My code essentially cycles through an excel document and determines what view to select and uses the corresponding .dat file credentials it needs from a dictionary. The weird thing is it works for the first view in the document but then fails once it gets to the next one no matter what is listed in that row. I keep getting the error message...
storage = file.Storage(stor_str)
AttributeError: 'str' object has no attribute 'Storage'
Here is hopefully the relevant pieces of my code for reference (please let me know if I'm missing anything)...
lz = zip(LI, DN, VI, V, DA, S, D)
sd = '2020-08-01'
ed = '2020-08-31'
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[tools.argparser])
flags = parser.parse_args([])
flow = client.flow_from_clientsecrets(
CLIENT_SECRETS_PATH, scope=SCOPES,
message=tools.message_if_missing(CLIENT_SECRETS_PATH))
dFI = {'DP7': 'arsseven.dat',
'DP6': 'arssix.dat',
'DP4': 'arsfour.dat',
'DP0': 'arszero.dat'}
gaDO = []
def getGADO(VI, sd, ed):
l = vi_start + str(int(VI))
response = analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': l,
'dateRanges': [{'sd': sd, 'ed': ed}],
'metrics': [
{'expression': 'ga:users'},
{'expression': 'ga:avgSessionDuration'}
],
'dimensions': [
{'name': 'ga:channelGrouping'}
]
}]}).execute()
report_list = response.get('reports')
for report in report_list:
data_rows = report.get('data', {}).get('rows', [])
for row in data_rows:
dimensions_in_row = row.get('dimensions')
metrics_rows = row.get('metrics')
for metrics in metrics_rows:
metrics_values = metrics.get('values')
full_row_data = dimensions_in_row + metrics_values
gaDO.append(full_row_data)
gaDON = []
for i in gaDO:
new_tuple = []
new_tuple.append(i[0])
new_tuple.append(int(i[1]))
new_tuple.append(float(i[2]))
gaDON.append(tuple(new_tuple))
colO = [
#Dimensions
'DCG',
#Metrics
'Users', 'ASD']
dfO = pd.DataFrame(gaDON, columns = colO)
print(dfO)
return dfO
for LI, DN, VI, V, DA, S, D in locations_zip:
dID_str = str(int(LI))
stor_str = dFI[DA]
storage = file.Storage(stor_str)
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = tools.run_flow(flow, storage, flags)
http = credentials.authorize(http=httplib2.Http())
analytics = build('analytics', 'v4', http=http, discoveryServiceUrl=DISCOVERY_URI)
vi_start = 'ga:'
dfX = getGADO(VI, sd, ed)
print(dfX)
Any help on this would be much appreciated!
I try to create roles in an automated way in Google Kubernetes (GKE).
For that, I use the python client library, but I don't want to have any dependency to kubectl and kubeconfig, or gcloud,
I use a service account (with a json key file from GCP) which has the permissions to create roles in namespaces (it is a cluster admin). When I use the access token given by this command :
gcloud auth activate-service-account --key-file=credentials.json
gcloud auth print-access-token
It works.
But when I try to generate the token by myself, I can create namespaces and other standard resources, but I have this error when it comes to roles :
E kubernetes.client.rest.ApiException: (403)
E Reason: Forbidden
E HTTP response headers: HTTPHeaderDict({'Audit-Id': 'b89b0fc2-9350-456e-9eca-730e7ad2cea1', 'Content-Type': 'application/json', 'Date': 'Tue, 26 Feb 2019 20:35:20 GMT', 'Content-Length': '1346'})
E HTTP response body: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"roles.rbac.authorization.k8s.io \"developers\" is forbidden: attempt to grant extra privileges: [{[*] [apps] [statefulsets] [] []} {[*] [apps] [deployments] [] []} {[*] [autoscaling] [horizontalpodautoscalers] [] []} {[*] [] [pods] [] []} {[*] [] [pods/log] [] []} {[*] [] [pods/portforward] [] []} {[*] [] [serviceaccounts] [] []} {[*] [] [containers] [] []} {[*] [] [services] [] []} {[*] [] [secrets] [] []} {[*] [] [configmaps] [] []} {[*] [extensions] [ingressroutes] [] []} {[*] [networking.istio.io] [virtualservices] [] []}] user=\u0026{100701357824788592239 [system:authenticated] map[user-assertion.cloud.google.com:[AKUJVp+KNvF6jw9II+AjCdqjbC0vz[...]hzgs0JWXOyk7oxWHkaXQ==]]} ownerrules=[{[create] [authorization.k8s.io] [selfsubjectaccessreviews selfsubjectrulesreviews] [] []} {[get] [] [] [] [/api /api/* /apis /apis/* /healthz /openapi /openapi/* /swagger-2.0.0.pb-v1 /swagger.json /swaggerapi /swaggerapi/* /version /version/]}] ruleResolutionErrors=[]","reason":"Forbidden","details":{"name":"developers","group":"rbac.authorization.k8s.io","kind":"roles"},"code":403}
I'm using the same service account, so I guess gcloud is doing something more than my script.
Here the python code I use to generate the token :
def _get_token(self) -> str:
# See documentation here
# https://developers.google.com/identity/protocols/OAuth2ServiceAccount
epoch_time = int(time.time())
# Generate a claim from the service account file.
claim = {
"iss": self._service_account_key["client_email"],
"scope": "https://www.googleapis.com/auth/cloud-platform",
"aud": "https://www.googleapis.com/oauth2/v4/token",
"exp": epoch_time + 3600,
"iat": epoch_time
}
# Sign claim with JWT.
assertion = jwt.encode(
claim,
self._service_account_key["private_key"],
algorithm='RS256'
).decode()
# Create payload for API.
data = urlencode({
"grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
"assertion": assertion
})
# Request the access token.
result = requests.post(
url="https://www.googleapis.com/oauth2/v4/token",
headers={
"Content-Type": "application/x-www-form-urlencoded"
},
data=data
)
result.raise_for_status()
return json.loads(result.text)["access_token"]
def _get_api_client(self) -> client.ApiClient:
configuration = client.Configuration()
configuration.host = self._api_url
configuration.verify_ssl = self._tls_verify
configuration.api_key = {
"authorization": f"Bearer {self._get_token()}"
}
return client.ApiClient(configuration)
And the function to create the role (which generates the 403 error):
def _create_role(self, namespace: str, body: str):
api_client = self._get_api_client()
rbac = client.RbacAuthorizationV1Api(api_client)
rbac.create_namespaced_role(
namespace,
body
)
If I short-circuit the _get_token method with the token extracted from gcloud, it works.
I guess it has something to do with the way I create my token (missing scope ?), but I don't find any documentation about it.
ANSWER :
Adding a scope does the job ! Thanks a lot :
# Generate a claim from the service account file.
claim = {
"iss": self._service_account_key["client_email"],
"scope": " ".join([
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/userinfo.email"
]),
"aud": "https://www.googleapis.com/oauth2/v4/token",
"exp": epoch_time + 3600,
"iat": epoch_time
}
So if you look at the code here for print-access-token you can see that the access token is generally printed without a scope. You see:
try:
creds = client.GoogleCredentials.get_application_default()
except client.ApplicationDefaultCredentialsError as e:
log.debug(e, exc_info=True)
raise c_exc.ToolException(str(e))
if creds.create_scoped_required():
...
and then on this file you see:
def create_scoped_required(self):
"""Whether this Credentials object is scopeless.
create_scoped(scopes) method needs to be called in order to create
a Credentials object for API calls.
"""
return False
Apparently, in your code, you are getting the token with the https://www.googleapis.com/auth/cloud-platform scope. You could try removing it or try with the USER_EMAIL_SCOPE since you are specifying: "iss": self._service_account_key["client_email"].
You can always check what gcloud auth activate-service-account --key-file=credentials.json stores under ~/.config. So you know what gcloud auth print-access-token uses. Note that as per this and this it looks like the store is in sqlite format.