I have the following code to retrieve data from the Google Analytics Reporting API.
"""Hello Analytics Reporting API V4."""
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import csv
import pandas as pd
# OAuth scopes passed to ServiceAccountCredentials.from_json_keyfile_name().
SCOPES = ['https://w......']
# Path of the JSON key file the credentials are loaded from.
KEY_FILE_LOCATION = '/Users/,,,,test.json'
# Sent as 'viewId' in the report request built by get_report().
VIEW_ID = 'xxxxx'
def initialize_analyticsreporting():
    """Build an authorized Analytics Reporting API V4 client.

    Returns:
        The Analytics Reporting API V4 service object, authorized with the
        key file KEY_FILE_LOCATION for SCOPES.
    """
    service_credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)
    # Hand the credentials straight to the discovery client.
    return build('analyticsreporting', 'v4', credentials=service_credentials)
def get_report(analytics):
    """Request sessions per country for the last seven days.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.

    Returns:
        The Analytics Reporting API V4 response for the view VIEW_ID.
    """
    report_request = {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
        'metrics': [{'expression': 'ga:sessions'}],
        'dimensions': [{'name': 'ga:country'}],
    }
    batch_call = analytics.reports().batchGet(
        body={'reportRequests': [report_request]})
    return batch_call.execute()
def print_response(response, csv_path='/Users/...csv',
                   frame_csv_path='/Users/.../pd.csv'):
    """Print an Analytics Reporting API V4 response and export it as CSV.

    Every metric value becomes one record holding the row's dimension
    value(s), the metric value and the index of the date range it belongs
    to. The records are written twice: with the ``csv`` module to
    ``csv_path`` and, through a pandas DataFrame (index included), to
    ``frame_csv_path``.

    Args:
        response: An Analytics Reporting API V4 response.
        csv_path: Destination of the file written with ``csv.writer``.
        frame_csv_path: Destination of the file written by pandas.
    """
    columns = ['Country', 'Value', 'Date Range']
    records = []
    for report in response.get('reports', []):
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_headers = column_header.get(
            'metricHeader', {}).get('metricHeaderEntries', [])
        for row in report.get('data', {}).get('rows', []):
            dimensions = row.get('dimensions', [])
            for header, dimension in zip(dimension_headers, dimensions):
                print(header + ': ', dimension)
            # Several dimensions share one cell so that every record keeps
            # exactly the three columns above.
            label = ', '.join(dimensions)
            for i, values in enumerate(row.get('metrics', [])):
                # i is the position of this entry in the row's 'metrics'
                # list; the start/end dates themselves are not read from
                # the response here.
                print('Date range:', str(i))
                for metric_header, value in zip(
                        metric_headers, values.get('values', [])):
                    print(metric_header.get('name') + ':', value)
                    # Append whole strings: passing a str to
                    # writerow()/writerows() would split it into characters.
                    records.append([label, value, str(i)])
    # A single handle, closed by the context manager, instead of two
    # competing 'w' handles on the same path.
    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(columns)
        writer.writerows(records)
    # to_csv() also writes the DataFrame index as the first column.
    pd.DataFrame(records, columns=columns).to_csv(frame_csv_path)
def main():
    """Authorize, run the report and pass the response to print_response."""
    service = initialize_analyticsreporting()
    print_response(get_report(service))


if __name__ == '__main__':
    main()
My problem is that when I print the results and store them in my DataFrame, I get this result:
I don't want the 0 in the DateRange column;
I want the actual range of dates in this column.
I know that I specified the date range in the request with parameters such as "startDate" and "endDate" (here "7daysAgo" and "today"),
but how can I iterate over those values and include them in my pandas DataFrame?
In short, how can I add the startDate, the endDate and the date range
to my CSV file?
Any help?
Thanks
Related
I have been using a GA Reporting API V4 Python script to authenticate report pulls. I am now trying to reuse the script to authenticate against the GA4 API, but I get the following error and cannot find the root cause.
Error: Authorized user info was not in the expected format, missing fields refresh_token, client_secret.
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import openpyxl
import csv
from pandas.io.json import json_normalize
import ast
from datetime import date
from datetime import timedelta
from google.oauth2.credentials import Credentials
# OAuth scope requested when loading the credentials (the "analytics.readonly" scope).
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Credentials file loaded by initialize_analyticsreporting().
KEY_FILE_LOCATION = 'client_secrets_services.json'
def initialize_analyticsreporting():
    """Initializes a Google Analytics Data API (GA4) service object.

    KEY_FILE_LOCATION is loaded as a service-account key file.
    NOTE(review): this assumes the file is a service-account key, as its
    name and the reported "missing fields refresh_token, client_secret"
    error suggest - confirm.

    Returns:
        An authorized Analytics Data API v1beta service object.
    """
    # Imported locally so this function carries its own dependency.
    from google.oauth2 import service_account

    # Credentials.from_authorized_user_file() only accepts OAuth
    # "authorized user" files (client_id, client_secret, refresh_token);
    # a service-account key has to be loaded with the service_account module.
    credentials = service_account.Credentials.from_service_account_file(
        KEY_FILE_LOCATION, scopes=SCOPES)
    # Build the service object.
    analytics = build('analyticsdata', 'v1beta', credentials=credentials)
    return analytics
def get_report(analytics, start_date, end_date_delta):
    """Queries the Google Analytics Data API (GA4) for screen/page views.

    Args:
        analytics: An authorized Analytics Data API v1beta service object.
        start_date: First day of the reporting window (``datetime.date``).
        end_date_delta: ``timedelta`` added to ``start_date`` to obtain the
            last day of the reporting window.

    Returns:
        The batchRunReports response.
    """
    end_date = start_date + end_date_delta
    report_request = {
        'limit': 100000,
        'dateRanges': [{
            'startDate': start_date.strftime("%Y-%m-%d"),
            'endDate': end_date.strftime("%Y-%m-%d"),
        }],
        'metrics': [{'name': 'screenPageViews'}],
        'dimensions': [
            {'name': 'date'},
            {'name': 'sessionSourceMedium'},
            {'name': 'pagePath'},
        ],
        # The Data API expects one FilterExpression object here, not the
        # Reporting API V4 style list of filter clauses.
        'dimensionFilter': {
            'filter': {
                'fieldName': 'pagePath',
                'stringFilter': {
                    'matchType': 'PARTIAL_REGEXP',
                    'value': '/pharmacysuccess',
                },
            },
        },
    }
    return analytics.properties().batchRunReports(
        property='properties/3220*****',
        body={'requests': [report_request]},
    ).execute()
def print_response(response):
    """Write every dimension and metric value of the response to stdout.

    Reads the 'columnHeader' and 'data' keys of each entry in
    response['reports'].

    Args:
        response: An Analytics Reporting API V4 response.
    """
    for report in response.get('reports', []):
        header_block = report.get('columnHeader', {})
        dimension_names = header_block.get('dimensions', [])
        metric_entries = header_block.get('metricHeader', {}).get(
            'metricHeaderEntries', [])
        data_rows = report.get('data', {}).get('rows', [])
        for data_row in data_rows:
            row_dimensions = data_row.get('dimensions', [])
            per_range = data_row.get('metrics', [])
            # Dimension values first, labelled with their header.
            for name, text in zip(dimension_names, row_dimensions):
                print(name + ': ', text)
            # Then the metric values, grouped by date-range position.
            for position, range_values in enumerate(per_range):
                print('Date range:', str(position))
                for entry, metric_text in zip(
                        metric_entries, range_values.get('values')):
                    print(entry.get('name') + ':', metric_text)
def main():
    """Pull the report window by window and print the combined DataFrame.

    Starting at 2023-01-01, a two-day window (start day plus one day) is
    requested and the start is advanced by two days until it reaches
    2023-02-01. The rows of every non-empty response are collected and
    concatenated once at the end.
    """
    start_date = date(2023, 1, 1)
    end_date = date(2023, 2, 1)
    delta = timedelta(days=2)
    end_date_delta = timedelta(days=1)
    analytics = initialize_analyticsreporting()
    frames = []
    while start_date < end_date:
        response = get_report(analytics, start_date, end_date_delta)
        # The Data API (analyticsdata v1beta) returns 'dimensionHeaders',
        # 'metricHeaders' and 'rows' per report; the Reporting API V4 keys
        # 'columnHeader'/'data' do not exist in this response.
        report = response['reports'][0]
        columns = [h['name'] for h in report.get('dimensionHeaders', [])]
        columns += [h['name'] for h in report.get('metricHeaders', [])]
        rows = [
            [cell['value'] for cell in row.get('dimensionValues', [])]
            + [cell['value'] for cell in row.get('metricValues', [])]
            for row in report.get('rows', [])
        ]
        if rows:
            frames.append(pd.DataFrame(rows, columns=columns))
        start_date += delta
    # DataFrame.append() was removed in pandas 2.0; concatenate once instead.
    if frames:
        final_data = pd.concat(frames, ignore_index=True)
    else:
        final_data = pd.DataFrame()
    print(final_data)


if __name__ == '__main__':
    main()
I have used the following code for the Google Analytics API (which worked so far) - I just copied it, enabled the API and put in the credentials.
Everything looks fine for the generated output.
Here is the code I used
"""Hello Analytics Reporting API V4."""
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import csv
# OAuth scopes passed to ServiceAccountCredentials.from_json_keyfile_name().
SCOPES = ['https://www.googleapis.....']
# Path of the JSON key file the credentials are loaded from.
KEY_FILE_LOCATION = '/path/to/file/test.json'
# Sent as 'viewId' in the report request built by get_report().
VIEW_ID = '1234567'
def initialize_analyticsreporting():
    """Create the Analytics Reporting API V4 service object.

    Returns:
        An Analytics Reporting API V4 service object authorized with the
        key file KEY_FILE_LOCATION for SCOPES.
    """
    key_credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)
    # The discovery client is returned directly to the caller.
    return build('analyticsreporting', 'v4', credentials=key_credentials)
def get_report(analytics):
    """Fetch sessions per country for the past seven days.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.

    Returns:
        The Analytics Reporting API V4 response for the view VIEW_ID.
    """
    request_body = {
        'reportRequests': [
            {
                'viewId': VIEW_ID,
                'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
                'metrics': [{'expression': 'ga:sessions'}],
                'dimensions': [{'name': 'ga:country'}],
            },
        ],
    }
    reports_resource = analytics.reports()
    return reports_resource.batchGet(body=request_body).execute()
def print_response(response, csv_path='/Users/path/document.csv'):
    """Parses and prints the Analytics Reporting API V4 response and
    appends it to a CSV file.

    One CSV row is appended per report row and date range: the dimension
    values, the date-range index, then the metric values, each in its own
    column. Nothing is written when the response holds no rows.

    Args:
        response: An Analytics Reporting API V4 response.
        csv_path: CSV file the rows are appended to.
    """
    csv_rows = []
    for report in response.get('reports', []):
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_headers = column_header.get(
            'metricHeader', {}).get('metricHeaderEntries', [])
        for row in report.get('data', {}).get('rows', []):
            dimensions = row.get('dimensions', [])
            for header, dimension in zip(dimension_headers, dimensions):
                print(header + ': ', dimension)
            for i, values in enumerate(row.get('metrics', [])):
                print('Date range:', str(i))
                metric_values = values.get('values', [])
                for metric_header, value in zip(metric_headers, metric_values):
                    print(metric_header.get('name') + ':', value)
                # Build the row from whole strings: writerow() on a bare
                # str would emit one column per character.
                csv_rows.append(list(dimensions) + [str(i)] + list(metric_values))
    if not csv_rows:
        return
    # Open the file once; newline='' lets the csv module control line ends.
    with open(csv_path, 'a', newline='') as f:
        csv.writer(f).writerows(csv_rows)
def main():
    """Authorize, query the report and print/export the response."""
    client = initialize_analyticsreporting()
    report = get_report(client)
    print_response(report)


if __name__ == '__main__':
    main()
Now I was wondering how I can save the generated output as a csv file (or something else if csv is not possible).
I was looking around here in the forum and on other pages too, but didn't find anything so far.
My output looks like this
Vietnam
5
6
1 2 6
The problem is that each value should be in its own column, like this (maybe even with the headers):
Vietnam 5 6 126
I would be really thankful if you could help me out on that.
Thank you!
For an API that I am using, we need to be able to see which specific pages are being clicked on and output that to a CSV file. I am able to see the average session duration and the number of page views. I am curious what I need to add to the code attached below to make exporting to a CSV file possible. Thank you!
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
# Settings for the service-account login and the view to query.
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
KEY_FILE_LOCATION = 'client_secrets.json'
VIEW_ID = 'insert here'

# Authorize with the key file and create the Reporting API V4 client.
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    KEY_FILE_LOCATION, SCOPES)
analytics = build('analyticsreporting', 'v4', credentials=credentials)

# Page views and average session duration per device category, last 30 days.
_report_request = {
    'viewId': VIEW_ID,
    'dateRanges': [{'startDate': '30daysAgo', 'endDate': 'today'}],
    'metrics': [
        {"expression": "ga:pageviews"},
        {"expression": "ga:avgSessionDuration"},
    ],
    "dimensions": [
        {"name": "ga:deviceCategory"},
    ],
}
response = analytics.reports().batchGet(
    body={'reportRequests': [_report_request]}).execute()
# Bare expression kept from the original: a no-op in a script; an
# interactive session would echo the value.
response
{'reports': [{'columnHeader': {'dimensions': ['ga:deviceCategory'],
'metricHeader': {'metricHeaderEntries': [{'name': 'ga:pageviews',
'type': 'INTEGER'},
{'name': 'ga:avgSessionDuration', 'type': 'TIME'}]}},
'data': {'isDataGolden': True,
'maximums': [{'values': ['485', '94.95454545454545']}],
'minimums': [{'values': ['29', '51.21186440677966']}],
'rowCount': 3,
'rows': [{'dimensions': ['desktop'],
'metrics': [{'values': ['485', '51.21186440677966']}]},
{'dimensions': ['mobile'],
'metrics': [{'values': ['409', '69.30859375']}]},
{'dimensions': ['tablet'],
'metrics': [{'values': ['29', '94.95454545454545']}]}],
'totals': [{'values': ['923', '60.06487341772152']}]}}]}
import pandas as pd
# Placeholder frame with 'Name' and 'Age' columns; `df` is rebound further down to the result of ga_response_dataframe().
df = pd.DataFrame(columns=['Name', 'Age'])
def ga_response_dataframe(response):
    """Flatten an Analytics Reporting API V4 response into row records.

    Args:
        response: An Analytics Reporting API V4 response (dict).

    Returns:
        A list with one dict per report row. Each dict maps every
        dimension header to its string value and every metric name to its
        numeric value (``int`` when the text parses as an integer,
        ``float`` otherwise). Pass the list to ``pd.DataFrame`` to obtain
        a frame.

    Raises:
        ValueError: If a metric value is not a valid number.
    """
    row_list = []
    # Get each collected report
    for report in response.get('reports', []):
        # Set column headers
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_names = [
            entry.get('name')
            for entry in column_header.get(
                'metricHeader', {}).get('metricHeaderEntries', [])
        ]
        # Get each row in the report
        for row in report.get('data', {}).get('rows', []):
            # Dimension header (key) -> dimension value (value)
            row_dict = dict(zip(dimension_headers, row.get('dimensions', [])))
            # Metric name (key) -> metric value (value). With several date
            # ranges the same keys are written again, so the last range wins.
            for values in row.get('metrics', []):
                for name, value in zip(metric_names, values.get('values', [])):
                    # Try int first and fall back to float; this also
                    # accepts exponent notation without a dot ('1E3'),
                    # which a check for '.' in the text would send to int().
                    try:
                        row_dict[name] = int(value)
                    except ValueError:
                        row_dict[name] = float(value)
            row_list.append(row_dict)
    return row_list
# Despite its name, `df` holds whatever ga_response_dataframe() returns.
df = ga_response_dataframe(response)
# Disabled alternative kept by the author: build a DataFrame instead.
#df = pd.DataFrame(row_list)
print(df)
TLDR: How can I transform the json output from Google Analytics into a Pandas dataframe in Python?
I'm using the official documentation to get results out of Google Analytics. This works, but I would like to have the output in a pandas dataframe for further data analysis instead of in json format.
Remarks:
When you look at the code below, you'll see that I've commented out '''print(response)''' because that outputs the results in json format.
I've used the code from this article to write the pandas part.
I've searched extensively on Stack Overflow for a solution, but to no avail.
Thank you in advance.
def get_report(analytics):
    """Request sessions per matched ad query for the last seven days.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.

    Returns:
        The Analytics Reporting API V4 response for the view VIEW_ID.
    """
    single_request = {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
        'metrics': [{'expression': 'ga:sessions'}],
        'dimensions': [{'name': 'ga:adMatchedQuery'}],
    }
    payload = {'reportRequests': [single_request]}
    return analytics.reports().batchGet(body=payload).execute()
# Runs the query at module level with the client returned by initialize_analyticsreporting().
response = get_report(initialize_analyticsreporting())
# Disabled debug output, kept as a bare string literal (it has no effect).
'''print(response)'''
def print_response(response):
    """Convert an Analytics Reporting API V4 response to a DataFrame.

    Args:
        response: An Analytics Reporting API V4 response (dict).

    Returns:
        A pandas DataFrame with one row per report row, one column per
        dimension header (string values) and one column per metric name
        (numeric values).

    Raises:
        ValueError: If a metric value is not a valid number.
    """
    # Local names no longer shadow the builtins `list` and `dict`.
    records = []
    # get report data
    for report in response.get('reports', []):
        # set column headers
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_headers = column_header.get(
            'metricHeader', {}).get('metricHeaderEntries', [])
        for row in report.get('data', {}).get('rows', []):
            # dimension header (key) -> dimension value (value)
            record = {
                header: dimension
                for header, dimension in zip(
                    dimension_headers, row.get('dimensions', []))
            }
            # metric name (key) -> metric value (value); with several date
            # ranges the last one overwrites the earlier ones.
            for values in row.get('metrics', []):
                for metric, value in zip(metric_headers, values.get('values', [])):
                    # int first, float as fallback: also handles exponent
                    # notation without a dot, e.g. '1E3'.
                    try:
                        record[metric.get('name')] = int(value)
                    except ValueError:
                        record[metric.get('name')] = float(value)
            records.append(record)
    return pd.DataFrame(records)
I've built a Google Analytics API script that pulls a number of dimensions and metrics from multiple GA views within separate accounts. My code essentially cycles through an Excel document, determines which view to select, and uses the corresponding .dat credentials file it needs from a dictionary. The weird thing is that it works for the first view in the document but then fails once it gets to the next one, no matter what is listed in that row. I keep getting the error message...
storage = file.Storage(stor_str)
AttributeError: 'str' object has no attribute 'Storage'
Here are, hopefully, the relevant pieces of my code for reference (please let me know if I'm missing anything)...
# Zip the per-location columns into one iterable of row tuples.
# NOTE(review): the driver loop iterates `locations_zip`, not `lz` - confirm which name is intended.
lz = zip(LI, DN, VI, V, DA, S, D)
# Start and end date passed to getGADO() by the driver loop.
sd = '2020-08-01'
ed = '2020-08-31'
# Parse an empty argument list so `flags` carries the parser's defaults.
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[tools.argparser])
flags = parser.parse_args([])
# OAuth flow built from the client-secrets file; handed to tools.run_flow() by the driver loop.
flow = client.flow_from_clientsecrets(
CLIENT_SECRETS_PATH, scope=SCOPES,
message=tools.message_if_missing(CLIENT_SECRETS_PATH))
# Maps the key looked up as dFI[DA] to the .dat file name given to file.Storage().
dFI = {'DP7': 'arsseven.dat',
'DP6': 'arssix.dat',
'DP4': 'arsfour.dat',
'DP0': 'arszero.dat'}
# Module-level list that getGADO() appends report rows to.
gaDO = []
def getGADO(VI, sd, ed):
    """Fetch users and average session duration per channel grouping.

    Uses the module-level ``analytics`` service object and the
    ``vi_start`` prefix, both assigned by the driver loop before the call.

    Args:
        VI: View ID; converted with ``int()`` and prefixed with ``vi_start``.
        sd: Start date of the report.
            NOTE(review): presumably 'YYYY-MM-DD', as in the module-level
            ``sd`` - confirm.
        ed: End date of the report, same format as ``sd``.

    Returns:
        A DataFrame with the columns 'DCG', 'Users' and 'ASD' holding the
        rows of this call only.
    """
    l = vi_start + str(int(VI))
    response = analytics.reports().batchGet(
        body={
            'reportRequests': [
                {
                    'viewId': l,
                    # The API's DateRange fields are 'startDate'/'endDate';
                    # 'sd'/'ed' are only the local variable names.
                    'dateRanges': [{'startDate': sd, 'endDate': ed}],
                    'metrics': [
                        {'expression': 'ga:users'},
                        {'expression': 'ga:avgSessionDuration'}
                    ],
                    'dimensions': [
                        {'name': 'ga:channelGrouping'}
                    ]
                }]}).execute()
    # Rows of this call only. Building the frame from the global gaDO made
    # every later view also return the rows of all earlier views.
    gaDON = []
    for report in response.get('reports') or []:
        for row in report.get('data', {}).get('rows', []):
            dimensions_in_row = row.get('dimensions') or []
            for metrics in row.get('metrics') or []:
                full_row_data = dimensions_in_row + (metrics.get('values') or [])
                # Still recorded in the module-level list for any code that
                # reads gaDO directly.
                gaDO.append(full_row_data)
                gaDON.append((
                    full_row_data[0],
                    int(full_row_data[1]),
                    float(full_row_data[2]),
                ))
    colO = [
        # Dimensions
        'DCG',
        # Metrics
        'Users', 'ASD']
    dfO = pd.DataFrame(gaDON, columns=colO)
    print(dfO)
    return dfO
# For each location row: load its stored credentials (running the OAuth flow when needed), build a client and pull the report.
for LI, DN, VI, V, DA, S, D in locations_zip:
dID_str = str(int(LI))
# File name of the credential storage for this row's DA key.
stor_str = dFI[DA]
# NOTE(review): the reported AttributeError means `file` is bound to a str when this line runs on a later iteration - presumably the name of the `file` module is reassigned somewhere outside this excerpt; confirm and rename that variable.
storage = file.Storage(stor_str)
credentials = storage.get()
# No usable stored credentials: run the interactive flow, which receives the storage object.
if credentials is None or credentials.invalid:
credentials = tools.run_flow(flow, storage, flags)
http = credentials.authorize(http=httplib2.Http())
# `analytics` and `vi_start` are module-level names that getGADO() reads.
analytics = build('analytics', 'v4', http=http, discoveryServiceUrl=DISCOVERY_URI)
vi_start = 'ga:'
dfX = getGADO(VI, sd, ed)
print(dfX)
Any help on this would be much appreciated!