How to iterate over interval in json file and create a dataframe? - python

I am iterating over json file and creating dataframe with the desirable columns. I already implemented the code but now json file has little bit changed. But I am not able to think where to change the code to get the required output.
Explanation:
previous json result:
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-18T11:49:48.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 7,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nTransferred',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
previous dataframe:
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-18T11:49:48.000Z,7,,,,1.0,,,
new json result:
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-12T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-13T11:46:25.000Z/2021-10-14T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 2,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nTransferred',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-14T11:46:25.000Z/2021-10-15T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 3,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-15T11:46:25.000Z/2021-10-16T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
Now desirable dataframe:
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,1,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,2,,,,1,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,3,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,1,,,,,,,
What are the changes I need to do to in below code to get the new result.
column_names = []
if(query_result.results != None):
for item in query_result.results:
data_lst = []
for lst_data in item.data:
print("####################################")
print(lst_data)
print("####################################")
for met in lst_data.metrics:
metric_name = met.metric
column_names.append('Queue_Id')
column_names.append(metric_name+'_count')
column_names.append(metric_name+'_sum')
column_names.append(metric_name+'.denominator')
column_names.append(metric_name+'.numerator')
column_names.append('Interval Start')
column_names.append('Interval End')
data_lst.append(queue_id)
data_lst.append(met.stats.count)
data_lst.append(met.stats.sum)
data_lst.append(met.stats.denominator)
data_lst.append(met.stats.numerator)
data_lst.append(lst_data.interval.split('/')[0])
data_lst.append(lst_data.interval.split('/')[1])
print(data_lst)
else:
data_lst = []
metric_name = query.metrics[0]
column_names.append('Queue_Id')
column_names.append(metric_name+'_count')
column_names.append(metric_name+'_sum')
column_names.append(metric_name+'.denominator')
column_names.append(metric_name+'.numerator')
column_names.append('Interval Start')
column_names.append('Interval End')
data_lst.append(queue_id)
data_lst.append('')
data_lst.append('')
data_lst.append('')
data_lst.append('')
data_lst.append(query.interval.split('/')[0])
data_lst.append(query.interval.split('/')[1])
print("data_lst", data_lst)
print("column_names", column_names)
return data_lst, column_names

I have modified my code little bit and got the result. The below code is working for me-
lst_of_metrics = ["nOffered", "nTransferred"]
out = defaultdict(list)
if(query_result.results != None):
for item in query_result.results:
#data_lst = []
for lst_data in item.data:
print("####################################")
print(lst_data)
print("####################################")
out['queue_id'].append(queue_id)
for met1, met in itertools.zip_longest(query.metrics, lst_data.metrics):
#for met in lst_data.metrics:
if(met):
if(met.metric == met1):
out[met.metric+"_count"].append(met.stats.count)
out[met.metric+"_sum"].append(met.stats.sum)
out[met.metric+".denominator"].append(met.stats.denominator)
out[met.metric+".numerator"].append(met.stats.numerator)
else:
out[met1+"_count"].append('')
out[met1+"_sum"].append('')
out[met1+".denominator"].append('')
out[met1+".numerator"].append('')
else:
out[met1+"_count"].append('')
out[met1+"_sum"].append('')
out[met1+".denominator"].append('')
out[met1+".numerator"].append('')
interval = lst_data.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print("out", out)
else:
metric_name = query.metrics[0]
out['queue_id'].append(queue_id)
out[metric_name+"_count"].append('')
out[metric_name+"_sum"].append('')
out[metric_name+".denominator"].append('')
out[metric_name+".numerator"].append('')
interval = query.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print(out)
df = pd.DataFrame(out)
print (df)

Related

How to load list columns into a dataframe?

I try to load "columns" from a python list object into a dataframe.
This is my list object:
list = type(api_response.results) -> <class 'list'>
These are the values from the list object (I think this is a json structur):
{'results': [{'data': [{'interval': '2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z',
'metrics': [{'metric': 'nError',
'qualifier': None,
'stats': {'count': 4,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 113,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice'}}]}
I just need this result:
Dataframe:
interval metric count
0 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nError 4
1 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nOffered 113
How get this result? How is it possibly to call intervals or metrics from the list object?
Thanks for any help
you can use:
def get_metric(x):
check=0
vals=[]
for i in range(0,len(x)):
if len(x)==1:
check=1
for j in range(0,len(x) + check):
print(i,j)
vals.append(x[i]['data'][0]['metrics'][j]['metric'])
return vals
def get_count(x):
vals=[]
for i in range(0,len(x)):
for j in range(0,len(x[0])):
vals.append(x[i]['data'][0]['metrics'][j]['stats']['count'])
return vals
df['interval']=df['results'].apply(lambda x: [x[0]['data'][i]['interval'] for i in range(0,len(x[0]['data']))])
df['metric']= df['results'].apply(lambda x: get_metric(x))
df['count']= df['results'].apply(lambda x: get_count(x))
df=df.drop(['results'],axis=1)
df=df.explode(['metric','count']).explode('interval')
print(df)
'''
interval metric count
0 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nError 4
0 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nOffered 113
'''

Python value assigned to incorrect dict key

I am iterating through a csv and for each column, determining the longest len of a string, and updating a dict as necessary.
If I do this
def get_max_size(current, cell_value):
if cell_value:
current = max(current, len(cell_value))
return current
def my_function():
headers = ["val1","val2","val3","val4","val5"]
d = {header: {'max_size': 0, 'other': {'test': None}} for header in headers}
csv_file = [
["abc","123","HAMILTON","1950.00","17-SEP-2015"],
["ab","321","GLASGOW","711.00","13-NOV-2015"]
]
for row in csv_file:
for i, header in enumerate(headers):
max_size = get_max_size(d[header]['max_size'], row[i])
d[header]['max_size'] = max_size
print(d)
I get the expected output:
{'val1': {'max_size': 3, 'other': {'test': None}},
'val2': {'max_size': 3, 'other': {'test': None}},
'val3': {'max_size': 8, 'other': {'test': None}},
'val4': {'max_size': 7, 'other': {'test': None}},
'val5': {'max_size': 11, 'other': {'test': None}}}
However if I modify my code as such:
REQUIRED_VALUES = {
'max_size': 0,
'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None},
'max_value': None,
'allow_null': None,
}
def my_function():
headers = ["val1","val2","val3","val4","val5"]
# d = {header: {'max_size': 0, 'other': {'test': None}} for header in headers}
d = {header: REQUIRED_VALUES for header in headers}
csv_file = [
["abc","123","HAMILTON","1950.00","17-SEP-2015"],
["ab","321","GLASGOW","711.00","13-NOV-2015"]
]
for row in csv_file:
for i, header in enumerate(headers):
max_size = get_max_size(d[header]['max_size'], row[i])
d[header]['max_size'] = max_size
print(d)
Then the largest len of all keys (val5, the date field, of len == 11), is assigned to all max_length:
{'val1': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val2': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val3': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val4': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val5': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None}}
Is there some difference between the dicts that I'm missing? The dict is the only thing that changes, they both contain nested dictionaries... apart from number of items, I can't really see the difference.

Pandas to_sql with nested columns ERROR: A TVP's rows must be Sequence objects

I'm fetchin some data from an api, and I want to insert it in too an azure sql db.
I'm getting the data and adding it to a pandas dataframe before dropping empty columns and such.
After I have done what i need to do I'm trying to insert the data in to the database, but I'm getting this error message: ProgrammingError: (pyodbc.ProgrammingError) ("A TVP's rows must be Sequence objects.", 'HY000')
I've found that some of the columns have nested data (and I would like to keep it that way)
These columns contain lists of dicts here are some examples:
0 [{'note': 'Netto per 45 dgr'}]
1 [{'note': 'Netto per 45 dgr'}]
2 [{'note': 'Netto per 45 dgr'}]
[{'accountingCost': None, 'allowanceCharge': array([], dtype=object), 'billingReference': array([], dtype=object), 'contractDocumentReference': None, 'delivery': {'actualDeliveryDate': None, 'deliveryLocation': None, 'deliveryParty': None, 'despatch': None, 'estimatedDeliveryPeriod': None, 'promisedDeliveryPeriod': None, 'requestedDeliveryPeriod': None, 'trackingId': None}, 'despatchLineReference': {'attachment': None, 'copyIndicator': None, 'documentDescription': None, 'documentType': None, 'documentTypeCode': None, 'id': '', 'issueDate': None, 'uuid': None}, 'documentReference': None, 'id': '1', 'invoicePeriod': None, 'invoicedQuantity': {'unitCode': 'H21', 'unitCodeListId': None, 'value': '1.00'}, 'item': {'additionalItemProperty': array([], dtype=object), 'brandName': None, 'certificate': array([], dtype=object), 'classifiedTaxCategory': None, 'commodityClassification': array([], dtype=object), 'description': array([], dtype=object), 'dimension': array([], dtype=object), 'hazardousItem': array([], dtype=object), 'itemInstance': array([], dtype=object), 'itemSpecificationDocumentReference': array([], dtype=object), 'keyword': array([], dtype=object), 'manufacturerParty': None, 'manufacturersItemIdentification': array([], dtype=object), 'name': 'Ansvarsrett', 'originAddress': None, 'originCountry': None, 'packQuantity': None, 'packSizeNumeric': '', 'sellersItemIdentification': {'extendedId': None, 'id': {'id': 'P550', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}}, 'standardItemIdentification': {'extendedId': None, 'id': {'id': '7043010000953', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': 'GTIN', 'schemeName': None}}, 'transactionConditions': array([], dtype=object)}, 'lineExtensionAmount': {'currency': None, 'value': '504.00'}, 'lineGrossExtensionAmount': None, 'note': '', 'orderLineReference': {'buyersReference': '117230405 Kvileitet', 'lineId': {'id': '0', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}, 'orderReference': None, 'salesOrderLineId': {'id': '1', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}, 'sellersReference': ''}, 'orderedQuantity': None, 'price': {'allowanceCharge': array([], dtype=object), 'amount': {'currency': None, 'value': '504.00'}, 'baseQuantity': {'unitCode': None, 'unitCodeListId': None, 'value': '0'}, 'orderableUnitFactorRate': None, 'priceType': None, 'validityPeriod': array([], dtype=object)}, 'taxTotal': {'roundingAmount': None, 'taxAmount': {'currency': None, 'value': '126.00'}, 'taxSubtotal': array([], dtype=object)}}
{'accountingCost': None, 'allowanceCharge': array([], dtype=object), 'billingReference': array([], dtype=object), 'contractDocumentReference': None, 'delivery': {'actualDeliveryDate': None, 'deliveryLocation': None, 'deliveryParty': None, 'despatch': None, 'estimatedDeliveryPeriod': None, 'promisedDeliveryPeriod': None, 'requestedDeliveryPeriod': None, 'trackingId': None}, 'despatchLineReference': {'attachment': None, 'copyIndicator': None, 'documentDescription': None, 'documentType': None, 'documentTypeCode': None, 'id': '', 'issueDate': None, 'uuid': None}, 'documentReference': None, 'id': '2', 'invoicePeriod': None, 'invoicedQuantity': {'unitCode': 'H21', 'unitCodeListId': None, 'value': '3.00'}, 'item': {'additionalItemProperty': array([], dtype=object), 'brandName': None, 'certificate': array([], dtype=object), 'classifiedTaxCategory': None, 'commodityClassification': array([], dtype=object), 'description': array([], dtype=object), 'dimension': array([], dtype=object), 'hazardousItem': array([], dtype=object), 'itemInstance': array([], dtype=object), 'itemSpecificationDocumentReference': array([], dtype=object), 'keyword': array([], dtype=object), 'manufacturerParty': None, 'manufacturersItemIdentification': array([], dtype=object), 'name': 'Prosjektering', 'originAddress': None, 'originCountry': None, 'packQuantity': None, 'packSizeNumeric': '', 'sellersItemIdentification': {'extendedId': None, 'id': {'id': 'Projj', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}}, 'standardItemIdentification': {'extendedId': None, 'id': {'id': '7043010000298', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': 'GTIN', 'schemeName': None}}, 'transactionConditions': array([], dtype=object)}, 'lineExtensionAmount': {'currency': None, 'value': '7020.00'}, 'lineGrossExtensionAmount': None, 'note': '', 'orderLineReference': {'buyersReference': '117230405 Kvileitet', 'lineId': {'id': '0', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}, 'orderReference': None, 'salesOrderLineId': {'id': '2', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}, 'sellersReference': ''}, 'orderedQuantity': None, 'price': {'allowanceCharge': array([], dtype=object), 'amount': {'currency': None, 'value': '2340.00'}, 'baseQuantity': {'unitCode': None, 'unitCodeListId': None, 'value': '0'}, 'orderableUnitFactorRate': None, 'priceType': None, 'validityPeriod': array([], dtype=object)}, 'taxTotal': {'roundingAmount': None, 'taxAmount': {'currency': None, 'value': '1755.00'}, 'taxSubtotal': array([], dtype=object)}}
is there any way to add this data to mssql?

Iterate over json result and get the desirable data in pandas dataframe

I have a json result which I am trying to convert into dataframe but not able to get the correct result. Actually for some cases it is giving correct but for some case it is failing.
Example:
Based on metric API is generating result for specified interval. But this is not certain for that particular interval metric have output or not. And process is running 4 different queue_id.
suppose process is running only for 2 metric. ['nOffered', 'nTransferred']
queue_id = 'a72dba75-0bc6-4a65-b120-8803364f8dc3'
for this queue_id, nOffered is having some values but nTransferred doesn't have. Json result is given below-
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-12T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-13T11:46:25.000Z/2021-10-14T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 2,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-14T11:46:25.000Z/2021-10-15T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 3,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-15T11:46:25.000Z/2021-10-16T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
My code is giving below output-
queue_id nOffered_count nOffered_sum interval_start interval_end
0 a72dba75-0bc6-4a65-b120-8803364f8dc3 6 None 2021-10-11T11:46:25.000Z 2021-10-12T11:46:25.000Z
1 a72dba75-0bc6-4a65-b120-8803364f8dc3 1 None 2021-10-12T11:46:25.000Z 2021-10-13T11:46:25.000Z
2 a72dba75-0bc6-4a65-b120-8803364f8dc3 12 None 2021-10-13T11:46:25.000Z 2021-10-14T11:46:25.000Z
3 a72dba75-0bc6-4a65-b120-8803364f8dc3 6 None 2021-10-14T11:46:25.000Z 2021-10-15T11:46:25.000Z
4 a72dba75-0bc6-4a65-b120-8803364f8dc3 6 None 2021-10-15T11:46:25.000Z 2021-10-16T11:46:25.000Z
But when process is running for 2nd queue_id that time it is not working-
queue_id - 73643cff-799b-41ae-9a67-efcf5e593155
json output for this queue_id -
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-12T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-13T11:46:25.000Z/2021-10-14T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 2,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nTransferred',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-14T11:46:25.000Z/2021-10-15T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 3,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-15T11:46:25.000Z/2021-10-16T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
This time both metric having some data. So output would be-
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,1,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,2,,,,1,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,3,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,1,,,,,,,
And in final result, both the result merge and give the output with all columns and data.
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,6,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-12T11:46:25.000Z,2021-10-13T11:46:25.000Z,1.0,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,12.0,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,6.0,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,6.0,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,1,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,2,,,,1.0,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,3,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,1,,,,,,,
Currently I am running below logic-
out = defaultdict(list)
if(query_result.results != None):
for item in query_result.results:
#data_lst = []
for lst_data in item.data:
print("####################################")
print(lst_data)
print("####################################")
out['queue_id'].append(queue_id)
for met in lst_data.metrics:
out[met.metric+"_count"].append(met.stats.count)
out[met.metric+"_sum"].append(met.stats.sum)
out[met.metric+".denominator"].append(met.stats.denominator)
out[met.metric+".numerator"].append(met.stats.numerator)
interval = lst_data.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print("out", out)
else:
metric_name = query.metrics[0]
out['queue_id'].append(queue_id)
out[metric_name+"_count"].append('')
out[metric_name+"_sum"].append('')
out[metric_name+".denominator"].append('')
out[metric_name+".numerator"].append('')
interval = query.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print(out)
df = pd.DataFrame(out)
print (df)
return df
I used below logic to get the desirable result. It is working for me.
lst_of_metrics = ["nOffered", "nTransferred"]
out = defaultdict(list)
if(query_result.results != None):
for item in query_result.results:
#data_lst = []
for lst_data in item.data:
print("####################################")
print(lst_data)
print("####################################")
out['queue_id'].append(queue_id)
for met1, met in itertools.zip_longest(query.metrics, lst_data.metrics):
if(met):
if(met.metric == met1):
out[met.metric+"_count"].append(met.stats.count)
out[met.metric+"_sum"].append(met.stats.sum)
out[met.metric+".denominator"].append(met.stats.denominator)
out[met.metric+".numerator"].append(met.stats.numerator)
else:
out[met1+"_count"].append('')
out[met1+"_sum"].append('')
out[met1+".denominator"].append('')
out[met1+".numerator"].append('')
else:
out[met1+"_count"].append('')
out[met1+"_sum"].append('')
out[met1+".denominator"].append('')
out[met1+".numerator"].append('')
interval = lst_data.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print("out", out)
else:
metric_name = query.metrics[0]
out['queue_id'].append(queue_id)
out[metric_name+"_count"].append('')
out[metric_name+"_sum"].append('')
out[metric_name+".denominator"].append('')
out[metric_name+".numerator"].append('')
interval = query.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print(out)
df = pd.DataFrame(out)
print (df)

ast.literal_eval syntax error on list in dictionary

So I'm playing around with the Google Analytics dataset in pandas, and I'm having an issue.
The dataset has a column named hits, which contains a list of dictionaries, saved as a string. When I try to use ast.literat_eval to get the list of dictionaries, I get a syntax error pointing to this part of the dictionary:
'customMetrics': array([], dtype=object)
I'm using the following code to try to convert the string:
df['hits'] = df['hits'].apply(lambda x: ast.literal_eval(x))
I don't really care about this piece of data so it's fine if I could just ignore this and keep that particular dictionary value as a string. Does anyone know how to either ignore the error and set that dictionary value to a string, or properly convert this array?
FYI, an entire cell of the hits column looks like this:
"[{'hitNumber': 1, 'time': 0, 'hour': 23, 'minute': 0, 'isSecure': None, 'isInteraction': True, 'isEntrance': True, 'isExit': None, 'referer': 'https://www.youtube.com/yt/about/', 'page': {'pagePath': '/home', 'hostname': 'shop.googlemerchandisestore.com', 'pageTitle': 'Home', 'searchKeyword': None, 'searchCategory': None, 'pagePathLevel1': '/home', 'pagePathLevel2': '', 'pagePathLevel3': '', 'pagePathLevel4': ''}, 'transaction': None, 'item': None, 'contentInfo': None, 'appInfo': {'name': None, 'version': None, 'id': None, 'installerId': None, 'appInstallerId': None, 'appName': None, 'appVersion': None, 'appId': None, 'screenName': 'shop.googlemerchandisestore.com/home', 'landingScreenName': 'shop.googlemerchandisestore.com/home', 'exitScreenName': 'shop.googlemerchandisestore.com/signin.html', 'screenDepth': '0'}, 'exceptionInfo': {'description': None, 'isFatal': True, 'exceptions': None, 'fatalExceptions': None}, 'eventInfo': None, 'product': array([], dtype=object), 'promotion': array([], dtype=object), 'promotionActionInfo': None, 'refund': None, 'eCommerceAction': {'action_type': '0', 'step': 1, 'option': None}, 'experiment': array([], dtype=object), 'publisher': None, 'customVariables': array([], dtype=object), 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'type': 'PAGE', 'social': {'socialInteractionNetwork': None, 'socialInteractionAction': None, 'socialInteractions': None, 'socialInteractionTarget': None, 'socialNetwork': 'YouTube', 'uniqueSocialInteractions': None, 'hasSocialSourceReferral': 'Yes', 'socialInteractionNetworkAction': ' : '}, 'latencyTracking': None, 'sourcePropertyInfo': None, 'contentGroup': {'contentGroup1': '(not set)', 'contentGroup2': '(not set)', 'contentGroup3': '(not set)', 'contentGroup4': '(not set)', 'contentGroup5': '(not set)', 'previousContentGroup1': '(entrance)', 'previousContentGroup2': '(entrance)', 'previousContentGroup3': '(entrance)', 'previousContentGroup4': '(entrance)', 'previousContentGroup5': '(entrance)', 'contentGroupUniqueViews1': None, 'contentGroupUniqueViews2': None, 'contentGroupUniqueViews3': None, 'contentGroupUniqueViews4': None, 'contentGroupUniqueViews5': None}, 'dataSource': None, 'publisher_infos': array([], dtype=object)}\n {'hitNumber': 2, 'time': 30507, 'hour': 23, 'minute': 1, 'isSecure': None, 'isInteraction': True, 'isEntrance': None, 'isExit': None, 'referer': None, 'page': {'pagePath': '/google+redesign/gift+cards', 'hostname': 'shop.googlemerchandisestore.com', 'pageTitle': 'Gift Cards', 'searchKeyword': None, 'searchCategory': None, 'pagePathLevel1': '/google+redesign/', 'pagePathLevel2': '/gift+cards', 'pagePathLevel3': '', 'pagePathLevel4': ''}, 'transaction': {'transactionId': None, 'transactionRevenue': None, 'transactionTax': None, 'transactionShipping': None, 'affiliation': None, 'currencyCode': 'USD', 'localTransactionRevenue': None, 'localTransactionTax': None, 'localTransactionShipping': None, 'transactionCoupon': None}, 'item': {'transactionId': None, 'productName': None, 'productCategory': None, 'productSku': None, 'itemQuantity': None, 'itemRevenue': None, 'currencyCode': 'USD', 'localItemRevenue': None}, 'contentInfo': None, 'appInfo': {'name': None, 'version': None, 'id': None, 'installerId': None, 'appInstallerId': None, 'appName': None, 'appVersion': None, 'appId': None, 'screenName': 'shop.googlemerchandisestore.com/google+redesign/gift+cards', 'landingScreenName': 'shop.googlemerchandisestore.com/home', 'exitScreenName': 'shop.googlemerchandisestore.com/signin.html', 'screenDepth': '0'}, 'exceptionInfo': {'description': None, 'isFatal': True, 'exceptions': None, 'fatalExceptions': None}, 'eventInfo': None, 'product': array([{'productSKU': 'GGOEGGCX056299', 'v2ProductName': 'Gift Card - $25.00', 'v2ProductCategory': 'Home/Gift Cards/', 'productVariant': '(not set)', 'productBrand': '(not set)', 'productRevenue': None, 'localProductRevenue': None, 'productPrice': 25000000, 'localProductPrice': 25000000, 'productQuantity': None, 'productRefundAmount': None, 'localProductRefundAmount': None, 'isImpression': True, 'isClick': None, 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'productListName': 'Category', 'productListPosition': 1},\n {'productSKU': 'GGOEGGCX056499', 'v2ProductName': 'Gift Card - $50.00', 'v2ProductCategory': 'Home/Gift Cards/', 'productVariant': '(not set)', 'productBrand': '(not set)', 'productRevenue': None, 'localProductRevenue': None, 'productPrice': 50000000, 'localProductPrice': 50000000, 'productQuantity': None, 'productRefundAmount': None, 'localProductRefundAmount': None, 'isImpression': True, 'isClick': None, 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'productListName': 'Category', 'productListPosition': 2},\n {'productSKU': 'GGOEGGCX056199', 'v2ProductName': 'Gift Card- $100.00', 'v2ProductCategory': 'Home/Gift Cards/', 'productVariant': '(not set)', 'productBrand': '(not set)', 'productRevenue': None, 'localProductRevenue': None, 'productPrice': 100000000, 'localProductPrice': 100000000, 'productQuantity': None, 'productRefundAmount': None, 'localProductRefundAmount': None, 'isImpression': True, 'isClick': None, 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'productListName': 'Category', 'productListPosition': 3},\n {'productSKU': 'GGOEGGCX056399', 'v2ProductName': 'Gift Card - $250.00', 'v2ProductCategory': 'Home/Gift Cards/', 'productVariant': '(not set)', 'productBrand': '(not set)', 'productRevenue': None, 'localProductRevenue': None, 'productPrice': 250000000, 'localProductPrice': 250000000, 'productQuantity': None, 'productRefundAmount': None, 'localProductRefundAmount': None, 'isImpression': True, 'isClick': None, 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'productListName': 'Category', 'productListPosition': 4}],\n dtype=object), 'promotion': array([], dtype=object), 'promotionActionInfo': None, 'refund': None, 'eCommerceAction': {'action_type': '0', 'step': 1, 'option': None}, 'experiment': array([], dtype=object), 'publisher': None, 'customVariables': array([], dtype=object), 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'type': 'PAGE', 'social': {'socialInteractionNetwork': None, 'socialInteractionAction': None, 'socialInteractions': None, 'socialInteractionTarget': None, 'socialNetwork': 'YouTube', 'uniqueSocialInteractions': None, 'hasSocialSourceReferral': 'Yes', 'socialInteractionNetworkAction': ' : '}, 'latencyTracking': None, 'sourcePropertyInfo': None, 'contentGroup': {'contentGroup1': '(not set)', 'contentGroup2': '(not set)', 'contentGroup3': '(not set)', 'contentGroup4': '(not set)', 'contentGroup5': '(not set)', 'previousContentGroup1': '(not set)', 'previousContentGroup2': '(not set)', 'previousContentGroup3': '(not set)', 'previousContentGroup4': '(not set)', 'previousContentGroup5': '(not set)', 'contentGroupUniqueViews1': None, 'contentGroupUniqueViews2': None, 'contentGroupUniqueViews3': None, 'contentGroupUniqueViews4': None, 'contentGroupUniqueViews5': None}, 'dataSource': None, 'publisher_infos': array([], dtype=object)}\n {'hitNumber': 3, 'time': 53670, 'hour': 23, 'minute': 1, 'isSecure': None, 'isInteraction': True, 'isEntrance': None, 'isExit': True, 'referer': None, 'page': {'pagePath': '/signin.html', 'hostname': 'shop.googlemerchandisestore.com', 'pageTitle': 'The Google Merchandise Store - Log In', 'searchKeyword': None, 'searchCategory': None, 'pagePathLevel1': '/signin.html', 'pagePathLevel2': '', 'pagePathLevel3': '', 'pagePathLevel4': ''}, 'transaction': None, 'item': None, 'contentInfo': None, 'appInfo': {'name': None, 'version': None, 'id': None, 'installerId': None, 'appInstallerId': None, 'appName': None, 'appVersion': None, 'appId': None, 'screenName': 'shop.googlemerchandisestore.com/signin.html', 'landingScreenName': 'shop.googlemerchandisestore.com/home', 'exitScreenName': 'shop.googlemerchandisestore.com/signin.html', 'screenDepth': '0'}, 'exceptionInfo': {'description': None, 'isFatal': True, 'exceptions': None, 'fatalExceptions': None}, 'eventInfo': None, 'product': array([], dtype=object), 'promotion': array([], dtype=object), 'promotionActionInfo': None, 'refund': None, 'eCommerceAction': {'action_type': '0', 'step': 1, 'option': None}, 'experiment': array([], dtype=object), 'publisher': None, 'customVariables': array([], dtype=object), 'customDimensions': array([], dtype=object), 'customMetrics': array([], dtype=object), 'type': 'PAGE', 'social': {'socialInteractionNetwork': None, 'socialInteractionAction': None, 'socialInteractions': None, 'socialInteractionTarget': None, 'socialNetwork': 'YouTube', 'uniqueSocialInteractions': None, 'hasSocialSourceReferral': 'Yes', 'socialInteractionNetworkAction': ' : '}, 'latencyTracking': None, 'sourcePropertyInfo': None, 'contentGroup': {'contentGroup1': '(not set)', 'contentGroup2': '(not set)', 'contentGroup3': '(not set)', 'contentGroup4': '(not set)', 'contentGroup5': '(not set)', 'previousContentGroup1': '(not set)', 'previousContentGroup2': '(not set)', 'previousContentGroup3': '(not set)', 'previousContentGroup4': '(not set)', 'previousContentGroup5': '(not set)', 'contentGroupUniqueViews1': None, 'contentGroupUniqueViews2': None, 'contentGroupUniqueViews3': None, 'contentGroupUniqueViews4': None, 'contentGroupUniqueViews5': None}, 'dataSource': None, 'publisher_infos': array([], dtype=object)}]"

Categories

Resources