Iterate over json result and get the desirable data in pandas dataframe - python

I have a JSON result which I am trying to convert into a dataframe, but I am not able to get the correct result. It gives the correct output in some cases but fails in others.
Example:
The API generates a result per metric for each specified interval, but it is not guaranteed that a given metric has any output for a particular interval. The process runs for 4 different queue_ids.
Suppose the process is running for only 2 metrics: ['nOffered', 'nTransferred']
queue_id = 'a72dba75-0bc6-4a65-b120-8803364f8dc3'
For this queue_id, nOffered has some values but nTransferred has none. The JSON result is given below:
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-12T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-13T11:46:25.000Z/2021-10-14T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 2,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-14T11:46:25.000Z/2021-10-15T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 3,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-15T11:46:25.000Z/2021-10-16T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
My code is giving below output-
queue_id nOffered_count nOffered_sum interval_start interval_end
0 a72dba75-0bc6-4a65-b120-8803364f8dc3 6 None 2021-10-11T11:46:25.000Z 2021-10-12T11:46:25.000Z
1 a72dba75-0bc6-4a65-b120-8803364f8dc3 1 None 2021-10-12T11:46:25.000Z 2021-10-13T11:46:25.000Z
2 a72dba75-0bc6-4a65-b120-8803364f8dc3 12 None 2021-10-13T11:46:25.000Z 2021-10-14T11:46:25.000Z
3 a72dba75-0bc6-4a65-b120-8803364f8dc3 6 None 2021-10-14T11:46:25.000Z 2021-10-15T11:46:25.000Z
4 a72dba75-0bc6-4a65-b120-8803364f8dc3 6 None 2021-10-15T11:46:25.000Z 2021-10-16T11:46:25.000Z
But when the process runs for the 2nd queue_id, it does not work:
queue_id - 73643cff-799b-41ae-9a67-efcf5e593155
json output for this queue_id -
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-12T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-13T11:46:25.000Z/2021-10-14T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 2,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nTransferred',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-14T11:46:25.000Z/2021-10-15T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 3,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-15T11:46:25.000Z/2021-10-16T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
This time both metrics have some data, so the output should be:
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,1,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,2,,,,1,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,3,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,1,,,,,,,
In the final result, both results should be merged to give one output with all columns and data.
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,6,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-12T11:46:25.000Z,2021-10-13T11:46:25.000Z,1.0,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,12.0,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,6.0,,,,,,,
a72dba75-0bc6-4a65-b120-8803364f8dc3,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,6.0,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,1,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,2,,,,1.0,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,3,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,1,,,,,,,
Currently I am running below logic-
out = defaultdict(list)
if(query_result.results != None):
for item in query_result.results:
#data_lst = []
for lst_data in item.data:
print("####################################")
print(lst_data)
print("####################################")
out['queue_id'].append(queue_id)
for met in lst_data.metrics:
out[met.metric+"_count"].append(met.stats.count)
out[met.metric+"_sum"].append(met.stats.sum)
out[met.metric+".denominator"].append(met.stats.denominator)
out[met.metric+".numerator"].append(met.stats.numerator)
interval = lst_data.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print("out", out)
else:
metric_name = query.metrics[0]
out['queue_id'].append(queue_id)
out[metric_name+"_count"].append('')
out[metric_name+"_sum"].append('')
out[metric_name+".denominator"].append('')
out[metric_name+".numerator"].append('')
interval = query.interval.split('/')
out['Interval Start'].append(interval[0])
out['Interval End'].append(interval[1])
print(out)
df = pd.DataFrame(out)
print (df)
return df

I used below logic to get the desirable result. It is working for me.
# Build one DataFrame row per returned interval.
#
# Every list stored in `out` must end up the same length, otherwise
# pd.DataFrame(out) fails. To guarantee that, each metric named in
# query.metrics gets its four stat columns on every row, padded with ''
# whenever the API returned nothing for that metric in that interval.
lst_of_metrics = ["nOffered", "nTransferred"]  # not read below; query.metrics drives the columns

# (column-name suffix, attribute on met.stats) for each exported statistic.
stat_columns = (
    ("_count", "count"),
    ("_sum", "sum"),
    (".denominator", "denominator"),
    (".numerator", "numerator"),
)

out = defaultdict(list)
if query_result.results is not None:
    for item in query_result.results:
        for lst_data in item.data:
            print("####################################")
            print(lst_data)
            print("####################################")
            out['queue_id'].append(queue_id)
            # Look returned metrics up by NAME. The API leaves out metrics
            # that have no data for an interval, so the position of a metric
            # in lst_data.metrics need not match its position in
            # query.metrics; pairing by index would drop real values.
            returned = {met.metric: met for met in lst_data.metrics}
            for metric_name in query.metrics:
                met = returned.get(metric_name)
                for suffix, attr in stat_columns:
                    out[metric_name + suffix].append(
                        getattr(met.stats, attr) if met is not None else ''
                    )
            interval = lst_data.interval.split('/')
            out['Interval Start'].append(interval[0])
            out['Interval End'].append(interval[1])
    print("out", out)
else:
    # No results at all: emit a single blank row covering the queried
    # interval, with empty cells for every requested metric.
    out['queue_id'].append(queue_id)
    for metric_name in query.metrics:
        for suffix, _attr in stat_columns:
            out[metric_name + suffix].append('')
    interval = query.interval.split('/')
    out['Interval Start'].append(interval[0])
    out['Interval End'].append(interval[1])
    print(out)
df = pd.DataFrame(out)
print(df)

Related

How to load list columns into a dataframe?

I am trying to load "columns" from a Python list object into a dataframe.
This is my list object:
list = type(api_response.results) -> <class 'list'>
These are the values from the list object (I think this is a JSON structure):
{'results': [{'data': [{'interval': '2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z',
'metrics': [{'metric': 'nError',
'qualifier': None,
'stats': {'count': 4,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 113,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice'}}]}
I just need this result:
Dataframe:
interval metric count
0 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nError 4
1 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nOffered 113
How can I get this result? How is it possible to access the intervals or metrics from the list object?
Thanks for any help
you can use:
def get_metric(x):
    """Return the metric names found in the first data entry of each result.

    Args:
        x: A sequence of result dicts, each indexed as
            ``result['data'][0]['metrics']`` where every metric is a dict
            with a ``'metric'`` key.

    Returns:
        A flat list of metric names, in result order and then metric order.
        Only ``data[0]`` of each result is read.
    """
    # Iterate over the metrics that are actually present instead of deriving
    # a loop bound from len(x), which only matched the metric count by luck.
    return [
        metric['metric']
        for result in x
        for metric in result['data'][0]['metrics']
    ]
def get_count(x):
    """Return the ``count`` stat of each metric in the first data entry of each result.

    Args:
        x: A sequence of result dicts, each indexed as
            ``result['data'][0]['metrics']`` where every metric is a dict
            holding ``metric['stats']['count']``.

    Returns:
        A flat list of counts, in result order and then metric order.
        Only ``data[0]`` of each result is read.
    """
    # Walk the metrics themselves; the number of keys in the result dict
    # (len(x[0])) says nothing about how many metrics there are.
    return [
        metric['stats']['count']
        for result in x
        for metric in result['data'][0]['metrics']
    ]
# Each cell of df['results'] is indexed below as a list of result dicts.
# Collect the interval string of every data entry of the first result.
df['interval']=df['results'].apply(lambda x: [x[0]['data'][i]['interval'] for i in range(0,len(x[0]['data']))])
# Per-row lists of metric names and of counts, built by the helper functions.
df['metric']= df['results'].apply(lambda x: get_metric(x))
df['count']= df['results'].apply(lambda x: get_count(x))
# The raw payload column is dropped once the three list columns exist.
df=df.drop(['results'],axis=1)
# 'metric' and 'count' are exploded in one call so the i-th name stays on the
# same row as the i-th count; 'interval' is exploded afterwards.
df=df.explode(['metric','count']).explode('interval')
print(df)
'''
interval metric count
0 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nError 4
0 2022-11-11T10:00:00.000Z/2022-11-11T10:30:00.000Z nOffered 113
'''

Python value assigned to incorrect dict key

I am iterating through a csv and for each column, determining the longest len of a string, and updating a dict as necessary.
If I do this
def get_max_size(current, cell_value):
    """Return the larger of ``current`` and the length of ``cell_value``.

    Args:
        current: The longest length seen so far.
        cell_value: The cell to measure. Falsy values (``None``, ``""``)
            are skipped and leave ``current`` unchanged.

    Returns:
        The updated maximum length.
    """
    if cell_value:
        current = max(current, len(cell_value))
    return current
def my_function():
    """Print, for each header, the longest cell length found in its column.

    Each header gets its own freshly built inner dict, so updating one
    header's ``'max_size'`` does not affect any other header.
    """
    headers = ["val1","val2","val3","val4","val5"]
    # The dict literal is evaluated once per header, giving independent dicts.
    d = {header: {'max_size': 0, 'other': {'test': None}} for header in headers}
    csv_file = [
        ["abc","123","HAMILTON","1950.00","17-SEP-2015"],
        ["ab","321","GLASGOW","711.00","13-NOV-2015"]
    ]
    for row in csv_file:
        for i, header in enumerate(headers):
            max_size = get_max_size(d[header]['max_size'], row[i])
            d[header]['max_size'] = max_size
    print(d)
I get the expected output:
{'val1': {'max_size': 3, 'other': {'test': None}},
'val2': {'max_size': 3, 'other': {'test': None}},
'val3': {'max_size': 8, 'other': {'test': None}},
'val4': {'max_size': 7, 'other': {'test': None}},
'val5': {'max_size': 11, 'other': {'test': None}}}
However if I modify my code as such:
REQUIRED_VALUES = {
'max_size': 0,
'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None},
'max_value': None,
'allow_null': None,
}
def my_function():
    """Print the per-header dicts after scanning the sample rows.

    NOTE: the comprehension below stores the *same* REQUIRED_VALUES object
    under every header; no copy is made. Assigning to
    ``d[header]['max_size']`` therefore writes to that one shared dict, and
    every header ends up reporting the overall maximum (11). To track each
    column separately, give every header its own dict, for example with
    ``copy.deepcopy(REQUIRED_VALUES)``. The aliasing is intentionally left
    in place here because the text that follows shows and asks about
    exactly this output.
    """
    headers = ["val1","val2","val3","val4","val5"]
    # d = {header: {'max_size': 0, 'other': {'test': None}} for header in headers}
    d = {header: REQUIRED_VALUES for header in headers}
    csv_file = [
        ["abc","123","HAMILTON","1950.00","17-SEP-2015"],
        ["ab","321","GLASGOW","711.00","13-NOV-2015"]
    ]
    for row in csv_file:
        for i, header in enumerate(headers):
            max_size = get_max_size(d[header]['max_size'], row[i])
            d[header]['max_size'] = max_size
    print(d)
Then the largest length across all columns (val5, the date field, with len == 11) is assigned to every key's max_size:
{'val1': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val2': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val3': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val4': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None},
'val5': {'max_size': 11, 'allowed_values': {'digit': None, 'alpha': None, 'whitespace': None, 'symbol': None}, 'max_value': None, 'allow_null': None}}
Is there some difference between the dicts that I'm missing? The dict is the only thing that changes, they both contain nested dictionaries... apart from number of items, I can't really see the difference.

Iterating over JSON data and printing. (or creating Pandas DataFrame from JSON file)

I’m trying to use Python to print specific values from a JSON file that I pulled from an API. From what I understand, I am pulling it as a JSON file that has a list of dictionaries of players, with a nested dictionary for each player containing their data (i.e. name, team, etc.).
I’m running into issues printing the values within the JSON file, as each character is printing on a separate line.
The end result I am trying to get to is a Pandas DataFrame containing all the values from the JSON file, but I can’t even seem to iterate through the JSON file correctly.
Here is my code:
url = "https://api-football-v1.p.rapidapi.com/v3/players"
querystring = {"league":"39","season":"2020", "page":"2"}
headers = {
"X-RapidAPI-Host": "api-football-v1.p.rapidapi.com",
"X-RapidAPI-Key": "xxxxxkeyxxxxx"
}
# .json() parses the response body; type(response) is reported below as dict.
response = requests.request("GET", url, headers=headers, params=querystring).json()
# json.dumps() serializes that dict back into one str (see the reported types).
response_dump = json.dumps(response)
# Iterating a str yields single characters, so `item` is one character and the
# inner loop prints that character on its own line.
for item in response_dump:
for player_item in item:
print(player_item)
This is the output when I print the JSON response (first two items):
{'get': 'players', 'parameters': {'league': '39', 'page': '2', 'season': '2020'}, 'errors': [], 'results': 20, 'paging': {'current': 2, 'total': 37}, 'response': [{'player': {'id': 301, 'name': 'Benjamin Luke Woodburn', 'firstname': 'Benjamin Luke', 'lastname': 'Woodburn', 'age': 23, 'birth': {'date': '1999-10-15', 'place': 'Nottingham', 'country': 'England'}, 'nationality': 'Wales', 'height': '174 cm', 'weight': '72 kg', 'injured': False, 'photo': 'https://media.api-sports.io/football/players/301.png'}, 'statistics': [{'team': {'id': 40, 'name': 'Liverpool', 'logo': 'https://media.api-sports.io/football/teams/40.png'}, 'league': {'id': 39, 'name': 'Premier League', 'country': 'England', 'logo': 'https://media.api-sports.io/football/leagues/39.png', 'flag': 'https://media.api-sports.io/flags/gb.svg', 'season': 2020}, 'games': {'appearences': 0, 'lineups': 0, 'minutes': 0, 'number': None, 'position': 'Attacker', 'rating': None, 'captain': False}, 'substitutes': {'in': 0, 'out': 0, 'bench': 3}, 'shots': {'total': None, 'on': None}, 'goals': {'total': 0, 'conceded': 0, 'assists': None, 'saves': None}, 'passes': {'total': None, 'key': None, 'accuracy': None}, 'tackles': {'total': None, 'blocks': None, 'interceptions': None}, 'duels': {'total': None, 'won': None}, 'dribbles': {'attempts': None, 'success': None, 'past': None}, 'fouls': {'drawn': None, 'committed': None}, 'cards': {'yellow': 0, 'yellowred': 0, 'red': 0}, 'penalty': {'won': None, 'commited': None, 'scored': 0, 'missed': 0, 'saved': None}}]}, {'player': {'id': 518, 'name': 'Meritan Shabani', 'firstname': 'Meritan', 'lastname': 'Shabani', 'age': 23, 'birth': {'date': '1999-03-15', 'place': 'München', 'country': 'Germany'}, 'nationality': 'Germany', 'height': '185 cm', 'weight': '78 kg', 'injured': False, 'photo': 'https://media.api-sports.io/football/players/518.png'}, 'statistics': [{'team': {'id': 39, 'name': 'Wolves', 'logo': 'https://media.api-sports.io/football/teams/39.png'}, 'league': {'id': 39, 
'name': 'Premier League', 'country': 'England', 'logo': 'https://media.api-sports.io/football/leagues/39.png', 'flag': 'https://media.api-sports.io/flags/gb.svg', 'season': 2020}, 'games': {'appearences': 0, 'lineups': 0, 'minutes': 0, 'number': None, 'position': 'Midfielder', 'rating': None, 'captain': False}, 'substitutes': {'in': 0, 'out': 0, 'bench': 3}, 'shots': {'total': None, 'on': None}, 'goals': {'total': 0, 'conceded': 0, 'assists': None, 'saves': None}, 'passes': {'total': None, 'key': None, 'accuracy': None}, 'tackles': {'total': None, 'blocks': None, 'interceptions': None}, 'duels': {'total': None, 'won': None}, 'dribbles': {'attempts': None, 'success': None, 'past': None}, 'fouls': {'drawn': None, 'committed': None}, 'cards': {'yellow': 0, 'yellowred': 0, 'red': 0}, 'penalty': {'won': None, 'commited': None, 'scored': 0, 'missed': 0, 'saved': None}}]},
This is the data type of each layer of the JSON file, from when I iterated through it with a For loop:
print(type(response)) <class 'dict'>
print(type(response_dump)) <class 'str'>
print(type(item)) <class 'str'>
print(type(player_item)) <class 'str'>
In my opinion you do not need json.dumps() at all; just iterate over the already-parsed JSON in response:
for player in response['response']:
print(player)
{'player': {'id': 301, 'name': 'Benjamin Luke Woodburn', 'firstname': 'Benjamin Luke', 'lastname': 'Woodburn', 'age': 23, 'birth': {'date': '1999-10-15', 'place': 'Nottingham', 'country': 'England'}, 'nationality': 'Wales', 'height': '174 cm', 'weight': '72 kg', 'injured': False, 'photo': 'https://media.api-sports.io/football/players/301.png'}, 'statistics': [{'team': {'id': 40, 'name': 'Liverpool', 'logo': 'https://media.api-sports.io/football/teams/40.png'}, 'league': {'id': 39, 'name': 'Premier League', 'country': 'England', 'logo': 'https://media.api-sports.io/football/leagues/39.png', 'flag': 'https://media.api-sports.io/flags/gb.svg', 'season': 2020}, 'games': {'appearences': 0, 'lineups': 0, 'minutes': 0, 'number': None, 'position': 'Attacker', 'rating': None, 'captain': False}, 'substitutes': {'in': 0, 'out': 0, 'bench': 3}, 'shots': {'total': None, 'on': None}, 'goals': {'total': 0, 'conceded': 0, 'assists': None, 'saves': None}, 'passes': {'total': None, 'key': None, 'accuracy': None}, 'tackles': {'total': None, 'blocks': None, 'interceptions': None}, 'duels': {'total': None, 'won': None}, 'dribbles': {'attempts': None, 'success': None, 'past': None}, 'fouls': {'drawn': None, 'committed': None}, 'cards': {'yellow': 0, 'yellowred': 0, 'red': 0}, 'penalty': {'won': None, 'commited': None, 'scored': 0, 'missed': 0, 'saved': None}}]}
{'player': {'id': 518, 'name': 'Meritan Shabani', 'firstname': 'Meritan', 'lastname': 'Shabani', 'age': 23, 'birth': {'date': '1999-03-15', 'place': 'München', 'country': 'Germany'}, 'nationality': 'Germany', 'height': '185 cm', 'weight': '78 kg', 'injured': False, 'photo': 'https://media.api-sports.io/football/players/518.png'}, 'statistics': [{'team': {'id': 39, 'name': 'Wolves', 'logo': 'https://media.api-sports.io/football/teams/39.png'}, 'league': {'id': 39, 'name': 'Premier League', 'country': 'England', 'logo': 'https://media.api-sports.io/football/leagues/39.png', 'flag': 'https://media.api-sports.io/flags/gb.svg', 'season': 2020}, 'games': {'appearences': 0, 'lineups': 0, 'minutes': 0, 'number': None, 'position': 'Midfielder', 'rating': None, 'captain': False}, 'substitutes': {'in': 0, 'out': 0, 'bench': 3}, 'shots': {'total': None, 'on': None}, 'goals': {'total': 0, 'conceded': 0, 'assists': None, 'saves': None}, 'passes': {'total': None, 'key': None, 'accuracy': None}, 'tackles': {'total': None, 'blocks': None, 'interceptions': None}, 'duels': {'total': None, 'won': None}, 'dribbles': {'attempts': None, 'success': None, 'past': None}, 'fouls': {'drawn': None, 'committed': None}, 'cards': {'yellow': 0, 'yellowred': 0, 'red': 0}, 'penalty': {'won': None, 'commited': None, 'scored': 0, 'missed': 0, 'saved': None}}]}
or
for player in response['response']:
print(player['player'])
{'id': 301, 'name': 'Benjamin Luke Woodburn', 'firstname': 'Benjamin Luke', 'lastname': 'Woodburn', 'age': 23, 'birth': {'date': '1999-10-15', 'place': 'Nottingham', 'country': 'England'}, 'nationality': 'Wales', 'height': '174 cm', 'weight': '72 kg', 'injured': False, 'photo': 'https://media.api-sports.io/football/players/301.png'}
{'id': 518, 'name': 'Meritan Shabani', 'firstname': 'Meritan', 'lastname': 'Shabani', 'age': 23, 'birth': {'date': '1999-03-15', 'place': 'München', 'country': 'Germany'}, 'nationality': 'Germany', 'height': '185 cm', 'weight': '78 kg', 'injured': False, 'photo': 'https://media.api-sports.io/football/players/518.png'}
To get a DataFrame, simply call pd.json_normalize(). Because your question is not that clear, I am not sure which information is needed and how it should be displayed; that would be better asked as a new question with exactly that focus:
pd.json_normalize(response['response'])
EDIT
Based on your comment and improvement:
pd.concat([pd.json_normalize(response,['response'])\
,pd.json_normalize(response,['response','statistics'])], axis=1)\
.drop(['statistics'], axis=1)
player.id
player.name
player.firstname
player.lastname
player.age
player.birth.date
player.birth.place
player.birth.country
player.nationality
player.height
player.weight
player.injured
player.photo
team.id
team.name
team.logo
league.id
league.name
league.country
league.logo
league.flag
league.season
games.appearences
games.lineups
games.minutes
games.number
games.position
games.rating
games.captain
substitutes.in
substitutes.out
substitutes.bench
shots.total
shots.on
goals.total
goals.conceded
goals.assists
goals.saves
passes.total
passes.key
passes.accuracy
tackles.total
tackles.blocks
tackles.interceptions
duels.total
duels.won
dribbles.attempts
dribbles.success
dribbles.past
fouls.drawn
fouls.committed
cards.yellow
cards.yellowred
cards.red
penalty.won
penalty.commited
penalty.scored
penalty.missed
penalty.saved
0
301
Benjamin Luke Woodburn
Benjamin Luke
Woodburn
23
1999-10-15
Nottingham
England
Wales
174 cm
72 kg
False
https://media.api-sports.io/football/players/301.png
40
Liverpool
https://media.api-sports.io/football/teams/40.png
39
Premier League
England
https://media.api-sports.io/football/leagues/39.png
https://media.api-sports.io/flags/gb.svg
2020
0
0
0
Attacker
False
0
0
3
0
0
0
0
0
0
0
1
518
Meritan Shabani
Meritan
Shabani
23
1999-03-15
München
Germany
Germany
185 cm
78 kg
False
https://media.api-sports.io/football/players/518.png
39
Wolves
https://media.api-sports.io/football/teams/39.png
39
Premier League
England
https://media.api-sports.io/football/leagues/39.png
https://media.api-sports.io/flags/gb.svg
2020
0
0
0
Midfielder
False
0
0
3
0
0
0
0
0
0
0

Pandas to_sql with nested columns ERROR: A TVP's rows must be Sequence objects

I'm fetching some data from an API, and I want to insert it into an Azure SQL DB.
I'm getting the data and adding it to a pandas dataframe before dropping empty columns and such.
After I have done what i need to do I'm trying to insert the data in to the database, but I'm getting this error message: ProgrammingError: (pyodbc.ProgrammingError) ("A TVP's rows must be Sequence objects.", 'HY000')
I've found that some of the columns have nested data (and I would like to keep it that way)
These columns contain lists of dicts here are some examples:
0 [{'note': 'Netto per 45 dgr'}]
1 [{'note': 'Netto per 45 dgr'}]
2 [{'note': 'Netto per 45 dgr'}]
[{'accountingCost': None, 'allowanceCharge': array([], dtype=object), 'billingReference': array([], dtype=object), 'contractDocumentReference': None, 'delivery': {'actualDeliveryDate': None, 'deliveryLocation': None, 'deliveryParty': None, 'despatch': None, 'estimatedDeliveryPeriod': None, 'promisedDeliveryPeriod': None, 'requestedDeliveryPeriod': None, 'trackingId': None}, 'despatchLineReference': {'attachment': None, 'copyIndicator': None, 'documentDescription': None, 'documentType': None, 'documentTypeCode': None, 'id': '', 'issueDate': None, 'uuid': None}, 'documentReference': None, 'id': '1', 'invoicePeriod': None, 'invoicedQuantity': {'unitCode': 'H21', 'unitCodeListId': None, 'value': '1.00'}, 'item': {'additionalItemProperty': array([], dtype=object), 'brandName': None, 'certificate': array([], dtype=object), 'classifiedTaxCategory': None, 'commodityClassification': array([], dtype=object), 'description': array([], dtype=object), 'dimension': array([], dtype=object), 'hazardousItem': array([], dtype=object), 'itemInstance': array([], dtype=object), 'itemSpecificationDocumentReference': array([], dtype=object), 'keyword': array([], dtype=object), 'manufacturerParty': None, 'manufacturersItemIdentification': array([], dtype=object), 'name': 'Ansvarsrett', 'originAddress': None, 'originCountry': None, 'packQuantity': None, 'packSizeNumeric': '', 'sellersItemIdentification': {'extendedId': None, 'id': {'id': 'P550', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}}, 'standardItemIdentification': {'extendedId': None, 'id': {'id': '7043010000953', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': 'GTIN', 'schemeName': None}}, 'transactionConditions': array([], dtype=object)}, 'lineExtensionAmount': {'currency': None, 'value': '504.00'}, 'lineGrossExtensionAmount': None, 'note': '', 'orderLineReference': {'buyersReference': '117230405 Kvileitet', 'lineId': {'id': '0', 'schemeAgencyId': None, 'schemeAgencyName': 
None, 'schemeId': None, 'schemeName': None}, 'orderReference': None, 'salesOrderLineId': {'id': '1', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}, 'sellersReference': ''}, 'orderedQuantity': None, 'price': {'allowanceCharge': array([], dtype=object), 'amount': {'currency': None, 'value': '504.00'}, 'baseQuantity': {'unitCode': None, 'unitCodeListId': None, 'value': '0'}, 'orderableUnitFactorRate': None, 'priceType': None, 'validityPeriod': array([], dtype=object)}, 'taxTotal': {'roundingAmount': None, 'taxAmount': {'currency': None, 'value': '126.00'}, 'taxSubtotal': array([], dtype=object)}}
{'accountingCost': None, 'allowanceCharge': array([], dtype=object), 'billingReference': array([], dtype=object), 'contractDocumentReference': None, 'delivery': {'actualDeliveryDate': None, 'deliveryLocation': None, 'deliveryParty': None, 'despatch': None, 'estimatedDeliveryPeriod': None, 'promisedDeliveryPeriod': None, 'requestedDeliveryPeriod': None, 'trackingId': None}, 'despatchLineReference': {'attachment': None, 'copyIndicator': None, 'documentDescription': None, 'documentType': None, 'documentTypeCode': None, 'id': '', 'issueDate': None, 'uuid': None}, 'documentReference': None, 'id': '2', 'invoicePeriod': None, 'invoicedQuantity': {'unitCode': 'H21', 'unitCodeListId': None, 'value': '3.00'}, 'item': {'additionalItemProperty': array([], dtype=object), 'brandName': None, 'certificate': array([], dtype=object), 'classifiedTaxCategory': None, 'commodityClassification': array([], dtype=object), 'description': array([], dtype=object), 'dimension': array([], dtype=object), 'hazardousItem': array([], dtype=object), 'itemInstance': array([], dtype=object), 'itemSpecificationDocumentReference': array([], dtype=object), 'keyword': array([], dtype=object), 'manufacturerParty': None, 'manufacturersItemIdentification': array([], dtype=object), 'name': 'Prosjektering', 'originAddress': None, 'originCountry': None, 'packQuantity': None, 'packSizeNumeric': '', 'sellersItemIdentification': {'extendedId': None, 'id': {'id': 'Projj', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}}, 'standardItemIdentification': {'extendedId': None, 'id': {'id': '7043010000298', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': 'GTIN', 'schemeName': None}}, 'transactionConditions': array([], dtype=object)}, 'lineExtensionAmount': {'currency': None, 'value': '7020.00'}, 'lineGrossExtensionAmount': None, 'note': '', 'orderLineReference': {'buyersReference': '117230405 Kvileitet', 'lineId': {'id': '0', 'schemeAgencyId': None, 'schemeAgencyName': 
None, 'schemeId': None, 'schemeName': None}, 'orderReference': None, 'salesOrderLineId': {'id': '2', 'schemeAgencyId': None, 'schemeAgencyName': None, 'schemeId': None, 'schemeName': None}, 'sellersReference': ''}, 'orderedQuantity': None, 'price': {'allowanceCharge': array([], dtype=object), 'amount': {'currency': None, 'value': '2340.00'}, 'baseQuantity': {'unitCode': None, 'unitCodeListId': None, 'value': '0'}, 'orderableUnitFactorRate': None, 'priceType': None, 'validityPeriod': array([], dtype=object)}, 'taxTotal': {'roundingAmount': None, 'taxAmount': {'currency': None, 'value': '1755.00'}, 'taxSubtotal': array([], dtype=object)}}
is there any way to add this data to mssql?

How to iterate over interval in json file and create a dataframe?

I am iterating over a JSON file and creating a dataframe with the desired columns. I had already implemented the code, but the JSON structure has now changed a little, and I cannot work out where to change the code to get the required output.
Explanation:
previous json result:
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-18T11:49:48.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 7,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nTransferred',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
previous dataframe:
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-18T11:49:48.000Z,7,,,,1.0,,,
new json result:
queryResult: {'results': [{'data': [{'interval': '2021-10-11T11:46:25.000Z/2021-10-12T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-13T11:46:25.000Z/2021-10-14T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 2,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}},
{'metric': 'nTransferred',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-14T11:46:25.000Z/2021-10-15T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 3,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None},
{'interval': '2021-10-15T11:46:25.000Z/2021-10-16T11:46:25.000Z',
'metrics': [{'metric': 'nOffered',
'qualifier': None,
'stats': {'count': 1,
'count_negative': None,
'count_positive': None,
'current': None,
'denominator': None,
'max': None,
'min': None,
'numerator': None,
'ratio': None,
'sum': None,
'target': None}}],
'views': None}],
'group': {'mediaType': 'voice',
'queueId': '73643cff-799b-41ae-9a67-efcf5e593155'}}]}
Now desirable dataframe:
Queue_Id,Interval Start,Interval End,nOffered_count,nOffered_sum,nOffered.denominator,nOffered.numerator,nTransferred_count,nTransferred_sum,nTransferred.denominator,nTransferred.numerator
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-11T11:46:25.000Z,2021-10-12T11:46:25.000Z,1,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-13T11:46:25.000Z,2021-10-14T11:46:25.000Z,2,,,,1,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-14T11:46:25.000Z,2021-10-15T11:46:25.000Z,3,,,,,,,
73643cff-799b-41ae-9a67-efcf5e593155,2021-10-15T11:46:25.000Z,2021-10-16T11:46:25.000Z,1,,,,,,,
What changes do I need to make to the code below to get the new result?
column_names = []
if(query_result.results != None):
for item in query_result.results:
data_lst = []
for lst_data in item.data:
print("####################################")
print(lst_data)
print("####################################")
for met in lst_data.metrics:
metric_name = met.metric
column_names.append('Queue_Id')
column_names.append(metric_name+'_count')
column_names.append(metric_name+'_sum')
column_names.append(metric_name+'.denominator')
column_names.append(metric_name+'.numerator')
column_names.append('Interval Start')
column_names.append('Interval End')
data_lst.append(queue_id)
data_lst.append(met.stats.count)
data_lst.append(met.stats.sum)
data_lst.append(met.stats.denominator)
data_lst.append(met.stats.numerator)
data_lst.append(lst_data.interval.split('/')[0])
data_lst.append(lst_data.interval.split('/')[1])
print(data_lst)
else:
data_lst = []
metric_name = query.metrics[0]
column_names.append('Queue_Id')
column_names.append(metric_name+'_count')
column_names.append(metric_name+'_sum')
column_names.append(metric_name+'.denominator')
column_names.append(metric_name+'.numerator')
column_names.append('Interval Start')
column_names.append('Interval End')
data_lst.append(queue_id)
data_lst.append('')
data_lst.append('')
data_lst.append('')
data_lst.append('')
data_lst.append(query.interval.split('/')[0])
data_lst.append(query.interval.split('/')[1])
print("data_lst", data_lst)
print("column_names", column_names)
return data_lst, column_names
I have modified my code little bit and got the result. The below code is working for me-
# Build one DataFrame row per returned interval.
#
# Every list stored in `out` must end up the same length, otherwise
# pd.DataFrame(out) fails. To guarantee that, each metric named in
# query.metrics gets its four stat columns on every row, padded with ''
# whenever the API returned nothing for that metric in that interval.
lst_of_metrics = ["nOffered", "nTransferred"]  # not read below; query.metrics drives the columns

# (column-name suffix, attribute on met.stats) for each exported statistic.
stat_columns = (
    ("_count", "count"),
    ("_sum", "sum"),
    (".denominator", "denominator"),
    (".numerator", "numerator"),
)

out = defaultdict(list)
if query_result.results is not None:
    for item in query_result.results:
        for lst_data in item.data:
            print("####################################")
            print(lst_data)
            print("####################################")
            out['queue_id'].append(queue_id)
            # Look returned metrics up by NAME. The API leaves out metrics
            # that have no data for an interval, so the position of a metric
            # in lst_data.metrics need not match its position in
            # query.metrics; pairing by index would drop real values.
            returned = {met.metric: met for met in lst_data.metrics}
            for metric_name in query.metrics:
                met = returned.get(metric_name)
                for suffix, attr in stat_columns:
                    out[metric_name + suffix].append(
                        getattr(met.stats, attr) if met is not None else ''
                    )
            interval = lst_data.interval.split('/')
            out['Interval Start'].append(interval[0])
            out['Interval End'].append(interval[1])
    print("out", out)
else:
    # No results at all: emit a single blank row covering the queried
    # interval, with empty cells for every requested metric.
    out['queue_id'].append(queue_id)
    for metric_name in query.metrics:
        for suffix, _attr in stat_columns:
            out[metric_name + suffix].append('')
    interval = query.interval.split('/')
    out['Interval Start'].append(interval[0])
    out['Interval End'].append(interval[1])
    print(out)
df = pd.DataFrame(out)
print(df)

Categories

Resources