Creating a dataframe from a dictionary within tuple

Creating a dataframe from a dictionary within tuple - python

I have a tuple containing dictionaries, and I want to know how to access them and create a dataframe that merges all of the dictionaries' values into a single row.
Example:
({'Id': '4', 'BU': 'usa', 'V_ID': '44', 'INV': 'inv1331', 'DT': '08/1/19', 'AMT': '1500'}, {'Id': '9', 'BU': 'usa', 'V_ID': '44', 'INV': 'inv4321', 'DT': '02/6/19', 'AMT': '1000'})
Expected Result:
Id_1 BU_1 V_ID_1 INV_1 DT_1 AMT_1 Id_2 BU_2 V_ID_2 INV_2 DT_2 AMT_2
---------------------------------------------------------------------------------------------
4 usa 44 inv1331 08/1/19 1500 9 usa 44 inv4321 02/6/19 1000

import pandas as pd

# Tuple of per-record dictionaries; each record contributes one group of columns.
x = ({'Id': '4', 'BU': 'usa', 'V_ID': '44', 'INV': 'inv1331', 'DT': '08/1/19', 'AMT': '1500'}, {'Id': '9', 'BU': 'usa', 'V_ID': '44', 'INV': 'inv4321', 'DT': '02/6/19', 'AMT': '1000'})
# Flatten everything into one mapping, suffixing each key with the record's
# 1-based position so identical keys from different records do not collide.
data = {f"{k}_{i}": v for i, d in enumerate(x, start=1) for k, v in d.items()}
# Every value is a scalar, so pandas needs an explicit index for the single row.
df = pd.DataFrame(data, index=[0])
Output:
>>> df
Id_1 BU_1 V_ID_1 INV_1 DT_1 ... BU_2 V_ID_2 INV_2 DT_2 AMT_2
0 4 usa 44 inv1331 08/1/19 ... usa 44 inv4321 02/6/19 1000
[1 rows x 12 columns]

Related

how to transform strings in csv file into float and store them as a list?

I have csv file that looks like this:
id,type,inc,a,b
1,2,63376.799999999996,0.3061,187
2,1,58087.700000000004,0.3361,178
3,1,52699.9,0.3482,181
4,2,72964.8,0.3186,177
5,4,111589.79999999999,0.268,154
6,4,107618.0,0.2583,150
7,2,87109.2,0.3233,193
8,2,84669.59999999999,0.308,179
9,2,77258.4,0.3247,173
I need to convert the values of fields [3], [4] and [5] (that is, 'inc', 'a' and 'b') to float and then append each row to households to build a list.
Finally it should print len(households)
I am trying this
import csv
import scipy.optimize as opt
# NOTE(review): this is the asker's non-working attempt, kept verbatim.
def read_households(filename):
households=[]
# NOTE(review): the path is hard-coded here, so the `filename` argument is never used.
fh = open("households.csv", 'r')
reader = csv.DictReader(fh) #creates object 'reader' to read the file
for hh in reader:
# NOTE(review): this subscripts the reader itself with the tuple (3, 4, 5)
# instead of converting the fields of the current row `hh`.
reader[3,4,5] = float(reader[3,4,5])
households.append(hh)
print("lines read:", len(households))
# NOTE(review): `household` is not defined in this snippet; the list built above is `households`.
return(household)
It does not give me anything.

Try this code to get the converted output from the CSV file.
import csv
import scipy.optimize as opt
def read_households(filename):
    """Read a households CSV file and return its rows with numeric fields parsed.

    Args:
        filename: Path to a CSV file whose header row includes the columns
            'inc', 'a' and 'b'.

    Returns:
        A list with one dict per data row. The 'inc', 'a' and 'b' values are
        converted to float; every other field stays the string read from
        the file.

    Raises:
        KeyError: If 'inc', 'a' or 'b' is missing from the header row.
        ValueError: If one of those values cannot be parsed as a float.
    """
    households = []
    # The with-block closes the file even if a conversion fails part-way;
    # newline='' is the mode the csv module asks for when reading files.
    with open(filename, 'r', newline='') as fh:
        for hh in csv.DictReader(fh):
            for field in ('inc', 'a', 'b'):
                hh[field] = float(hh[field])
            households.append(hh)
    print("lines read:", len(households))
    return households
# call function
read_households('path_of_csv_file')
Result :
lines read: 9
[{'id': '1', 'type': '2', 'inc': 63376.799999999996, 'a': 0.3061, 'b': 187.0},
{'id': '2', 'type': '1', 'inc': 58087.700000000004, 'a': 0.3361, 'b': 178.0},
{'id': '3', 'type': '1', 'inc': 52699.9, 'a': 0.3482, 'b': 181.0},
{'id': '4', 'type': '2', 'inc': 72964.8, 'a': 0.3186, 'b': 177.0},
{'id': '5', 'type': '4', 'inc': 111589.79999999999, 'a': 0.268, 'b': 154.0},
{'id': '6', 'type': '4', 'inc': 107618.0, 'a': 0.2583, 'b': 150.0},
{'id': '7', 'type': '2', 'inc': 87109.2, 'a': 0.3233, 'b': 193.0},
{'id': '8', 'type': '2', 'inc': 84669.59999999999, 'a': 0.308, 'b': 179.0},
{'id': '9', 'type': '2', 'inc': 77258.4, 'a': 0.3247, 'b': 173.0}]

If you can use pandas then this is a very easy way:
import pandas as pd
# Load the CSV into a DataFrame; pandas infers a dtype for each column.
df = pd.read_csv('file.csv')
# Force the three numeric columns to float (the output below shows 'b' as 187.0 rather than 187).
df = df.astype({'inc': 'float', 'a': 'float', 'b': 'float'})
print(df)
# shape[0] is the number of data rows in the frame.
print(f"Lines read: {df.shape[0]}")
When run gives:
id type inc a b
0 1 2 63376.8 0.3061 187.0
1 2 1 58087.7 0.3361 178.0
2 3 1 52699.9 0.3482 181.0
...
8 9 2 77258.4 0.3247 173.0
Lines read: 9
If you need the output in the list of dictionaries output:
households = df.to_dict('records')
and we are done

getting class_ on web scraping for two elements

I am scraping the ICC rankings page for the top 10 teams, and the cells for both points and matches have the same class:
"td",class_='table-body__cell u-center-text'
How do I separate the two?
# Download the rankings page (url1 is defined elsewhere by the asker).
page = requests.get(url1)
page
# Parse the HTML and dump it, to inspect the markup.
soup1 = BeautifulSoup(page.content, "html.parser")
print(soup1.prettify())
# Collect the text of every <td> carrying the banner "matches" class.
# NOTE(review): another snippet in this document spells the class
# 'rankings-block__banner--matches' (double hyphen) — confirm against the page markup.
matches = []
for i in soup1.find_all("td", class_='rankings-block__banner-matches'):
    matches.append(i.text)
matches

Simple way use pandas
You can use pandas to read the table into a dataframe and pick the values you want:
import pandas as pd
pd.read_html('https://www.icc-cricket.com/rankings/mens/team-rankings/odi/')[0]
Alternative with bs4
# NOTE(review): `soup` must be the parsed page (the question's snippet names it `soup1`).
# td:nth-of-type(3) picks the third <td> in each row of table.table —
# presumably the matches column; confirm against the page.
matches = [x.get_text() for x in soup.select('table.table tr td:nth-of-type(3)')]
# Same selection for the fourth <td> — presumably the points column.
points = [x.get_text() for x in soup.select('table.table tr td:nth-of-type(4)')]
print(matches, points)
or
# Row-by-row variant: read the third and fourth <td> of each table row.
matches = []
points = []
# [1:] skips the first <tr> — presumably the header row, which has no <td> cells.
for x in soup.select('table.table tr')[1:]:
    matches.append(x.select_one('td:nth-of-type(3)').get_text())
    points.append(x.select_one('td:nth-of-type(4)').get_text())
print(matches, points)

A complete solution, just run this code and you will get a dictionary with all the data from the table organized nicely:
# get the entire table
table = soup1.find('table', {'class': 'table'})
# create dictionary to hold results, keyed by country name
rankings = {}
# separate first row since it uses different markup than the rest
position = table.find('td', {'class': 'rankings-block__banner--pos'}).text.strip()
country_name = table.find('span', {'class': 'u-hide-phablet'}).text.strip()
matches = table.find('td', {'class': 'rankings-block__banner--matches'}).text.strip()
points = table.find('td', {'class': 'rankings-block__banner--points'}).text.strip()
rating = table.find('td', {'class': 'rankings-block__banner--rating u-text-right'}).text.strip()
rankings[country_name] = {
    'position': position,
    'matches': matches,
    'points': points,
    'rating': rating,
}
# for the next rows, use a loop
for row in table.find_all('tr', {'class': 'table-body'}):
    position = row.find('td', {'class': 'table-body__cell table-body__cell--position u-text-right'}).text.strip()
    country_name = row.find('span', {'class': 'u-hide-phablet'}).text.strip()
    # matches and points share the same class, so fetch both cells with one
    # find_all call and tell them apart by their order within the row
    centered_cells = row.find_all('td', {'class': 'table-body__cell u-center-text'})
    matches = centered_cells[0].text.strip()
    points = centered_cells[1].text.strip()
    rating = row.find('td', {'class': 'table-body__cell u-text-right rating'}).text.strip()
    rankings[country_name] = {
        'position': position,
        'matches': matches,
        'points': points,
        'rating': rating,
    }
rankings
Which outputs:
{'New Zealand': {'position': '1',
'matches': '17',
'points': '2,054',
'rating': '121'},
'England': {'position': '2',
'matches': '32',
'points': '3,793',
'rating': '119'},
'Australia': {'position': '3',
'matches': '28',
'points': '3,244',
'rating': '116'},
'India': {'position': '4',
'matches': '32',
'points': '3,624',
'rating': '113'},
'South Africa': {'position': '5',
'matches': '25',
'points': '2,459',
'rating': '98'},
'Pakistan': {'position': '6',
'matches': '27',
'points': '2,524',
'rating': '93'},
'Bangladesh': {'position': '7',
'matches': '30',
'points': '2,740',
'rating': '91'},
'West Indies': {'position': '8',
'matches': '30',
'points': '2,523',
'rating': '84'},
'Sri Lanka': {'position': '9',
'matches': '32',
'points': '2,657',
'rating': '83'},
'Afghanistan': {'position': '10',
'matches': '17',
'points': '1,054',
'rating': '62'},
'Netherlands': {'position': '11',
'matches': '7',
'points': '336',
'rating': '48'},
'Ireland': {'position': '12',
'matches': '25',
'points': '1,145',
'rating': '46'},
'Oman': {'position': '13', 'matches': '11', 'points': '435', 'rating': '40'},
'Scotland': {'position': '14',
'matches': '8',
'points': '308',
'rating': '39'},
'Zimbabwe': {'position': '15',
'matches': '20',
'points': '764',
'rating': '38'},
'Nepal': {'position': '16', 'matches': '11', 'points': '330', 'rating': '30'},
'UAE': {'position': '17', 'matches': '9', 'points': '190', 'rating': '21'},
'United States': {'position': '18',
'matches': '14',
'points': '232',
'rating': '17'},
'Namibia': {'position': '19', 'matches': '6', 'points': '97', 'rating': '16'},
'Papua New Guinea': {'position': '20',
'matches': '10',
'points': '0',
'rating': '0'}}
In addition, you can also add it to a pandas dataframe for better analysis:
pd.DataFrame(rankings)
Which outputs:
New Zealand England Australia India South Africa Pakistan Bangladesh West Indies Sri Lanka Afghanistan Netherlands Ireland Oman Scotland Zimbabwe Nepal UAE United States Namibia Papua New Guinea
position 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
matches 17 32 28 32 25 27 30 30 32 17 7 25 11 8 20 11 9 14 6 10
points 2,054 3,793 3,244 3,624 2,459 2,524 2,740 2,523 2,657 1,054 336 1,145 435 308 764 330 190 232 97 0
rating 121 119 116 113 98 93 91 84 83 62 48 46 40 39 38 30 21 17 16 0

How to extract dictionary and sub dictionary

Here is an original dataframe of 2 rows consisting of ID and ColumnA. Some row may have one detail.
ID ColumnA
1 {'1': {'Order': '0', 'Result': ''},
'2': {'Order': 'Yellow', 'Result': 'Red'},
'3': {'Order': 'Clear', 'Result': 'Tight'},
'4': {'Order': '1.000-1.030', 'Result': '1.015'}}
2 {'1': {'Order': '0', 'Result': '1.015'},
'4': {'Order': '1.000-1.030', 'Result': '2.4'},
'5': {'Order': '6.0-7.0', 'Result': ''},
'6': {'Order': 'Negative', 'Result': 'Negative'},
'7': {'Order': 'Negative', 'Result': 'Negative'},
'8': {'Order': 'Negative', 'Result': 'Positive'},
'9': {'Order': 'Negative', 'Result': ''}}
I want to extract from ColumnA to new dataframe
ID Column_ID Column_Order ColumnD_Result
1 1 0
1 2 Yellow Red
1 3 Clear Tight
1 4 1.000-1.030 1.015
2 1 0 1.015
2 4 1.000-1.030 2.4
2 5 6.0-7.0
2 6 Negative Negative
2 7 Negative Negative
2 8 Negative Positive
2 9 Negative
How to write the extraction of dictionary?

Extraction by looping on dictionary items:
import pandas as pd

# Sample input: each entry is [ID, nested dict], where the nested dict maps an
# inner key to a {'Order': ..., 'Result': ...} detail dict.
data = [
    ['1', {'1': {'Order': '0', 'Result': ''},
           '2': {'Order': 'Yellow', 'Result': 'Red'},
           '3': {'Order': 'Clear', 'Result': 'Tight'},
           '4': {'Order': '1.000-1.030', 'Result': '1.015'}}],
    ['2', {'1': {'Order': '0', 'Result': '1.015'},
           '4': {'Order': '1.000-1.030', 'Result': '2.4'},
           '5': {'Order': '6.0-7.0', 'Result': ''},
           '6': {'Order': 'Negative', 'Result': 'Negative'},
           '7': {'Order': 'Negative', 'Result': 'Negative'},
           '8': {'Order': 'Negative', 'Result': 'Positive'},
           '9': {'Order': 'Negative', 'Result': ''}}]]
df = pd.DataFrame(data, columns=['ID', 'ColumnA'])

# Flatten the nested dictionaries: one output row per (ID, inner key) pair.
# Collecting the rows in a list and building the frame once avoids growing
# dfColumnA one .loc assignment at a time.
records = [
    [row_id, column_id, detail['Order'], detail['Result']]
    for row_id, nested in zip(df['ID'], df['ColumnA'])
    for column_id, detail in nested.items()
]
dfColumnA = pd.DataFrame(records, columns=['ID', 'Column_ID', 'Column_Order', 'ColumnD_Result'])
print(dfColumnA)
Output:
ID Column_ID Column_Order ColumnD_Result
0 1 1 0
1 1 2 Yellow Red
2 1 3 Clear Tight
3 1 4 1.000-1.030 1.015
4 2 1 0 1.015
5 2 4 1.000-1.030 2.4
6 2 5 6.0-7.0
7 2 6 Negative Negative
8 2 7 Negative Negative
9 2 8 Negative Positive
10 2 9 Negative

Python Pandas, how to group list of dict and sort

I have a list of dict like:
data = [
{'ID': '000681', 'type': 'B:G+', 'testA': '11'},
{'ID': '000682', 'type': 'B:G+', 'testA': '-'},
{'ID': '000683', 'type': 'B:G+', 'testA': '13'},
{'ID': '000684', 'type': 'B:G+', 'testA': '14'},
{'ID': '000681', 'type': 'B:G+', 'testB': '15'},
{'ID': '000682', 'type': 'B:G+', 'testB': '16'},
{'ID': '000683', 'type': 'B:G+', 'testB': '17'},
{'ID': '000684', 'type': 'B:G+', 'testB': '-'}
]
How to use Pandas to get data like:
data = [
{'ID': '000683', 'type': 'B:G+', 'testA': '13', 'testB': '17'},
{'ID': '000681', 'type': 'B:G+', 'testA': '11', 'testB': '15'},
{'ID': '000684', 'type': 'B:G+', 'testA': '14', 'testB': '-'},
{'ID': '000682', 'type': 'B:G+', 'testA': '-', 'testB': '16'}
]
Rows with the same ID and the same type should be merged into one row, then sorted by the testA and testB values.
Sort order: rows where both testA and testB have a value come first, and a larger value of testA+testB ranks higher.

First convert the columns to numeric, turning non-numeric values into NaN, and then aggregate with sum:
# One row per input dict; a key missing from a dict becomes NaN in that row.
df = pd.DataFrame(data)
c = ['testA','testB']
# errors='coerce' turns values that are not numbers (such as '-') into NaN.
df[c] = df[c].apply(lambda x: pd.to_numeric(x, errors='coerce'))
# Sum per (ID, type); min_count=1 leaves NaN where a group has no numeric value.
# Then sort by testA and testB, and show the remaining NaN as '-'.
df1 = df.groupby(['ID','type'])[c].sum(min_count=1).sort_values(c).fillna('-').reset_index()
print (df1)
ID type testA testB
0 000681 B:G+ 11 15
1 000683 B:G+ 13 17
2 000684 B:G+ 14 -
3 000682 B:G+ - 16
If you want to sort by the sum of both columns, use Series.argsort:
# One row per input dict; a key missing from a dict becomes NaN in that row.
df = pd.DataFrame(data)
c = ['testA','testB']
# errors='coerce' turns values that are not numbers (such as '-') into NaN.
df[c] = df[c].apply(lambda x: pd.to_numeric(x, errors='coerce'))
# Sum per (ID, type); min_count=1 leaves NaN where a group has no numeric value.
df2 = df.groupby(['ID','type'])[c].sum(min_count=1)
# argsort of the negated row totals gives positions ordered from largest
# total to smallest; iloc reorders the rows, then NaN is shown as '-'.
df2 = df2.iloc[(-df2).sum(axis=1).argsort()].fillna('-').reset_index()
print (df2)
ID type testA testB
0 000683 B:G+ 13 17
1 000681 B:G+ 11 15
2 000682 B:G+ - 16
3 000684 B:G+ 14 -

How to unpack an object of dictionaries to a range of Data Frames

I am creating a function that grabs data from an ERP system to display to the end user.
I want to unpack an object of dictionaries and create a range of Pandas DataFrames with them.
For example, I have:
troRows
{0: [{'productID': 134336, 'price': '10.0000', 'amount': '1', 'cost': 0}],
1: [{'productID': 142141, 'price': '5.5000', 'amount': '4', 'cost': 0}],
2: [{'productID': 141764, 'price': '5.5000', 'amount': '1', 'cost': 0}],
3: [{'productID': 81661, 'price': '4.5000', 'amount': '1', 'cost': 0}],
4: [{'productID': 146761, 'price': '5.5000', 'amount': '1', 'cost': 0}],
5: [{'productID': 143585, 'price': '5.5900', 'amount': '9', 'cost': 0}],
6: [{'productID': 133018, 'price': '5.0000', 'amount': '1', 'cost': 0}],
7: [{'productID': 146250, 'price': '13.7500', 'amount': '5', 'cost': 0}],
8: [{'productID': 149986, 'price': '5.8900', 'amount': '2', 'cost': 0},
{'productID': 149790, 'price': '4.9900', 'amount': '2', 'cost': 0},
{'productID': 149972, 'price': '5.2900', 'amount': '2', 'cost': 0},
{'productID': 149248, 'price': '2.0000', 'amount': '2', 'cost': 0},
{'productID': 149984, 'price': '4.2000', 'amount': '2', 'cost': 0},
Each time the function will need to unpack x number of dictionaries which may have different number of rows into a range of DataFrames.
So for example, this range of Dictionaries would return
DF0, DF1, DF2, DF3, DF4, DF5, DF6, DF7, DF8.
I can unpack a single Dictionary with:
pd.DataFrame(troRows[8])
which returns
amount cost price productID
0 2 0 5.8900 149986
1 2 0 4.9900 149790
2 2 0 5.2900 149972
3 2 0 2.0000 149248
4 2 0 4.2000 149984
How can I structure my code so that it does this for all the dictionaries for me?

Solution for a dictionary of DataFrames - use a dictionary comprehension, keeping the keys of the original dictionary as the keys of the result:
# Build one DataFrame per key of troRows; each value (a list of row dicts)
# supplies the rows of the frame stored under that key.
dfs = {k: pd.DataFrame(v) for k, v in troRows.items()}
print (dfs)
{0: amount cost price productID
0 1 0 10.0000 134336, 1: amount cost price productID
0 4 0 5.5000 142141, 2: amount cost price productID
0 1 0 5.5000 141764, 3: amount cost price productID
0 1 0 4.5000 81661, 4: amount cost price productID
0 1 0 5.5000 146761, 5: amount cost price productID
0 9 0 5.5900 143585, 6: amount cost price productID
0 1 0 5.0000 133018, 7: amount cost price productID
0 5 0 13.7500 146250, 8: amount cost price productID
0 2 0 5.8900 149986
1 2 0 4.9900 149790
2 2 0 5.2900 149972
3 2 0 2.0000 149248
4 2 0 4.2000 149984}
print (dfs[8])
amount cost price productID
0 2 0 5.8900 149986
1 2 0 4.9900 149790
2 2 0 5.2900 149972
3 2 0 2.0000 149248
4 2 0 4.2000 149984
Solutions for one DataFrame:
Use list comprehension with flattening and pass it to DataFrame constructor:
troRows = pd.Series([[{'productID': 134336, 'price': '10.0000', 'amount': '1', 'cost': 0}],
[{'productID': 142141, 'price': '5.5000', 'amount': '4', 'cost': 0}],
[{'productID': 141764, 'price': '5.5000', 'amount': '1', 'cost': 0}],
[{'productID': 81661, 'price': '4.5000', 'amount': '1', 'cost': 0}],
[{'productID': 146761, 'price': '5.5000', 'amount': '1', 'cost': 0}],
[{'productID': 143585, 'price': '5.5900', 'amount': '9', 'cost': 0}],
[{'productID': 133018, 'price': '5.0000', 'amount': '1', 'cost': 0}],
[{'productID': 146250, 'price': '13.7500', 'amount': '5', 'cost': 0}],
[{'productID': 149986, 'price': '5.8900', 'amount': '2', 'cost': 0},
{'productID': 149790, 'price': '4.9900', 'amount': '2', 'cost': 0},
{'productID': 149972, 'price': '5.2900', 'amount': '2', 'cost': 0},
{'productID': 149248, 'price': '2.0000', 'amount': '2', 'cost': 0},
{'productID': 149984, 'price': '4.2000', 'amount': '2', 'cost': 0}]])
# Flatten: for every inner list x in troRows, emit each row dict y, then
# build a single DataFrame from all of them.
df = pd.DataFrame([y for x in troRows for y in x])
Another way to flatten your data is to use chain.from_iterable:
from itertools import chain
# chain.from_iterable walks the inner lists of troRows one after another,
# yielding every row dict as one flat sequence for the DataFrame constructor.
df = pd.DataFrame(list(chain.from_iterable(troRows)))
print (df)
amount cost price productID
0 1 0 10.0000 134336
1 4 0 5.5000 142141
2 1 0 5.5000 141764
3 1 0 4.5000 81661
4 1 0 5.5000 146761
5 9 0 5.5900 143585
6 1 0 5.0000 133018
7 5 0 13.7500 146250
8 2 0 5.8900 149986
9 2 0 4.9900 149790
10 2 0 5.2900 149972
11 2 0 2.0000 149248
12 2 0 4.2000 149984

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Creating a dataframe from a dictionary within tuple - python

Related

how to transform strings in csv file into float and store them as a list?

getting class_ on web scraping for two elements

How to extract dictionary and sub dictionary

Python Pandas, how to group list of dict and sort

How to unpack an object of dictionaries to a range of Data Frames

Categories

Resources