I am creating a shopping cart application that applies a tax rate and shows each item's price plus the overall total. I can get everything to display except the sum of all items with the tax rate applied. Clearly I'm missing something, but I'm not sure how to populate the price and tax rate for every product and sum them together.
# Question code, as pasted. NOTE(review): the indentation was lost in the
# paste — the five lines after the `for` belong inside the loop body — and
# getTaxRate is defined further down (in the answer), so this snippet does
# not run as shown.
# cart maps item name -> [category, unit price, city].
cart = {"Shirt": ["Clothing", 39.99, "Manhattan"], "TV": ["Electronic", 999.00, "White Plains"], "Muffin": ["Bread", 9.50, "Manhattan"], "Jacket": ["Clothing", 45.95, "White Plains"], "Coat": ["Clothing", 239.55, "Manhattan"]}
# Iterate over item names; look up price, city and category for each.
for i in cart:
unit = cart[i][1]
city = cart[i][2]
kind = cart[i][0]
# Fractional tax rate for this item (e.g. 0.09 for 9%).
taxPercent = getTaxRate(city, kind, unit)
print ("*tax: ${:.2f}".format(taxPercent*unit), "\n{}:".format(i), unit,"+ ${:.2f}".format(taxPercent*unit),"= ${:.2f}".format(unit + taxPercent*unit))
# The commented-out total below references an undefined `items` — the asker
# never accumulates a running total, which is exactly the question being asked.
# print ("--------- Please pay the following:------- \n", "Total:${:.2f}".format(items + taxPercent*items))
Output should be:
*Tax: 00.0
Shirt: $39.99
TV: 999.0+99.90 = $1098.90
*Tax: 99.90
Muffin: 9.5+0.95 = $10.45
*Tax: 0.95
Jacket: 45.95+4.14 = $50.09
*Tax: 4.14
Coat: 239.55+21.56 = $261.11
*Tax: 21.56
---------- Please pay the following ----------
Total: $1460.54
You have to accumulate the values to sum them up. You can do this by adding the following statement inside the loop:
totalamt = totalamt + unit + taxPercent*unit
It adds each item's price plus tax to the total as the loop iterates.
cart = {"Shirt": ["Clothing", 39.99, "Manhattan"], "TV": ["Electronic", 999.00, "White Plains"], "Muffin": ["Bread", 9.50, "Manhattan"], "Jacket": ["Clothing", 45.95, "White Plains"], "Coat": ["Clothing", 239.55, "Manhattan"]}
def getTaxRate (city, kind, price): # given call and simple return
    """Return the fractional tax rate for an item of *kind* priced *price* in *city*.

    Manhattan: clothing is tax-free up to $100, 9% above; electronics 11%;
    everything else 10%.  White Plains: electronics 10%; everything else 9%.
    Any other city leaves the rate unbound (UnboundLocalError), matching the
    original behavior.
    """
    if city == "Manhattan":
        if kind == "Clothing":
            # Clothing under $100 is exempt in Manhattan.
            rate = 0.09 if price > 100 else 0.0
        elif kind == "Electronic":
            rate = 0.11
        else:
            rate = 0.1
    elif city == "White Plains":
        rate = 0.1 if kind == "Electronic" else 0.09
    return rate
# Running total of price + tax across the whole cart.
totalamt = 0
for name, (kind, unit, city) in cart.items():
    # Rate depends on where the item was bought, its category, and its price.
    taxPercent = getTaxRate(city, kind, unit)
    tax = taxPercent * unit
    print("*tax: ${:.2f}".format(tax), "\n{}:".format(name), unit, "+ ${:.2f}".format(tax), "= ${:.2f}".format(unit + tax))
    totalamt = totalamt + unit + tax
# Grand total after every item has been accumulated.
print("--------- Please pay the following:------- \n", "Total:${:.2f}".format(totalamt))
I have this code block:
def euc_dist(x,y):
    """Euclidean distance between two 2-D points given as (x, y) pairs."""
    dx = x[0] - y[0]
    dy = x[1] - y[1]
    return (dx * dx + dy * dy) ** 0.5
def dist(s1,s2):
    """Element-wise Euclidean distances between two aligned sequences of points.

    Indexes both sequences positionally (assumes a default RangeIndex when the
    inputs are pandas Series) and returns the distances as a pd.Series.
    """
    n = s1.shape[0]
    result = [euc_dist(s1[idx], s2[idx]) for idx in range(n)]
    return pd.Series(result)
# NOTE(review): this is the failing call from the question. `.loc[:, <labels>]`
# is being handed the *tuple values* produced by `.apply(tuple, axis=1)` as if
# they were column labels, hence the KeyError quoted below. Also
# `args = (tracking_data["ball_point"])` is not a 1-tuple — it needs a
# trailing comma.
distances_df = tracking_data.loc[:,tracking_data[['away_player10_point', 'away_player9_point', 'away_player8_point', 'away_player7_point', 'away_player6_point', 'away_player5_point', 'away_player4_point', 'away_player3_point', 'away_player2_point', 'away_player1_point', 'away_player11_point', 'home_player1_point', 'home_player2_point', 'home_player3_point', 'home_player4_point', 'home_player5_point', 'home_player6_point', 'home_player7_point', 'home_player8_point', 'home_player9_point', 'home_player10_point', 'home_player11_point']].apply(tuple, axis = 1)].apply(dist, args = (tracking_data["ball_point"]))
# Take the column whose distance to the ball is smallest per row; str(x)[:-6]
# strips the "_point" suffix from the column name.
tracking_data["closest"] = distances_df.idxmin(axis = 1).apply(lambda x: str(x)[:-6])
I am getting this error when running:
KeyError: "None of [Index([
((-22.06, -8.32), (-0.12, 21.38), (-1.49, -9.62), (-0.26, -28.52),
(-19.32, 16.22), (-15.11, 0.43), (-7.69, 32.87), (0.45, -0.25),
(-9.88, 7.67), (-47.29, -0.14), (-18.1, -25.42), (0.46, -19.84),
(7.58, 4.82), (15.33, -23.38), (21.08, 6.57), (14.98, 20.7), (8.14,
-4.27), (21.36, -9.06), (46.92, 0.01), (0.29, 9.88), (0.67, 22.24), (-0.06, -9.07)),\n
((-22.06, -8.32), (-0.07, 21.39), (-1.47, -9.64), (-0.23, -28.51),
(-19.31, 16.22), (-15.1, 0.42), (-7.68, 32.88), (0.46, -0.26), (-9.87,
7.7), (-47.3, -0.15), (-18.09, -25.41), (0.43, -19.83), (7.5600000000000005, 4.83), (15.31, -23.38), (21.06, 6.57), (14.97,
20.72), (8.12, -4.28), (21.33, -9.04), (46.91, 0.02), (0.25, 9.85), (0.67, 22.24), (-0.11, -9.05)),\n
((-22.06, -8.33), (-0.03, 21.39), (-1.43, -9.67), (-0.2, -28.5),
(-19.29, 16.24), (-15.09, 0.42), (-7.66, 32.9), (0.47000000000000003,
-0.27), (-9.85, 7.72), (-47.31, -0.16), (-18.08, -25.4), (0.39, -19.83), (7.55, 4.85), (15.28, -23.38), (21.03, 6.57), (14.95, 20.74), (8.09, -4.28), (21.28, -9.02), (46.91, 0.03), (0.2, 9.82), (0.66,
22.24), (-0.16, -9.02)),\n ((-22.06, -8.34), (0.01, 21.39), (-1.3900000000000001, -9.7), (-0.16, -28.5), (-19.28,
16.25), (-15.08, 0.42), (-7.64, 32.92), (0.49, -0.27), (-9.84, 7.75), (-47.32, -0.16), (-18.07, -25.4), (0.3500000000000...
Please reference this notebook to see my table as it is too large to put here. The work pertaining to this question is at the bottom.
https://github.com/piercepatrick/Articles_EDA/blob/main/nashSCProject.ipynb
I have been trying to work out this problem in my previous question: Pandas Dataframe: Find the column with the closest coordinate point to another columns coordinate point
I have a hunch that this issue lies in the source data since I originally loaded it in as JSON?
First, reset the index with
tracking_data = tracking_data.reset_index()
Then change
distances_df = tracking_data.loc[:,tracking_data[['away_player10_point', 'away_player9_point', 'away_player8_point', 'away_player7_point', 'away_player6_point', 'away_player5_point', 'away_player4_point', 'away_player3_point', 'away_player2_point', 'away_player1_point', 'away_player11_point', 'home_player1_point', 'home_player2_point', 'home_player3_point', 'home_player4_point', 'home_player5_point', 'home_player6_point', 'home_player7_point', 'home_player8_point', 'home_player9_point', 'home_player10_point', 'home_player11_point']].apply(tuple, axis = 1)].apply(dist, args = (tracking_data["ball_point"]))
for
distances_df = tracking_data[['away_player10_point', 'away_player9_point', 'away_player8_point', 'away_player7_point', 'away_player6_point', 'away_player5_point', 'away_player4_point', 'away_player3_point', 'away_player2_point', 'away_player1_point', 'away_player11_point', 'home_player1_point', 'home_player2_point', 'home_player3_point', 'home_player4_point', 'home_player5_point', 'home_player6_point', 'home_player7_point', 'home_player8_point', 'home_player9_point', 'home_player10_point', 'home_player11_point']].apply(dist, args = (tracking_data["ball_point"],))
I tried applying the following code:
# Load the raw transaction data; expected columns include transaction_date,
# customer_tier, customer_id, transaction_amount — TODO confirm schema.
df = pd.read_csv('data_sample_ltv.csv')
# Convert date to year
# Build the mapper once per unique date rather than parsing every row.
date_mapper = {date: pd.to_datetime(date).year for date in df['transaction_date'].unique()}
year = df['transaction_date'].map(date_mapper)
df['year'] = year
# Convert tier to categorical
# NOTE(review): ordered=False, yet highest_tier() below relies on sort_values
# putting Gold first — presumably this works only via the category-code order;
# consider ordered=True to make the intent explicit.
tiers = pd.Categorical(df['customer_tier'],
categories=['Gold', 'Silver', 'Bronze', 'Free-Trial'],
ordered=False)
df['customer_tier'] = tiers
# Create highest tier mapper
def highest_tier(c_id, df=df):
    """Return the best tier on record for customer *c_id*.

    Sorting the categorical tier column puts 'Gold' first (it is listed first
    in the category order), so the first sorted value is the highest tier.
    """
    customer_tiers = df.loc[df.customer_id == c_id]['customer_tier'].sort_values()
    return customer_tiers.iloc[0]
# Map each unique customer id to their best recorded tier.
tier_mapper = {
cust_id: highest_tier(cust_id) for cust_id in df['customer_id'].unique()
}
# Aggregate the data
# Produces MultiIndex columns: ('transaction_amount','sum'),
# ('transaction_amount','count') and ('year','nunique').
customer_df = df.groupby(['customer_id']).agg({
'transaction_amount': ['sum', 'count'],
'year': [pd.Series.nunique]
})
customer_df['highest_tier'] = customer_df.index.map(tier_mapper)
# Lifespan = number of distinct years the customer transacted in.
customer_df['lifespan'] = customer_df[('year', 'nunique')]
customer_df['avg_trn_amt'] = customer_df[('transaction_amount', 'sum')] / customer_df[('transaction_amount', 'count')]
# NOTE(review): the reported `inf` comes from this division — presumably
# ('year','nunique') is 0 for some customers (e.g. all-NaN years from
# unparsed dates), so count/0 -> inf. Verify the year column upstream.
customer_df['avg_trn_per_yr'] = customer_df[('transaction_amount', 'count')] / customer_df['lifespan']
# Create the LTV function
def ltv(df, tier=None):
    """Compute average lifetime-value statistics for *df*.

    When *tier* is given (truthy), only rows whose 'highest_tier' equals it
    are considered.  Returns a dict with rounded averages of lifespan,
    transactions per year, transaction amount, and their product ('ltv').
    """
    if tier:
        # Restrict to the requested customer segment.
        df = df[df['highest_tier'] == tier]
    avg_lifespan = round(df['lifespan'].mean(), 1)
    avg_trn_per_yr = round(df['avg_trn_per_yr'].mean(), 1)
    avg_trn_amt = round(df['avg_trn_amt'].mean(), 2)
    return {
        'avg_lifespan': avg_lifespan,
        'avg_trn_per_yr': avg_trn_per_yr,
        'avg_trn_amt': avg_trn_amt,
        # LTV = years active * transactions/year * average transaction size.
        'ltv': round(avg_lifespan * avg_trn_per_yr * avg_trn_amt, 2),
    }
# Calculate the LTVs for each of our customer segments
# Segment-level LTVs; each call returns the dict built by ltv().
ltv_all = ltv(customer_df)
ltv_gold = ltv(customer_df, 'Gold')
ltv_silver = ltv(customer_df, 'Silver')
ltv_bronze = ltv(customer_df, 'Bronze')
print(f"The lifetime value of our Gold tier is: {ltv_gold['ltv']} while the ltv of bronze is {ltv_bronze['ltv']}")
But the ltv results were inf, and not a numeric value:
The lifetime value of our Gold tier is: inf while the ltv of bronze is inf
The results from each ltv were all inf:
{'avg_lifespan': 1.2, 'avg_trn_per_yr': inf, 'avg_trn_amt': 82.23, 'ltv': inf}
{'avg_lifespan': 1.1, 'avg_trn_per_yr': inf, 'avg_trn_amt': 39.9, 'ltv': inf}
{'avg_lifespan': 1.3, 'avg_trn_per_yr': inf, 'avg_trn_amt': 128.13, 'ltv': inf}
Can someone help me understand what went wrong and what should I do to transform the inf into a numeric value? Thanks.
I have this bad boy:
# Question code, as pasted (indentation was lost). In the asker's actual
# file the `return` sat inside the `for` loop, which is why only the first
# state ('CT', 3) was ever returned.
def by_complexity(db : {str: {(int,int) : float}}) -> [(str,int)]:
# NOTE: `complex` shadows the built-in complex() within this function.
complex = []
for state, taxes in db.items():
complex.append((state, len(taxes.values())))
# Sort by descending bracket count, breaking ties by state name.
return (sorted(complex, key = lambda zps : (-zps[1],zps[0])))
# Sample tax tables: state code -> {(bracket low, bracket high): rate}.
# A high bound of None means "no upper limit".
db1 = {
    'CT': {(0, 12_499): .02, (12_500, 49_999): .04, (50_000, None): .06},
    'IN': {(0, None): .04},
    'LA': {
        (0, 9_999): .03,
        (10_000, 12_499): .05,
        (12_500, 49_999): .055,
        (50_000, 299_999): .06,
        (300_000, None): .078,
    },
    'MA': {(0, None): .055},
}
print(by_complexity(db1))
Now when I run it, it only prints out [('CT', 3)]
instead of [('LA', 5), ('CT', 3), ('IN', 1), ('MA', 1)] so now I'm wondering why? because I can't find a bug in it... it just doesn't work
The problem comes from the indentation level of your return statement:
you are returning while still inside the for loop, so the function exits after the first iteration.
Try this :
def by_complexity(db: {str: {(int, int): float}}) -> [(str, int)]:
    """Return (state, bracket-count) pairs, most brackets first, ties by name.

    *db* maps a state code to its dict of (low, high) income brackets -> rate.
    Fixes over the original: the local `complex` shadowed the builtin of the
    same name (renamed to `counts`), and `len(taxes.values())` was a redundant
    detour — `len(taxes)` counts the brackets directly.
    """
    counts = [(state, len(taxes)) for state, taxes in db.items()]
    # Negative count sorts descending; the state name breaks ties ascending.
    return sorted(counts, key=lambda pair: (-pair[1], pair[0]))
This is a word-similarity function I wrote using xlwings, a Python–Excel library. I want it to return results like this (the items in each row separated by a tab, so I can easily copy/paste into an Excel file for summing), for example:
0.9999998807907104 'casual' 1.0 1.0 29.0
0.8386740684509277 'active' 0.3333 1.0 13.0
0.776314377784729 'cardigans'0.1667 1.0 84.0
But it actually returns this (which I can't copy into an Excel file for further use, like summing the digits):
[[0.9999998807907104, ('casual', (1.0, 1.0, 29.0))],
[0.8386740684509277, ('active', (0.3333, 1.0, 13.0))],
[0.776314377784729, ('cardigans', (0.1667, 1.0, 84.0))]]
How can I achieve that? Thank you.
# Question code (indentation lost in the paste). Builds a list of
# [similarity, (word, (col2, col3, col4))] entries by matching each model
# word against column 1 of an Excel sheet, then returns the N most similar.
# phrase_model and cosine_similarity are defined elsewhere (not shown) —
# presumably a word->vector map and a vector-similarity helper; confirm.
def similarity(phrase, N=10):
phrase_vec = phrase_model[phrase]
CosDisList = []
# Open the workbook through xlwings; requires Excel and file01.xlsx present.
wb = xw.Book('file01.xlsx')
sht = wb.sheets['sheet1']
for a_word in phrase_model.keys():
a_val = phrase_model[a_word]
cos_dis = cosine_similarity(phrase_vec, a_val)
# Scan sheet rows 1..17 for a cell matching the current word.
for i in range(1, 18):
if a_word == sht.cells(i, 1).value:
DataFromExcel = (sht.cells(i, 2).value, sht.cells(i, 3).value, sht.cells(i, 4).value)
DataCombined = (a_word, DataFromExcel)
# str(...).strip('[[]]') peels the brackets off a 1x1 nested list to
# recover the scalar similarity as a float.
CosDisBind = [float(str(cos_dis.tolist()).strip('[[]]')), DataCombined]
CosDisList.append(CosDisBind)
# Sort descending by similarity; nlargest on the already-sorted list then
# takes the top N entries.
CosDisListSort = sorted(CosDisList, key=operator.itemgetter(0), reverse=True)
CosDisListTopN = heapq.nlargest(N, CosDisListSort)
return CosDisListTopN
You can use the following function. Source : a blogpost
def flatten(l, ltypes=(list, tuple)):
    """Recursively flatten nested lists/tuples into one flat container.

    The result has the same type as the outermost container; empty nested
    containers simply contribute nothing to the output.
    """
    ltype = type(l)
    flat = []
    for item in l:
        if isinstance(item, ltypes):
            # Recurse and splice the flattened contents in place.
            flat.extend(flatten(item, ltypes))
        else:
            flat.append(item)
    return ltype(flat)
Then just use:
# Sample of the nested structure returned by similarity().
abc = [[0.9999998807907104, ('casual', (1.0, 1.0, 29.0))],
[0.8386740684509277, ('active', (0.3333, 1.0, 13.0))],
[0.776314377784729, ('cardigans', (0.1667, 1.0, 84.0))]]
# One flat list of 15 scalars (5 fields per original row).
flat_list = flatten(abc)
# Reshape into rows of 5. Note np.array on mixed floats/strings coerces every
# element to str, which is why the output below is all strings.
final_array = np.array(flat_list).reshape((np.round(len(flat_list)//5), 5)).tolist()
# [['0.9999998807907104', 'casual', '1.0', '1.0', '29.0'], ['0.8386740684509277', 'active', '0.3333', '1.0', '13.0'], ['0.776314377784729', 'cardigans', '0.1667', '1.0', '84.0']]
Now you can join individual lists:
# Tab-join each row's string fields so a row pastes cleanly into Excel columns.
most_final = ["\t".join(x) for x in final_array]
print(most_final[0])
output
print(most_final[0])
0.9999998807907104 casual 1.0 1.0 29.0