Lambda - apply combination on pandas dataframe - python

Starting from the following dataframe:
I created the following function:
def campaign_name(name,ID,prefix):
campaign = "S[" + prefix + ID + "]: " + name
return campaign
I would like to use it in a dataframe like this:
keywords_merge_temporary["campaign name"] = keywords_merge_temporary.apply(lambda x: campaign_name(x.name,x.id,x.prefix), axis=1)
Problem being, for some reason I get the following error, which I never had in the past when I used this kind of lambda + apply combinations:
<ipython-input-...> in <module>()
----> 1 keywords_merge_temporary["campaign name"] = keywords_merge_temporary.apply(lambda z: campaign_name(z.name,z.id,z.prefix), axis=1)
/Users/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds) 4150 if reduce is None: 4151 reduce = True
-> 4152 return self._apply_standard(f, axis, reduce=reduce) 4153 else: 4154 return self._apply_broadcast(f, axis)
/Users/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in
_apply_standard(self, func, axis, ignore_failures, reduce) 4246 try: 4247 for i, v in enumerate(series_gen):
-> 4248 results[i] = func(v) 4249 keys.append(v.name) 4250 except Exception as e:
<ipython-input-...> in <lambda>(z)
----> 1 keywords_merge_temporary["campaign name"] = keywords_merge_temporary.apply(lambda z: campaign_name(z.name,z.id,z.prefix), axis=1)
<ipython-input-52-f727ebf9b9ee> in campaign_name(name, ID, prefix)
1 def campaign_name(name,ID,prefix):
----> 2 campaign = "S[" + prefix + ID + "]: " + name
3 return campaign
TypeError: ("ufunc 'add' did not contain a loop with signature matching types dtype('<U21') dtype('<U21') dtype('<U21')", 'occurred at index 0')

Related

ValueError: The number of FixedLocator locations (76), usually from a call to set_ticks, does not match the number of ticklabels (8)

Noobie here :)
Any help very welcome as I am struggling with this one:
Been experimenting class and functions programming, but been strugling with some code I found online. The issue seems to be with the "step" argument below as i get the following error:
ValueError Traceback (most recent call last)
Input In [27], in <cell line: 8>()
6 df = main_df[['age', 'price']].groupby('age').sum().reset_index(drop=False)
7 fig, ax = MyPlots.new_plot()
----> 8 MyPlots.bar(ax, x=df['age'], y=df['price'], step=10)
9 MyPlots.title(ax, "Revenue By Age")
11 df = (main_df[['age', 'price']]
12 .groupby('age')
13 .count()
14 .rename(columns=dict(price='sales'))
15 .reset_index().copy())
Input In [4], in MyPlots.bar(ax, data, step, x, y)
293 else:
294 i += 1
--> 297 ax.set_xticklabels(new_a)
298 ax.set_xticks(new_a)
300 # Set background
File C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axes\_base.py:75, in _axis_method_wrapper.__set_name__.<locals>.wrapper(self, *args, **kwargs)
74 def wrapper(self, *args, **kwargs):
---> 75 return get_method(self)(*args, **kwargs)
File C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axis.py:1798, in Axis._set_ticklabels(self, labels, fontdict, minor, **kwargs)
1796 if fontdict is not None:
1797 kwargs.update(fontdict)
-> 1798 return self.set_ticklabels(labels, minor=minor, **kwargs)
File C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axis.py:1720, in Axis.set_ticklabels(self, ticklabels, minor, **kwargs)
1716 if isinstance(locator, mticker.FixedLocator):
1717 # Passing [] as a list of ticklabels is often used as a way to
1718 # remove all tick labels, so only error for > 0 ticklabels
1719 if len(locator.locs) != len(ticklabels) and len(ticklabels) != 0:
-> 1720 raise ValueError(
1721 "The number of FixedLocator locations"
1722 f" ({len(locator.locs)}), usually from a call to"
1723 " set_ticks, does not match"
1724 f" the number of ticklabels ({len(ticklabels)}).")
1725 tickd = {loc: lab for loc, lab in zip(locator.locs, ticklabels)}
1726 func = functools.partial(self._format_with_dict, tickd)
ValueError: The number of FixedLocator locations (76), usually from a call to set_ticks, does not match the number of ticklabels (8).
The function is defined as below:
# Bar Chart
def bar(ax, data=None, step=None, x=None, y=None):
""" Plot a rock bar """
if isinstance(data, pd.Series):
x = data.value_counts().index
y = data.value_counts().values
data_name_x = data.name
data_name_y = "Quantity"
else:
data_name_x = x.name
data_name_y = y.name
ax.bar(x,y, tick_label=x, zorder=2)
if step:
new_a = []
i = 0
for val in x.sort_values():
if i == 0:
new_a.append(val)
if i == (step-1):
i = 0
else:
i += 1
Then later in the workbook:
age_price_biva = Biva(main_df['age'], main_df['price'], x_class_size=30)
age_price_biva.describe()
age_price_biva.scatterplot()
age_price_biva.boxplots()
df = main_df[['age', 'price']].groupby('age').sum().reset_index(drop=False)
fig, ax = MyPlots.new_plot()
MyPlots.bar(ax, x=df['age'], y=df['price'], step=10)
MyPlots.title(ax, "Revenue By Age")
df = (main_df[['age', 'price']]
.groupby('age')
.count()
.rename(columns=dict(price='sales'))
.reset_index().copy())
fig, ax = MyPlots.new_plot()
MyPlots.bar(ax, x=df['age'], y=df['sales'], step=10)
MyPlots.title(ax, "Volume By Age")
Thank you everyone!

Why am I getting error using .map in python function

I'm trying to map a dictionary value to a dataset in a fuction. I keep getting the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-114-f1360d45f8fc> in <module>
----> 1 df['unit_value_factor_4'] = df.apply(map_value, axis=1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-113-2ec7fc46c34e> in map_value(row)
2 def map_value(row):
3 if row['RATING_CLASS_CODE'] == 'G':
----> 4 val = row['unit_value_model'].map(g_cn_value)
5
6 elif row['RATING_CLASS_CODE'] == 'CN':
AttributeError: ("'float' object has no attribute 'map'", 'occurred at index 40')
Below is the function. This is simply looking up the RATING_CLASS_CODE on each row, then mapping a value from a dictionary that corresponds to the unit_value_model which matches my dictionary key.
def map_value(row):
if row['RATING_CLASS_CODE'] == 'G':
val = row['unit_value_model'].map(g_cn_value)
elif row['RATING_CLASS_CODE'] == 'CN':
val = row['unit_value_model'].map(g_cn_value)
elif row['RATING_CLASS_CODE'] == 'NE':
val = row['unit_value_model'].map(ne_gv_value)
elif row['RATING_CLASS_CODE'] == 'GV':
val = row['unit_value_model'].map(ne_gv_value)
elif row['RATING_CLASS_CODE'] == 'LA':
val = row['unit_value_model'].map(la_coll_value)
else:
val = None
print(val)
return val
df['unit_value_factor_4'] = df.apply(map_value, axis=1)
I thnk you need np.select with multiple conditions.
Look at this answer for an explicit example.

How to use .map on an integer column in python pandas

I'm trying to take an integer column and map discrete values to another column. Basically, if a credit tier is marked, 1, 2, 3, antoher column maps those to no credit state, no hit or thin files. Then fill the null values with vaild. I tried However, I keep getting this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-129-926e6625f2b6> in <module>
1 #train.dtypes
----> 2 df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-129-926e6625f2b6> in <lambda>(row)
1 #train.dtypes
----> 2 df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
<ipython-input-126-462888d46184> in discrete_credit(row, variable)
6
7 """
----> 8 score = row[variable].map({1:'no_credit_state', 2:'thin_file', 3:"no_hit"})
9 score = row[score].fillna('valid')
10 score = pd.Categorical(row[score], ['valid', 'no_credit_state','thin_file', 'no_hit'])
AttributeError: ("'numpy.int64' object has no attribute 'map'", 'occurred at index 0')
Here is a code example that is throwing the same error:
import pandas as pd
credit = {'credit_52278':[1,2,3,500,550,600,650,700,750,800,900]
}
df = pd.DataFrame(credit)
def discrete_credit(row, variable):
"""
allows thin files, no hits and no credit scores to float which will then allow the rest of the credit score to be fit \
with a spline
"""
score = row[variable].map({1:'no_credit_state', 2:'thin_file', 3:"no_hit"})
score = row[score].fillna('valid')
score = pd.Categorical(row[score], ['valid', 'no_credit_state','thin_file', 'no_hit'])
return score
df['discrete_52278'] = df.apply(lambda row: discrete_credit(row, 'credit_52278'), axis = 1)
map is a Series method, but you are trying to use it on a scalar (float) value.
You could simply do something like:
df['discrete_52278'] = (
df['credit_52278']
.map({
1: 'no_credit_state',
2: 'thin_file',
3: 'no_hit'
})
.fillna('valid')
.astype('category')
)

Give row number as index when reading csv

I have a csv file like the one here below:
30,60,14.3,53.6,0.71,403,0
30,60,15.3,54.9,0.72,403,0
30,60,16.5,56.2,0.73,403,0
30,60,17.9,57.5,0.74,403,0
No header, just data. The columns are
colNames = {
'doa_in1': np.float64, 'doa_in2': np.float64,
'doa_est1': np.float64, 'doa_est2': np.float64,
'rho': np.float64,
'seed': np.int32, 'matl_chan':np.int32
}
I read the csv with:
tmp_df = pd.read_csv(
io.BytesIO(tmp_csv), encoding='utf8',
header=None,
names=colNames.keys(), dtype=colNames,
converters={
'matl_chan': lambda x: bool(int(x))
}
)
This gives a warning as I'm giving two possible conversion to matl_chan, but it's just a warning that python will use only what is in converters (i.e. the lambda function)
I would like to have as index for each row a number or something unique.
That's because, then I process tmp_df with this function
def remove_lines(df):
THRES = 50
THRES_angle = 10 # degrees
is_converging = True
for idx, row in df.iterrows():
if idx == 0:
is_converging = False
# check if MUSIC started converging
if abs(row['doa_est1']-row['doa_in1']) < THRES_angle:
if abs(row['doa_est2']-row['doa_in2']) < THRES_angle:
is_converging = True
# calc error
err = abs(row['doa_est1']- row['doa_in1'])+abs(row['doa_est2']-row['doa_in2'])
if err > THRES and is_converging:
df=df.drop(idx)
return df
All rows, though, have index 30, so the function doesn't drop anything as I get this error:
KeyError: '[30] not found in axis'
The full stacktrace is
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-143-b61c0402f9d7> in <module>
----> 1 df=get_dataframe()
<ipython-input-121-b76aab8b17ee> in get_dataframe()
24 continue
25
---> 26 tmp_df_sanitized = remove_lines(tmp_df)
27 all_dataframes.append(tmp_df_sanitized)
28
<ipython-input-142-31019390251a> in remove_lines(df)
62 err = abs(row['doa_est1']-row['doa_in1'])+abs(row['doa_est2']-row['doa_in2'])
63 if err > THRES and is_converging:
---> 64 df=df.drop(idx)
65 print("dropped {}".format(idx))
66 return df
/usr/lib/python3.7/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3938 index=index, columns=columns,
3939 level=level, inplace=inplace,
-> 3940 errors=errors)
3941
3942 #rewrite_axis_style_signature('mapper', [('copy', True),
/usr/lib/python3.7/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3778 for axis, labels in axes.items():
3779 if labels is not None:
-> 3780 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
3781
3782 if inplace:
/usr/lib/python3.7/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
3810 new_axis = axis.drop(labels, level=level, errors=errors)
3811 else:
-> 3812 new_axis = axis.drop(labels, errors=errors)
3813 result = self.reindex(**{axis_name: new_axis})
3814
/usr/lib/python3.7/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
4962 if mask.any():
4963 if errors != 'ignore':
-> 4964 raise KeyError(
4965 '{} not found in axis'.format(labels[mask]))
4966 indexer = indexer[~mask]
KeyError: '[30] not found in axis'
Is there anyone who has a solution?
edit: to be clearer, I'd like to have the row index as [0,1,2,3] for the four row I put above

Specify lambda function to continue even if there is an error

I am trying to run this line of code:
df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
But for some address and zipcodes the function get_zillow_id() fails. But I want the lambda function to just ignore the error for that particular address and zipcode and continue. How do I do that?
Here is the entire code:
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults, GetUpdatedPropertyDetails
import pandas as pd
import numpy as np
key = "X1-ZWz1gtmiat11xn_7ew1d"
# Create function to get zillow_id
def get_zillow_id(key, address, zipcode):
zillow_data = ZillowWrapper(key)
deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
result = GetDeepSearchResults(deep_search_response)
return result.zillow_id
# Create function to get propery data
def get_property_data(key, address, zipcode):
zillow_data = ZillowWrapper(key)
updated_property_details_response = zillow_data.get_updated_property_details(get_zillow_id(key, address, zipcode))
result = GetUpdatedPropertyDetails(updated_property_details_response)
return result.year_built
# Import data into dataframe
df = pd.read_csv('test.csv')
# Get zillow ids
df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
Here is a picture of the data frame:
Here is the error I am getting:
ZillowError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self, code_obj, result)
2861 #rprint('Running code', repr(code_obj)) # dbg
-> 2862 exec(code_obj, self.user_global_ns, self.user_ns)
2863 finally:
<ipython-input-40-55f38b77eeea> in <module>()
1 # Get zillow ids
----> 2 df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
4261 reduce=reduce,
-> 4262 ignore_failures=ignore_failures)
4263 else:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
4357 for i, v in enumerate(series_gen):
-> 4358 results[i] = func(v)
4359 keys.append(v.name)
<ipython-input-40-55f38b77eeea> in <lambda>(row)
1 # Get zillow ids
----> 2 df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
<ipython-input-37-ce158395fdb8> in get_zillow_id(key, address, zipcode)
3 zillow_data = ZillowWrapper(key)
----> 4 deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
5 result = GetDeepSearchResults(deep_search_response)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyzillow\pyzillow.py in get_deep_search_results(self, address, zipcode)
30 }
---> 31 return self.get_data(url, params)
32
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyzillow\pyzillow.py in get_data(self, url, params)
81 if response.findall('message/code')[0].text is not '0':
---> 82 raise ZillowError(int(response.findall('message/code')[0].text))
83 else:
<class 'str'>: (<class 'TypeError'>, TypeError('__str__ returned non-string (type dict)',))
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self, code_obj, result)
2877 if result is not None:
2878 result.error_in_exec = sys.exc_info()[1]
-> 2879 self.showtraceback(running_compiled_code=True)
2880 else:
2881 outflag = False
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
1809 value, tb, tb_offset=tb_offset)
1810
-> 1811 self._showtraceback(etype, value, stb)
1812 if self.call_pdb:
1813 # drop into debugger
~\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\zmqshell.py in _showtraceback(self, etype, evalue, stb)
541 u'traceback' : stb,
542 u'ename' : unicode_type(etype.__name__),
--> 543 u'evalue' : py3compat.safe_unicode(evalue),
544 }
545
~\AppData\Local\Continuum\anaconda3\lib\site-packages\ipython_genutils\py3compat.py in safe_unicode(e)
63 """
64 try:
---> 65 return unicode_type(e)
66 except UnicodeError:
67 pass
TypeError: __str__ returned non-string (type dict)
You should try and understand exactly why your function will fail. Then use a try / except clause to ignore the specific problem you wish to avoid. For example, to ignore TypeError:
def get_zillow_id(key, address, zipcode):
try:
zillow_data = ZillowWrapper(key)
deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
result = GetDeepSearchResults(deep_search_response)
return result.zillow_id
except TypeError, ZillowError:
return None
df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']),
axis=1)
If ZillowError is an actual error, you may need to import it from that library.

Categories

Resources