Issues accessing a list stored in a Pandas DataFrame - RESOLVED - python

I am currently working with lists stored in a DataFrame defined as follows:
class DATA():
    """Hold BDD_EEC, a DataFrame with one row per model definition.

    Each row stores the model name ('Nom'), its parameter count, and
    per-parameter lists (names, units, fixed flags, lower and upper bounds)
    together with the equation as a string. The 'Entry' column is declared
    but not filled here, so it holds NaN.
    """

    def __init__(self):
        # The count column is named 'Nombre Param' so that it matches the key
        # every row uses; a mismatched header would leave one all-NaN column
        # and append a second, stray column at the end of the frame.
        columns = ['Nom', 'Nombre Param', 'Param', 'Units', 'Fixe',
                   'Min Param', 'Max Param', 'Entry', 'Equation']
        records = [
            ## R+R/C
            {
                'Nom': 'Re+R1/C1',
                'Nombre Param': 3,
                'Param': ['Re', 'C1', 'R1'],
                'Units': ['Ohm', 'F', 'Ohm'],
                'Fixe': [0, 0, 0],
                'Min Param': [0, 1e-10, 0],
                'Max Param': [1000, 1, 1e10],
                # NOTE(review): this equation refers to Q1 although the
                # parameter list above declares C1 -- confirm which is meant.
                'Equation': 'Re+(R1)/(1+1j*2*np.pi*f*R1*Q1)',
            },
            #### R+R/Q
            {
                'Nom': 'Re+R1/Q1',
                'Nombre Param': 4,
                'Param': ['Re', 'Q1', 'a1', 'R1'],
                'Units': ['Ohm', 'F.s^(a-1)', '--', 'Ohm'],
                'Fixe': [0, 0, 0, 0],
                'Min Param': [0.001, 1e-10, 0.001, 0.001],
                'Max Param': [200, 1, 1, 1e6],
                'Equation': 'Re+(R1)/(1+1j*2*np.pi*f*np.power(R1*Q1,a1))',
            },
        ]
        # Building from records keeps each list as a single cell value and
        # gives the rows the integer labels 0 and 1, in declaration order.
        self.BDD_EEC = pd.DataFrame(records, columns=columns)
After creating the object 'mes_datas', I try to access the list stored in the 'Param' column by selecting the row by its name. For the first name it works well:
mes_datas.BDD_EEC['Param'][mes_datas.BDD_EEC['Nom'] == 'Re+R1/C1'][0]
Out[26]: ['Re', 'C1', 'R1']
But with the other name, I get this error:
mes_datas.BDD_EEC['Param'][mes_datas.BDD_EEC['Nom'] == 'Re+R1/Q1'][0]
Traceback (most recent call last):
File "C:\Users\cboissy\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2895, in get_loc
return self._engine.get_loc(casted_key)
File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1032, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1039, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<ipython-input-27-1bad18a3e796>", line 1, in <module>
mes_datas.BDD_EEC['Param'][mes_datas.BDD_EEC['Nom'] == 'Re+R1/Q1'][0]
File "C:\Users\cboissy\anaconda3\lib\site-packages\pandas\core\series.py", line 882, in __getitem__
return self._get_value(key)
File "C:\Users\cboissy\anaconda3\lib\site-packages\pandas\core\series.py", line 989, in _get_value
loc = self.index.get_loc(label)
File "C:\Users\cboissy\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
raise KeyError(key) from err
KeyError: 0
I am quite confused... Can anyone help?
Thanks

You're getting a KeyError because the filtered Series keeps its original index label, which is 1, not 0, and `[0]` looks the value up by label. You can use .iat[0] to get the first result by position:
print(mes_datas.BDD_EEC.loc[mes_datas.BDD_EEC["Nom"] == "Re+R1/Q1", "Param"].iat[0])
Prints:
['Re', 'Q1', 'a1', 'R1']

Related

iterate a dataframe

I'm trying to iterate over a dataframe to run MongoDB queries from a list and save the result of each query to a CSV file. The connection works with no errors, but when I iterate, only the first file (0.csv) is created and I get an error on the second row of the dataframe.
This is my code:
# Each tuple is (database name, collection name, MongoDB filter document).
sql = [
    ('tran','transactions',{"den": "00100002773060"}),
    ('tran','Data',{'name': 'john'}),
]
df = pd.DataFrame(sql, columns = ["database", "entity", "sql"])
# Run one query per row and write its result to "<row number>.csv".
for i in range(len(df)):
    database = df.iloc[i]["database"]
    entity=df.iloc[i]["entity"]
    myquery=df.iloc[i]["sql"]
    collection = client[database][entity]
    try:
        mydoc = list(collection.find(myquery))
        if len(mydoc) > 0:
            # NOTE: this rebinds `df`, replacing the query table with the
            # query result. On the next iteration df.iloc[i]["database"]
            # reads from the result instead, which has no "database"
            # column, and raises KeyError: 'database'.
            df = pd.DataFrame(mydoc)
            df.pop("_id")
            df.to_csv(str(i) + '.csv')
            print("file saved")
    except Exception:
        # Catch Exception rather than everything so that Ctrl-C and
        # interpreter exit are not swallowed by this handler.
        print("error on file")
And this is the error:
Traceback (most recent call last):
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3629, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'database'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "getSql.py", line 12, in <module>
database = df.iloc[i]["database"]
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/series.py", line 958, in __getitem__
return self._get_value(key)
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/series.py", line 1069, in _get_value
loc = self.index.get_loc(label)
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3631, in get_loc
raise KeyError(key) from err
KeyError: 'database'
From what I can see, you are reassigning your df variable inside the loop here:
df = pd.DataFrame(mydoc)
Just rename it (for example to result_df) so that the loop keeps reading its rows from the original DataFrame.

saving coordinates from Dataframe as Polygons (shapely.geometry) AttributeError

I want to create a Polygon from a list of coordinates:
import pandas as pd
from shapely.geometry import Point, Polygon
data = pd.read_csv('path.csv', sep=';')
the data is in the following format
Suburb
features_geometry_x
features_geometry_y
1
50.941840
6.9595637
1
50.941845
6.9595698
3
50.94182
6.9595632
4
50.9418837
6.9595958
with several rows for suburb 1, 3 and 4
#create a polygon
I = data.loc[data['Suburb'] == 1]
I['coordinates'] = list(zip(I['features_geometry_x'], I['features_geometry_y']))
poly_i = Polygon(I['coordinates'])
the code above works fine but if I do the same thing for suburb 3 and 4 it yields the following error:
L = data.loc[data['Suburb'] == 3]
L['coordinates'] = list(zip(L['features_geometry_x'], L['features_geometry_y']))
poly_l = Polygon(L['coordinates'])
File "shapely/speedups/_speedups.pyx", line 252, in shapely.speedups._speedups.geos_linearring_from_py
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py", line 5487, in getattr
return object.getattribute(self, name)
AttributeError: 'Series' object has no attribute 'array_interface'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3361, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 2131, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 2140, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/var/folders/j6/wgg72kmx145f3krf14nzjfq40000gn/T/ipykernel_4092/214655495.py", line 3, in
poly_l = Polygon(Lindenthal['coordinates'])
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/shapely/geometry/polygon.py", line 261, in init
ret = geos_polygon_from_py(shell, holes)
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/shapely/geometry/polygon.py", line 539, in geos_polygon_from_py
ret = geos_linearring_from_py(shell)
File "shapely/speedups/_speedups.pyx", line 344, in shapely.speedups._speedups.geos_linearring_from_py
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py", line 942, in getitem
return self._get_value(key)
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py", line 1051, in _get_value
loc = self.index.get_loc(label)
File "/Users/Jojo/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3363, in get_loc
raise KeyError(key) from err
KeyError: 0
Please help :)
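I think the issue here is that you need more than one data point to create a polygon, whereas your suburbs 3 and 4 each have only a single point. Note also that the `KeyError: 0` in the traceback comes from shapely indexing the Series with the label 0, which the filtered Series does not contain; passing a plain list, e.g. `Polygon(L['coordinates'].tolist())`, avoids that lookup.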

Python class method chaining: Unexpected results with pandas

While investigating method chaining within a python class for a project I'm starting, I encountered this unexpected error. Wonder if someone can explain what might be happening in method3() below with the error thrown by the commented-out code which calculates df['col6'] using df['col5'] ... for some reason any reference to col5 seems to throw errors.
import pandas as pd
class mchain:
    """Wrap a DataFrame and expose chainable steps that add derived columns.

    Every step mutates the wrapped frame in place and returns ``self`` so
    calls can be chained. The steps depend on each other's output columns:
    method2 needs 'col4' (from method1), and any step reading 'col5' needs
    method2 to have run first.
    """

    def __init__(self, df):
        self._df = df

    @property
    def df(self):
        """Return the wrapped DataFrame (the same object, not a copy)."""
        return self._df

    def method1(self, msg):
        """Print msg, then add 'col4' = 'col1' + 'col3'."""
        print(msg)
        self.df['col4'] = self.df['col1'] + self.df['col3']
        return self

    def method2(self, msg):
        """Print msg, then add 'col5' = 'col4' squared."""
        print(msg)
        self.df['col5'] = self.df['col4'] ** 2
        return self

    def method3(self, msg):
        """Print msg, add 'col6' = 'col4' doubled, and print the columns."""
        print(msg)
        self.df['col6'] = self.df['col4'] * 2
        # NOTE: the variant below raises KeyError: 'col5' whenever method2
        # has not run yet on this frame, because 'col5' is only created
        # there -- e.g. in the chain method1(...).method3(...).
        #self.df['col6'] = self.df.apply(lambda x: x['col5']*2, axis=1)
        print(self.df.columns)
        return self

    def method4(self):
        """Print the first rows of the frame as text."""
        print(self.df.head().to_string())
        return self
if __name__ == '__main__':
    # Sample data: two numeric columns used by the chain plus one text column.
    columns = {
        'col1': [1,2,3,4,5],
        'col2': ['a', 'b', 'c', 'd', 'e'],
        'col3': [3,3,2,4,6],
    }
    a = mchain(pd.DataFrame(columns))
    # Short chain: method2 is skipped, so 'col5' does not exist yet.
    short_chain = a.method1('beginning').method3('end')
    print(short_chain)
    # Full chain on the same object: every derived column gets created.
    full_chain = a.method1('beginning').method2('middle').method3('end').method4()
    print(full_chain)
Here's the error traceback when I uncomment the code in method3 ..
(calculator) [Mon Sep 20 11:20:18]:Project python3 ./utils/methodchainingtest.py
beginning
end
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 4554, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 4562, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'col5'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/zzz/Documents/repos/Project/./utils/methodchainingtest.py", line 40, in <module>
print(a.method1('beginning').method3('end'))
File "/Users/zzz/Documents/repos/Project/./utils/methodchainingtest.py", line 27, in method3
self.df['col6'] = self.df.apply(lambda x: x['col5']*2, axis=1)
File "/usr/local/lib/python3.9/site-packages/pandas/core/frame.py", line 7768, in apply
return op.get_result()
File "/usr/local/lib/python3.9/site-packages/pandas/core/apply.py", line 185, in get_result
return self.apply_standard()
File "/usr/local/lib/python3.9/site-packages/pandas/core/apply.py", line 276, in apply_standard
results, res_index = self.apply_series_generator()
File "/usr/local/lib/python3.9/site-packages/pandas/core/apply.py", line 290, in apply_series_generator
results[i] = self.f(v)
File "/Users/zzz/Documents/repos/Project/./utils/methodchainingtest.py", line 27, in <lambda>
self.df['col6'] = self.df.apply(lambda x: x['col5']*2, axis=1)
File "/usr/local/lib/python3.9/site-packages/pandas/core/series.py", line 853, in __getitem__
return self._get_value(key)
File "/usr/local/lib/python3.9/site-packages/pandas/core/series.py", line 961, in _get_value
loc = self.index.get_loc(label)
File "/usr/local/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3082, in get_loc
raise KeyError(key) from err
KeyError: 'col5'
(calculator) [Mon Sep 20 11:34:31]:Project

Pandas To_Excel parsing problem - outputting only 1 file

Hello I have working code like this:
import pandas as pd
from pandas.io.json import json_normalize
import json
import warnings
warnings.filterwarnings('ignore')
# Load the raw JSON and flatten it; the flattened column names are
# dotted paths such as 'result.<platform>.LPVaults.vaults'.
with open('yieldfull.json') as file:
    data = json.load(file)
df_json = json_normalize(data)
df_json_stripped = data[0]
platform_dict = df_json_stripped['result']
# Collect the platform names (the keys under 'result').
platform_names = []
for key in platform_dict:
    platform_names.append(key)
# Write one workbook per supported platform.
for name in platform_names:
    if name == 'Autofarm':
        vault_name_df = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.LPVaults.vaults'].items()]))[0])['name']
        current_token_0 = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.LPVaults.vaults'].items()]))[0])['LPInfo.currentToken0']
        current_token_1 = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.LPVaults.vaults'].items()]))[0])['LPInfo.currentToken1']
        # NOTE: this rebinds df_json, replacing the flattened input with the
        # three-column output frame. Any later branch that indexes
        # df_json['result.<name>...'] then raises KeyError.
        df_json = pd.DataFrame({'Vault_Name':vault_name_df, 'Current_Token_0':current_token_0 , 'Current_Token_1':current_token_1})
        df_json.to_excel('Output_'+name+'.xlsx', index = False)
        # NOTE: removing an item from the list being iterated shifts the
        # remaining items left, so the loop skips the element that follows.
        platform_names.remove(name)
    elif name == 'Acryptos':
        vault_name_df = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.vaults.vaults'].items()]))[0])['name']
        price_USD = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.vaults.vaults'].items()]))[0])['priceInUSDDepositToken']
        current_token_0 = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.vaults.vaults'].items()]))[0])['currentTokens']
        deposited_token = json_normalize(pd.DataFrame(dict([(k, pd.Series(v)) for k,v in df_json['result.'+name+'.vaults.vaults'].items()]))[0])['depositedTokens']
        df_json = pd.DataFrame({'Vault_Name':vault_name_df, 'Price_USD':price_USD, 'Current_Token_0':current_token_0, 'Deposited_Token':deposited_token})
        df_json.to_excel('Output_'+name+'.xlsx', index = False)
    else:
        # Platforms without a dedicated branch are ignored.
        pass
The problem: if I leave it like this, it only produces output for the first `if` branch. When I comment out that `if` section, the `elif` branch outputs successfully, but I can't get it to output both files, whatever I do. Any ideas?
Error I'm getting for Acryptos:
Traceback (most recent call last):
File "C:\Users\Adam\PycharmProjects\Scrapy_Things\venv\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
return self._engine.get_loc(casted_key)
File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 4554, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 4562, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'result.Acryptos.vaults.vaults'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/Users/Adam/PycharmProjects/Scrapy_Things/yieldwatch/yieldwatch/spiders/JsonExcel.py", line 27, in <module>
vault_name_df = json_normalize(pd.DataFrame(dict([(k , pd.Series(v)) for k,v in df_json['result.'+name+'.vaults.vaults'].items()]))[0])['name']
File "C:\Users\Adam\PycharmProjects\Scrapy_Things\venv\lib\site-packages\pandas\core\frame.py", line 3024, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Adam\PycharmProjects\Scrapy_Things\venv\lib\site-packages\pandas\core\indexes\base.py", line 3082, in get_loc
raise KeyError(key) from err
KeyError: 'result.Acryptos.vaults.vaults'
But if I comment out the Autofarm branch and process only the Acryptos branch, it outputs the Excel file just fine.
Please remove the line below from your code:
platform_names.remove(name)
debug code:
platform_names=['Autofarm','Acryptos']
for name in platform_names:
if name == 'Autofarm':
print("Autofarm")
#platform_names.remove(name) # remove this line
elif name == "Acryptos":
print("Acryptos")
You initially created:
df_json = json_normalize(data)
and then, inside the loop, you overwrite it:
df_json = pd.DataFrame({'Vault_Name':vault_name_df, 'Current_Token_0':current_token_0 , 'Current_Token_1':current_token_1})
df_json.to_excel('Output_'+name+'.xlsx', index = False)
So use a different variable name for the DataFrame created inside the loop and it will be okay.

KeyError(Key) when using append with defaultdict

I am getting the following error when I try to append to a dictionary created with defaultdict(list). From my understanding, defaultdict is supposed to prevent a KeyError.
raise KeyError(key) from err
KeyError: 'id'
The following is my code:
# Accumulate one entry per location id; missing keys start as empty lists.
weather_data = defaultdict(list)
m = len(_ids)
date = str(date.today())
i = 0
while i < m:
    # The trailing backslash continues the statement; without it the line
    # ends on a dangling '%' and is a SyntaxError.
    url = ("https://api.openweathermap.org/data/2.5/weather?id=%s&units=%s&appid=%s") % \
        (_ids.loc[i], 'imperial', weather_key)
    payload = r.get(url).json()
    # json_normalize returns a DataFrame whose columns are the flattened
    # keys of the payload.
    payload_from_json = pd.json_normalize(payload)
    weather_data[date].append(date)
    # NOTE: the KeyError in the traceback comes from the right-hand side of
    # these lines. payload_from_json[...] is a DataFrame column lookup and
    # raises when the response lacks that key; defaultdict only protects
    # the weather_data[...] lookups on the left.
    weather_data['id'].append(payload_from_json['id'])
    weather_data['weather'].append(payload_from_json['weather'])
    weather_data['base'].append(payload_from_json['base'])
    weather_data['visibility'].append(payload_from_json['visibility'])
    weather_data['dt'].append(payload_from_json['dt'])
    weather_data['name'].append(payload_from_json['name'])
    weather_data['cod'].append(payload_from_json['cod'])
    weather_data['coord.lon'].append(payload_from_json['coord.lon'])
    weather_data['coord.lat'].append(payload_from_json['coord.lat'])
    weather_data['main.temp'].append(payload_from_json['main.temp'])
    weather_data['main.feels_like'].append(payload_from_json['main.feels_like'])
    weather_data['main.temp_min'].append(payload_from_json['main.temp_min'])
    weather_data['main.temp_max'].append(payload_from_json['main.temp_max'])
    weather_data['main.pressure'].append(payload_from_json['main.pressure'])
    weather_data['main.humidity'].append(payload_from_json['main.humidity'])
    weather_data['wind.speed'].append(payload_from_json['wind.speed'])
    weather_data['wind.deg'].append(payload_from_json['wind.deg'])
    weather_data['clouds.all'].append(payload_from_json['clouds.all'])
    weather_data['sys.type'].append(payload_from_json['sys.type'])
    weather_data['sys.id'].append(payload_from_json['sys.id'])
    weather_data['sys.country'].append(payload_from_json['sys.country'])
    weather_data['sys.sunrise'].append(payload_from_json['sys.sunrise'])
    weather_data['sys.sunset'].append(payload_from_json['sys.sunset'])
    i = i + 1
print(weather_data)
Here is the traceback error - can someone tell me how to interpret this:
Traceback (most recent call last):
File "/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 2895, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1675, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1683, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'coord.lon'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "main.py", line 40, in <module>
weather_data['coord.lon'].append(payload_from_json['coord.lon'])
File "/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py", line 2902, in __getitem__
indexer = self.columns.get_loc(key)
File "/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 2897, in get_loc
raise KeyError(key) from err
[EDIT]
Your weather_data is a defaultdict, but payload_from_json is not (it is the DataFrame returned by pd.json_normalize). So the error was raised by the lookup on payload_from_json.
You can fix this by using get to access the key:
weather_data['id'].append(payload_from_json.get('id'))
If you don't want to include junk data, you can add a check before appending:
if payload_from_json.get('id') is not None:
weather_data['id'].append(payload_from_json.get('id'))
Also, you can add some default value like this:
weather_data['id'].append(payload_from_json.get('id', 'missing'))
or
weather_data['id'].append(payload_from_json.get('id', ''))
or by default:
weather_data['id'].append(payload_from_json.get('id', None))
In your specific problem, this should work:
# Flattened payload keys to collect for every location id.
WEATHER_FIELDS = [
    'id', 'weather', 'base', 'visibility', 'dt', 'name', 'cod',
    'coord.lon', 'coord.lat',
    'main.temp', 'main.feels_like', 'main.temp_min', 'main.temp_max',
    'main.pressure', 'main.humidity',
    'wind.speed', 'wind.deg',
    'clouds.all',
    'sys.type', 'sys.id', 'sys.country', 'sys.sunrise', 'sys.sunset',
]

weather_data = defaultdict(list)
# Keep the formatted date under its own name: rebinding `date` to a string
# would make date.today() fail the next time this code runs.
today = str(date.today())
for i in range(len(_ids)):
    # The whole format expression sits on one logical line; ending a line
    # on a bare '%' is a SyntaxError.
    url = "https://api.openweathermap.org/data/2.5/weather?id=%s&units=%s&appid=%s" % (
        _ids.loc[i], 'imperial', weather_key)
    payload = r.get(url).json()
    payload_from_json = pd.json_normalize(payload)
    # NOTE(review): the key here is the date string itself, as in the
    # original code; if a fixed 'date' entry was intended, use
    # weather_data['date'] instead.
    weather_data[today].append(today)
    for field in WEATHER_FIELDS:
        # DataFrame.get returns None instead of raising KeyError when the
        # response does not contain this field.
        weather_data[field].append(payload_from_json.get(field))

Categories

Resources