Getting Error While Mapping Data using Dictionary - python

I'm reading multiple files using this code block. Sometimes the column names in the file and the values in the col_map dictionary differ, and the code throws an error. For example, if the column name in the file is Label/Name and the col_map value is Label/, it throws the error shown below. I'm looking for a wildcard kind of approach: if a column name partially matches the col_map value, the values should still be mapped.
If the Column name contains Label/, it should map the values.
Errors:
File "backup.py", line 27, in
mapping_function(df)
File "backup.py", line 24, in mapping_function
_data[i] = data[col_map[i]]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/frame.py", line 2927, in getitem
indexer = self.columns.get_loc(key)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/indexes/base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Label/Studio/Network/Developer/Publisher'
import pandas as pd
df=pd.read_csv('test.txt',sep=' ')
print(df.columns)
### Columns Name #########
# Label/Name,Item Title,Quantity
col_map = {
"start_date":None,
"end_date":None,
"product_label":"Label/",
"product_title":"Item Title",
"product_sku":None,
"quantity":"Quantity"
}
def mapping_function(data, column_map=None):
    """Map columns of ``data`` onto the field names given by a column map.

    Each non-None value in the map is treated as a column name or a fragment
    of one. An exact column-name match is preferred; otherwise the first
    column (in ``data.columns`` order) whose name contains the fragment is
    used, so a map value of ``"Label/"`` resolves to a column named
    ``"Label/Name"``.

    Args:
        data: A pandas DataFrame holding the source columns.
        column_map: Dict of ``{field_name: column name or fragment, or None}``.
            Entries whose value is None are skipped. Defaults to the
            module-level ``col_map``.

    Returns:
        Dict of ``{field_name: column data}`` for every mapped field.

    Raises:
        KeyError: If a map value matches no column, exactly or partially.
    """
    if column_map is None:
        column_map = col_map

    _data = {}
    for field, pattern in column_map.items():
        if pattern is None:
            continue
        if pattern in data.columns:
            # An exact match always wins over a partial one.
            source = pattern
        else:
            candidates = [
                column for column in data.columns if pattern in str(column)
            ]
            if not candidates:
                raise KeyError(
                    "no column matching %r for field %r" % (pattern, field)
                )
            source = candidates[0]
        _data[field] = data[source]
    return _data
mapping_function(df)

Related

iterate a dataframe

I'm trying to iterate over a dataframe to run queries against MongoDB from a list and save the result of each query to a csv file. The connection works with no errors, but when I iterate it only creates the first file (0.csv) and I get an error for the second row of the dataframe.
This is my code:
sql = [
('tran','transactions',{"den": "00100002773060"}),
('tran','Data',{'name': 'john'}),
]
df = pd.DataFrame(sql, columns = ["database", "entity", "sql"])
for i in range(len(df)):
database = df.iloc[i]["database"]
entity=df.iloc[i]["entity"]
myquery=df.iloc[i]["sql"]
collection = client[database][entity]
try:
mydoc = list(collection.find(myquery))
if len(mydoc) > 0:
df = pd.DataFrame(mydoc)
df.pop("_id")
df.to_csv(str(i) + '.csv')
print("file saved")
except:
print("error on file")
and this is the error:
Traceback (most recent call last):
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3629, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'database'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "getSql.py", line 12, in <module>
database = df.iloc[i]["database"]
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/series.py", line 958, in __getitem__
return self._get_value(key)
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/series.py", line 1069, in _get_value
loc = self.index.get_loc(label)
File "/home/r/Desktop/table_csv/entorno_virtual/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 3631, in get_loc
raise KeyError(key) from err
KeyError: 'database'
From what I can see, you are reassigning your df variable inside the loop here:
df = pd.DataFrame(mydoc)
Just give the inner DataFrame a different name so the loop keeps reading the original df.

KeyError: 1.0 after renaming columns of dataframe

Following script:
import pandas as pd
import numpy as np
import math
A = pd.DataFrame(np.array([[1,2,3,4],[5,6,7,8]]))
Floor1 = math.floor(A.min()[1]/2)*2
names = np.array([ 0. , 0.635, 1.27 , 1.905])
A.columns = names
Floor2 = math.floor(A.min()[1]/2)*2
Floor1 is being executed correctly, Floor2 which is done with the same df but with renamed columns isn't. I get a key error:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 385, in pandas._libs.hashtable.Float64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 392, in pandas._libs.hashtable.Float64HashTable.get_item
KeyError: 1.0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Desktop\Python\untitled0.py", line 13, in <module>
Floor2 = math.floor(A.min()[1]/2)*2
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py", line 871, in __getitem__
result = self.index.get_value(self, key)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\numeric.py", line 449, in get_value
loc = self.get_loc(k)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\numeric.py", line 508, in get_loc
return super().get_loc(key, method=method, tolerance=tolerance)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 385, in pandas._libs.hashtable.Float64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 392, in pandas._libs.hashtable.Float64HashTable.get_item
KeyError: 1.0
I know, there is a similar question: After rename column get keyerror
But I didn't really get the answer and - more important - how to solve it.
Before renaming if you get the columns of A using list(A.columns), you'll see that you'll get the list [0,1,2,3]. So, you can index using the key 1. However, after renaming, you can no longer index with key 1 because the column names have changed.
If you are using A.min(), you are finding minimum value in axis=0 by default that is along columns.
When changing the column names, you cannot access index '1' as there is no index with the name '1' in the columns.
If your intention is to find the minimum of each row, you can use A.min(axis=1).
You can write the code like this.
import pandas as pd
import numpy as np
import math
A = pd.DataFrame(np.array([[1,2,3,4],[5,6,7,8]]))
Floor1 = math.floor(A.min(axis=1)[1]/2)*2
names = np.array([ 0. , 0.635, 1.27 , 1.905])
A.columns = names
Floor2 = math.floor(A.min(axis=1)[1]/2)*2
Thank you

KeyError: 'class_name' in python3.7/site-packages/pandas/core/indexes/base.py

I am trying to use one Github repo and I get the following error in python source files.
I looked at posts like [this][1] but couldn't figure the exact problem.
Here's the error that I see:
File "/home/kgarg8/kgarg8-workspace/few-shot/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'class_name'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main "__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code exec(code, run_globals)
File "/home/kgarg8/kgarg8-workspace/few-shot/experiments/proto_nets.py", line 62, in <module> background = dataset_class('background')
File "/home/kgarg8/kgarg8-workspace/few-shot/few_shot/datasets.py", line 31, in __init__
self.unique_characters = sorted(self.df['class_name'].unique())
File "/home/kgarg8/kgarg8-workspace/few-shot/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "/home/kgarg8/kgarg8-workspace/few-shot/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "/home/kgarg8/kgarg8-workspace/few-shot/venv/lib/python3.7/site-packages/pandas/core/generic.py", line 2489, in _get_item_cache
values = self._data.get(item)
File "/home/kgarg8/kgarg8-workspace/few-shot/venv/lib/python3.7/site-packages/pandas/core/internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "/home/kgarg8/kgarg8-workspace/few-shot/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'class_name'
Here's the relevant code snippet:
# proto_nets.py
if args.dataset == 'omniglot':
n_epochs = 40
dataset_class = OmniglotDataset
num_input_channels = 1
drop_lr_every = 20
...
background = dataset_class('background')
# datasets.py
class OmniglotDataset(Dataset):
    """Dataset whose index is loaded into a DataFrame for one named subset.

    NOTE(review): ``Dataset`` and ``index_subset`` are defined outside this
    excerpt; their exact contracts should be confirmed there.
    """

    def __init__(self, subset):
        """Build the index DataFrame for ``subset``.

        Args:
            subset: Either ``'background'`` or ``'evaluation'``.

        Raises:
            ValueError: If ``subset`` is not one of the two supported names.
        """
        if subset not in ('background', 'evaluation'):
            # Instantiate the exception: raising a (class, message) tuple is
            # itself a TypeError on Python 3 and hides the real message.
            raise ValueError('subset must be one of (background, evaluation)')
        self.subset = subset

        self.df = pd.DataFrame(self.index_subset(self.subset))
        # Expose the DataFrame's index values as an explicit ``id`` column.
        self.df = self.df.assign(id=self.df.index.values)
        # Assumes index_subset() yields records with a 'class_name' field;
        # if it returns nothing, this lookup raises KeyError. TODO confirm.
        self.unique_characters = sorted(self.df['class_name'].unique())
You can assume me to be neophyte, any pointers to debug further would be appreciated.
I think that the problem is due to Python/ Pandas version problem.
I am running on pandas==0.23.4 and python==3.7.3
The error is due to the way you are handling unique values (self.unique_characters), particularly at df['class_name']. This chunk is looking for a column named class_name, and you clearly don't have such a column. Instead, I believe you can achieve your goal as follows:
# Call unique() on the pandas Index itself; the ndarray returned by .values has no .unique() method.
self.unique_characters = sorted(self.df.index.unique())
Since your problem is not reproducible, my answer is based on my general evaluation of the issue. Please comment if this does not solve the issue.

Printing multiple columns in Pandas (Python)

I'm new to Python and the Pandas module, but I can't seem to get this to work.
This is my code. I'm using a csv file containing the month and rainfall for Singapore.
Below is my code:
df = pd.read_csv('rainfall-monthly-total.csv')
print ((df['total_rainfall'])[df.total_rainfall == df['total_rainfall'].max()])
print ((df['month'])[df.total_rainfall == df['total_rainfall'].max()])
print ((df['total_rainfall', 'month'])[df.total_rainfall == df['total_rainfall'].max()])
The first two statements work fine. But something is wrong with the third and I can't find out why. Below is the output.
"/Users/xxxx/PycharmProjects/Phyton for Finance/venv/bin/python" "/Users/xxxx/PycharmProjects/Phyton for Finance/Panda Tutorial.py"
299 765.9
Name: total_rainfall, dtype: float64
299 2006-12
Name: month, dtype: object
Traceback (most recent call last):
File "/Users/xxxx/PycharmProjects/Phyton for Finance/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: ('total_rainfall', 'month')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/xxxx/PycharmProjects/Phyton for Finance/Panda Tutorial.py", line 16, in <module>
print ((df['total_rainfall', 'month'])[df.total_rainfall == df['total_rainfall'].max()])
File "/Users/xxxx/PycharmProjects/Phyton for Finance/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "/Users/xxxx/PycharmProjects/Phyton for Finance/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "/Users/xxxx/PycharmProjects/Phyton for Finance/venv/lib/python3.7/site-packages/pandas/core/generic.py", line 2489, in _get_item_cache
values = self._data.get(item)
File "/Users/xxxx/PycharmProjects/Phyton for Finance/venv/lib/python3.7/site-packages/pandas/core/internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "/Users/xxxx/PycharmProjects/Phyton for Finance/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: ('total_rainfall', 'month')
Process finished with exit code 1
I'm using PyCharm with python 3.7.
How do I get python to print out both columns for that particular month?
Try this:
# Select both columns with a list (double brackets), then keep the row(s) holding the maximum rainfall.
print(df[['total_rainfall', 'month']][df.total_rainfall == df['total_rainfall'].max()])
You need to convert single square brackets to double:
['total_rainfall', 'month']
TO
[['total_rainfall', 'month']]
Easy. You need to pass a list of the columns you want to print, so use df.loc to filter your data frame with the condition and select those columns:
print(df.loc[df.total_rainfall == df['total_rainfall'].max(), ['total_rainfall', 'month']])

Python, Panda.read_excel Problem reading multiple sets of Data from one sheet

I have an Excel sheet with two sets of data (picture). I want to plot those with matplotlib in Python and import them with pandas. I narrowed down my script to make it quicker to read.
script:
import matplotlib.pyplot as plt
import pandas as pd
Tabelle = pd.read_excel("C:\\Users\\alexk\\Dropbox\\WW\\WW Master\\1. Semester\\WW2\\WW2 Kernfachpraktikum\\KFP2\\Ergebnisse.xlsx","Tabelle1")
x = Tabelle["Number1"]
y = Tabelle["Value1"]
x2=Tabelle["Number2"]
y2=Tabelle["Value2"]
plt.bar(x, y)
plt.bar(x2,y2)
plt.show()
End of script.
In the script it's possible to plot x and y when the x2 and y2 lines are commented out (with #). When I want to read/plot/whatever x2 and y2, I get an error.
Error code:
Traceback (most recent call last):
File "C:\Users\alexk\PycharmProjects\venv\lib\site-packages\pandas\core\indexes\base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Number2'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/alexk/PycharmProjects/Vickers.py", line 8, in <module>
x2=Tabelle["Number2"]
File "C:\Users\alexk\PycharmProjects\venv\lib\site-packages\pandas\core\frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "C:\Users\alexk\PycharmProjects\venv\lib\site-packages\pandas\core\frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\alexk\PycharmProjects\venv\lib\site-packages\pandas\core\generic.py", line 2489, in _get_item_cache
values = self._data.get(item)
File "C:\Users\alexk\PycharmProjects\venv\lib\site-packages\pandas\core\internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "C:\Users\alexk\PycharmProjects\venv\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Number2'
Process finished with exit code 1
End of error code.
With other excel sheets this process worked fine. What am I missing? Does it have something to do with the excel sheet?

Categories

Resources