How to do assignment in Pandas without warning? - python

I'm trying to port this code in R to Python using Pandas.
This is my R code (assume data is a data.frame):
transform <- function(data) {
baseValue <- data$baseValue
na.base.value <- is.na(baseValue)
baseValue[na.base.value] <- 1
zero.base.value <- baseValue == 0
baseValue[zero.base.value] <- 1
data$adjustedBaseValue <- data$baseRatio * baseValue
baseValue[na.base.value] <- -1
baseValue[zero.base.value] <- 0
data$baseValue <- baseValue
return(data)
}
This is my attempt to port the R code in Python (assume data is pandas.DataFrame):
import pandas as pd
def transform(data):
base_value = data['baseValue']
na_base_value = base_value.isnull()
base_value.loc[na_base_value] = 1
zero_base_value = base_value == 0
base_value.loc[zero_base_value] = 1
data['adjustedBaseValue'] = data['baseRatio'] * base_value
base_value.loc[na_base_value] = -1
base_value.loc[zero_base_value] = 0
return data
But then I got this warning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
self._setitem_with_indexer(indexer, value)
I have read through and don't understand how to fix it. What should I do to fix the code so that there is no more warning? I don't want to suppress the warning though.

If you want to modify the same object that was passed to the function, then this should work so long as what's passed in as data isn't already a view of another dataframe.
def transform(data):
base_value = data['baseValue']
na_base_value = base_value.isnull()
data.loc[na_base_value, 'baseValue'] = 1
zero_base_value = base_value == 0
data.loc[zero_base_value, 'baseValue'] = 1
data['adjustedBaseValue'] = data['baseRatio'] * base_value
data.loc[na_base_value, 'baseValue'] = -1
data.loc[zero_base_value, 'baseValue'] = 0
return data
If you want to work with a copy and return that manipulated copied data then this is your answer.
def transform(data):
data = data.copy()
base_value = data['baseValue'].copy()
na_base_value = base_value.isnull()
base_value.loc[na_base_value] = 1
zero_base_value = base_value == 0
base_value.loc[zero_base_value] = 1
data['adjustedBaseValue'] = data['baseValue'] * base_value
base_value.loc[na_base_value] = -1
base_value.loc[zero_base_value] = 0
return data

Related

Get excel data in order using python

I'm using python to get a list functions from the excel in order.
Excel image
I want to get result like that
Login
Submit
forgot password
Delete
(Submitted)
Next>confirm
save
cancel
upload
delete
...
I struggling with the excel indexes.
I think my logic is totally wrong.
import pandas as pd
df = pd.read_excel(fileNameMatrix,sheet_name="doTestCase")
lastRowNumber = len(df)
newDict = df.to_dict()
newJson = df.to_json()
document = docx.Document()
# print(newDict)
datalist = []
def sorting():
controller = True
dataexist = ""
j = 0
listindex = 0
while controller:
try:
if j == 0:
menulist = []
func2 = newDict['Menu']
for i in range(len(func2)):
if type(func2[i]) == str:
menulist.append()
listindex = listindex + 1
dataexist = "1"
datalist.append(menulist)
elif j > 0:
func2 = newDict['Unnamed: '+str(j)]
unnamelist = []
for i in range(len(func2)):
if type(func2[i]) == str:
dataexist = "1"
unnamelist.append()
datalist.append(unnamelist)
# unnamelist = []
if dataexist == "1":
j = j+1
except:
return datalist
controller = False
sorting()
Result
[['Login', 'delete ', 'userinfo'], ['Submit', '(submitted)',
'test'], ['forgot password', 'Next>confirm', 'Save'], ['save',
'delete'], ['cancel'], ['upload']]
Any logic suggesting would be appreciated.
IIUC, use bfill and dropna:
df = (pd.read_excel(fileNameMatrix,sheet_name='doTestCase', header=None)
.bfill(axis=1)[0].dropna())
print(df)
# Output
0 Login
1 Submit
2 forgot password
5 delete
6 (submitted)
8 Next>confirm
9 save
10 cancel
11 upload
12 delete
13 Save
15 test
18 userinfo
Name: 0, dtype: object

How to transform/format scientific notation number to 2 decimal cases?

I have this function below, that iterates through sheets in a excel workbook, and gets the data into a pandas dataframe. Unfortunately some of the values are coming in scientific notation, and I need them rounded to 2 decimal cases
def iterate_sheets(wb, number_of_fw=13):
# iterates through each sheet in the workbook
for sheet in wb.sheetnames:
ws = wb[f'{sheet}']
i = 0
while i < number_of_fw:
cel = ws[f'A{10 + i}']
cel.number_format = '#,##0.00'
fiscal_weeks.append(cel.value)
i += 1
i = 0
print(fiscal_weeks)
while i < number_of_fw:
cel = ws[f'B{10 + i}']
cel.number_format = '#,##0.00'
pageloads.append(cel.value)
i += 1
i = 0
print(pageloads)
while i < number_of_fw:
cel = ws[f'C{10 + i}']
cel.number_format = '#,##0.00'
tti.append(cel.value)
i += 1
print(tti)
platform_name = ws["B2"].value
metric_name = ws["C8"].value
# transform the data into a pandas DataFrame
data_dict = {
'fiscal_week': fiscal_weeks,
'pageload': pageloads,
'tti': tti,
'metric_name': metric_name
}
df = pd.DataFrame(data_dict)
You can try this:
pd.set_option('display.float_format', lambda x: '%.2f' % x)

While and Append in pandas python

I am trying to Call Api in a while loop and append the dataframe. But it is not appending .
#Max timestamp
MaxTs = 1635876000
api_key = "api_key"
cnt = 0
while cnt < 4:
url = f"https://min-api.cryptocompare.com/data/v2/histohour?fsym=BTC&tsym=USD&limit=2000&toTs={MaxTs}&api_key={api_key}"
r = requests.get(url)
data = r.json()
price_df = pd.DataFrame(data['Data']['Data'])
i = 0
reccnt = 2000
while i < reccnt:
currTs = price_df.iloc[i]['time']
if currTs < MaxTs:
MaxTs = currTs
i = i + 1
if cnt == 0:
#Copying the Orginal df to new df.
newdf = price_df.copy()
else:
#when counter increases append the df.
newdf.append(price_df)
print(MaxTs)
cnt = cnt + 1
You should increase cnt inside the while loop, not outside.
But after you perform a correction you will get several copies of the same price_df. Is that what you are trying to get?

Python: A value is trying to be set on a copy of a slice from a DataFrame

What am I doing to deserve this warning from Python? Would like to avoid errors, if the warning is telling me something...
if DELfile.exists():
print(DELfile)
sectorDEL = pd.read_csv(DELfile, sep=';', header=0, float_precision='round_trip')
if i == 1:
sectorDELmax = sectorDEL
i = i + 1
else:
k = 0
for sensor in sectorDEL['SENSOR_NO']:
if sectorDEL['DAMAGE_EQUIVALENT_LOAD'][k] > sectorDELmax['DAMAGE_EQUIVALENT_LOAD'][k]:
sectorDELmax['DAMAGE_EQUIVALENT_LOAD'][k] = sectorDEL['DAMAGE_EQUIVALENT_LOAD'][k]
k = k + 1
else:
print('Could not find ' +str(DELfile))
return None

Export data efficiently to CSV using python

I am using my arduino to analyze analog inputs and I am accessing the arduino using the pyfirmata library and I ambasically measuring voltages using the 6 analog inputs on my arduino Uno. I need to find a way to live time feed this data into a CSV efficiently... I am not sure on the best way to do that
Any suggestion would help but please write out the code you suggest. I would prefer to use Pandas if possible because it's easier
voltage0 through voltage5 are my variables and I am trying to report those in a nice format that will later have to be analyzed
import time
from datetime import datetime
import pyfirmata
import pandas as pd
board = pyfirmata.Arduino('/dev/ttyACM1')
analog_pin0 = board.get_pin('a:0:i')
analog_pin1 = board.get_pin('a:1:i')
analog_pin2 = board.get_pin('a:2:i')
analog_pin3 = board.get_pin('a:3:i')
analog_pin4 = board.get_pin('a:4:i')
analog_pin5 = board.get_pin('a:5:i')
it = pyfirmata.util.Iterator(board)
it.start()
analog_pin0.enable_reporting()
analog_pin1.enable_reporting()
analog_pin2.enable_reporting()
analog_pin3.enable_reporting()
analog_pin4.enable_reporting()
analog_pin5.enable_reporting()
data = []
count = 0
x = 0
start = 0
while x <= 1000:
reading0 = analog_pin0.read()
if reading0 != None:
voltage0 = reading0 * 5
voltage0 = round(voltage0,2)
else:
voltage0 = float('nan')
reading1 = analog_pin1.read()
if reading1 != None:
voltage1 = reading1 * 5
voltage1 = round(voltage1,2)
else:
voltage1 = float('nan')
reading2 = analog_pin2.read()
if reading2 != None:
voltage2 = reading2 * 5
voltage2 = round(voltage2,2)
else:
voltage2 = float('nan')
reading3 = analog_pin3.read()
if reading3 != None:
voltage3 = reading3 * 5
voltage3 = round(voltage3,2)
else:
voltage3 = float('nan')
reading4 = analog_pin4.read()
if reading4 != None:
voltage4 = reading4 * 5
voltage4 = round(voltage4,2)
else:
voltage4 = float('nan')
reading5 = analog_pin5.read()
if reading5 != None:
voltage5 = reading5 * 5
voltage5 = round(voltage5,2)
else:
voltage5 = float('nan')
datarow = {'Voltage0': voltage0, 'Voltage1': voltage1, 'Voltage2' : voltage2, 'Voltage3': voltage3, 'Voltage4' : voltage4, 'Voltage5' : voltage5, 'Time' : time.strftime("%Y-%m-%d_%H:%M:%S")}
data.append(datarow)
if count%500 == 0:
dataframe = pd.DataFrame(data)
dataframe.to_csv('data.csv')
x += 1
count += 1
#time.sleep(1)enter code here
Your code seems to work, but it's not very efficient. Every 500 iterations, you rewrite all your data instead of updating your file with the new data in the end. You might consider saving it this way instead:
if count%500 == 0:
dataframe = pd.DataFrame(data)
dataframe.to_csv('data.csv',mode='a',header=False)
data = []
If it's still not fast enough, you might consider saving your data to a binary format such as .npy (numpy format), and convert it later to csv.

Categories

Resources