I'm using Flask and getting an error at set_value. I'm reading the input from an HTML form and passing it to the code:
@app.route('/home', methods=['POST'])
def first():
    """Copy values from one uploaded CSV into another by matching a key column.

    Reads two uploaded CSV files (form fields ``first`` and ``second``),
    three integer column positions (``val1``, ``val2``, ``val3``) and a
    target column label (``str``) from the submitted form.  For every row
    of the first file, the value in column ``val2`` is written into the
    target column of each row of the second file whose column ``val3``
    equals the first file's column ``val1``.  The updated second file is
    written to ``result.csv``.
    """
    source = request.files['first']
    destination = request.files['second']
    df = pd.read_csv(source)
    df1 = pd.read_csv(destination)
    val1 = int(request.form['val1'])
    val2 = int(request.form['val2'])
    val3 = int(request.form['val3'])
    target = request.form['str']

    keys = df[df.columns[val1]]
    values = df[df.columns[val2]]
    # NOTE(review): indentation was lost in the original paste; this assumes
    # the counter advanced once per source row, i.e. keys and values are
    # paired row by row.
    for key, value in zip(keys, values):
        # Re-evaluate the match on every pass in case the target column is
        # also the column being matched against.
        matching_rows = df1.index[df1[df1.columns[val3]] == key]
        for row_label in matching_rows:
            # DataFrame.set_value() was removed in pandas 1.0; .at is the
            # label-based scalar setter that replaces it.
            df1.at[row_label, target] = value
    df1.to_csv('result.csv', index=False)
    # NOTE(review): no response is returned in the visible code; a Flask view
    # normally has to return one -- confirm against the full file.
Check your pandas version.
DataFrame.set_value() has been deprecated since pandas 0.21.0 and was removed in pandas 1.0.
Use df.at instead:
import pandas as pd
# Example frame, one list per column.
columns = {
    "A": [1, 5, 3, 4, 2],
    "B": [3, 2, 4, 3, 4],
    "C": [2, 2, 7, 3, 4],
    "D": [4, 3, 6, 12, 7],
}
df = pd.DataFrame(columns)

# .at addresses a single cell by labels: row label 2, column 'B'.
df.at[2, 'B'] = 100
A B C D
0 1 3 2 4
1 5 2 2 3
2 3 100 7 6
3 4 3 3 12
4 2 4 4 7
Related
Let's consider two pandas dataframes:
import numpy as np
import pandas as pd
df = pd.DataFrame([1, 2, 3, 2, 5, 4, 3, 6, 7])
check_df = pd.DataFrame([3, 2, 5, 4, 3, 6, 4, 2, 1])
I want to do the following:
If df[1] > check_df[1] or df[2] > check_df[1] or df[3] > check_df[1], then we assign 1 to df[1], and -1 otherwise.
If df[2] > check_df[2] or df[3] > check_df[2] or df[4] > check_df[2], then we assign 1 to df[2], and -1 otherwise.
We apply the same rule through to the end of the DataFrame.
My primitive code is the following:
# Slide a 3-row window down df; a start row is flagged 1 when any value in
# its window exceeds the check value at that row, and -1 otherwise.
# The last three rows are never visited and keep their original values.
df_copy = df.copy()
for i in range(len(df) - 3):
    window_exceeds = (df.iloc[i:i + 3] > check_df.iloc[i]).any()[0]
    df_copy.iloc[i] = 1 if window_exceeds else -1
df_copy
0
0 -1
1 1
2 -1
3 1
4 1
5 -1
6 3
7 6
8 7
Could you please give me some advice on whether it is possible to do this without a loop?
IIUC, this is easily done with a rolling.max (N is the window size, 3 in your example):
# Window length: each row is compared using itself and the next two rows.
N = 3
# rolling() looks backwards, so the result is shifted up by N-1 rows to make
# row i hold max(df[0][i:i+N]).  The last N-1 rows become NaN after the
# shift, compare as False and therefore get -1.
df['out'] = np.where(
    df[0].rolling(N, min_periods=1).max().shift(1 - N).gt(check_df[0]),
    1, -1)
output:
0 out
0 1 -1
1 2 1
2 3 -1
3 2 1
4 5 1
5 4 -1
6 3 1
7 6 -1
8 7 -1
to keep the last items as is:
# Forward-looking window max; rows without a full window of N values are NaN.
m = df[0].rolling(N).max().shift(1 - N)
flags = np.where(m > check_df[0], 1, -1)
# Keep the original value where no full window exists, the flag elsewhere.
df['out'] = df[0].where(m.isna(), flags)
output:
0 out
0 1 -1
1 2 1
2 3 -1
3 2 1
4 5 1
5 4 -1
6 3 1
7 6 6
8 7 7
Although @mozway has already provided a very smart solution, I would like to share my approach as well, which was inspired by this post.
You could create your own object that compares a series with a rolling series. The comparison could be performed by typical operators, i.e. >, < or ==. If at least one comparison holds, the object would return a pre-defined value (given in list returns_tf, where the first element would be returned if the comparison is true, and the second if it's false).
Possible Code:
import numpy as np
import pandas as pd
df = pd.DataFrame([1, 2, 3, 2, 5, 4, 3, 6, 7])
check_df = pd.DataFrame([3, 2, 5, 4, 3, 6, 4, 2, 1])
class RollingComparison:
    """Compare each value of one series with a forward-looking window of another.

    Row ``i`` of ``comparing_series`` is compared with rows ``i`` to
    ``i + window - 1`` of ``rolling_series``.  The last ``window`` rows are
    not compared; ``assign`` passes them through unchanged from
    ``rolling_series``.
    """

    # Applied as ``comparator(comparing_value, window_value)``.  Any option
    # not listed here falls back to equality, as the original code did.
    _COMPARATORS = {"smaller": np.less, "greater": np.greater}

    def __init__(self, comparing_series: pd.Series, rolling_series: pd.Series, window: int):
        """Store the raw arrays.

        Args:
            comparing_series: values compared against each window.
            rolling_series: values the windows are taken from.
            window: number of consecutive rows per window (at least 1).

        Raises:
            ValueError: if ``window`` is smaller than 1.
        """
        if window < 1:
            raise ValueError("window must be at least 1")
        # The trailing `window` rows are never compared, so drop them up front.
        self.comparing_series = comparing_series.values[:-window]
        self.rolling_series = rolling_series.values
        self.window = window

    def rolling_window_mask(self, option: str = "smaller"):
        """Return a boolean array, True where any comparison in a row's window holds.

        Args:
            option: ``"smaller"`` tests ``comparing < window value``,
                ``"greater"`` tests ``comparing > window value``; any other
                value tests equality.
        """
        # sliding_window_view is the bounds-checked replacement for a
        # hand-built as_strided view.  The final window is dropped so the
        # number of windows matches the truncated comparing series.
        windows = np.lib.stride_tricks.sliding_window_view(
            self.rolling_series, self.window, axis=-1
        )[:-1]
        compare = self._COMPARATORS.get(option, np.equal)
        return compare(self.comparing_series.reshape(-1, 1), windows).any(axis=1)

    def assign(self, option: str = "rolling", returns_tf=(1, -1)):
        """Return the flags for the compared rows followed by the untouched tail.

        Args:
            option: comparison mode, see ``rolling_window_mask``.  The
                default is not a named mode and therefore tests equality.
            returns_tf: two-item sequence ``(value_if_true, value_if_false)``.
                The default is a tuple so no mutable object is shared
                between calls.
        """
        mask = self.rolling_window_mask(option)
        flags = np.where(mask, returns_tf[0], returns_tf[1])
        return np.concatenate((flags, self.rolling_series[-self.window:]))
The assignments can be achieved as follows:
# One comparison object serves all three modes; each mode fills its own column.
roller = RollingComparison(check_df[0], df[0], 3)
for option, column in (
    ("smaller", "rolling_smaller_checking"),
    ("greater", "rolling_greater_checking"),
    ("equal", "rolling_equals_checking"),
):
    check_df[column] = roller.assign(option=option)
Output (the column rolling_smaller_checking equals your desired output):
0 rolling_smaller_checking rolling_greater_checking rolling_equals_checking
0 3 -1 1 1
1 2 1 -1 1
2 5 -1 1 1
3 4 1 1 1
4 3 1 -1 1
5 6 -1 1 1
6 4 3 3 3
7 2 6 6 6
8 1 7 7 7
I would like to add a column in a data frame when another column is increasing/decreasing or stays the same with:
1 -> increasing, 0 -> same, -1 -> decreasing
So if df['battery'] = [1,2,3,4,7,9,3,3,3,]
I would like state to be df['state'] = [1,1,1,1,1,-1,0,0]
This should do the trick!
a = [1, 2, 3, 4, 7, 9, 3, 3, 3]
# For each neighbouring pair, True - False arithmetic yields the sign of the
# step: 1 when rising, -1 when falling, 0 when unchanged.
b = [(nxt > cur) - (nxt < cur) for cur, nxt in zip(a, a[1:])]
print(b)
You could use pd.Series.diff method to get the difference between consecutive values, and then assign the necessary state values by using boolean indexing:
import pandas as pd
df = pd.DataFrame({'battery': [1, 2, 3, 4, 7, 9, 3, 3, 3]})
diff = df['battery'].diff()
# Label each row by the direction of its step from the previous row.  The
# first row has no predecessor, matches no mask and stays NaN.
for mask, label in ((diff > 0, 1), (diff == 0, 0), (diff < 0, -1)):
    df.loc[mask, 'state'] = label
print(df)
# battery state
# 0 1 NaN
# 1 2 1.0
# 2 3 1.0
# 3 4 1.0
# 4 7 1.0
# 5 9 1.0
# 6 3 -1.0
# 7 3 0.0
# 8 3 0.0
Or, alternatively, one could use np.select:
import numpy as np
diff = df['battery'].diff()
falling, rising = diff < 0, diff > 0
# Rows matching neither condition get the default 0 -- that covers unchanged
# values and also the leading NaN, so be careful with the first row.
df['state'] = np.select([falling, rising], [-1, 1], default=0)
print(df)
# battery state
# 0 1 0
# 1 2 1
# 2 3 1
# 3 4 1
# 4 7 1
# 5 9 1
# 6 3 -1
# 7 3 0
# 8 3 0
So here's your dataframe:
>>> import pandas as pd
>>> data = [[[1,2,3,4,7,9,3,3,3]]]
>>> df = pd.DataFrame(data, columns = ['battery'])
>>> df
battery
0 [1, 2, 3, 4, 7, 9, 3, 3, 3]
And finally use apply and a lambda function in order to generate the required result:
>>> df['state'] = df.apply(lambda row: [1 if t - s > 0 else -1 if t-s < 0 else 0 for s, t in zip(row['battery'], row['battery'][1:])], axis=1)
>>> df
battery state
0 [1, 2, 3, 4, 7, 9, 3, 3, 3] [1, 1, 1, 1, 1, -1, 0, 0]
Alternatively, if you want the exact difference between each element in the list, you can use the following:
>>> df['state'] = df.apply(lambda row: [t - s for s, t in zip(row['battery'], row['battery'][1:])], axis=1)
>>> df
battery state
0 [1, 2, 3, 4, 7, 9, 3, 3, 3] [1, 1, 1, 3, 2, -6, 0, 0]
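Try np.sign (older answers spell it pd.np.sign, but the pd.np alias was deprecated in pandas 1.0 and removed in pandas 2.0):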
# The pd.np alias was deprecated in pandas 1.0 and removed in 2.0, so NumPy
# is imported and called directly.
import numpy as np
# fillna(1) makes the first row (which has no predecessor) count as rising.
np.sign(df.battery.diff().fillna(1))
0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
5 1.0
6 -1.0
7 0.0
8 0.0
Name: battery, dtype: float64
I have two data frames and I want to use pandas syntax or methods to compare them and update values from the larger data frame to the smaller data frame based on similar keys.
import numpy
import pandas as pd
temp = pd.read_csv('.\\..\\..\\test.csv')
temp2 = pd.read_excel('.\\..\\..\\main.xlsx')
lenOfFile = len(temp.iloc[:, 1])
lenOfFile2 = len(temp2.iloc[:, 1])

# Map first column -> second column for each file; a repeated key keeps the
# value from its last row.
dict1 = {temp.iloc[row, 0]: temp.iloc[row, 1] for row in range(lenOfFile)}
dict2 = {temp2.iloc[row, 0]: temp2.iloc[row, 1] for row in range(lenOfFile2)}

# Replace every value in dict1 with dict2's value for the same key, or with
# a marker string when dict2 has no such key.
for key in dict1:
    dict1[key] = dict2[key] if key in dict2 else "Not in dict2"
I want the same behavior as what I wrote.
You should have put a Minimal, Complete and Verifiable Example. Please, make sure in the future we can run your code just by pasting into our IDE. I spent way too much time on that question haha
import pandas as pd
temp = pd.DataFrame({'A': [20, 4, 60, 4, 8], 'B': [2, 4, 5, 6, 7]})
temp2 = pd.DataFrame({
    'A': list(range(1, 11)),
    'B': [1, 2, 3, 10, 5, 6, 70, 8, 9, 10],
})
print(temp)
print(temp2)
#     A  B
# 0  20  2
# 1   4  4
# 2  60  5
# 3   4  6
# 4   8  7
#     A   B
# 0   1   1
# 1   2   2
# 2   3   3
# 3   4  10
# 4   5   5
# 5   6   6
# 6   7  70
# 7   8   8
# 8   9   9
# 9  10  10

# Lookup table from the second frame: key column 'A' -> value column 'B'.
mapping = {key: value for key, value in zip(temp2['A'], temp2['B'])}
# Look every key of the first frame up in the table; keys without an entry
# get the placeholder text instead.
temp['B'] = temp['A'].apply(lambda key: mapping.get(key, 'No matching'))
print(temp)
# A B
# 0 20 No matching
# 1 4 10
# 2 60 No matching
# 3 4 10
# 4 8 8
How should I write multiple rows to single user id
example
id = ['userid1','userid2'....'useridn']
ndarry1 = [1,2,3,4,5...]
ndarry2 = [1,2,3,4,5...]
.
.
ndarryn = [1,2,3,4,5...]
Expected Output: Dataframe
id value
userid1 1
userid1 2
userid1 3
. .
. .
userid2 1
Can anybody suggest how should I do it.?
id = ['userid1', 'userid2', 'userid3', 'userid4']
ndarray1 = [1, 2, 3, 4]
ndarray2 = [1, 2, 3, 4]
ndarray3 = [1, 2, 3, 4]
ndarray4 = [1, 2, 3, 4]
n = 4  # number of values taken from each array

# Pair every user id with its array explicitly.  Building variable names as
# strings and eval()-ing them is fragile and unsafe, and looking the position
# up with id.index(i) picks the wrong array when an id occurs twice.
arrays = [ndarray1, ndarray2, ndarray3, ndarray4]

ID = []
value = []
for user_id, array in zip(id, arrays):
    # One output row per value: repeat the id alongside each of its values.
    ID.extend([user_id] * n)
    value.extend(array[:n])
df = pd.DataFrame({'ID': ID, 'Value': value})
Output
ID Value
0 userid1 1
1 userid1 2
2 userid1 3
3 userid1 4
4 userid2 1
5 userid2 2
6 userid2 3
7 userid2 4
8 userid3 1
9 userid3 2
10 userid3 3
11 userid3 4
12 userid4 1
13 userid4 2
14 userid4 3
15 userid4 4
Different approach
id = ['userid1', 'userid2', 'userid3']
ndarray1 = [1, 2, 3, 4]
ndarray2 = [1, 2, 3, 4]
ndarray3 = [1, 2, 3, 4]
concat = [ndarray1, ndarray2, ndarray3]

n = []     # flattened values
user = []  # user id repeated once per value
for user_id, values in zip(id, concat):
    user.extend([user_id] * len(values))
    n.extend(values)

# Name the columns when building the frame.  The earlier version stacked
# [n, user] and then labelled the result ['id', 'value'], which put the
# numbers under 'id' and the user ids under 'value'.
df = pd.DataFrame({'id': user, 'value': n})
I'd like to create a new dataframe using the same values from another dataframe, unless there is a 0 value. If there is a 0 value, I'd like to find the average of the entry before and after.
For Example:
df = A B C
5 2 1
3 4 5
2 1 0
6 8 7
I'd like the result to look like the df below:
df_new = A B C
5 2 1
3 4 5
2 1 6
6 8 7
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [5, 3, 2, 6], 'B': [2, 4, 1, 8], 'C': [1, 5, 0, 7]})
Nrows = len(df)


def run(col):
    """Return column ``col`` of ``df`` as a list with interior zeros averaged out.

    A zero that has a row both before and after it is replaced by the mean
    of those two neighbours.  A zero in the first or last row has only one
    neighbour and is left unchanged.  Replacements are applied from top to
    bottom, so with consecutive zeros the second one sees the already
    replaced value above it.

    Args:
        col: label of the column to process.

    Returns:
        The column's values as a plain list.
    """
    values = df[col].tolist()
    last = len(values) - 1
    for index in np.where(df[col].to_numpy() == 0)[0]:
        # Strictly interior rows only: index + 1 must exist, so the last row
        # is excluded (the previous bound allowed it and raised IndexError).
        if 0 < index < last:
            values[index] = 0.5 * (values[index + 1] + values[index - 1])
    return values


# Build the result column by column so the original column names are kept.
newDF = pd.DataFrame({col: run(col) for col in df.columns})