Custom math function in panda dataframe with two columns - python

I would like to get this custum function for a panda dataframe to work.
It is a simple function with two inputs
wordCount
imageCount
and supposed to calculate the reading time of a text in a panda dataframe.
c = ImageCount
x = WordCount
(5.717938 + (12.03401 - 5.717938)/(1 + (c /3.579499)^4.092419))* c) + x * 0.0037736111111111113
I tried it in a couple of ways, but could not get it to work properly.
def readingT(df, y="imageCount", x="wordCount"):
readingTimeImage = (5.717938 + (12.03401 - 5.717938)/(1 + (c/3.579499)^4.092419))* c
readingTimeWords = 0.0037736111111111113 * x
return readingTimeImage + readingTimeWords
def readingT2(c="imageCount", w="wordCount"):
return ((5.717938 + (12.03401 - 5.717938)/(1 + (c/3.579499)^4.092419))* c + 0.0037736111111111113 * w)
readingT2.apply(readingT, c="imageCount", w="wordCount")
#Try next
def readingT3(x, y):
(((5.717938 + (12.03401 - 5.717938)/(1 + ( x /3.579499)**4.092419)) * x) + 0.0037736111111111113 * y)
readingT3.apply(lambda x: rule(x["imageCount"], x["wordCount"]), axis = 1)
Every single one of them gives throws out an error.
Cheers in advance for any help.

def f(c, x):
return (5.717938 + (12.03401 - 5.717938)/(1 + (c /3.579499)^4.092419))* c) + x * 0.0037736111111111113
df['reading_time'] = df.apply(lambda x: f(x.imageCount, x.wordCount), axis=1)

Related

Mean from a list with a condition in Python

list = [[159.2213, 222.2223, 101.2122]
[359.2222, 22.2210, 301.2144]]
if list[1][0] < list[0][0]:
avg = (list[1][0] + list[0][0] - 200)/2
else:
avg = (list[1][0] + list[0][0] + 200)/2
Hello! I want to do this for every column and output the results in another list.
Fix
You may loop iterate the number of cols there is
values = [[159.2213, 222.2223, 101.2122], [359.2222, 22.2210, 301.2144]]
avgs = []
for idx_col in range(len(values[0])):
if values[1][idx_col] < values[0][idx_col]:
avg = (values[1][idx_col] + values[0][idx_col] - 200) / 2
else:
avg = (values[1][idx_col] + values[0][idx_col] + 200) / 2
avgs.append(avg)
Simplify
You can use zip to iterate on both rows at a time, and simplify the if/else condition
avgs = []
for first_row, second_row in zip(*values):
factor = -1 if second_row < first_row else 1
avgs.append((first_row + second_row + (200 * factor)) / 2)
Best with numpy
Easy syntax and best performance
import numpy as np
values = np.array(values)
res = values.sum(axis=0) / 2
res += np.where(values[1] < values[0], -100, 100)
A list comprehension would look like this:
avg = [(x + y + (200 if x <= y else -200)) / 2 for x, y in zip(*lst)]
Arguably easier if you use numpy:
arr = np.array(lst)
avg = 0.5 * (arr.sum(axis=0) + np.copysign(200, np.diff(arr, axis=0)))
lis = [[159.2213, 222.2223, 101.2122],
[359.2222, 22.2210, 301.2144]]
res = []
for i in range(len(lis[0])):
if lis[1][i] < lis[0][i]:
res.append((lis[1][i] + lis[0][i] - 200)/2)
else:
res.append((lis[1][i] + lis[0][i] + 200)/2)
This should work, however using numpy would be a better solution for these kind of problems.
You can do it like this:
list = [[159.2213, 222.2223, 101.2122]
[359.2222, 22.2210, 301.2144]]
results = []
for x,y in zip(list[0],list[1]):
if y < x:
avg = (y + x - 200)/2
else:
avg = (y + x + 200)/2
results.append(avg)

Dynamic query creation using python reduce function?

Currently, the below code dynamically creates the query as:-
code:
zip_cols = list(zip(['name','address'],
['name_1','address_1']))
self.matches = self.features[
(
[
reduce(
lambda x, y: x + y,
[self.features[a + "_" + c[0] + "_" + c[1]] for a in self._algos],
)
for c in zip_cols
][0]
> (self.input_args.get('threshold', 0.7) * 4)
)
& (
[
reduce(
lambda x, y: x + y,
[self.features[a + "_" + c[0] + "_" + c[1]] for a in self._algos],
)
for c in zip_cols
][1]
> (self.input_args.get('threshold', 0.7) * 4)
)].copy()
query:
matches = features[(
(
(features['fw_name_name_1'] / 100)
+ features['sw_name_name_1']
+ features['jw_name_name_1']
+ features['co_name_name_1']
) > 2.8
)
&
(
(
(features['fw_address_address_1'] / 100)
+ features['sw_address_address_1']
+ features['jw_address_address_1']
+ features['co_address_address_1']
) > 2.8
)
].copy()
but this query works if there are 2 columns in source_compare_names and fails for 1 or more than 2. How can we fix that here?
With the minumum input and context I got this should get you started. The idea is that you dynamically build up the filter criteria as a string, join them and evaluate them.
threshold = self.input_args.get('threshold', 0.7) * 4
column_selection = [reduce(lambda x, y: x + y,
[self.features[a + "_" + c[0] + "_" + c[1]] for a in self._algos]) for c in zip_cols]
size = 10 # number of items you need
total_filter_list = []
for i in range(size):
# build the filter columns as list of strings
total_filter_list.append(f'(column_selection[{i}] > {threshold})')
# join the list of strings with '&', build the total filter criteria as string
total_filter_string = ' & '.join(total_filter_list)
# evaluate the filter
self.features[eval(total_filter_string)]

Python output formatting issues

Not sure about the syntax of the output I am receving. Any help would be appreciated.
Here is my code:
import numpy
def g(): #generate random complex values
return numpy.random.random(1) + numpy.random.random(1) *1j
p = numpy.poly1d(numpy.squeeze([g(),g(),g()])) # test function p
pprime = numpy.polyder(p) #derivative of p
print 'Our p(x) is {} '. format(p)
print('\n') # new line
print'Our pprime(x) is {} '. format(pprime) #apply newtons method to p
print('\n') # new line
#apply newtons method to p
def root_newton ( f, df, tolerance = 1.0e-6):
dx = 2 * tolerance
x=0
while dx > tolerance:
x1 = x - f(x)/df(x)
dx = abs (x - x1)
x = x1
return x
print('Our first root is at {}'.format(root_newton(p,pprime)))
print('\n') # new line
Here's the output:
Our p(x) is 2
(0.6957 + 0.683j) x + (0.3198 + 0.5655j) x + (0.9578 + 0.1899j)
Our pprime(x) is
(1.391 + 1.366j) x + (0.3198 + 0.5655j)
Our first root is at (0.00925817978737+0.830966156841j)
The correct roots are [-0.64968928-1.01513333j 0.00925818+0.83096616j]
What does the 2 above the second component in my first line outputted mean? I can't find anything similar to my question online. I am guessing it may mean the x component is squared but I'm not sure? This is python 3 by the way.
The 2 is the exponent on the first x, misaligned because you put text before it on the same line.
If we take your output:
Our p(x) is 2
(0.6957 + 0.683j) x + (0.3198 + 0.5655j) x + (0.9578 + 0.1899j)
and remove the text you prepended:
2
(0.6957 + 0.683j) x + (0.3198 + 0.5655j) x + (0.9578 + 0.1899j)
the intended meaning of the 2 becomes clearer.

Python script to generate gradients not working

I have this python script to generate x,y,z lists and u,v,w lists such that u[i],v[i],w[i] is the gradient vector for point x[i],y[i],z[i].
It doesn't seem to be getting the right values. Does anyone know whats wrong?
from math import *
def coordinates(lst, f, gradx, grady, gradz):
lst = lst[1:-1].split(",")
lst = [float(x.strip()) for x in lst]
xlst = []
ylst = []
zlst = []
ulst = []
vlst = []
wlst = []
for x in lst:
for y in lst:
xlst.append(str(x))
ylst.append(str(y))
zlst.append(str(f(x,y)))
ulst.append(str(gradx(x,y)))
vlst.append(str(grady(x,y)))
wlst.append(str(gradz(x,y)))
string = "xlst=[" + ",".join(xlst) + "]\n" + \
"ylst=[" + ",".join(ylst) + "]\n" + \
"zlst=[" + ",".join(zlst) + "]\n" + \
"ulst=[" + ",".join(ulst) + "]\n" + \
"vlst=[" + ",".join(vlst) + "]\n" + \
"wlst=[" + ",".join(wlst) + "]\n"
return string
lst = "{0, 2, 4, 6, 8, 10}"
# get function in the form f(x,y)=z or here its y^2 - x^2 - z = 0
f = lambda x,y: y**2 - x**2
# get the three gradient functions (df/dx, df/dy, df/dz)
gx = lambda x,y: -2*x
gy = lambda x,y: 2*y
gz = lambda x,y: -1
c = coordinates(lst, f, gx, gy, gz)
print c

generate polynomial in python

I am trying to make a function which can print a polynomial of order n of x,y
i.e. poly(x,y,1) will output c[0] + c[1]*x + c[2]*y
i.e. poly(x,y,2) will output c[0] + c[1]*x + c[2]*y + c[3]*x**2 + c[4]*y**2 + c[5]*x*y
Could you give me some ideas? Maybe itertools?
You could try to start from something like
def poly(x,y,n):
counter = 0
for nc in range(n+1):
for i in range(nc+1):
print "c[", counter, "]",
print " * ", x, "**", i,
print " * ", y, "**", nc-i,
print " + ",
counter += 1
For example
poly("x", "y", 2)
will produce
c[ 0 ] * x ** 0 * y ** 0 + c[ 1 ] * x ** 0 * y ** 1 + c[ 2 ] * x ** 1 * y ** 0 + c[ 3 ] * x ** 0 * y ** 2 + c[ 4 ] * x ** 1 * y ** 1 + c[ 5 ] * x ** 2 * y ** 0 +
Build in ifs, if you want to suppress undesired output.
Since you wanted a functional solution with itertools, here's a one-liner:
import itertools as itt
from collections import Counter
n = 3
xy = ("x", "y") # list of variables may be extended indefinitely
poly = '+'.join(itt.starmap(lambda u, t: u+"*"+t if t else u, zip(map(lambda v: "C["+str(v)+"]", itt.count()),map(lambda z: "*".join(z), map(lambda x: tuple(map(lambda y: "**".join(map(str, filter(lambda w: w!=1, y))), x)), map(dict.items, (map(Counter, itt.chain.from_iterable(itt.combinations_with_replacement(xy, i) for i in range(n+1))))))))))
That would give you
C[0]+C[1]*x+C[2]*y+C[3]*x**2+C[4]*y*x+C[5]*y**2+C[6]*x**3+C[7]*y*x**2+C[8]*y**2*x+C[9]*y**3
Note, the order of coefficients is slightly different. This will work not only for any n, but also for any number of variables (x, y, z, etc...)
Just for laughs
Slightly more generalized:
from itertools import product
def make_clause(c, vars, pows):
c = ['c[{}]'.format(c)]
vp = (['', '{}', '({}**{})'][min(p,2)].format(v,p) for v,p in zip(vars,pows))
return '*'.join(c + [s for s in vp if s])
def poly(vars, max_power):
res = (make_clause(c, vars, pows) for c,pows in enumerate(product(*(range(max_power+1) for v in vars))))
return ' + '.join(res)
then poly(['x', 'y'], 2) returns
"c[0] + c[1]*y + c[2]*(y**2) + c[3]*x + c[4]*x*y + c[5]*x*(y**2) + c[6]*(x**2) + c[7]*(x**2)*y + c[8]*(x**2)*(y**2)"

Categories

Resources