I'm trying to overwrite a columns of 1's if there are duplicates but for some reason this code won't >overwrite the 1's, it does something else.
Sample input
dhid,midx,midy,midz,a,dtype
AAA1,321.235,200.436,59.72,7,RR
AAA7,321.235,200.436,59.72,-99,CR
AAA2,321.235,200.236,68.7,15,CR
Example Output:
dhid,midx,midy,midz,a,dtype,KEEPVA
AAA1,321.235,200.436,59.72,7,RR,1
AAA7,321.235,200.436,59.72,-99,CR,0 --> GETS RID OF THIS DUPLICATE THAT IS MISSING
AAA2,321.235,200.236,68.7,15,CR,1
My try so far
dup_dict = []
flg = False
for i in range(npts-1):
d2 = 0.0
for idir in range(2,-2,-1):
if (idir==-1 and d2 <= d2tol ):
dup_dict.append([i,i+1,d2])
break
d = np.abs(xyz[i,idir]-xyz[i+1,idir])
if (d > dtol):
break
d2 = d2 + d ** 2
print('Total Duplicates Found: %d'%(len(dup_dict)))
df['KEEPVA'] = np.ones(len(df.index))
fmtlist = ['%s','%0.5f','%0.5f','%0.5f','%0.5f','%d','%s']
fl = open('dup_gq.log','w')
fl.write(','.join(['dhid','midx','midy','midz','a','KEEPVA','dtype'])+'\n')
for i0,i1,d in dup_dict:
aufa0 = df['a'].values[i0]
aufa1 = df['a'].values[i1]
dtype0 = df['dtype'].values[i0]
dtype1 = df['dtype'].values[i1]
if (dtype0!=dtype1):
print(df['dhid'].values[i0],df['dhid'].values[i1],dtype0,dtype1,a0,a1)
if a0 > a1:
df.loc[i1,'KEEPVA'] = 0
if a0 <= a1:
df.loc[i0,'KEEPVA'] = 0
print(df['dhid'].values[i0],df['dhid'].values[i1],df['KEEPVA'].values[i0],
df['KEEPVA'].values[i1])
a = df[['dhid','midx','midy','midz','a','KEEPVA','dtype']].values[[i0,i1],:]
np.savetxt(fl,a,fmt=fmtlist,delimiter=',')
fl.close()
Related
I am trying to create an indicator that will find all the divergences between 2 signals.
The output of the function so far looks like this
But the problem is that is painfully slow when I am trying to use it with long signals. Could any of you guys help me to make it faster if is possible?
My code:
def find_divergence(price: pd.Series, indicator: pd.Series, width_divergence: int, order: int):
div = pd.DataFrame(index=range(price.size), columns=[
f"Bullish_{width_divergence}_{order}",
f"Berish_{width_divergence}_{order}"
])
div[f'Bullish_idx_{width_divergence}_{order}'] = False
div[f'Berish_idx_{width_divergence}_{order}'] = False
def calc_argrelextrema(price_: np.numarray):
return argrelextrema(price_, np.less_equal, order=order)[0]
price_ranges = []
for i in range(len(price)):
price_ranges.append(price.values[0:i + 1])
f = []
with ThreadPoolExecutor(max_workers=16) as exe:
for i in price_ranges:
f.append(exe.submit(calc_argrelextrema, i))
prices_lows = SortedSet()
for r in concurrent.futures.as_completed(f):
data = r.result()
for d in reversed(data):
if d not in prices_lows:
prices_lows.add(d)
else:
break
price_lows_idx = pd.Series(prices_lows)
for idx_1 in range(price_lows_idx.size):
min_price = price[price_lows_idx[idx_1]]
min_indicator = indicator[price_lows_idx[idx_1]]
for idx_2 in range(idx_1 + 1, idx_1 + width_divergence):
if idx_2 >= price_lows_idx.size:
break
if price[price_lows_idx[idx_2]] < min_price:
min_price = price[price_lows_idx[idx_2]]
if indicator[price_lows_idx[idx_2]] < min_indicator:
min_indicator = indicator[price_lows_idx[idx_2]]
consistency_price_rd = min_price == price[price_lows_idx[idx_2]]
consistency_indicator_rd = min_indicator == indicator[price_lows_idx[idx_1]]
consistency_price_hd = min_price == price[price_lows_idx[idx_1]]
consistency_indicator_hd = min_indicator == indicator[price_lows_idx[idx_2]]
diff_price = price[price_lows_idx[idx_1]] - price[price_lows_idx[idx_2]] # should be neg
diff_indicator = indicator[price_lows_idx[idx_1]] - indicator[price_lows_idx[idx_2]] # should be pos
is_regular_divergence = diff_price > 0 and diff_indicator < 0
is_hidden_divergence = diff_price < 0 and diff_indicator > 0
if is_regular_divergence and consistency_price_rd and consistency_indicator_rd:
div.at[price_lows_idx[idx_2], f'Bullish_{width_divergence}_{order}'] = (price_lows_idx[idx_1], price_lows_idx[idx_2])
div.at[price_lows_idx[idx_2], f'Bullish_idx_{width_divergence}_{order}'] = True
elif is_hidden_divergence and consistency_price_hd and consistency_indicator_hd:
div.at[price_lows_idx[idx_2], f'Berish_{width_divergence}_{order}'] = (price_lows_idx[idx_1], price_lows_idx[idx_2])
div.at[price_lows_idx[idx_2], f'Berish_idx_{width_divergence}_{order}'] = True
return div
so i try to solve https://open.kattis.com/problems/10kindsofpeople with my python code, i think the code is good and passed 22/25 test case, but there is a runtime error in testcase 23.
the code is here:
if __name__ == "__main__":
def walk(arr,r1,c1,r2,c2,rows,cols, history):
history['{0},{1}'.format(r1,c1)] = True
# print('{},{}-{},{}'.format(r1,c1,r2,c2))
if arr[r1][c1] == arr[r2][c2]:
if r1 == r2 and c1 == c2:
return True
if r1-1 >= 0 and '{0},{1}'.format(r1-1, c1) not in history:
atas = walk(arr, r1-1,c1,r2,c2,rows,cols,history)
else:
atas=False
if r1+1 < rows and '{0},{1}'.format(r1+1, c1) not in history:
bawah = walk(arr,r1+1,c1,r2,c2,rows,cols,history)
else:
bawah=False
if c1-1 >= 0 and '{0},{1}'.format(r1, c1-1) not in history:
kiri = walk(arr,r1,c1-1,r2,c2,rows,cols,history)
else:
kiri=False
if c1+1 < cols and '{0},{1}'.format(r1, c1+1) not in history:
kanan = walk(arr,r1,c1+1,r2,c2,rows,cols,history)
else:
kanan = False
# if one of them == true , there is a path to destination
if atas or bawah or kiri or kanan:
return True
else:
return False
else:
return False
map = input()
rows, cols = map.split(" ")
rows = int(rows)
cols = int(cols)
arr_row = []
for i in range(int(rows)):
str_inp = input()
list_int = [int(i) for i in str_inp]
arr_row.append(list_int)
coord_row=input()
coord_pair=[]
for i in range(int(coord_row)):
r1,c1,r2,c2 = input().split(" ")
coord_pair.append([r1,c1,r2,c2])
# print(arr_row)
for c in coord_pair:
r1 = int(c[0]) - 1
c1 = int(c[1]) - 1
r2 = int(c[2]) - 1
c2 = int(c[3]) - 1
history = {}
if arr_row[r1][c1] != arr_row[r2][c2]:
print("neither")
elif walk(arr_row, r1, c1, r2, c2, rows, cols, history):
ret = 'binary' if arr_row[r1][c1] == 0 else 'decimal'
print(ret)
else:
print('neither')
i think there is an error in input with the hidden test case, i would appreciate if anyone can find the bugs, thank you
Im parsed list of crew witch one looks like:
20;mechanic;0;68
21;cook;0;43
22;scientist;0;79
23;manager;1;65
24;mechanic;1;41
etc
And now I'm trying to figure out how to count number of workers who have 60 or more stamina( the last element in each employee )
There is my code:
with open('employee.txt', 'r') as employee_list:
count = 0
for employee in employee_list.readlines():
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
count += 1
print(count)
Print from terminal:
1
2
3
...
90
And there is the right answer I think, but is there anyway to get only one 'total' count, not a 90ty strings ?
Just print one line after the loop is done.
with open('employee.txt', 'r') as employee_list:
count = 0
for employee in employee_list.readlines():
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
count += 1
print(count)
But I would also recommend using pandas for data manipulation. For example:
df = pd.read_csv('employee.txt', sep=';')
df.columns = ['col1', 'col2', 'col3', 'stamina']
Then just filter and get the size:
df[df.stamina >= 60].size
So after a day of thinking I wrote this and get right answer ( maybe someone will find this helpful):
def total_resist_count():
# with open('employee.txt', 'r') as employee_list:
employee_list = [input() for i in range(120)]
candidates = []
for employee in employee_list:
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
candidates.append(employee_data)
return candidates
required_professionals = {
'computers specialist': 5,
'cook': 3,
'doctor': 5,
'electrical engineer': 4,
'manager': 1,
'mechanic': 8,
'scientist': 14
}
expedition_total = 40
female_min = 21
male_min = 12
def validate_solution(cur_team, num_females, num_males):
global expedition_total, female_min, male_min
if sum(cur_team) != expedition_total or num_females < female_min or num_males < male_min:
return False
num_of_free_vacancies = 0
for k in required_professionals:
num_of_free_vacancies += required_professionals[k]
if num_of_free_vacancies > 0:
return False
return True
TEAM = None
def backtrack(candidates, cur_team, num_females, num_males):
global required_professionals, expedition_total, TEAM
if sum(cur_team) > expedition_total or TEAM is not None:
return
if validate_solution(cur_team, num_females, num_males):
team = []
for i, used in enumerate(cur_team):
if used == 1:
team.append(candidates[i])
TEAM = team
return
for i in range(len(candidates)):
if cur_team[i] == 0 and required_professionals[candidates[i][1]] > 0:
cur_team[i] = 1
required_professionals[candidates[i][1]] -= 1
if candidates[i][2] == '1':
backtrack(candidates, cur_team, num_females, num_males + 1)
else:
backtrack(candidates, cur_team, num_females + 1, num_males)
required_professionals[candidates[i][1]] += 1
cur_team[i] = 0
if __name__ == '__main__':
ec = decode_fcc_message()
candidates = total_resist_count(ec)
cur_team = [0] * len(candidates)
backtrack(candidates, cur_team, 0, 0)
s = ""
for t in TEAM:
s += str(t[0]) + ';'
print(s)
Below is a part of my python script, which reads data in daily automation in Linux system & print it in mail body & sends. my input file changes daily and it works perfectly if input file contains all strings (or) numarical values.
If the input file has empty string/value in any of rows, it throws IndexError and stops printing the data.
f = open('INPUTfile')
lines = f.readlines()
count=len(lines)
f.close()
body1="""
"""
z=0
while (z<count):
test = lines[z]
hello = test.split(',')
a = hello[0:1]
a1 = a[0]
b = hello[1:2]
b1 = b[0]
c = hello[2:3]
c1 = c[0]
d = hello[3:4]
d1 = d[0]
e = hello[4:5]
e1 = e[0]
f = hello[5:6]
f1 = f[0]
g = hello[6:7]
g1 = g[0]
h = hello[7:8]
h1 = h[0]
i = hello[8:9]
i1 = i[0]
j = hello[9:10]
j1 = j[0]
k = hello[10:11]
k1 = k[0]
l = hello[11:12]
l1 = l[0]
m = hello[12:13]
m1 = m[0]
d1 = float(d[0])
g1 = float(g[0])
j1 = float(j[0])
m1 = float(m[0])
if all([d1 < 99.00, j1 < 99.00]):
body1 = body1 + '<tr><td style="font-family:Calibri;"><b>' + a1 + '</b></td><td style="font-family:Calibri;">' + b1 + '</td></td><td style="font-family:Calibri;">' + c1 + '</td></td><td style="font-family:Calibri;color:red">' + str(round(d1,2)) + '</td></td><td style="font-family:Calibri;">' + e1 + '</td><td style="font-family:Calibri;">' + f1 + '</td></td><td style="font-family:Calibri;color:red">' + str(round(g1,2)) + '</td><td style="font-family:Calibri;">' + h1 + '</td><td style="font-family:Calibri;">' + i1 + '</td><td style="font-family:Calibri;">' + str(round(j1,2)) + '</td><td style="font-family:Calibri;">' + k1 + '</td><td style="font-family:Calibri;">' + l1 + '</td><td style="font-family:Calibri;">' + str(round(m1,2)) + '</td></tr>'
z=z+1
My inputfile:
APPU1,2004423,2004417,99.9997,2847,2847,100,7600,7599,99.9846,1248,1248,99.9999
APPU2,,,
APPU3,2004333,2004329,99.9998,2848,2848,100,7593,7592,99.9842,1248,1247,99.9999
APPU4,2004020,2004016,99.9998,2849,2847,100,7596,7595,99.9853,1248,1247,99.9999
please suggest solution to print the data even if the rows in INPUT file contains null values.
I don't understand use of while loop here.
Anyway what you need is an if statement at the starting of while loop.
while (z<count):
test = lines[z]
hello = test.split(',')
if len(hello) < 14: # or whatever number of items required.
z+=1
continue
#rest of your code goes here
If I were you, I would write the code like this.
with open('INPUTfile') as f:
for i, line in enumerate(f):
hello = line.split(',')
#rest of the code.
You can use try except block in python.
try:
<your logic here> or the code
except IndexError:
pass # If IndexError is encountered, pass the control to the loop and continue from the next line
Use this code snippet and check if that solves the problem
for line in lines:
try:
test = line
hello = test.split(',')
a = hello[0:1]
a1 = a[0]
b = hello[1:2]
b1 = b[0]
c = hello[2:3]
c1 = c[0]
d = hello[3:4]
d1 = d[0]
e = hello[4:5]
e1 = e[0]
f = hello[5:6]
f1 = f[0]
g = hello[6:7]
g1 = g[0]
h = hello[7:8]
h1 = h[0]
i = hello[8:9]
i1 = i[0]
j = hello[9:10]
j1 = j[0]
k = hello[10:11]
k1 = k[0]
l = hello[11:12]
l1 = l[0]
m = hello[12:13]
m1 = m[0]
d1 = float(d[0])
g1 = float(g[0])
j1 = float(j[0])
m1 = float(m[0])
except IndexError:
pass
This is my code. The problem is that the output looks like this
2015-06-03 19:32:11.225085
{'2015-01-21-20:56:45.mp3': 1}{'negative': -2}{'2015-01-15-21:28:23.mp3': 1}
i want to be like a dictionary.... like this one below so i can read it back as a dictionary and remove the keys from the first subset sum and go on output a second on and so on until no other subset sum exists...
2015-06-03 19:32:11.225085
{'2015-01-21-20:56:45.mp3': 1, 'negative': -2, '2015-01-15-21:28:23.mp3': 1}
Any ideas?
thanx in advanced
import os, sys,re,gzip, pickle
from itertools import combinations
import json
from datetime import datetime
mp3folder = raw_input('Please copy paste the mp3s path:')
lowerin = input('Please enter your total playlist time in NEGATIVE seconds and hit ENTER:')
r = {}
drk = os.listdir(mp3folder)
drifiles = list(drk)
r = dict.fromkeys(drifiles, 0)
for key in r.keys():
print ('Please enter the duration of...')
print(key)
r[key] = input('in seconds and hit ENTER:')
r['negative'] = lowerin
d = {}
neg = 0
pos = 0
dates = datetime.now()
dates = str(dates)
f = open("dict.txt",'ab')
f.write('\n'+dates+'\n')
f.close()
for (w,v) in r.iteritems():
if v > 0: pos += v
else: neg += v
sums = [0] * (pos - neg + 1)
for (w,v) in r.iteritems():
s = sums[:]
if not s[v - neg]: s[v - neg] = (w,)
for (i, w2) in enumerate(sums):
if w2 and not s[i + v]:
s[i + v] = w2 + (w,)
sums = s
if s[-neg]:
for x in s[-neg]:
d = dict([(x, r[x])])
file('dict.txt','a'+'\n').write(repr(d))
break
f = open('dict.txt','r')
filedata = f.read()
f.close()
newdata = filedata.replace("}{",", ")
f = open('lexiko.txt','w')
f.write(newdata)
f.close()
di = eval(open("lexiko.txt").read())
print di
this will do it