Split list according to the rule - python

I have a list
values_list = [1013.0, 683.0, 336.0, 406.0, 636.0, 1065.0, 1160.0]
Also I have a value
value = 660.6153846153846
This list is based on the assumption that there are 3 stages. The first stage should be higher than the value, the second lower, and the third higher again.
I want to split this list into three lists, saving the order of the values like this:
values_list = [[1013.0, 683.0], [336.0, 406.0, 636.0], [1065.0, 1160.0]]

Try this one, using groupby:
from itertools import groupby
values_list = [1013.0, 683.0, 336.0, 406.0, 636.0, 1065.0, 1160.0]
value = 660.6153846153846
# groupby starts a new run every time the "below value" flag flips, so
# consecutive items on the same side of the threshold stay together.
result = [
    list(run)
    for _, run in groupby(values_list, key=lambda item: item < value)
]
print(result)
Result:
[[1013.0, 683.0], [336.0, 406.0, 636.0], [1065.0, 1160.0]]

Try this one:
# Split values_list into consecutive runs that lie on the same side of value.
splits = []
splt = []
# side of the previous item: 1 = above value, -1 = at or below, 0 = no item yet
s = 0
for v in values_list:
    side = 1 if v > value else -1
    # crossing from one side of value to the other closes the current run
    if splt and side != s:
        splits.append(splt)
        splt = []
    splt.append(v)
    s = side
# the last run is still open when the loop ends
if splt:
    splits.append(splt)

Related

Retrieve from dict based on list values

Here is my data -
# One dict per document: field name -> list of [text, confidence] pairs.
inp = [
    {
        'father_husband_mother_name': [
            ['Father s Name', 0.8603670001029968],
            ['Shripati', 0.8603670001029968],
            ['Father s Name', 0.8903670001029969],
            ['Shpppati', 0.8903670001029969],
        ],
        'doc_id': [
            ['GGX2176', 0.8435981869697571],
            ['GGC2176', 0.8835981869697571],
        ],
        'name': [
            ['Elector s Name', 0.8301510810852051],
            ['Shibshankar Ghosh', 0.8301510810852051],
            ['Elector s Name', 0.8501510810852051],
            ['Shibshankar Ghosh', 0.8501510810852051],
        ],
        'date_of_birth': [
            ['Age as on 1.1.2000', 0.8067844915390014],
            ['15', 0.8067844915390014],
            ['Age as on 1.1.2000', 0.8267844915390015],
            ['15', 0.8267844915390015],
        ],
        'gender_sex': [
            ['Sex', 0.7784658074378967],
            ['M', 0.7784658074378967],
            ['Sex', 0.8784658074378967],
            ['M', 0.8784658074378967],
        ],
    },
]
# Label texts that must never be returned as a field value.
STOPWORDS = [
    'Sex',
    'Father s Name',
    'Elector s Name',
    'Address',
    'Name',
    'Gender',
    'Mother s Name',
    'Husband s Name',
]
The output that I expect:
{'father_husband_mother_name': 'Shpppati',
'doc_id': 'GGC2176',
'name': 'Shibshankar Ghosh',
'date_of_birth': 'Age as on 1.1.2000,15',
'gender_sex': 'M'}
Here is the logic -
Retrieve the value that has the highest confidence score [the float inside the list of lists] that is not present in STOPWORDS for each key.
What I have tried -
def process_kie_dict(voter_raw_labels, threshold=0.7):
    """Pick the best-scoring texts per entity and blank out stopword labels.

    Args:
        voter_raw_labels: iterable of dicts mapping an entity name to a list
            of ``[text, confidence]`` pairs.
        threshold: an entity is kept only when its highest confidence is
            strictly greater than this value.

    Returns:
        dict mapping each kept entity to a comma-joined string built from the
        texts starting at the first highest-confidence pair; texts within
        edit distance 1 of any entry in STOPWORDS are replaced by ''.
    """
    # entity -> [texts, confidence] for the best candidate seen so far
    intermediate_dict = {}
    for entity_dict in voter_raw_labels:
        for entity, val in entity_dict.items():
            if not val:
                continue  # no pairs to score; max() would raise on empty input
            conf_val = [item[1] for item in val]
            max_conf = max(conf_val)
            if not max_conf > threshold:
                continue
            # Slice from the first occurrence of the best score. When every
            # score is equal that index is 0, so the whole list is kept.
            add_val = [item[0] for item in val[conf_val.index(max_conf):]]
            if (entity not in intermediate_dict
                    or intermediate_dict[entity][1] < max_conf):
                intermediate_dict[entity] = [add_val, max_conf]

    cleaned_dict = {}
    for key, val in intermediate_dict.items():
        kept = []
        for value in val[0]:
            # Compare the stripped text so surrounding whitespace does not
            # skew the distance (the length was already taken after strip).
            stripped = value.strip()
            m = len(stripped)
            # any() stops at the first near-match instead of scoring every
            # stopword and taking the minimum afterwards.
            is_stopword = any(
                editDistDP(stripped, word, m, len(word)) < 2
                for word in STOPWORDS
            )
            kept.append('' if is_stopword else value)
        # join + strip yields the same string as appending "value," each time
        # and trimming the commas at both ends
        cleaned_dict[key] = ",".join(kept).strip(",")
    return cleaned_dict
def editDistDP(str1, str2, m, n):
    """Return the edit (Levenshtein) distance between str1[:m] and str2[:n].

    Args:
        str1: first string.
        str2: second string.
        m: number of leading characters of str1 to consider.
        n: number of leading characters of str2 to consider.

    Returns:
        Minimum number of single-character insertions, removals and
        replacements that turn str1[:m] into str2[:n].
    """
    # dp[i][j] holds the distance between str1[:i] and str2[:j]
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    # Fill dp[][] in bottom-up manner
    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                # First string is empty: insert all j characters of the
                # second string
                dp[i][j] = j
            elif j == 0:
                # Second string is empty: remove all i characters of the
                # first string
                dp[i][j] = i
            elif str1[i - 1] == str2[j - 1]:
                # Last characters match: nothing to pay for this position
                dp[i][j] = dp[i - 1][j - 1]
            else:
                # Last characters differ: take the cheapest of the three
                # possible edits
                dp[i][j] = 1 + min(dp[i][j - 1],      # Insert
                                   dp[i - 1][j],      # Remove
                                   dp[i - 1][j - 1])  # Replace
    return dp[m][n]
You can forget about the edit distance implementation, not important. What I want to know is given nested for loops, this won't work at scale. Looking for a more efficient implementation.
Here is a parser for your data
# For every field, rank its [text, score] pairs by score (pairs whose text is
# in STOPWORDS rank as 0) and keep the text of the top-ranked pair.
result = {
    field: sorted(
        pairs,
        key=lambda pair: 0 if pair[0] in STOPWORDS else pair[1],
    )[-1][0]
    for field, pairs in inp[0].items()
}
In short, for each key it sorts that key's list of [text, score] pairs by confidence score, treating any pair whose text is in STOPWORDS as having a score of 0. It then takes the text of the last (highest-scoring) pair of that sorted list as the value in the result dictionary.

python: filter based on IF condition

I am working with a simple Python condition that filters out values greater than or equal to zero and stores the filtered values in a list:
# make an object containing all clusters
clustering = d.clusterer.clustering_dict[cut_off]
# list of ignored objects
banned_conf = []
# enumerate(..., 1) yields the 1-based cluster number directly; looking it up
# with clustering.index(clust) rescans the list on every iteration and
# returns the first match when two clusters compare equal
for clustStr, clust in enumerate(clustering, 1):
    # get the value of energy for the clust
    ener = clust[0].energy
    # set up filter to ignore conformations with positive energies
    if ener > 0:
        print('Conformation in ' + str(clustStr) + ' cluster poses positive energy')
        banned_conf.append(ener)
        print('Nonsence: It is ignored!')
        continue
    elif ener == 0:
        print('Conformation in ' + str(clustStr) + ' cluster poses ZERO energy')
        banned_conf.append(ener)
        print('Very rare case: it is ignored!')
        continue
    #else:
    #print("Ain't no wrong conformations in " + str(clustStr) + " cluster")
How would it be possible to ignore all values > or = 0 within the same IF statement (without elif)? Which filtering would be better (with elif or in single IF statement)?
I would use the filter function:
lst = [0, 1, -1, 2, -2, 3, -3, 4, -4]
# filter() keeps only the items for which the predicate is true
filtered = list(filter(lambda x: x >= 0, lst))
for ele in filtered:
    print(f'{ele} is >= 0')
Or, if you don't want to use a lambda function and filter, I would do:
lst = [0, 1, -1, 2, -2, 3, -3, 4, -4]
filtered = []
# explicit loop: collect every item that is zero or positive
for ele in lst:
    if ele >= 0:
        filtered.append(ele)
for ele in filtered:
    print(f'{ele} is >= 0')
Or you can use list comprehension:
lst = [0, 1, -1, 2, -2, 3, -3, 4, -4]
# a comprehension needs an output expression before the `for` clause
filtered = [ele for ele in lst if ele >= 0]
for ele in filtered:
    print(f'{ele} is >= 0')
You can use >= to test both conditions at once.
# index is the 1-based cluster number used in the message
for index, clust in enumerate(clustering, 1):
    ener = clust[0].energy
    # a single >= comparison covers both the zero and the positive case
    if ener >= 0:
        print(f'Conformation in {index} cluster poses zero or positive energy, it is ignored')
        banned_conf.append(clust)
Your original method is better if you want to show a different message for zero and positive energy.

Trying to find same digits in a row [duplicate]

This question already has answers here:
Longest sequence of consecutive duplicates in a python list
(4 answers)
Closed 2 years ago.
I'm trying to find the longest run of the same digit in a row in a list that I read from input. This is how I'm doing it:
# NOTE(review): the name `list` shadows the builtin for the rest of the script.
list = []
count_max_numbers = 0
# read integers until the user enters 0
while True:
    x = int(input('число: '))
    if x == 0:
        break
    list.append(int(x))
# count how many times the largest number occurs
max_number = max(list)
for i in list:
    if i != max_number:
        pass
    else:
        count_max_numbers += 1
# find the longest run of equal consecutive values
current_result = 0
max_result = 0
last_seen = list[0]
longest_digit = 0
for i in list:
    if i == last_seen:
        current_result += 1
    else:
        if current_result > max_result:
            max_result = current_result
            # NOTE(review): `i` is the value that just ENDED the run; the run
            # itself consisted of `last_seen`, so recording `i` here is what
            # makes the reported digit wrong.
            longest_digit = i
        last_seen = i
        current_result = 1
# the final run is still open when the loop ends
if current_result > max_result:
    max_result = current_result
    longest_digit = i
print(f'{list}')
print(f'max number: {max_number} reapeted{count_max_numbers} times')
print(f'the biggest series: {longest_digit} repeated {max_result} times')
This works only with the first digit in the list, but I need it to work with the whole list.
For example, if the input is (1,2,3,3,3,3,5,55)
it needs to output: the biggest series: 3 repeated 4 times
I still have a problem with the output of {longest_digit}: it's incorrect.
Try this:
mylist = [1, 2, 3, 3, 3, 3, 5, 55]
# value -> total number of occurrences in mylist
# NOTE: this counts occurrences anywhere in the list, not consecutive runs;
# the two agree only when equal values are adjacent, as in this example.
val_holder = {}
for val in mylist:
    val_holder[val] = val_holder.get(val, 0) + 1
# the key with the largest count
max_key = max(val_holder, key=val_holder.get)
print(max_key)
My approach to this would be to use a dictionary:
lst = [1, 2, 3, 3, 3, 3, 5, 55]
# value -> number of occurrences (named `counts` so the builtin `dict`
# stays usable)
counts = {}
for val in lst:
    counts[val] = counts.get(val, 0) + 1
# Once we get the dictionary we sort this using the value
sorted_dict = sorted(counts.items(), key=lambda keyVal: keyVal[1], reverse=True)
# sorted_dict is a list of (value, count) pairs, most frequent first
print(sorted_dict[0][0])
Here's a simple algorithm:
Initialize longest = 0, longest_digit = None, current = 0, previous_digit = None, and current_digit = None.
Then, for each item in the list:
If the item is equal to current_digit, increment current by one.
Otherwise:
(A) If current > longest, set longest = current and longest_digit = previous_digit
(B) Then, reset current to 1 and current_digit to item
Set previous_digit to item
At the end of the list, also do step (A) above.
Now you should have the answer in longest (4) and longest_digit (3)

How turn my loop results into a list

I have no idea how to put these results into a list and sort it using Python 3.
def get_new(x):
    """Print x and its next five successors under x -> (x * 31334) % 31337."""
    for _ in range(6):
        print(x)
        x = (x * 31334) % 31337


get_new(7546)
One way to do this is to create a list and append the values of x. Then return this list from your function:
def get_new(x):
    """Return six successive values of the map x -> (x * 31334) % 31337.

    The starting value itself is not included; the first element is the
    result of applying the map once.
    """
    lst = []
    for _ in range(6):
        x = (x * 31334) % 31337
        lst.append(x)
    return lst


print(get_new(7546))
# [8699, 5240, 15617, 15823, 15205, 17059]
For calculating and sorting the calculated list, do this using list append and sort.
def get_new(x):
    """Return six successive values of the map x -> (x * 31334) % 31337."""
    new_list = []
    for _ in range(6):
        x = (x * 31334) % 31337
        new_list.append(x)  # append each new value of x to `new_list`
    return new_list


a = get_new(7546)  # the unsorted calculated list
a.sort()  # list.sort() sorts in place and returns None
print(a)
# OUTPUT [5240, 8699, 15205, 15617, 15823, 17059]

How to produce multiple modes in Python?

Basically I just need to figure out how to produce modes (numbers occurring most frequently) from a list in Python, whether or not that list has multiple modes?
Something like this:
def print_mode(thelist):
    """Print whether thelist has one mode, several modes, or no repeats.

    Args:
        thelist: iterable of hashable items.

    Returns:
        None; the findings are printed.
    """
    # item -> number of occurrences
    counts = {}
    for item in thelist:
        counts[item] = counts.get(item, 0) + 1
    # track the first item that reaches the highest count
    maxcount = 0
    maxitem = None
    for k, v in counts.items():
        if v > maxcount:
            maxitem = k
            maxcount = v
    if maxcount == 1:
        print("All values only appear once")
    # dict views have no .count() on Python 3, so materialise a list first
    if list(counts.values()).count(maxcount) > 1:
        print("List has multiple modes")
    else:
        print("Mode of list:", maxitem)
But instead of returning strings in the "All values only appear once," or "list has multiple modes," I would want it to return the actual integers that it's referencing?
Make a Counter, then pick off the most common elements:
from collections import Counter
from itertools import groupby
l = [1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6]
# most_common() lists (value, count) pairs by descending count, so grouping
# on the count puts every mode into the very first group
ranked = Counter(l).most_common()
freqs = groupby(ranked, key=lambda pair: pair[1])
# take the first group (highest frequency) and keep only its values
top_count, top_group = next(freqs)
print([value for value, _count in top_group])
# prints [3, 4, 6]
def mode(arr):
    """Return every value of arr that occurs with the highest frequency.

    Args:
        arr: sized iterable of hashable items.

    Returns:
        List of all modes in order of first appearance; [] for empty input.
    """
    if len(arr) == 0:
        return []
    # value -> number of occurrences
    frequencies = {}
    for num in arr:
        frequencies[num] = frequencies.get(num, 0) + 1
    # named `highest` rather than `mode` so the function name is not shadowed
    highest = max(frequencies.values())
    return [num for num, count in frequencies.items() if count == highest]
This code can handle any list. Make sure the elements of the list are numbers.
New in Python 3.8: the statistics module has a function for that:
import statistics as s
# multimode returns a list of the most frequent values
modes = s.multimode([1, 1, 2, 2])
print("mode(s): ", modes)
output: mode(s): [1, 2]

Categories

Resources