Python: Concatenate similar objects in List - python

I have a list containing strings as ['Country-Points'].
For example:
lst = ['Albania-10', 'Albania-5', 'Andorra-0', 'Andorra-4', 'Andorra-8', ...other countries...]
I want to calculate the average for each country without creating a new list. So the output would be (in the case above):
lst = ['Albania-7.5', 'Andorra-4.0', ...other countries...]
I would really appreciate it if anyone could help me with this.
EDIT:
This is what I've got so far. "data" is actually a dictionary, where the keys are countries and each value is a list of the points other countries gave to that country (the one used as the key). Again, I'm new to Python, so I don't really know all the built-in functions.
for key in self.data:
    # totals maps country -> [sum of points, number of entries].  Dicts keep
    # insertion order, so the result lists countries in first-seen order.
    totals = {}
    for entry in self.data[key]:
        parts = str(entry).split("-")
        country = parts[0]
        acc = totals.setdefault(country, [0, 0])
        # Same conversion as before: the points are truncated to an int.
        acc[0] += int(float(parts[1]))
        acc[1] += 1
    # Every country is emitted, including the last one, and each average
    # uses that country's own count (the old loop never reset its counter
    # and never flushed the final country).
    self.data[key] = [
        country + "-" + str(float(total) / count)
        for country, (total, count) in totals.items()
    ]

itertools.groupby with a suitable key function can help:
import itertools
def get_country_name(item):
    """Return the country part of a ``'Country-Points'`` string."""
    return item.split('-', 1)[0]


def get_country_value(item):
    """Return the points part of a ``'Country-Points'`` string as a float."""
    return float(item.split('-', 1)[1])


def country_avg_grouper(lst):
    """Yield one ``'Country-average'`` string per run of equal countries.

    ``itertools.groupby`` only merges *adjacent* items, so entries for the
    same country must already sit next to each other in ``lst``.
    """
    for ctry, group in itertools.groupby(lst, key=get_country_name):
        values = [get_country_value(c) for c in group]
        avg = sum(values) / len(values)
        yield '{country}-{avg}'.format(country=ctry, avg=avg)
lst[:] = country_avg_grouper(lst)
The key here is that I wrote a function to do the change out of place and then I can easily make the substitution happen in place by using slice assignment.

I would probably do this with an intermediate dictionary.
def country(s):
    """Return the country part of a ``'Country-Points'`` string."""
    return s.split('-')[0]


def value(s):
    """Return the points part of a ``'Country-Points'`` string as a float."""
    return float(s.split('-')[1])


def country_average(lst):
    """Return ``'Country-average'`` strings, one per country in ``lst``.

    The input is traversed once; a running ``(total, count)`` pair is kept
    per country.  Results follow the dict's iteration order and the average
    is formatted with ``%f`` (six decimals).
    """
    country_map = {}
    for point in lst:
        c = country(point)
        v = value(point)
        total, count = country_map.get(c, (0, 0))
        country_map[c] = (total + v, count + 1)
    return ['%s-%f' % (name, total / count)
            for name, (total, count) in country_map.items()]
It tries hard to traverse the original list only once, at the expense of quite a few tuple allocations.

Related

How to take only last value from a list with unique tag?

In my LIST(not dictionary) I have these strings:
"K:60",
"M:37",
"M_4:47",
"M_5:89",
"M_6:91",
"N:15",
"O:24",
"P:50",
"Q:50",
"Q_7:89"
in output I need to have
"K:60",
"M_6:91",
"N:15",
"O:24",
"P:50",
"Q_7:89"
What is the possible decision?
Or even maybe, how to take tag with the maximum among strings with the same tag.
Use re.split and list comprehension as shown below. Use the fact that when the dictionary dct is created, only the last value is kept for each repeated key.
import re
lst = [
    "K:60", "M:37", "M_4:47", "M_5:89", "M_6:91",
    "N:15", "O:24", "P:50", "Q:50", "Q_7:89",
]
# Key each entry by the text before the first ':' or '_'.  A later entry
# with the same key overwrites the earlier one, so only the last survives.
dct = {re.split(r'[:_]', s)[0]: s for s in lst}
lst_uniq = [*dct.values()]
print(lst_uniq)
# ['K:60', 'M_6:91', 'N:15', 'O:24', 'P:50', 'Q_7:89']
Probably far from the cleanest but here is a method quite easy to understand.
l = ["K:60", "M:37", "M_4:47", "M_5:89", "M_6:91", "N:15", "O:24", "P:50", "Q:50", "Q_7:89"]
# best[tag] is the largest value seen for that tag.  The tag is the text
# before ':' with any '_suffix' removed, so multi-letter tags are kept
# apart (keying on the first character alone would merge them).
best = {}
for x in l:
    name, _, number = x.partition(':')
    tag = name.split('_')[0]
    number = int(number)
    if tag not in best or number > best[tag]:
        best[tag] = number
for tag, number in best.items():
    print(str(tag) + ":" + str(number))
K:60
M:91
N:15
O:24
P:50
Q:89
I do not see any straight-forward technique. Other than iterating on entire thing and computing yourself, I do not see if any built-in can be used. I have written this where you do not require your values to be sorted in your input.
But I like the answer posted by Timur Shtatland; you can make use of that if your values are already sorted in the input.
# intermediate maps base tag -> (largest value so far, original string).
intermediate = {}
for item in a:
    key, val = item.split(':')
    key = key.split('_')[0]
    val = int(val)
    previous = intermediate.get(key)
    # Replace only on a strictly larger value, so the first of equal
    # maxima is the one that is kept.
    if previous is None or previous[0] < val:
        intermediate[key] = (val, item)
ans = [kept for _, kept in intermediate.values()]
print(ans)
which gives:
['K:60', 'M_6:91', 'N:15', 'O:24', 'P:50', 'Q_7:89']

How to find the highest value element in a list with reference to a dictionary on python

How do I code a function in python which can:
iterate through a list of word strings which may contain duplicate words and referencing to a dictionary,
find the word with the highest absolute sum, and
output it along with the corresponding absolute value.
The function also has to ignore words which are not in the dictionary.
For example,
Assume the function is called H_abs_W().
Given the following list and dict:
list_1 = ['apples','oranges','pears','apples']
Dict_1 = {'apples':5.23,'pears':-7.62}
Then calling the function as:
H_abs_W(list_1,Dict_1)
Should give the output:
'apples',10.46
EDIT:
I managed to do it in the end with the code below. Looking over the answers, turns out I could have done it in a shorter fashion, lol.
def H_abs_W(list_1, Dict_1):
    """Return ``(word, abs_sum)`` for the word with the largest absolute sum.

    Each word's absolute sum is ``abs(occurrences * Dict_1[word])``.  Words
    that are not in ``Dict_1`` are ignored.  On ties the word seen first in
    ``list_1`` wins.  If nothing scores above zero, ``('', 0)`` is returned.
    """
    freqW = {}
    for word in list_1:
        if word in Dict_1:
            freqW[word] = freqW.get(word, 0) + 1
    ASum_W = 0
    i_word = ''
    for word, count in freqW.items():
        x = abs(float(count) * float(Dict_1[word]))
        if x > ASum_W:
            ASum_W = x
            i_word = word
    return (i_word, ASum_W)
list_1 = ['apples', 'oranges', 'pears', 'apples']
Dict_1 = {'apples': 5.23, 'pears': -7.62}
# d[word] is the sum of the word's dictionary value over all occurrences.
# Words missing from Dict_1 stay at 0.
d = {k: 0 for k in list_1}
for x in list_1:
    if x in Dict_1:
        d[x] += Dict_1[x]
# Rank by the absolute value of the accumulated sums.  Taking the max of
# Dict_1 itself would ignore both repetition and negative values.
m = max(d, key=lambda k: abs(d[k]))
print(m, abs(d[m]))
try this,
# Rank dictionary words by |occurrences * value| so that repeated words and
# negative values are both taken into account.
key, value = max(
    Dict_1.items(),
    key=lambda kv: abs(list_1.count(kv[0]) * kv[1]),
)
print(f"{key}, {abs(list_1.count(key) * value)}")
# apples, 10.46
you can use Counter to calculate the frequency(number of occurrences) of each item in the list.
max(counter.values()) will give us the count of maximum occurring element
max(counter, key=counter.get) will give the which item in the list is
associated with that highest count.
========================================================================
from collections import Counter
def H_abs_W(list_1, Dict_1):
    """Return ``(word, abs_sum)`` for the word with the largest absolute sum.

    Only words present in ``Dict_1`` are counted.  A word's absolute sum is
    ``abs(occurrences * Dict_1[word])``.  Returns ``('', 0)`` when no word
    of ``list_1`` is in the dictionary.
    """
    counter = Counter(word for word in list_1 if word in Dict_1)
    if not counter:
        return '', 0
    totals = {word: abs(count * Dict_1[word]) for word, count in counter.items()}
    # max() keeps the first maximal key, i.e. the word seen first on ties.
    item = max(totals, key=totals.get)
    return item, totals[item]

Sorting algorithm help in python

I've been playing around with a program that will take in information from two files and then write the information out to a single file in sorted order.
So what i did was store each line of the file as an element in a list. I create another function that splits each element into a 2d array where i can easily access the name variables. From there i want to create a nested for loop that as it iterates it checks for the highest value in the array, removes the value from the list and appending it to a new list until there's a sorted list.
I think I am like 90% of the way there, but I am having trouble wrapping my head around the logic of sorting algorithms. It seems like the problem just keeps getting more complex and i keep wanting to use pointers. If someone could help shine some light on the subject I would greatly appreciate it.
import os
from http.cookiejar import DAYS
from macpath import split
# This program reads two payroll files and is meant to write their records out to a single file in sorted order.
class Employee:
    """Plain record holding the four fields of one employee."""

    def __init__(self, EmployeeID, name, wage, days):
        # The values are stored as given; no validation or conversion.
        self.EmployeeID = EmployeeID
        self.name = name
        self.wage = wage
        self.days = days
def Extraction(file, file2):
    """Return the stripped, non-blank lines of both files, interleaved.

    Lines are taken alternately (first file, second file, ...).  Once one
    file is exhausted the rest of the other is still read, and no empty
    strings are appended at end of file.
    """
    employList = []
    sources = [file, file2]
    while sources:
        for src in list(sources):
            raw = src.readline()
            # readline() returns '' only at end of file; a blank line in
            # the middle of a file still carries its newline.
            if raw == '':
                sources.remove(src)
                continue
            line = raw.strip()
            if line:
                employList.append(line)
    return employList
def Sort(mylist, key_index=2):
    """Split each line into fields and return the records, highest first.

    Parameters:
        mylist: iterable of whitespace-separated text lines; blank lines
            are skipped.
        key_index: position of the field to order by.  The default of 2 is
            the field the original comparisons looked at.
            NOTE(review): confirm this is the intended sort column.

    Returns:
        A new list of field lists in descending order of the key field.
        Fields are compared as text, so numeric columns need converting
        by the caller.
    """
    splitlist = [fields for fields in (line.split() for line in mylist) if fields]
    # sorted() replaces the hand-rolled "find highest, remove, append" loop.
    return sorted(splitlist, key=lambda fields: fields[key_index], reverse=True)
fPath = 'C:/Temp'
fileName = 'payroll1.txt'
fullFileName = os.path.join(fPath, fileName)
fileName2 = 'payroll2.txt'
fullFileName2 = os.path.join(fPath, fileName2)
# "with" closes both files even if reading fails; previously the second
# file was never closed.
with open(fullFileName, 'r') as f, open(fullFileName2, 'r') as f2:
    # Pull each line out of the files and place it into a list.
    employeeList = Extraction(f, f2)
Sort(employeeList)
ReportName = "List of Employees:"
marker = '-' * len(ReportName)
print(ReportName + ' \n' + marker)
total = 0
I am having trouble with once having the higest value trying to append that value to a sortedlist, removing the value from the splitlist, and re running the code.
Using the sorted method is much easier and already built-in, per Joran's suggestion. I've edited your reading method so that it builds two lists of tuples, representing the line and the length of the line. The sorted method will return a list sorted according to the key (line length) and descending order (reverse=True)
from operator import itemgetter
class Employee:
    """Plain record holding the four fields of one employee."""

    def __init__(self, EmployeeID, name, wage, days):
        # The values are stored as given; no validation or conversion.
        self.EmployeeID = EmployeeID
        self.name = name
        self.wage = wage
        self.days = days
def Extraction(file, file2):
    """Return ``(line_number, stripped_length, source)`` tuples for both files.

    ``source`` is ``'file1'`` or ``'file2'``.  Line numbers start at 0 and
    restart for the second file.  All tuples of the first file come first.
    """
    # Iterating a file object yields the same lines as readlines() without
    # first building a throwaway list.
    mylines = [(i, len(l.strip()), 'file1') for i, l in enumerate(file)]
    mylines2 = [(i, len(l.strip()), 'file2') for i, l in enumerate(file2)]
    return [*mylines, *mylines2]
fPath = 'C:/Temp'
fileName = 'payroll1.txt'
fullFileName = os.path.join(fPath, fileName)
fileName2 = 'payroll2.txt'
fullFileName2 = os.path.join(fPath, fileName2)
# "with" guarantees both files are closed, even if Extraction raises.
with open(fullFileName, 'r') as f, open(fullFileName2, 'r') as f2:
    # Pull out each line and record its line number and length.
    employeeList = Extraction(f, f2)
# itemgetter(1) sorts on the second element of each tuple, len(line),
# and reverse=True puts the longest lines first.
ReportName = sorted(employeeList, key=itemgetter(1), reverse=True)
EDIT: I've added markers in the tuples so that you can keep track of what lines came from what file. Might be a bit confusing without them

Building a list of months by iterating between two dates in a list (Python)

I have an ordered (i.e. sorted) list that contains dates sorted (as datetime objects) in ascending order.
I want to write a function that iterates through this list and generates another list of the first available dates for each month.
For example, suppose my sorted list contains the following data:
A = [
'2001/01/01',
'2001/01/03',
'2001/01/05',
'2001/02/04',
'2001/02/05',
'2001/03/01',
'2001/03/02',
'2001/04/10',
'2001/04/11',
'2001/04/15',
'2001/05/07',
'2001/05/12',
'2001/07/01',
'2001/07/10',
'2002/03/01',
'2002/04/01',
]
The returned list would be
B = [
'2001/01/01',
'2001/02/04',
'2001/03/01',
'2001/04/10',
'2001/05/07',
'2001/07/01',
'2002/03/01',
'2002/04/01',
]
The logic I propose would be something like this:
def extract_month_first_dates(input_list, start_date, end_date):
    """Return the first date of every month found within the given range.

    Parameters:
        input_list: dates in ascending order.  Each item must support
            comparison and expose ``.year`` and ``.month``, as ``datetime``
            and ``date`` objects do.
        start_date, end_date: inclusive bounds; dates outside are skipped.

    Returns:
        A list with the earliest in-range date of each (year, month).
    """
    seen_months = set()  # set membership is O(1); a list would be O(n)
    output = []
    for curr_date in input_list:
        if curr_date < start_date or curr_date > end_date:
            continue
        date_key = (curr_date.year, curr_date.month)
        if date_key in seen_months:
            continue
        seen_months.add(date_key)
        output.append(curr_date)
    return output
Any comments, suggestions? - can this be improved to be more 'Pythonic' ?
>>> import itertools
>>> [min(j) for i, j in itertools.groupby(A, key=lambda x: x[:7])]
['2001/01/01', '2001/02/04', '2001/03/01', '2001/04/10', '2001/05/07', '2001/07/01', '2002/03/01', '2002/04/01']
Searching lists is a O(n) operation. I think you can simply check whether the key is new:
def extract_month_first_dates(input_list):
    """Return the first date of each month from an ascending list of dates.

    The input is sorted, so a month is new exactly when its
    ``(month, year)`` key differs from the previous item's key.  No lookup
    structure is needed.
    """
    output = []
    last_key = None
    for curr_date in input_list:
        # datetime/date objects expose .month and .year directly.
        date_key = curr_date.month, curr_date.year  # no string key required
        if date_key != last_key:
            output.append(curr_date)
            last_key = date_key
    return output
Here is a simple solution in classic python i.e. no itertools ;) and self explanatory
# A set is enough to remember which "YYYY/MM" prefixes were already seen.
visited = set()
B = []
for a in A:
    month = a[:7]
    if month not in visited:
        B.append(a)
        visited.add(month)
print(B)
Output:
['2001/01/01', '2001/02/04', '2001/03/01', '2001/04/10', '2001/05/07', '2001/07/01', '2002/03/01', '2002/04/01']

Find the most common element in a list

What is an efficient way to find the most common element in a Python list?
My list items may not be hashable so can't use a dictionary.
Also in case of draws the item with the lowest index should be returned. Example:
>>> most_common(['duck', 'duck', 'goose'])
'duck'
>>> most_common(['goose', 'duck', 'duck', 'goose'])
'goose'
A simpler one-liner:
def most_common(lst):
    """Return the most frequent item of ``lst``, lowest index first on ties.

    Iterating the list itself, rather than ``set(lst)``, keeps it working
    for unhashable items.  ``max`` returns the first maximal element, which
    gives the lowest-index tie rule.  Runs in O(n^2) because of
    ``lst.count``.  Raises ValueError for an empty list.
    """
    return max(lst, key=lst.count)
Borrowing from here, this can be used with Python 2.7:
from collections import Counter
def Most_Common(lst):
    """Return the most frequent item of ``lst`` using ``collections.Counter``.

    Items must be hashable.  An empty list raises IndexError, because
    ``most_common(1)`` then returns an empty list.
    """
    data = Counter(lst)
    # most_common(1) is a one-element list of (item, count) pairs.
    return data.most_common(1)[0][0]
Works around 4-6 times faster than Alex's solutions, and is 50 times faster than the one-liner proposed by newacct.
On CPython 3.6+ (any Python 3.7+) the above will select the first seen element in case of ties. If you're running on older Python, to retrieve the element that occurs first in the list in case of ties you need to do two passes to preserve order:
# Only needed pre-3.6!
def most_common(lst):
    """Return the most frequent item, preferring the earliest on ties.

    The first pass counts the items (they must be hashable).  The second
    pass walks ``lst`` in order, so ``max`` returns the first item that
    carries the highest count.
    """
    data = Counter(lst)
    return max(lst, key=data.get)
With so many solutions proposed, I'm amazed nobody's proposed what I'd consider an obvious one (for non-hashable but comparable elements) -- [itertools.groupby][1]. itertools offers fast, reusable functionality, and lets you delegate some tricky logic to well-tested standard library components. Consider for example:
import itertools
import operator
def most_common(L):
    """Return the most frequent item of ``L``, earliest occurrence on ties.

    Items only need to be comparable (sortable), not hashable.
    """
    # get an iterable of (item, iterable) pairs
    SL = sorted((x, i) for i, x in enumerate(L))
    # print('SL:', SL)
    groups = itertools.groupby(SL, key=operator.itemgetter(0))

    # auxiliary function to get "quality" for an item
    def _auxfun(g):
        item, iterable = g
        count = 0
        min_index = len(L)
        for _, where in iterable:
            count += 1
            min_index = min(min_index, where)
        # print('item %r, count %r, minind %r' % (item, count, min_index))
        # The index is negated so that max() prefers the earliest item.
        return count, -min_index

    # pick the highest-count/earliest item
    return max(groups, key=_auxfun)[0]
This could be written more concisely, of course, but I'm aiming for maximal clarity. The two print statements can be uncommented to better see the machinery in action; for example, with prints uncommented:
print most_common(['goose', 'duck', 'duck', 'goose'])
emits:
SL: [('duck', 1), ('duck', 2), ('goose', 0), ('goose', 3)]
item 'duck', count 2, minind 1
item 'goose', count 2, minind 0
goose
As you see, SL is a list of pairs, each pair an item followed by the item's index in the original list (to implement the key condition that, if the "most common" items with the same highest count are > 1, the result must be the earliest-occurring one).
groupby groups by the item only (via operator.itemgetter). The auxiliary function, called once per grouping during the max computation, receives and internally unpacks a group - a tuple with two items (item, iterable) where the iterable's items are also two-item tuples, (item, original index) [[the items of SL]].
Then the auxiliary function uses a loop to determine both the count of entries in the group's iterable, and the minimum original index; it returns those as combined "quality key", with the min index sign-changed so the max operation will consider "better" those items that occurred earlier in the original list.
This code could be much simpler if it worried a little less about big-O issues in time and space, e.g....:
def most_common(L):
    """Return the most frequent item of ``L``, earliest occurrence on ties.

    Compact variant: O(N) extra space and O(N^2) time because of
    ``L.index``.
    """
    groups = itertools.groupby(sorted(L))

    def _auxfun(group):
        # Tuple parameters were removed in Python 3, so unpack in the body.
        item, iterable = group
        return len(list(iterable)), -L.index(item)

    return max(groups, key=_auxfun)[0]
same basic idea, just expressed more simply and compactly... but, alas, an extra O(N) auxiliary space (to embody the groups' iterables to lists) and O(N squared) time (to get the L.index of every item). While premature optimization is the root of all evil in programming, deliberately picking an O(N squared) approach when an O(N log N) one is available just goes too much against the grain of scalability!-)
Finally, for those who prefer "oneliners" to clarity and performance, a bonus 1-liner version with suitably mangled names:-).
from itertools import groupby as g
def most_common_oneliner(L):
    """One-expression variant of the groupby approach (O(N^2) time)."""
    # Python 3 lambdas cannot unpack a tuple parameter, so index the pair.
    return max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]
What you want is known in statistics as mode, and Python of course has a built-in function to do exactly that for you:
>>> from statistics import mode
>>> mode([1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 6, 6, 6])
3
Note that if there is no "most common element" such as cases where the top two are tied, this will raise StatisticsError on Python
<=3.7, and on 3.8 onwards it will return the first one encountered.
Without the requirement about the lowest index, you can use collections.Counter for this:
from collections import Counter
a = [1936, 2401, 2916, 4761, 9216, 9216, 9604, 9801]
c = Counter(a)
print(c.most_common(1)) # the one most common element... 2 would mean the 2 most common
[(9216, 2)] # a list containing one (element, count) tuple: the element and its count in 'a'
If they are not hashable, you can sort them and do a single loop over the result counting the items (identical items will be next to each other). But it might be faster to make them hashable and use a dict.
def most_common(lst):
    """Return the most frequent item of ``lst``, lowest index on ties.

    Items must be sortable but need not be hashable.  The (index, item)
    pairs are sorted by item, so equal items form one run.  The sort is
    stable, so each run begins with its lowest original index.  Returns
    None for an empty list.
    """
    max_item, max_length, max_i = None, 0, 0
    cur_item, cur_length, cur_i = None, 0, 0
    for i, item in sorted(enumerate(lst), key=lambda x: x[1]):
        # cur_length == 0 means "no run yet".  Using the length rather than
        # a None sentinel keeps None usable as a list element.
        if cur_length and cur_item == item:
            cur_length += 1
            continue
        # A new run starts: settle the one that just ended.
        if cur_length > max_length or (cur_length == max_length and cur_i < max_i):
            max_item, max_length, max_i = cur_item, cur_length, cur_i
        cur_item, cur_length, cur_i = item, 1, i
    # The last run has not been compared yet.
    if cur_length > max_length or (cur_length == max_length and cur_i < max_i):
        return cur_item
    return max_item
This is an O(n) solution.
# Count while walking the list backwards.  With ">=", among items that
# reach the top count, the last one to take the lead is the item whose
# first occurrence in the original list is earliest.
mydict = {}
cnt, itm = 0, ''
for item in reversed(lst):
    mydict[item] = mydict.get(item, 0) + 1
    if mydict[item] >= cnt:
        cnt, itm = mydict[item], item
print(itm)
(reversed is used to make sure that it returns the lowest index item)
Sort a copy of the list and find the longest run. You can decorate the list before sorting it with the index of each element, and then choose the run that starts with the lowest index in the case of a tie.
A one-liner:
def most_common (lst):
    """Return the most frequent item of ``lst``, lowest index on ties.

    The (item, count) pairs are generated in list order rather than from
    ``set(lst)``, so unhashable items work and ``max`` keeps the earliest
    of equally frequent items.  O(n^2) because of ``lst.count``.
    """
    return max(((item, lst.count(item)) for item in lst), key=lambda a: a[1])[0]
I am doing this using scipy stat module and lambda:
import scipy.stats
lst = [1, 2, 3, 4, 5, 6, 7, 5]


def most_freq_val(x):
    """Return the most frequent value of ``x`` via ``scipy.stats.mode``."""
    import numpy

    # Depending on the SciPy version, the mode of 1-D input is either a
    # length-1 array or a scalar.  ravel() turns both into a 1-D array.
    return numpy.ravel(scipy.stats.mode(x)[0])[0]


print(most_freq_val(lst))
Result:
most_freq_val = 5
# use Decorate, Sort, Undecorate to solve the problem
def most_common(iterable):
    """Return the most common item, the earliest one winning ties.

    Uses Decorate-Sort-Undecorate: items must be sortable but need not be
    hashable.

    Raises:
        ValueError: if ``iterable`` is empty.
    """
    # Make a list with tuples: (item, index)
    # The index will be used later to break ties for most common item.
    lst = [(x, i) for i, x in enumerate(iterable)]
    if not lst:
        # A bare next() on an empty iterator would raise StopIteration.
        raise ValueError("most_common() arg is an empty iterable")
    lst.sort()
    # lst_final will also be a list of tuples: (count, index, item)
    # Sorting on this list will find us the most common item, and the index
    # will break ties so the one listed first wins.  Count is negative so
    # largest count will have lowest value and sort first.
    lst_final = []
    # Get an iterator for our new list...
    itr = iter(lst)
    # ...and pop the first tuple off.  Setup current state vars for loop.
    count = 1
    x_cur, i_cur = next(itr)
    # Loop over sorted list of tuples, counting occurrences of item.
    for tup in itr:
        if x_cur == tup[0]:
            # Same item again; increment count
            count += 1
        else:
            # New item, so write previous current item to lst_final...
            lst_final.append((-count, i_cur, x_cur))
            # ...and reset current state vars for loop.
            x_cur, i_cur = tup
            count = 1
    # Write final item after loop ends
    lst_final.append((-count, i_cur, x_cur))
    lst_final.sort()
    return lst_final[0][2]


print(most_common(['x', 'e', 'a', 'e', 'a', 'e', 'e']))  # prints 'e'
print(most_common(['goose', 'duck', 'duck', 'goose']))  # prints 'goose'
Building on Luiz's answer, but satisfying the "in case of draws the item with the lowest index should be returned" condition:
from statistics import mode, StatisticsError
def most_common(l):
    """Return the mode of ``l``; on ties, the tied item that appears first.

    Raises:
        StatisticsError: if ``l`` is empty (re-raised from ``mode``).
    """
    try:
        return mode(l)
    except StatisticsError as e:
        # Older Pythons raise "no unique mode" when several values tie.
        if 'no unique mode' in e.args[0]:
            from collections import Counter

            counts = Counter(l)
            # Walk l in order so the earliest of the tied modes wins.
            # l[0] itself is not necessarily one of the modes.
            return max(l, key=counts.get)
        # this is for "StatisticsError: no mode for empty data"
        # after calling mode([])
        raise
Example:
>>> most_common(['a', 'b', 'b'])
'b'
>>> most_common([1, 2])
1
>>> most_common([])
StatisticsError: no mode for empty data
Simple one line solution
# (count, item) of the most frequent element; equal counts are resolved in
# favour of the larger item.  The loop variable no longer shadows chr().
moc = max((lst.count(item), item) for item in set(lst))
It will return most frequent element with its frequency.
You probably don't need this anymore, but this is what I did for a similar problem. (It looks longer than it is because of the comments.)
itemList = ['hi', 'hi', 'hello', 'bye']
counter = {}
maxItemCount = 0
for item in itemList:
    # dict.get supplies 0 for an item seen for the first time, which
    # replaces the try/except KeyError dance.
    counter[item] = counter.get(item, 0) + 1
    # Keep overwriting maxItemCount with the latest number,
    # if it's higher than the existing itemCount
    if counter[item] > maxItemCount:
        maxItemCount = counter[item]
        mostPopularItem = item
print(mostPopularItem)
ans = [1, 1, 0, 0, 1, 1]
all_ans = {ans.count(ans[i]): ans[i] for i in range(len(ans))}
print(all_ans)
all_ans={4: 1, 2: 0}
max_key = max(all_ans.keys())
4
print(all_ans[max_key])
1
#This will return the list sorted by frequency:
def orderByFrequency(list):
    """Return the items of ``list`` reordered by descending frequency.

    Equally frequent values appear in ascending order, and each value is
    repeated as often as it occurs.  Items must be hashable and sortable.
    Pure standard library: the result holds the original Python values.
    """
    from collections import Counter

    counts = Counter(list)
    ordered_values = sorted(counts, key=lambda value: (-counts[value], value))
    return [value for value in ordered_values for _ in range(counts[value])]


# And this will return a list with the most frequent values in a list:
def getMostFrequentValues(list):
    """Return all values that share the highest frequency, ascending.

    A list with at most one element is returned unchanged (same object).
    """
    if len(list) <= 1:
        return list
    from collections import Counter

    counts = Counter(list)
    top = max(counts.values())
    return sorted(value for value, count in counts.items() if count == top)
#tests:
print(getMostFrequentValues([]))
print(getMostFrequentValues([1]))
print(getMostFrequentValues([1,1]))
print(getMostFrequentValues([2,1]))
print(getMostFrequentValues([2,2,1]))
print(getMostFrequentValues([1,2,1,2]))
print(getMostFrequentValues([1,2,1,2,2]))
print(getMostFrequentValues([3,2,3,5,6,3,2,2]))
print(getMostFrequentValues([1,2,2,60,50,3,3,50,3,4,50,4,4,60,60]))
Results:
[]
[1]
[1]
[1, 2]
[2]
[1, 2]
[2]
[2, 3]
[3, 4, 50, 60]
Here:
def most_common(l):
    """Return the most frequent item of ``l``, or None if ``l`` is empty.

    The list is scanned in order (not via ``set(l)``), so unhashable items
    work and the lowest-index item wins ties.  O(n^2) due to ``l.count``.
    """
    best_count = 0  # named so that the builtin max() is not shadowed
    maxitem = None
    for x in l:
        count = l.count(x)
        if count > best_count:
            best_count = count
            maxitem = x
    return maxitem
I have a vague feeling there is a method somewhere in the standard library that will give you the count of each element, but I can't find it.
This is the obvious slow solution (O(n^2)) if neither sorting nor hashing is feasible, but equality comparison (==) is available:
def most_common(items):
    """Return the most frequent item using only ``==`` comparisons.

    Works for items that are neither hashable nor sortable, at O(n^2)
    cost.  Ties go to the item that appeared first.

    Raises:
        ValueError: if ``items`` is empty.
    """
    if not items:
        raise ValueError
    # Each entry is [item, count, position of this entry in fitems].
    # Positions follow first-appearance order.
    fitems = []
    best_idx = 0
    for item in items:
        item_missing = True
        i = 0
        for fitem in fitems:
            if fitem[0] == item:
                fitem[1] += 1
                d = fitem[1] - fitems[best_idx][1]
                if d > 0 or (d == 0 and fitems[best_idx][2] > fitem[2]):
                    best_idx = i
                item_missing = False
                break
            i += 1
        if item_missing:
            # No match: i equals len(fitems), the new entry's position.
            fitems.append([item, 1, i])
    # best_idx is a position in fitems, not in items.
    return fitems[best_idx][0]
But making your items hashable or sortable (as recommended by other answers) would almost always make finding the most common element faster if the length of your list (n) is large. O(n) on average with hashing, and O(n*log(n)) at worst for sorting.
>>> li = ['goose', 'duck', 'duck']
>>> def foo(li):
st = set(li)
mx = -1
for each in st:
temp = li.count(each)
if mx < temp:
mx = temp
h = each
return h
>>> foo(li)
'duck'
I needed to do this in a recent program. I'll admit it, I couldn't understand Alex's answer, so this is what I ended up with.
def mostPopular(l):
    """Return ``(element, count, first_index)`` of the most common element.

    The (index, element) pairs are sorted in reverse by (element, index).
    Equal elements are then adjacent and the last pair of each run holds
    that element's lowest index.  Ties on count go to the element with the
    lowest first index.  Returns ``(None, 0, 0)`` for an empty list.
    """
    mpEl = None
    mpIndex = 0
    mpCount = 0
    curEl = None
    curCount = 0
    for i, el in sorted(enumerate(l), key=lambda x: (x[1], x[0]), reverse=True):
        curCount = curCount + 1 if el == curEl else 1
        curEl = el
        if curCount > mpCount or (curCount == mpCount and i < mpIndex):
            mpEl, mpIndex, mpCount = curEl, i, curCount
    return mpEl, mpCount, mpIndex
I timed it against Alex's solution and it's about 10-15% faster for short lists, but once you go over 100 elements or more (tested up to 200000) it's about 20% slower.
def most_frequent(List):
    """Return the most frequent element of ``List``, first-seen on ties.

    Calls ``List.count`` for every element, so it runs in O(n^2) time.
    Raises IndexError for an empty list, because ``List[0]`` is read first.
    """
    counter = 0
    num = List[0]
    for i in List:
        curr_frequency = List.count(i)
        # Strict ">" keeps the earliest element among equal frequencies.
        if curr_frequency > counter:
            counter = curr_frequency
            num = i
    return num
List = [2, 1, 2, 2, 1, 3]
print(most_frequent(List))
Hi, this is a very simple solution. Note that it calls L.count() once per element, so its time complexity is quadratic, not linear.
L = ['goose', 'duck', 'duck']
def most_common(L):
    """Return the most repeated element of ``L``, or None if ``L`` is empty.

    On ties the element that appears first wins.  ``L.count`` is called
    once per element.
    """
    current_winner = 0
    max_repeated = None
    for i in L:
        amount_times = L.count(i)
        # Strict ">" keeps the earliest element among equal counts.
        if amount_times > current_winner:
            current_winner = amount_times
            max_repeated = i
    return max_repeated
print(most_common(L))
"duck"
Where number, is the element in the list that repeats most of the time
numbers = [1, 3, 7, 4, 3, 0, 3, 6, 3]
max_repeat_num = max(numbers, key=numbers.count)  # which number occurs most frequently
max_repeat = numbers.count(max_repeat_num)  # how many times
print(f" the number {max_repeat_num} is repeated{max_repeat} times")
def mostCommonElement(list):
    """Return the element with the highest count, or None for empty input.

    Single pass with a dict, so elements must be hashable.  When several
    elements share the highest count, the one that reached it first wins.
    """
    count = {}  # dict holder
    highest = 0  # keep track of the count by key
    result = None  # holder when count is greater than highest
    for i in list:
        count[i] = count.get(i, 0) + 1
        if count[i] > highest:
            highest = count[i]
            result = i
    return result


mostCommonElement(["a", "b", "a", "c"])  # -> "a"
The most common element should be the one which is appearing more than N/2 times in the array where N being the len(array). The below technique will do it in O(n) time complexity, with just consuming O(1) auxiliary space.
from collections import Counter
def majorityElement(arr):
    """Return the element occurring more than ``len(arr) / 2`` times, else -1.

    Boyer-Moore majority vote: one pass picks the only possible candidate
    and a second pass verifies it.  This needs O(n) time and O(1) extra
    space, and elements only have to support ``==``.
    """
    candidate, balance = None, 0
    for item in arr:
        if balance == 0:
            candidate, balance = item, 1
        elif item == candidate:
            balance += 1
        else:
            balance -= 1
    # A true majority always leaves a positive balance, but a positive
    # balance alone proves nothing, so count the candidate.
    if balance and sum(1 for item in arr if item == candidate) > len(arr) / 2:
        return candidate
    return -1
def most_common(lst):
    """Return the most frequent item, or False when every item is unique.

    The list is scanned in order, so unhashable items work and the
    lowest-index item wins ties.  Raises ValueError for an empty list.
    """
    winner = max(lst, key=lst.count)
    if lst.count(winner) == 1:
        return False
    return winner
def popular(L):
    """Return the most frequent element of ``L``, or None if ``L`` is empty.

    Elements must be hashable.  Among equally frequent elements, the one
    that appears first in ``L`` is returned.
    """
    C = {}
    for a in L:
        # One pass of counting instead of a full L.count(a) per element.
        C[a] = C.get(a, 0) + 1
    if not C:
        return None
    top = max(C.values())  # computed once, not once per key
    for b in C:
        if C[b] == top:
            return b


L = [2, 3, 5, 3, 6, 3, 6, 3, 6, 3, 7, 467, 4, 7, 4]
print(popular(L))

Categories

Resources