Basically I just need to figure out how to produce modes (numbers occurring most frequently) from a list in Python, whether or not that list has multiple modes?
Something like this:
def print_mode (thelist):
counts = {}
for item in thelist:
counts [item] = counts.get (item, 0) + 1
maxcount = 0
maxitem = None
for k, v in counts.items ():
if v > maxcount:
maxitem = k
maxcount = v
if maxcount == 1:
print "All values only appear once"
if counts.values().count (maxcount) > 1:
print "List has multiple modes"
else:
print "Mode of list:", maxitem
But instead of returning strings in the "All values only appear once," or "list has multiple modes," I would want it to return the actual integers that it's referencing?
Make a Counter, then pick off the most common elements:
from collections import Counter
from itertools import groupby
l = [1,2,3,3,3,4,4,4,5,5,6,6,6]
# group most_common output by frequency
freqs = groupby(Counter(l).most_common(), lambda x:x[1])
# pick off the first group (highest frequency)
print([val for val,count in next(freqs)[1]])
# prints [3, 4, 6]
def mode(arr):
if len(arr) == 0:
return []
frequencies = {}
for num in arr:
frequencies[num] = frequencies.get(num,0) + 1
mode = max([value for value in frequencies.values()])
modes = []
for key in frequencies.keys():
if frequencies[key] == mode:
modes.append(key)
return modes
This code can tackle with any list. Make sure, elements of the list are numbers.
new in python 3.8's statistics module there is a function for that:
import statistics as s
print("mode(s): ",s.multimode([1,1,2,2]))
output: mode(s): [1, 2]
Related
I wrote code for task, but get error: time-limit-exceeded from testing system.
I need to get advice how I can write this code faster and more precise
Code:
# input
n = int(input())
seq = input()
pairs = []
seq = list(seq)
# find pairs
counted = []
for i, item in enumerate(seq):
for j, num in enumerate(seq):
if (i != j) and (item == num):
if (i not in counted) and (j not in counted):
pairs.append((item, num))
counted.append(i)
counted.append(j)
# remove pairs from seq
for pair in pairs:
seq.remove(pair[0])
seq.remove(pair[1])
# create a palindrome
start = []
end = []
pairs = sorted(pairs)
pairs = list(reversed(pairs))
for item in pairs:
start.append(item[0])
end.append(item[1])
end = list(reversed(end))
if len(seq) != 0:
seq = [int(item) for item in seq]
max_el = list(sorted(seq))[-1]
start.append(max_el)
final_s = start + end
# output
output = ''.join([str(item) for item in final_s])
print(output)
It's an interesting problem and not completely trivial. First, I think the input can only have odd count of a single digit, otherwise it cannot be formed into a palindrome. For example, 11333 is a valid input, but 113334 is not (both 3 and 4 have odd counts). It should also be noted that we cannot just dump the odd-count digits in the middle of the output. For example, we might be tempted to do 1335551 -> 3155513, but the correct answer (largest palindrome) is 5315135.
Given these constraints, here's my attempt at a solution. It uses collections.Counter to count the digit pairs, which are then sorted in descending order and mirrored to create the output. The possible odd-count digit is handled by treating it as a single digit (which goes into the middle of the output), plus a bunch of paired digits.
I tested it for input sizes of 10^5 digits and it didn't seem to take much time at all.
from collections import Counter
def biggest_pal(n):
c = Counter(str(n))
s = ''
evens = {k: v for k, v in c.items() if not v % 2}
odds = {k: v for k, v in c.items() if v % 2}
vodd = ''
if len(odds) > 1:
raise ValueError('Invalid input')
elif odds:
vodd, nodd = odds.popitem()
if nodd > 1:
evens[vodd] = nodd - 1
for k, v in sorted(evens.items(), key=lambda p: -int(p[0])):
s += k * int(v/2)
return s + vodd + s[::-1]
Some test inputs:
biggest_pal(112) # 121
biggest_pal(1122) # 2112
biggest_pal(1234123) # 3214123
biggest_pal(1331555) # 5315135
biggest_pal(112212) # ValueError: invalid input
Like for 1234,4566,654,987; we see, we have 4 and 6 both with 3 as frequency. So, we'll receive the output as 6 because it's the bigger one.
So, the code which i thought as the solution is:
def MaxDigit(input1,input2,input3,input4):
arr=[input1,input2,input3,input4]
k=0
for i in range(1,10):
ask=[0]*i
for j in range(0,4):
while arr[j]!=0:
k=int(arr[j]%10)
arr[j]=int(arr[j]/10)
ask[k]+=1
So, after this we'll get ask list with no.s as indexes and frequency with value. I can code that further.
But it is showing index out of range error for last line i.e ask[k]+=1 which i'm unable to guess, why it's showing like that. Please help me with this.
If there could be an alternate code too, help me with it.
input = [234,4566,654,987]
digits = [int(n) for num in input for n in str(num)] # extracts each digit separately into a list as in [2, 3, 4, 4, 5, 6, 6, 6, 5, 4, 9, 8, 7]
Generating a frequency dictionary and sorting the dictionary based on your conditions, first on the decreasing order of values, and then on the decreasing order or the key.
digit_count = {i:digits.count(i) for i in set(digits)}
digit_count_sorted = sorted(digit_count.items(), key=lambda x: (-x[1], -x[0]))
digit_count_sorted[0][0] #prints the answer 6
You can implement it as a function :
def MaxDigit(input):
digits = [int(n) for num in input for n in str(num)]
digit_count = {i:digits.count(i) for i in set(digits)}
digit_count_sorted = sorted(digit_count.items(), key=lambda x: (-x[1], -x[0]))
return digit_count_sorted[0][0]
print(MaxDigit([234,4566,654,987])
Output :
6
Try this:
def MaxDigit(input1,input2,input3,input4):
s = '{}{}{}{}'.format(input1,input2,input3,input4)
maxCount = 0
maxDigit = 0
for digit in range(10):
count = s.count(str(digit))
if maxCount <= count:
maxCount = count
maxDigit = digit
return maxDigit
One way to implement this is with a Counter, converting all the numbers to strings and counting the digits. You can then find the maximum count from the counter and return the largest value that has that count:
from collections import Counter
def MaxDigit(*args):
counts = Counter(''.join(str(a) for a in args))
maxcount = counts.most_common(1)[0][1]
return int(max(v for v, c in counts.items() if c == maxcount))
print(MaxDigit(1234,4566,654,987))
Output:
6
As an alternative to finding the maximum count and filtering on that, you can sort the Counter descending on count and then key and then return the key of the first value:
def MaxDigit(*args):
counts = Counter(''.join(str(a) for a in args))
counts = sorted(counts.items(), key=lambda x:(-x[1], -int(x[0])))
return int(counts[0][0])
This code needs to find the most frequent k-mers (substrings of k letters) with d mismatches in a string (genome). In the past I had to find the most frequent k-mer without mismatches and I'm trying minimally alter my code. To do so, I would have to be able to increment values in a dictionary that have a different key from a string I'm passing. Is that possible? Below is my code. Is there a way to do what I have written in the comment? HammingDistance() just computes the number of differences between 2 strings.
import operator
def MostFrequentKmer (Text, k, d):
kmerDict = {}
freqKmers = list()
for i in range (0, len(Text)-k+1):
kmer = Text[i:i+k]
if kmer in kmerDict:
kmerDict[kmer] += 1
#elif a key exists for which HammingDistance(key, kmer) <= d, then increment the value associated with that key
else:
kmerDict[kmer] = 1
maxVal = max(zip(kmerDict.values()))[0]
for k, v in kmerDict.items():
if v == maxVal:
freqKmers.append(k)
print(sorted(freqKmers))
def HammingDistance (str1, str2):
hamDis = 0
for i in range(0, len(str1)):
if str1[i] != str2[i]:
hamDis += 1
return hamDis
Example IO is:
Input- ("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4, 1)
Output- ["ATGC", "ATGT", "GATG"]
Assuming you want to 1) increment the count for all closest keys and 2) add an entry if there are no closest keys, the below does what you want.
else:
close_keys = [k for k in kmerDict.keys() if HammingDistance(k, kmer) <= d]
if close_keys:
for k in close_keys:
kmerDict[k] += 1
else:
kmerDict[k] = 1
As an aside, please consider following python naming conventions, e.g., change HammingDistance to hamming_distance.
My task is:
To write a function that gets a string as an argument and returns the letter(s) with the maximum appearance in it.
Example 1:
s = 'Astana'
Output:
a
Example 2:
s = 'Kaskelen'
Output:
ke
So far, I've got this code(click to run):
a = input()
def most_used(w):
a = list(w)
indexes = []
g_count_max = a.count(a[0])
for letter in a:
count = 0
i = int()
for index in range(len(a)):
if letter == a[index] or letter == a[index].upper():
count += 1
i = index
if g_count_max <= count: //here is the problem.
g_count_max = count
if i not in indexes:
indexes.append(i)
letters = str()
for i in indexes:
letters = letters + a[i].lower()
return letters
print(most_used(a))
The problem is that it automatically adds first letter to the array because the sum of appearance of the first element is actually equal to the starter point of appearance(which is basically the first element).
Example 1:
s = 'hheee'
Output:
he
Example 2:
s = 'malaysia'
Output:
ma
I think what you're trying to can be much simplified by using the standard library's Counter object
from collections import Counter
def most_used(word):
# this has the form [(letter, count), ...] ordered from most to least common
most_common = Counter(word.lower()).most_common()
result = []
for letter, count in most_common:
if count == most_common[0][1]:
result.append(letter) # if equal largest -- add to result
else:
break # otherwise don't bother looping over the whole thing
return result # or ''.join(result) to return a string
You can use a dictionary comprehension with a list comprehension and max():
s = 'Kaskelen'
s_lower = s.lower() #convert string to lowercase
counts = {i: s_lower.count(i) for i in s_lower}
max_counts = max(counts.values()) #maximum count
most_common = ''.join(k for k,v in counts.items() if v == max_counts)
Yields:
'ke'
try this code using list comprehensions:
word = input('word=').lower()
letters = set(list(word))
max_w = max([word.count(item) for item in letters])
out = ''.join([item for item in letters if word.count(item)==max_w])
print(out)
Also you can import Counter lib:
from collections import Counter
a = "dagsdvwdsbd"
print(Counter(a).most_common(3)[0][0])
Then it returns:
d
I am doing python basic challenges this is one of them. What all I needed to do is to read through a file and print out the frequency of letters in decreasing order. I am able to do this but I wanted to enhance the program by also printing out the frequency percentage alongside with the letter - frequency - freq%. Something like this: o - 46 - 10.15%
This is what I did so far:
def exercise11():
import string
while True:
try:
fname = input('Enter the file name -> ')
fop = open(fname)
break
except:
print('This file does not exists. Please try again!')
continue
counts = {}
for line in fop:
line = line.translate(str.maketrans('', '', string.punctuation))
line = line.translate(str.maketrans('', '', string.whitespace))
line = line.translate(str.maketrans('', '', string.digits))
line = line.lower()
for ltr in line:
if ltr in counts:
counts[ltr] += 1
else:
counts[ltr] = 1
lst = []
countlst = []
freqlst = []
for ltrs, c in counts.items():
lst.append((c, ltrs))
countlst.append(c)
totalcount = sum(countlst)
for ec in countlst:
efreq = (ec/totalcount) * 100
freqlst.append(efreq)
freqlst.sort(reverse=True)
lst.sort(reverse=True)
for ltrs, c, in lst:
print(c, '-', ltrs)
exercise11()
As you can see I am able to calculate and sort the freq% on a different list but I am not able to include it in the tuple of the lst[] list alongside with the letter, freq. Is there any way to solve this problem?
Also if you have any other suggestions for my code. Please do mention.
Output Screen
Modification
Applying a simple modification as mentioned by #wwii I got the desired output. All I had to do is add one more parameter to the print statement while iterating the lst[] list. Previously I tried to make another list for the freq%, sort and then tried to insert it to the letters-count tuple in a list which didn't work out.
for ltrs, c, in lst:
print(c, '-', ltrs, '-', round(ltrs/totalcount*100, 2), '%')
Output Screen
Your count data is in a dictionary of {letter:count} pairs.
You can use the dictionary to calculate the total count like this:
total_count = sum(counts.values())
Then don't calculate the percentage till you are iterating over the counts...
for letter, count in counts.items():
print(f'{letter} - {count} - {100*count/total}') #Python v3.6+
#print('{} - {} - {}'.format(letter, count, 100*count/total) #Python version <3.6+
Or if you want to put it all in a list so you can sort it:
data = []
for letter, count in counts.items():
data.append((letter,count,100*count/total)
Using operator.itemgetter for the sort key function can help code readability.
import operator
letter = operator.itemgetter(0)
count = operator.itemgetter(1)
frequency = operator.itemgetter(2)
data.sort(key=letter)
data.sort(key=count)
data.sort(key=frequency)
Tuples are immutable which is probably the issue you are finding. The other issue is the simple form of the sort function; A more-advanced sort function would serve you well. See below:
The list-of-tuples format of lst, but because tuples are immutable whereas lists are mutable, opting to change lst to a list-of-lists is a valid approach. Then, since lst is a list-of-lists with each element consisting of 'letter,count,frequency%', the sort function with lambda can be used to sort by whichever index you'd like. The following is to be inserted after your for line in fop: loop.
lst = []
for ltrs, c in counts.items():
lst.append([ltrs,c])
totalcount = sum([x[1] for x in lst]) # sum all 'count' values in a list comprehension
for elem in lst:
elem.append((elem[1]/totalcount)*100) # now that each element in 'lst' is a mutable list, you can append the calculated frequency to the respective element in lst
lst.sort(reverse=True,key=lambda lst:lst[2]) # sort in-place in reverse order by index 2.
The items in freqlst,countlist, and lst are related to each other by their position. If any are sorted that relationship is lost.
zipping the lists together before sorting will maintain the relationship.
Will pick up from your list initialization lines.
lst = []
countlst = []
freqlst = []
for ltr, c in counts.items():
#change here, lst now only contains letters
lst.append(ltr)
countlst.append(c)
totalcount = sum(countlst)
for ec in countlst:
efreq = (ec/totalcount) * 100
freqlst.append(efreq)
#New stuff here: Note this only works in python 3+
zipped = zip(lst, countlst, freqlst)
zipped = sorted(zipped, key=lambda x: x[1])
for ltr, c, freq in zipped:
print("{} - {} - {}%".format(ltr, c, freq)) # love me the format method :)
Basically, zip combines lists together into a list of tuples. Then you can use a lambda function to sort those tuples (very common stack question)
I think I was able to achieve what you wanted by using lists instead of tuples. Tuples cannot be modified, but if you really want to know how click here
(I also added the possibility to quit the program)
Important: Never forget to comment your code
The code:
def exercise11():
import string
while True:
try:
fname = input('Enter the file name -> ')
print('Press 0 to quit the program') # give the User the option to quit the program easily
if fname == '0':
break
fop = open(fname)
break
except:
print('This file does not exists. Please try again!')
continue
counts = {}
for line in fop:
line = line.translate(str.maketrans('', '', string.punctuation))
line = line.translate(str.maketrans('', '', string.whitespace))
line = line.translate(str.maketrans('', '', string.digits))
line = line.lower()
for ltr in line:
if ltr in counts:
counts[ltr] += 1
else:
counts[ltr] = 1
lst = []
countlst = []
freqlst = []
for ltrs, c in counts.items():
# add a zero as a place holder &
# use square brakets so you can use a list that you can modify
lst.append([c, ltrs, 0])
countlst.append(c)
totalcount = sum(countlst)
for ec in countlst:
efreq = (ec/totalcount) * 100
freqlst.append(efreq)
freqlst.sort(reverse=True)
lst.sort(reverse=True)
# count the total of the letters
counter = 0
for ltrs in lst:
counter += ltrs[0]
# calculate the percentage for each letter
for letter in lst:
percentage = (letter[0] / counter) * 100
letter[2] += float(format(percentage, '.2f'))
for i in lst:
print('The letter {} is repeated {} times, which is {}% '.format(i[1], i[0], i[2]))
exercise11()
<?php
$fh = fopen("text.txt", 'r') or die("File does not exist");
$line = fgets($fh);
$words = count_chars($line, 1);
foreach ($words as $key=>$value)
{
echo "The character <b>' ".chr($key)." '</b> was found <b>$value</b> times. <br>";
}
?>