Convert array of letters into their counts with Python - python

I have an array, for example
[A,A,A,B,B,C,A,A,D,D,D,B]
I want to count the entries and convert it to this
[3A,2B,1C,2A,3D,1B]
I have tried a load of if else type logic but I'm having issues. Is there a neat way to do this?

Seems like a good use of itertools.groupby:
from itertools import groupby
l = ['A','A','A','B','B','C','A','A','D','D','D','B']
[f'{len(list(g))}{k}' for k, g in groupby(l)]
# ['3A', '2B', '1C', '2A', '3D', '1B']

This is one way to solve this problem.
letters = ['A', 'A', 'A', 'B', 'B', 'C', 'A', 'A', 'D', 'D', 'D']
previous_letter = letters[0]
counter = 1
res = list()
for i in range(1, len(letters)):
current_letter = letters[i]
if current_letter == previous_letter:
counter += 1
else:
res.append(f"{counter}{current_letter}")
previous_letter = letters[i]
counter = 1
res.append(f"{counter}{previous_letter}")
print(res)
The trick is in checking a change of letters and keeping track of the count.

you could try using list.count() to get the number of times the first item appears then use list.pop(list.index()) in a for loop to remove all occurences of the first item, do this in a while loop until len(lst) returns 0
out = []
while len(lst) > 0:
s = lst[0]
c = 0
while len(lst) > 0:
if lst[0] == s:
c += 1
lst.pop(0)
out.append(f"{c}{s}")

Related

how to separate alternating digits and characters in string into dict or list?

'L134e2t1C1o1d1e1'
the original string is "LeetCode"
but I need to separate strings from digits, digits can be not only single-digit but also 3-4 digits numbers like 345.
My code needs to separate into dict of key values; keys are characters and numbers is the digit right after the character. Also create 2 lists of separate digits, letters only.
expected output:
letters = ['L', 'e', 't', 'C', 'o', 'd', 'e']
digits = [134,2,1,1,1,1,1]
This code is not properly processing this.
def f(s):
d = dict()
letters = list()
# letters = list(filter(lambda x: not x.isdigit(), s))
i = 0
while i < len(s):
print('----------------------')
if not s[i].isdigit():
letters.append(s[i])
else:
j = i
temp = ''
while j < len(s) and s[j].isdigit():
j += 1
substr = s[i:j]
print(substr)
i += 1
print('----END -')
print(letters)
With the following modification your function separates letters from digits in s:
def f(s):
letters = list()
digits = list()
i = 0
while i < len(s):
if not s[i].isdigit():
letters.append(s[i])
i += 1
else:
j = i
temp = ''
while j < len(s) and s[j].isdigit():
j += 1
substr = s[i:j]
i = j
digits.append(substr)
print(letters)
print(digits)
f('L134e2t1C1o1d1e1')
As said in my comments you didn't update i after the inner loop terminates which made i go back to a previous and already processed index.
If I would be limited to not use regex I would do it following way
text = 'L134e2t1C1o1d1e1'
letters = [i for i in text if i.isalpha()]
digits = ''.join(i if i.isdigit() else ' ' for i in text).split()
print(letters)
print(digits)
output
['L', 'e', 't', 'C', 'o', 'd', 'e']
['134', '2', '1', '1', '1', '1', '1']
Explanation: for letters I use simple list comprehension with condition, .isalpha() is str method which check if string (in this consisting of one character) is alphabetic. For digits (which should be rather called numbers) I replace non-digits using single space, turn that into string using ''.join then use .split() (it does split on one or more whitespaces). Note that digits is now list of strs rather than ints, if that is desired add following line:
digits = list(map(int,digits))
Your string only had two e's, so I've added one more to complete the example. This is one way you could do it:
import re
t = 'L1e34e2t1C1o1d1e1'
print(re.sub('[^a-zA-Z]', '', t))
Result:
LeetCode
I know you cannot use regex, but to complete this answer, I'll just add a solution:
def f(s):
d = re.findall('[0-9]+', s)
l = re.findall('[a-zA-Z]', s)
print(d)
print(l)
f(t)
Result:
['134', '2', '1', '1', '1', '1', '1']
['L', 'e', 't', 'C', 'o', 'd', 'e']
You edited your question and I got a bit confused, so here is a really exhaustive code giving you a list of letters, list of the numbers, the dict with the number associated with the number, and finally the sentence with corresponding number of characters ...
def f(s):
letters = [c for c in s if c.isalpha()]
numbers = [c for c in s if c.isdigit()]
mydict = {}
currentKey = ""
for c in s:
print(c)
if c.isalpha():
mydict[c] = [] if c not in mydict.keys() else mydict[c]
currentKey = c
elif c.isdigit():
mydict[currentKey].append(c)
sentence = ""
for i in range(len(letters)):
count = int(numbers[i])
while count > 0:
sentence += letters[i]
count -= 1
print(letters)
print(numbers)
print(mydict)
print(sentence)
letters = []
digits = []
dig = ""
for letter in 'L134e2t1C1o1d1e1':
if letter.isalpha():
# do not add empty string to list
if dig:
# append dig to list of digits
digits.append(dig)
dig = ""
letters.append(letter)
# if it is a actual letter continue
continue
# add digits to `dig`
dig = dig + letter
Try this. The idea is to skip all actual letters and add the digits to dig.
I know there's an accepted answer but I'll throw this one in anyway:
letters = []
digits = []
lc = 'L134e2t1C1o1d1e1'
n = None
for c in lc:
if c.isalpha():
if n is not None:
digits.append(n)
n = None
letters.append(c)
else:
if n is None:
n = int(c)
else:
n *= 10
n += int(c)
if n is not None:
digits.append(n)
for k, v in zip(letters, digits):
dct.setdefault(k, []).append(v)
print(letters)
print(digits)
print(dct)
Output:
['L', 'e', 't', 'C', 'o', 'd', 'e']
[134, 2, 1, 1, 1, 1, 1]
{'L': [134], 'e': [2, 1], 't': [1], 'C': [1], 'o': [1], 'd': [1]}

Is there better way to check value changed in sequence?

I have a list like below:
list = ['A', 'A', 'B', 'A', 'B', 'A', 'B']
And, I want to count the number of the first value (in case above, 'A') consecutively before the other values ('B') come.
So I wrote a code like:
history = list[0]
number_A = 0
number_B = 0
for i in list:
if history != i:
break
if i == 'A':
number_A += 1
history = 'A'
else:
number_B += 1
history = 'B'
However, I think this is very untidy.
Is there any more simple way to do this process?
Thank you for reading.
Using groupby with the default key function, you can count the number of items in the first grouper:
from itertools import groupby
def count_first(lst):
if not lst:
return 0
_, grouper = next(groupby(lst))
return sum(1 for _ in grouper)
print(count_first(['A', 'A', 'B', 'A', 'B', 'A', 'B']))
# 2
There is no reason for the "else" clause, you are not going to count 'B's since you are going to break before you get there.
lst = ['A', 'A', 'B', 'A', 'B', 'A', 'B']
count = 0
for i in lst:
if i != lst[0]:
break
count += 1
print("list starts with %d of %s's" % (count, lst[0]))
You could use takewhile:
from itertools import takewhile
my_list = ['A', 'A', 'B', 'A', 'B', 'A', 'B']
res = takewhile(lambda x: x == my_list[0], my_list)
print(len(list(res)))
OUT: 2
I renamed your list to lst in order to not override the builtin name list.
>>> lst = ['A', 'A', 'B', 'A', 'B', 'A', 'B']
>>> string = ''.join(lst)
>>> len(string) - len(string.lstrip('A'))
2

How to delete items repeated less than k in a list

In a python list, I want to delete all elements repeated less than 'k'.
for example if k == 3 then if our list is:
l = [a,b,c,c,c,a,d,e,e,d,d]
then the output must be:
[c,c,c,d,d,d]
what is a fast way to do that (my data is large), any good pythonic suggestion?
this is what I coded but I don't think it is the fastest and most pythonic way:
from collections import Counter
l = ['a', 'b', 'c', 'c', 'c', 'a', 'd', 'e', 'e', 'd', 'd']
counted = Counter(l)
temp = []
for i in counted:
if counted[i] < 3:
temp.append(i)
new_l = []
for i in l:
if i not in temp:
new_l.append(i)
print(new_l)
You can use collections.Counter to construct a dictionary mapping values to counts. Then use a list comprehension to filter for counts larger than a specified value.
from collections import Counter
L = list('abcccadeedd')
c = Counter(L)
res = [x for x in L if c[x] >=3]
# ['c', 'c', 'c', 'd', 'd', 'd']
A brute-force option would be to get the number of occurrences per item, then filter that output. The collections.Counter object works nicely here:
l = [a,b,c,c,c,a,d,e,e,d,d]
c = Counter(l)
# Counter looks like {'a': 2, 'b': 1, 'c': 3...}
l = [item for item in l if c[item]>=3]
Under the hood, Counter acts as a dictionary, which you can build yourself like so:
c = {}
for item in l:
# This will check if item is in the dictionary
# if it is, add to current count, if it is not, start at 0
# and add 1
c[item] = c.get(item, 0) + 1
# And the rest of the syntax follows from here
l = [item for item in l if c[item]>=3]
I would use a Counter from collections:
from collections import Counter
count_dict = Counter(l)
[el for el in l if count_dict[el]>2]
Any drawback with this option?
l = ['a','b','c','c','c','a','d','e','e','d','d']
res = [ e for e in l if l.count(e) >= 3]
#=> ['c', 'c', 'c', 'd', 'd', 'd']

How to combine two elements of a list based on a given condition

I want to combine two elements in a list based on a given condition.
For example if I encounter the character 'a' in a list, I would like to combine it with the next element. The list:
['a', 'b', 'c', 'a', 'd']
becomes
['ab', 'c', 'ad']
Is there any quick way to do this?
One solution I have thought of is to create a new empty list and iterate through the first list. As we encounter the element 'a' in list 1, we join list1[index of a] and list1[index of a + 1] and append the result to list 2. However I wanted to know if there is any way to do it without creating a new list and copying values into it.
This does not create a new list, just modifies the existing one.
l = ['a', 'b', 'c', 'a', 'd']
for i in range(len(l)-2, -1, -1):
if l[i] == 'a':
l[i] = l[i] + l.pop(i+1)
print(l)
If you don't want to use list comprehension to create a new list (maybe because your input list is huge) you could modify the list in-place:
i=0
while i < len(l):
if l[i]=='a':
l[i] += l.pop(i+1)
i += 1
Use a list comprehension with an iterator on your list. When the current iteratee is a simply join it with the next item from the iterator using next:
l = ['a', 'b', 'c', 'a', 'd']
it = iter(l)
l[:] = [i+next(it) if i == 'a' else i for i in it]
print l
# ['ab', 'c', 'ad']
Well, if you don't want to create a new list so much, here we go:
from itertools import islice
a = list("abcdabdbac")
i = 0
for x, y in zip(a, islice(a, 1, None)):
if x == 'a':
a[i] = x + y
i += 1
elif y != 'a':
a[i] = y
i += 1
try:
del a[i:]
except:
pass
you could use itertools.groupby and group by:
letter follows a or
letter is not a
using enumerate to generate the current index, which allows to fetch the previous element from the list (creating a new list but one-liner)
import itertools
l = ['a', 'b', 'c', 'a', 'd']
print(["".join(x[1] for x in v) for _,v in itertools.groupby(enumerate(l),key=lambda t: (t[0] > 0 and l[t[0]-1]=='a') or t[1]=='a')])
result:
['ab', 'c', 'ad']
This is easy way. Mb not pythonic way.
l1 = ['a', 'b', 'c', 'a', 'd']
do_combine = False
combine_element = None
for el in l1:
if do_combine:
indx = l1.index(el)
l1[indx] = combine_element + el
do_combine = False
l1.remove(combine_element)
if el == 'a':
combine_element = el
do_combine = True
print(l1)
# ['ab', 'c', 'ad']

How to append count numbers to duplicates in a list in Python?

Here is a list containing duplicates:
l1 = ['a', 'b', 'c', 'a', 'a', 'b']
Here is the desired result:
l1 = ['a', 'b', 'c', 'a_1', 'a_2', 'b_1']
How can the duplicates be renamed by appending a count number?
Here is an attempt to achieve this goal; however, is there a more Pythonic way?
for index in range(len(l1)):
counter = 1
list_of_duplicates_for_item = [dup_index for dup_index, item in enumerate(l1) if item == l1[index] and l1.count(l1[index]) > 1]
for dup_index in list_of_duplicates_for_item[1:]:
l1[dup_index] = l1[dup_index] + '_' + str(counter)
counter = counter + 1
In Python, generating a new list is usually much easier than changing an existing list. We have generators to do this efficiently. A dict can keep count of occurrences.
l = ['a', 'b', 'c', 'a', 'a', 'b']
def rename_duplicates( old ):
seen = {}
for x in old:
if x in seen:
seen[x] += 1
yield "%s_%d" % (x, seen[x])
else:
seen[x] = 0
yield x
print list(rename_duplicates(l))
I would do something like this:
a1 = ['a', 'b', 'c', 'a', 'a', 'b']
a2 = []
d = {}
for i in a1:
d.setdefault(i, -1)
d[i] += 1
if d[i] >= 1:
a2.append('%s_%d' % (i, d[i]))
else:
a2.append(i)
print a2
Based on your comment to #mathmike, if your ultimate goal is to create a dictionary from a list with duplicate keys, I would use a defaultdict from the `collections Lib.
>>> from collections import defaultdict
>>> multidict = defaultdict(list)
>>> multidict['a'].append(1)
>>> multidict['b'].append(2)
>>> multidict['a'].append(11)
>>> multidict
defaultdict(<type 'list'>, {'a': [1, 11], 'b': [2]})
I think the output you're asking for is messy itself, and so there is no clean way of creating it.
How do you intend to use this new list? Would a dictionary of counts like the following work instead?
{'a':3, 'b':2, 'c':1}
If so, I would recommend:
from collections import defaultdict
d = defaultdict(int) # values default to 0
for key in l1:
d[key] += 1
I wrote this approach for renaming duplicates in a list with any separator and a numeric or alphabetical postfix (e.g. _1, _2 or _a, _b, _c etc.). Might not be the best you could write efficient-wise, but I like this as a clean readable code which is also scalable easily.
def rename_duplicates(label_list, seperator="_", mode="numeric"):
"""
options for 'mode': numeric, alphabet
"""
import string
if not isinstance(label_list, list) or not isinstance(seperator, str):
raise TypeError("lable_list and separator must of type list and str, respectively")
for item in label_list:
l_count = label_list.count(item)
if l_count > 1:
if mode == "alphabet":
postfix_str = string.ascii_lowercase
if len(postfix_str) < l_count:
# do something
pass
elif mode == "numeric":
postfix_str = "".join([str(i+1) for i in range(l_count)])
else:
raise ValueError("the 'mode' could be either 'numeric' or 'alphabet'")
postfix_iter = iter(postfix_str)
for i in range(l_count):
item_index = label_list.index(item)
label_list[item_index] += seperator + next(postfix_iter)
return label_list
label_list = ['a', 'b', 'c', 'a', 'a', 'b']
use the function:
rename_duplicates(label_list)
result:
['a_1', 'b_1', 'c', 'a_2', 'a_3', 'b_2']

Categories

Resources