count the same character which comes in sequence - python

def count_squences(string):
i= 0
total = 0
total_char_list = []
while i < len(string):
print(string[i])
if string[i] == "x":
total += 1
if string[i] == "y":
total_char_list.append(total)
total = 0
i = i + 1
return total_char_list
print(count_squences("xxxxyyxyxx"))
I am trying to return the lengths of the consecutive runs of x characters in a string. For example, this function should return [4, 1, 2].
For example, if the string is "xxxxxyxxyxxx" it should return [5, 2, 3].
My function does not return the correct list. Any help would be really appreciated. Thanks

You are not resetting your counter when you encounter a y character, and you should only append to total_char_list if there was at least one x character counted by the time you find a y character (y characters could be duplicated too):
total = 0
while i < len(string):
if string[i] == "x":
total += 1
if string[i] == "y":
if total:
total_char_list.append(total)
total = 0
i = i + 1
Next, when the loop ends and total is not zero, you need to append that value too, or you won't be counting 'x' characters at the end of the sequence:
while ...:
# ...
if total:
# x characters at the end
total_char_list.append(total)
Next, you really want to use a for loop to loop over a sequence. You are given the individual characters that way:
# Fragment: expects `string` and an existing `total_char_list` from the enclosing function.
total = 0
for char in string:
    if char == 'x':
        total += 1
    elif char == 'y':
        # A 'y' ends the current run; only record runs that contain at least one 'x'.
        if total:
            total_char_list.append(total)
        total = 0
if total:
    # x characters at the end
    total_char_list.append(total)
You can make this faster with itertools.groupby():
from itertools import groupby
def count_squences(string):
    """Return the length of every run of consecutive 'x' characters in *string*.

    Runs are reported left to right. Any other character only separates runs.
    """
    return [sum(1 for _ in group) for char, group in groupby(string) if char == 'x']
groupby() divides up an iterable input (such as a string) into separate iterators per group, where a group is defined as any consecutive value with the same key(value) result. The default key() function just returns the value, so groupby(string) gives you groups of consecutive characters that are the same. char is the repeated character, and sum(1 for _ in group) takes the length of an iterator.
You can then make it more generic, and count all groups:
def count_all_sequences(string):
    """Map each character of *string* to the lengths of its consecutive runs.

    The run lengths for a character are listed in the order the runs occur.
    """
    counts = {}
    for char, group in groupby(string):
        # groupby() yields an iterator per run, so count its items to get the length.
        counts.setdefault(char, []).append(sum(1 for _ in group))
    return counts
The same can be done with a regular expression:
import re
def count_all_sequences(string):
    """Map each character of *string* to the lengths of its consecutive runs.

    The run lengths for a character are listed in the order the runs occur.
    Line breaks are treated like any other character.
    """
    counts = {}
    # (.)(\1*) finds repeated characters; (.) matching one, \1 matching the same
    # This gives us (first, rest) tuples, so len(rest) + 1 is the total length
    # re.DOTALL makes "." match newlines as well; without it, runs of line
    # breaks would be silently skipped.
    for char, group in re.findall(r'(.)(\1*)', string, flags=re.DOTALL):
        counts.setdefault(char, []).append(len(group) + 1)
    return counts

You don't reset the value of total between the sequences, so it keeps on counting.
def count_squences(string):
    """Return the lengths of the runs of 'x' in *string*, with 'y' ending a run.

    Characters other than 'x' and 'y' are ignored: they neither extend nor
    end the current run.
    """
    i = 0
    total = 0
    total_char_list = []
    while i < len(string):
        if string[i] == "x":
            total += 1
        elif string[i] == "y":
            # A 'y' closes the current run; record it only if it is non-empty.
            if total:
                total_char_list.append(total)
            total = 0
        i += 1
    # The string may end inside a run that no 'y' ever closed.
    if total:
        total_char_list.append(total)
    return total_char_list
Update (17:00) - fixed the original procedure and I thought of a better solution -
my_str = "xxxxyyxyxx"
# "if z" drops the empty strings re.split() produces when my_str starts or
# ends with "y"; otherwise they would show up as bogus 0 counts.
[len(z) for z in re.split("y+", my_str) if z]

Edited for function format:
def count_sequences(string):
    """Return the length of each maximal run of 'x' in *string*, left to right."""
    return [len(x) for x in re.findall(r"x+", string)]
count_sequences("xxxxyyxyxx")
returns [4,1,2]

Related

Count number of letters until one letters has changed

I want to count the number of identical letters at the beginning of two words (comparing letter by letter) until one letter differs, and return the word that has the most leading letters in common.
This is my code :
def same(word, liste):
letter = 0
dico = dict()
for i in liste:
while word[letter] == i[letter]:
letter += 1;
dico[i] = letter;
letter = 0;
same = max(dico, key=dico.get)
return same
But I always get this "string index out of range" error. I've tried many different ways, but nothing works.
while word[letter] == i[letter]:
IndexError: string index out of range
In input :
same('hello',['hi,'hell','helo'])
Output:
'hell'
Thanks
I would just use a list comprehension along with basic substring logic:
def same(word, liste):
    """Return the entry of *liste* sharing the longest leading part with *word*.

    On a tie, the entry that comes first in *liste* wins.

    Raises:
        ValueError: if *liste* is empty.
    """
    def shared(x):
        # Every leading slice of x that word also starts with; the empty slice
        # always qualifies, so the list is never empty.
        return max([n for n in range(len(x) + 1) if word.startswith(x[:n])])

    # Compare leading slices rather than testing "x in word": a candidate such
    # as 'ello' is a substring of 'hello' but shares no leading letters with it.
    return max(liste, key=shared)
print(same('hello',['hi','hell','helo'])) # hell
You must not index past the end of either word in your while loop.
Check the index against the length of both words before evaluating word[letter] == i[letter]:
while letter < len(word) and letter < len(i) and word[letter] == i[letter]:
gives you :
def same(word, liste):
    """Return the entry of *liste* with the most leading letters in common with *word*.

    On a tie, the entry that comes first in *liste* wins.

    Raises:
        ValueError: if *liste* is empty.
    """
    dico = dict()
    for i in liste:
        letter = 0
        # Stop at the end of the shorter word so neither index goes out of range.
        limit = min(len(word), len(i))
        while letter < limit and word[letter] == i[letter]:
            letter += 1
        dico[i] = letter
    return max(dico, key=dico.get)
print(same('blablabla',['blaze','bli']))
----------
>>> blaze
A combination of zip, sum and max should give you the result -
def same(word, liste):
    """Return the entry of *liste* with the most leading letters in common with *word*.

    On a tie, the entry that comes first in *liste* wins.

    Raises:
        ValueError: if *liste* is empty.
    """
    from itertools import takewhile

    def prefix_len(other):
        # Stop at the first mismatch: only the leading run of equal letters
        # counts, not equal letters at arbitrary later positions.
        pairs = takewhile(lambda pair: pair[0] == pair[1], zip(word, other))
        return sum(1 for _ in pairs)

    match_len = [prefix_len(x) for x in liste]
    return liste[match_len.index(max(match_len))]
Yet another solution:
def same(word, liste):
    """Return the entry of *liste* with the most leading letters in common with *word*.

    On a tie, the entry that comes first in *liste* wins.

    Raises:
        ValueError: if *liste* is empty.
    """
    def get_score(word, other):
        # Index of the first mismatch == number of leading letters in common.
        for i, (l1, l2) in enumerate(zip(word, other)):
            if l1 != l2:
                return i
        # No mismatch found: the whole shorter word is shared. This also
        # covers an empty word, where the loop body never runs.
        return min(len(word), len(other))

    scores = {other: get_score(word, other) for other in liste}
    return max(scores, key=scores.get)
In this case, I define the function get_score to count the number of common letters in a pair of words. I am sure to not run in IndexError because of the zip that makes an iterator over the 2 words.
Then I just did the same as you to get the word associated with the greatest score.

Given a string of digits, return the longest substring with alternating odd/even or even/odd digits

So I was doing this python challenge for fun, but for some reason it is saying I am incorrect and I have no idea why. The challenge prompt said this:
Given a string of digits, return the longest substring with alternating odd/even or even/odd digits. If two or more substrings have the same length, return the substring that occurs first.
Examples
longest_substring("225424272163254474441338664823") ➞ "272163254"
# substrings = 254, 272163254, 474, 41, 38, 23
longest_substring("594127169973391692147228678476") ➞ "16921472"
# substrings = 94127, 169, 16921472, 678, 476
longest_substring("721449827599186159274227324466") ➞ "7214"
# substrings = 7214, 498, 27, 18, 61, 9274, 27, 32
# 7214 and 9274 have same length, but 7214 occurs first.
Notes
The minimum alternating substring size is 2.
The code I wrote for a solution was this:
def longest_substring(digits):
substring = []
final = []
loop = 0 # just loops through the for loop
start = 0
loop2 = 0
while loop+1 < len(digits):
num = int(digits[loop])
num2 = int(digits[loop+1])
if (num + num2)%2 != 0 and start == 0:
substring.append(num)
substring.append(num2)
start += 1
elif (num + num2)%2 != 0:
substring.append(num2)
else:
start = 0
loop2 += 1
final.append(substring.copy())
substring.clear()
loop += 1
sorted_list = list(sorted(final, key=len))
if len(sorted_list[-1]) == len(sorted_list[-2]):
index1 = final.index(sorted_list[-1])
index2 = final.index(sorted_list[-2])
if index1 < index2: # because the larger than not first
sorted_list = sorted_list[-1]
else:
sorted_list = sorted_list[-2]
sorted_list = str(sorted_list)
sorted_list = sorted_list.replace('[','')
sorted_list = sorted_list.replace(']', '')
sorted_list = sorted_list.replace(', ','')
return str(sorted_list) # or print(str(sorted_list)) neither works
If you're curious the challenge is here
I would actually prefer using a shorter code. It's easier to look for errors IMO.
Does this work for you?
def longest_substring(digits):
    """Return the first longest substring of *digits* whose digits alternate in parity.

    Every start position is tried and extended while neighbouring digits
    differ in parity. A later candidate only replaces the current best if it
    is strictly longer, so the earliest of equally long candidates wins.
    Returns '' for an empty string.
    """
    ans = ''
    for i in range(len(digits)):
        end = i + 1
        while end < len(digits) and int(digits[end]) % 2 != int(digits[end - 1]) % 2:
            end += 1
        temp = digits[i:end]
        if len(temp) > len(ans):
            ans = temp
    return ans
Here's something a bit simpler:
def longest_substring(digits):
    """Return the first longest substring of *digits* whose digits alternate in parity.

    Works in a single pass: `current` is the run being extended and `longest`
    the best run seen so far. Returns '' for an empty string.
    """
    if not digits:
        # digits[0] below would raise IndexError on empty input.
        return ""
    current = longest = digits[0]
    for digit in digits[1:]:
        if int(digit) % 2 != int(current[-1]) % 2:
            current += digit
        else:
            # max() keeps its first argument on ties, so the earlier run wins.
            longest = max(longest, current, key=len)
            current = digit
    return max(longest, current, key=len)
The bit about how you're picking the indexes at the end doesn't always work, leading to cases where your list ends up with all of the possibilities in it. If, instead you modify the code to be explicit, and run through the entire list after you create sorted_list:
best = []
for cur in sorted_list:
if len(cur) > len(best):
best = cur
return "".join([str(x) for x in best])
The rest of your implementation will work.
And for kicks, I took a pass at simplifying it:
def longest_substring(digits):
    """Return the first longest substring of *digits* whose digits alternate in parity.

    Returns '' for an empty string.
    """
    possible, best = "", ""
    # The trailing space is a sentinel that forces the final run to be compared.
    for x in digits + " ":
        if x == " " or (possible and int(possible[-1]) % 2 == int(x) % 2):
            # The run ended; strict ">" keeps the earliest of equally long runs.
            if len(possible) > len(best):
                best = possible
            possible = ""
        possible += x
    return best

How to delete specific elements from an array while not deleting its later occurences

Take an integer as input from the user, then delete from an array the elements that occur that many times consecutively.
Eg the input array is "aabcca" and the input from the user is 2.
Then the answer should be "ba".
I tried it when the elements are not repeated. My code works perfectly for examples like "aaabbccc".
for j in range(t, (n+1)):
if (t == n):
if (count == k):
array = [x for x in array if x != temp]
print array
exit()
if (t == n and count == k):
array = [x for x in array if x != temp]
print array
exit()
if temp == data[j]:
count += 1
t += 1
if temp != data[j]:
if count == k:
array = [x for x in array if x != temp]
temp = data[t]
count = 1
t += 1
You can use a sliding window (two pointers) to solve it.
The key point is to use a [start, end] range to record each consecutive sequence, and only add the sequences whose length is less than n:
def delete_consecutive(s, n):
    """Return *s* without the runs of identical characters that are *n* or longer.

    A run is a maximal stretch of the same character. Runs shorter than *n*
    are kept unchanged and in order.
    """
    res = []
    start = 0
    for end in range(1, len(s) + 1):
        # s[start:end] is a complete run once the next character differs or
        # the string ends.
        if end == len(s) or s[end] != s[start]:
            # only add consecutive seq less than n
            if end - start < n:
                res.append(s[start:end])
            start = end
    return ''.join(res)
test and output:
print(delete_consecutive('aabcca', 2)) # output: ba
print(delete_consecutive('aaabbccc', 3)) # output: bb
Hope that helps you, and comment if you have further questions. : )
Here is one way to do that:
def remove_consecutive(s, n):
    """Return *s* with every full block of *n* identical consecutive characters removed.

    From each run of identical characters, only the remainder
    (run length modulo *n*) is kept.

    Raises:
        ZeroDivisionError: if *n* is 0.
    """
    # Pieces of string of result
    out = []
    # Index where the current run of identical characters starts
    run_start = 0
    for i in range(len(s) + 1):
        # A run is complete when the string ends or the character changes
        if i == len(s) or s[i] != s[run_start]:
            # Characters left over after dropping whole blocks of n
            leftover = (i - run_start) % n
            out.append(s[i - leftover:i])
            run_start = i
    return ''.join(out)
print(remove_consecutive('aabcca', 2))
# ba
print(remove_consecutive('aaabbccc', 2))
# ac
print(remove_consecutive('aaabbccc', 3))
# bb

Counting occurrences of a sub-string without using a built-in function

My teacher challenged me to find a way to count the occurrences of the word "bob" in any random string variable without str.count(). So I did,
a = "dfjgnsdfgnbobobeob bob"
compteurDeBob = 0
for i in range (len(a) - 1):
if a[i] == "b":
if a[i+1] == "o":
if a[i+2] == "b":
compteurDeBob += 1
print(compteurDeBob)
but I wanted to find a way to do that with a word of any length as shown below, but I have no clue on how to do that...
a = input("random string: ")
word = input("Wanted word: ")
compteurDeBob = 0
for i in range (len(a)-1):
#... i don't know...
print(compteurDeBob)
# Count the (possibly overlapping) occurrences of `word` in `a`.
a = input("random string: ")
word = input("Wanted word: ")
count = 0
# "+ 1" so the last possible start position is checked too; without it a
# match at the very end of `a` is missed.
for i in range(len(a) - len(word) + 1):
    if a[i:i + len(word)] == word:
        count += 1
print(count)
If you want your search to be case-insensitive, then you can use lower() function:
# Case-insensitive count: both inputs are lower-cased before comparing.
a = input("random string: ").lower()
word = input("Wanted word: ").lower()
count = 0
for i in range(len(a)):
    # Near the end the slice is shorter than `word` and simply never matches.
    if a[i:i + len(word)] == word:
        count += 1
print(count)
For the user input
Hi Bob. This is bob
the first approach will output 1 and the second approach will output 2
To count all overlapping occurrences (like in your example) you could just slice the string in a loop:
# Count every occurrence of `word` in `a`, overlapping ones included.
a = input("random string: ")
word = input("Wanted word: ")
cnt = 0
# Try each start position where a full-length slice still fits.
for i in range(len(a) - len(word) + 1):
    if a[i:i + len(word)] == word:
        cnt += 1
print(cnt)
You can use string slicing. One way to adapt your code:
a = 'dfjgnsdfgnbobobeob bob'
counter = 0
value = 'bob'
chars = len(value)
# Slide a window of len(value) characters over a; overlapping matches count.
for i in range(len(a) - chars + 1):
    if a[i: i + chars] == value:
        counter += 1
A more succinct way of writing this is possible via sum and a generator expression:
counter = sum(a[i: i + chars] == value for i in range(len(a) - chars + 1))
This works because bool is a subclass of int in Python, i.e. True / False values are considered 1 and 0 respectively.
Note str.count won't work here, as it only counts non-overlapping matches. You could utilise str.find if built-ins are allowed.
The fastest way to calculate overlapping matches is the Knuth-Morris-Pratt algorithm [wiki], which runs in O(m+n), with m the length of the word to match and n the length of the string to search.
The algorithm first builds a lookup table that acts more or less as the description of a finite state machine (FSM). First we construct such table with:
def build_kmp_table(word):
    """Build the Knuth-Morris-Pratt fallback table for *word*.

    Returns a list of len(word) + 1 integers. Entry k is the position in
    *word* to fall back to after a mismatch at position k, with -1 meaning
    "advance in the searched text instead". The extra last entry is the
    position to continue from after a complete match.
    """
    t = [-1] * (len(word) + 1)
    cnd = 0  # position in word of the current candidate prefix
    for pos in range(1, len(word)):
        if word[pos] == word[cnd]:
            t[pos] = t[cnd]
        else:
            t[pos] = cnd
            # Fall back through shorter candidate prefixes until one can be
            # extended by word[pos], or none is left (cnd == -1).
            while cnd >= 0 and word[pos] != word[cnd]:
                cnd = t[cnd]
        cnd += 1
    t[len(word)] = cnd
    return t
Then we can count with:
def count_kmp(string, word):
    """Count the occurrences of *word* in *string*, overlapping ones included.

    Uses the fallback table from build_kmp_table(), so the position in
    *string* never moves backwards. An empty *word* yields 0.
    """
    wn = len(word)
    if wn == 0:
        # word[k] below would raise IndexError; an empty word has no matches.
        return 0
    t = build_kmp_table(word)
    n = 0  # matches found so far
    k = 0  # position in word
    j = 0  # position in string
    while j < len(string):
        if string[j] == word[k]:
            k += 1
            j += 1
            if k >= wn:
                n += 1
                # Continue from the fallback position so that overlapping
                # matches are found as well.
                k = t[k]
        else:
            k = t[k]
            if k < 0:
                # No prefix of word can end here: restart at the next character.
                k += 1
                j += 1
    return n
The above counts overlapping instances in time linear in the length of the string to be searched, which is an improvement over the "slicing" approach used earlier, which works in O(m×n).

count an occurrence of a string in a bigger string

I am looking to understand what I can do to make my code to work. Learning this concept will probably unlock a lot in my programming understanding. I am trying to count the number of times the string 'bob' occurs in a larger string. Here is my method:
s='azcbobobegghakl'
for i in range(len(s)):
if (gt[0]+gt[1]+gt[2]) == 'bob':
count += 1
gt.replace(gt[0],'')
else:
gt.replace(gt[0],'')
print count
How do I refer to my string instead of having to work with integers because of using for i in range(len(s))?
Try this:
def number_of_occurrences(needle, haystack, overlap=False):
    """Count how many times *needle* occurs in *haystack*.

    Args:
        needle: the sequence to look for.
        haystack: the sequence to search in.
        overlap: if true, occurrences may share characters; if false
            (the default), the search resumes right after each match.

    Returns:
        The number of occurrences found.
    """
    hlen, nlen = len(haystack), len(needle)
    if nlen > hlen:
        return 0  # definitely no occurrences
    N, i = 0, 0
    while i < hlen:
        is_match = i + nlen <= hlen and all(
            haystack[i + j] == ch for j, ch in enumerate(needle)
        )
        if is_match:
            N += 1
            # if you don't need overlap, skip 'nlen' characters of 'haystack'.
            # Advance by exactly nlen, not nlen + 1, or a match starting right
            # after this one is missed. max() keeps an empty needle moving.
            i += 1 if overlap else max(nlen, 1)
        else:
            i += 1
    return N
Example usage:
haystack = 'bobobobobobobobobob'
needle = 'bob'
r = number_of_occurrences(needle, haystack)
R = haystack.count(needle)
print(r == R)
Thanks, your support helped me arrive at the answer. Here is what I have:
numBobs = 0
for i in range(1, len(s)-1):
if s[i-1:i+2] == 'bob':
numBobs += 1
print 'Number of times bob occurs is:', numBobs

Categories

Resources