How do I use .setdefault with the default being a list that I filled in a function? For example,
import random
foundwords = []
with open("text", "r") as file:
contents = file.read().replace('\n',' ')
words = contents.split(' ')
def findwords(word):
for i in range(len(words) - 1):
if words[i] == word:
if not words[i + 1] == '':
foundwords.append(words[i + 1])
wordsDict = {}
for i in range(len(words) - 1):
findwords(words[i])
wordsDict.setdefault(words[i], foundwords)
del foundwords[:]
def assemble():
    """Print a randomly chosen entry of the global ``words`` list, capitalized.

    ``random.randint`` includes both endpoints, so indexing with
    ``randint(0, len(words))`` can pick ``len(words)`` and raise
    ``IndexError``. ``random.choice`` only ever selects an existing element.
    """
    start = random.choice(words)
    print(start.capitalize())
assemble()
When I check wordsDict, all lists are empty. However, I know that the lists are filled.
You're not making a copy of the foundwords list when you use .setdefault(), so all the dictionary elements refer to the same list. Then you remove all the elements of that list with del foundwords[:], so they all refer to that empty list.
Make a copy of the list when you add it to the dictionary.
for word in words:
findwords(word)
wordsDict.setdefault(word, foundwords[:])
del foundwords[:]
Better would be to change findwords() so it returns a new list instead of writing into a global variable.
def findwords(word):
    """Return the non-empty words that directly follow ``word`` in ``words``.

    Reads the module-level ``words`` list and builds a fresh list on every
    call, so the result is never shared between callers.
    """
    # Pair each word with its successor; the last word has no successor.
    neighbours = zip(words, words[1:])
    return [after for current, after in neighbours
            if current == word and after != '']
# Use a separate loop variable: rebinding ``words`` itself would clobber the
# list that findwords() reads and leave ``word`` undefined.
for word in words:
    wordsDict.setdefault(word, findwords(word))
Related
I have a list of words:
list1 = ['technology','technician','technical','technicality']
I want to find which phrase is repeated in every one of the words. In this case, it is 'tech'.
I have tried converting all the characters to ascii values, but I am stuck there as I am unable to think of any logic.
Can somebody please help me with this?
This is generally called the Longest common substring/subsequence problem.
A very basic (but slow) strategy:
longest_substring = ""
curr_substring = ""
# Loop over a particular word (ideally, shortest).
for start_idx in range(shortest_word):
# Select a substring from that word.
for length in range(1, len(shortest_word) - start_idx):
curr_substring = shortest_word[start_idx : start_idx + length]
# Check if substring is present in all words,
# and exit loop or update depending on outcome.
if "curr_substring not in all words":
break
if "new string is longer":
longest_substring = curr_substring
Iterate over the first word, increasing the length of the prefix for as long as all the words share a single prefix (checked with a set); when the prefixes start to differ, return the last result.
list1 = ['technology', 'technician', 'technical', 'technicality']
def common_prefix(li):
    """Return the longest prefix shared by every string in ``li``.

    Returns an empty string when ``li`` is empty or when the strings have
    no prefix in common.
    """
    if not li:
        return ''
    prefix_len = 0
    # zip(*li) yields one tuple of characters per position and stops at the
    # end of the shortest word, so the prefix can never overrun any string.
    for chars in zip(*li):
        # More than one distinct character means the words diverge here.
        if len(set(chars)) > 1:
            break
        prefix_len += 1
    return li[0][:prefix_len]
print(common_prefix(list1))
output: techn
Find the shortest word. Iterate over increasingly small chunks of the first word, starting with a chunk equal in length to the shortest word, checking that each is contained in all of the other strings. If it is, return that substring.
list1 = ['technology', 'technician', 'technical', 'technicality']
def shortest_common_substring(lst):
    """Return the longest substring contained in every string of ``lst``.

    Candidates are taken from the shortest word, longest first, so the first
    hit is a longest common substring. Returns ``None`` when ``lst`` is empty
    or the strings have no character in common.
    """
    if not lst:
        return None
    shortest_word = min(lst, key=len)
    shortest_len = len(shortest_word)
    for size in range(shortest_len, 0, -1):
        # Slide a window of ``size`` characters across the shortest word,
        # including the final offset.
        for start in range(shortest_len - size + 1):
            substr = shortest_word[start:start + size]
            if all(substr in w for w in lst):
                return substr
    return None
And just for fun, let's replace that loop with a generator expression, and just take the first thing it gives us (or None).
def shortest_common_substring(lst):
    """Return the longest substring contained in every string of ``lst``.

    Generator-expression variant: candidates come from the shortest word,
    longest first, and the first one found in every string is returned.
    Returns ``None`` when ``lst`` is empty or nothing is shared.
    """
    shortest_word = min(lst, key=len, default='')
    shortest_len = len(shortest_word)
    # Every window of the shortest word, ordered from longest to shortest.
    candidates = (
        shortest_word[start:start + size]
        for size in range(shortest_len, 0, -1)
        for start in range(shortest_len - size + 1)
    )
    return next((s for s in candidates if all(s in w for w in lst)), None)
This question already has answers here:
Split string every nth character?
(19 answers)
Splitting a string into 2-letter segments [duplicate]
(6 answers)
Closed 2 years ago.
I want to divide text into pairs.
Input: text = "abcde"
Goal Output: result = ["ab", "cd", "e_"]
Current Output: result = ['ab', 'abcd']
My current code looks like this, but I do not know how to get from here to the goal. Does anyone have a tip for me?
def split_pairs(text):
result = []
if text is None or not text:
return []
pair = ""
for i in range(len(text)):
if i % 2 == 0:
pair += text[i]
pair += text[i+1]
else:
result.append(pair)
return result
You could use a list comprehension to zip together the even values with the corresponding odd values. And using itertools.zip_longest you can use the fillvalue argument to provide a "fill in" if there is a length mismatch.
>>> from itertools import zip_longest
>>> s = 'abcde'
>>> pairs = [i+j for i,j in zip_longest(s[::2], s[1::2], fillvalue='_')]
>>> pairs
['ab', 'cd', 'e_']
You should reset your "pair" variable once appended to "result"
def split_pairs(text):
    """Split ``text`` into two-character chunks, padding an odd tail with "_".

    Returns an empty list when ``text`` is ``None`` or empty.
    """
    result = []
    if not text:
        return []
    pair = ""
    for char in text:
        pair += char
        if len(pair) == 2:
            result.append(pair)
            # Reset once appended, otherwise every later pair accumulates
            # onto the previous ones.
            pair = ""
    if pair:
        # Odd-length input leaves one unpaired character; reading
        # text[i + 1] for it would run past the end of the string.
        result.append(pair + "_")
    return result
You could also use a list comprehension over a range with 3rd step parameter and add ljust to add _. This will also work nicely for more than just pairs:
>>> s = "abcde"
>>> k = 2
>>> [s[i:i+k].ljust(k, "_") for i in range(0, len(s), k)]
['ab', 'cd', 'e_']
I am not sure if your code needed to be in the format you originally wrote it in, but I wrote the code below, which gets the job done.
def split_pairs(text):
    """Cut ``text`` into two-character slices, appending "_" to an odd tail."""
    chunks = []
    for start in range(0, len(text), 2):
        chunks.append(text[start:start + 2])
    # An odd length means the final slice holds a single character.
    if len(text) % 2 != 0:
        chunks[-1] = chunks[-1] + "_"
    return chunks
The issue here is that the "pair" variable is never reinitialized to "".
Make sure you make it an empty string in your else block.
def split_pairs(text):
    """Split ``text`` into consecutive two-character chunks.

    An odd-length input keeps its last character as a one-character chunk.
    Returns an empty list when ``text`` is ``None`` or empty.
    """
    result = []
    if text is None or not text:
        return []
    pair = ""
    for i in range(len(text)):
        # Add one character per step so text[i + 1] is never read past the
        # end of an odd-length string.
        pair += text[i]
        if i % 2 == 1:
            result.append(pair)
            pair = ""  # Make sure you reset it
    if pair:
        # Leftover single character from an odd-length input.
        result.append(pair)
    return result
If you want to have a "_" at the end (in case of an odd number of characters), you could do the following:
def split_pairs(text):
    """Split ``text`` into two-character chunks, padding an odd tail with "_".

    Returns an empty list when ``text`` is ``None`` or empty.
    """
    result = []
    if text is None or not text:
        return []
    # Strings are immutable, so each pair is built from its two characters
    # instead of assigning into a "__" template.
    for i in range(0, len(text), 2):
        first = text[i]
        # Avoiding overflow: use "_" when there is no second character.
        second = text[i + 1] if i + 1 < len(text) else "_"
        result.append(first + second)
    return result
My task is:
To write a function that gets a string as an argument and returns the letter(s) with the maximum appearance in it.
Example 1:
s = 'Astana'
Output:
a
Example 2:
s = 'Kaskelen'
Output:
ke
So far, I've got this code(click to run):
a = input()
def most_used(w):
a = list(w)
indexes = []
g_count_max = a.count(a[0])
for letter in a:
count = 0
i = int()
for index in range(len(a)):
if letter == a[index] or letter == a[index].upper():
count += 1
i = index
if g_count_max <= count: //here is the problem.
g_count_max = count
if i not in indexes:
indexes.append(i)
letters = str()
for i in indexes:
letters = letters + a[i].lower()
return letters
print(most_used(a))
The problem is that it automatically adds first letter to the array because the sum of appearance of the first element is actually equal to the starter point of appearance(which is basically the first element).
Example 1:
s = 'hheee'
Output:
he
Example 2:
s = 'malaysia'
Output:
ma
I think what you're trying to do can be much simplified by using the standard library's Counter object:
from collections import Counter
def most_used(word):
    """Return the letter(s) that occur most often in ``word``, ignoring case.

    The result is a list of lowercase characters; an empty ``word`` gives an
    empty list. Use ``''.join(...)`` on the result to get a string instead.
    """
    # [(letter, count), ...] sorted from most to least common
    ranked = Counter(word.lower()).most_common()
    if not ranked:
        return []
    top_count = ranked[0][1]
    # Entries are sorted by count, so the ties for first place sit together
    # at the front of the list.
    return [letter for letter, count in ranked if count == top_count]
You can use a dictionary comprehension with a list comprehension and max():
s = 'Kaskelen'
s_lower = s.lower() #convert string to lowercase
counts = {i: s_lower.count(i) for i in s_lower}
max_counts = max(counts.values()) #maximum count
most_common = ''.join(k for k,v in counts.items() if v == max_counts)
Yields:
'ke'
try this code using list comprehensions:
word = input('word=').lower()
letters = set(list(word))
max_w = max([word.count(item) for item in letters])
out = ''.join([item for item in letters if word.count(item)==max_w])
print(out)
Also you can import Counter lib:
from collections import Counter
a = "dagsdvwdsbd"
print(Counter(a).most_common(3)[0][0])
Then it returns:
d
I'm doing an exercise where I'm defining a function that takes two inputs - a sentence and a word - and replaces that word with stars in the sentence.
The problem is that I can't get the final output to put spaces between the words, i.e. it prints all the words crammed together. Any help, please?
def censor(text, word):
lis = text.split()
output =""
p = []
for w in lis:
if w != word:
p.append(w)
else:
l = len(w)
y = "*" * l
p.append(y)
output = output.join(p)
print output
censor("Hello world televison", "world")
You don't need to initialize output to an empty string first. You can just do
output = " ".join(p)
Notice the " ".join(), that is what determines how you are joining your strings. In this case, it's a single space. Also, you need to return something from your function, so instead of using print you should do
return output
Here's another solution, even though it's a little tricky, it should handle all the different cases that can occur:
def censor(text, word):
    """Replace every standalone occurrence of ``word`` in ``text`` with stars.

    Matching is case-insensitive. An occurrence is masked only when it is not
    directly preceded or followed by an ASCII letter, so "worlds" is left
    alone when censoring "world", while "world," and "(world)" are masked.
    An empty ``word`` leaves ``text`` unchanged.
    """
    import re

    if not word:
        return text
    # Lookarounds test the neighbouring characters without consuming them,
    # so no sentinel padding or manual index arithmetic is needed. The
    # explicit [A-Za-z] class avoids treating "[", "_", "`" etc. as letters,
    # which a plain ord('A')..ord('z') range check would do.
    pattern = r'(?<![A-Za-z])' + re.escape(word) + r'(?![A-Za-z])'
    return re.sub(pattern, lambda match: '*' * len(match.group()), text,
                  flags=re.IGNORECASE)
censor("World worlds world television", "world")
>>> ***** worlds ***** television
It handles capital letters and all the punctuation.
How do I split the 2nd tuple
data = [('152', 'Farko', 'Kier'), ('153', 'Park - Pub')]
to get this output:
[('152', 'Farko', 'Kier'), ('153', 'Park', 'Pub')]
I tried this way:
lst = []
for i in data:
if len(i) == 2:
i[1] = tuple(i[1].split(' - '))
lst.append(i)
It'd work, except it raised an exception TypeError: 'tuple' object does not support item assignment. But I can't assign i = tuple(i[1].split(' - ')) because I need to keep the number which is in position i[0] in the tuple. A list comprehension solution would be greatly welcome. Suggestions?
You could use a list comprehension:
lst = [t[:1] + tuple(t[1].split(' - ')) if len(t) == 2 else t for t in data]
or you could adjust your loop to create a new tuple:
for i in data:
if len(i) == 2:
i = i[:1] + tuple(i[1].split(' - '))
lst.append(i)
newdata = [tuple(s for t in tu for s in t.split(' - ')) for tu in data]
# [('152', 'Farko', 'Kier'), ('153', 'Park', 'Pub')]
As you have seen you can't modify the tuple, however you can just create a new one with the data you want:
data = [('152', 'Farko', 'Kier'), ('153', 'Park - Pub')]
lst = []
for i in data:
if len(i) == 2:
i = i[:1] + tuple(i[1].split(' - '))
lst.append(i)
Or if you want to modify the original list:
for i, tup in enumerate(data):
if len(tup) == 2:
data[i] = tup[:1] + tuple(tup[1].split(' - '))