I need k words to be generated, until the sum of all the characters that make up the list is equal to or greater than 25
import random
for x in range(k):
n=("a","b","c","d")
cc=[''.join(random.choice(n) for _ in range(random.choice(range(2,5))))]
print(cc)
def sumt(input1):
l = list(input1)
total = sum(len(i) for i in l)
return int(total)
print(sumt([cc]))
You can have a for loop if you have a variable amount of iteration to do
Have a method that generate a word, then call until you reach the good total length
chars = "abcd"
def new_word():
return ''.join(random.choice(chars) for _ in range(random.choice(range(2, 5))))
def generate(total_length):
result = []
result_length = 0
while result_length < total_length:
result.append(new_word())
result_length += len(result[-1]) # sum with len of last word
return result
x = generate(25)
print(x)
If I understand, you want to build a list of words until the sum of all characters is >= 25? I prefer using classes...
import random
class WordList:
def __init__(self):
self.choices = ['a','b','c','d']
self.threshold = 25
self.char_sum = 0
self.lst = []
self.build_list()
def build_list(self):
'''Build a list of words until sum of all chars
meets or exceeds the threshold.
'''
while self.char_sum < self.threshold:
self.generate_word()
self.char_sum = sum(len(i) for i in self.lst)
def generate_word(self):
'''Generate a single word with 2 to 5 characters.
'''
_word = ''.join(random.choice(self.choices) for _ in range(random.choice(range(2,5))))
self.lst.append(_word)
Usage:
new_list = WordList().lst
print(new_list)
Related
I need to insert a string (character by character) into another string at every 3rd position
For example:- string_1:-wwwaabkccgkll
String_2:- toadhp
Now I need to insert string2 char by char into string1 at every third position
So the output must be wwtaaobkaccdgkhllp
Need in Python.. even Java is ok
So i tried this
Test_str="hiimdumbiknow"
challenge="toadh"
new_st=challenge [k]
Last=list(test_str)
K=0
For i in range(Len(test_str)):
if(i%3==0):
last.insert(i,new_st)
K+=1
and the output i get
thitimtdutmbtiknow
You can split test_str into sub-strings to length 2, and then iterate merging them with challenge:
def concat3(test_str, challenge):
chunks = [test_str[i:i+2] for i in range(0,len(test_str),2)]
result = []
i = j = 0
while i<len(chunks) or j<len(challenge):
if i<len(chunks):
result.append(chunks[i])
i += 1
if j<len(challenge):
result.append(challenge[j])
j += 1
return ''.join(result)
test_str = "hiimdumbiknow"
challenge = "toadh"
print(concat3(test_str, challenge))
# hitimoduambdikhnow
This method works even if the lengths of test_str and challenge are mismatching. (The remaining characters in the longest string will be appended at the end.)
You can split Test_str in to groups of two letters and then re-join with each letter from challenge in between as follows;
import itertools
print(''.join(f'{two}{letter}' for two, letter in itertools.zip_longest([Test_str[i:i+2] for i in range(0,len(Test_str),2)], challenge, fillvalue='')))
Output:
hitimoduambdikhnow
*edited to split in to groups of two rather than three as originally posted
you can try this, make an iter above the second string and iterate over the first one and select which character should be part of the final string according the position
def add3(s1, s2):
def n():
try:
k = iter(s2)
for i,j in enumerate(s1):
yield (j if (i==0 or (i+1)%3) else next(k))
except:
try:
yield s1[i+1:]
except:
pass
return ''.join(n())
def insertstring(test_str,challenge):
result = ''
x = [x for x in test_str]
y = [y for y in challenge]
j = 0
for i in range(len(x)):
if i % 2 != 0 or i == 0:
result += x[i]
else:
if j < 5:
result += y[j]
result += x[i]
j += 1
get_last_element = x[-1]
return result + get_last_element
print(insertstring(test_str,challenge))
#output: hitimoduambdikhnow
I have a list in Python like this:
`list = ['thatCreation', 'happeningso', '’', 'comebecause',]
Question :
I want specific words:
For e.g. -> 'thatCreation' -> 'that', 'creation'
'happeningso' -> 'happening', 'so'
'comebeacause' -> 'come', 'because' `
Thanks in advance for solving it in python.
It looks like you are trying to take words merged together in camel case and break it apart. There is a great algorithm called Viterbi that does this really well.
I can't explain the magic behind it, but I implemented it in my program recently and it works really well. My understanding is it calculates the probability of each word and splits on that. This algorithm can split words in any case.
def word_prob(word): return dictionary[word] / total
def words(text): return re.findall('[a-z]+', text.lower())
dictionary = Counter(words(open(words_path).read()))
max_word_length = max(map(len, dictionary))
total = float(sum(dictionary.values()))
def viterbi_segment(text):
probs, lasts = [1.0], [0]
for i in range(1, len(text) + 1):
prob_k, k = max((probs[j] * word_prob(text[j:i]), j)
for j in range(max(0, i - max_word_length), i))
probs.append(prob_k)
lasts.append(k)
words = []
i = len(text)
while 0 < i:
words.append(text[lasts[i]:i])
i = lasts[i]
words.reverse()
return words, probs[-1]
sentence = ' '.join(viterbi_segment('thatCreation'.lower())[0])
print('sentence: {0}'.format(sentence))
word = ''.join(a.capitalize() for a in split('([^a-zA-Z0-9])', sentence)
if a.isalnum())
print('word: {0}'.format(word[0].lower() + word[1:]))
You need a dictionary of a ton of words, there are multiple out there, but I used:
https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-no-swears.txt
and updated it with new words that it didn't have.
Borrowed from Peter Norvig's pytudes to perform word segmentation.
Please try..
import re
import math
import random
import matplotlib.pyplot as plt
from collections import Counter
from itertools import permutations
from typing import List, Tuple, Set, Dict, Callable
!wget https://raw.githubusercontent.com/dwyl/english-words/master/words.txt
Word = str # We implement words as strings
cat = ''.join # Function to concatenate strings together
def tokens(text) -> List[Word]:
"""List all the word tokens (consecutive letters) in a text. Normalize to lowercase."""
return re.findall('[a-z]+', text.lower())
TEXT = open('big.txt').read()
WORDS = tokens(TEXT)
class ProbabilityFunction:
def __call__(self, outcome):
"""The probability of `outcome`."""
if not hasattr(self, 'total'):
self.total = sum(self.values())
return self[outcome] / self.total
class Bag(Counter, ProbabilityFunction): """A bag of words."""
Pword = Bag(WORDS)
def Pwords(words: List[Word]) -> float:
"Probability of a sequence of words, assuming each word is independent of others."
return Π(Pword(w) for w in words)
def Π(nums) -> float:
"Multiply the numbers together. (Like `sum`, but with multiplication.)"
result = 1
for num in nums:
result *= num
return result
def splits(text, start=0, end=20) -> Tuple[str, str]:
"""Return a list of all (first, rest) pairs; start <= len(first) <= L."""
return [(text[:i], text[i:])
for i in range(start, min(len(text), end)+1)]
def segment(text) -> List[Word]:
"""Return a list of words that is the most probable segmentation of text."""
if not text:
return []
else:
candidates = ([first] + segment(rest)
for (first, rest) in splits(text, 1))
return max(candidates, key=Pwords)
strings = ['thatCreation', 'happeningso', 'comebecause']
[segment(string.lower()) for string in strings]
--2020-08-04 18:48:06-- https://raw.githubusercontent.com/dwyl/english-words/master/words.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4863005 (4.6M) [text/plain]
Saving to: ‘words.txt.2’
words.txt.2 100%[===================>] 4.64M 162KB/s in 25s
2020-08-04 18:48:31 (192 KB/s) - ‘words.txt.2’ saved [4863005/4863005]
[['that', 'creation'], ['happening', 'so'], ['come', 'because']]
import re
from collections import Counter
def viterbi_segment(text):
probs, lasts = [1.0], [0]
for i in range(1, len(text) + 1):
prob_k, k = max((probs[j] * word_prob(text[j:i]), j)
for j in range(max(0, i - max_word_length), i))
probs.append(prob_k)
lasts.append(k)
words = []
i = len(text)
while 0 < i:
words.append(text[lasts[i]:i])
i = lasts[i]
words.reverse()
return words, probs[-1]
def word_prob(word): return dictionary[word] / total
def words(text): return re.findall('[a-z]+', text.lower())
dictionary = Counter(words(open('big.txt').read()))
max_word_length = max(map(len, dictionary))
total = float(sum(dictionary.values()))
l = ['thatCreation', 'happeningso', 'comebecause',]
for w in l:
print(viterbi_segment(w.lower()))
O/p will be -
(['that', 'creation'], 1.63869514118246e-07)
(['happening', 'so'], 1.1607123777400279e-07)
(['come', 'because'], 4.81658105705814e-07)
I got a solution to my problem from #Darius Bacon and for this, you need to make all strings a lowercase string.
Thank You Guys for your help.
Visit this link for download big.txt :
https://norvig.com/big.txt
I'm currently trying to make a sort of "word mixer": for two given words and the desired length specified, the program should return the "mix" of the two words. However, it can be any sort of mix: it can be the first half of the first word combined with the second half of the second word, it can be a random mix, anything really.
Examples:
fish + cake, length 5: fiske
dog + cat, length 4: doga
late + cross, length 6: losste
I've written a very sloppy code (as seen below), and I'd appreciate some tips on what am I doing wrong (since my outputs aren't really good) and if there's anything that can be improved.
from random import randint
name1 = "domingues"
name2 = "signorelli"
names = [name1,name2]
# a list of the desired lengths
lengths = [5,6,7]
mixes = []
def sizes(size):
if size == 5:
letters1 = randint(2,3)
else:
letters1 = randint(2,size-2)
letters2 = size-letters1
return letters1, letters2
def mix(letters1, letters2):
n = randint(0,1)
if n == 1:
a = 0
else:
a = 1
n1 = names[n]
n2 = names[a]
result = n1[0:letters2]+n2[-letters1::]
return result
file = open("results.txt","w+")
for leng in lengths:
file.write("RESULTS WITH "+str(leng)+" LETTERS \n")
file.write("\n")
for i in range(10):
let1, let2 = sizes(leng)
result = mix(let1,let2)
while result == name1 or result == name2:
result = mix(let2)
if result not in mixes:
mixes.append(result)
for m in mixes:
if m not in file:
file.write(m+" \n")
file.write("\n")
file.close()
(Thanks for taking your time to help me btw, I appreciate it!)
In general, this is AI-related problem, because we are implicitly want to get readable mixed words.
I just wrote simple (and dirty) code that tries to catch sequences of vowels and consonants from training data and builds mixed words according to catched rules.
import random
consonants_pat = 'BCDFGHJKLMNPQRSTVXZ'.lower()
vowels_pat = 'aeiouy'
train_data = '''
This our sentence to be used as a training dataset
It should be longer
'''
def build_mixer(train_data, num=3, mixed_len=(2, 4)):
def _get_random_pattern(td, wlen):
td_splitted = td.lower().split()
while True:
w = random.choice(list(filter(lambda x: len(x)>=wlen, td_splitted)))
for j in range(len(w)-wlen):
yield tuple(map(lambda x: 0 if x in vowels_pat else 1, w[j:j + wlen]))
def _select_vowels(w):
return
def _mixer(w1, w2, num=num, mixed_len=mixed_len):
allowed_letters = w1.lower().strip() + w2.lower().strip()
ind = 1
for j in range(num):
wlen = random.choice(range(*mixed_len))
pattern = _get_random_pattern(train_data, wlen)
_aux = allowed_letters
word = ''
try:
for pat in pattern:
for k in pat:
if k == 0:
choiced = random.choice(list(filter(lambda x: x in vowels_pat, _aux)))
word += choiced
else:
choiced = random.choice(list(filter(lambda x: x in consonants_pat, _aux)))
word += choiced
l = list(_aux)
l.remove(choiced)
_aux = ''.join(l)
ind += 1
yield word
if ind>num:
raise StopIteration
except IndexError:
continue
return _mixer
mixer = build_mixer(train_data, num=6, mixed_len=(3,6))
for mixed in mixer('this', 'horse'):
print(mixed)
I got the following words:
het hetihs hetihssro sheo hsio tohir
I recommend taking a random slice of the word string and combining it with another random slice from the second word. Get the len(word) and take a slice of the word randomly using random.randrange().
import random
def word_mixer(word1, word2):
slice1 = word1[:random.randrange(2, len(word1))]
slice2 = word2[:random.randrange(2, len(word2))]
return slice1 + slice2
mixed = word_mixer('weasel', 'snake')
print(mixed)
Output:
wesnak
weasesna
weassnak
Here's one way to do it.
import random
w1 = 'dog'
w2 = 'cat'
w3 = 'fish'
w4 = 'wolf'
def word_mixer(w1, w2, length):
new_word = w1 + w2
x = random.sample(range(len(new_word)), length)
result = []
for i in x:
result.append(new_word[i])
return "".join(result)
print(word_mixer(w3,w4,4))
print(word_mixer(w2,w4,5))
Output:
lfwi
falwc
A bit more smaller version of #AkshayNevrekar's post:
import random
w1 = 'dog'
w2 = 'cat'
w3 = 'fish'
w4 = 'wolf'
def word_mixer(w1, w2, length):
return ''.join(random.sample(w1 + w2, length))
print(word_mixer(w3, w4, 4))
print(word_mixer(w2, w4, 5))
We can also use random.sample and pass mixed string to it like this:
import random
w1=input("Enter first word")
w2=input("Enter second word")
len=int(input("Enter length"))
mixed=w1+w2
def wordmixer(mixed,len):
return ''.join(random.sample(mixed,len))
print(wordmixer(mixed,len))
My task is:
To write a function that gets a string as an argument and returns the letter(s) with the maximum appearance in it.
Example 1:
s = 'Astana'
Output:
a
Example 2:
s = 'Kaskelen'
Output:
ke
So far, I've got this code(click to run):
a = input()
def most_used(w):
a = list(w)
indexes = []
g_count_max = a.count(a[0])
for letter in a:
count = 0
i = int()
for index in range(len(a)):
if letter == a[index] or letter == a[index].upper():
count += 1
i = index
if g_count_max <= count: //here is the problem.
g_count_max = count
if i not in indexes:
indexes.append(i)
letters = str()
for i in indexes:
letters = letters + a[i].lower()
return letters
print(most_used(a))
The problem is that it automatically adds first letter to the array because the sum of appearance of the first element is actually equal to the starter point of appearance(which is basically the first element).
Example 1:
s = 'hheee'
Output:
he
Example 2:
s = 'malaysia'
Output:
ma
I think what you're trying to can be much simplified by using the standard library's Counter object
from collections import Counter
def most_used(word):
# this has the form [(letter, count), ...] ordered from most to least common
most_common = Counter(word.lower()).most_common()
result = []
for letter, count in most_common:
if count == most_common[0][1]:
result.append(letter) # if equal largest -- add to result
else:
break # otherwise don't bother looping over the whole thing
return result # or ''.join(result) to return a string
You can use a dictionary comprehension with a list comprehension and max():
s = 'Kaskelen'
s_lower = s.lower() #convert string to lowercase
counts = {i: s_lower.count(i) for i in s_lower}
max_counts = max(counts.values()) #maximum count
most_common = ''.join(k for k,v in counts.items() if v == max_counts)
Yields:
'ke'
try this code using list comprehensions:
word = input('word=').lower()
letters = set(list(word))
max_w = max([word.count(item) for item in letters])
out = ''.join([item for item in letters if word.count(item)==max_w])
print(out)
Also you can import Counter lib:
from collections import Counter
a = "dagsdvwdsbd"
print(Counter(a).most_common(3)[0][0])
Then it returns:
d
I'm currently learning to create generators and to use itertools. So I decided to make a string index generator, but I'd like to add some parameters such as a "start index" allowing to define where to start generating the indexes.
I came up with this ugly solution which can be very long and not efficient with large indexes:
import itertools
import string
class StringIndex(object):
'''
Generator that create string indexes in form:
A, B, C ... Z, AA, AB, AC ... ZZ, AAA, AAB, etc.
Arguments:
- startIndex = string; default = ''; start increment for the generator.
- mode = 'lower' or 'upper'; default = 'upper'; is the output index in
lower or upper case.
'''
def __init__(self, startIndex = '', mode = 'upper'):
if mode == 'lower':
self.letters = string.ascii_lowercase
elif mode == 'upper':
self.letters = string.ascii_uppercase
else:
cmds.error ('Wrong output mode, expected "lower" or "upper", ' +
'got {}'.format(mode))
if startIndex != '':
if not all(i in self.letters for i in startIndex):
cmds.error ('Illegal characters in start index; allowed ' +
'characters are: {}'.format(self.letters))
self.startIndex = startIndex
def getIndex(self):
'''
Returns:
- string; current string index
'''
startIndexOk = False
x = 1
while True:
strIdMaker = itertools.product(self.letters, repeat = x)
for stringList in strIdMaker:
index = ''.join([s for s in stringList])
# Here is the part to simpify
if self.startIndex:
if index == self.startIndex:
startIndexOk = True
if not startIndexOk:
continue
###
yield index
x += 1
Any advice or improvement is welcome. Thank you!
EDIT:
The start index must be a string!
You would have to do the arithmetic (in base 26) yourself to avoid looping over itertools.product. But you can at least set x=len(self.startIndex) or 1!
Old (incorrect) answer
If you would do it without itertools (assuming you start with a single letter), you could do the following:
letters = 'abcdefghijklmnopqrstuvwxyz'
def getIndex(start, case):
lets = list(letters.lower()) if case == 'lower' else list(letters.upper())
# default is 'upper', but can also be an elif
for r in xrange(0,10):
for l in lets[start:]:
if l.lower() == 'z':
start = 0
yield ''.join(lets[:r])+l
I run until max 10 rows of letters are created, but you could ofcourse use an infinite while loop such that it can be called forever.
Correct answer
I found the solution in a different way: I used a base 26 number translator (based on (and fixxed since it didn't work perfectly): http://quora.com/How-do-I-write-a-program-in-Python-that-can-convert-an-integer-from-one-base-to-another)
I uses itertools.count() to count and just loops over all the possibilities.
The code:
import time
from itertools import count
def toAlph(x, letters):
div = 26
r = '' if x > 0 else letters[0]
while x > 0:
r = letters[x % div] + r
if (x // div == 1) and (x % div == 0):
r = letters[0] + r
break
else:
x //= div
return r
def getIndex(start, case='upper'):
alphabet = 'abcdefghijklmnopqrstuvwxyz'
letters = alphabet.upper() if case == 'upper' else alphabet
started = False
for num in count(0,1):
l = toAlph(num, letters)
if l == start:
started = True
if started:
yield l
iterator = getIndex('AA')
for i in iterator:
print(i)
time.sleep(0.1)