How to use given function to deliver many strings - python

I have a function randStr which returns a random string of N characters.
def randStr(chars=string.ascii_uppercase + string.digits, N=4):
    """Return a random string of ``N`` characters.

    Each character is drawn independently from ``chars`` with
    ``random.choice``, so characters may repeat.
    """
    return ''.join(random.choice(chars) for _ in range(N))
How do I make it so that it returns 1000 of these random and unique small strings?

(Edit: added uniqueness check)
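You could use a while-loop to iterate until you get 1000 unique words. Note that the input string must contain enough distinct characters for 1000 unique words to exist. Because randStr may repeat characters, there are len(chars)**N possible words, so 6 distinct characters are enough for N=4 (5**4 = 625, 6**4 = 1296). If repeated characters were not allowed, the count would be a permutation instead (7P4 = 840, 8P4 = 1680, and 9P4 onwards is greater than that), which needs at least 8 characters; the example below uses 8.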
import string
import random
def randStr(chars=string.ascii_uppercase + string.digits, N=4):
    """Return a random string of ``N`` characters.

    Each character is drawn independently from ``chars`` with
    ``random.choice``, so characters may repeat.
    """
    return ''.join(random.choice(chars) for _ in range(N))


# Keep drawing 4-letter words over an 8-letter alphabet until 1000 distinct
# ones have been collected, printing each new word as it is accepted.
unique_words = []
while len(unique_words) < 1000:
    result = randStr('abcdefgh')
    # Duplicates are discarded; only a previously unseen word counts.
    if result not in unique_words:
        unique_words.append(result)
        print(result, end=', ')
print(len(unique_words))
Output:
cfae, gbca, fgfe, bdhg, decd, gcha, ddgc, babd, bggb, eghe, eeca, ebch, fbec, bgbe, gbbc, dgda, efec, hccd, bgfh, gdbf, ecac, edhd, cfdg, eacf, dgaa, heeb,
...
egbb, cbed, eefg, gdec, dgcg, cgag, fadc, effe, dahg, fhdb, 1000

Same answer as that of @black-raven, but with a more efficient uniqueness check.
Note the use of set structure for guaranteed unique strings.
import string
import random
def randStr(chars=string.ascii_uppercase + string.digits, N=4):
    """Return a random string of ``N`` characters.

    Each character is drawn independently from ``chars`` with
    ``random.choice``, so characters may repeat.
    """
    return ''.join(random.choice(chars) for _ in range(N))


# A set silently drops duplicates, so its size is the number of unique words.
strings = set()
# The alphabet must allow at least 1000 distinct words: 'abcdefgh' gives
# 8**4 = 4096, whereas a 4-letter alphabet such as 'asdf' only gives
# 4**4 = 256 and the loop would never terminate.
while len(strings) < 1000:
    strings.add(randStr('abcdefgh'))
strings = list(strings)

Related

Difficulty creating random words under conditions

I need k words to be generated, until the sum of all the characters that make up the list is equal to or greater than 25
import random
# NOTE(review): asker's attempt, kept verbatim; the indentation appears to have
# been lost. `k` is not defined anywhere in this snippet.
for x in range(k):
n=("a","b","c","d")
# Builds a one-element list holding a random word of 2-4 letters; `cc` is
# rebound on every pass, so earlier words are not kept.
cc=[''.join(random.choice(n) for _ in range(random.choice(range(2,5))))]
print(cc)
def sumt(input1):
    """Return the combined length of all items in ``input1``.

    ``input1`` is any iterable of sized objects (e.g. a list of words);
    an empty iterable yields 0.
    """
    return sum(len(item) for item in input1)
# NOTE(review): `[cc]` wraps the list in another list, so sumt() adds up
# len(cc) (the number of words in cc) rather than the lengths of the words.
print(sumt([cc]))
A plain for loop is not a good fit when the number of iterations is not known in advance; use a while loop instead.
Write a function that generates one word, then call it until you reach the required total length.
chars = "abcd"


def new_word():
    """Return a random word of 2 to 4 letters drawn from ``chars``."""
    return ''.join(random.choice(chars) for _ in range(random.randint(2, 4)))


def generate(total_length):
    """Return random words whose combined length is at least ``total_length``.

    Words are appended one at a time and generation stops as soon as the
    running character count reaches ``total_length``; a non-positive target
    yields an empty list.
    """
    result = []
    result_length = 0
    while result_length < total_length:
        word = new_word()
        result.append(word)
        result_length += len(word)
    return result


x = generate(25)
print(x)
If I understand, you want to build a list of words until the sum of all characters is >= 25? I prefer using classes...
import random
class WordList:
    """A list of random words built until their total length hits a threshold.

    Attributes:
        choices: characters a word may be made of.
        threshold: minimum combined length of all words in ``lst``.
        char_sum: combined length of the words currently in ``lst``.
        lst: the generated words.
    """

    def __init__(self, threshold=25, choices=None):
        """Create the list and fill it immediately.

        Args:
            threshold: minimum total number of characters (default 25).
            choices: iterable of allowed characters (default a, b, c, d).
        """
        self.choices = ['a', 'b', 'c', 'd'] if choices is None else list(choices)
        self.threshold = threshold
        self.char_sum = 0
        self.lst = []
        self.build_list()

    def build_list(self):
        '''Build a list of words until sum of all chars
        meets or exceeds the threshold.
        '''
        # Resynchronise once, then keep a running total instead of
        # re-summing the whole list after every new word.
        self.char_sum = sum(len(word) for word in self.lst)
        while self.char_sum < self.threshold:
            self.generate_word()
            self.char_sum += len(self.lst[-1])

    def generate_word(self):
        '''Generate a single word with 2 to 4 characters and append it.
        '''
        _word = ''.join(random.choice(self.choices)
                        for _ in range(random.randint(2, 4)))
        self.lst.append(_word)
Usage:
# Constructing the object already fills .lst (the constructor calls build_list()).
new_list = WordList().lst
print(new_list)

How to validate a word in python?

I have a list in Python like this:
`list = ['thatCreation', 'happeningso', '’', 'comebecause',]
Question :
I want specific words:
For e.g. -> 'thatCreation' -> 'that', 'creation'
'happeningso' -> 'happening', 'so'
'comebecause' -> 'come', 'because' `
Thanks in advance for solving it in python.
It looks like you are trying to take words merged together in camel case and break it apart. There is a great algorithm called Viterbi that does this really well.
I can't explain the magic behind it, but I implemented it in my program recently and it works really well. My understanding is it calculates the probability of each word and splits on that. This algorithm can split words in any case.
import re
from collections import Counter


def word_prob(word):
    """Return the relative frequency of ``word`` in the corpus (0.0 if unseen)."""
    return dictionary[word] / total


def words(text):
    """Return every run of the letters a-z in ``text`` after lower-casing it."""
    return re.findall('[a-z]+', text.lower())


# `words_path` is not defined in this snippet; it has to be set beforehand to
# the path of a plain-text word list. The context manager closes the file as
# soon as it has been read.
with open(words_path) as corpus_file:
    dictionary = Counter(words(corpus_file.read()))
max_word_length = max(map(len, dictionary))
total = float(sum(dictionary.values()))
def viterbi_segment(text):
    """Split ``text`` into the most probable sequence of words.

    Dynamic programming over prefixes: ``probs[i]`` is the best probability
    found for segmenting ``text[:i]`` and ``lasts[i]`` is the start index of
    the final word of that segmentation.

    Returns:
        A ``(words, probability)`` tuple; empty text yields ``([], 1.0)``.
    """
    probs, lasts = [1.0], [0]
    for i in range(1, len(text) + 1):
        # Try every candidate last word text[j:i] no longer than the longest
        # known word and keep the split with the highest probability.
        prob_k, k = max((probs[j] * word_prob(text[j:i]), j)
                        for j in range(max(0, i - max_word_length), i))
        probs.append(prob_k)
        lasts.append(k)
    # Walk the back-pointers from the end of the text to recover the words.
    segments = []
    i = len(text)
    while 0 < i:
        segments.append(text[lasts[i]:i])
        i = lasts[i]
    segments.reverse()
    return segments, probs[-1]
import re  # the split below is re.split; a bare `split` name is not defined

sentence = ' '.join(viterbi_segment('thatCreation'.lower())[0])
print('sentence: {0}'.format(sentence))
# Re-join the segmented words as camelCase: capitalise every alphanumeric
# piece, then lower-case the very first letter.
word = ''.join(a.capitalize() for a in re.split('([^a-zA-Z0-9])', sentence)
               if a.isalnum())
print('word: {0}'.format(word[0].lower() + word[1:]))
You need a dictionary of a ton of words, there are multiple out there, but I used:
https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-no-swears.txt
and updated it with new words that it didn't have.
Borrowed from Peter Norvig's pytudes to perform word segmentation.
Please try..
import re
import math
import random
import matplotlib.pyplot as plt
from collections import Counter
from itertools import permutations
from typing import List, Tuple, Set, Dict, Callable
!wget https://raw.githubusercontent.com/dwyl/english-words/master/words.txt
Word = str     # We implement words as strings
cat = ''.join  # Function to concatenate strings together


def tokens(text) -> List[Word]:
    """List all the word tokens (consecutive letters) in a text. Normalize to lowercase."""
    return re.findall('[a-z]+', text.lower())
# Read the corpus through a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('big.txt') as corpus_file:
    TEXT = corpus_file.read()
WORDS = tokens(TEXT)
class ProbabilityFunction:
    """Mixin that makes a counting mapping callable as a probability function.

    The host class must provide ``values()`` and item lookup (as ``Counter``
    does).
    """

    def __call__(self, outcome):
        """The probability of `outcome`."""
        # Cache the grand total on first use. The cache is stored as `_total`
        # and looked up in the instance dict: on Python 3.10+ `Counter.total`
        # is a method, so a `hasattr(self, 'total')` test would always succeed
        # and the division below would be attempted with a bound method.
        if '_total' not in vars(self):
            self._total = sum(self.values())
        return self[outcome] / self._total


class Bag(Counter, ProbabilityFunction):
    """A bag of words."""
Pword = Bag(WORDS)


def Pwords(words: List[Word]) -> float:
    """Probability of a sequence of words, assuming each word is independent of others."""
    # Independence means the joint probability is the product of the
    # individual word probabilities.
    return Π(Pword(w) for w in words)
def Π(nums) -> float:
    """Multiply the numbers together. (Like `sum`, but with multiplication.)

    An empty iterable gives 1, the multiplicative identity.
    """
    result = 1
    for num in nums:
        result *= num
    return result
def splits(text, start=0, end=20) -> List[Tuple[str, str]]:
    """Return a list of all (first, rest) pairs; start <= len(first) <= end.

    ``len(first)`` is additionally capped at ``len(text)``, so the last pair
    may have an empty ``rest``.
    """
    return [(text[:i], text[i:])
            for i in range(start, min(len(text), end) + 1)]
def segment(text) -> List[Word]:
    """Return a list of words that is the most probable segmentation of text."""
    if not text:
        return []
    # Every non-empty prefix is tried as the first word; the remainder is
    # segmented recursively and the most probable candidate wins.
    candidates = ([first] + segment(rest)
                  for (first, rest) in splits(text, 1))
    return max(candidates, key=Pwords)
# Segment each sample string after lower-casing it.
# NOTE(review): the bare list expression only displays a value in an
# interactive/notebook session - presumably how this was run; confirm.
strings = ['thatCreation', 'happeningso', 'comebecause']
[segment(string.lower()) for string in strings]
--2020-08-04 18:48:06-- https://raw.githubusercontent.com/dwyl/english-words/master/words.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4863005 (4.6M) [text/plain]
Saving to: ‘words.txt.2’
words.txt.2 100%[===================>] 4.64M 162KB/s in 25s
2020-08-04 18:48:31 (192 KB/s) - ‘words.txt.2’ saved [4863005/4863005]
[['that', 'creation'], ['happening', 'so'], ['come', 'because']]
import re
from collections import Counter
def viterbi_segment(text):
    """Split ``text`` into the most probable sequence of words.

    Dynamic programming over prefixes: ``probs[i]`` is the best probability
    found for segmenting ``text[:i]`` and ``lasts[i]`` is the start index of
    the final word of that segmentation.

    Returns:
        A ``(words, probability)`` tuple; empty text yields ``([], 1.0)``.
    """
    probs, lasts = [1.0], [0]
    for i in range(1, len(text) + 1):
        # Try every candidate last word text[j:i] no longer than the longest
        # known word and keep the split with the highest probability.
        prob_k, k = max((probs[j] * word_prob(text[j:i]), j)
                        for j in range(max(0, i - max_word_length), i))
        probs.append(prob_k)
        lasts.append(k)
    # Walk the back-pointers from the end of the text to recover the words.
    segments = []
    i = len(text)
    while 0 < i:
        segments.append(text[lasts[i]:i])
        i = lasts[i]
    segments.reverse()
    return segments, probs[-1]
def word_prob(word):
    """Return the relative frequency of ``word`` in the corpus (0.0 if unseen)."""
    return dictionary[word] / total


def words(text):
    """Return every run of the letters a-z in ``text`` after lower-casing it."""
    return re.findall('[a-z]+', text.lower())


# The context manager closes big.txt as soon as it has been read.
with open('big.txt') as corpus_file:
    dictionary = Counter(words(corpus_file.read()))
max_word_length = max(map(len, dictionary))
total = float(sum(dictionary.values()))
l = ['thatCreation', 'happeningso', 'comebecause',]
for w in l:
    # The corpus is lower-case only, so lower-case the input before segmenting.
    print(viterbi_segment(w.lower()))
O/p will be -
(['that', 'creation'], 1.63869514118246e-07)
(['happening', 'so'], 1.1607123777400279e-07)
(['come', 'because'], 4.81658105705814e-07)
I got a solution to my problem from @Darius Bacon; for it to work, you need to convert all strings to lowercase first.
Thank You Guys for your help.
Visit this link for download big.txt :
https://norvig.com/big.txt

How to change uppercase & lowercase alternatively in a string?

I want to create a new string from a given string with alternate uppercase and lowercase.
I have tried iterating over the string and changing first to uppercase into a new string and then to lower case into another new string again.
# NOTE(review): asker's non-working attempt, kept verbatim; the indentation
# appears to have been lost, so the intended nesting of the last lines is unclear.
def myfunc(x):
even = x.upper()
lst = list(even)
for itemno in lst:
# `itemno` is a character of the string, not an index, so `%` here is string
# formatting rather than a parity test.
if (itemno % 2) !=0:
# lst[1::2] is a list; lists have no `lowercase` method.
even1=lst[1::2].lowercase()
itemno=itemno+1
even2=str(even1)
print(even2)
Since I cant change the given string I need a good way of creating a new string alternate caps.
Here's a one-liner:
"".join([x.upper() if i%2 else x.lower() for i,x in enumerate(mystring)])
You can simply randomly choose for each letter in the old string if you should lowercase or uppercase it, like this:
import random
def myfunc2(old):
    """Return ``old`` with each character randomly lower- or upper-cased.

    One ``random.randint(0, 1)`` draw is made per character; 1 selects
    lower case and 0 selects upper case.
    """
    return ''.join(
        c.lower() if random.randint(0, 1) else c.upper()
        for c in old
    )
Here's one that returns a new string with alternating caps:
def myfunc(x):
    """Return ``x`` with even-indexed characters upper-cased and odd-indexed ones lower-cased."""
    return ''.join(
        v.upper() if i % 2 == 0 else v.lower()
        for i, v in enumerate(x)
    )
This does the job also
def foo(input_message):
    """Return ``input_message`` with even-indexed characters lower-cased and odd-indexed ones upper-cased."""
    return ''.join(
        m.lower() if c % 2 == 0 else m.upper()
        for c, m in enumerate(input_message)
    )
Here's a solution using itertools which utilizes string slicing:
from itertools import chain, zip_longest
x = 'inputstring'
# Pair the lower-cased even-index characters with the upper-cased odd-index
# ones; the empty fill value covers an odd-length input.
zipper = zip_longest(x[0::2].lower(), x[1::2].upper(), fillvalue='')
res = ''.join(map(''.join, zipper))
# 'iNpUtStRiNg'
Using a string slicing:
from itertools import zip_longest
s = 'example'
# Upper-case the even-index characters, lower-case the odd-index ones and
# interleave them again; the empty fill value covers an odd-length string.
new_s = ''.join(
    even_char.upper() + odd_char.lower()
    for even_char, odd_char in zip_longest(s[0::2], s[1::2], fillvalue='')
)
# ExAmPlE
Using an iterator:
s_iter = iter(s)
# Passing the same iterator twice makes zip_longest consume the string two
# characters at a time.
new_s = ''.join(
    first.upper() + second.lower()
    for first, second in zip_longest(s_iter, s_iter, fillvalue='')
)
# ExAmPlE
Using the function reduce():
def func(x, y):
    """Append ``y`` to ``x`` in the opposite case of the last character of ``x``.

    Intended as the reducer for ``reduce``: ``x`` is the string built so far
    (it must be non-empty) and ``y`` is the next character.
    """
    if x[-1].islower():
        return x + y.upper()
    return x + y.lower()
from functools import reduce  # reduce is not a builtin on Python 3

new_s = reduce(func, s)  # eXaMpLe
This code also returns alternative caps string:-
def alternative_strings(strings):
    """Print ``strings`` with alternating caps and return an empty string.

    Even-indexed characters are printed upper-cased, odd-indexed ones
    lower-cased, with no trailing newline. The empty return value lets the
    call be wrapped in ``print()`` to finish the line.
    """
    for i, x in enumerate(strings):
        if i % 2 == 0:
            print(x.upper(), end="")
        else:
            print(x.lower(), end="")
    return ''


print(alternative_strings("Testing String"))
def myfunc(string):
    """Print and return ``string`` with even indices upper-cased and odd indices lower-cased."""
    # Un-hash print statements to watch python build out the string.
    # Script is an elementary example of using an enumerate function.
    # An enumerate function tracks an index integer and its associated value as it moves along the string.
    # In this example we use arithmetic to determine odd and even index counts, then modify the associated variable.
    # After modifying the upper/lower case of the character, it starts adding the string back together.
    # The end of the function then returns back with the new modified string.
    #print(string)
    retval = ''
    for space, letter in enumerate(string):
        if space %2==0:
            retval = retval + letter.upper()
            #print(retval)
        else:
            retval = retval + letter.lower()
            #print(retval)
    print(retval)
    return retval


myfunc('Thisisanamazingscript')

Python: Count character in string which are following each other

I have a string in which I want to count the occurrences of # following each other to replace them by numbers to create a increment.
For example:
# Desired usage as written by the asker (Python 2 syntax: xrange, print statement).
# doConvertMyString is the function being asked for; it is not defined here.
rawString = 'MyString1_test##_edit####'
for x in xrange(5):
output = doConvertMyString(rawString)
print output
MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
MyString1_test05_edit0005
Assuming that the number of # is not fixed and that rawString is a user input containing only string.ascii_letters + string.digits + '_' + '#, how can I do that?
Here is my test so far:
# NOTE(review): asker's unfinished attempt, kept verbatim (Python 2 `xrange`);
# the indentation appears to have been lost.
rawString = 'MyString1_test##_edit####'
# incrDatas is created but never written to in this snippet.
incrDatas = {}
key = '#'
# counter is reset to 1 on every non-'#' character and incremented when the
# previous character was also '#'.
counter = 1
for x in xrange(len(rawString)):
if rawString[x] != key:
counter = 1
continue
else:
if x > 0:
if rawString[x - 1] == key:
counter += 1
else:
pass
# ???
You may use zfill in the re.sub replacement to pad any amount of # chunks. #+ regex pattern matches 1 or more # symbols. The m.group() stands for the match the regex found, and thus, we replace all #s with the incremented x converted to string padded with the same amount of 0s as there are # in the match.
import re
rawString = 'MyString1_test##_edit####'
for x in range(5):
    # Replace every run of '#' with x + 1, zero-padded to the length of the run.
    output = re.sub(r"#+", lambda m: str(x + 1).zfill(len(m.group())), rawString)
    print(output)
Result of the demo:
MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
MyString1_test05_edit0005
The code below converts the rawString to a format string, using groupby in a list comprehension to find groups of hashes. Each run of hashes is converted into a format directive to print a zero-padded integer of the appropriate width, runs of non-hashes are simply joined back together.
This code works on Python 2.6 and later.
from itertools import groupby
def convert(template):
    """Turn a ``#`` template into a ``str.format`` string taking ``x``.

    Each run of ``n`` hashes becomes ``{x:0nd}`` (zero-padded integer of
    width ``n``). Everything else is copied through, with literal braces
    doubled so that ``format`` reproduces them unchanged.
    """
    parts = []
    for is_hash, group in groupby(template, lambda c: c == '#'):
        if is_hash:
            parts.append('{{x:0{0}d}}'.format(len(list(group))))
        else:
            parts.append(''.join(group).replace('{', '{{').replace('}', '}}'))
    return ''.join(parts)
rawString = 'MyString1_test##_edit####'
fmt = convert(rawString)
print(repr(fmt))
# The same format string is reused for every counter value.
for x in range(5):
    print(fmt.format(x=x))
output
'MyString1_test{x:02d}_edit{x:04d}'
MyString1_test00_edit0000
MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
How about this-
rawString = 'MyString1_test##_edit####'
splitString = rawString.split('_')
for i in range(10):  # you may put any count
    # The [0:4] slices keep the 4-letter labels and drop the '#' placeholders,
    # which are replaced by fixed-width counters (2 and 4 digits).
    print('%s_%s%02d_%s%04d' % (splitString[0], splitString[1][0:4], i, splitString[2][0:4], i))
You can try this naive (and probably not most efficient) solution. It assumes that the number of '#' is fixed.
rawString = 'MyString1_test##_edit####'
for i in range(1, 6):
    # Replace the longer placeholder first so '##' cannot match inside '####'.
    temp = rawString.replace('####', str(i).zfill(4)).replace('##', str(i).zfill(2))
    print(temp)
>> MyString1_test01_edit0001
MyString1_test02_edit0002
MyString1_test03_edit0003
MyString1_test04_edit0004
MyString1_test05_edit0005
test_string = 'MyString1_test##_edit####'


def count_hash(raw_string):
    """Print numbered copies of ``raw_string`` with its hash placeholders filled in.

    One line is printed for each number from 1 up to the total count of ``#``
    characters in ``raw_string``. ``####`` becomes ``000<n>`` and ``##``
    becomes ``0<n>``, so the padding is only right for single-digit numbers.
    """
    hash_count = raw_string.count("#") + 1
    for num in range(1, hash_count):
        # Replace the longer placeholder first so '##' cannot match inside '####'.
        new_string = raw_string.replace("####", "000" + str(num))
        new_string = new_string.replace("##", "0" + str(num))
        print(new_string)


count_hash(test_string)
It's a bit clunky, and only works for # counts of less than 10, but seems to do what you want.
EDIT: By "only works" I mean that you'll get extra characters with the fixed number of # symbols inserted
EDIT2: amended code

Trouble appending to a length limit if statement won't append more than one each time the function is called

So I'm trying to generate Imgur links to append to a list until the list is 10000 links long, but the function only generates one link each time it's called, instead of all 10000. What's wrong?
import string
import random
# NOTE(review): asker's code, kept verbatim; the indentation appears to have been lost.
maybe = []
def id_generator(size=7, chars=string.ascii_uppercase + string.digits + string.ascii_lowercase):
# `if` is evaluated once per call, so each call appends at most one link.
if len(maybe) < 10000:
maybe.append('http://imgur.com/gallery/' + ''.join(random.choice(chars) for x in range(size)))
You're using if, a conditional statement, not a loop.
What you need is this:
while len(maybe) < 10000:
You aren't looping over the append part.
Do this instead:
def id_generator(size=7, chars=string.ascii_uppercase + string.digits + string.ascii_lowercase):
    """Append 10000 random Imgur gallery links to the module-level list ``maybe``.

    Each link ends in ``size`` characters drawn from ``chars``. Every call
    adds another 10000 links, regardless of how many are already stored.
    """
    # range works on both Python 2 and 3, unlike xrange.
    for _ in range(10000):
        maybe.append('http://imgur.com/gallery/' + ''.join(random.choice(chars) for x in range(size)))

Categories

Resources