I'm setting up a simple sentence generator in python, to create as many word combinations as possible to describe a generic set of images involving robots. (Its a long story :D)
It outputs something like this: 'Cyborg Concept Downloadable Illustration'
Amazingly, the random generate I wrote only goes up to 255 unique combinations. Here is the script:
import numpy
from numpy import matrix
from numpy import linalg
import itertools
from pprint import pprint
import random
m = matrix( [
['Robot','Cyborg','Andoid', 'Bot', 'Droid'],
['Character','Concept','Mechanical Person', 'Artificial Intelligence', 'Mascot'],
['Downloadable','Stock','3d', 'Digital', 'Robotics'],
['Clipart','Illustration','Render', 'Image', 'Graphic'],
])
used = []
i = 0
def make_sentence(m, used):
sentence = []
i = 0
while i <= 3:
word = m[i,random.randrange(0,4)]
sentence.append(word)
i = i+1
return ' '.join(sentence)
def is_used(sentence, used):
if sentence not in used:
return False
else:
return True
sentences = []
i = 0
while i <= 1000:
sentence = make_sentence(m, used)
if(is_used(sentence, used)):
continue
else:
sentences.append(sentence)
print str(i) + ' ' +sentence
used.append(sentence)
i = i+1
Using randint instead of randrange, I get up to 624 combinations (instantly) then it hangs in an infinite loop, unable to create more combos.
I guess the question is, is there a more appropriate way of determining all possible combinations of a matrix?
You can make use of itertools to get the all possible combinations of matrix. I given one example to show how itertools will work.
import itertools
mx = [
['Robot','Cyborg','Andoid', 'Bot', 'Droid'],
['Character','Concept','Mechanical Person', 'Artificial Intelligence', 'Mascot'],
['Downloadable','Stock','3d', 'Digital', 'Robotics'],
['Clipart','Illustration','Render', 'Image', 'Graphic'],
]
for combination in itertools.product(*mx):
print combination
Your code can make use of recursion. Without itertools, here is one strategy:
def make_sentences(m, choices = []):
output = []
if len(choices) == 4:
sentence = ""
i = 0
#Go through the four rows of the matrix
#and choose words for the sentence
for j in choices:
sentence += " " + m[i][j]
i += 1
return [sentence] #must be returned as a list
for i in range(0,4):
output += make_sentences(m, choices+[i])
return output #this could be changed to a yield statement
This is quite different from your original function.
The choices list keeps track of the index of the column for each ROW in m that has been selected. When the recursive method finds that choices four rows have been selected, it outputs a list with just ONE sentence.
Where the method finds that the choices list doesn't have four elements, it recursively calls itself for FOUR new choices lists. The results of these recursive calls are added to the output list.
Related
I wrote small program to populate my game with NPCs named by random selections from first name and last name lists.
It worked but sometimes there are duplicate names selected. How can I prevent duplicates?
I could use dict but I prefer list. Is this big disadvantage?
The commented block in adding_male_NPC is my attempt to solve this problem.
import random
women_names = ["Jennifer", "Jenna", "Judith", "Becky", "Kelly"]
man_names = ["Adam", "John", "Jack", "Jim", ]
surnames =["Salzinger", "Jefferson", "Blunt", "Jigsaw", "Elem"]
marriage_status = ["Single", "In couple", "Engaged", "Married", "Divorced", "Widow"]
male_NPCs = []
list = []
def clr_list(list):
del list
def randomizer(list):
random_choice = random.choice(list)
clr_list(list)
return random_choice
def random_male():
male_surname = randomizer(surnames)
male_name = randomizer(man_names)
male_NPC = male_name + " " + male_surname
return (male_NPC)
def add_one_man():
male_NPCs.append(random_male())
return
def addding_male_NPC(count_of_NPC_males):
while count_of_NPC_males > 1:
add_one_man()
# for m in male_NPCs:
# unique_count = male_NPCs.count(m)
# if unique_count > 1:
# male_NPCs.pop(unique)
# count_of_NPC_males +=1
# else:
count_of_NPC_males -= 1
count_of_NPC_males = int(input("How many males should create?: "))
addding_male_NPC(count_of_NPC_males)
print(male_NPCs)
print(len(male_NPCs))
So i tried this but its impossible to count strings or somehow don't use well .count what is most possible.
Get idea to take indexes before creating sum of stings and use it to check double but i feel that i make circles.
I understand that provided list of names and surnames are not guarantee make doubles with high numbers but you got the point of this.
def addding_male_NPC(count_of_NPC_males):
while count_of_NPC_males > 1:
add_one_man()
for m in male_NPCs:
unique_count = male_NPCs.count(m)
if unique_count > 1:
male_NPCs.pop(unique)
count_of_NPC_males +=1
else:
count_of_NPC_males -= 1
Edit
This is so sad :(
mylist = ["a", "b", "a", "c", "c"]
mylist = list(dict.fromkeys(mylist))
print(mylist)
But anyway it will cut planned and needed numbers of item. So question is still active. This answer is quite half answer. I wait for better one.
==========================
Yes! I finally found an answer!
Thank to >>Parnav<< ( he is The guy!)
From his suggestion i made code generating from text file more than i can imagine
import random
import itertools
with open('stock_male_names.txt', 'r') as mn, open('stock_female_names.txt', 'r') as wn, open('stock_surnames.txt', 'r') as sn:
broken_male_names, broken_female_names, broken_surnames = mn.readlines(), wn.readlines(), sn.readlines()
male_names = [name.strip() for name in broken_male_names]
female_names = [name.strip() for name in broken_female_names]
surnames = [name.strip() for name in broken_surnames]
male_persons = [f"{fname} {lname}" for fname, lname in itertools.product(male_names, surnames)]
female_persons = [f"{fname} {lname}" for fname, lname in itertools.product(female_names, surnames)]
print(male_names)
print(len(male_names)) #1001
print(female_names)
print(len(female_names)) #1000
print(surnames)
print(len(surnames)) #1003
print(male_persons)
print(len(male_persons)) #1004003
print(female_persons)
print(len(female_persons)) #1003000
So from three text files of 1k items i made 1kk unique NPC names in almost no load time with open road to expand.
I am amazingly Happy :)
Case closed!
First, we want all possible combinations of the first and last names. We can get this using itertools.product:
import itertools
import random
male_names = [f"{fname} {lname}" for fname, lname in itertools.product(man_names, surname)]
print(male_names)
# ['Adam Salzinger', 'Adam Jefferson', ..., 'John Salzinger', 'John Jefferson', ..., 'Jim Jigsaw', 'Jim Elem']
Since you want to randomly get names from this list, shuffle it.
random.shuffle(male_names)
print(male_names)
# ['Jim Jefferson', 'Jack Jigsaw', 'Adam Jefferson', ..., 'Adam Blunt', 'John Blunt']
Every time you want to add a NPC, pop the last element from this list. Since you shuffled the list earlier, you're guaranteed a random element even if you always pop the last element. Popping the element removes it from the list, so you don't have to worry about duplicates. Take care not to pop more than the list contains (you have indicated elsewhere that this isn't a problem). I prefer to pop from the end of the list because that is an O(1) operation. Popping from a different location would be more expensive.
def add_male_npcs(count=1):
for _ in range(count):
male_NPCs.append(male_names.pop())
At the end, convert the list of strings into a set and then back to a list to remove any duplicates. Then, use the len() function to determine the length of the list compared to the desired length and call the function again this time adding to the list.
I'm working on an OCR use case and have identified common misclassification from the confusion matrix which is for example: '1' being confused for 'J' and '2' being confused with 'Z' and 'J'.
For a given word, I am trying to create a python script which would create all the permutations which account for all the misclassification.
Example:
Common Misclassifications: {'1':['J'],'2':['Z','J']}
Input: "AB1CD2"
Output: AB1CD2, AB1CDZ, ABJCD2, ABJCDZ, AB1CDJ, ABJCDJ
How do I go about solving this?
You get a neat solution by using a dictionary of all possible classifications, not just all mis-classifications. That is, you first "enrich" your misclassification dictionary with all possible correct classifications.
from itertools import product
all_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
common_misclass = {'1':['J'],'2':['Z','J']}
input_string = "AB1CD2"
common_class = {}
for char in all_characters:
if char in common_misclass:
common_class[char] = [char] + common_misclass[char]
else:
common_class[char] = [char]
possible_outputs = ["".join(tup) for tup in
product(*[common_class[letter] for letter in input_string])]
print(possible_outputs)
itertools product should help
from itertools import product
misclass = {'1':['J'],'2':['Z','J']}
misclass_items = [tuple([k, *v]) for k, v in misclass.items()]
print(["AB" + x + "CD" + y for (x, y) in list(product(*misclass_items))])
# ['AB1CD2', 'AB1CDZ', 'AB1CDJ', 'ABJCD2', 'ABJCDZ', 'ABJCDJ']
import random
quizwords = [["hot", "cold"],["summer","winter"],["hard","soft"],["dry","wet"],["simple","complex"],["light","darkness"],["weak","strong"],["male","female"],["sad","happy"],["win","lose"],["small","big"],["ignore","pay attention"],["sell","buy"],["succeed","fail"],["reject","accept"],["prevent","allow"],["exclude","include"]]
c = 0
random.shuffle(quizwords)
for i in quizwords:
print(i[0][0],"is to",i[0][1],"as",i[1][0],"is to...")
ans = input()
This is my code so far and I believe it would for example output 'e is to c as s is to...'
im trying to have it instead say 'exclude is to include as reject is to...(answer: accept)'
import random
quizwords = [["hot", "cold"],["summer","winter"],["hard","soft"],["dry","wet"],["simple","complex"],["light","darkness"],["weak","strong"],["male","female"],["sad","happy"],["win","lose"],["small","big"],["ignore","pay attention"],["sell","buy"],["succeed","fail"],["reject","accept"],["prevent","allow"],["exclude","include"]]
c = 0
random.shuffle(quizwords)
for i in range(len(quizwords) - 1)[::2]:
first_pair = quizwords[i]
second_pair = quizwords[i+1]
print(f'{first_pair[0]} is to {first_pair[1]} as {second_pair[0]} is to...')
ans = input()
Since you're already iterating through items in the list in the for-loop, doing two levels of indexing was pulling the first letter of each word.
I am a beginner python coder and I am writing a code to generate random mutation at random position.
I have written a function which includes:
The sequence where mutation happens
A List of nucleotide from which a nucleotide is selected randomly and replaced to the nucleotide of the original sequence.
Basic concept of the code:
Say we have to pick one ball from (A) basket and replace with another ball from another basket (B). The colors of the two balls need to be different.
I know I need to use while loop but I am not able to do it.
def random(s)
length = len(s)
seq = list(s)
nucl = "ATGC" ## pick one nucleotide from this list
lengthnucl= len(nucleotide_list)
position_orgseq = np.random.choice(range(0,length))
position_nucl = np.random.choice(range(0,lengthnucl))
#while c < length:
##if the two nucleotides chosen are not equaul then:
#two nucleotides are from
# TTTTGGGCCCCAAA - original seq, ATGC = nucloetide list
if seq[position_orgseq] != nucleotide_list[position_nucl]:
seq[position_orgseq] = nucleotide_list[position_nucl]
final = "".join(seq)
return s,final
actual_seq, mut_seq = random("TTTTGGGCCCCAAA")
print(actual_seq)
print(mut_seq)
First, as #Error - Syntatical Remorse pointed out in the comment, there is no need to import numpy, use built in random instead (specifically, you can use random.randint()).
Your code as is, doesn't run, you have misnamed variables. Other than that, you are close. Your hunch to using a while loop is correct. You can simply keep looping until your two random values don't give the same nucleotide in the two lists. Like so:
from random import randint
def random(s):
length = len(s)
seq = list(s)
nucl = "ATGC"
lengthnucl = len(nucl)
position_orgseq = randint(0, length - 1)
position_nucl = randint(0, lengthnucl - 1)
while seq[position_orgseq] == nucl[position_nucl]:
position_orgseq = randint(0, length - 1)
position_nucl = randint(0, lengthnucl - 1)
seq[position_orgseq] = nucl[position_nucl]
final = "".join(seq)
return s, final
actual_seq, mut_seq = random("TTTTGGGCCCCAAA")
print(actual_seq)
print(mut_seq)
This may be optimized further.
For example,
The function could be something like def RandABCD(n, .25, .34, .25, .25):
Where n is the length of the string to be generated and the following numbers are the desired probabilities of A, B, C, D.
I would imagine this is quite simple, however i am having trouble creating a working program. Any help would be greatly appreciated.
Here's the code to select a single weighted value. You should be able to take it from here. It uses bisect and random to accomplish the work.
from bisect import bisect
from random import random
def WeightedABCD(*weights):
chars = 'ABCD'
breakpoints = [sum(weights[:x+1]) for x in range(4)]
return chars[bisect(breakpoints, random())]
Call it like this: WeightedABCD(.25, .34, .25, .25).
EDIT: Here is a version that works even if the weights don't add up to 1.0:
from bisect import bisect_left
from random import uniform
def WeightedABCD(*weights):
chars = 'ABCD'
breakpoints = [sum(weights[:x+1]) for x in range(4)]
return chars[bisect_left(breakpoints, uniform(0.0,breakpoints[-1]))]
The random class is quite powerful in python. You can generate a list with the characters desired at the appropriate weights and then use random.choice to obtain a selection.
First, make sure you do an import random.
For example, let's say you wanted a truly random string from A,B,C, or D.
1. Generate a list with the characters
li = ['A','B','C','D']
Then obtain values from it using random.choice
output = "".join([random.choice(li) for i in range(0, n)])
You could easily make that a function with n as a parameter.
In the above case, you have an equal chance of getting A,B,C, or D.
You can use duplicate entries in the list to give characters higher probabilities. So, for example, let's say you wanted a 50% chance of A and 25% chances of B and C respectively. You could have an array like this:
li = ['A','A','B','C']
And so on.
It would not be hard to parameterize the characters coming in with desired weights, to model that I'd use a dictionary.
characterbasis = {'A':25, 'B':25, 'C':25, 'D':25}
Make that the first parameter, and the second being the length of the string and use the above code to generate your string.
For four letters, here's something quick off the top of my head:
from random import random
def randABCD(n, pA, pB, pC, pD):
# assumes pA + pB + pC + pD == 1
cA = pA
cB = cA + pB
cC = cB + pC
def choose():
r = random()
if r < cA:
return 'A'
elif r < cB:
return 'B'
elif r < cC:
return 'C'
else:
return 'D'
return ''.join([choose() for i in xrange(n)])
I have no doubt that this can be made much cleaner/shorter, I'm just in a bit of a hurry right now.
The reason I wouldn't be content with David in Dakota's answer of using a list of duplicate characters is that depending on your probabilities, it may not be possible to create a list with duplicates in the right numbers to simulate the probabilities you want. (Well, I guess it might always be possible, but you might wind up needing a huge list - what if your probabilities were 0.11235442079, 0.4072777384, 0.2297927874, 0.25057505341?)
EDIT: here's a much cleaner generic version that works with any number of letters with any weights:
from bisect import bisect
from random import uniform
def rand_string(n, content):
''' Creates a string of letters (or substrings) chosen independently
with specified probabilities. content is a dictionary mapping
a substring to its "weight" which is proportional to its probability,
and n is the desired number of elements in the string.
This does not assume the sum of the weights is 1.'''
l, cdf = zip(*[(l, w) for l, w in content.iteritems()])
cdf = list(cdf)
for i in xrange(1, len(cdf)):
cdf[i] += cdf[i - 1]
return ''.join([l[bisect(cdf, uniform(0, cdf[-1]))] for i in xrange(n)])
Here is a rough idea of what might suit you
import random as r
def distributed_choice(probs):
r= r.random()
cum = 0.0
for pair in probs:
if (r < cum + pair[1]):
return pair[0]
cum += pair[1]
The parameter probs takes a list of pairs of the form (object, probability). It is assumed that the sum of probabilities is 1 (otherwise, its trivial to normalize).
To use it just execute:
''.join([distributed_choice(probs)]*4)
Hmm, something like:
import random
class RandomDistribution:
def __init__(self, kv):
self.entries = kv.keys()
self.where = []
cnt = 0
for x in self.entries:
self.where.append(cnt)
cnt += kv[x]
self.where.append(cnt)
def find(self, key):
l, r = 0, len(self.where)-1
while l+1 < r:
m = (l+r)/2
if self.where[m] <= key:
l=m
else:
r=m
return self.entries[l]
def randomselect(self):
return self.find(random.random()*self.where[-1])
rd = RandomDistribution( {"foo": 5.5, "bar": 3.14, "baz": 2.8 } )
for x in range(1000):
print rd.randomselect()
should get you most of the way...
Thank you all for your help, I was able to figure something out, mostly with this info.
For my particular need, I did something like this:
import random
#Create a function to randomize a given string
def makerandom(seq):
return ''.join(random.sample(seq, len(seq)))
def randomDNA(n, probA=0.25, probC=0.25, probG=0.25, probT=0.25):
notrandom=''
A=int(n*probA)
C=int(n*probC)
T=int(n*probT)
G=int(n*probG)
#The remainder part here is used to make sure all n are used, as one cannot
#have half an A for example.
remainder=''
for i in range(0, n-(A+G+C+T)):
ramainder+=random.choice("ATGC")
notrandom=notrandom+ 'A'*A+ 'C'*C+ 'G'*G+ 'T'*T + remainder
return makerandom(notrandom)