I want to construct a list of 100 randomly generated share prices by generate 100 random 4-letter-names for the companies and a corresponding random share price.
So far, I have written the following code which provides a random 4-letter company name:
import string
import random
def stock_generator():
return ''.join(random.choices(string.ascii_uppercase, k=4))
stock_name_generator()
# OUTPUT
'FIQG'
But, I want to generate 100 of these with accompanying random share prices. It is possible to do this while keeping the list the same once it's created (i.e. using a seed of some sort)?
I think this approach will work for your task.
import string
import random
random.seed(0)
def stock_generator():
return (''.join(random.choices(string.ascii_uppercase, k=4)), random.random())
parse_result =[]
n=100
for i in range(0,n):
parse_result.append(stock_generator())
print(parse_result)
import string
import random
random.seed(0)
def stock_generator(n=100):
return [(''.join(random.choices(string.ascii_uppercase, k=4)), random.random()) for _ in range(n)]
stocks = stock_generator()
print(stocks)
You can generate as many random stocks as you want with this generator expression. stock_generator() returns a list of tuples of random 4-letter names and a random number between 0 and 1. I image a real price would look different, but this is how you'd start.
random.seed() lets you replicate the random generation.
Edit: average stock price as additionally requested
average_price = sum(stock[1] for stock in stocks) / len(stocks)
stocks[i][1] can be used to access the price in the name/price tuple.
You can generate a consistent n random samples by updating the seed of random before shuffle. Here is an example on how to generate these list of names (10 samples):
import random, copy
sorted_list = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
i = 0 #counter
n = 10 # number of samples
l = 4 # length of samples
myset = set()
while i < n:
shuffled_list = copy.deepcopy(sorted_list)
random.seed(i)
random.shuffle(shuffled_list)
name = tuple(shuffled_list[:l])
if name not in myset:
myset.add(name)
i+=1
print(sorted([''.join(list(x)) for x in myset]))
# ['CKEM', 'DURP', 'GQXO', 'JFWI', 'JNRX', 'MNSV', 'OAXS', 'TIFX', 'VLZS', 'XYLK']
Then you can randomly generate n number of prices and create a list of tuples that binds each name to a price:
names = sorted([''.join(list(x)) for x in myset])
int_list = random.sample(range(1, 100), n)
prices = [x/10 for x in int_list]
names_and_prices = []
for name, price in zip(names,prices):
names_and_prices.append((name,price))
# [('CKEM', 1.5), ('DURP', 1.7), ('GQXO', 6.5), ('JFWI', 7.6), ('JNRX', 0.9), ('MNSV', 8.9), ('OAXS', 5.0), ('TIFX', 9.6), ('VLZS', 1.4), ('XYLK', 3.8)]
Try:
import string
import random
def stock_generator(num):
names = []
for n in range(num):
x = ''.join(random.choices(string.ascii_uppercase, k=4))
names.append(x)
return names
print(stock_generator(100))
Each time you will call the function stock_generator using a number of your choice as parameter, you'll generate the stock name you need.
Related
I have a dataframe containing 15K+ strings in the format of xxxx-yyyyy-zzz. The yyyyy is a random 5 digit number generated. Given that I have xxxx as 1000 and zzz as 200, how can I generate the random yyyyy and add it to the dataframe so that the string is unique?
number
0 1000-12345-100
1 1000-82045-200
2 1000-93035-200
import pandas as pd
data = {"number": ["1000-12345-100", "1000-82045-200", "1000-93035-200"]}
df = pd.DataFrame(data)
print(df)
I'd generate a new column with just the middle values and generate random numbers until you find one that's not in the column.
from random import randint
df["excl"] = df.number.apply(lambda x:int(x.split("-")[1]))
num = randint(10000, 99999)
while num in df.excl.values:
num = randint(10000, 99999)
I tried to come up with a generic approach, you can use this for lists:
import random
number_series = ["1000-12345-100", "1000-82045-200", "1000-93035-200"]
def rnd_nums(n_numbers: int, number_series: list, max_length: int=5, prefix: int=1000, suffix: int=100):
# ignore following numbers
blacklist = [int(x.split('-')[1]) for x in number_series]
# define space with allowed numbers
rng = range(0, 10**max_length)
# get unique sample of length "n_numbers"
lst = random.sample([i for i in rng if i not in blacklist], n_numbers)
# return sample as string with pre- and suffix
return ['{}-{:05d}-{}'.format(prefix, mid, suffix) for mid in lst]
rnd_nums(5, number_series)
Out[69]:
['1000-79396-100',
'1000-30032-100',
'1000-09188-100',
'1000-18726-100',
'1000-12139-100']
Or use it to generate new rows in a dataframe Dataframe:
import pandas as pd
data = {"number": ["1000-12345-100", "1000-82045-200", "1000-93035-200"]}
df = pd.DataFrame(data)
print(df)
df.append(pd.DataFrame({'number': rnd_nums(5, number_series)}), ignore_index=True)
Out[72]:
number
0 1000-12345-100
1 1000-82045-200
2 1000-93035-200
3 1000-00439-100
4 1000-36284-100
5 1000-64592-100
6 1000-50471-100
7 1000-02005-100
In addition to the other suggestions, you could also write a function that takes your df and the amount of new numbers you would like to add as arguments, appends it with the new numbers and returns the updated df. The function could look like this:
import pandas as pd
import random
def add_number(df, num):
lst = []
for n in df["number"]:
n = n.split("-")[1]
lst.append(int(n))
for i in range(num):
check = False
while check == False:
new_number = random.randint(10000, 99999)
if new_number not in lst:
lst.append(new_number)
l = len(df["number"])
df.at[l+1,"number"] = "1000-%i-200" % new_number
check = True
df = df.reset_index(drop=True)
return df
This would have the advantage that you could use the function every time you want to add new numbers.
try:
import random
df['number'] = [f"1000-{x}-200" for x in random.sample(range(10000, 99999), len(df))]
output:
number
0 1000-24744-200
1 1000-28991-200
2 1000-98322-200
...
One option is to use sample from the random module:
import random
num_digits = 5
col_length = 15000
rand_nums = random.sample(range(10**num_digits),col_length)
data["number"]=['-'.join(
'1000',str(num).zfill(num_digits),'200')
for num in rand_nums]
It took my computer about 30 ms to generate the numbers. For numbers with more digits, it may become infeasible.
Another option is to just take sequential integers, then encrypt them. This will result in a sequence in which each element is unique. They will be pseudo-random, rather than truly random, but then Python's random module is producing pseudo-random numbers as well.
I know that random.random() generates pseudo-random real numbers between 0 and 1. But what
if I needed other random reals? for example between 30 and 35. I am trying to write this function but I am stuck in the middle Please help me.
Thank you All.
import random
def generate_random():
""" Make a list of 10 random reals between 30 and 35 """
random.seed()
pass
You can use:
from random import random
x = [random()*5+30 for p in range(0, 10)]
print(x)
output:
[33.42156572091028, 34.34567526662949, 32.032391323008056, 33.25221139836125, 31.149272884440236, 33.9733171427812, 34.684482360876856, 34.927541219373296, 30.99028290511958, 34.344225720130446]
import random
def problem2_4():
random.seed() # if u need concrete numbers
x = [] # list for generated numbers
for num in range():
x.append(30+5*random.random())
print(x)
Try this:
import random
def generate_random():
return [random.randint(30, 35) for _ in range(10)]
Or this:
import random
def generate_random():
return [30+5*random.random() for _ in range(10)]
input = ['beleriand','mordor','hithlum','eol','morgoth','melian','thingol']
I'm having trouble creating X number of lists of size Y without repeating any elements.
What I have been doing is using:
x = 3
y = 2
import random
output = random.sample(input, y)
# ['mordor', 'thingol']
but if I repeat this then I will have repeats.
I would like the output to be something like
[['mordor', 'thingol'], ['melian', 'hithlum'], ['beleriand', 'eol']]
since I chose x = 3 (3 lists) of size y = 2 (2 elements per list).
def random_generator(x,y):
....
You can simply shuffle the original list and then generate n groups of m elements successively from it. There may be fewer or more than that number of groups possible. Note thatinputis the name of a Python built-in function, so I renamed itwords.
import itertools
from pprint import pprint
import random
def random_generator(seq, n, m):
rand_seq = seq[:] # make a copy to avoid changing input argument
random.shuffle(rand_seq)
lists = []
limit = n-1
for i,group in enumerate(itertools.izip(*([iter(rand_seq)]*m))):
lists.append(group)
if i == limit: break # have enough
return lists
words = ['beleriand', 'mordor', 'hithlum', 'eol', 'morgoth', 'melian', 'thingol']
pprint(random_generator(words, 3, 2))
Output:
[('mordor', 'hithlum'), ('thingol', 'melian'), ('morgoth', 'beleriand')]
It would be more Pythonic to generate the groups iteratively. The above function could easily be turned into generator by making ityieldeach group, one-by-one, instead of returning them all in a relatively much longer list-of-lists:
def random_generator_iterator(seq, n, m):
rand_seq = seq[:]
random.shuffle(rand_seq)
limit = n-1
for i,group in enumerate(itertools.izip(*([iter(rand_seq)]*m))):
yield group
if i == limit: break
pprint([group for group in random_generator_iterator(words, 3, 2)])
rather than randomly taking two things from your list, just randomize your list and iterate through it to create your new array of the dimensions you specify!
import random
my_input = ['beleriand','mordor','hithlum','eol','morgoth','melian','thingol']
def random_generator(array,x,y):
random.shuffle(array)
result = []
count = 0
while count < x:
section = []
y1 = y * count
y2 = y * (count + 1)
for i in range (y1,y2):
section.append(array[i])
result.append(section)
count += 1
return result
print random_generator(my_input,3,2)
You could use random.sample in combination with the itertools.grouper recipe.
input = ['beleriand','mordor','hithlum','eol','morgoth','melian','thingol']
import itertools
import random
def grouper(iterable,group_size):
return itertools.izip(*([iter(iterable)]*group_size))
def random_generator(x,y):
k = x*y
sample = random.sample(input,k)
return list(grouper(sample,y))
print random_generator(3,2)
print random_generator(3,2)
print random_generator(3,2)
print random_generator(3,2)
print random_generator(3,2)
print random_generator(3,2)
for one run, this results in:
[('melian', 'mordor'), ('hithlum', 'eol'), ('thingol', 'morgoth')]
[('hithlum', 'thingol'), ('mordor', 'beleriand'), ('morgoth', 'eol')]
[('morgoth', 'beleriand'), ('melian', 'thingol'), ('hithlum', 'mordor')]
[('beleriand', 'thingol'), ('melian', 'hithlum'), ('eol', 'morgoth')]
[('mordor', 'hithlum'), ('eol', 'beleriand'), ('melian', 'morgoth')]
[('mordor', 'melian'), ('thingol', 'beleriand'), ('morgoth', 'eol')]
And the next run:
[('mordor', 'thingol'), ('eol', 'hithlum'), ('melian', 'beleriand')]
[('eol', 'beleriand'), ('mordor', 'melian'), ('hithlum', 'thingol')]
[('hithlum', 'mordor'), ('thingol', 'morgoth'), ('melian', 'eol')]
[('morgoth', 'eol'), ('mordor', 'thingol'), ('melian', 'beleriand')]
[('melian', 'morgoth'), ('mordor', 'eol'), ('thingol', 'hithlum')]
[('mordor', 'morgoth'), ('hithlum', 'thingol'), ('eol', 'melian')]
I wish to select a random word from a list where the is a known chance for each word, for example:
Fruit with Probability
Orange 0.10
Apple 0.05
Mango 0.15
etc
How would be the best way of implementing this? The actual list I will take from is up to 100 items longs and the % do not all tally to 100 % they do fall short to account for the items that had a really low chance of occurrence. I would ideally like to take this from a CSV which is where I store this data. This is not a time critical task.
Thank you for any advice on how best to proceed.
You can pick items with weighted probabilities if you assign each item a number range proportional to its probability, pick a random number between zero and the sum of the ranges and find what item matches it. The following class does exactly that:
from random import random
class WeightedChoice(object):
def __init__(self, weights):
"""Pick items with weighted probabilities.
weights
a sequence of tuples of item and it's weight.
"""
self._total_weight = 0.
self._item_levels = []
for item, weight in weights:
self._total_weight += weight
self._item_levels.append((self._total_weight, item))
def pick(self):
pick = self._total_weight * random()
for level, item in self._item_levels:
if level >= pick:
return item
You can then load the CSV file with the csv module and feed it to the WeightedChoice class:
import csv
weighed_items = [(item,float(weight)) for item,weight in csv.reader(open('file.csv'))]
picker = WeightedChoice(weighed_items)
print(picker.pick())
What you want is to draw from a multinomial distribution. Assuming you have two lists of items and probabilities, and the probabilities sum to 1 (if not, just add some default value to cover the extra):
def choose(items,chances):
import random
p = chances[0]
x = random.random()
i = 0
while x > p :
i = i + 1
p = p + chances[i]
return items[i]
lst = [ ('Orange', 0.10), ('Apple', 0.05), ('Mango', 0.15), ('etc', 0.69) ]
x = 0.0
lst2 = []
for fruit, chance in lst:
tup = (x, fruit)
lst2.append(tup)
x += chance
tup = (x, None)
lst2.append(tup)
import random
def pick_one(lst2):
if lst2[0][1] is None:
raise ValueError, "no valid values to choose"
while True:
r = random.random()
for x, fruit in reversed(lst2):
if x <= r:
if fruit is None:
break # try again with a different random value
else:
return fruit
pick_one(lst2)
This builds a new list, with ascending values representing the range of values that choose a fruit; then pick_one() walks backward down the list, looking for a value that is <= the current random value. We put a "sentinel" value on the end of the list; if the values don't reach 1.0, there is a chance of a random value that shouldn't match anything, and it will match the sentinel value and then be rejected. random.random() returns a random value in the range [0.0, 1.0) so it is certain to match something in the list eventually.
The nice thing here is that you should be able to have one value with a 0.000001 chance of matching, and it should actually match with that frequency; the other solutions, where you make a list with the items repeated and just use random.choice() to choose one, would require a list with a million items in it to handle this case.
lst = [ ('Orange', 0.10), ('Apple', 0.05), ('Mango', 0.15), ('etc', 0.69) ]
x = 0.0
lst2 = []
for fruit, chance in lst:
low = x
high = x + chance
tup = (low, high, fruit)
lst2.append(tup)
x += chance
if x > 1.0:
raise ValueError, "chances add up to more than 100%"
low = x
high = 1.0
tup = (low, high, None)
lst2.append(tup)
import random
def pick_one(lst2):
if lst2[0][2] is None:
raise ValueError, "no valid values to choose"
while True:
r = random.random()
for low, high, fruit in lst2:
if low <= r < high:
if fruit is None:
break # try again with a different random value
else:
return fruit
pick_one(lst2)
# test it 10,000 times
d = {}
for i in xrange(10000):
x = pick_one(lst2)
if x in d:
d[x] += 1
else:
d[x] = 1
I think this is a little clearer. Instead of a tricky way of representing ranges as ascending values, we just keep ranges. Because we are testing ranges, we can simply walk forward through the lst2 values; no need to use reversed().
from numpy.random import multinomial
import numpy as np
def pickone(dist):
return np.where(multinomial(1, dist) == 1)[0][0]
if __name__ == '__main__':
lst = [ ('Orange', 0.10), ('Apple', 0.05), ('Mango', 0.15), ('etc', 0.70) ]
dist = [p[1] for p in lst]
N = 10000
draws = np.array([pickone(dist) for i in range(N)], dtype=int)
hist = np.histogram(draws, bins=[i for i in range(len(dist)+1)])[0]
for i in range(len(lst)):
print(f'{lst[i]} {hist[i]/N}')
One solution is to normalize the probabilities to integers and then repeat each element once per value (e.g. a list with 2 Oranges, 1 Apple, 3 Mangos). This is incredibly easy to do (from random import choice). If that is not practical, try the code here.
import random
d= {'orange': 0.10, 'mango': 0.15, 'apple': 0.05}
weightedArray = []
for k in d:
weightedArray+=[k]*int(d[k]*100)
random.choice(weightedArray)
EDITS
This is essentially what Brian said above.
For example,
The function could be something like def RandABCD(n, .25, .34, .25, .25):
Where n is the length of the string to be generated and the following numbers are the desired probabilities of A, B, C, D.
I would imagine this is quite simple, however i am having trouble creating a working program. Any help would be greatly appreciated.
Here's the code to select a single weighted value. You should be able to take it from here. It uses bisect and random to accomplish the work.
from bisect import bisect
from random import random
def WeightedABCD(*weights):
chars = 'ABCD'
breakpoints = [sum(weights[:x+1]) for x in range(4)]
return chars[bisect(breakpoints, random())]
Call it like this: WeightedABCD(.25, .34, .25, .25).
EDIT: Here is a version that works even if the weights don't add up to 1.0:
from bisect import bisect_left
from random import uniform
def WeightedABCD(*weights):
chars = 'ABCD'
breakpoints = [sum(weights[:x+1]) for x in range(4)]
return chars[bisect_left(breakpoints, uniform(0.0,breakpoints[-1]))]
The random class is quite powerful in python. You can generate a list with the characters desired at the appropriate weights and then use random.choice to obtain a selection.
First, make sure you do an import random.
For example, let's say you wanted a truly random string from A,B,C, or D.
1. Generate a list with the characters
li = ['A','B','C','D']
Then obtain values from it using random.choice
output = "".join([random.choice(li) for i in range(0, n)])
You could easily make that a function with n as a parameter.
In the above case, you have an equal chance of getting A,B,C, or D.
You can use duplicate entries in the list to give characters higher probabilities. So, for example, let's say you wanted a 50% chance of A and 25% chances of B and C respectively. You could have an array like this:
li = ['A','A','B','C']
And so on.
It would not be hard to parameterize the characters coming in with desired weights, to model that I'd use a dictionary.
characterbasis = {'A':25, 'B':25, 'C':25, 'D':25}
Make that the first parameter, and the second being the length of the string and use the above code to generate your string.
For four letters, here's something quick off the top of my head:
from random import random
def randABCD(n, pA, pB, pC, pD):
# assumes pA + pB + pC + pD == 1
cA = pA
cB = cA + pB
cC = cB + pC
def choose():
r = random()
if r < cA:
return 'A'
elif r < cB:
return 'B'
elif r < cC:
return 'C'
else:
return 'D'
return ''.join([choose() for i in xrange(n)])
I have no doubt that this can be made much cleaner/shorter, I'm just in a bit of a hurry right now.
The reason I wouldn't be content with David in Dakota's answer of using a list of duplicate characters is that depending on your probabilities, it may not be possible to create a list with duplicates in the right numbers to simulate the probabilities you want. (Well, I guess it might always be possible, but you might wind up needing a huge list - what if your probabilities were 0.11235442079, 0.4072777384, 0.2297927874, 0.25057505341?)
EDIT: here's a much cleaner generic version that works with any number of letters with any weights:
from bisect import bisect
from random import uniform
def rand_string(n, content):
''' Creates a string of letters (or substrings) chosen independently
with specified probabilities. content is a dictionary mapping
a substring to its "weight" which is proportional to its probability,
and n is the desired number of elements in the string.
This does not assume the sum of the weights is 1.'''
l, cdf = zip(*[(l, w) for l, w in content.iteritems()])
cdf = list(cdf)
for i in xrange(1, len(cdf)):
cdf[i] += cdf[i - 1]
return ''.join([l[bisect(cdf, uniform(0, cdf[-1]))] for i in xrange(n)])
Here is a rough idea of what might suit you
import random as r
def distributed_choice(probs):
r= r.random()
cum = 0.0
for pair in probs:
if (r < cum + pair[1]):
return pair[0]
cum += pair[1]
The parameter probs takes a list of pairs of the form (object, probability). It is assumed that the sum of probabilities is 1 (otherwise, its trivial to normalize).
To use it just execute:
''.join([distributed_choice(probs)]*4)
Hmm, something like:
import random
class RandomDistribution:
def __init__(self, kv):
self.entries = kv.keys()
self.where = []
cnt = 0
for x in self.entries:
self.where.append(cnt)
cnt += kv[x]
self.where.append(cnt)
def find(self, key):
l, r = 0, len(self.where)-1
while l+1 < r:
m = (l+r)/2
if self.where[m] <= key:
l=m
else:
r=m
return self.entries[l]
def randomselect(self):
return self.find(random.random()*self.where[-1])
rd = RandomDistribution( {"foo": 5.5, "bar": 3.14, "baz": 2.8 } )
for x in range(1000):
print rd.randomselect()
should get you most of the way...
Thank you all for your help, I was able to figure something out, mostly with this info.
For my particular need, I did something like this:
import random
#Create a function to randomize a given string
def makerandom(seq):
return ''.join(random.sample(seq, len(seq)))
def randomDNA(n, probA=0.25, probC=0.25, probG=0.25, probT=0.25):
notrandom=''
A=int(n*probA)
C=int(n*probC)
T=int(n*probT)
G=int(n*probG)
#The remainder part here is used to make sure all n are used, as one cannot
#have half an A for example.
remainder=''
for i in range(0, n-(A+G+C+T)):
ramainder+=random.choice("ATGC")
notrandom=notrandom+ 'A'*A+ 'C'*C+ 'G'*G+ 'T'*T + remainder
return makerandom(notrandom)