this maybe simple but how can I automatize this:
for example, this is just for 2 steps but I want to do the same for 100
should I use looping or another kind of function
def earn(w,y):
return w+y
def spend(w,x):
new_wealth = w -x
if new_wealth < 0:
print("Insufficient funds")
else:
return new_wealth
w0=0
w1=earn(w0,2300)
w2=spend(w1,1500)
w3=earn(w2,2300)
w4=spend(w3,1500)
print("w0,w1,w2,w3,w4 = ", w0,w1,w2,w3,w4)
To repeat the same action, you should obviously use loops.
Here is an example using a "for" loop with 100 iterations, appending each operation result in a list:
results = []
w = 0
for _i in range(100):
w = earn(w, 2300)
results.append(w)
w = spend(w, 1500)
results.append(w)
print(results) # [2300, 800, 3100, 1600, 3900, 2400 ... ]
You should obviously modify it for your purpose.
if you had a list of transactions with weather it was a spend or earn and the value you could try something like this
transactions = ['earn 2300', 'spend 1500', 'earn 2300', 'spend 1500']
string = ''
results = [0]
for n in range(len(transactions)):
string += 'w'+str(n)
transactions_type = transactions[n].split(' ')[0]
transactions_value = int(transactions[n].split(' ')[1])
if transactions_type.lower() == 'earn':
results.append(earn(results[-1], transactions_value))
if transactions_type.lower() == 'spend':
results.append(spend(results[-1], transactions_value))
output = string + ' = '
for value in results:
output += str(value)
which outputs
w0,w1,w2,w3,w4 = 0 2300 800 3100 1600
but this would depend on how you are storing this transaction information the method here works but is not ideal
Related
I've been working for around a week to learn SimPy for a discrete simulation I have to run. I've done my best, but I'm just not experienced enough to figure it out quickly. I am dying. Please help.
The system in question goes like this:
order arrives -> resource_1 (there are 2) performs take_order -> order broken into items -> resource_2 (there are 10) performs process_item
My code runs and performs the simulation, but I'm having a lot of trouble getting the queues on the resources to function. As in, queues do not build up on either resource when I run it, and I cannot find the reason why. I try resource.get_queue and get empty lists. There should absolutely be queues, as the orders arrive faster than they can be processed.
I think it has something to do with the logic for requesting resources, but I can't figure it out. Here's how I've structured the code:
import simpy
import random
import numpy as np
total_items = []
total_a = []
total_b = []
total_c = []
order_Q = []
item_Q = []
skipped_visits = []
order_time_dict = {}
order_time_dict2 = {}
total_order_time_dict = {}
var = []
class System:
def __init__(self,env,num_resource_1,num_resource_2):
self.env = env
self.resource_1 = simpy.Resource(env,num_resource_1)
self.resource_2 = simpy.Resource(env,num_resource_2)
def take_order(self, order):
self.time_to_order = random.triangular(30/60,60/60,120/60)
arrive = self.env.now
yield self.env.timeout(self.time_to_order)
def process_item(self,item):
total_process_time = 0
current = env.now
order_num = item[1][0]
for i in range(1,item[1][1]):
if 'a' in item[0]:
total_process_time += random.triangular(.05,7/60,1/6) #bagging time only
#here edit order time w x
if 'b' in item[0]:
total_process_time += random.triangular(.05,.3333,.75)
if 'c' in item[0]:
total_process_time += random.triangular(.05,7/60,1/6)
#the following is handling time: getting to station, waiting on car to arrive at window after finished, handing to cust
total_process_time += random.triangular(.05, 10/60, 15/60)
item_finish_time = current + total_process_time
if order_num in order_time_dict2.keys():
start = order_time_dict2[order_num][0]
if order_time_dict2[order_num][1] < item_finish_time:
order_time_dict2[order_num] = (start, item_finish_time)
else:
order_time_dict2[order_num] = (current, item_finish_time)
yield self.env.timeout(total_process_time)
class Order:
def __init__(self, order_dict,order_num):
self.order_dict = order_dict
self.order_num = order_num
self.order_stripped = {}
for x,y in list(self.order_dict.items()):
if x != 'total':
if y != 0:
self.order_stripped[x] = (order_num,y) #this gives dictionary format {item: (order number, number items) } but only including items in order
self.order_list = list(self.order_stripped.items())
def generate_order(num_orders):
print('running generate_order')
a_demand = .1914 ** 3
a_stdev = 43.684104
b_demand = .1153
b_stdev = 28.507782
c_demand = .0664
c_stdev = 15.5562624349
num_a = abs(round(np.random.normal(a_demand)))
num_b = abs(round(np.random.normal(b_demand)))
num_c = abs(round(np.random.normal(c_demand)))
total = num_orders
total_a.append(num_a)
total_b.append(num_b)
total_c.append(num_c)
total_num_items = num_a + num_b + num_c
total_items.append(total_num_items)
order_dict = {'num_a':num_a, 'num_b':num_b,'num_c':num_c, 'total': total}
return order_dict
def order_process(order_instance,system):
enter_system_at = system.env.now
print("order " + str(order_instance.order_num) + " arrives at " + str(enter_system_at))
if len(system.resource_1.get_queue) > 1:
print("WORKING HERE ******************")
if len(system.resource_1.get_queue) <= 25:
with system.resource_1.request() as req:
order_Q.append(order_instance)
yield req
yield env.process(system.take_order(order_instance))
order_Q.pop()
enter_workstation_at = system.env.now
print("order num " + str(order_instance.order_num) + " enters workstation at " + str(enter_workstation_at))
for item in order_instance.order_list:
item_Q.append(item)
with system.resource_2.request() as req:
yield req
yield env.process(system.process_item(item))
if len(system.resource_2.get_queue) >1:
var.append(1)
item_Q.pop()
leave_workstation_at = system.env.now
print("Order num " + str(order_instance.order_num) + " leaves at " + str(leave_workstation_at))
order_time_dict[order_instance.order_num] = leave_workstation_at-enter_workstation_at
total_order_time_dict[order_instance.order_num]=leave_workstation_at-enter_system_at
else:
skipped_visits.append(1)
def setup(env):
system = System(env,2,15)
order_num = 0
while True:
next_order = random.expovariate(3.5) #where 20 is order arrival mean (lambda)
yield env.timeout(next_order)
order_num+=1
env.process(order_process(Order(generate_order(order_num),order_num),system))
env = simpy.Environment()
env.process(setup(env))
env.run(until=15*60)
print("1: \n", order_time_dict)
I think you are looking at the wrong queue.
the api for getting queued requests for resources is just attribute queue so try using
len(system.resource_1.queue)
get_queue and put_queue is from the base class and used to derive new resource classes.
but wait they are not what any reasonable person would assume, and I find this confusing too, but the doc says
Requesting a resources is modeled as “putting a process’ token into the resources” which means when you call request() the process is put into the put_queue, not the get_queue. And with resource, release always succeeds immediately so its queue (which is the get_queue) is always empty
I think queue is just a alias for the put_queue, but queue is much less confussing
I am looking to combine 10 audio samples in various manners (format - wav probably, but this can be changed to any format as they will be pre-recorded).
from pydub import AudioSegment
sounds = []
sound1 = AudioSegment.from_wav("Dropbox/PIREAD/1.wav")
sound2 = AudioSegment.from_wav("Dropbox/PIREAD/2.wav")
sound3 = AudioSegment.from_wav("Dropbox/PIREAD/3.wav")
sound4 = AudioSegment.from_wav("Dropbox/PIREAD/4.wav")
sound5 = AudioSegment.from_wav("Dropbox/PIREAD/5.wav")
sound6 = AudioSegment.from_wav("Dropbox/PIREAD/6.wav")
sound7 = AudioSegment.from_wav("Dropbox/PIREAD/7.wav")
sound8 = AudioSegment.from_wav("Dropbox/PIREAD/8.wav")
sound9 = AudioSegment.from_wav("Dropbox/PIREAD/9.wav")
sound0 = AudioSegment.from_wav("Dropbox/PIREAD/0.wav")
sounds=[sound1,sound2,sound3,sound4,sound5,sound6,sound7,sound8,sound9,sound0]
combined_sounds = AudioSegment.empty()
for x in range(10):
for y in range(10):
combined_sounds += sounds[y]
combined_sounds.export("Dropbox/PIREAD/joinedFile.wav", format="wav")
This is literally me reading the numbers 0-9 and assembling them into one overall wav file.
It works - but it is slow once the loop is extended x=100, x=1000.
Q: How can I speed things up?
The actual order of the numbers will be read from a text$ - for example "354224848179261915075" which happens to be the 100th Fibonacci number.
Cheers
Glen
I believe it's slow because when you loop over x, you repeat operations (the loop over y) which could be computed before the loop over x, then assembled.
I looked into AudioSegment and found potentially useful method for you namely from_mono_audiosegments but it is limited to mono sounds and you will need to test if it is faster than += please compare time-wise these options, i.e.
import time
from pydub import AudioSegment
sounds = []
sound1 = AudioSegment.from_wav("Dropbox/PIREAD/1.wav")
sound2 = AudioSegment.from_wav("Dropbox/PIREAD/2.wav")
sound3 = AudioSegment.from_wav("Dropbox/PIREAD/3.wav")
sound4 = AudioSegment.from_wav("Dropbox/PIREAD/4.wav")
sound5 = AudioSegment.from_wav("Dropbox/PIREAD/5.wav")
sound6 = AudioSegment.from_wav("Dropbox/PIREAD/6.wav")
sound7 = AudioSegment.from_wav("Dropbox/PIREAD/7.wav")
sound8 = AudioSegment.from_wav("Dropbox/PIREAD/8.wav")
sound9 = AudioSegment.from_wav("Dropbox/PIREAD/9.wav")
sound0 = AudioSegment.from_wav("Dropbox/PIREAD/0.wav")
sounds=[sound1,sound2,sound3,sound4,sound5,sound6,sound7,sound8,sound9,sound0]
# option1 using +=
t1 = time.time()
combined_sounds1 = AudioSegment.empty()
for s in sounds
combined_sounds1 += s
t2 = time.time()
# end of option1
# option2 using from_mono_audiosegments
t3 = time.time()
combined_sounds2 = AudioSegment.from_mono_audiosegments(*sounds)
t4 = time.time()
# end of option2
print('option1 (seconds):',t2-t1)
print('option2 (seconds):',t4-t3)
Thanks for the suggestions and advice above. This is the final code I used and link to the resultant video (with ffmpeg visualisation):
# Program to display the Fibonacci sequence up to n-th term
from pydub import AudioSegment
combined_sounds = ""
sound1 = AudioSegment.from_wav("1_2.wav")
sound2 = AudioSegment.from_wav("2_2.wav")
sound3 = AudioSegment.from_wav("3_2.wav")
sound4 = AudioSegment.from_wav("4_2.wav")
sound5 = AudioSegment.from_wav("5_2.wav")
sound6 = AudioSegment.from_wav("6_2.wav")
sound7 = AudioSegment.from_wav("7_2.wav")
sound8 = AudioSegment.from_wav("8_2.wav")
sound9 = AudioSegment.from_wav("9_2.wav")
sound0 = AudioSegment.from_wav("0_2.wav")
nterms=1000
# first two terms
n1, n2 = 0, 1
count = 0
fib = ""
# check if the number of terms is valid
if nterms <= 0:
print("Please enter a positive integer")
# if there is only one term, return n1
elif nterms == 1:
print("Fibonacci sequence upto",nterms,":")
print(n1)
# generate fibonacci sequence
else:
print("Fibonacci sequence:")
while count < nterms:
#print(n1)
fib += str(n1)
nth = n1 + n2
# update values
n1 = n2
n2 = nth
count += 1
i=-36
j=0
fibs = [fib[i:i+1000] for i in range(0, len(fib), 1000)]
seg = 0
for a in fibs:
if seg == 2:
break
combined_sounds = AudioSegment.empty()
seg +=1
for x in a:
i,j = -36,0
s = eval("sound"+str(x))
s = s.apply_gain_stereo(i,j)
combined_sounds += s
i,j = j,i
combined_sounds.export("joinedFile"+str(seg)+".wav", format="wav")
This splits the output into 1000 digit wav files. The first 1000 Fibonacci terms produces nearly 15Gb of wavs!
Uploaded to YouTube: https://www.youtube.com/watch?v=U7Z_HOGqjlE
Thanks all.
I have some code (this is not the full file):
chunk_list = []
def makeFakeTransactions(store_num, num_transactions):
global chunk_list
startTime = datetime.now()
data_load_datetime = startTime.isoformat()
data_load_name = "Faked Data v2.2"
data_load_path = "data was faked"
index_list = []
number_of_stores = store_num + 10
number_of_terminals = 13
for month in range(1, 13):
number_of_days = 30
extra_day_months = [1, 3, 5, 7, 8, 10, 12]
if month == 2:
number_of_days = 28
elif month in extra_day_months:
number_of_days = 31
for day in range(1, number_of_days + 1):
for store in range(store_num, number_of_stores):
operator_id = "0001"
operator_counter = 1
if store < 11:
store_number = "0000" + str(store)
else:
store_number = "000" + str(store)
for terminal in range(1, number_of_terminals + 1):
if terminal < 10:
terminal_id = str(terminal) + "000"
else:
terminal_id = str(terminal) + "00"
transaction_type = "RetailTransaction"
transaction_type_code = "Transaction"
transaction_date = date(2015, month, day)
transaction_date_str = transaction_date.isoformat()
transaction_time = time(random.randint(0, 23), random.randint(0, 59))
transaction_datetime = datetime.combine(transaction_date, transaction_time)
transaction_datetime_str = transaction_datetime.isoformat()
max_transactions = num_transactions
for transaction_number in range (0, max_transactions):
inactive_time = random.randint(80, 200)
item_count = random.randint(1, 15)
sequence_number = terminal_id + str(transaction_number)
transaction_datetime = transaction_datetime + timedelta(0, ring_time + special_time + inactive_time)
transaction_summary = {}
transaction_summary["transaction_type"] = transaction_type
transaction_summary["transaction_type_code"] = transaction_type_code
transaction_summary["store_number"] = store_number
transaction_summary["sequence_number"] = sequence_number
transaction_summary["data_load_path"] = data_load_path
index_list.append(transaction_summary.copy())
operator_counter += 10
operator_id = '{0:04d}'.format(operator_counter)
chunk_list.append(index_list)
if __name__ == '__main__':
store_num = 1
process_number = 6
num_transactions = 10
p = multiprocessing.Pool(process_number)
results = [p.apply(makeFakeTransactions, args = (store_num, num_transactions,)) for store_num in xrange(1, 30, 10)]
results = [p.apply(elasticIndexing, args = (index_list,)) for index_list in chunk_list]
I have a global variable chunk_list that gets appended to at the end of my makeFakeTransactions function and basically it's a list of lists. However, when I do a test print of chunk_list after the 3 processes for makeFakeTransactions, the chunk_list shows up empty, even though it should've been appended to 3 times. Am I doing something wrong regarding global list variables in multiprocessing? Is there a better way to do this?
Edit: makeFakeTransactions appends a dictionary copy to index_list and once all the dictionaries are appended to index_list, it appends index_list to the global variable chunk_list.
First, your code isn't actually running in parallel. According to the docs, p.apply will block until complete, so you are running your tasks sequentially on the process pool. You need to use p.map_async to kick off a task and not wait for it to complete.
Second, as was said in a comment, global state isn't shared between processes. You can use shared memory, but in this case it is much simpler to just transfer the result back from the worker process. Since you don't use chunk_list for anything other than collecting the result, you can just send the result back after computation and collect them on the calling process. This is easy using multiprocessing.Pool, you just return the result from your worker function:
return index_list
This will make p.apply() return index_list. p.apply_async() will return an AsyncResult that will return index_list with AsyncResult.get(). Since you're already using list comprehension, the modifications are small:
p = multiprocessing.Pool(process_number)
async_results = [p.apply_async(makeFakeTransactions, args = (store_num, num_transactions,)) for store_num in xrange(1, 30, 10)]
results = [ar.get() for ar in async_results]
You can do simplify it down to one step by using p.map, which effectively does what those previous two lines do. Note p.map blocks until all results are available.
p = multiprocessing.Pool(process_number)
results = p.map(lambda store_num: makeFakeTransactions(store_num, num_transactions), xrange(1, 30, 10))
Since p.map expects a single argument function, you need to wrap it in a lambda.
i'm beginner in machine learning and i'm trying to implement my first Naive Bayes by myself for better understanding. So, i have dataset from http://archive.ics.uci.edu/ml/datasets/Adult (american census data, classes are '<=50k' and '>50k').
Here is my python code:
#!/usr/bin/python
import sys
import csv
words_stats = {} # {'word': {'class1': cnt, 'class2': cnt'}}
words_cnt = 0
targets_stats = {} # {'class1': 3234, 'class2': 884} how many words in each class
class_stats = {} # {'class1': 7896, 'class2': 3034} how many lines in each class
items_cnt = 0
def train(dataset, targets):
global words_stats, words_cnt, targets_stats, items_cnt, class_stats
num = len(dataset)
for item in xrange(num):
class_stats[targets[item]] = class_stats.get(targets[item], 0) + 1
for i in xrange(len(dataset[item])):
word = dataset[item][i]
if not words_stats.has_key(word):
words_stats[word] = {}
tgt = targets[item]
cnt = words_stats[word].get(tgt, 0)
words_stats[word][tgt] = cnt + 1
targets_stats[tgt] = targets_stats.get(tgt, 0) + 1
words_cnt += 1
items_cnt = num
def classify(doc, tgt_set):
global words_stats, words_cnt, targets_stats, items_cnt
probs = {} #the probability itself P(c|W) = P(W|c) * P(c) / P(W)
pc = {} #probability of the class in document set P(c)
pwc = {} #probability of the word set in particular class. P(W|c)
pw = 1 #probability of the word set in documet set
for word in doc:
if word not in words_stats:
continue #dirty, very dirty
pw = pw * float(sum(words_stats[word].values())) / words_cnt
for tgt in tgt_set:
pc[tgt] = class_stats[tgt] / float(items_cnt)
for word in doc:
if word not in words_stats:
continue #dirty, very dirty
tgt_wrd_cnt = words_stats[word].get(tgt, 0)
pwc[tgt] = pwc.get(tgt, 1) * float(tgt_wrd_cnt) / targets_stats[tgt]
probs[tgt] = (pwc[tgt] * pc[tgt]) / pw
l = sorted(probs.items(), key = lambda i: i[1], reverse=True)
print probs
return l[0][0]
def check_results(dataset, targets):
num = len(dataset)
tgt_set = set(targets)
correct = 0
incorrect = 0
for item in xrange(num):
res = classify(dataset[item], tgt_set)
if res == targets[item]:
correct = correct + 1
else:
incorrect = incorrect + 1
print 'correct:', float(correct) / num, ' incorrect:', float(incorrect) / num
def load_data(fil):
data = []
tgts = []
reader = csv.reader(fil)
for line in reader:
d = [x.strip() for x in line]
if '?' in d:
continue
if not len(d):
continue
data.append(d[:-1])
tgts.append(d[-1:][0])
return data, tgts
if __name__ == '__main__':
if len(sys.argv) < 3:
print './program train_data.txt test_data.txt'
sys.exit(1)
filename = sys.argv[1]
fil = open(filename, 'r')
data, tgt = load_data(fil)
train(data, tgt)
test_file = open(sys.argv[2], 'r')
test_data, test_tgt = load_data(test_file)
check_results(test_data, tgt)
it gives ~61% of correct results. when i print probabilities i get the following:
{'<=50K': 0.07371606889800396, '>50K': 15.325378327213354}
but in case of correct classifier i expect to see sum of both probabilities equal to 1.
At first i thought the problem is in float underflow and tried to make all calculations in logarithms, but results were similiar.
i understand that omitting some words is gonna affect accuracy, but the probabilities are sooo wrong.
What do i do wrong or don't understand?
for your convinience i've uploaded dataset and python script here:
https://dl.dropboxusercontent.com/u/36180992/adult.tar.gz
Thank you for your help.
Naive Bayes doesn't compute a probability directly, rather it computes a "raw score" that is relatively compared to the other scores for each label in order to classify an instance. This score can easily be converted to a "probability" in the range of [0, 1]:
total = sum(probs.itervalues())
for label, score in probs.iteritems():
probs[label] = score / total
However, keep in mind this still doesn't represent a true probability, as mentioned in this answer:
naive Bayes tends to predict probabilities that are almost always either very close to zero or very close to one.
im having trouble with getting my python code to read through a text file and add together all of the monetary values. the code seemed to be working fine on my pc, but as soon as i transferred the file to my mac it gave me a whole slew of errors. here is the code
#!usr/bin/python
import sys
def findnum(x):
list = x.split(' ')
index = 0
listindex = -1
numlist = []
sum = 0
for w in list:
if ((w.strip('. n,')).isalpha() != True and w[0].isalpha() != True and w[-2].isdigit() == True):
numlist.append(w)
listindex += 1
while listindex >= 0:
sum += float(numlist[listindex].strip('$ n.'))
listindex -= 1
return sum
def main():
text = open(sys.argv[1])
x = text.readline()
sum = 0
if len(x) > 0:
findnum(x)
while len(x) > 0:
sum += findnum(x)
x = text.readline()
print '{0:.2f}'.format(sum)
if __name__ == '__main__':
main()
here is the text
This is your invoice from the ACME materials
company. You received 50lbs of sand at a
cost of $40. The brick we delivered is 70.5
for the 75Kg. In addition, we delivered 30yards
of sod for $200.00. Delivery charge is $35.
so i need to add 40 + 70.5 + 200 +35
i keep getting a index out of range error..
anyone think they can help me out?
import re
print re.findall('(\$\d+(?:\.\d{2})?)', x)