I am trying to loop a Python script 50 times but I'm only getting one line of output in my file

I am trying to loop a Python script 50 times but I'm only getting one line of output in my file - python

I want to loop the following code 50 times, so I'm using the range function. When it runs, the output file only has 1 row of data in it.
I've gotten the code to run successfully without the looping portion. It provides me with a table of data.
What I want to do now is to loop the code 50 times and write the last row of each iteration to a new dataframe.
Ideally, I don't even want that much data. I just need the value in the 'Period' field in the last row of each iteration.
If someone could help me see what I'm doing wrong, I'd really appreciate it.
import pandas as pd
import numpy as np
import numpy_financial as npf
from datetime import date, datetime
from numpy import random
for x in range(50):
#np.round(df, decimals=2)
interest=0.04125
years=2
payments_year=12
mortgage=40000
start_date=(date(2023, 2, 1))
#additional_pmt = np.random.randint(2100, 4500, size=len(df))
initial_pmt = -1 * npf.pmt(interest / 12, years * payments_year, mortgage)
initial_ipmt = -1 * npf.ipmt(interest / payments_year, 1, years * payments_year, mortgage)
initial_ppmt = -1 * npf.ppmt(interest / payments_year, 1, years * payments_year, mortgage)
print('Initial Payment: {:,.2f}'.format(initial_pmt))
print('Initial Interest: {:,.2f}'.format(initial_ipmt))
print('Initial Principal Payment: {:,.2f}'.format(initial_ppmt))
# Create date range in pandas dataframe
rng = pd.date_range(start_date, periods=years * payments_year, freq='MS')
# label the date column
rng.name="Payment Date"
# create dataframe
df=pd.DataFrame(
index=rng,
columns= ['Org Total Payment',
'Total Payment',
'Interest',
'Principal',
'Additional Payment',
'Org Ending Balance',
'Ending Balance'], dtype='float')
# set index as payment period
df.reset_index(inplace=True)
df.index += 1
df.index.name="Period"
# Create values for the first period
period=1
additional_pmt = np.random.randint(2100, 4500, size=len(df))
# for each element in the row set the value
initial_row_dict = {
'Org Total Payment':initial_pmt,
'Total Payment': initial_pmt + (additional_pmt),
'Interest': initial_ipmt,
'Principal':initial_ppmt,
'Additional Payment': additional_pmt,
'Org Ending Balance': mortgage - initial_ppmt,
'Ending Balance': mortgage - initial_ppmt - (additional_pmt)
}
# set values
columns = list(initial_row_dict.keys())
period_values = list(initial_row_dict.values())
df.at[period, columns]= period_values
# add additional rows
for period in range(2, len(df) + 1):
#get prior period values
previous_total_payment = df.loc[period - 1, 'Total Payment']
previous_principal = df.loc[period - 1, 'Principal']
previous_org_ending_balance = df.loc[period - 1, 'Org Ending Balance']
previous_ending_balance = df.loc[period - 1, 'Ending Balance']
#get end balance
period_interest = previous_org_ending_balance * interest / payments_year
period_principal = initial_pmt - period_interest
additional_pmt = np.random.randint(2100, 4500) + 400
org_ending_balance = previous_org_ending_balance - period_principal
ending_balance = previous_ending_balance - period_principal - additional_pmt
row_dict = {'Org Total Payment':initial_pmt,
'Total Payment': initial_pmt + (additional_pmt),
'Interest': period_interest,
'Principal': period_principal,
'Additional Payment': additional_pmt,
'Org Ending Balance': org_ending_balance,
'Ending Balance': ending_balance}
columns = list(row_dict.keys())
period_values = list(row_dict.values())
df.at[period, columns]= period_values
df_mask=df['Ending Balance']>=0
filtered_df = df[df_mask].round(2)
storage_df = pd.DataFrame()
last_row = filtered_df.tail(1)
storage_df = storage_df.append(last_row, ignore_index=True)
print(storage_df)
storage_df.to_csv('allruns.csv')

Many thanks to Tim Roberts. His help got me to the solution. Here's the final code:
import pandas as pd
import numpy as np
import numpy_financial as npf
from datetime import date, datetime
from numpy import random
# Simulate 50 pay-down runs of the same loan with random extra payments and
# record, for each run, the last period in which the balance is still >= 0.

# Loan parameters are the same for every run, so they are defined once here
# rather than being rebuilt on each pass through the loop.
interest = 0.04125
years = 1
payments_year = 12
mortgage = 40000
start_date = date(2023, 2, 1)
n_periods = years * payments_year
period_rate = interest / payments_year

# Scheduled payment and its first-period interest/principal split.  The
# numpy_financial helpers report payments as negative cash flows, so the sign
# is flipped to work with positive amounts.
initial_pmt = -1 * npf.pmt(period_rate, n_periods, mortgage)
initial_ipmt = -1 * npf.ipmt(period_rate, 1, n_periods, mortgage)
initial_ppmt = -1 * npf.ppmt(period_rate, 1, n_periods, mortgage)

columns = ['Org Total Payment',
           'Total Payment',
           'Interest',
           'Principal',
           'Additional Payment',
           'Org Ending Balance',
           'Ending Balance']

# One single-row frame per run is collected here and concatenated once at the
# end.  DataFrame.append() no longer exists in pandas >= 2.0, and the
# accumulator must live outside the loop or every run would overwrite it.
last_rows = []
for _run in range(50):
    # Create date range in pandas dataframe and label the date column
    rng = pd.date_range(start_date, periods=n_periods, freq='MS')
    rng.name = "Payment Date"

    # Empty schedule, indexed by payment period (1..n_periods)
    df = pd.DataFrame(index=rng, columns=columns, dtype='float')
    df.reset_index(inplace=True)
    df.index += 1
    df.index.name = "Period"

    # Values for the first period.  A single random extra payment is drawn:
    # one table row can only hold one scalar per column.
    additional_pmt = np.random.randint(2100, 4500)
    df.loc[1, columns] = [
        initial_pmt,
        initial_pmt + additional_pmt,
        initial_ipmt,
        initial_ppmt,
        additional_pmt,
        mortgage - initial_ppmt,
        mortgage - initial_ppmt - additional_pmt,
    ]

    # Remaining periods are derived from the previous row
    for period in range(2, len(df) + 1):
        previous_org_ending_balance = df.loc[period - 1, 'Org Ending Balance']
        previous_ending_balance = df.loc[period - 1, 'Ending Balance']

        period_interest = previous_org_ending_balance * interest / payments_year
        period_principal = initial_pmt - period_interest
        additional_pmt = np.random.randint(2100, 4500) + 400
        org_ending_balance = previous_org_ending_balance - period_principal
        ending_balance = previous_ending_balance - period_principal - additional_pmt

        df.loc[period, columns] = [
            initial_pmt,
            initial_pmt + additional_pmt,
            period_interest,
            period_principal,
            additional_pmt,
            org_ending_balance,
            ending_balance,
        ]

    # Keep only the periods where the loan has not yet been overpaid
    df_mask = df['Ending Balance'] >= 0
    filtered_df = df[df_mask].round(2)

    # reset_index() turns the "Period" index into an ordinary column so the
    # period number survives the ignore_index concat below.
    last_row = filtered_df.tail(1).reset_index()
    last_rows.append(last_row)

storage_df = pd.concat(last_rows, ignore_index=True)
storage_df.to_csv('allruns.csv')

Related

Resampling timeseries Data Frame for different customized seasons and finding aggregates

I am working on a huge timeseries dataset of following format:
import pandas as pd
import numpy as np
import random
import seaborn as sns
df = pd.DataFrame({'date':pd.date_range('1990',end = '1994',freq='3H'),
'A': np.random.randint(low = 0,high=100,size=11689),
'B': np.random.randint(low = 10,high=45,size=11689) })
df['date'] = pd.to_datetime(df.date.astype(str), format='%Y/%m/%d %H:%M',errors ='coerce')
df.index = pd.DatetimeIndex(df.date)
df.drop('date', axis = 1, inplace = True)
My aim is to first filter the dataframe according to the customized seasons: winter (12,1,2) (i.e. Dec, Jan, Feb), Pre-monsoon (3,4,5) , monsoon (6,7,8,9) and post-monsoon (10,11). I am aware of resample('Q-NOV') function but it is quarterly only. As mentioned, I need to customize the months.
I have been able to do so by executing the following codes:
# DF-Winter
winterStart = '-12'
winterEnd = '-02'
df_winter = pd.concat([df.loc[str(year) + winterStart : str(year+1) + winterEnd].mean() for year in range(1990, 1994)]) # DJF
# used year and year+1 because winter season spans from an initial year to the next year.
# DF - Premonsoon
df_preMonsoon = df[df.index.month.isin([3,4,5])] # MAM
and so on.
Problem
I want to find the seasonal average values (every year and season) of my parameters A and B for my data period. Any help will be highly appreciated.
Thank you in advance.

Could something like this work for you?
import pandas as pd
import numpy as np
# Season labels in the order they should sort; used further down as the
# category order of the ordered 'season' column.
seasons_order = (
'Winter',
'Pre-monsoon',
'Monsoon',
'Post-monsoon',
)
# 3-hourly timestamps spanning 1990 up to the start of 1994; used both as the
# DataFrame index and as the source of the season / season_year columns.
dates = pd.date_range('1990',end = '1994',freq='3H')
def define_seasons(month):
    """Return the custom season label for a calendar month number.

    Mapping: 12, 1, 2 -> 'Winter'; 3-5 -> 'Pre-monsoon'; 6-9 -> 'Monsoon';
    10-11 -> 'Post-monsoon'.  Any value outside 1..12 yields ``None``.
    """
    # Winter wraps around the year end, so December is grouped with Jan/Feb.
    if month == 12 or 1 <= month <= 2:
        return 'Winter'
    if 3 <= month <= 5:
        return 'Pre-monsoon'
    if 6 <= month <= 9:
        return 'Monsoon'
    if 10 <= month <= 11:
        return 'Post-monsoon'
    return None
def define_seasons_year(date_to_convert):
    """Return the year a date's season is attributed to.

    ``date_to_convert`` needs ``.month`` and ``.year`` attributes.  December
    is counted towards the following year, so that a Dec-Jan-Feb winter ends
    up under a single year; every other month keeps its own calendar year.
    """
    if date_to_convert.month == 12:
        return date_to_convert.year + 1
    return date_to_convert.year
# Random sample data on the 3-hourly index, tagged with the season label and
# the year that season is attributed to.
df = pd.DataFrame(
    {
        'A': np.random.randint(low=0, high=100, size=dates.size),
        'B': np.random.randint(low=10, high=45, size=dates.size),
        # Ordered categorical so the seasons sort in seasons_order rather
        # than alphabetically.
        'season': pd.Categorical(
            dates.month.map(define_seasons),
            categories=seasons_order,
            ordered=True,
        ),
        'season_year': dates.map(define_seasons_year),
    },
    index=dates,
)
print(df)

# Mean of A and B per (season_year, season).  observed=False is spelled out
# because 'season' is categorical: it keeps the current result (every season
# listed for every year, NaN where there is no data) and avoids the pandas
# FutureWarning about the default changing.
seasonal_average_values = df.groupby(
    ['season_year', 'season'], observed=False
).mean()
print(seasonal_average_values)

Compute balance column python dataframe with initial static value

I am trying to compute a balance column in a pandas DataFrame, starting from a fixed initial value.
The logic:
start balance = 1000
current balance = previous current balance*(1+df['return'])
My attempt:
df.at[1,'current balance'] = 1000
df['current balance'] = df['current balance'].shift(1)*(1+df['return])
I can't get this output
Output dataframe:
return current balance
0.01 1010.00
0.03 1040.30
0.045 1087.11

Standard compound return:
initial_balance = 1000
df['current balance'] = (1 + df['return']).cumprod() * initial_balance
>>> df
return current balance
0 0.010 1010.0000
1 0.030 1040.3000
2 0.045 1087.1135

I would approach this by getting my df columns ready in lists and then add those rather than changing values in the df itself.
There's probably a more elegant solution but this should work:
# Build the compounded balance in plain lists first, then create the
# DataFrame once at the end.
# The leading 0.00 is a placeholder that pairs with the 1000 starting balance,
# so both lists stay aligned; that first entry is dropped from the output.
df_return = [0.00, 0.01, 0.03, 0.045]
df_current_balance = [1000]
# Each balance is the previous balance grown by the current period's return
for rate in df_return[1:]:
    df_current_balance.append(df_current_balance[-1] * (1 + rate))
output = {"return": df_return[1:], "current balance": df_current_balance[1:]}
df = pd.DataFrame(output)

Python For Loop, Use Index To Conditionally Compare Between Values In a List

I am using python 3.7 in a Jupyter Notebook on Windows.
Gsheet Below is a sample of what the final data set should look like: https://docs.google.com/spreadsheets/d/1y8hlJYjk8-sY-sIMeJldZ2DFMFLpy2MWFVdYTeAbV9s/edit#gid=0
My Issue:
Within my for loop, I would like the adjustment of the payment and total investment to be dynamic to the repayment terms defined by the variables at the start of the script. Currently, it's fixed with each elif block compared to my list. Can this be created so it's dynamic to the list of repayment terms? Let's say, I was making $8,000 repayments every 5 years. How can I structure the for loop to behave similarly but not have the need for each elif block to be added in manually?
Ideally, this will NOT be hard coded like it is now. While the script works and produces the desired table. It will not flex if different inputs are established for the repayment terms.
Code I Have So Far:
import pandas as pd
import numpy as np
## Link To Sample Desired OutPut On Below GSHEET, Rows Are Highlighted Where Changes Occur ##
## https://docs.google.com/spreadsheets/d/1y8hlJYjk8-sY-sIMeJldZ2DFMFLpy2MWFVdYTeAbV9s/edit#gid=0
## How Many Years is The Loan Over ##
years = 12
## How Many "Periods" aka "Months" is the Loan Over ##
total_periods = years * 12
## Total Initial Investment ##
initial_investment = 40000
## Monthly Interest To Be Paid To Investor ##
payment_ratio = 100
## Investors Initial Investment Is Repaid In Installments Across The Years Of The Loan. In This Case, We Repay 25% Every 3 Years ##
graduated_repayment = .25
repayment_years = 3
## Repayment_Periods represents the "Period" or "Month" that we will need to pay our installment to the investor ##
## In this Example, since it's every 3 years, that means every 36 months ##
repayment_periods = repayment_years * years
## With Each Repayment Every 36 Months, Our Monthly Interest Payment Decreases, in this case, by 50% ##
payment_reduction = .5
## Generating A List Of Integers That Represent The Periods In Which We Make a Repayment and Calculations Will Change ##
payback_periods = [i for i in range(0,total_periods + 1, repayment_periods) if(total_periods%repayment_periods==0) & (i!=0)]
payback_periods.insert(0, 0)
print(payback_periods)
cols = ['Period', 'Payment', 'Total_Investment', 'Annualized_Return']
n_periods = np.arange(years * 12) + 1
df_initialize = pd.DataFrame({"Period": n_periods})
df = pd.DataFrame(df_initialize, columns = cols)
## This is Still Fixed. A new Elif block would need to be made or removed depending on the repayment terms ##
for i in range(len(df)):
cur_payment = df.loc[i, 'Payment']
cur_invested = df.loc[i, 'Total_Investment']
cur_period = df.loc[i, 'Period']
pay_reduction = payment_ratio * payment_reduction
if payback_periods[0] <= i <= payback_periods[1]:
df.loc[i, 'Payment'] = initial_investment / payment_ratio
df.loc[i, 'Total_Investment'] = initial_investment
df.loc[i, 'Annualized_Return'] = (cur_payment * 12) / cur_invested
elif payback_periods[1] <= i <= payback_periods[2]:
df.loc[i, 'Payment'] = df.loc[payback_periods[1], 'Payment'] - pay_reduction
df.loc[i, 'Total_Investment'] = df.loc[payback_periods[1], 'Total_Investment'] - (initial_investment * graduated_repayment)
df.loc[i, 'Annualized_Return'] = (cur_payment * 12) / cur_invested
elif payback_periods[2] <= i <= payback_periods[3]:
df.loc[i, 'Payment'] = df.loc[payback_periods[2], 'Payment'] - pay_reduction
df.loc[i, 'Total_Investment'] = df.loc[payback_periods[2], 'Total_Investment'] - (initial_investment * graduated_repayment)
df.loc[i, 'Annualized_Return'] = (cur_payment * 12) / cur_invested
elif i > payback_periods[3]:
df.loc[i, 'Payment'] = df.loc[payback_periods[3], 'Payment'] - pay_reduction
df.loc[i, 'Total_Investment'] = df.loc[payback_periods[3], 'Total_Investment'] - (initial_investment * graduated_repayment)
df.loc[i, 'Annualized_Return'] = (cur_payment * 12) / cur_invested
else:
pass
df['Running_Earnings'] = df['Payment'].cumsum()

Numpy repeat to odd length index

I have a dataframe:
proj_length = 6
nb_date = pd.Period("2017-10-13")
rb_date = pd.Period("2017-11-13")
rev_length = proj_length * 30
end_date = rb_date + (rev_length * 2)
df_index = pd.PeriodIndex(start=nb_date, end=end_date)
df = pd.DataFrame(data=[[0,0]] * len(df_index),
index=df_index,
columns=["in", "out"])
len(df) == 392
And I'm trying to group by 30 days at a time, so my initial thought was to just create a new column of group keys:
groups = (end_date - nb_date) // 30
gb_key = np.repeat(np.arange(groups), 30)
len(gb_key) == 390
This is good so far, but I cannot figure out a pythonic way to get the overflow (392-390) to be set to 13?
Non-numpy/pandas way:
arr = np.zeros(len(df))
for i, idx in enumerate(range(0, len(df), 30)):
arr[idx:] = i

How to generate random time series data including February with 29 days?

I have used the following code to generate random rainfall data from 1950 to 2009 with known probabilities, means and standard deviations. However, I am struggling to make February have 29 days in leap years. I was also trying to save the output to a text file, but it gives an error message like
TypeError: float argument required, not numpy.string_
Can anyone please help me out?
My code:
import numpy as np
import random
import itertools
import datetime
dry =[0.33,0.27,0.32,0.41,0.42,0.45,0.57,0.52,0.45,0.39,0.37,0.37]
wet = [0.66,0.72,0.67,0.58,0.57,0.54,0.42,0.47,0.54,0.60,0.62,0.62]
d2d_tran = [0.56,0.50,0.58,0.62,0.63,0.67,0.73,0.66,0.60,0.56,0.57,0.62]
w2w_tran = [0.78,0.80,0.79,0.73,0.72,0.72,0.63,0.64,0.66,0.71,0.74,0.76]
mu = [3.71, 4.46, 4.11, 2.94, 3.00, 2.87, 2.31, 2.44, 2.56, 3.45, 4.32, 4.12]
sigma = [6.72,7.92,7.49,6.57,6.09,5.53,4.38,4.69,4.31,5.71,7.64,7.54]
days = [31,28,31,30,31,30,31,31,30,31,30,31]
rain = []
for y in xrange(0,60):
for m in xrange(0,12):
random_num = np.random.rand(days[m])
if random.random() <= dry[m]:
random_num[0] = 0
else:
r = abs(random.gauss(mu[m],sigma[m]))
random_num[0] = r
for i in xrange(1,days[m]):
if random_num[i-1] == 0:
if random_num[i] <= d2d_tran[m]:
random_num[i] = 0
else:
r = abs(random.gauss(mu[m],sigma[m]))
random_num[i] = r
else:
if random_num[i] <= w2w_tran[m]:
r = abs(random.gauss(mu[m],sigma[m]))
random_num[i] = r
else:
random_num[i] = 0
rain.append(random_num)
rain_series = []
for j in itertools.chain.from_iterable(rain):
rain_series.append(j)
y = np.array(rain_series).reshape(-1, 1)
date_series = []
def generate_dates(start_date, end_date):
return (start_date + datetime.timedelta(days=d) for d in xrange((end_date - start_date).days + 1))
start_date = datetime.date(1950, 1, 1)
end_date = datetime.date(2009, 12, 16)
for current_date in generate_dates(start_date, end_date):
f = current_date.strftime('%Y-%m-%d')
date_series.append(f)
z = np.array(date_series).reshape(-1, 1)
#### Here I have 365x60 = 21900 rainfall values, that is why I had to
####set the end_date upto (2009,12,16). If
#### the February days for leap years can be set as 29 in days[] of
####rain_series than this problem would be solved.
data = np.concatenate((z,y), axis=1)
print data
data1 = data.reshape((21900,2))
np.savetxt('Random Rainfall Data.txt', data1)
#### I want to shape data in two columns like dates and rainfall.
#### And than, save it into a text file. But, it provides an error!!!

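Use calendar.monthrange() (after `import calendar`) to get the number of days in a month; it accounts for leap years, so February comes back as 29 days when appropriate.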
for year in xrange(1950,2020):
for month in xrange(1,13):
day_num = calendar.monthrange(year, month)[1]
random_num = np.random.rand(day_num)
...

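Regarding the data writing problem, you must pass a format string as the third argument (`fmt`): the array holds strings (the dates), and the default format of np.savetxt only accepts floats. The right format depends on your specific problem: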
np.savetxt('test.txt', a, '%s') # Example
See documentation for more formatting info.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

I am trying to loop a Python script 50 times but I'm only getting one line of output in my file - python

Related

Resampling timeseries Data Frame for different customized seasons and finding aggregates

Compute balance column python dataframe with initial static value

Python For Loop, Use Index To Conditionally Compare Between Values In a List

Numpy repeat to odd length index

How to generate random time series data including February with 29 days?

Categories

Resources