Infinity loop issue using for loops - python

import pandas as pd
import time
import yfinance as yf
import money_18
import talib
def backtest(df,us_code, profit_target, stop_loss, macd_diff):
pos_opened = False
open_price = 0
close_price = 0
pnl = 0
pnl_list = []
original_capital = 100000
temp_capital = original_capital
num_of_lot = 0
equity_value = 0
equity_value_list = []
dd_dollar = 0
dd_dollar_list = []
dd_pct = 0
dd_pct_list = []
mdd_dollar = 0
mdd_pct = 0
total_profit = 0
num_of_trade = 0
for i in range(1, len(df)):
now_date = df.loc[i,'Date']
now_open = df.loc[i,'Open']
now_high = df.loc[i,'High']
now_low = df.loc[i,'Low']
now_close = df.loc[i,'Close']
now_rsi = df.loc[i,'RSI']
now_upper_band = df.loc[i,'Upper_Band']
now_middle_band = df.loc[i,'Middle_Band']
now_lower_band = df.loc[i,'Lower_Band']
now_macd = df.loc[i,'MACD']
now_macd_signal = df.loc[i,'MACD_Signal']
now_macd_hist = df.loc[i,'MACD_Hist']
##### equity curve #####
equity_value = round(temp_capital + (now_open - open_price) * num_of_lot )
equity_value_list.append(equity_value)
temp_max_equity = max(equity_value_list)
dd_dollar = temp_max_equity - equity_value
dd_dollar_list.append(dd_dollar)
mdd_dollar = max(dd_dollar_list)
dd_pct = (temp_max_equity - equity_value) / temp_max_equity
dd_pct_list.append(dd_pct)
mdd_pct = max(dd_pct_list)
##### open position #####
if (pos_opened == False) and (i < len(df) - 1) and now_macd_hist > macd_diff :
pos_opened = True
open_price = now_close
num_of_lot = temp_capital // (open_price)
##### profit taking and stop loss #####
if (pos_opened == True) and ((now_open - open_price > profit_target * open_price) or (now_open - open_price < stop_loss * open_price) or (i == len(df) -1)):
pos_opened = False
close_price = now_open
pnl = (close_price - open_price) * num_of_lot
pnl_list.append(pnl)
open_price = 0
num_of_lot = 0
temp_capital = temp_capital + pnl
if len(pnl_list) > 0:
total_profit = sum(pnl_list)
num_of_trade = len(pnl_list)
return us_code, profit_target, stop_loss, total_profit, num_of_trade, mdd_dollar, mdd_pct, macd_diff
if __name__ == '__main__':
us_code_list = ['TSLA', 'AAPL']
macd_diff_list = [0, 0.05]
profit_target_list = [0.03, 0.06]
stop_loss_list = [-0.01, -0.02, -0.03]
start_date = '2020-01-01'
end_date = '2020-12-31'
df_dict = {}
for us_code in us_code_list:
df= yf.Ticker(us_code).history(start=start_date, end=end_date)
df= df[df['Volume'] > 0]
df = df[['Open', 'High', 'Low', 'Close']]
df['RSI'] = talib.RSI(df['Close'], timeperiod=14)
df['Upper_Band'], df['Middle_Band'], df['Lower_Band'] = talib.BBANDS(df['Close'], 20, 2, 2)
df['MACD'], df['MACD_Signal'], df['MACD_Hist'] = talib.MACD(df['Close'], fastperiod=12, slowperiod=26,
signalperiod=9)
df = df[df['MACD_Hist'].notna()]
df = df.reset_index()
df_dict[us_code] = df
save_us_code = ''
save_macd_diff = 0
save_profit_target = 0
save_stop_loss = 0
total_profit = 0
num_of_trade = 0
mdd_dollar = 0
mdd_pct = 0
save_us_code_list = []
save_macd_diff_list = []
save_profit_target_list = []
save_stop_loss_list = []
total_profit_list = []
num_of_trade_list = []
mdd_dollar_list = []
mdd_pct_list = []
result_dict = {}
for us_code in us_code_list:
for macd_diff in macd_diff_list:
for profit_target in profit_target_list:
for stop_loss in stop_loss_list:
print(us_code, macd_diff, profit_target, stop_loss) ## the problem should be starting from here##
save_us_code, save_profit_target, save_stop_loss, total_profit, num_of_trade, mdd_dollar, mdd_pct, macd_diff = backtest(df, us_code, profit_target, stop_loss, macd_diff)
save_us_code_list.append(save_us_code)
save_profit_target_list.append(save_profit_target)
save_stop_loss_list.append(save_stop_loss)
total_profit_list.append(total_profit)
num_of_trade_list.append(num_of_trade)
mdd_dollar_list.append(mdd_dollar)
mdd_pct_list.append(mdd_pct)
macd_diff_list.append(macd_diff)
I am working on the algo trade, however, I created a for loop to put my parameter into my backtest function. However, the for loop keeps looping non-stop.
I think the error starting from "for macd_diff in macd_diff_list:" because i try to print the result below that row, the result is already indefinite.

Now that you've shown the full code, your problem is obvious. Your original example didn't show the issue because you didn't include all relevant code. Here's your example with the relevant code that's causing the issue:
for us_code in us_code_list:
for macd_diff in macd_diff_list:
for profit_target in profit_target_list:
for stop_loss in stop_loss_list:
... # irrelevant code not shown
macd_diff_list.append(macd_diff)
The issue is that you're looping through each item in macd_diff_list, but then for each loop iteration, you add an item to that list. So of course the loop will be infinite. You need to be looping through a different list, or adding items to a different list.

Related

How to optimize the finding of divergences between 2 signals

I am trying to create an indicator that will find all the divergences between 2 signals.
The output of the function so far looks like this
But the problem is that is painfully slow when I am trying to use it with long signals. Could any of you guys help me to make it faster if is possible?
My code:
def find_divergence(price: pd.Series, indicator: pd.Series, width_divergence: int, order: int):
div = pd.DataFrame(index=range(price.size), columns=[
f"Bullish_{width_divergence}_{order}",
f"Berish_{width_divergence}_{order}"
])
div[f'Bullish_idx_{width_divergence}_{order}'] = False
div[f'Berish_idx_{width_divergence}_{order}'] = False
def calc_argrelextrema(price_: np.numarray):
return argrelextrema(price_, np.less_equal, order=order)[0]
price_ranges = []
for i in range(len(price)):
price_ranges.append(price.values[0:i + 1])
f = []
with ThreadPoolExecutor(max_workers=16) as exe:
for i in price_ranges:
f.append(exe.submit(calc_argrelextrema, i))
prices_lows = SortedSet()
for r in concurrent.futures.as_completed(f):
data = r.result()
for d in reversed(data):
if d not in prices_lows:
prices_lows.add(d)
else:
break
price_lows_idx = pd.Series(prices_lows)
for idx_1 in range(price_lows_idx.size):
min_price = price[price_lows_idx[idx_1]]
min_indicator = indicator[price_lows_idx[idx_1]]
for idx_2 in range(idx_1 + 1, idx_1 + width_divergence):
if idx_2 >= price_lows_idx.size:
break
if price[price_lows_idx[idx_2]] < min_price:
min_price = price[price_lows_idx[idx_2]]
if indicator[price_lows_idx[idx_2]] < min_indicator:
min_indicator = indicator[price_lows_idx[idx_2]]
consistency_price_rd = min_price == price[price_lows_idx[idx_2]]
consistency_indicator_rd = min_indicator == indicator[price_lows_idx[idx_1]]
consistency_price_hd = min_price == price[price_lows_idx[idx_1]]
consistency_indicator_hd = min_indicator == indicator[price_lows_idx[idx_2]]
diff_price = price[price_lows_idx[idx_1]] - price[price_lows_idx[idx_2]] # should be neg
diff_indicator = indicator[price_lows_idx[idx_1]] - indicator[price_lows_idx[idx_2]] # should be pos
is_regular_divergence = diff_price > 0 and diff_indicator < 0
is_hidden_divergence = diff_price < 0 and diff_indicator > 0
if is_regular_divergence and consistency_price_rd and consistency_indicator_rd:
div.at[price_lows_idx[idx_2], f'Bullish_{width_divergence}_{order}'] = (price_lows_idx[idx_1], price_lows_idx[idx_2])
div.at[price_lows_idx[idx_2], f'Bullish_idx_{width_divergence}_{order}'] = True
elif is_hidden_divergence and consistency_price_hd and consistency_indicator_hd:
div.at[price_lows_idx[idx_2], f'Berish_{width_divergence}_{order}'] = (price_lows_idx[idx_1], price_lows_idx[idx_2])
div.at[price_lows_idx[idx_2], f'Berish_idx_{width_divergence}_{order}'] = True
return div

Split json file into multiple csv files depending on date?

I am trying to split up a json file from alpha-vantages api into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with but it gives me the TypeError: 'list' object is not callable". I'm fairly new to python and pandas so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
count = 0
for element in list:
count += 1
return count
def csv_make(sy, dar, dat):
csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
csv_file.write(dat)
csv_file.close()
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
namesym = symbol_list[x]
ticker = namesym
api_key = 'APIKEYHERE'
url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
data = requests.get(url)
dsf = data.json()
daf = pd.DataFrame(dsf['Time Series (1min)'])
dxf: DataFrame = daf.T
dxf.index.name = 'time'
dxf.reset_index(inplace=True)
dxf['time'] = pd.to_datetime(dxf['time'])
dxf['minute'] = dxf['time'].dt.time
dxf['day'] = dxf['time'].dt.day
dxf['date'] = dxf['time'].dt.date
agg = dxf.groupby([dxf['day']])
length1 = dxf.groupby([dxf['day']]).size()
length = pd.DataFrame(length1)
length.index.name = 'day'
length.reset_index(inplace=True)
length_sum = length[0].sum()
v = 0
d = length_sum
b = len(length)
x2 = length_sum
while v < b:
a = length[0][v]
x2 -= length[0][v]
xd = agg.get_group(length['day'][v])
date = xd['date'][x2]
max_dt = parser.parse(str(max(xd['minute'])))
min_dt = parser.parse(str(min(xd['minute'])))
dt_range = []
while min_dt <= max_dt:
dt_range.append(min_dt.strftime("%H:%M:%S"))
min_dt += timedelta(seconds=60)
complete_df = pd.DataFrame({'minute': dt_range})
xy = complete_df.astype('str')
yx = xd.astype('str')
dasf = xy.merge(yx, how='left', on='minute')
dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
time = []
open = []
high = []
low = []
close = []
volume = []
empty_value = []
for ib in range(len(dasf)):
time.append(dasf['minute'][ib])
open.append(dasf['1. open'][ib])
high.append(dasf['2. high'][ib])
low.append(dasf['3. low'][ib])
close.append(dasf['4. close'][ib])
volume.append(dasf['5. volume'][ib])
empty_value.append(dasf['ev'][ib])
time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
high_df = pd.DataFrame(high).rename(columns={0: 'High'})
low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
df = pd.concat(frames, axis=1, join='inner')
df = df.set_index('Time')
ad = df.to_csv()
csv_make(namesym, date, ad)
v += 1
i += 1

How to find Average directional movement for stocks using Pandas?

I have a dataframe of OHLCV data. I would like to know if anyone knows any tutorial or any way of finding ADX(Average directional movement ) using pandas?
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import numpy as nm
start=dt.datetime.today()-dt.timedelta(59)
end=dt.datetime.today()
df=pd.DataFrame(yf.download("MSFT", start=start, end=end))
The average directional index, or ADX, is the primary technical indicator among the five indicators that make up a technical trading system developed by J. Welles Wilder, Jr. and is calculated using the other indicators that make up the trading system. The ADX is primarily used as an indicator of momentum, or trend strength, but the total ADX system is also used as a directional indicator.
Directional movement is calculated by comparing the difference between two consecutive lows with the difference between their respective highs.
For the excel calculation of ADX this is a really good video:
https://www.youtube.com/watch?v=LKDJQLrXedg&t=387s
I was playing with this a little bit and found something that can help you with the issue:
def ADX(data: pd.DataFrame, period: int):
"""
Computes the ADX indicator.
"""
df = data.copy()
alpha = 1/period
# TR
df['H-L'] = df['High'] - df['Low']
df['H-C'] = np.abs(df['High'] - df['Close'].shift(1))
df['L-C'] = np.abs(df['Low'] - df['Close'].shift(1))
df['TR'] = df[['H-L', 'H-C', 'L-C']].max(axis=1)
del df['H-L'], df['H-C'], df['L-C']
# ATR
df['ATR'] = df['TR'].ewm(alpha=alpha, adjust=False).mean()
# +-DX
df['H-pH'] = df['High'] - df['High'].shift(1)
df['pL-L'] = df['Low'].shift(1) - df['Low']
df['+DX'] = np.where(
(df['H-pH'] > df['pL-L']) & (df['H-pH']>0),
df['H-pH'],
0.0
)
df['-DX'] = np.where(
(df['H-pH'] < df['pL-L']) & (df['pL-L']>0),
df['pL-L'],
0.0
)
del df['H-pH'], df['pL-L']
# +- DMI
df['S+DM'] = df['+DX'].ewm(alpha=alpha, adjust=False).mean()
df['S-DM'] = df['-DX'].ewm(alpha=alpha, adjust=False).mean()
df['+DMI'] = (df['S+DM']/df['ATR'])*100
df['-DMI'] = (df['S-DM']/df['ATR'])*100
del df['S+DM'], df['S-DM']
# ADX
df['DX'] = (np.abs(df['+DMI'] - df['-DMI'])/(df['+DMI'] + df['-DMI']))*100
df['ADX'] = df['DX'].ewm(alpha=alpha, adjust=False).mean()
del df['DX'], df['ATR'], df['TR'], df['-DX'], df['+DX'], df['+DMI'], df['-DMI']
return df
At the beginning the values aren't correct (as always with the EWM approach) but after several computations it converges to the correct value.
Math was taken from here.
def ADX(df):
def getCDM(df):
dmpos = df["High"][-1] - df["High"][-2]
dmneg = df["Low"][-2] - df["Low"][-1]
if dmpos > dmneg:
return dmpos
else:
return dmneg
def getDMnTR(df):
DMpos = []
DMneg = []
TRarr = []
n = round(len(df)/14)
idx = n
while n <= (len(df)):
dmpos = df["High"][n-1] - df["High"][n-2]
dmneg = df["Low"][n-2] - df["Low"][n-1]
DMpos.append(dmpos)
DMneg.append(dmneg)
a1 = df["High"][n-1] - df["High"][n-2]
a2 = df["High"][n-1] - df["Close"][n-2]
a3 = df["Low"][n-1] - df["Close"][n-2]
TRarr.append(max(a1,a2,a3))
n = idx + n
return DMpos, DMneg, TRarr
def getDI(df):
DMpos, DMneg, TR = getDMnTR(df)
CDM = getCDM(df)
POSsmooth = (sum(DMpos) - sum(DMpos)/len(DMpos) + CDM)
NEGsmooth = (sum(DMneg) - sum(DMneg)/len(DMneg) + CDM)
DIpos = (POSsmooth / (sum(TR)/len(TR))) *100
DIneg = (NEGsmooth / (sum(TR)/len(TR))) *100
return DIpos, DIneg
def getADX(df):
DIpos, DIneg = getDI(df)
dx = (abs(DIpos- DIneg) / abs(DIpos + DIneg)) * 100
ADX = dx/14
return ADX
return(getADX(df))
print(ADX(df))
This gives you the exact numbers as Tradingview and Thinkorswim.
import numpy as np
def ema(arr, periods=14, weight=1, init=None):
leading_na = np.where(~np.isnan(arr))[0][0]
arr = arr[leading_na:]
alpha = weight / (periods + (weight-1))
alpha_rev = 1 - alpha
n = arr.shape[0]
pows = alpha_rev**(np.arange(n+1))
out1 = np.array([])
if 0 in pows:
out1 = ema(arr[:int(len(arr)/2)], periods)
arr = arr[int(len(arr)/2) - 1:]
init = out1[-1]
n = arr.shape[0]
pows = alpha_rev**(np.arange(n+1))
scale_arr = 1/pows[:-1]
if init:
offset = init * pows[1:]
else:
offset = arr[0]*pows[1:]
pw0 = alpha*alpha_rev**(n-1)
mult = arr*pw0*scale_arr
cumsums = mult.cumsum()
out = offset + cumsums*scale_arr[::-1]
out = out[1:] if len(out1) > 0 else out
out = np.concatenate([out1, out])
out[:periods] = np.nan
out = np.concatenate(([np.nan]*leading_na, out))
return out
def atr(highs, lows, closes, periods=14, ema_weight=1):
hi = np.array(highs)
lo = np.array(lows)
c = np.array(closes)
tr = np.vstack([np.abs(hi[1:]-c[:-1]),
np.abs(lo[1:]-c[:-1]),
(hi-lo)[1:]]).max(axis=0)
atr = ema(tr, periods=periods, weight=ema_weight)
atr = np.concatenate([[np.nan], atr])
return atr
def adx(highs, lows, closes, periods=14):
highs = np.array(highs)
lows = np.array(lows)
closes = np.array(closes)
up = highs[1:] - highs[:-1]
down = lows[:-1] - lows[1:]
up_idx = up > down
down_idx = down > up
updm = np.zeros(len(up))
updm[up_idx] = up[up_idx]
updm[updm < 0] = 0
downdm = np.zeros(len(down))
downdm[down_idx] = down[down_idx]
downdm[downdm < 0] = 0
_atr = atr(highs, lows, closes, periods)[1:]
updi = 100 * ema(updm, periods) / _atr
downdi = 100 * ema(downdm, periods) / _atr
zeros = (updi + downdi == 0)
downdi[zeros] = .0000001
adx = 100 * np.abs(updi - downdi) / (updi + downdi)
adx = ema(np.concatenate([[np.nan], adx]), periods)
return adx

While and for loop with global variable not working Python updated

Working for single symbol
todate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=0))
fromdate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=5))
symbol = "ZINC20MAYFUT"
instype = "MCX"
Timeinterval = "5minute"
tradeDir = 0 #neutral
while (True):
histdata1 = zerodha.get_history(symbol, fromdate, todate, Timeinterval, instype)
df = pd.DataFrame(histdata1)
df = heikinashi(df)
df = bollinger_bands(df,field='h_close',period=20, numsd=2)
df1 =pd.DataFrame(df, columns=['date','volume','close','h_close','middle_band', 'upper_band'])
pp = pd.DataFrame(df1.tail(3))
print(pp)
dfCToList = pp['h_close'].tolist()
dfCList = list(pp['h_close'])
dfHValues = pp['h_close'].values
dfBMValues = pp['middle_band'].values
H_last = dfHValues[2] # tail 1
BM_last = dfBMValues[2] # tail 1
if (H_last > BM_last and (tradeDir == 0 or tradeDir == -1)):
print("buy")
tradeDir = 1 # up
if (H_last < BM_last and (tradeDir == 0 or tradeDir == 1)):
print("SELL")
tradeDir = -1 # down
# pdb.set_trace()
Question: When conditions meet its Printing "BUY/SELL" again and again. I want to just print a single time when condition meet the first time
todate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=0))
fromdate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=5))
tradeDir = 0 #neutral
def script():
global tradeDir
##For historical Data##
symbol = ["ZINC20MAYFUT" ,"CRUDEOIL20MAYFUT","GOLD20JUNFUT"]
instype = "MCX"
Timeinterval = "5minute"
for symbol in symbol:
global tradeDir
histdata1 = zerodha.get_history(symbol, fromdate, todate, Timeinterval, instype)
df = pd.DataFrame(histdata1)
df = heikinashi(df)
df = bollinger_bands(df,field='h_close',period=20, numsd=2)
df1 =pd.DataFrame(df, columns=['date','volume','close','h_close','middle_band', 'upper_band'])
pp = pd.DataFrame(df1.tail(3))
print(pp)
dfCToList = pp['h_close'].tolist()
dfCList = list(pp['h_close'])
dfHValues = pp['h_close'].values
dfBMValues = pp['middle_band'].values
H_last = dfHValues[2] # tail 1
BM_last = dfBMValues[2] # tail 1
if (H_last > BM_last and (tradeDir == 0 or tradeDir == -1)):
print("buy")
tradeDir = 1 # up
if (H_last < BM_last and (tradeDir == 0 or tradeDir == 1)):
print("SELL")
tradeDir = -1 # down
# pdb.set_trace()
while True:
try:
script()
except Exception as e:
sleep(2)
continue
When conditions meet its Printing "BUY/SELL" again and again. I want to just print a single time when condition meet the first time full Script and should run continuously
If you want the code to stop looping after the first time it prints "buy" or "SELL", you just need to add a break statement after each of the prints (inside the scope of the containing if blocks).

Generate synthetic time series data from existing sample data

Are there any good library/tools in python for generating synthetic time series data from existing sample data? For example I have sales data from January-June and would like to generate synthetic time series data samples from July-December )(keeping time series factors intact, like trend, seasonality, etc).
Leaving the question about quality of such data aside, here is a simple approach you can use Gaussian distribution to generate synthetic data based-off a sample. Below is the critical part.
import numpy as np
x # original sample np.array of features
feature_means = np.mean(x, axis=1)
feature_std = np.std(x, axis=1)
random_normal_feature_values = np.random.normal(feature_means, feature_std)
Here is a fully functioning code I used,
def generate_synthetic_data(sample_dataset, window_mean, window_std, fixed_window=None, variance_range =1 , sythesize_ratio = 2, forced_reverse = False):
synthetic_data = pd.DataFrame(columns=sample_dataset.columns)
synthetic_data.insert(len(sample_dataset.columns), "synthesis_seq", [], True)
for k in range(sythesize_ratio):
if len(synthetic_data) >= len(sample_dataset) * sythesize_ratio:
break;
#this loop generates a set that resembles the entire dataset
country_synthetic = pd.DataFrame(columns=synthetic_data.columns)
if fixed_window != None:
input_sequence_len = fixed_window
else:
input_sequence_len = int(np.random.normal(window_mean, window_std))
#population data change
country_data_i = sample_dataset
if len(country_data_i) < input_sequence_len :
continue
feature_length = configuration['feature_length'] #number of features to be randomized
country_data_array = country_data_i.to_numpy()
country_data_array = country_data_array.T[:feature_length]
country_data_array = country_data_array.reshape(feature_length,len(country_data_i))
x = country_data_array[:feature_length].T
reversed = np.random.normal(0,1)>0
if reversed:
x = x[::-1]
sets =0
x_list = []
dict_x = dict()
for i in range(input_sequence_len):
array_len = ((len(x) -i) - ((len(x)-i)%input_sequence_len))+i
if array_len <= 0:
continue
sets = int( array_len/ input_sequence_len)
if sets <= 0:
continue
x_temp = x[i:array_len].T.reshape(sets,feature_length,input_sequence_len)
uniq_keys = np.array([i+(input_sequence_len*k) for k in range(sets)])
x_temp = x_temp.reshape(feature_length,sets,input_sequence_len)
arrays_split = np.hsplit(x_temp,sets)
dict_x.update(dict(zip(uniq_keys, arrays_split)))
temp_x_list = [dict_x[i].T for i in sorted(dict_x.keys())]
temp_x_list = np.array(temp_x_list).squeeze()
feature_means = np.mean(temp_x_list, axis=1)
feature_std = np.std(temp_x_list, axis=1) /variance_range
random_normal_feature_values = np.random.normal(feature_means, feature_std).T
random_normal_feature_values = np.round(random_normal_feature_values,0)
random_normal_feature_values[random_normal_feature_values < 0] = 0
if reversed:
random_normal_feature_values = random_normal_feature_values.T[::-1]
random_normal_feature_values = random_normal_feature_values.T
for i in range(len(random_normal_feature_values)):
country_synthetic[country_synthetic.columns[i]] = random_normal_feature_values[i]
country_synthetic['synthesis_seq'] = k
synthetic_data = synthetic_data.append(country_synthetic, ignore_index=True)
return synthetic_data
for i in range(1):
directory_name = '/synthetic_'+str(i)
mypath = source_path+ '/cleaned'+directory_name
if os.path.exists(mypath) == False:
os.mkdir(mypath)
data = generate_synthetic_data(original_data, window_mean = 0, window_std= 0, fixed_window=2 ,variance_range = 10**i, sythesize_ratio = 1)
synthetic_data.append(data)
#data.to_csv(mypath+'/synthetic_'+str(i)+'_dt31_05_.csv', index=False )
print('synth step : ', i, ' len : ', len(synthetic_data))
Good luck!

Categories

Resources