python class empty dataframe - python

The purpose of this script is:
• Read a group of csv files.
• Scrape the date and extract some features out of it.
• Merge these csv files into a single data frame.
• Import the final data frame into another class and print it.
Here is the code:
import pandas as pd
import os
class DataSource:
def __init__(self):
self.dfs = []
self.final = pd.DataFrame()
self.names = ['Date', 'Time', 'open', 'high', 'low', 'close', 'Volume']
self.directory = os.chdir(r"C:\Users\Sayed\Desktop\forex")
def merge(self):
for file in os.listdir(self.directory):
df = pd.read_csv(file, names=self.names,
parse_dates={'Release Date': ['Date', 'Time']})
self.dfs.append(df)
self.final = pd.concat(self.dfs, axis=0)
self.final = self.final[['Release Date', 'open', 'high', 'low', 'close']]
print(self.final.head())
return self.final
class test():
def __init__(self):
self.df = DataSource().final
def print(self):
return print(self.df)
x = test()
x.print()
Here is the output:
Empty DataFrame
Columns: []
Index: []

You should call function to create something. Try this one.
class test():
def __init__(self):
self.df = DataSource(). merge()

Related

Class confusion containing pandas concat

when i use this class i dont get the transformed dataframe
but the old one
the class instance does not tansfrom the dataframe given as a parameter
class DataPreparation:
def __init__(
self,
df_train: pd.DataFrame,
df_test: pd.DataFrame
):
self.df_train = df_train
self.df_test = df_test
self.add_embarked()
def add_embarked(self):
all_embarked = pd.concat([self.df_train.Embarked, self.df_test.Embarked])
most_commont_value_emb= all_embarked.mode()[0]
self.df_train.Embarked.fillna(most_commont_value_emb, inplace=True)
self.df_test.Embarked.fillna(most_commont_value_emb, inplace=True)
self.onehotencoder_labels("Embarked")
def onehotencoder_labels(self, column: str):
one_hot_encoder_train = pd.get_dummies(self.df_train[column])
one_hot_encoder_test = pd.get_dummies(self.df_test[column])
self.df_train = pd.concat([self.df_train, one_hot_encoder_train], axis=1)
self.df_test = pd.concat([self.df_test, one_hot_encoder_test], axis=1)

How do I do a time series analysis as part of running a function in Python 3.7?

I have this:
formulas_count_stats.py:
import pandas as pd
from df_count_stats import df, df1
df = df
df1 = df1
class Data_load_compare_0:
def __init__(self, df):
self.df = pd.read_csv(df, delimiter=';')
'''
Data information section from df = basic stats
'''
def get_EDA_columns(self):
return self.df.columns
def get_EDA_info(self):
return self.df.info()
def get_EDA_describe(self):
return self.df.describe()
def get_EDA_shape(self):
return self.df.shape
def get_EDA_value_counts(self):
return self.df.value_counts()
def get_EDA_isnull(self):
return self.df.isnull()
def get_EDA_dtypes(self):
return self.df.dtypes
def get_EDA_isna(self):
return self.df.isna()
def get_EDA_nunique(self):
return self.df.nunique()
def get_EDA_sort_dipl(self):
return self.df.query("col1 == 'X'")
def get_EDA_sort_bach(self):
return self.df.query("col1 == 'Y'")
def get_EDA_sort_by_line(self):
return self.df.groupby(['col2', 'col1', 'col3']).agg(['count'])# groupby(['User Name', 'col2'])['col1'].size().reset_index(name='counts')
'''
Time series
'''
import matplotlib.pyplot as plt
def get_time_series(self):
df['Logon Time'] = pd.to_datetime(df['Logon Time'], errors='coerce')
df['Year'] = df.index.dt.year
df['month'] = df.index.dt.month
df['day'] = df.inde.dt.day
df['hour'] = df.index.dt.hour
df['week'] = df.index.dt.week
df['count'] = df['User Name']
return df.groupby([df['Logon Time'].dt.year, df['Logon Time'].dt.month]).sum().plot.bar()
plt.show()
...and running the functions from main_count_stats.py (loading data from df_count_stats.py):
from df_count_stats import df_load, df1_load
from formulas_count_stats import Data_load_compare_0, Data_load_compare_1
myData = Data_load_compare_0(df_load)
myData1 = Data_load_compare_1(df1_load)
EDA_stats_00_0 = myData.get_EDA_columns()
EDA_stats_01_0 = myData.get_EDA_nunique()
EDA_stats_02_0 = myData.get_EDA_shape()
EDA_stats_03_0 = myData.get_EDA_info()
EDA_stats_04_0 = myData.get_EDA_isna()
EDA_stats_05_0 = myData.get_EDA_isnull()
EDA_stats_06_0 = myData.get_EDA_describe()
EDA_stats_07_0 = myData.get_EDA_dtypes()
EDA_stats_08_0 = myData.get_EDA_sort_bach()
EDA_stats_09_0 = myData.get_EDA_sort_dipl()
EDA_stats_10_0 = myData.get_EDA_sort_by_line()
EDA_stats_11_0 = myData.get_time_series()
I get this error:
Traceback (most recent call last):
File "C:/.../.../main_count_stats.py", line 25, in <module>
EDA_stats_11_0 = myData.get_time_series()
File "C:\...\...\...\formulas_count_stats.py", line 59, in get_time_series
df['Year'] = df.index.dt.year
AttributeError: 'RangeIndex' object has no attribute 'dt'
I hoped that my attempt to integrate a simple time series analysis as part of an otherwise well-functioning formulas_count_stats.py would work. Obviously, it does not. I did change the index to a 'to_datetime' format.
How can I solve this?
or you can convert the index to Series and use .dt:
def get_time_series(self):
self.df['Logon Time'] = pd.to_datetime(df['Logon Time'], errors='coerce')
self.df[['Year', 'month', 'day', 'hour', 'week']] = (pd.Series(df.index)
.dt.strftime('%Y-%m-%d-%H-%W')
.str.split('-', expand=True).astype(int)).values
self.df['count'] = df['User Name']
return self.df.groupby([df['Logon Time'].dt.year, df['Logon Time'].dt.month]).sum().plot.bar()

how to append a data at a list and plot it with python plotly

hi there i m still trying to get trade bot and try to plot them with their time and low price data.
i wanna get buy signals that i ve specified at if condition (when macdh turns from negative to positive). then i want to plot them at a data. but can not add them at buy_signal=[] place.
my error is
self.plotData(buy_signals = buy_signals)
IndexError: list index out of range
import requests
import json
from stockstats import StockDataFrame as Sdf
import plotly.graph_objects as go
from plotly.offline import plot
class TradingModel:
def __init__(self, symbol):
self.symbol = symbol
self.df = self.getData
def getData(self):
# define URL
base = 'https://api.binance.com'
endpoint = '/api/v3/klines'
params = '?&symbol='+self.symbol+'&interval=4h'
url = base + endpoint + params
# download data
data = requests.get(url)
dictionary = data.json()
# put in dataframe and clean-up
df = pd.DataFrame.from_dict(dictionary)
df = df.drop(range(6, 12), axis=1)
# rename columns and stockstasts
col_names = ['time', 'open', 'high', 'low', 'close', 'volume']
df.columns = col_names
stock = Sdf.retype(df)
for col in col_names:
df[col]=df[col].astype(float)
#defined macdh
df['macdh']=stock['macdh']
return (df)
def strategy(self):
df = self.df
buy_signals=[]
for i in range(1, len(df['close'])):
if df['macdh'].iloc[-1]>0 and df['macdh'].iloc[-2]<0:
buy_signals.append([df['time'][i], df['low'][i]])
self.plotData(buy_signals = buy_signals)
def plotData(self,buy_signal=False):
df=self.df
candle=go.Candlestick(
x=df['time'],
open=df['open'],
close=df['close'],
high=df['high'],
low=df['low'],
name="Candlesticks"
)
macdh=go.Scatter(
x=df['time'],
y=df['macdh'],
name="Macdh",
line = dict(color=('rgba(102, 207, 255, 50)')))
Data=[candle,macdh]
if buy_signals:
buys = go.Scatter(
x = [item[0] for item in buy_signals],
y = [item[1] for item in buy_signals],
name = "Buy Signals",
mode = "markers",
)
sells = go.Scatter(
x = [item[0] for item in buy_signals],
y = [item[1]*1.04 for item in buy_signals],
name = "Sell Signals",
mode = "markers",
)
data = [candle, macdh, buys, sells]
# style and display
layout = go.Layout(title = self.symbol)
fig = go.Figure(data = data, layout = layout)
plot(fig, filename=self.symbol)
def Main():
symbol = "BTCUSDT"
model = TradingModel(symbol)
model.strategy()
if __name__ == '__main__':
Main() ```
You need to replace :
self.plotData(buy_signals[i]) by self.plotData(buy_signals)
def plotData(self,buy_signal=False): by def plotData(self,buy_signals=None):
And it should be good to go !

Function erroring out when calling another function

I'm getting the following error when calling a function from another function:
TypeError: 'GLMResultsWrapper' object is not callable
I get the error at the coeffs = model_results(model_results) line below.
This is another function that runs error free outside of the table_to_graph function. The model_results function takes the summary output from a statsmodel model and puts it into a data frame.
The table_to_graph function joins that dataframe to another table that is the df in the input. table_to_graph function below.
The ultimate function is the following:
# Add into table generation table
def table_to_graph(model_results, df):
'''
#function that combines rating tables and model summary
'''
coeffs = model_results(model_results)
try:
df['key'] = df['variable']+"_"+df['level']
df = pd.merge(df, coeffs, left_on = 'key', right_on = 'index', how = 'left')
df['factor'] = np.exp(df[factor])
df['factor'].fillna(1, inplace = True)
df['error_up'] = np.exp(df[error_up])
df['error_down'] = np.exp(df[error_down])
#title2 = title1
df = df[['model', 'variable', 'level', 'total_incurred', 'total_count', 'cmeu', 'factor', 'error_up', 'error_down'
, 'pricing_model_1_p_values']]
return df
#df1 = df1.append(df)
except:
#df['level'] = df['level'].astype('str')
df['key'] = df['variable']+"_"+df['level'].astype('str')
df['level'] = df['level'].astype('int')
df = pd.merge(df, coeffs, left_on = 'key', right_on = 'index', how = 'left')
df['factor'] = np.exp(df[factor])
df['factor'].fillna(1, inplace = True)
df['error_up'] = np.exp(df[error_up])
df['error_down'] = np.exp(df[error_down])
df = df[['model', 'variable', 'level', 'total_incurred', 'total_count', 'cmeu', 'factor', 'error_up'
, 'error_down', 'pricing_model_1_p_values']]
#df1 = df1.append(df)
return df
model_results function below:
def model_results(model_results):
'''
function that puts model parameters into a data frame
'''
df = pd.DataFrame(model_results.params, columns = ['factor'])
df['error_down'] = model_results.conf_int()[0]
df['error_up'] = model_results.conf_int()[1]
df['standard_error'] = model_results.bse
df['pvalues'] = round(model_results.pvalues, 3)
df.reset_index(inplace = True)
return df
The problem is that you are not calling the function you have defined as model_results but instead are "calling" the model_results data on the model_results data. This is why you get the error that the object is not callable.
Change either the function name or the name of the model_results data to something else, this will allow python to make a distinction between the two and do what you want it to do. Which is call the function model_results on the model_results data.

How to save data from pandastable?

I'm creating a table interface in which the user will receive a table from pandastable and write some data. Then, I need to save the updated pandastable to make some evaluations. How could I do this?
This is the code:
from tkinter import *
from pandastable import Table, TableModel
import pandas as pd
class TestApp(Frame):
def __init__(self, parent=None):
self.parent = parent
Frame.__init__(self)
self.main = self.master
self.main.geometry('600x400+200+100')
self.main.title('Table app')
f = Frame(self.main)
f.pack(fill=BOTH,expand=1)
df = pd.read_excel('2018.xlsx','November',7)
x = 'desc'
nome = df[x].values.tolist()
name = []
for i in range(len(nome)):
if nome[i] == nome[i] and nome[i] != x:
name.append(nome[i])
df1 = pd.DataFrame({x:name})
cfop = [float('Nan') for i in range(len(name))]
cst_in = [float('Nan') for i in range(len(name))]
cst_out = [float('Nan') for i in range(len(name))]
ncm = [float('Nan') for i in range(len(name))]
icms = [float('Nan') for i in range(len(name))]
df1['ncm'] = pd.Series(ncm)
df1['CST in'] = pd.Series(cst_in)
df1['CST out'] = pd.Series(cst_out)
df1['CFOP'] = pd.Series(cfop)
df1['ICMS'] = pd.Series(icms)
self.table = pt = Table(f, dataframe=df1, showtoolbar=True, showstatusbar=True)
pt.show()
return
app = TestApp()
app.mainloop()
df1 = Table.model.df1
The user is expected to fill the columns ncm, CFOP, ICMS, CST in and CST out. Right now, he can write on those columns, but the data is lost once he closes the app. I want to get the data that he writes and put in a variable DataFrame.
You can use the following line:
pt.doExport(filename="test2.csv")
This will result in a .csv file with all of the data from the table.

Categories

Resources