Both versions of the code work. I am trying to understand the difference between self.data_as_csv and data_as_csv.
In which scenarios is each of them more useful than the other?
Version 1:
import pandas as pd
class test_class:
    """Load a CSV file into a DataFrame and keep the result on the instance.

    Attributes:
        file: The CSV source given at construction, passed to ``pd.read_csv``.
        data_as_csv: The DataFrame from the most recent ``generate_csv()``
            call, or ``None`` if it has not been called yet.
    """

    def __init__(self, inputFile):
        self.file = inputFile
        # Declared up front so that reading the attribute before
        # generate_csv() yields None instead of raising AttributeError.
        self.data_as_csv = None

    def generate_csv(self):
        """Read ``self.file``, store the frame on ``self`` and return it."""
        self.data_as_csv = pd.read_csv(self.file)
        return self.data_as_csv
# Build a reader for "out.csv", load the file, and print the resulting DataFrame.
x = test_class("out.csv")
df = x.generate_csv()  # the same frame is also stored as x.data_as_csv
print(df)
Version 2:
import pandas as pd
class test_class:
    """Read a CSV file on demand without keeping the result on the instance."""

    def __init__(self, inputFile):
        # Only the file location is kept as instance state.
        self.file = inputFile

    def generate_csv(self):
        """Return a freshly loaded DataFrame for ``self.file``.

        The frame lives in a local variable only, so the caller has to hold
        on to the returned value; nothing is stored on ``self``.
        """
        data_as_csv = pd.read_csv(filepath_or_buffer=self.file)
        return data_as_csv
# Build a reader for "out.csv", load the file, and print the resulting DataFrame.
x = test_class("out.csv")
df = x.generate_csv()  # the returned value is the only handle on the frame
print(df)
When you assign it to self, you can access the variable on the instance like this, which is useful if you need to reach the data from the object later:
x.data_as_csv
Out[1456]:
Empty DataFrame
Columns: [P234, Dog, Billy, No, No.1, D32432432, Lost, 01/09/2018, 28/08/2019, return to owner, 123 Fake Drive, LS34 1LE]
Index: []
Using self. allows you to access this variable from any method of your class. In other words, you get a 'global'-like variable that is available only within the current class and only for the current instance.
Related
I have created a DataFrame inside of a class but I am having trouble using it outside of the class or even calling it. How would I do that? I just want to print the DataFrame outside of the class.
class Youpi(Baseball, Soccer):
    """Keeps a running DataFrame of hot-dog counts in ``self.Random_df``."""

    def __init__(self):
        # NOTE(review): `self` is passed twice, so Baseball.__init__ receives
        # the instance as its first explicit argument as well, and
        # Soccer.__init__ is never called. Confirm against the base classes,
        # which are not shown here.
        Baseball.__init__(self, self)
        self.Random_df = pd.DataFrame(columns=["Hot-Dogs"])

    def Attendance(self, hot_dogs):
        """Add one row to ``self.Random_df`` and return the updated frame.

        NOTE(review): ``hot_dogs`` is accepted but not used; the row value is
        fixed at 5, as in the original code.
        """
        dictionary = {"Hot-Dogs": 5}
        # DataFrame.append was removed in pandas 2.0; concatenating a one-row
        # frame is the supported replacement.
        self.Random_df = pd.concat(
            [self.Random_df, pd.DataFrame([dictionary])],
            ignore_index=True,
        )
        return self.Random_df
Desired output:
// instruction to print the dataframe here
Output:
Hot-Dogs
5
I would try print(dictionary.Attendance)
I need to have 100 of those similar python scripts that have MyData class from MyData_1 to MyData_100.
import torch
import numpy as np
from torch_geometric.data import InMemoryDataset, Data
from torch_geometric.utils import to_undirected
class MyData_1(InMemoryDataset):
    """In-memory dataset built from the raw file ``mydata_1.npz``.

    ``process()`` turns the raw arrays into a list of ``Data`` objects and
    saves the collated result; ``__init__`` loads that saved result.
    """

    def __init__(self, root, transform=None):
        super().__init__(root, transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    # raw_file_names / processed_file_names must be properties: they are read
    # as attributes here (via self.raw_paths / self.processed_paths), not called.
    @property
    def raw_file_names(self):
        """Name of the raw input file for this dataset."""
        return "mydata_1.npz"

    @property
    def processed_file_names(self):
        """Name of the file the processed dataset is saved to."""
        return "data_1.pt"

    def process(self):
        """Build one ``Data`` object per entry of ``cp`` and save the collated list."""
        raw_data = np.load(self.raw_paths[0])
        # NOTE(review): this path is relative to the working directory, not to
        # the dataset root — confirm that is intended.
        cluster_data = torch.load('./raw/total_clusters.pt')
        x = torch.from_numpy(raw_data['x'])
        y = torch.from_numpy(raw_data['y'])
        pos = torch.stack([x, y], dim=-1)
        cp = torch.from_numpy(raw_data['cp'])
        # Every sample shares the same positions and cluster; only x differs.
        data_list = [
            Data(x=cp[i].view(-1, 1), pos=pos.view(-1, 2), cluster=cluster_data[0])
            for i in range(cp.size(0))
        ]
        torch.save(self.collate(data_list), self.processed_paths[0])
I'm trying to do this because each MyData class calls different mydata_1,2,...100.npz to generate dataset.
Is there any way to make this fast?
Thanks in advance!
I don't fully understand why you need to create 100 different classes.
Is it because you need to return mydata_1.npz through mydata_100.npz? If so, you can create a single class that takes the index as a parameter, like this:
class Myclass:
    """Holds a dataset index and derives the matching raw file name from it."""

    def __init__(self, index):
        self.index = index

    def raw_file_names(self):
        """Return ``"mydata_<index>.npz"`` for this instance's index."""
        return f"mydata_{self.index}.npz"
Then, at another script like main.py, you can create/assign it like:
# One Myclass instance per data file, keyed by its index
# (mydata_1.npz ... mydata_100.npz). A dict replaces the exec()-generated
# variable names, and the single parameterised class replaces the per-index
# MyData_<i> classes, so dataset i is simply datasets[i].
datasets = {i: Myclass(i) for i in range(1, 101)}
I believe you can build your own code that fits your problem with above examples.
You can achieve this by creating the subclasses dynamically with type(). Below is an example of how to pass a dynamic name to a subclass through the __init_subclass__ magic method:
MyDynamicSubclass
class MyClass:
    """Base class that reports every subclass at the moment it is created."""

    def __init_subclass__(cls, my_name):
        """Hook run on subclass creation; ``my_name`` is a required class keyword."""
        announcement = f"Subclass created and my name is {my_name}"
        print(announcement)
        metaclass_name = type(cls).__name__
        print(cls, metaclass_name)


# Three-argument type() builds the subclass at runtime; the extra keyword is
# forwarded to MyClass.__init_subclass__.
MyDynamicSubclass = type("MyDynamicSubclass", (MyClass,), dict(), my_name="Ellis")
output:
Subclass created and my name is Ellis
<class '__main__.MyDynamicSubclass'> type
The first step of my program is to get the data.
After that I am finding myself passing this data to all the different classes (with also a config dictionary variable) over and over again.
So I am wondering if there is a better way to just store the data somewhere and make it available to all classes and functions, without passing them as a parameter.
Thank you
Edit: here is a code example
go.py
# Run configuration handed to stratego.start(); start() reads the 'data' and
# 'strategy' sections.
# NOTE(review): start() looks up config['data']['type'], which this empty
# 'data' dict does not provide — presumably it is filled in before a real run.
config = {
'mode' : 'single',
'data' : { },
'strategy' : { },
'view' : { }
}
stratego.start(config)
stratego.py
def start(config):
    """Load data as configured, then run the strategy over it.

    Reads ``config['data']`` (including its ``'type'`` key, ``'yahoo'`` or
    ``'csv'``) and ``config['strategy']``.

    Returns:
        ``False`` when the data type is not recognised; otherwise ``None``
        (the trade book produced by the run is not returned).
    """
    data = dt.Data(config['data'])
    source_type = config['data']['type']
    if source_type == 'yahoo':
        df = data.get_yahoo_data()
    else:
        if source_type != 'csv':
            return False
        df = data.get_csv_data()
    # NOTE(review): `str` must be a module alias here, which shadows the
    # builtin str — confirm against the imports, which are not shown.
    trades = str.Strategy(df, config['strategy'])
    tradeBook = trades.run()
but then I am realising that the problem is my main function (start). If I run the main code not in a function I have all my instances available in the global. Is that right? Is it correct to do this way or it is better to wrap the program in a main function?
If you really don't want to pass it as an argument, you could define it as a variable in a Python file and import that variable in the module where you define your function. You should then be able to use the variable inside the function without passing it as an argument.
EDIT: Refactored code according to code update by OP
Ok since you use a strategy pattern you can actually do that using a strategy like design pattern
stratego.py
def start(*strategies):
    """Run each of the given strategies once, in the order they were passed."""
    for current in strategies:
        current.run()
go.py
from functools import lru_cache, wraps
from abc import ABC, abstractmethod
import stratego
@lru_cache()
def get_data(filepath):
    """Load the data stored at ``filepath``; the result is cached per path.

    The loading step itself is still a placeholder (``...``).
    """
    # Load data from filepath
    data = ...
    return data
@lru_cache()
def get_data_with_config(**data_config):
    """Load the data for ``data_config['filepath']`` and return the frame
    selected by ``data_config['type']`` (``'yahoo'`` or ``'csv'``).

    Results are cached per distinct set of keyword arguments, so every value
    in ``data_config`` must be hashable.

    Raises:
        ValueError: if ``data_config['type']`` is not a supported source type.
    """
    # Load data based on data_config
    data = get_data(data_config['filepath'])
    if data_config['type'] == 'yahoo':
        df = data.get_yahoo_data()
    elif data_config['type'] == 'csv':
        df = data.get_csv_data()
    else:
        # Further source types go in additional branches above; without this
        # an unknown type would fall through to an unbound `df`.
        raise ValueError(f"unsupported data type: {data_config['type']!r}")
    return df
class Strategy(ABC):
    """Abstract base for runnable strategies; stores the config it was built with."""

    def __init__(self, config):
        self.config = config

    @abstractmethod
    def run(self):
        """Execute the strategy. Concrete subclasses must override this."""
class YahooStrategy(Strategy):
    """Strategy whose data section is forced to the ``'yahoo'`` source type."""

    def __init__(self, config):
        # dict.copy() is shallow, so writing into config['data'] would also
        # change the caller's nested dict; rebuild the data section instead.
        config = {**config, 'data': {**config['data'], 'type': 'yahoo'}}
        super().__init__(config)

    def run(self):
        """Fetch the configured data; the processing step is a placeholder."""
        df = get_data_with_config(**self.config['data'])
        # Do sth with data
class CsvStrategy(Strategy):
    """Strategy whose data section is forced to the ``'csv'`` source type."""

    def __init__(self, config):
        # dict.copy() is shallow, so writing into config['data'] would also
        # change the caller's nested dict; rebuild the data section instead.
        config = {**config, 'data': {**config['data'], 'type': 'csv'}}
        super().__init__(config)

    def run(self):
        """Fetch the configured data; the processing step is a placeholder."""
        df = get_data_with_config(**self.config['data'])
        # Do sth with data
class FunctionStrategy(Strategy):
    """Adapter that lets a plain ``func(config)`` callable act as a Strategy."""

    def __init__(self, config, func):
        self.func = func
        super().__init__(config)

    def run(self):
        """Call the wrapped function with this strategy's config and return its result."""
        callback = self.func
        return callback(self.config)
def strategy_decorator(func):
    """Turn ``func(config)`` into a factory that builds a ``FunctionStrategy``.

    Calling the decorated function with a config does not run ``func``; it
    returns a ``FunctionStrategy`` whose ``run()`` will call ``func(config)``.
    """
    @wraps(func)  # keep func's __name__ / __doc__ on the factory
    def wrapper(config):
        return FunctionStrategy(config, func)
    return wrapper
@strategy_decorator
def some_strategy_function(config):
    """Function-style strategy: fetch the configured data, then process it.

    ``config['data']`` must carry everything ``get_data_with_config`` needs
    (at least ``'filepath'`` and ``'type'``).
    """
    df = get_data_with_config(**config['data'])
    # Do smth with data
# With one strategy (add further data options to the 'data' dict as needed)
strategy = YahooStrategy({'data': {'filepath': 'data.csv'}})
stratego.start(strategy)
# Multiple strategies
strategies = [
    YahooStrategy({'data': {'filepath': 'data.csv'}}),
    CsvStrategy({'data': {'filepath': 'data2.csv'}}),
    # A function strategy does not set the data type itself, so pass it here.
    some_strategy_function({'data': {'filepath': 'data4.csv', 'type': 'csv'}}),
]
# stratego exposes start(*strategies); there is no stratego.run.
stratego.start(*strategies)
If you're thinking of pass by reference vs pass by value, then I suspect you are fairly new to Python. Python passes arguments by object reference (sometimes called "pass by assignment"): the function receives a reference to the same object, so the actual data is not copied every time you call a function with parameters.
If you're thinking more along the lines of global variables, you can do something like this:
globvar = 0


def set_globvar_to_one():
    """Rebind the module-level ``globvar`` to 1."""
    # Without this declaration the assignment below would create a local.
    global globvar
    globvar = 1


def print_globvar():
    """Print the current value of the module-level ``globvar``."""
    # Reading a global needs no ``global`` statement.
    print(globvar)
I am trying to create two Python classes: CsvtoDataFrame, for moving data from a CSV file into a DataFrame, and DataFrametoDB, for moving data from a DataFrame into a database. When I try to return the DataFrame from CsvtoDataFrame and print it, it prints "<__main__.CsvtoDataFrame object at 0x00981890>". How can I see the data of the DataFrame outside CsvtoDataFrame? I need help with this, please!
import pandas as pd
class CsvtoDataFrame:
    """Read a CSV file into a DataFrame and expose it on the instance."""

    def __init__(self, FileName):
        self.FileName = FileName
        # Kept on the instance: a plain local (or a class-level `global`
        # statement) would be out of reach for ReturnFile() and for callers
        # that read obj.pd_sales.
        self.pd_sales = pd.read_csv(FileName)

    def ReturnFile(self):
        """Return the DataFrame loaded in ``__init__``."""
        return self.pd_sales
class DataFrametoDB:
    """Takes over the DataFrame held by another object and prints it."""

    def __init__(self, obj):
        # `obj` must expose the frame as an attribute named pd_sales.
        self.pd_sales = obj.pd_sales
        print(self.pd_sales)
df=CsvtoDataFrame('test.csv')
# df is the CsvtoDataFrame object itself, so this prints the object's default
# repr rather than the contents of the DataFrame.
print(df)
In order to return pd_sales, you may need to create another method for reading the file, instead of doing it in def __init__(self, FileName).
import pandas as pd
class CsvtoDataFrame:
    """Remember a CSV file name and read it into a DataFrame on request."""

    def __init__(self, File):
        self.FileName = File
        # Filled in by readcvs(); None until the file has been read.
        self.pd_sales = None

    def readcvs(self):
        """Read ``self.FileName``, keep the frame as ``self.pd_sales`` and return it."""
        # Stored on the instance so that code reading obj.pd_sales (such as
        # DataFrametoDB) can find it.
        self.pd_sales = pd.read_csv(self.FileName)
        return self.pd_sales
class DataFrametoDB:
    """Takes over the DataFrame held by another object and prints it."""

    def __init__(self, obj):
        # `obj` must expose the frame as an attribute named pd_sales.
        frame = obj.pd_sales
        self.pd_sales = frame
        print(frame)
# Build the reader, then call readcvs() to get the actual DataFrame and print it.
df=CsvtoDataFrame('test.csv')
df2=df.readcvs()  # the value returned by readcvs(), not the reader object
print(df2)
I have the following issue: I am trying to define a new class, and I have just created new methods and attributes, but when I try to return the values I get errors such as "object is not callable", "XXX is not an attribute of the class", etc. I imported all the libraries outside of the class:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
(...)
Then just read the data:
# Read iris.csv using ';' as the field delimiter and ',' as the decimal mark.
charge = pd.read_csv('iris.csv',delimiter=';',decimal=",")
print(charge)
Then just decided to work with numeric variables as following:
# Split the frame by column position; .values yields NumPy arrays.
data = charge.iloc[:, 0:4].values # first four columns
dataval= charge.iloc[:, 4].values  # fifth column
print(data)
print(dataval)
And everything looks great until this: when I defined my class
class mynew_ACP:
    """Wraps a data set and offers standardisation and correlation helpers."""

    def __init__(self, data):
        self.__data = data

    @property
    def data(self):
        """The wrapped data; read it as ``obj.data`` (no parentheses)."""
        return self.__data

    # The setter must be named after the property it belongs to (`data`).
    @data.setter
    def data(self, data):
        self.__data = data

    def trasformation(self, data=None):
        """Standardise the data and return the result as a DataFrame.

        Args:
            data: Optional data to transform; defaults to the wrapped data.
        """
        source = self.__data if data is None else data
        # StandardScaler is expected to be imported at module level
        # (presumably from sklearn.preprocessing — not shown here).
        col = StandardScaler().fit_transform(source)
        col2 = pd.DataFrame(col)
        return col2

    def correlation_var(self, data=None):
        """Return the column-wise correlation matrix as a DataFrame.

        Args:
            data: Optional data to correlate; defaults to the wrapped data.
                Anything that is not already a DataFrame is wrapped in one,
                because plain arrays have no ``corr`` method.
        """
        source = self.__data if data is None else data
        frame = source if isinstance(source, pd.DataFrame) else pd.DataFrame(source)
        corr = frame.corr()
        return corr
But when I tried to call the methods to see the values, I did not get anything. I have tried with this code:
# NOTE(review): if `data` is a property on mynew_ACP, `acp.data()` calls the
# returned array itself, which gives "object is not callable"; read it as
# `acp.data` instead. trasformation() and correlation_var() are declared with
# a `data` parameter, which these calls do not pass. None of the results are
# printed or stored, so nothing is shown when this runs as a script.
acp = mynew_ACP(data)
acp.data()
acp.trasformation()
acp.correlation_var()
I got the errors mentioned at the beginning. I am new to Python and I do not know where my issues are. I tried with parentheses and brackets, but that did not work.
Any help?