class which reads the file content - python

I'd like to write class which reads the *.csv file and parse it using the pandas library. I'm wondering where I should initialize df.
#!/usr/bin/env python
import pandas as pd
import os
class ParseDataBase(object):
def __init__(self, name_file):
self.name_file = name_file
def read_file(self):
"""Read the file concent"""
try:
self.df = pd.read_csv(self.name_file)
except IndexError:
print ("Error: Wrong file name")
sys.exit(2)
return self.df
def dispaly_file(self):
print self.df
def main():
x = ParseDataBase('something.csv')
x.dispaly_file()
if __name__ == '__main__':
main()
The above code returns the following error: 'ParseDataBase' object has no attribute 'df'.
I don't want to pass to many variables while crating the object.
I'm new to object oriented programming, so any comments and hints are highly appreciated!

You aren't assigning self.df unless you run read_file(), which you aren't.
def main():
x = ParseDataBase('something.csv')
x.read_file()
x.dispaly_file()

the attribute df gets assigned in the read_file method. You are trying to access that attribute prior to it existing.
I'd do this:
#!/usr/bin/env python
import pandas as pd
import os
class ParseDataBase(object):
def __init__(self, name_file):
self.name_file = name_file
# Change I made to initiate in the init method.
self.df = self.read_file()
def read_file(self):
"""Read the file concent"""
try:
self.df = pd.read_csv(self.name_file)
except IndexError:
print ("Error: Wrong file name")
sys.exit(2)
return self.df
def dispaly_file(self):
print self.df
def main():
x = ParseDataBase('something.csv')
x.dispaly_file()
if __name__ == '__main__':
main()

Related

Returning DataFrame from one class to another class

I am trying to create 2 python classes, class CsvtoDataFrame for moving data from csv to DataFrame. and class DataFrametoDB from Dataframe to database. When I am trying to return the dataframe from CsvtoDataFrame and print it. It says "<main.CsvtoDataFrame object at 0x00981890>" How can I see the data of the dataframe outside the CsvtoDataFrame . I need help in this. Please!
import pandas as pd
class CsvtoDataFrame:
global pd_sales
def init(self,FileName):
self.FileName = FileName
pd_sales=pd.read_csv(FileName)
#print(pd_sales)
def ReturnFile(self):
return pd_sales
class DataFrametoDB:
def init(self,obj):
self.pd_sales=obj.pd_sales
print(self.pd_sales)
df=CsvtoDataFrame('test.csv')
print(df)enter image description here
In order to return pd_sales, you may need to create another function, insteading of doing in def init(self, FileName).
import pandas as pd
class CsvtoDataFrame:
global pd_sales
def __init__(self,File):
self.FileName = File
#print(pd_sales)
def readcvs(self):
pd_sales=pd.read_csv(self.FileName)
return pd_sales;
class DataFrametoDB:
def __init__(self,obj):
self.pd_sales=obj.pd_sales
print(self.pd_sales)
df=CsvtoDataFrame('test.csv')
df2=df.readcvs()
print(df2)

Module 'self' has no attribute 'file' error in class format

I am building a python code to validate the email address and the phone number in a given CSV file using pandas and I want to write a separate CSV file with only the validated values. I am totally new to python and I have written a code for the functionality as follows:
from email_validator import validate_email, EmailNotValidError
import pandas as pd
import re
file = r'sample.csv'
filtered = r'filtered.csv'
valid = r'Valid.csv'
df=pd.read_csv(file)
def eVali(dataFrame):
try:
validate_email(dataFrame)
return True
except EmailNotValidError:
return False
def phoneValid(dataFrame):
if re.search("\w{3}-\w{3}-\w{4}",dataFrame):
return True
else:
return False
df["Email_validate"] = df['email'].apply(eVali)
df_fltrd = df[df['Email_validate']]
#del df_fltrd['Email_validate']
print(df_fltrd)
df_fltrd["Phone_validate"] =df_fltrd['phone'].apply(phoneValid)
df_valid = df_fltrd[df_fltrd["Phone_validate"]]
del df_valid["Phone_validate", "Email_validate"]
print(df_valid)
df_fltrd.to_csv(filtered)
df_valid.to_csv(valid)
This code is working fine and I could create a new CSV with validated values as I require. but when I tried to organize this code as a proper class with the proper method it gives an error saying,
Traceback (most recent call last):
File "E:\myTasks\validator.py", line 7, in <module>
class Validator:
File "E:\myTasks\validator.py", line 47, in Validator
validation(self.file)
AttributeError: module 'self' has no attribute 'file'
This is the class I created.
Validator.py
import self as self
from email_validator import validate_email, EmailNotValidError
import pandas as pd
import re
class Validator:
def __init__(self):
self.file = r'sample.csv'
self.outFile =r'filteredSample.csv'
def emailValid(dataframe):
try:
validate_email(dataframe)
return True
except EmailNotValidError:
return False
def phoneValid(dataframe):
if re.search("\w{3}-\w{3}-\w{4}", dataframe):
return True
else:
return False
def validation(self):
df = pd.read_csv(self.file)
df = df.copy();
df["Email_validate"] = df['email'].apply(Validator.emailValid)
df_filtered = df[df['Email_validate']]
print(df_filtered)
df_filtered["Phone_validate"] = df_filtered['phone'].apply(Validator.phoneValid)
df_valid = df_filtered[df_filtered["Phone_validate"]]
del df_valid["Email_validate"]
del df_valid["Phone_validate"]
print(df_valid)
df_valid.to_csv(self.outFile)
validation(self)
Can someone please help me with this. It will be really appreciated. Thanks in advance!
Well, you can't call an instance method from the class itself
validation(self)
This bit should be outside of your class, for example it could be called from your main function after having instantiated your Validator object.
my_validator = Validator()
my_validator.validation()
You do not import self.
self is the instance you are in at the time of code execution.
Your problem is that you did not understand classes yet. You tried to call a class method within the class which python does but toes not like.
I'd suggest you have a look at https://docs.python.org/3/tutorial/classes.html and/or https://www.w3schools.com/python/python_classes.asp.
You want to push the last line to the end and add
def main():
i = Validator()
i.validation()
if __name__ == "__main__":
main()

how to create an object which takes as a parameter another object using python

I want to create my own logging class: which writes some data to a text file.
For this I have made a class mylog.py
I want to be able to create an instance object of the mylog.py class an pass the instance object as a paremeter to the other classes I have written.
However when I try to access the mylog object using the self notation and without using the self notation I am having issues.
The issue is that when I refer to the mylog object in the startup class and use self.log = logger this doesn't work to use the methods of the mylog class like self.log.write() nor does setting logobj to a variable without self and passing that in.
My mylog.py class
import datetime
import os
class logtextfile(object):
def __init__(self, name):
self.name = name
def __str__(self):
return "{} ".format(self.__class__.__name__)
def write(self,**kwargs):
"""Writes a log message to a user specified file which indicates the action takes and if it was successful"""
self.file = kwargs.get('file',"log.txt")
self.loglevel = kwargs.get('loglevel',"critical")
self.logmessage = kwargs.get('logmessage',"error")
self.success = kwargs.get('success',False)
self.class_name = kwargs.get('class',str("{}".format(self.__class__.__name__)))
self.output = ", ".join([str(datetime.datetime.now().replace(second=0,microsecond=0)),self.class_name,str(self.logmessage),str(self.success),str("\n")])
for key, value in kwargs.items():
setattr(self,key,value)
f = open(str(self.file),"a")
f.write(self.output)
f.close()
def now(self, filename, openas, data):
"""Creates a log file with todays date and time"""
fmt='%Y-%m-%d-%H-%M-%S_{fname}'
fn = datetime.datetime.now().strftime(fmt).format(fname=filename)
f = open(str(fn),openas)
f.write(data + "\n")
f.close()
My startup class
import pandas as pd
import numpy as np
from pandas_datareader import data as web
import datetime
import requests
import lxml
from IPython.display import clear_output
import time
import timeit
from bs4 import BeautifulSoup
import re
import os
import sqlite3
from sqlalchemy import create_engine # database connection
from zenlog import log
class company(object):
def __init__(self, name, logobj):
self.name = name
logger = logobj
def __str__(self):
return "{} ".format(self.__class__.__name__)
def listed(self):
try:
#all companies on asx downloaded from asx website csv
self.function_name = str("{}".format(self.__class__.__name__))
df = pd.read_csv('http://asx.com.au/asx/research/ASXListedCompanies.csv', skiprows=1)
df.columns = ["company","asx_code","industry"]
df["yahoo_code"] = df["asx_code"]+".AX"
message = "succesfully downloaded ASXListedCompanies.csv"
logger.write(file="asx_module_log.txt",logmessage=message,success=True)
return df
except:
message = "ASXListedCompanies.csv could not be retrieved, the website is unavailable"
try:
logger.write(file="asx_module_log.txt",logmessage=message)
except:
log.critical(message)
def valid(self):
try:
df = self.listed()
return df[(df["industry"]!= "Not Applic") & (df["industry"]!="Class Pend")]
except:
message = "Could not retrieve listed companies object with pandas dataframe"
try:
logfile.write(file="asx_module_log.txt",logmessage=message)
except:
log.critical(message)
def invalid(self):
try:
df = self.listed()
return df[(df["industry"]=="Not Applic") | (df["industry"]=="Class Pend")]
except:
message = "Could not retrieve listed companies object with pandas dataframe"
try:
logfile.write(file="asx_module_log.txt",logmessage=message)
except:
log.critical(message)
my code to create an instance of mylog and pass it to the startup class so that it can log to the textfile.
import mylog
import startup
logger = mylog.logtextfile(name="mylogfile")
c = startup.company(name="mycompany",logobj=logger)
df = c.invalid()
df.head()
I can't test your company class: I don't have most of those 3rd-party modules. However, it's generally a bad idea to have "naked" except clauses. Use named exceptions, otherwise you may be catching things that you don't expect.
Anyway, here's a short demo of using your logger in an instance of another class.
import datetime
class LogTextfile(object):
def __init__(self, name):
self.name = name
def __str__(self):
return "{} ".format(self.__class__.__name__)
def write(self, **kwargs):
""" Writes a log message to a user specified file which
indicates the action takes and if it was successful
"""
self.file = kwargs.get('file', "log.txt")
self.loglevel = kwargs.get('loglevel', "critical")
self.logmessage = kwargs.get('logmessage', "error")
self.success = kwargs.get('success', False)
self.class_name = kwargs.get('class', str("{}".format(self.__class__.__name__)))
self.output = ", ".join([str(datetime.datetime.now().replace(second=0, microsecond=0)),
self.class_name, str(self.logmessage), str(self.success), str("\n")])
for key, value in kwargs.items():
setattr(self, key, value)
f = open(str(self.file), "a")
f.write(self.output)
f.close()
def now(self, filename, openas, data):
"""Creates a log file with todays date and time"""
fmt = '%Y-%m-%d-%H-%M-%S_{fname}'
fn = datetime.datetime.now().strftime(fmt).format(fname=filename)
f = open(str(fn), openas)
f.write(data + "\n")
f.close()
class Test(object):
def __init__(self, logger):
self.logger = logger
def logtest(self, message):
self.logger.write(logmessage=message)
logger = LogTextfile(name="mylogfile")
logger.write(logmessage='This is a test')
t = Test(logger)
t.logtest('Message from Test')
contents of "log.txt"
2017-05-04 22:40:00, LogTextfile, This is a test, False,
2017-05-04 22:40:00, LogTextfile, Message from Test, False,
Simply:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
class A(object):
def __init__(self, x=None):
self.x = x
class B(object):
def __init__(self, a):
if (isinstance(a, A)):
self.x = a.x
else:
self.x = 0
if __name__ == "__main__":
a = A(10)
b = B(a)

Python: Should I use a Class to wrap main?

I have a simple script that needs to poll for example 'weather data' every 24 hours using cron and dump the output to a DB.
What would be the most Pythonic way of implementing this?
I do however plan on using a WeatherRecord class used inside the parsing which is responsible for cleaning and modifying the raw input to the format required.
The second method below just feels like wrapping "main" in a class without adding any benefit.
Thanks for any comments
Function version:
file: bin/import-weather
from weather.weather import import_weather()
def main()
import_weather()
if __name__ == "__main__":
main()
file: weather/weather.py
def import_weather():
raw_weather = get_weather('http://weather.is.here/')
parsed_weather = parse_weather(raw_weather)
db_connection = DbConnection.getClient()
write_weather_to_db(parsed_weather, db_connection)
Class version:
file: bin/import-weather
from weather.weather import WeatherImporter
def main():
weather = WeatherImporter()
weather.parse_weather()
weather.write_to_db()
if __name__ == "__main__":
main()
file: weather/weather.py
class WeatherImporter(object):
def __init__(self):
self.db_connection = DbConnection.getClient()
self.url = 'http://weather.is.here/'
self.raw_weather = self._get_weather() # list of dict
self.parsed_weather = None # list of WeatherRecord
def _get_weather(self):
print(self.url)
return ["{JSON Data}"]
def parse_weather(self):
self.parsed_weather = self.raw_weather
def write_to_db(self):
print(self.parsed_weather)
pass

how to save/read class wholly in Python

som = SOM_CLASS() # includes many big difficult data structures
som.hard_work()
som.save_to_disk(filename)
#then later or another program
som = SOM_CLASS()
som.read_from_file(filename)
som.do_anythink_else()
or
som = SOM_CLASS()
save(som)
#...
load(som)
som.work()
what is easiest way to do this?
You can (de)serialize with pickle. It is backward-compatible, i.e. it will support all old protocols in future versions.
import pickle
som = SOM_CLASS()
fileObject = <any file-like object>
pickle.dump(som, fileObject)
#...
som = pickle.load(fileObject)
som.work()
But mind that if you transfer pickled objects to another computer, make sure the connection cannot be tampered with as pickle might be unsecure (this is an article that every pickle user should know).
Another alternative is the older module marshal.
I use this code:
import cPickle
import traceback
class someClass():
def __init__(self):
#set name from variable name. http://stackoverflow.com/questions/1690400/getting-an-instance-name-inside-class-init
(filename,line_number,function_name,text)=traceback.extract_stack()[-2]
def_name = text[:text.find('=')].strip()
self.name = def_name
try:
self.load()
except:
##############
#to demonstrate
self.someAttribute = 'bla'
self.someAttribute2 = ['more']
##############
self.save()
def save(self):
"""save class as self.name.txt"""
file = open(self.name+'.txt','w')
file.write(cPickle.dumps(self.__dict__))
file.close()
def load(self):
"""try load self.name.txt"""
file = open(self.name+'.txt','r')
dataPickle = file.read()
file.close()
self.__dict__ = cPickle.loads(dataPickle)
This code saves and loads the class from its actual class instance name. Code is from my blog http://www.schurpf.com/python-save-a-class/.
Take a look at Python's pickle library.
Use pickle in this way:
import pickle
class Student:
def __init__(self, name, age, grade):
self.name = name
self.age = age
self.grade = grade # 0 - 100
def get_grade(self):
print (self.grade)
s1 = Student("Tim", 19, 95)
#save it
with open(f'test.pickle', 'wb') as file:
pickle.dump(s1, file)
#load it
with open(f'test.pickle', 'rb') as file2:
s1_new = pickle.load(file2)
#check it
s1_new.get_grade()
# it prints 95

Categories

Resources