I'm implementing an API to fetch data from a database and return it according to some conditions.
I managed to create a dataframe by using Pandas. Now, my task is to implement a function that returns the records of the dataframe, one by one, like an iterator. Meaning, each time the user calls this method getRecord(self), he gets the next record.
I'm having trouble implementing this method and I'd really like to get some help. I looked for a way to do it with a Pandas function and couldn't find one. I also thought about implementing the __iter__ and __next__ methods in my class, but it didn't work.
I wonder if you are looking for something like this -
class DfIter:
    """Hand out the rows of a DataFrame one at a time.

    ``getRecord()`` returns the next row on every call and raises
    ``IndexError`` once all rows have been returned. The object also
    supports the iterator protocol, sharing the same cursor as
    ``getRecord()``.
    """

    def __init__(self, df=None):
        """Store the frame to walk through and start at its first row.

        Args:
            df: the DataFrame to iterate over. When omitted, a small
                built-in demo frame is used.
        """
        if df is None:
            df = pd.DataFrame({"a": [1,2,3,4,5], "b": [2,4,8,9,5], "c": [3,4,5,6,7]})
        self.df: pd.DataFrame = df
        # Positional index of the next row to hand out.
        self.state = 0

    def getRecord(self):
        """Return the next row as a Series and advance the cursor.

        Raises:
            IndexError: if every row has already been returned.
        """
        # A plain guard replaces the original `while ... else`, whose loop
        # body always returned on its first pass.
        if self.state >= self.df.shape[0]:
            raise IndexError("No more datapoints to return")
        record = self.df.iloc[self.state]
        self.state += 1
        return record

    def __iter__(self):
        """Return self; iteration continues from the current cursor."""
        return self

    def __next__(self):
        """Return the next row, raising StopIteration when exhausted."""
        if self.state >= self.df.shape[0]:
            raise StopIteration
        return self.getRecord()
# Create the row-by-row reader and fetch one record from it.
iter_obj = DfIter()
iter_obj.getRecord()
Perhaps, you are looking for something as follows -
class LazyPandas:
    """Iterator that yields the rows of a DataFrame one at a time."""

    def __init__(self, df):
        """Remember the frame and its row count.

        Args:
            df: the DataFrame whose rows are yielded.
        """
        self.df = df
        self.max = df.shape[0]
        # Initialised here as well, so next() works without a prior iter().
        self.n = 0

    def __iter__(self):
        """Restart from the first row and return self."""
        self.n = 0
        return self

    def __next__(self):
        """Return the next row as a Series.

        Raises:
            StopIteration: once every row has been returned.
        """
        # Valid positions are 0 .. max-1; the original `<=` let n == max
        # through to iloc, which raised IndexError instead.
        if self.n >= self.max:
            raise StopIteration
        result = self.df.iloc[self.n]
        self.n += 1
        return result
import pandas as pd
# Load the CSV, wrap it in the lazy iterator and print its first four rows.
df = pd.read_csv("test.csv")
lazy_df = LazyPandas(df)
i = iter(lazy_df)
for _ in range(4):
    print(next(i))
Related
My __repr__ method works fine for objects created in its own class, but for objects created by calling methods of an imported library, it only shows the memory address...
from roster import student_roster #I only got the list if students from here
import itertools as it
class ClassroomOrganizer:
    """Alphabetically sorted view over the names in ``student_roster``.

    Instances are their own iterator over the sorted names.
    """

    def __init__(self):
        """Build the sorted name list from the imported roster."""
        self.sorted_names = self._sort_alphabetically(student_roster)

    def __repr__(self):
        """Return the formatted result of ``get_combinations(2)``.

        NOTE: ``get_combinations`` returns an ``itertools.combinations``
        object, so this renders that object itself rather than the pairs
        it would produce.
        """
        return f'{self.get_combinations(2)}'

    def __iter__(self):
        """Reset the cursor to the first name and return self."""
        self.c = 0
        return self

    def __next__(self):
        """Return the next sorted name, or raise StopIteration at the end."""
        if self.c >= len(self.sorted_names):
            raise StopIteration
        name = self.sorted_names[self.c]
        self.c += 1
        return name

    def _sort_alphabetically(self, students):
        """Return the ``'name'`` value of every entry in *students*, sorted."""
        # The original line carried a stray `your text` paste artifact
        # inside the sorted() call, which was a syntax error.
        return sorted(student_info['name'] for student_info in students)

    def get_students_with_subject(self, subject):
        """Return ``(name, subject)`` for each roster entry whose
        ``'favorite_subject'`` equals *subject*."""
        return [
            (student['name'], subject)
            for student in student_roster
            if student['favorite_subject'] == subject
        ]

    def get_combinations(self, r):
        """Return an iterator over all *r*-length combinations of the names."""
        return it.combinations(self.sorted_names, r)
# Build the organizer from the imported roster.
a = ClassroomOrganizer()
# for i in a:
# print(i)
# Print the organizer's __repr__ output.
print(repr(a))
I tried displaying objects that don't rely on another library, and they displayed properly.
The issue I was facing was linked to me not understanding the nature of the object. itertools.combinations is an iterable, and in order to represent the values stored I needed to either:
unpack it inside a variable like:
def get_combinations(self, r):
    """Return every r-length combination of the sorted names as a list."""
    return list(it.combinations(self.sorted_names, r))
Iter through it inside a loop and leave the original code intact like
# Walk the combinations object directly, printing one pair per iteration.
for i in a.get_combinations(2):
    print(i)
I prefer the second solution
I have a simple python code for data cleaning. This code imports data from Excel that has the format like this:
product cost used_by prime
name price gender yes or no
name price gender yes or no
... and so on
afterward I will get a mylist using the class function that looks something like this:
mylist = [Item(comic,20.0,male,yes),
Item(paint,14.0,male,no),
Item(pen,5.0,female,nan),
Item(phone case,9.0,nan,no),
Item(headphone,40.0,male,yes),
Item(coat,nan,male,no),
Item(comic,15.0,male,yes),
Item(nan,15.0,male,no)
... and so on]
and after all the filter and cleaning I will get a result that looks like this:
result = [Item(comic,20.0,male,yes),
Item(underwear,15.0,male,yes),
Item(comic,15.0,male,yes)
...
Item(underwear,15.0,male,yes),
...and so on]
Here is the code I got so far:
import os
import pandas as pd
import math
# Gather every '.XLSX' workbook in the current working directory into one
# DataFrame, then convert it to a list of row lists.
cwd = os.path.abspath('')
files = os.listdir(cwd)
# DataFrame.append was removed in pandas 2.0, so read each workbook first
# and concatenate them in one call.
frames = [
    pd.read_excel(os.path.join(cwd, file))
    for file in files
    if file.endswith('.XLSX')
]
# pd.concat raises on an empty list; fall back to the empty frame the
# original loop produced when no workbook was found.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
df = df.where(df.notnull(), None)
array = df.values.tolist()
print(array)
class Item():
    """One product row: name, cost, gender and prime flag.

    Items compare equal, and hash equal, when all four fields match, so
    exact duplicates collapse when items are placed in a set.
    """

    def __init__(self, name, cost, gender, prime):
        """Store the four fields of a row."""
        self.__name = name
        self.__cost = cost
        self.__gender = gender
        self.__prime = prime

    @staticmethod
    def _is_missing(value):
        """Return True when *value* is None or a float NaN."""
        return value is None or (isinstance(value, float) and math.isnan(value))

    def has_all_properties(self):
        """Return True when no field is missing.

        A field counts as missing when it is None or NaN; name, gender
        and prime are also missing when falsy (e.g. an empty string).
        """
        # NaN is truthy, so a plain truth test lets a NaN name through, and
        # math.isnan() rejects None outright. Check every field explicitly.
        if any(self._is_missing(value) for value in self.__tuple__()):
            return False
        return bool(self.__name and self.__gender and self.__prime)

    def clean(self):
        """Return True for a complete item with cost <= 20, gender
        "male" and prime "yes"."""
        return bool(
            self.has_all_properties()
            and self.__cost <= 20
            and self.__gender == "male"
            and self.__prime == "yes"
        )

    def __repr__(self):
        """Return ``Item(name,cost,gender,prime)``."""
        return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

    def __tuple__(self):
        """Return the four fields as a tuple (plain method, not a protocol hook)."""
        return self.__name, self.__cost, self.__gender, self.__prime

    def __eq__(self, other):
        """Return True when *other* is an Item with identical fields."""
        if not isinstance(other, Item):
            return NotImplemented
        return self.__tuple__() == other.__tuple__()

    def __hash__(self):
        """Hash on the same fields that ``__eq__`` compares."""
        return hash(self.__tuple__())
# Wrap each spreadsheet row in an Item, keep the complete rows that pass
# clean(), and write the survivors to a new workbook.
mylist = [Item(*row) for row in array]
filtered = (item for item in mylist if item.has_all_properties())
clean = (item for item in filtered if item.clean())
result = list(clean)
t_list = [obj.__tuple__() for obj in result]
print(t_list)
output = pd.DataFrame(t_list, columns=['name', 'cost', 'gender', 'prime'])
print(output)
output.to_excel('clean_data.xlsx', index=False, header=True)
In the result, there are two types of repetitive data: one is like the underwear, which has two exactly identical lines; the other is like the comic, which appears with different cost values.
So what I want to do is remove one of the lines that are exactly the same in case one, and keep the line that has the smaller cost value in case two.
For case two, I am thinking of reading the product names to identify whether they are the same and, if they are, comparing their costs and keeping the one with the smaller value. But I am not sure if that is the right way of doing it.
I am aware that using pandas all the way would work, but I wish to explore the class approach and use the customized data frame.
Can you suggest how to do this?
You could use a set instead of a list, i.e., change mylist = [Item(*k) for k in array] to myset = {Item(*k) for k in array}.
Sets do not allow duplicates.
EDIT
Make sure to include implementations of __hash__ and __eq__ in your Item class so that set can know how to determine whether an item has a duplicate.
In your case, the __eq__ would look something like the following:
def __eq__(self, other):
    """Return True when *other* has the same name, cost, gender and prime.

    Returns NotImplemented for objects of an unrelated type so Python can
    fall back to its default comparison. A class that defines ``__eq__``
    also needs a matching ``__hash__`` to be usable in a set.
    """
    if not isinstance(other, type(self)):
        return NotImplemented
    # The original evaluated the four comparisons as separate statements
    # and returned nothing, so the method always produced None.
    return (
        self.__name == other.__name
        and self.__cost == other.__cost
        and self.__gender == other.__gender
        and self.__prime == other.__prime
    )
Learning python from Udacity. Exercise is mentioned below. I cannot see where output 'None' is coming from. Is there something about classes that I am missing ? Thx in advance
Output is always
0
None
======= CODE BEGIN ==============
"""You can use this class to represent how classy someone
or something is.
"Classy" is interchangable with "fancy".
If you add fancy-looking items, you will increase
your "classiness".
Create a function in "Classy" that takes a string as
input and adds it to the "items" list.
Another method should calculate the "classiness"
value based on the items.
The following items have classiness points associated
with them:
"tophat" = 2
"bowtie" = 4
"monocle" = 5
Everything else has 0 points.
Use the test cases below to guide you!"""
class Classy(object):
    # Holds a list of items and a running "classiness" score.
    def __init__(self):
        self.items = []
        self.classiness = 0

    def getClassiness(self):
        # Prints the score. There is no return statement, so the call itself
        # evaluates to None.
        print(self.classiness)

    def createList(self):
        # NOTE(review): `item` is neither a parameter nor a local here;
        # confirm where it is meant to come from.
        self.items.append(item)

    def addItem(self, item):
        # Adds the points for a known item to the score; any other item
        # adds 0. The item is not appended to self.items here.
        if item=="tophat":
            self.classiness+=2
        elif item=="bowtie":
            self.classiness+=4
        elif item=="monocle":
            self.classiness+=5
        else:
            self.classiness+=0
        return self.classiness
# Test cases
me = Classy()
# Should be 0
# Prints the value that getClassiness() returns.
print(me.getClassiness())
Your method getClassiness() is printing and the caller is also printing.
Maybe you meant to return a value rather than printing?
def getClassiness(self):
    """Return the stored classiness value."""
    return self.classiness
class Classy(object):
    """Track fancy items and the classiness score they add up to."""

    # Points per recognised item; anything else is worth 0.
    _POINTS = {"tophat": 2, "bowtie": 4, "monocle": 5}

    def __init__(self):
        """Start with no items and a score of 0."""
        self.items = []
        self.classiness = 0

    def getClassiness(self):
        """Return the current classiness score."""
        return self.classiness

    def createList(self, item):
        """Append *item* to ``items`` without changing the score.

        The original took no argument and referenced an undefined
        ``item``, so every call raised NameError.
        """
        self.items.append(item)

    def addItem(self, item):
        """Record *item* and add its points to the score.

        Unknown items add 0 points. The original reset the whole score
        to 0 for an unknown item and never stored the item.
        """
        self.createList(item)
        self.classiness += self._POINTS.get(item, 0)
# Test cases
me = Classy()
# Should be 0
print(me.getClassiness())
I have the following example in which the next method of a class is supposed to return the values from two generators:
class Test():
    # Pairs up values from two generator methods on each next() call.
    def __next__(self):
        # Both generators are created anew on every call, and only their
        # first value is taken.
        g1, g2 = self._gen1(), self._gen2()
        return next(g1), next(g2)

    def _gen1(self):
        # Yields 0, 2, 4, ... without end.
        i = 0
        while True:
            yield i
            i += 2

    def _gen2(self):
        # Yields 1, 3, 5, ... without end.
        i = 1
        while True:
            yield i
            i += 2
However, when I call next for this class, the values are not incremented.
>>> t = Test()
>>> next(t)
(0, 1)
>>> next(t)
(0, 1)
What is wrong? Is there a more eloquent way to write this class?
Although I have no idea what you are trying to accomplish, here is a cleaned up version which (I think) does what you want.
class Test():
    """Return (even, odd) pairs from two endless generators on each next()."""

    def __init__(self):
        """Create both generators once so their state survives between calls."""
        # g1 must be the even stream and g2 the odd one. The original had
        # them swapped, which produced (1, 0), (3, 2), ...
        self.g1 = self._gen1()
        self.g2 = self._gen2()

    def __next__(self):
        """Return the next value from each generator as a tuple."""
        return next(self.g1), next(self.g2)

    def _gen1(self):
        """Yield 0, 2, 4, ... without end."""
        i = 0
        while True:
            yield i
            i += 2

    def _gen2(self):
        """Yield 1, 3, 5, ... without end."""
        i = 1
        while True:
            yield i
            i += 2
# Print the first three pairs.
t = Test()
for _ in range(3):
    print(next(t))
Your code doesn't work because it creates brand-new generator objects every time __next__() is called, so both generators start over from their initial state and you only ever get their first values:
def __next__(self):
    # Each call builds two fresh generator objects, so the next() calls
    # below always return their first values.
    g1, g2 = self._gen1(), self._gen2() # Don't do this here.
    return next(g1), next(g2)
You can fix that by adding an __init__() method and initializing them in it:
class Test:
    """Keep two generators alive and pair their values on each next()."""

    def __init__(self):
        # Initialize here.
        self.g1 = self._gen1()
        self.g2 = self._gen2()

    def __next__(self):
        first = next(self.g1)
        second = next(self.g2)
        return first, second
...
A more eloquent and slightly more concise way to do it, which likewise avoids the problem, is to use the built-in zip() function to create a single iterator over both generators that returns a tuple of the next value from each one every time next() is called on it. Another advantage is that it's very easy to extend to handle even more generators simply by changing the __init__() method.
Here's what I mean:
class Test:
    """Return (even, odd) pairs by zipping two endless generators."""

    def __init__(self):
        evens = self._gen1()
        odds = self._gen2()
        # zip advances both generators in lockstep on every next().
        self.generators = zip(evens, odds)

    def __next__(self):
        """Return the next (even, odd) tuple."""
        pair = next(self.generators)
        return pair

    def _gen1(self):
        """Yield 0, 2, 4, ... without end."""
        value = 0
        while True:
            yield value
            value = value + 2

    def _gen2(self):
        """Yield 1, 3, 5, ... without end."""
        value = 1
        while True:
            yield value
            value = value + 2
# Print the first three pairs produced by the zipped generators.
t = Test()
for _ in range(3):
    print(next(t))
Output:
(0, 1)
(2, 3)
(4, 5)
I was messing around with classes in python and wrote 2 little ones:
class ClaElement:
    """A span on a contig, described by start, end and orientation."""

    # Class-level placeholders; __init__ sets each of them per instance.
    start = None
    end = None
    basesLeft = None
    orientation = None
    contig = None
    size = None

    def __init__(self, contig, start, end, orientation, basesLeft=None):
        """Store the fields and derive ``size`` as ``end - start``."""
        self.contig, self.orientation = contig, orientation
        self.start, self.end = start, end
        self.basesLeft = basesLeft
        self.size = end - start

    def __str__(self):
        """Return ``{ClaElement: contig_start_end_orientation}``."""
        fields = (self.contig, self.start, self.end, self.orientation)
        return "{ClaElement: " + "_".join(map(str, fields)) + "}"

    def getSize(self):
        """Return the size computed at construction time."""
        return self.size
class ClaCluster:
    """An ordered collection of elements that belong to one contig."""

    contig = None

    def __init__(self, contig, firstElement):
        """Create a cluster on *contig* containing *firstElement*."""
        self.contig = contig
        # Per-instance list. The original kept the list as a class
        # attribute, so every cluster appended to the same shared list.
        self.clusterElements = []
        self.addElement(firstElement)

    def addElement(self, claElement):
        """Append *claElement* to the end of the cluster."""
        self.clusterElements.append(claElement)

    def getFirst(self):
        """Return the first element added."""
        return self.clusterElements[0]

    def getLast(self):
        """Return the most recently added element."""
        return self.clusterElements[-1]

    def getElements(self):
        """Return the underlying list of elements (not a copy)."""
        return self.clusterElements

    def getContig(self):
        """Return the contig this cluster belongs to."""
        return self.contig

    def __str__(self):
        """Return ``{ClaCluster: <contig> <count> elements}``."""
        return "{ClaCluster: "+str(self.contig)+" "+str(len(self.clusterElements))+" elements}"
And my test-main:
from ClaElement import ClaElement
from ClaCluster import ClaCluster
if __name__ == '__main__':
    # Build one element, wrap it in a cluster and store the cluster in a
    # dict, then print the dict, the cluster and the element in that order.
    ele = ClaElement("x", 1, 2, "left")
    cluster = ClaCluster("x", ele)
    claDict = {"hello": cluster}
    print(claDict)
    print(claDict["hello"])
    print(ele)
This leads to the following output:
{'hello': <ClaCluster.ClaCluster object at 0x7fe8ee04c5f8>}
{ClaCluster: x 1 elements}
{ClaElement: x_1_2_left}
Now my question is why is the output of my first print the memory address even though I provided a functioning string-method for my class ClaCluster? Is there a way to get the method invoked when I am printing the dictionary or do I have to iterate by hand?
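The built-in dict type does not call your class's __str__() when it is printed: a dict formats each of its keys and values with repr(), which uses your class's __repr__() method, not __str__(). Simply rename your method to __repr__() (str() falls back to __repr__() when no __str__() is defined), and all should work fine.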