Sort by grouping using first occurrence in python?

Sort by grouping using first occurrence in python? - python

In python, I have a list of classes that have a string field called category.
Let's consider the following example:
mylist[0].category # = "furniture"
mylist[1].category # = "car"
mylist[2].category # = "fruit"
mylist[3].category # = "car"
mylist[4].category # = "furniture"
My question is: how can I reorder the list so that items are grouped by category, with the groups ordered by the first occurrence of each category?
Using the previous example, the result would be:
mylist[0].category # = "furniture"
mylist[1].category # = "furniture"
mylist[2].category # = "car"
mylist[3].category # = "car"
mylist[4].category # = "fruit"

First, get a list of the categories in the same order as my_list. Then, sort my_list according to the position of the first appearance of each item's category in the list of categories.
categories = [item.category for item in my_list]
my_list.sort(key = lambda item: categories.index(item.category))

# Create a first-appearance index: category -> position of the first item
# that carries that category.
order = {}
for ndx, item in enumerate(mylist):
    if item.category not in order:
        order[item.category] = ndx
# Sort by that index. The dict is keyed by category strings, so the key
# function must look up the item's category, not the item itself.
# list.sort() is stable, so items of one category keep their relative order.
mylist.sort(key=lambda i: order[i.category])

You can achieve this by traversing the list twice (no sorting):
from collections import defaultdict
# Put all the items of the same category together, keeping their original
# relative order inside each bucket.
l = defaultdict(list)
for x in mylist:
    l[x.category].append(x)
# Expand the buckets in the order their categories appear in the list.
# pop() removes a bucket once it has been emitted; for later items of an
# already-emitted category the default [] is used, so nothing is added twice.
xs = []
for x in mylist:
    xs.extend(l.pop(x.category, []))

Perhaps something like this?
#!/usr/local/cpython-3.3/bin/python
import pprint
# Maps each category to the index at which it was first seen by
# extract_order(). It lives at module level because the comparison methods
# of Class_with_category look categories up in it.
CATEGORY_FIRST_SEEN = {}


def extract_order(list_of_class):
    """Record the first-seen index of every category in *list_of_class*.

    Each element must expose a ``category`` attribute. Categories that are
    already present in CATEGORY_FIRST_SEEN keep their existing index.
    """
    for index, element in enumerate(list_of_class):
        # setdefault stores the index only for a category not seen before.
        CATEGORY_FIRST_SEEN.setdefault(element.category, index)


class Class_with_category:
    """Item that sorts by the first-seen position of its category.

    extract_order() must have been called on a list containing every
    category being compared; otherwise the lookups below raise KeyError.
    """

    def __init__(self, category):
        self.category = category

    def __cmp__(self, other):
        """Return -1, 0 or 1 by comparing first-seen indices.

        Python 3 never calls __cmp__ implicitly; it is kept because __lt__
        delegates to it and Python 2 uses it directly.
        """
        mine = CATEGORY_FIRST_SEEN[self.category]
        theirs = CATEGORY_FIRST_SEEN[other.category]
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, other):
        """Less-than, the only comparison list.sort() needs."""
        return self.__cmp__(other) < 0

    def __str__(self):
        return self.category

    __repr__ = __str__


def main():
    """Build the example list, sort it by first occurrence and print it."""
    mylist = ["furniture", "car", "fruit", "car", "furniture"]
    list_of_class = [Class_with_category(element) for element in mylist]
    extract_order(list_of_class)
    list_of_class.sort()
    pprint.pprint(list_of_class)


main()
I've tested it to work on cpython 3.3, but I believe it should work on 2.x or 3.x.

Related

Sort list of objects by attribute, but attribute contains name, numbers and underscores PYTHON

A user has asked this question on here but it was in regards to a list of strings. I have a list of classes and want to modify the answer in this question:
original thread
Specifically, this code, works exactly the way I want but I need to modify it to work with class attributes not just a list of strings:
import re
## ref: https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
def sort_nicely( l ):
    """Sort the given list of strings in place in the way that humans expect.

    Each string is split into digit and non-digit runs; digit runs are
    compared as integers and underscores are ignored, so "test_64_01" sorts
    before "test_128_01". The sorted list is printed, and the function
    returns None (the result of print()).
    """
    def convert(text):
        # Digit runs become ints so that 64 < 128; other text stays a string.
        return int(text) if text.isdigit() else text

    def alphanum_key(key):
        # The capturing group makes re.split keep the digit runs in the result.
        return [convert(c.replace("_", "")) for c in re.split('([0-9]+)', key)]

    l.sort(key=alphanum_key)
    return print(l)
```
I've tried modifying the lambda expression in many ways but no matter how I modify it I always get the following error:
expected string or bytes-like object
Here is where I am currently at, which does not work. Any insights will be much appreciated
import re
class ContainerTest:
def __init__( self ):
self.name = ''
def sort_nicely( l ):
""" Sort the given list in the way that humans expect.
"""
convert = lambda x: x.name, lambda text: int( x ) if x.isdigit() else x
alphanum_key = lambda key: [ convert( c.replace( "_", "" ) ) for c in re.split( '([0-9]+)', key ) ]
l.sort( key=alphanum_key )
return print( l )
items = []
for i in range( 0, 2 ):
item = ContainerTest()
item.name = 'test_128_0' + str( i + 1 )
items.append( item )
for i in range( 0, 2 ):
item = ContainerTest()
item.name = 'test_64_0' + str( i + 1 )
items.append( item )
sort_nicely( items )

If you want to sort your ContainerTest instances by their names (using the logic developed to sort strings), you just need to make the key function you're sorting with get the attribute from the object it's being passed. The important variable is the key argument passed to alphanum_key, which will be the item in the list (an instance of the class, in this case).
Try:
alphanum_key = lambda key: [convert( c.replace("_", ""))
for c in re.split('([0-9]+)', key.name)]
The change is at the end of the line, where we do the regex split on key.name rather than key.

How to create multiple subclasses from a list in python?

I'm trying to get my data in an hierarchical way, so I decided to turn to subclasses. The file I'm getting the data from is formatted like this:
2WQZ_chain_A
Score = 338.0
53-164
208-317
327-595
611-654
2WQZ_chain_B
Score = 344.0
53-164
205-317
327-595
611-655
2XB6_chain_A
Score = 319.0
64-163
211-317
327-596
613-654
2XB6_chain_B
Score = 329.0
53-163
212-317
327-596
613-654
What I want to obtain is a first class named after the PDB name (e.g. 2WQZ), with subclasses called chain_A, chain_B and so on. These subclasses should contain an object called "score" and a third subclass called "intervals" containing the possible intervals. The general idea is something like this.
At the moment I have tried using a dictionary, but I ended up with the correct PDB class holding only the second chain. My code is:
class PDB(object):
def __init__(self, pdbname):
self.pdbid = pdbname
class Chain(PDB):
def __init__(self, chainame, score, pdbname):
self.chainid = chainame
self.score = score
super().__init__(pdbname)
making_class = open("covered_intervals.txt", "r").readlines()
pdblist = []
for i in making_class:
if "chain" in i:
pdblist.append(i[:4])
pdblist = list(dict.fromkeys(pdblist))
pdblist2 = dict.fromkeys(pdblist)
for i in pdblist:
pdblist2[i] = PDB(i)
for j in making_class:
if i in j:
chainame = j[5:12]
pdblist2[i] = Chain(chainame, 4, i)
4 is a placeholder, and I see why I get only the last chain, but have no idea how to get the two chains under the same PDB.

In this case, a dictionary can be created for the top-level nodes, and since it is a fixed-depth tree, class nesting is not required. The Chain class will have three components:
Chain Name
Score
List of Ranges - I have implemented a class for range
class Chain():
    """One chain of a PDB entry: its name, its score and its ranges."""

    def __init__(self, chainame, score=None):
        self.chainid = chainame
        self.score = score
        # A fresh list per instance, filled through add_range().
        self.ranges = []

    def add_range(self, range):
        """Append *range* to this chain's list of ranges."""
        self.ranges.append(range)

    def add_score(self, score):
        """Set (or overwrite) this chain's score."""
        self.score = score
class range1():
    """Integer interval parsed from a "start-end" string such as "53-164"."""

    def __init__(self, str):
        # Split on "-" and convert the first two pieces to integers.
        x = str.split("-")
        self.start = int(x[0])
        self.end = int(x[1])

    def __repr__(self):
        # Readable form for debugging and for printing lists of ranges.
        return "range1('{}-{}')".format(self.start, self.end)
counter = 0   # position inside the current block: 0 = header, 1 = score, 2 = ranges
pdb = ""      # PDB id of the block being read
ch = None     # Chain being assembled; None while no block is open
data = {}     # PDB id -> list of Chain objects, in file order

with open("covered_intervals.txt", "r") as f:
    for line in f:
        line = line.strip()
        if line == "":
            # A blank line closes the current block. Only store a chain that
            # is actually open, so a leading blank line or several blank
            # lines in a row add neither None nor a duplicate chain.
            if ch is not None:
                data.setdefault(pdb, []).append(ch)
                ch = None
            counter = 0
        elif counter == 0:
            # Header line, e.g. "2WQZ_chain_A" -> pdb "2WQZ", chain "chain_A".
            x = line.split("_", 1)
            pdb = x[0]
            chainname = x[1]
            ch = Chain(chainname)
            counter = counter + 1
        elif counter == 1:
            # Score line, e.g. "Score = 338.0".
            ch.add_score(float(line.split("=")[1]))
            counter = counter + 1
        else:
            # Every remaining line of the block is a "start-end" range.
            ch.add_range(range1(line))

# Store the last block when the file does not end with a blank line.
if ch is not None:
    data.setdefault(pdb, []).append(ch)

First I would suggest creating something, that can parse one textblock from the file into usable variables, for example like this:
def parse_block(lines):
    """Split one text block into (pdb_name, chain, score, intervals).

    *lines* is the block as a list of lines: a header such as
    "2WQZ_chain_A", a score line such as "Score = 338.0", then one interval
    per line. The score is returned as a stripped string, not a number, and
    the intervals are returned as the remaining lines unchanged.
    """
    # The header is sliced at fixed positions: the first four characters are
    # the PDB name and everything after the separator at index 4 is the chain.
    pdb_name = lines[0][:4]
    chain = lines[0][5:]
    score = lines[1].split("=")[1].strip()
    intervals = lines[2:]
    return (pdb_name, chain, score, intervals)
Using this, you could build your classes, or use a nested dictionary, which would fit the data structure well, too.
from collections import defaultdict
with open("pdbdata", "r") as f:
    content = f.read()

# pdb name -> chain name -> {"score": ..., "intervals": [...]}
pdb_dict = defaultdict(dict)
for block in content.split("\n\n"):
    lines = block.strip().splitlines()
    if not lines:
        # A trailing blank line or extra newlines between blocks yield empty
        # chunks; parse_block would fail on them with an IndexError.
        continue
    pdb_name, chain, score, intervals = parse_block(lines)
    pdb_dict[pdb_name][chain] = {"score": score, "intervals": intervals}
The resulting nested dict looks like this:
{'2WQZ': {'chain_A': {'intervals': ['53-164', '208-317', '327-595', '611-654'],
'score': '338.0'},
'chain_B': {'intervals': ['53-164', '205-317', '327-595', '611-655'],
'score': '344.0'}},
'2XB6': {'chain_A': {'intervals': ['64-163', '211-317', '327-596', '613-654'],
'score': '319.0'},
'chain_B': {'intervals': ['53-163', '212-317', '327-596', '613-654'],
'score': '329.0'}}}

TypeError: list indices must be integers or slices, not list - What do i do?

I keep getting the error "TypeError: list indices must be integers or slices, not list" and I'm not sure how to fix it. What do I need to change?
#app.route("/browse/<where>")
def collectPage(where):
for item in lostItems:
if item[1] == where:
passedItem = lostItems[item]
return render_template("mainPage.html", collect = Collect, item = passedItem)

Try:
def collectPage(where):
    """Render mainPage.html for the first lost item whose field 1 equals *where*."""
    # NOTE(review): the snippet's indentation was lost, so it is unclear
    # whether the first or the last match was intended; this picks the first.
    # The two are the same when at most one item matches.
    passedItem = None  # stays None when nothing matches, instead of being unbound
    for item in range(len(lostItems)):
        # item is an integer index here, so the row has to be fetched from
        # lostItems before it can be subscripted.
        if lostItems[item][1] == where:
            passedItem = lostItems[item]
            break
    return render_template("mainPage.html", collect = Collect, item = passedItem)
Or
def collectPage(where):
for item in lostItems:
if item[1] == where:
passedItem = item
return render_template("mainPage.html", collect = Collect, item = passedItem)

It looks like lostItems is a nested list. The loop variable item is already an element of that list (itself a list), not an index, so lostItems[item] raises the error. You can change your statement to passedItem = item.
#app.route("/browse/<where>")
def collectPage(where):
for item in lostItems:
if item[1] == where:
passedItem = item
return render_template("mainPage.html", collect = Collect, item = passedItem)
Or you can use enumerate to access the list index:
#app.route("/browse/<where>")
def collectPage(where):
for indx,item in enumerate(lostItems):
if item[1] == where:
passedItem = lostItems[indx]
return render_template("mainPage.html", collect = Collect, item = passedItem)

return longest word from a string

I need to define a function called "len_score" that returns the length of a word. Call the "best" function with the len_score function as a parameter.
Output I want to be:
print(best(len_score, names), "has the longest name.")
McJagger has the longest name.
My code is:
def len_score(name):
lst=[len(x) for x in name]
return lst
def best(lst1,lst2):
final=zip(lst1,lst2)
return max final
names = ["Ben", "April", "Zaber", "Alexis", "McJagger", "J.J.", "Madonna"]
print(best(len_score, names) == 'McJagger')
But I got an error, and I'm not sure how to set the condition on the list.

I think what you're trying to do is:
def len_score(name):
    """Return a list holding the length of every item in *name*.

    Despite the singular parameter name, *name* is an iterable of strings
    (or other sized objects); the result has one length per item, in order.
    """
    lst = [len(x) for x in name]
    return lst
def best(func, lst):
    """Return the item of *lst* with the highest score.

    *func* is called once on the whole list and must return one score per
    item, in the same order. On a tie the earliest item wins, because
    list.index() returns the first position of the maximum. An empty *lst*
    raises ValueError from max().
    """
    func_lst = func(lst)  # call the passed in function ON lst
    return lst[func_lst.index(max(func_lst))]
names = ["Ben", "April", "Zaber", "Alexis", "McJagger", "J.J.", "Madonna"]
print(best(len_score, names) == 'McJagger')

You are almost there:
def len_score(name):
lst=[len(x) for x in name]
return lst
def best(lst1,lst2):
final=zip(lst1,lst2)
return max(final)
names = ["Ben", "April", "Zaber", "Alexis", "McJagger", "J.J.", "Madonna"]
print(best(len_score(names), names)[1])
print(best(len_score(names), names)[1] == 'McJagger')
Working code: https://py3.codeskulptor.org/#user302_Lx3nAppYJe_0.py

Group objects with an identical list attribute

I have two classes:
class A:
def __init__(self, name, li):
self.b_list = li
class B:
def __init__(self, i):
self.i = i
class A contains a list of objects of type class B.
Assuming I have a list of class A objects, how can I group the class A objects that have an identical b_list together?
For example:
a_list = []
li = [B(1), B(2), B(3)]
a_list.append(A(li))
li = [B(2), B(3)]
a_list.append(A(li))
li = [B(1), B(2), B(3)]
a_list.append(A(li))
After processing this should give us two lists, one list with the first and third A, and another list with only the second A. Or in a nutshell:
result = [
[ A([B(1),B(2),B(3)]), A([B(1),B(2),B(3)]) ],
[ A([B(2),B(3)] ]
]

For starters, I've removed the name parameter from class A, since the rest of your details omitted it.
To group your class A objects together, you're going to need to define exactly what is meant when two A objects are equal. I've created a __cmp__ method that will let us sort on A objects by comparing them.
Now, since your A objects are composed of B objects, you're going to need something to define what is meant by two B objects being equal. I've created a __eq__ method in class B that does that.
Next, I've sorted the A instances to make grouping them easier, and added a __str__ method to class A, and a __repr__ method to class B so you can verify that they are being grouped together correctly.
I haven't added any error checking anywhere, so this code is a little fragile.
class A:
    """Holds a list of B objects; compares by the B values, in order."""

    def __init__(self, li):
        self.b_list = li

    def _key(self):
        """Return the comparison key: the ``i`` value of each B, in order."""
        return [elem.i for elem in self.b_list]

    def __cmp__(self, other):
        """Three-way comparison returning -1, 0 or 1.

        Written without the cmp() builtin, which Python 3 removed. Python 2
        calls this method implicitly; Python 3 reaches it only through the
        rich comparison methods below.
        """
        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)

    # Python 3 ignores __cmp__, so sorted() needs __lt__ and == needs __eq__.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __str__(self):
        return "A({})".format(self.b_list)


class B:
    """Wraps a single value ``i``; two B objects are equal when ``i`` matches."""

    def __init__(self, i):
        self.i = i

    def __eq__(self, other):
        return self.i == other.i

    def __repr__(self):
        return "B({})".format(self.i)


def main():
    """Group the example A objects by identical b_list and print each group."""
    a_list = []
    li = [B(1), B(2), B(3)]
    a_list.append(A(li))
    li = [B(2), B(3)]
    a_list.append(A(li))
    li = [B(1), B(2), B(3)]
    a_list.append(A(li))

    result = []
    last_item = None
    # After sorting, equal items are adjacent, so each item only needs to be
    # compared with the one before it.
    for item in sorted(a_list):
        if last_item is not None and item == last_item:
            result[-1].append(item)
        else:
            result.append([item])
        last_item = item

    for row in result:
        # print() as a function works on Python 2 and 3 for a single argument.
        print([str(elem) for elem in row])


if __name__ == '__main__':
    main()
Result:
['A([B(1), B(2), B(3)])', 'A([B(1), B(2), B(3)])']
['A([B(2), B(3)])']

You can sort a_list using the attribute b_list as a key.
# Key on the integer values inside each B: the question's B class defines no
# ordering, so lists of B objects cannot be compared meaningfully.
a_list.sort(key=lambda a: [b.i for b in a.b_list])
or:
from operator import attrgetter
# b_list is an attribute, not a subscript key, so attrgetter is needed here;
# itemgetter("b_list") would evaluate a["b_list"] and fail on A objects.
# NOTE(review): this still compares lists of B, so B must define ordering.
a_list.sort(key=attrgetter("b_list"))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Sort by grouping using first occurrence in python? - python

First, get a list of the categories in the same order as my_list. Then, sort my_list according to the position of the first appearance of each item's category in the list of categories. categories = [item.category for item in my_list] my_list.sort(key = lambda item: categories.index(item.category))

# Create a first-appearance index: category -> position of the first item
# that carries that category.
order = {}
for ndx, item in enumerate(mylist):
    if item.category not in order:
        order[item.category] = ndx
# Sort by that index. The dict is keyed by category strings, so the key
# function must look up the item's category, not the item itself.
mylist.sort(key=lambda i: order[i.category])

Related

Sort list of objects by attribute, but attribute contains name, numbers and underscores PYTHON

How to create multiple subclasses from a list in python?

TypeError: list indices must be integers or slices, not list - What do i do?

return longest word from a string

Group objects with an identical list attribute

Categories

Resources