GAE Search: Relationships between documents in index - python

I'd like to return a result object which contains the indexed document AND other information, from another entity, with which the indexed document has a relationship.
So, let's say I have two Kinds:
class Store(ndb.Model):
    # Datastore kind describing a store. It derives from ndb.Model because
    # ndb properties are only managed on Model subclasses; the handler base
    # class is for request handlers such as NewProduct.
    store_name = ndb.StringProperty()
    logo_url = ndb.StringProperty()
    about_store = ndb.TextProperty()
class Product(ndb.Model):
    # Datastore kind for a product created by a Store.
    # The property class has to be instantiated; assigning the bare class
    # would not declare a property at all.
    product_name = ndb.StringProperty()
    store_key = ndb.KeyProperty()  # Store entity which created this product.
Then, I add each new Product entity to the index, like this:
class NewProduct(BaseHandler):
    """Request handler that creates a Product and adds it to the search index."""

    def get(self, store_id):
        """Render the new-product form."""
        self.render('new-product.html')

    def post(self, store_id):
        """Store a Product for ``store_id`` and index it in 'product_index'.

        Any failure is logged and re-raised so it is not silently swallowed.
        """
        import logging

        product_name = self.request.get('product_name')
        store_key = ndb.Key('Store', store_id)
        try:
            p = Product(
                store_key=store_key,
                product_name=product_name)
            p.put()
            # Add p to index
            p_doc = search.Document(
                doc_id=str(p.key.id()),
                fields=[
                    # The store id comes from the URL argument of this handler.
                    search.AtomField(name='store_id', value=str(store_id)),
                    search.TextField(name='product_name', value=p.product_name)])
            index = search.Index('product_index')
            index.put(p_doc)
        except Exception:
            # handle error
            logging.exception('Could not create or index product for store %s', store_id)
            raise
Now, if I run a search query using...
# Open the index by the same name the product documents were put into.
index = search.Index('product_index')
# Run the query string against that index; the returned results are not kept here.
index.search('PRODUCT_NAME')
I should be able to return all the Product documents from the index by its query string.
My question is: How do I efficiently return a result object which contains both the product document AND its Store kind information (store_name, logo_url, about_store)?

Related

How to compare what values two models/querysets share in a view for later display in the template?

I'm trying to compare all the things in ModelOne with ModelTwo, to check which things are or are not in one or the other model, then put this in the view context for display in the template.
class Things(models.Model):
    # A named thing that both ModelOne and ModelTwo can reference.
    name = models.CharField()


class ModelOne(models.Model):
    # First holder of a many-to-many set of Things.
    things = models.ManyToManyField(Things)


class ModelTwo(models.Model):
    # Second holder of a many-to-many set of Things.
    things = models.ManyToManyField(Things)
How would you do this?
one_instance = ModelOne.objects.get(id=one_id)
two_instance = ModelTwo.objects.get(id=two_id)

# Fetch only the primary keys so the comparison is done on plain sets.
one_thing_ids = set(one_instance.things.values_list("id", flat=True))
two_thing_ids = set(two_instance.things.values_list("id", flat=True))

shared_thing_ids = one_thing_ids & two_thing_ids
thing_ids_in_one_not_in_two = one_thing_ids - two_thing_ids
thing_ids_in_two_not_in_one = two_thing_ids - one_thing_ids

# The many-to-many target model is declared as ``Things``.
shared_things = Things.objects.filter(id__in=shared_thing_ids)
You can then pass shared_things queryset into the template for display.
If your Thing model only has a name field and the names are unique we can simplify a little by altering the model:
class Things(models.Model):
    # Unique names let a name identify a thing without a second lookup.
    name = models.CharField(unique=True)
or even:
class Things(models.Model):
    # The name itself is the primary key.
    name = models.CharField(primary_key=True, unique=True)
(in this case the db table will not have an id column, it's not needed)
Either way we can then eliminate the extra Thing query at the end:
# Load the two instances whose related things are compared.
one_instance = ModelOne.objects.get(id=one_id)
two_instance = ModelTwo.objects.get(id=two_id)

# Collect the related names of each side as a set.
one_thing_names = set(one_instance.things.values_list("name", flat=True))
two_thing_names = set(two_instance.things.values_list("name", flat=True))

# Names on both sides, then names that appear on one side only.
shared_thing_names = one_thing_names.intersection(two_thing_names)
thing_names_in_one_not_in_two = one_thing_names.difference(two_thing_names)
thing_names_in_two_not_in_one = two_thing_names.difference(one_thing_names)
...and just pass sets of string names into the template.

Odoo: Values of Many2many with dynamic domain aren't getting saved

I am trying to dynamically change the values of a many2many field products_ids based on multiple onchange functions of other fields (e.g. brand_id and origin_id).
So far everything is working great and it does show the expected values, but once I hit the save button the values of the many2many field disappear.
class CustomModifyPrice(models.Model):
    """Picks products by brand and origin.

    ``product_ids`` is recomputed from the current ``brand_id`` and
    ``origin_id`` selections whenever either of them changes in the form.
    """

    brand_id = fields.Many2many(comodel_name="custom.brand", string="Brand", required=False, )
    origin_id = fields.Many2many(comodel_name="custom.country", string="Origin", required=False, )
    # NOTE(review): this field is read-only; the accompanying answer says the
    # view must set force_save="1" on it for the values to survive a save.
    product_ids = fields.Many2many(comodel_name="custom.product", string="Products", readonly=True, )

    # Kept only so existing references do not break. They are no longer
    # written to: as class attributes they were shared by every record.
    search_terms = {}
    product_ids_list = []

    @api.onchange('brand_id')
    def onchange_change_brand(self):
        """Refresh the product list when the selected brands change."""
        for rec in self:
            rec.get_products()

    @api.onchange('origin_id')
    def onchange_change_origin(self):
        """Refresh the product list when the selected origins change."""
        for rec in self:
            rec.get_products()

    def get_products(self):
        """Set ``product_ids`` to the products matching the chosen brands and origins.

        Each non-empty selection contributes one domain term. When neither
        field has a selection no search is run and the list is emptied.
        """
        domain = []
        brand_ids = self.brand_id.ids
        if brand_ids:
            domain.append(('brand_id', 'in', brand_ids))
        origin_ids = self.origin_id.ids
        if origin_ids:
            domain.append(('country_id', 'in', origin_ids))

        matched_ids = []
        if domain:
            matched_ids = self.env['custom.product'].search(domain).ids
        # (6, False, ids) replaces the whole relation with the given ids.
        self.product_ids = [(6, False, matched_ids)]
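Make sure force_save="1" is set as an attribute on the product_ids field in your view (XML file). The field is declared readonly=True, and the values of a read-only field are not sent back on save unless force_save is set.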

django - can't assign a foreign key

For unknown reasons, I cannot assign a foreign key instance of Item_rarity table into Detailed_item table. Django throws an error:
Cannot assign "u'Basic'": "Detailed_item.rarity" must be a "Item_rarity" instance.
... But in Item_rarity dictionary "Basic" record exists - I can choose it from admin panel and create Detailed_item record manually.
I have defined models:
class Detailed_item(models.Model):
    # One item; rarity and both type columns point at the lookup tables below.
    item_id = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=50)
    level = models.IntegerField()
    icon = models.CharField(max_length=150)
    rarity = models.ForeignKey('Item_rarity')
    general_type = models.ForeignKey('Item_type')
    detailed_type = models.ForeignKey('Item_detailed_type')


class Item_rarity(models.Model):
    # Lookup table keyed by the rarity name itself.
    name = models.CharField(max_length=15, primary_key=True)


class Item_type(models.Model):
    # Lookup table keyed by the general type name.
    name = models.CharField(max_length=15, primary_key=True)


class Item_detailed_type(models.Model):
    # Lookup table keyed by the detailed type name.
    name = models.CharField(max_length=20, primary_key=True)
In views, I try to populate it in this manner (inserting multiple items):
...
# Snippet that reproduces the reported error: the three foreign-key keyword
# arguments below are given values taken straight from the item dict instead
# of model instances.
items = get_all_items() #get dict of items
for element in items:
tmp_det_type = ''
for key, val in element.iteritems():
#get 'detailed type' from inner dict
if key == "type":
tmp_det_type = val
item = Detailed_item(
item_id=element['id'],
name=element['name'],
level=element['level'],
icon=element['icon'],
# This is the assignment named in the "Cannot assign" message.
rarity=element['rarity'], #error
general_type=element['type'],
detailed_type=tmp_det_type,
)
item.save()
...
I even tried to hard code "Basic" string, but it doesn't work either.
* Solved *
Next two entries, that is Item_type and Item_detailed_type were also invalid.
Correct code:
from app.models import Detailed_item, Item_rarity, Item_type, Item_detailed_type
...
items = get_all_items()  # get dict of items
for element in items:
    # get 'detailed type' from inner dict; stays '' when there is no "type" key
    tmp_det_type = element.get("type", '')

    # create objects with string values
    rarity_obj = Item_rarity(name=element['rarity'])
    general_type_obj = Item_type(name=element['type'])
    detailed_type_obj = Item_detailed_type(name=tmp_det_type)

    # Every foreign key now receives a model instance.
    item = Detailed_item(
        item_id=element['id'],
        name=element['name'],
        level=element['level'],
        icon=element['icon'],
        rarity=rarity_obj,
        general_type=general_type_obj,
        detailed_type=detailed_type_obj,
    )
    item.save()
...
An Item_rarity instance should be passed when storing a Detailed_item object, since rarity is a foreign key on Detailed_item that points to Item_rarity.
It looks like you passed the string 'Basic' instead of the <Basic Object> itself.
When creating an object with Django's ORM, a foreign-key field must be given the related instance itself rather than the id (pk) of that object, whereas when fetching data from the database you can use either the instance or the id (pk) of the instance.
# Minimal pair of models: MyModel.some_field is a foreign key to ParentModel.
class ParentModel(models.Model):
model_field = models.CharField(max_length=16)
class MyModel(models.Model):
some_field = models.ForeignKey('ParentModel')
# Create the parent first, then hand the parent instance to the child.
parent_model = ParentModel.objects.create(model_field='some_data')
my_model = MyModel.objects.create(some_field=parent_model)
^^^^^^^^^^^^
Note here that the parent_model object itself is passed instead of the id
While fetching the data back,
# Look up by the related instance...
parent_model = ParentModel.objects.get(model_field='some_data')
my_model = MyModel.objects.get(some_field=parent_model)
or
# ...or by the related instance's id.
my_model = MyModel.objects.get(some_field=parent_model.id)
Both would work in case of data fetch.
You do not have to provide the related object on creation if you change the kwarg in to rarity_name:
item = Detailed_item(
    item_id=element['id'],
    name=element['name'],
    level=element['level'],
    icon=element['icon'],
    # Django names a ForeignKey's raw-value attribute ``<field>_id`` whatever
    # the target model's primary key is called, so the keyword is
    # ``rarity_id`` (``rarity_name`` would be rejected as an unexpected
    # keyword argument). The other two foreign keys are passed the same way.
    rarity_id=element['rarity'],  # no error
    general_type_id=element['type'],
    detailed_type_id=tmp_det_type,
)
I have only tested this with the regular id field (the auto pk) but it
should work with your primary key just fine.
E.g.
class SimpleModel(Model):
    value = TextField(blank=True)


class ComplexModel(Model):
    simple = ForeignKey(SimpleModel)
    title = TextField(unique=True)


# The raw primary key goes in through ``simple_id``; no SimpleModel instance
# has to be loaded first.
ComplexModel.objects.create(title='test', simple_id=1)

Django - Getting/Saving large objects takes a lot of time

I'm trying to get a few million of items from a model, and parsing them. However, somehow it spends a lot of time trying to get the data saved.
These are the current models that I have:
class mapt(models.Model):
    # Maps an integer key ``s`` to a name.
    s = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=2000)

    def __unicode__(self):
        return str(self.s)


class datt(models.Model):
    # One var/val pair that belongs to the set ``setid``.
    s = models.IntegerField(primary_key=True)
    setid = models.IntegerField()
    var = models.IntegerField()
    val = models.IntegerField()

    # Spelled with both pairs of underscores so it is the method Django
    # calls, like the sibling models; ``__unicode`` alone is a
    # name-mangled private method that is never used.
    def __unicode__(self):
        return str(self.s)


class sett(models.Model):
    # One set; block and username hold integer keys.
    setid = models.IntegerField(primary_key=True)
    block = models.IntegerField()
    username = models.IntegerField()
    ts = models.IntegerField()

    def __unicode__(self):
        return str(self.setid)


class data_parsed(models.Model):
    # Flattened result row for one set.
    # max_length was dropped here: it has no effect on an IntegerField.
    setid = models.IntegerField(primary_key=True)
    block = models.CharField(max_length=2000)
    username = models.CharField(max_length=2000)
    data = models.CharField(max_length=200000)
    time = models.IntegerField()

    def __unicode__(self):
        return str(self.setid)
The s parameter for the datt model should actually act as a foreign key to mapt's s parameter. Furthermore, datt's setid field should act as a foreign key to sett's setid.
Lastly, data_parsed's setid is a foreign key to sett's models.
The algorithm is currently written this way:
# Rebuilds data_parsed rows for every sett whose setid is >= start_data_parsed.
# For each set it resolves the user, block, var and val keys to names through
# mapt, serializes the var/val pairs into one string and saves the result.
def database_rebuild(start_data_parsed):
listSetID = []
#Part 1
# Collect the setid of every matching sett row.
for items in sett.objects.filter(setid__gte=start_data_parsed):
listSetID.append(items.setid)
# Same list object under a second name; nothing is de-duplicated here.
uniqueSetID = listSetID
#Part 2
for items in uniqueSetID:
try:
SetID = items
# The sett row is fetched again by id, followed by one mapt lookup per name.
settObject = sett.objects.get(setid=SetID)
UserName = mapt.objects.get(pk=settObject.username).name
# Chained assignment: binds both TS and a local named pk to the timestamp.
TS = pk=settObject.ts
BlockName = mapt.objects.get(pk=settObject.block).name
DataPairs_1 = []
DataPairs_2 = []
DataPairs_1_Data = []
DataPairs_2_Data = []
# Gather the var and val keys of every datt row in this set.
for data in datt.objects.filter(setid__exact=SetID):
DataPairs_1.append(data.var)
DataPairs_2.append(data.val)
# One mapt query per var key and one per val key.
for Data in DataPairs_1:
DataPairs_1_Data.append(mapt.objects.get(pk=Data).name)
for Data in DataPairs_2:
DataPairs_2_Data.append(mapt.objects.get(pk=Data).name)
# Both lists are appended to in the same loop above.
assert (len(DataPairs_1) == len(DataPairs_2)), "Length not equal"
#Part 3
# Build "<var>:PARSEABLE:<val>:PARSEABLENEXT:" per pair and concatenate them.
Serialize = []
for idx, val in enumerate(DataPairs_1_Data):
Serialize.append(str(DataPairs_1_Data[idx]) + ":PARSEABLE:" + str(DataPairs_2_Data[idx]) + ":PARSEABLENEXT:")
Serialize_Text = ""
for Data in Serialize:
Serialize_Text += Data
Data = Serialize_Text
# Positional arguments follow the field order declared on data_parsed.
p = data_parsed(SetID, BlockName, UserName, Data, TS)
p.save()
# Errors are printed and the loop moves on to the next set.
except AssertionError, e:
print "Error:" + str(e.args)
print "Possibly DataPairs does not have equal length"
except Exception as e:
print "Error:" + str(sys.exc_info()[0])
print "Stack:" + str(e.args)
Basically, what it does is that:
Finds all sett objects that is greater than a number
Gets the UserName, TS, and BlockName, then get all the fields in datt field that correspond to a var and val field maps to the mapt 's' field. Var and Val is basically NAME_OF_FIELD:VALUE type of relationship.
Serialize all the var and val parameters so that I could get all the parameters from var and val that is spread across the mapt table in a row in data_parsed.
The current solution does everything I would like it to; however, on an Intel Core i5-4300U CPU @ 1.90GHz, it parses around 15000 rows of data daily on a celery periodic worker. I have around 3355566 rows of data in my sett table, and it will take roughly 23 days to parse them all.
Is there a way to speed up the process?
============================Updated============================
New Models:
class mapt(models.Model):
    # Maps an integer key ``s`` to a name.
    s = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=2000)

    def __unicode__(self):
        return str(self.s)


class sett(models.Model):
    # One set; block and username are now foreign keys into mapt.
    setid = models.IntegerField(primary_key=True)
    block = models.ForeignKey(mapt, related_name='sett_block')
    username = models.ForeignKey(mapt, related_name='sett_username')
    ts = models.IntegerField()

    def __unicode__(self):
        return str(self.setid)
# class sett(models.Model):
# setid = models.IntegerField(primary_key=True)
# block = models.IntegerField()
# username = models.IntegerField()
# ts = models.IntegerField()
# def __unicode__(self):
# return str(self.setid)
class datt(models.Model):
    # One var/val pair of a set; setid, var and val are foreign keys.
    s = models.IntegerField(primary_key=True)
    setid = models.ForeignKey(sett, related_name='datt_setid')
    var = models.ForeignKey(mapt, related_name='datt_var')
    val = models.ForeignKey(mapt, related_name='datt_val')

    # Spelled with both pairs of underscores so it is the method Django
    # calls; ``__unicode`` alone is a name-mangled private method.
    def __unicode__(self):
        return str(self.s)
# class datt(models.Model):
# s = models.IntegerField(primary_key=True)
# setid = models.IntegerField()
# var = models.IntegerField()
# val = models.IntegerField()
# def __unicode(self):
# return str(self.s)
class data_parsed(models.Model):
    # Flattened result row for one set; the foreign key to sett is also the
    # primary key.
    setid = models.ForeignKey(sett, related_name='data_parsed_setid', primary_key=True)
    block = models.CharField(max_length=2000)
    username = models.CharField(max_length=2000)
    data = models.CharField(max_length=2000000)
    time = models.IntegerField()

    def __unicode__(self):
        return str(self.setid)
New Parsing:
def database_rebuild(start_data_parsed, end_data_parsed):
    """Rebuild the data_parsed rows for setids in the given inclusive range.

    For every sett in the range, the var/val names of its datt rows are
    serialized as ``<var>:PARSEABLE:<val>`` joined with ``:PARSEABLENEXT:``
    and saved together with the block name, user name and timestamp.

    The related mapt rows are loaded through ``select_related`` instead of
    one ``mapt.objects.get()`` per name, so each set costs one query for
    its datt rows rather than two extra queries per pair.

    NOTE(review): ``select_related`` joins on the foreign keys. Rows whose
    mapt target is missing are presumably left out of the result instead
    of raising DoesNotExist; confirm the tables have no dangling keys.

    Errors while processing one set are printed and do not stop the loop.
    """
    setts = (sett.objects
             .filter(setid__gte=start_data_parsed, setid__lte=end_data_parsed)
             .select_related('username', 'block'))
    # iterator() keeps the full result set from being cached in memory.
    for item in setts.iterator():
        try:
            pairs = (datt.objects
                     .filter(setid_id=item.setid)
                     .select_related('var', 'val'))
            data = ":PARSEABLENEXT:".join(
                str(pair.var.name) + ":PARSEABLE:" + str(pair.val.name)
                for pair in pairs)
            # Positional arguments follow the field order on data_parsed.
            p = data_parsed(item.setid, item.block.name, item.username.name,
                            data, item.ts)
            p.save()
        except Exception as e:
            # Single-argument print() prints the same text on Python 2 and 3.
            print("Error:" + str(type(e)))
            print("Stack:" + str(e.args))
Building lists by appending repeatedly is very slow. Use list comprehensions or even just the list() constructor.
In python you should not join a list of strings using for loops and +=, you should use join().
But that is not the primary bottleneck here. You have a lot of objects.get() calls, each of which takes a database round trip. If you didn't have millions of rows in the mapt table, you should probably just make a dictionary mapping mapt primary keys to mapt objects.
Had you defined your foreign keys as foreign keys, the Django ORM could help you do much of this in about five queries in total. That is, instead of SomeModel.objects.get(id=some_instance.some_fk_id) you can do some_instance.some_fk (which will only hit the database the first time you do it for each instance). You can then even get rid of the foreign key query if some_instance had been initialized as some_instance = SomeOtherModel.objects.select_related('some_fk').get(id=id_of_some_instance).
Perhaps changing the models without changing the database will work.

How do I construct an AND query on the same field in the URL of TastyPie?

I want to filter results in the tastypie to get results that conform to both of two filters on the same field.
So if I have a simple model like this...
class Item(models.Model):
    # Simple model with the two text fields used in the filter examples.
    name = models.CharField(max_length=255)
    description = models.TextField()
With a ModelResource...
class ItemResource(ModelResource):
    # API resource for Item; every filter type is allowed on both fields.
    ...

    class Meta():
        queryset = Item.objects.all()
        resource_name = 'item'
        filtering = {'name': ALL, 'description': ALL}
I can easily construct 'AND' queries in the url of tastypie:
/api/v1/item/?name__contains=hello&description__contains=foo
But if I want to construct an AND operator on the same field, it only takes the second argument and ignores the first. That is,
/api/v1/item/?name__contains=hello&name__contains=world
returns resources whose name field contains 'world' but not those whose name field contains BOTH 'hello' and 'world'.
I understand how to do this directly in django:
Item.objects.filter(name__contains='hello').filter(name__contains='world')
But how do I construct this kind of a query in the URL of the tastypie?
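I'm using the code below. It gives you support for comma-separated values such as name__contains=hello,world (the values are OR-ed together, so this matches names containing either word), and you can also do negations with name__contains!=foo.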
def build_filters(self, filters=None):
    """
    Adds support for negation filtering

    Keys ending in '!' (as in ``name__contains!=foo``) are collected as
    exclusions with the '!' removed; all other keys are normal filters.
    Both groups are passed through the parent class's build_filters.

    Returns a dict with the keys 'filter' and 'exclude', or an empty dict
    when no filters were given, so apply_filters() can always call
    ``.get()`` on the result.
    """
    if not filters:
        return {}

    self.filters = filters

    include_params = {}
    exclude_params = {}
    for expr in filters:
        if expr.endswith('!'):
            # Exclude filtering: drop the trailing '!'
            exclude_params[expr[:-1]] = filters[expr]
        else:
            # Normal filtering
            include_params[expr] = filters[expr]

    return {
        'filter': super(MainBaseResource, self).build_filters(include_params),
        'exclude': super(MainBaseResource, self).build_filters(exclude_params),
    }
def apply_filters(self, request, applicable_filters):
    """
    Adds support for:
    1. negation filtering: value_date__year!=2013
    2. multiple filtering value_date__year=2013,2012

    ``applicable_filters`` is the dict produced by build_filters(), with
    the optional keys 'filter' and 'exclude'. A comma-separated string
    value matches any of its parts (OR). Different keys are still combined
    with AND. The passed-in dict is not modified.
    """
    from functools import reduce
    import operator

    from django.db.models import Q

    objects = self.get_object_list(request)

    include = applicable_filters.get('filter')
    if include:
        plain = {}
        any_of = []
        for key, val in include.items():
            # Values that are already sequences are passed through as they
            # are; presumably they come from lookups that were split
            # upstream (verify against the parent build_filters).
            if isinstance(val, (list, tuple)):
                plain[key] = val
                continue
            parts = str(val).split(',')
            if len(parts) > 1:
                # One OR group per key: key=a OR key=b ...
                any_of.append(reduce(operator.or_, [Q((key, part)) for part in parts]))
            else:
                plain[key] = val
        # Positional Q objects and keyword lookups are ANDed by filter().
        objects = objects.filter(*any_of, **plain)

    exclude = applicable_filters.get('exclude')
    if exclude:
        objects = objects.exclude(**exclude)
    return objects

Categories

Resources