MongoEngine ReferenceField Text Index setup? - python

class Tag(db.Document):
    """A tag document holding a single label string."""
    # The label itself; declared unique.
    text = db.StringField(unique=True)
class Post(db.Document):
    """A post document that declares a weighted text index in ``meta``."""
    user = db.ReferenceField(User, required=True)
    pid = db.SequenceField(required=True, unique=True)
    description = db.StringField()
    title = db.StringField(required=True)
    # Both timestamps default to the utils.time_now callable.
    created = db.DateTimeField(default=utils.time_now, required=True)
    updated = db.DateTimeField(default=utils.time_now, required=True)
    # List of references to Tag documents (not embedded copies).
    tags = db.ListField(db.ReferenceField(Tag), default=[])
    ratings = db.EmbeddedDocumentListField(Rating, default=[])
    comments = db.EmbeddedDocumentListField(Comment, default=[])
    url = db.URLField()
    # A '$' prefix on a field name asks MongoEngine for a text index.
    # NOTE(review): `tags` holds references, so '$tags.text' presumably
    # cannot reach the Tag.text values stored in another collection --
    # confirm. The 'weights' key is also 'tags' rather than 'tags.text'.
    meta = {'indexes': [
        {'fields': ['$title', '$description', '$tags.text'],
         'default_language': 'english',
         'weights': {'title': 10, 'description': 5, 'tags': 2}
        }
    ]}
Here are my two documents. The Tag document is referenced in a ListField of the Post document. When I try to do a search with the text index I can successfully search on the title and description but not the tags. Does anyone know why? I couldn't find any helpful examples here: http://docs.mongoengine.org/guide/text-indexes.html

A ReferenceField type is only de-referenced on access by some internal MongoEngine magic. As such you cannot use it as an element in the text index (the list saved in the mongodb document will hold only bson ObjectId references).

Related

Using nested/embedded documents in Mongoengine with Python

I want to use nested values inside MongoDB, and from the documentation I understand this is done through embedded documents. If there are any other ways, please tell me.
I have the current code:
class compute_instances_subtype(EmbeddedDocument):
    """Embedded document stored inside Post.compute_instances."""
    # The only declared field on this embedded document.
    label_name = StringField()
class Post(Document):
    """A post with string tags and a list of embedded compute instances."""
    title = StringField(max_length=120, required=True)
    author = StringField(required=True)
    tags = ListField(StringField(max_length=30))
    # Each list element is an embedded compute_instances_subtype document.
    compute_instances = ListField(EmbeddedDocumentField(compute_instances_subtype))
# Build a post, attach two embedded documents, and save it.
post = Post(title="Quora rocks", author="Ross", tags=['tutorial', 'how-to'])
add_test0_label = compute_instances_subtype()
# NOTE: `title` is not a declared field on compute_instances_subtype.
add_test0_label.title = "test0"
add_test0_label.label_name= "value"
add_test1_label= compute_instances_subtype()
add_test1_label.title = "test1"
add_test1_label.label_name= "value"
post.compute_instances.append(add_test0_label)
post.compute_instances.append(add_test1_label)
post.save()
But my issue is that the document does not have a name for the Objects of the compute_instances field, it just says 0 and 1:
id : 60fec94dbb81d98abb557523
title : Quora rocks
author : Ross
tags : compute_instances : Array
0 : Object
label_name : value
1 : Object
label_name : value
I want to have 0 named test0 and 1 named test1.
Please guide me on how to achieve that.
Best regards
It's unclear what you use to print the object, but here are a few comments that might help you:
Your compute_instances_subtype class does not declare a title field, so when you set add_test0_label.title = "test0", it's just added to the Python object but is not saved to the database when you call .save().
The 0 and 1 that you see in your output are the indexes of the objects in the compute_instances array.
Here is a simplified example that might also be helpful:
class ComputeInstancesSubtype(EmbeddedDocument):
    """Embedded document declaring both a label name and a title."""
    label_name = StringField()
    title = StringField()
class Post(Document):
    """A post holding a list of embedded ComputeInstancesSubtype documents."""
    title = StringField(max_length=120, required=True)
    compute_instances = ListField(EmbeddedDocumentField(ComputeInstancesSubtype))
# Create two embedded documents, store them on a post, and save it.
nested1 = ComputeInstancesSubtype(title='something0', label_name='test0')
nested2 = ComputeInstancesSubtype(title='something1', label_name='test1')
post = Post(title="Quora rocks", compute_instances = [nested1, nested2])
post.save()
print(Post._get_collection().find_one()) # print it in its raw format
returns
{'_id': ObjectId('61071c5e3686c4066dadbc05'),
'title': 'Quora rocks',
'compute_instances': [{'label_name': 'test0', 'title': 'something0'},
{'label_name': 'test1', 'title': 'something1'}]
}

read_only but still populated in create method on serializer

I have a custom create method on my serializer for adding tags where the consumer could solely send a payload with the tags key containing a list of tags' names.
{
"id": 1,
"title": "Testing",
...
"tags": ["Python", "Django", "Go"]
}
Serializer:
class StreamSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize a Stream together with its nested streamer and tags.

    ``create`` is customised so that a payload can carry the streamer
    details and a list of tag names alongside the stream fields.
    """
    streamer = StreamerSerializer()
    tags = TagSerializer(many=True)

    class Meta:
        model = Stream
        fields = [
            "id",
            "source",
            "stream_id",
            "started_at",
            "collected_at",
            "title",
            "thumbnail_url",
            "viewer_count",
            "video_type",
            "language",
            "streamer",
            "stream_data",
            "tags",
            "live_now",
        ]
        # Disable the default validators on the tags field.
        extra_kwargs = {"tags": {"validators": []}}

    def create(self, validated_data):
        """Create (or refresh) the stream described by ``validated_data``.

        Args:
            validated_data: validated payload; must contain ``streamer``
                (with a ``user_id`` key) and ``tags``.

        Returns:
            The created or updated ``Stream`` instance.

        Raises:
            KeyError: if ``streamer``, ``tags`` or ``user_id`` is missing.
            Tag.DoesNotExist: if a tag name matches no existing ``Tag``.
        """
        # Pop streamer and tags from the payload; what is left are the
        # plain Stream fields.
        streamer_data = validated_data.pop("streamer")
        tag_names = validated_data.pop("tags")

        # get_or_create the streamer for this stream.
        streamer_user_id = streamer_data.pop("user_id")
        streamer, _ = Streamer.objects.get_or_create(
            user_id=streamer_user_id, defaults=streamer_data
        )

        # Prevent duplicates if the stream crashes or a node changes:
        # update_or_create refreshes an existing stream with the new data,
        # which get_or_create would silently leave untouched.
        stream, _ = Stream.objects.update_or_create(
            streamer=streamer, defaults=validated_data
        )

        # Resolve every tag first so an unknown name fails before any
        # relation is written, then attach them in one call. add() writes
        # the relation rows itself, so no extra save() is needed.
        tags = [Tag.objects.get(name=tag_name) for tag_name in tag_names]
        stream.tags.add(*tags)
        return stream
I would like for tags to have read_only=True, but by doing this I get a KeyError when posting to this endpoint since this is now excluded from any write methods.
class StreamSerializer(serializers.HyperlinkedModelSerializer):
    # Excerpt: the rest of the class is elided with `...` below.
    streamer = StreamerSerializer()
    tags = TagSerializer(many=True, read_only=True) # add read_only
    ...
What could I do in order to not have tags necessary to validate, but still have access to the field in my create method? Would I need a custom validator for this?
This doesn't exactly answer the question, but does achieve the goal I'm going for using to_internal_value on my TagSerializer.
class TagSerializer(serializers.HyperlinkedModelSerializer):
    """Serialize a Tag; on input, accept a bare tag name and resolve it."""
    parent = ParentTagSerializer()

    class Meta:
        model = Tag
        fields = ["name", "aliases", "parent"]
        # Disable the default validators on the aliases field.
        extra_kwargs = {"aliases": {"validators": []}}

    def to_internal_value(self, tag_name):
        """Return the existing ``Tag`` whose name equals ``tag_name``.

        Called once per element of the posted tag list.

        Args:
            tag_name: the tag name taken from the request payload.

        Returns:
            The matching ``Tag`` instance.

        Raises:
            serializers.ValidationError: if no tag has that name, so the
                client gets a validation error instead of a server error.
        """
        try:
            return Tag.objects.get(name=tag_name)
        except Tag.DoesNotExist:
            raise serializers.ValidationError(
                'Tag "{}" does not exist.'.format(tag_name)
            )
class StreamSerializer(serializers.HyperlinkedModelSerializer):
    # Excerpt: the rest of the class is elided with `...` below.
    streamer = StreamerSerializer()
    tags = TagSerializer(many=True)
    ...

Add CharField to the search index in haystack

In my Django app (1.8) I use haystack (2.4.1) for search. I want to be able to search words with autocomplete (EdgeNgramField) and also find words when I enter only part of a name, for example 'zo-zo on' (this does not work with EdgeNgramField).
Below I tried adding: text_sec = indexes.CharField(use_template=True) but this isn't working for me.
Here is my code, but it doesn't work:
class EventIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index definition whose document field is an EdgeNgramField."""
    # Primary document field (document=True), rendered from a template.
    text = indexes.EdgeNgramField(document=True, use_template=True)
    # Additional template-rendered field stored as plain characters.
    text_sec = indexes.CharField(use_template=True)
    # The remaining fields are read from model attributes of the same name.
    id = indexes.CharField(model_attr='id')
    get_absolute_url = indexes.CharField(model_attr='get_absolute_url')
    description = indexes.CharField(model_attr='description', null=True)
    is_past = indexes.CharField(model_attr='is_past', default='false')
    date_start = indexes.DateTimeField(model_attr='date_start')
You will need to set up two different fields in your schema to power autocomplete and normal search.
In the following I have defined two fields: text, and content_auto, which is populated with the title from your model.
class EventIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index with separate fields for normal search and autocomplete."""
    # Primary document field (document=True), rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    # Edge n-gram field fed from the model's `title` attribute.
    content_auto = indexes.EdgeNgramField(model_attr='title')
    text_sec = indexes.CharField(use_template=True)
    # The remaining fields are read from model attributes of the same name.
    id = indexes.CharField(model_attr='id')
    get_absolute_url = indexes.CharField(model_attr='get_absolute_url')
    description = indexes.CharField(model_attr='description', null=True)
    is_past = indexes.CharField(model_attr='is_past', default='false')
    date_start = indexes.DateTimeField(model_attr='date_start')
When you want to do normal search you should search on text field, for autosuggest on content_auto.
You should read the docs at http://haystacksearch.org/ for more on this.

MongoEngine Query Optimization

I have two collections, ScenarioDrivers and ModelDrivers, which have a one-to-many relationship with each other.
class ScenarioDrivers(Document):
    """Driver values attached to one scenario, stored in 'ScenarioDrivers'."""
    meta = {
        'collection': 'ScenarioDrivers'
    }
    # References are declared by document name.
    ScenarioId = ReferenceField('ModelScenarios')
    DriverId = ReferenceField('ModelDrivers')
    DriverCalibrationMethod = StringField()
    SegmentName = StringField()
    DriverValue = ListField()
    CalibrationStatus = StringField()
    AdjustedValues = ListField(default=[])
    # Defaults must be callables: a plain value is computed once when the
    # class is defined, so every document would share the same timestamp.
    CreateDate = DateTimeField(default=lambda: ObjectId().generation_time)
    LastUpdateDate = DateTimeField(default=datetime.utcnow)
class ModelDrivers(Document):
    """A driver definition belonging to a portfolio model ('ModelDrivers')."""
    meta = {
        'collection': 'ModelDrivers'
    }
    # Reference declared by document name.
    PortfolioModelId = ReferenceField('PortfolioModels')
    DriverName = StringField()
    # Defaults must be callables: a plain value is computed once when the
    # class is defined, so every document would share the same timestamp.
    CreateDate = DateTimeField(default=lambda: ObjectId().generation_time)
    LastUpdateDate = DateTimeField(default=datetime.utcnow)
    FieldFormat = StringField()
    DriverData = ListField()
My query is like this.
class GetCalibratedDrivers(Resource):
    """Resource returning the calibrated drivers of one scenario."""

    def get(self, scenario_id):
        """Return every driver calibration stored for ``scenario_id``.

        Args:
            scenario_id: value matched against ``ScenarioDrivers.ScenarioId``.

        Returns:
            A dict with one key, ``'DriverCalibrations'``, holding a list
            with one dict per matching ``ScenarioDrivers`` document.
        """
        matching = (
            ScenarioDrivers.objects(ScenarioId=scenario_id)
            .exclude('ScenarioId')
            .select_related(1)
        )
        calibrations = [
            {
                'id': str(row.id),
                'DriverId': str(row.DriverId.id),
                'SegmentName': row.SegmentName,
                'CalibrationMethod': row.DriverCalibrationMethod,
                'CalibratedValues': exchange(row.DriverValue),
                'AdjustedValues': row.AdjustedValues,
                'LastUpdateDate': formatted_date(row.LastUpdateDate),
                # Read from the referenced ModelDrivers document.
                'FieldFormat': row.DriverId.FieldFormat,
            }
            for row in matching
        ]
        return {'DriverCalibrations': calibrations}
The Query matches 1140 records and then I construct a dictionary and make it a list.
But this API call takes 30s to process just 1140 records. What am I missing? Please help. I am using the latest versions of PyMongo and MongoEngine.
I think the problem is not with your query, it is with you looping over 1140 records. I do not see any use of referenced objects so you should consider removing select_related(1). Once you do that, if you want to convert reference object ids to string, you can use as_pymongo() which will do that by default for you. And finally if you must read some data in specific format like formatted_date or exchange, it is better to save them as part of your document. i.e. save FormattedLastUpdateDate with LastUpdateDate. In MongoDB, you have to think about your read specific logic when you save the document.

Odoo error when generating One2many field data

Given the following class for the header:
class vontatas_head(models.Model):
    """Header record ('vontatas.head') owning a set of vontatas.data lines."""
    _name = 'vontatas.head'
    # Non-stored computed field; _compute_display_name is not shown here.
    display_name = fields.Char(string="Sor", compute='_compute_display_name', store=False)
    plan_type_id = fields.Many2one(
        comodel_name='plan.type', string='Terv típus', required=True)
    # Defaults to strftime('%Y'); presumably the current year -- confirm
    # which strftime is imported.
    year = fields.Integer(string='Év', required=True, default=lambda *a: strftime('%Y'))
    version = fields.Integer(string='Verzió', required=True, default=1)
    comment = fields.Char(string='Megjegyzés')
    # Detail lines; new records are pre-filled by get_default_lines.
    vontatas_data_ids = fields.One2many(
        comodel_name='vontatas.data', inverse_name='vontatas_id', string='Adatok', default=get_default_lines)
And for the detail:
class vontatas_data(models.Model):
    """Detail line ('vontatas.data') belonging to a vontatas.head record."""
    _name = 'vontatas.data'
    # Link back to the owning header record.
    vontatas_id = fields.Many2one(comodel_name="vontatas.head", string="Vontatás sor")
    name = fields.Char(string="Megnevezés", required=True)
    code = fields.Char(string="Kód", required=True)
    # Either a 'total' line or an 'input' line; defaults to 'input'.
    type = fields.Selection([('total', 'Összesen'), ('input', 'Input')], string="Típus", default='input')
    value = fields.Float(string="Várható költség")
    # Self-referencing hierarchy: parent_id points at another line of the
    # same model, and child_ids is its inverse.
    parent_id = fields.Many2one(comodel_name="vontatas.data", ondelete='cascade', string="Összesen sor")
    child_ids = fields.One2many(comodel_name="vontatas.data", inverse_name='parent_id', string='Input sorok')
I have to automatically generate details data from a template with this code:
def get_default_lines(self):
    """Build default detail lines from the rows of the ``vontatas`` table.

    Reads name, code, type and parent_id ordered by code and wraps each
    row in a ``(0, 0, values)`` triple.

    Returns:
        A list of ``(0, 0, dict)`` tuples, one per template row.
    """
    cursor = self.env.cr
    cursor.execute("select name, code, type, parent_id from vontatas order by code")
    return [
        (0, 0, {'name': name,
                'code': code,
                'type': line_type,
                'parent_id': parent_id})
        for name, code, line_type, parent_id in cursor.fetchall()
    ]
Everything is working fine, but at creation I get an error message: "One of the documents you are trying to access has been deleted, please try again after refreshing."
I know why this error is happening: at the moment of generation there is no valid parent_id.
My question is: how to provide any valid parent_id within the function get_default_lines, knowing that the data is actually just in memory, not stored yet in the database?
Or asked otherwise: How to keep the hierarchy level defined within the template?

Categories

Resources