Conver "distinct on" query from Sql in Mongodb with Pymongo - python

I am trying to convert this query from SQL:
select distinct on (id13) id13, timestamp1
from oneindextwocolumnsfalse3years
where timestamp1>='2010-01-01 00:00:00' and timestamp1<='2015-01-01 00:55:00'
order by id13,timestamp1 desc
To MongoDB, like this:
mydb1.mongodbindextimestamp1.aggregate([
    {
        "$match": {
            "timestamp1": {
                "$gte": datetime.strptime("2010-01-01 00:00:00", "%Y-%m-%d %H:%M:%S"),
                "$lte": datetime.strptime("2015-01-01 00:55:00", "%Y-%m-%d %H:%M:%S")
            }
        }
    },
    {
        "$group": {
            "_id": {
                "id_13": "$id13"
            }
        }
    },
    {
        "$project": {
            "_id": 0,
            "id13": 1,
            "timestamp1": 1
        }
    },
    {"$sort": {"id13": 1, "timestamp1": -1}}
])
But it doesn't seem to work. Do you have something to suggest? I am doing something wrong but I can't find what!

Give this pipeline a try:
db.collection.aggregate([
    {
        $match: {
            timestamp: {
                $gte: ISODate("2020-01-01"),
                $lte: ISODate("2022-01-01")
            }
        }
    },
    {
        $group: {
            _id: "$id13",              // define the grouping key
            doc: { $first: "$$ROOT" }  // pick the first doc from each group
        }
    },
    {
        $replaceWith: "$doc"           // promote the doc to root
    },
    {
        $project: {
            _id: 0,
            id13: "$id13",
            timestamp: "$timestamp"
        }
    },
    {
        $sort: {
            id13: 1,
            timestamp: -1
        }
    }
])
https://mongoplayground.net/p/Cwzic2cMfgd
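For reference, a PyMongo version of the same pipeline might look like the sketch below (untested; it assumes the collection and field names from the question, i.e. timestamp1 rather than timestamp, and MongoDB 4.2+ for $replaceWith):
from datetime import datetime

pipeline = [
    {"$match": {"timestamp1": {
        "$gte": datetime(2010, 1, 1, 0, 0, 0),
        "$lte": datetime(2015, 1, 1, 0, 55, 0)}}},
    # note: a $sort before $group may be needed to guarantee that $first
    # picks the document with the latest timestamp1 for each id13
    {"$group": {"_id": "$id13", "doc": {"$first": "$$ROOT"}}},
    {"$replaceWith": "$doc"},
    {"$project": {"_id": 0, "id13": 1, "timestamp1": 1}},
    {"$sort": {"id13": 1, "timestamp1": -1}},
]
result = list(mydb1.mongodbindextimestamp1.aggregate(pipeline))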

Related

Convert elasticsearch query to elasticsearch-dsl query in python

I tried to convert this elasticsearch query to an elasticsearch-dsl query but I didn't get the same result.
I am using elasticsearch version 5.X, and the same version of elasticsearch-dsl.
Here is the elasticsearch query:
qry = {
    "_source": [
        "accurate_call_duration",
        "call_duration",
        "called"
    ],
    "query": {
        "bool": {
            "should": [
                {
                    "has_child": {
                        "type": "outgoing",
                        "query": { "match_all": {} },
                        "inner_hits": {
                            "size": 5000,
                            "_source": [
                                "accurate_call_duration",
                                "accurate_ringing_duration",
                                "anonymous_call"
                            ]
                        }
                    }
                }
            ],
            "must": [
                { "term": { "client": 1212 } },
                {
                    "range": {
                        "created_time": {
                            "gte": date_from,
                            "lte": date_to
                        }
                    }
                },
                { "term": { "type": "incoming" } }
            ]
        }
    },
    "size": 5000
}
elasticsearch-dsl query:
result = Search(using=escli, index="cdr").source([
    "accurate_call_duration",
    "call_duration", "called"]).filter(
    'range', created_time={'gt': date_from, 'lte': date_to}).filter(
    "term", type="incoming").extra(size=5000).execute()
How can I get the same result as the elasticsearch query (with inner_hits)?
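One possible way to express the has_child / inner_hits part in elasticsearch-dsl is to build it as a Q object and combine it with the other clauses in a bool query. This is an untested sketch that reuses the escli client, the cdr index and the date_from / date_to variables from the question:
from elasticsearch_dsl import Search, Q

# has_child clause with inner_hits passed through as a raw dict
has_child = Q("has_child", type="outgoing", query=Q("match_all"),
              inner_hits={"size": 5000,
                          "_source": ["accurate_call_duration",
                                      "accurate_ringing_duration",
                                      "anonymous_call"]})

s = (Search(using=escli, index="cdr")
     .source(["accurate_call_duration", "call_duration", "called"])
     .query(Q("bool",
              should=[has_child],
              must=[Q("term", client=1212),
                    Q("range", created_time={"gte": date_from, "lte": date_to}),
                    Q("term", type="incoming")]))
     .extra(size=5000))
result = s.execute()
If this works the way it does on newer versions, the child documents should be reachable per hit via hit.meta.inner_hits, but I have not verified that on 5.X.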

How do you filter on not-null values in elasticsearch?

I am trying to filter on values that are not null:
Example with SQL:
SELECT * FROM Mytable WHERE field_1 IS NOT NULL AND field_2 = 'alpha'
How should I write this query in elasticsearch-dsl (Python)?
I tried things like:
s = Mytable.search().query(
    Q('match', field_2='alpha')
).filter(~Q('missing', field='field_1'))
but it returns elements with null values of field_1
Also, I tried the following, but it didn't work either:
field_name_1 = 'field_2'
value_1 = "alpha"
field_name_2 = 'field_1'
value_2 = " "
filter = {
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        field_name_1: value_1
                    }
                },
                {
                    "bool": {
                        "should": [
                            {
                                "bool": {
                                    "must_not": [
                                        {
                                            field_name_2: {
                                                "textContent": "*"
                                            }
                                        }
                                    ]
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}
I am not familiar with elasticsearch-dsl (Python), but the following search query will get you the search result you want:
SELECT * FROM Mytable WHERE field_1 IS NOT NULL AND field_2 = 'alpha'
With the search query below, the result will be documents where name is "alpha" and the item field is not null. You can refer to the exists query documentation to know more about this.
Index Data:
{ "name": "alpha","item": null }
{ "name": "beta","item": null }
{ "name": "alpha","item": 1 }
{ "name": "alpha","item": [] }
Search query:
You can combine a bool query with an exists query like this:
{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {
                        "name": "alpha"
                    }
                },
                {
                    "exists": {
                        "field": "item"
                    }
                }
            ]
        }
    }
}
Search Result:
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "4",
"_score": 1.6931472,
"_source": {
"name": "alpha",
"item": 1
}
}
]
You can try this one:
s = Mytable.search().query(
    Q('bool', must=[Q('match', field_2='alpha'), Q('exists', field='field_1')]))
This is the way to use a boolean compound query:
query: {
    bool: {
        must: [
            {
                bool: {
                    must_not: {
                        missing: {
                            field: 'follower',
                            existence: true,
                            null_value: true,
                        },
                    },
                },
            },
            {
                nested: {
                    path: 'follower',
                    query: {
                        match: {
                            'follower.id': req.currentUser?.id,
                        },
                    },
                },
            },
        ],
    },
},

MongoDB Query with Python

I was working on the query you can see below, and my Python IDE gives me a syntax error on the "$group" line. The error is "invalid syntax" but I cannot find where the problem is.
ID="01"
yearInicial="2018"
monthInicial="02"
dayInicial="11"
yearFinal="2018"
monthFinal="06"
dayFinal="22"
total=0
totalDias=0
result = list(db.alojamientos.aggregate([
    {'$match': {'$and': [
        {"location.thcod": ID},
        {"prices.year": {'$gte': yearInicial}},
        {"prices.year": {'$lte': yearFinal}},
        {"prices.months.month": {'$gte': monthInicial}},
        {"prices.months.month": {'$lte': monthFinal}},
        {"prices.months.days.day": {'$gte': dayInicial}},
        {"prices.months.days.day": {'$lte': dayFinal}}
    ]}},
    {'$unwind': "$prices"},
    {'$unwind': "$prices.months"},
    {'$unwind': "$prices.months.days"},
    {'$unwind': "$prices.months.days.csvs"},
    {'$unwind': "$prices.months.days.csvs.price"},
    {'$group': {'_id': {ID, 'day': "$prices.months.days.day", 'month': "$prices.months.month", 'year': "$prices.year"},
                total: {'$sum': "$prices.months.days.csvs.price.price"}, count: {'$sum': 1}}}
]))
Thank you in advance.
Delete the 3rd { in this line: {'$group':{'_id':{ID, ....
It should look like {'$group':{'_id':ID, ...
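Note that the accumulator names also have to be quoted string keys in Python (total and count are bare names in the original, so they are evaluated as variables). A sketch of the corrected stage, following the suggestion above:
{'$group': {
    '_id': ID,  # group by the literal ID value, as suggested
    'total': {'$sum': "$prices.months.days.csvs.price.price"},
    'count': {'$sum': 1}
}}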

Mongo Cursor not returning a cursor but an object

test_cursor = db.command({
"aggregate": "New_layout",
"pipeline": [
{ "$match": { "$and": [
{ "FIRST_DATE": { "$gte": new_date } },
{ "CHAIN_ID": { "$ne": "" } }
] } },
{ "$unwind": { "path": "$ENTERS", "includeArrayIndex": "Date" } },
{ "$project": {
"_id": 0,
"SITE_ID": "$SITE_ID",
"CHAIN_ID": "$CHAIN_ID",
"SEGMENT_ID": "$SEGMENT_ID",
"ZIP": "$ZIP",
"ZIP3": "$ZIP3",
"MARKET_ID": "$MARKET_ID",
"REGION": "$REGION",
"MALL_CODE": "$MALL_CODE",
"MALL_AREA": "$MALL_AREA",
"MALL_NAME": "$MALL_NAME",
"FIRST_DATE": "$FIRST_DATE",
"MARKET_AREA": "$MARKET_AREA",
"REGION_AREA": "$REGION_AREA",
"ZIP_AREA": "$ZIP_AREA",
"ZIP3_AREA": "$ZIP3_AREA",
"DATE": "$Date",
"ENTERS": "$ENTERS"
} }
],
"allowDiskUse": bool(1),
"cursor": {}
})
asd=list(test_cursor)
The contents of the cursor are as below:
[u'cursor', u'ok', u'waitedMS']
However, with an $out stage, the output collection has the expected contents.
I am running PyMongo v3.2.2 and MongoDB 3.2. I was told this problem occurs with v3.0 or lower, but I am not able to figure it out.
You should use aggregate() instead of command().
test_cursor = db.New_layout.aggregate([
{ "$match": { "$and": [
{ "FIRST_DATE": { "$gte": new_date } },
{ "CHAIN_ID": { "$ne": "" } }
] } },
{ "$unwind": { "path": "$ENTERS", "includeArrayIndex": "Date" } },
{ "$project": {
"_id": 0,
"SITE_ID": "$SITE_ID",
"CHAIN_ID": "$CHAIN_ID",
"SEGMENT_ID": "$SEGMENT_ID",
"ZIP": "$ZIP",
"ZIP3": "$ZIP3",
"MARKET_ID": "$MARKET_ID",
"REGION": "$REGION",
"MALL_CODE": "$MALL_CODE",
"MALL_AREA": "$MALL_AREA",
"MALL_NAME": "$MALL_NAME",
"FIRST_DATE": "$FIRST_DATE",
"MARKET_AREA": "$MARKET_AREA",
"REGION_AREA": "$REGION_AREA",
"ZIP_AREA": "$ZIP_AREA",
"ZIP3_AREA": "$ZIP3_AREA",
"DATE": "$Date",
"ENTERS": "$ENTERS"
} }
],
allowDiskUse=True)
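aggregate() returns a CommandCursor, so it can be iterated or materialised with list() just like a find() cursor, for example:
for doc in test_cursor:
    print(doc["SITE_ID"], doc["DATE"])
With db.command() the raw server reply is returned instead, so the documents would only be available under test_cursor['cursor']['firstBatch'] (and only the first batch at that), which is why aggregate() is the right call here.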

Filter timestamp with MongoDB

Suppose this JSON export from my MongoDB:
{
  "_id": { "$oid": "51ca002d9e67460354bb0089" },
  "node": {
    "id": "1",
    "components": [
      {
        "sensors": [
          { "name": "backup-job-name", "value": "Job_2" },
          { "name": "backup-job-id", "value": "187" },
          { "name": "backup-start-date", "value": "1372138227" },
          { "name": "backup-stop-date", "value": "1372138235" },
          { "name": "backup-nb-errors", "value": "0" },
          { "name": "backup-nb-warnings", "value": "0" },
          { "name": "backup-source-files", "value": "402" },
          { "name": "backup-source-bytes", "value": "168571449" },
          { "name": "backup-status", "value": null }
        ],
        "type": "backup",
        "serial": "1-backup-backup-job-name-Job_2"
      },
      {
        "sensors": [
          { "name": "backup-job-name", "value": "Test_bckp" },
          { "name": "backup-job-id", "value": "" },
          { "name": "backup-start-date", "value": "0" },
          { "name": "backup-stop-date", "value": "0" },
          { "name": "backup-nb-errors", "value": "0" },
          { "name": "backup-nb-warnings", "value": "0" },
          { "name": "backup-source-files", "value": "0" },
          { "name": "backup-source-bytes", "value": "0" },
          { "name": "backup-status", "value": null }
        ],
        "type": "backup",
        "serial": "1-backup-backup-job-name-Test_bckp"
      }
    ]
  },
  "timestamp": 1372192813
}
I work with Python and I'd like to get the documents where "backup-start-date" (stored as a timestamp) is greater than a given value.
I've tried with
collection.find({
    'node.components.type': 'backup',
    'node.components.sensors': {'name': 'backup-start-date',
                                'value': {'$gte': ts_to_compare_with}}})
but there are no documents in the result. Is my query wrong?
There are two things here: you need to match a sensor document inside the components array (which calls for $elemMatch), and $gte only compares numerically on integers and dates, not on string values.
If you convert the data to ints then you can use $gte and query like so:
db.test.find({
    'node.components.type': 'backup',
    'node.components.sensors': {
        $elemMatch: {'name': 'backup-start-date',
                     'value': {'$gte': 168571445}}}})
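Since the question mentions Python, the same query in PyMongo only differs in that the $elemMatch operator must be a quoted string (a sketch, assuming a collection object and integer sensor values as discussed above):
docs = collection.find({
    'node.components.type': 'backup',
    'node.components.sensors': {
        '$elemMatch': {'name': 'backup-start-date',
                       'value': {'$gte': 168571445}}}})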
