I am learning Python with MongoDB in Tornado.
{
"_id" : ObjectId("566fb466d82f0769fbb0fb10"),
"Pid" : "1",
"Registration" : "1234",
"Date" : "11-11-2015",
"status" : "booked"
}
{
"_id" : ObjectId("566fb47cd82f0769fbb0fb11"),
"Pid" : "1",
"Registration" : "1234",
"Date" : "13-11-2015",
"status" : "AOs"
}
{
"_id" : ObjectId("566fb48bd82f0769fbb0fb12"),
"Pid" : "1",
"Registration" : "1234",
"Date" : "14-11-2015",
"status" : "NA"
}
{
"_id" : ObjectId("566fb4a3d82f0769fbb0fb13"),
"Pid" : "1",
"Registration" : "1234",
"Date" : "16-12-2015",
"status" : "AOr"
}
If I am passing only the month and year, I need the entire record for the given month and year.
I tried doing this:
db.cal.aggregate({$project:{"monthyear": {$substr: ["$Date",0,2]}}})
the output is:
{ "_id" : ObjectId("566fb466d82f0769fbb0fb10"), "monthyear" : "11" }
{ "_id" : ObjectId("566fb47cd82f0769fbb0fb11"), "monthyear" : "13" }
{ "_id" : ObjectId("566fb48bd82f0769fbb0fb12"), "monthyear" : "14" }
{ "_id" : ObjectId("566fb4a3d82f0769fbb0fb13"), "monthyear" : "16" }
but I need the below output:
{
"_id" : ObjectId("566fb4a3d82f0769fbb0fb13")
"Pid" : "1",
"Registration" : "1234",
"Date" : "16-12-2015",
"status" : "AOr"
}
Please help.
What about this:
db.cal.find({"Date":{"$regex":"12-2015"}})
Related
I'm using imply-2.2.3. Here is my tranquility server configuration:
{
"dataSources" : [
{
"spec" : {
"dataSchema" : {
"dataSource" : "tutorial-tranquility-server",
"parser" : {
"type" : "string",
"parseSpec" : {
"timestampSpec" : {
"column" : "timestamp",
"format" : "auto"
},
"dimensionsSpec" : {
"dimensions" : [],
"dimensionExclusions" : [
"timestamp",
"value"
]
},
"format" : "json"
}
},
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "hour",
"queryGranularity" : "none"
},
"metricsSpec" : [
{
"type" : "count",
"name" : "count"
},
{
"name" : "value_sum",
"type" : "doubleSum",
"fieldName" : "value"
},
{
"fieldName" : "value",
"name" : "value_min",
"type" : "doubleMin"
},
{
"type" : "doubleMax",
"name" : "value_max",
"fieldName" : "value"
}
]
},
"ioConfig" : {
"type" : "realtime"
},
"tuningConfig" : {
"type" : "realtime",
"maxRowsInMemory" : "50000",
"windowPeriod" : "PT10M"
}
},
"properties" : {
"task.partitions" : "1",
"task.replicants" : "1"
}
},
{
"spec": {
"dataSchema" : {
"dataSource" : "test",
"parser" : {
"type" : "string",
"parseSpec" : {
"timestampSpec" : {
"column" : "timestamp",
"format" : "auto"
},
"dimensionsSpec" : {
"dimensions" : [
"a"
]
},
"format" : "json"
}
},
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "hour",
"queryGranularity" : "none"
},
"metricsSpec" : [
{
"type" : "count",
"name" : "count"
},
{
"type": "doubleSum",
"name": "b",
"fieldName": "b"
}
]
},
"ioConfig" : {
"type" : "realtime"
},
"tuningConfig" : {
"type" : "realtime",
"maxRowsInMemory" : "50000",
"windowPeriod" : "P1Y"
}
},
"properties": {
"task.partitions" : "1",
"task.replicants" : "1"
}
}
],
"properties" : {
"zookeeper.connect" : "localhost",
"druid.discovery.curator.path" : "/druid/discovery",
"druid.selectors.indexing.serviceName" : "druid/overlord",
"http.port" : "8200",
"http.threads" : "40",
"serialization.format" : "smile",
"druidBeam.taskLocator": "overlord"
}
}
I have trouble sending data to the second datasource, test, specifically. I tried to send the data below to Druid with Python requests:
{'b': 7, 'timestamp': '2017-01-20T03:32:54.586415', 'a': 't'}
The response I receive:
b'{"result":{"received":1,"sent":0}}'
If you read my config file you will notice that I set the window period to one year. I would like to send data with a large time span to Druid using Tranquility Server. Is there something wrong with my config or data?
I am new to elasticsearch. I have created a new index, using following REST API:-
req = {
"settings": {
"analysis": {
"analyzer": {
"hinglish_analyzer": {
"type": "custom",
"tokenizer": "standard",
"char_filter": [
"html_strip"
],
"filter": [
"lowercase",
"asciifolding",
"hinglish-token-filter"
]
}
}
}
},
"mappings" : {
"p_ss__user" : {
"properties" : {
"age" : {
"type": "integer"
},
"first_name" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"gender" : {
"type" : "long"
},
"is_alive" : {
"type" : "boolean"
},
"last_name" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"marital_status" : {
"type" : "long"
},
"user_gotra" : {
"properties" : {
"Gotra" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
},
"kuldevi" : {
"properties" : {
"Kuldevi" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
}
}
}
}
},
"user_village" : {
"properties" : {
"areaOrVillageName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
},
"tehsil" : {
"properties" : {
"city" : {
"properties" : {
"cityName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
},
"state" : {
"properties" : {
"country" : {
"properties" : {
"countryCode" : {
"type" : "text"
},
"countryName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
}
}
},
"id" : {
"type" : "long"
},
"stateCode" : {
"type" : "text"
},
"stateName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
}
}
}
}
},
"id" : {
"type" : "long"
},
"tehsilName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
}
}
},
"zipcode" : {
"type" : "text"
}
}
},
"username" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
}
}
}
}
}
Here, 'hinglish-token-filter' is my custom token filter, which I have written and is perfectly fine.
Now, I have created a document in Elasticsearch with the help of a Python script (here I pass my own value for the _id variable in the request), which looks like the one given below:
{
"username" : "Gopi_Chand",
"first_name" : "Gopi Chand",
"last_name" : "",
"gender" : 2,
"age" : 44,
"user_gotra" : {
"Gotra" : "Thanak",
"kuldevi" : {
"Kuldevi" : "Maa Jagdambaa",
"id" : 1
},
"id" : 1,
"kulrishi" : {
"Rishi" : "Parashar",
"id" : 1
}
},
"user_village" : {
"areaOrVillageName" : "Sanatra",
"tehsil" : {
"city" : {
"state" : {
"country" : {
"countryName" : "India",
"id" : 1,
"countryCode" : "IND"
},
"stateName" : "Rajasthan",
"id" : 1
},
"cityName" : "Barmer (Meru)",
"id" : 1
},
"tehsilName" : "Baitu",
"id" : 1
},
"id" : 1,
"zipcode" : ""
},
"marital_status" : 1,
"is_alive" : true
}
The document is successfully getting stored in the elasticsearch with the Id that I have passed, along with other values.
But the problem comes when I try to retrieve the document with the ID that I have set:
http://localhost:9200/users/p_s_s__user/3222/
It gives me following response :-
{"_index":"users","_type":"p_s_s__user","_id":"3222","found":false}
But when I try following query :-
http://localhost:9200/users/_search?pretty=true
it shows me my document, as shown below :-
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [
{
"_index" : "users",
"_type" : "p_ss__user",
"_id" : "3222",
"_score" : 1.0,
"_source" : {
"username" : "Gopi_Chand",
"first_name" : "Gopi Chand",
"last_name" : "",
"gender" : 2,
"age" : 44,
"user_gotra" : {
"Gotra" : "Thanak",
"kuldevi" : {
"Kuldevi" : "Maa Jagdambaa",
"id" : 1
},
"id" : 1,
"kulrishi" : {
"Rishi" : "Parashar",
"id" : 1
}
},
"user_village" : {
"areaOrVillageName" : "Sanatra",
"tehsil" : {
"city" : {
"state" : {
"country" : {
"countryName" : "India",
"id" : 1,
"countryCode" : "IND"
},
"stateName" : "Rajasthan",
"id" : 1
},
"cityName" : "Barmer (Meru)",
"id" : 1
},
"tehsilName" : "Baitu",
"id" : 1
},
"id" : 1,
"zipcode" : ""
},
"marital_status" : 1,
"is_alive" : true
}
}
]
}
}
Can you help me figure out what I have done wrong? Moreover, other queries, such as "match" queries, are also not working.
Thanks in advance.
I have 40 MM documents in my MongoDB collection (e.g. db.large_collection)
I want to get all the distinct User_ID.
I have created an index on the field user_id, but when I try to execute, it returns an error.
> db.large_collection.count()
39894523
> db.clean_tweets4.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "sampled_tourist.clean_tweets4"
},
{
"v" : 1,
"key" : {
"user_id" : 1
},
"name" : "user_id_1",
"ns" : "sampled_tourist.clean_tweets4"
},
{
"v" : 1,
"key" : {
"coordinates" : 1
},
"name" : "coordinates_1",
"ns" : "sampled_tourist.clean_tweets4"
},
{
"v" : 1,
"key" : {
"timestamp_ms" : 1
},
"name" : "timestamp_ms_1",
"ns" : "sampled_tourist.clean_tweets4"
}
]
But when I run
db.clean_tweets4.find({},{user_id:1})
{ "_id" : ObjectId("5790f9a178776f4b56ede2be"), "user_id" : NumberLong("2246342226") }
{ "_id" : ObjectId("5790f9a178776f4b56ede2bf"), "user_id" : NumberLong("2289817236") }
{ "_id" : ObjectId("5790f9a178776f4b56ede2c0"), "user_id" : 1904381486 }
{ "_id" : ObjectId("5790f9a178776f4b56ede2c1"), "user_id" : NumberLong("3044032705") }
{ "_id" : ObjectId("5790f9a178776f4b56ede2c2"), "user_id" : NumberLong("3407958364") }
{ "_id" : ObjectId("5790f9d278776f4b56ee4af2"), "user_id" : 1566025975 }
{ "_id" : ObjectId("5790f7ab78776f4b56ea55c6"), "user_id" : 15857879 }
{ "_id" : ObjectId("5790f9a178776f4b56ede28f"), "user_id" : NumberLong("3394102511") }
{ "_id" : ObjectId("5790f9a178776f4b56ede293"), "user_id" : 1376377652 }
{ "_id" : ObjectId("5790f9a178776f4b56ede294"), "user_id" : 352385989 }
{ "_id" : ObjectId("5790f9a178776f4b56ede295"), "user_id" : NumberLong("2383622643") }
{ "_id" : ObjectId("5790f9a178776f4b56ede29c"), "user_id" : 152362163 }
{ "_id" : ObjectId("5790f9a178776f4b56ede2a0"), "user_id" : 1446113954 }
{ "_id" : ObjectId("5790f9a178776f4b56ede2a1"), "user_id" : 1893437088 }
{ "_id" : ObjectId("5790f9a178776f4b56ede2a2"), "user_id" : 67121578 }
{ "_id" : ObjectId("5790f9a178776f4b56ede2a3"), "user_id" : 1714137770 }
{ "_id" : ObjectId("5790f9a178776f4b56ede2a4"), "user_id" : 52806609 }
Thanks!
find({}) means it returns everything.
This is what you want (distinct returns the unique values of the field):
db.collection.distinct("user_id");
I have many documents with such structure:
"_id" : ObjectId("52be9d8dbfbc2c17e6a4e06b"),
"contest" : "Teamcode",
"data" : [
{
"status" : "0",
"message" : "Correct",
"runtime" : 0.10917782783508301,
"score" : 20
},
{
"status" : "0",
"message" : "Correct",
"runtime" : 0.12033200263977051,
"score" : 20
},
{
"status" : "0",
"message" : "Correct",
"runtime" : 0.35556793212890625,
"score" : 20
},
{
"status" : "0",
"message" : "Correct",
"runtime" : 1.8789710998535156,
"score" : 20
},
{
"status" : "0",
"message" : "Correct",
"runtime" : 0.9521079063415527,
"score" : 20
}
],
"id" : 242,
"lang" : "c",
"problem" : "roate",
"result" : [ ],
"score" : 100,
"status" : "done",
"time" : 1388223885.051975,
"user" : {
"email" : "orizont1",
"user_class" : 0,
"name" : "orizont1"
}
}
Each user has many submissions for each problem in one contest.
I have a variable called "contest", and I want to take the last submission of each user per each problem. I use pymongo.
How can I do that?
Query can be formed like this:
for each problem (for say Teamcode problem), give me last submission of all users
-> While querying, you need to keep in mind that the size of the object array (data) must be greater than or equal to 1.
-> query: { "contest": "Teamcode" , "data.0": { $exists: true } } (note: $size only matches an exact array length and does not accept comparison operators such as $gte, so test for the presence of the first array element instead).
-> projection: {"data":{$slice:-1}, id:1}. $slice:-1 will give you last element of object array (data) in each document which match the query.
For $slice read this:
http://docs.mongodb.org/manual/reference/operator/projection/slice/#proj._S_slice
YOUR_COLLECTION_NAME.find( { "contest": "Teamcode" , "data.0": { $exists: true } }, {"data":{$slice:-1}, id:1} )
I have data like this:
{ "_id": "1234gbrghr",
"Device" : "samsung",
"UserId" : "12654",
"Month" : "july"
},
{ "_id": "1278gbrghr",
"Device" : "nokia",
"UserId" : "87654",
"Month" : "july"
},
{ "_id": "1234gbrghr",
"Device" : "samsung",
"UserId" : "12654",
"Month" : "july"
}
I need to get the number of distinct users for a particular device in the month of July. For example, if a user (UserId) used a Samsung device twice or more in the month of July, then it is counted as one for Samsung.
For this I used the query below to get the total number of users in the month of July, but I need to get the distinct number of users:
pipeline1 = [
{'$match':{'Month':'july'}},
{'$group':{'_id' : '$Device', 'count' : { '$sum' : 1 }}}
]
data = db.command('aggregate', 'collection', pipeline=pipeline1);
You will first need to group on device and user instead. You can do that with the following pipeline operator:
{'$group':{'_id' : { d: '$Device', u: '$UserId' } } }
And then secondly you need to count the number of users per device (like you already had, but slightly modified):
{ '$group': { '_id' : '$_id.d', 'count': { '$sum' : 1 } } }
With the following dataset:
{ "_id" : "1234gbrghr", "Device" : "samsung", "UserId" : "12654", "Month" : "july" }
{ "_id" : "1278gbrghr", "Device" : "nokia", "UserId" : "87654", "Month" : "july" }
{ "_id" : "1239gbrghr", "Device" : "samsung", "UserId" : "12654", "Month" : "july" }
{ "_id" : "1238gbrghr", "Device" : "samsung", "UserId" : "12653", "Month" : "july" }
And the following aggregate command:
db.so.aggregate( [
{ '$match' : {'Month' : 'july' } },
{ '$group' : {
'_id' : { d: '$Device', u: '$UserId' },
'count' : { '$sum' : 1 }
} },
{ '$group': {
'_id' : '$_id.d',
'count': { '$sum' : 1 }
} }
] );
This outputs:
{
"result" : [
{
"_id" : "nokia",
"count" : 1
},
{
"_id" : "samsung",
"count" : 2
}
],
"ok" : 1
}