Hey guys,
We have mitigated to MongoDB recently and I`m trying to create a pipeline for grouping the matching data.
Our cluster has 1.1m documents.
Here`s our query:
db.getCollection(“xyx_data”).aggregate([{
"$match":{
"name":{
"$ne":""
},
"time":{
"$gte":1630420570.277,
"$lte":1633012570.277
},
"$and":[
{
"$text":{
"$search":"IL"
}
},
{
"country":{
"$regex":"IL",
"$options":"gi"
}
}
]
}
},
{
"$group":{
"_id":"$name",
"time":{
"$first":"$time"
}
}
},
{
"$sort":{
"time":-1
}
},
{
"$limit":10
}])
The query takes huge amount of time to complete and the explain returned from the query telling us that most of the time was taken by the FETCH stage.
Here`s the explain:
{
"explainVersion":“1”,
"stages":[
{
"$cursor":{
"queryPlanner":{
"namespace":"xxx_data",
"indexFilterSet":false,
"parsedQuery":{
"$and":[
{
"time":{
"$lte":1633012570.277
}
},
{
"time":{
"$gte":1630420570.277
}
},
{
"country":"/IL/gi"
},
{
"name":{
"$not":{
"$eq":""
}
}
},
{
"$text":{
"$search":"IL",
"$language":"english",
"$caseSensitive":false,
"$diacriticSensitive":false
}
}
]
},
"queryHash":“A9EBD8B1”,
"planCacheKey":“A592F28E”,
"maxIndexedOrSolutionsReached":false,
"maxIndexedAndSolutionsReached":false,
"maxScansToExplodeReached":false,
"winningPlan":{
"stage":"PROJECTION_SIMPLE",
"transformBy":{
"_id":true,
"name":true
},
"inputStage":{
"stage":"FETCH",
"filter":{
"$and":[
{
"name":{
"$not":{
"$eq":""
}
}
},
{
"country":"/IL/gi"
}
]
},
"inputStage":{
"stage":"TEXT_MATCH",
"indexPrefix":{
},
"indexName":"index1",
"parsedTextQuery":{
"terms":[
"il"
],
"negatedTerms":[
],
"phrases":[
],
"negatedPhrases":[
]
},
"textIndexVersion":3,
"inputStage":{
"stage":"FETCH",
"inputStage":{
"stage":"IXSCAN",
"filter":{
"$and":[
{
"time":{
"$lte":1633012570.277
}
},
{
"time":{
"$gte":1630420570.277
}
}
]
},
"keyPattern":{
"_fts":"text",
"_ftsx":1,
"name":1,
"time":-1
},
"indexName":"index1",
"isMultiKey":true,
"isUnique":false,
"isSparse":false,
"isPartial":false,
"indexVersion":2,
"direction":"backward",
"indexBounds":{
}
}
}
}
}
},
"rejectedPlans":[
]
},
"executionStats":{
"executionSuccess":true,
"nReturned":1110137,
"executionTimeMillis":1233785,
"totalKeysExamined":1110824,
"totalDocsExamined":2221648,
"executionStages":{
"stage":"PROJECTION_SIMPLE",
"nReturned":1110137,
"executionTimeMillisEstimate":1153779,
"works":1110825,
"advanced":1110137,
"needTime":687,
"needYield":0,
"saveState":65665,
"restoreState":65665,
"isEOF":1,
"transformBy":{
"_id":true,
"name":true
},
"inputStage":{
"stage":"FETCH",
"filter":{
"$and":[
{
"name":{
"$not":{
"$eq":""
}
}
},
{
"country":"/IL/gi"
}
]
},
"nReturned":1110137,
"executionTimeMillisEstimate":1153200,
"works":1110825,
"advanced":1110137,
"needTime":687,
"needYield":0,
"saveState":65665,
"restoreState":65665,
"isEOF":1,
"docsExamined":1110824,
"alreadyHasObj":1110824,
"inputStage":{
"stage":"TEXT_MATCH",
"nReturned":1110824,
"executionTimeMillisEstimate":1150747,
"works":1110825,
"advanced":1110824,
"needTime":0,
"needYield":0,
"saveState":65665,
"restoreState":65665,
"isEOF":1,
"indexPrefix":{
},
"indexName":"index1",
"parsedTextQuery":{
"terms":[
"il"
],
"negatedTerms":[
],
"phrases":[
],
"negatedPhrases":[
]
},
"textIndexVersion":3,
"docsRejected":0,
"inputStage":{
"stage":"FETCH",
"nReturned":1110824,
"executionTimeMillisEstimate":1150533,
"works":1110825,
"advanced":1110824,
"needTime":0,
"needYield":0,
"saveState":65665,
"restoreState":65665,
"isEOF":1,
"docsExamined":1110824,
"alreadyHasObj":0,
"inputStage":{
"stage":"IXSCAN",
"filter":{
"$and":[
{
"time":{
"$lte":1633012570.277
}
},
{
"time":{
"$gte":1630420570.277
}
}
]
},
"nReturned":1110824,
"executionTimeMillisEstimate":6914,
"works":1110825,
"advanced":1110824,
"needTime":0,
"needYield":0,
"saveState":65665,
"restoreState":65665,
"isEOF":1,
"keyPattern":{
"_fts":"text",
"_ftsx":1,
"name":1,
"time":-1
},
"indexName":"index1",
"isMultiKey":true,
"isUnique":false,
"isSparse":false,
"isPartial":false,
"indexVersion":2,
"direction":"backward",
"indexBounds":{
},
"keysExamined":1110824,
"seeks":1,
"dupsTested":1110824,
"dupsDropped":0,
"indexDef":{
"indexName":"index1",
"isMultiKey":true,
"keyPattern":{
"_fts":"text",
"_ftsx":1,
"name":1,
"time":-1
},
"isUnique":false,
"isSparse":false,
"isPartial":false,
"direction":"backward"
}
}
},
"indexDef":{
"indexName":"index1"
}
}
}
}
}
},
"nReturned":1110137,
"executionTimeMillisEstimate":1233153
},
{
"$group":{
"_id":"$name"
},
"maxAccumulatorMemoryUsageBytes":{
},
"totalOutputDataSizeBytes":20285818,
"usedDisk":false,
"nReturned":73234,
"executionTimeMillisEstimate":1233762
},
{
"$sort":{
"sortKey":{
"time":-1
},
"limit":10
},
"totalDataSizeSortedBytesEstimate":2930,
"usedDisk":false,
"nReturned":10,
"executionTimeMillisEstimate":1233769
}
],
"serverInfo":{
"host":"localhost",
"port":27017,
"version":"5.0.3",
"gitVersion":"657fea5a61a74d7a79df7aff8e4bcf0bc742b748"
},
"serverParameters":{
"internalQueryFacetBufferSizeBytes":104857600,
"internalQueryFacetMaxOutputDocSizeBytes":104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes":104857600,
"internalDocumentSourceGroupMaxMemoryBytes":104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes":104857600,
"internalQueryProhibitBlockingMergeOnMongoS":0,
"internalQueryMaxAddToSetBytes":104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes":104857600
},
"command":{
"aggregate":"xxx_data",
"pipeline":[
{
"$match":{
"name":{
"$ne":""
},
"time":{
"$gte":1630420570.277,
"$lte":1633012570.277
},
"$and":[
{
"$text":{
"$search":"IL"
}
},
{
"country":"/IL/gi"
}
]
}
},
{
"$project":{
"name":1
}
},
{
"$group":{
"_id":"$name"
}
},
{
"$sort":{
"time":-1
}
},
{
"$limit":10
}
],
"cursor":{
},
"$db":"db"
},
"ok":1,
"$clusterTime":{
"clusterTime":Timestamp(1633355452,
1),
"signature":{
"hash":BinData(0,
"R4d0Xu0c/CHiGxgh0NTp6s/C9Ek="")",
"keyId":"NumberLong(""7009951518350639108"")"
}
},
"operationTime":Timestamp(1633355452,
1)
}
Visualization: https://i.imgur.com/GzMrxMk.png
I`ll be glad if someone can help me reduce the query time.
Thank you.