Hi there!
I have a certain aggregation that I’ve been struggling with, trying to improve its performance.
and it looks like the following:
db
.getCollection("paw.readouts")
.aggregate([
{
$match: {
readoutDescriptorId: {
$in: [
5632, 1, 5633, 2, 3, 4, 5, 6, 8, 9,
17, 5649, 18, 3859, 3860, 5396, 5397,
5656, 5657, 5402, 5403, 5404, 5153,
5154, 5414, 5415, 4910, 3888, 4912,
5680, 3889, 5681, 5684, 5685, 5174,
4409, 4410, 5180, 4157, 4158, 4424,
5453, 5454, 5459, 5460, 5461, 5462,
5468, 5469, 3944, 4968, 3945, 4969,
5233, 5234, 5235, 4216, 4217, 4218,
4220, 4221, 5505, 5250, 4487, 4488,
5256, 5001, 3978, 5002, 3980, 5007,
4240, 4241, 4761, 3998, 3999, 5279,
5280, 5281, 5282, 4515, 5283, 5539,
4517, 4518, 5287, 5032, 5288, 5033,
5289, 5034, 5290, 5546, 5035, 5547,
5036, 5037, 5038, 5294, 5039, 5555,
4291, 4292, 5316, 4041, 5321, 4042,
4044, 5068, 4045, 5069, 4820, 5333,
5334, 4823, 5335, 5591, 5592, 5593,
4826, 5594, 4827, 5595, 4828, 5596,
4829, 5597, 4830, 5598, 4831, 5599,
4832, 5600, 4833, 4834, 5602, 4835,
5603, 5604, 5350, 5606, 5607, 5608,
5098, 4587, 5099, 3822, 3823, 5360,
5624, 4345, 5625, 4346, 5626, 5627,
],
},
archived: false,
},
},
{
$project: {
readoutDescriptorId: 1,
sampleId: 1,
location: 1,
name: 1,
value: 1,
alias: 1,
species: 1,
trackingNumber: 1,
originLocation: 1,
batchAlias: 1,
replicate: 1,
groupingId: 1,
abpId: 1,
biosystemId: {
$ifNull: [
"$superRootId",
"$biosystemId",
],
},
},
},
{
$group: {
_id: {
alias: "$alias",
trackingNumber: "$trackingNumber",
origin: "$originLocation",
bioSystemId: "$biosystemId",
replicate: "$replicate",
},
readouts: {
$push: {
descriptorId: "$readoutDescriptorId",
location: "$location",
sampleId: "$sampleId",
readoutName: "$name",
readoutValue: "$value",
batchAlias: "$batchAlias",
groupingId: "$groupingId",
abpId: "$abpId",
},
},
},
},
{ $sort: { "_id.bioSystemId": 1 } },
{
$facet: {
count: [{ $count: "count" }],
results: [
{ $skip: 250 },
{ $limit: 250 },
],
unique: [
{
$group: {
_id: null,
unique: {
$addToSet: "$_id.bioSystemId",
},
},
},
{
$project: {
unique: { $size: ["$unique"] },
},
},
],
},
},
{
$project: {
total: {
$arrayElemAt: ["$count.count", 0],
},
page: "2",
unique: {
$arrayElemAt: ["$unique.unique", 0],
},
data: "$results",
},
},
{
$project: {
data: 1,
page: 2,
total: { $ifNull: ["$total", "0"] },
unique: { $ifNull: ["$unique", "0"] },
},
},
])
.allowDiskUse()
I have an index created on readoutDescriptorId.
What else can I do to improve the performance of this aggregation?
I have to mention that this particular aggregation goes through 196082 documents. And it takes for around more than 9 sec which is quite long.
Is there any low hanging fruits that can be done to improve the performance?