Facing Slowness in Complex Mongo Query

Hi Everyone, I’m facing some query slowness issue in my complex set of query stages with the million dump of records.
Can any one share me the some point of thoughts, it might be helpful for me to end this process.

db.getCollection("states").aggregate([
    { "$project": { "_id": 1 } },
    {
        "$lookup": {
            "from": "allbama",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } },
            ],
            "as": "allbama"
        }
    },
    {
        "$lookup": {
            "from": " alaska",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": " alaska"
        }
    },
    {
        "$lookup": {
            "from": " arizona",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": " arizona"
        }
    },
    {
        "$lookup": {
            "from": "california",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "california"
        }
    },
    {
        "$lookup": {
            "from": "colorado",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "colorado"
        }
    },
    {
        "$lookup": {
            "from": "florida",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "florida"
        }
    },
    {
        "$lookup": {
            "from": "lowa",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "lowa"
        }
    },
    {
        "$lookup": {
            "from": "maine",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "maine"
        }
    },
    {
        "$lookup": {
            "from": "nevada",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "nevada"
        }
    },
    {
        "$lookup": {
            "from": "oregon",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "oregon"
        }
    },
    {
        "$lookup": {
            "from": "texas",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "texas"
        }
    },
    {
        "$lookup": {
            "from": "utah",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "utah"
        }
    },
    {
        "$lookup": {
            "from": "vermont",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "vermont"
        }
    },
    {
        "$lookup": {
            "from": "virginia",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "virginia"
        }
    },
    {
        "$lookup": {
            "from": "washington",
            "localField": "_id",
            "foreignField": "state",
            "pipeline": [
                {
                    "$match": {
                        "createdAt": {
                            "$gte": ISODate("2023-01-01T18:30:00.000Z"),
                            "$lte": ISODate("2024-01-31T18:30:00.000Z")
                        },
                        "flag": "Security Flag"
                    }
                },
                { "$project": { "_id": 1 } }
            ],
            "as": "washington"
        }
    },
        {
            "$addFields": {
                "hasSecurityFlag": {
                    "$or": [
                        { "$gt": [{ "$size": "$allbama" }, 0] },
                        { "$gt": [{ "$size": "$ alaska" }, 0] },
                        { "$gt": [{ "$size": "$ arizona" }, 0] },
                        { "$gt": [{ "$size": "$california" }, 0] },
                        { "$gt": [{ "$size": "$colorado" }, 0] },
                        { "$gt": [{ "$size": "$florida" }, 0] },
                        { "$gt": [{ "$size": "$lowa" }, 0] },
                        { "$gt": [{ "$size": "$maine" }, 0] },
                        { "$gt": [{ "$size": "$nevada" }, 0] },
                        { "$gt": [{ "$size": "$oregon" }, 0] },
                        { "$gt": [{ "$size": "$texas" }, 0] },
                        { "$gt": [{ "$size": "$utah" }, 0] },
                        { "$gt": [{ "$size": "$vermont" }, 0] },
                        { "$gt": [{ "$size": "$virginia" }, 0] },
                        { "$gt": [{ "$size": "$washington" }, 0] }
                    ]
                }
            }
        },
        {
            "$match": {
                "hasSecurityFlag": true
            }
        },
        
]);

In this Query, I can able to get the results in below 5 seconds from lakhs of records without using the final match stage {“$match”: {“hasSecurityFlag”: true}}, But when i add this match stage it takes 3 to 4 minutes for the results.

But this is not an default collection field. So i can’t able to add appropriate index for this match stage. What can be updated to get this query process optimized and efficient. Thanks!

It looks like you tried to partition your data into different collections having the same structure. You are on point with

but it is because the schema is not appropriate for the use-case. This use-case would be better serve if your So it will be hard to improve, especially since we have to sample documents from all your collections. I am particularly curious about the value set of the foreignField state within each collections. I am also curious about why allbama rather than alabama, why spaces before alaska and arizona.

Things to try

1 - rather than $addField and then $match, you could $match directly using the same $or you have in $addField

2 - you could $match out after each $lookup, but without sample data it is hard to say if the result would be the same

3 - since you only $project _id out of the $lookup it is not clear what you could do with this other than counting or referring other things. May be you could $limit:1 in the $lookup pipeline as it could be sufficient to know if there is at lease one item per state

4 - may be $unionWith would be a better way to query each state

5 - your last $match is simply there to remove empty state, you surely could do that in the application because transferring a few empty arrays should not be that costly

The use-case is very USA oriented yet writing lakhs of records is not. steevej is very intrigued B-)

Thanks for your thoughts, i will come up with the changes as per you mentioned.