I managed to do it on my own. Consider the following data:
db.properties.deleteMany({})
db.properties.insertMany([{'_id': 'P1', 'label': 'main subject'}])
db.properties.insertMany([{'_id': 'P2', 'label': 'language'}])
db.properties.insertMany([{'_id': 'P3', 'label': 'instance of'}])
db.items.deleteMany({});
db.items.insertMany([
{'_id': 'Q1', 'label': 'algorithm'},
{'_id': 'Q2', 'label': 'C++'},
{'_id': 'Q3', 'label': 'English'},
{'_id': 'Q4', 'label': 'written work'},
{
'_id': 'Q5',
'label': 'Algorithms in C++',
'statements': {
'P1': [{'value': 'Q1'}, {'value': 'Q2'}],
'P2': [{'value': 'Q3'}],
'P3': [{'value': 'Q4'}]
}
},
]);
This is the query I came up with:
printjson(db.items.aggregate([
// We use objectToArray so that afterwards we can unwind $statements.
{
$project: {
statements: {
$objectToArray: '$statements'
}
}
},
// We $unwind $statements so that we have an object for each
// different property.
{
$unwind: "$statements"
},
// We $unwind $statements.v so that we have an object for each
// different pair of property and value.
{
$unwind: "$statements.v"
},
// lookup properties
{
$lookup: {
'from': 'properties',
'localField': 'statements.k',
'foreignField': '_id',
'as': 'statements.k'
}
},
// convert array to object in key "k"
{
$unwind: "$statements.k"
},
// lookup items
{
$lookup: {
'from': 'items',
'localField': 'statements.v.value',
'foreignField': '_id',
'as': 'statements.v'
}
},
// convert array to object in key "v"
{
$unwind: "$statements.v"
},
// group by item id and property id
{
$group: {
"_id": {
"_id": "$_id",
"property_id": "$statements.k._id"
},
"statements": {
"$push": {
"property": "$statements.k",
"value": "$statements.v"
}
}
}
},
// group by property id
{
$group: {
"_id": "$_id._id",
"statements": {
"$push": {
k: "$_id.property_id",
v: "$statements"
}
}
}
},
// make statements be an object
{
$project: {
"statements": {
$arrayToObject: "$statements"
}
}
},
]))
output:
[
{
_id: 'Q5',
statements: {
P1: [
{
property: {
_id: 'P1',
label: 'main subject'
},
value: {
_id: 'Q1',
label: 'algorithm'
}
},
{
property: {
_id: 'P1',
label: 'main subject'
},
value: {
_id: 'Q2',
label: 'C++'
}
}
],
P2: [
{
property: {
_id: 'P2',
label: 'language'
},
value: {
_id: 'Q3',
label: 'English'
}
}
],
P3: [
{
property: {
_id: 'P3',
label: 'instance of'
},
value: {
_id: 'Q4',
label: 'written work'
}
}
]
}
}
]
This is the first time I write an aggregate query that involves more than 2 operations, so I wonder if there’s a shorter way to get the same output.