我想获得MongoDB集合中所有键的名称。

例如,从这个:

db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : []  } );

我想获得唯一的键:

type, egg, hello

当前回答

要获得所有键减去_id的列表,可以考虑运行以下聚合管道:

var keys = db.collection.aggregate([
    { "$project": {
       "hashmaps": { "$objectToArray": "$$ROOT" } 
    } }, 
    { "$group": {
        "_id": null,
        "fields": { "$addToSet": "$hashmaps.k" }
    } },
    { "$project": {
            "keys": {
                "$setDifference": [
                    {
                        "$reduce": {
                            "input": "$fields",
                            "initialValue": [],
                            "in": { "$setUnion" : ["$$value", "$$this"] }
                        }
                    },
                    ["_id"]
                ]
            }
        }
    }
]).toArray()[0]["keys"];

其他回答

你可以用MapReduce来做:

mr = db.runCommand({
  "mapreduce" : "my_collection",
  "map" : function() {
    for (var key in this) { emit(key, null); }
  },
  "reduce" : function(key, stuff) { return null; }, 
  "out": "my_collection" + "_keys"
})

然后在结果集合上单独运行,以便找到所有的键:

db[mr.result].distinct("_id")
["foo", "bar", "baz", "_id", ...]

下面是用Python编写的示例: 这个示例内联返回结果。

from pymongo import MongoClient
from bson.code import Code

mapper = Code("""
    function() {
                  for (var key in this) { emit(key, null); }
               }
""")
reducer = Code("""
    function(key, stuff) { return null; }
""")

distinctThingFields = db.things.map_reduce(mapper, reducer
    , out = {'inline' : 1}
    , full_response = True)
## do something with distinctThingFields['results']

我认为这里提到的最好的方法是在mongod 3.4.4+中,但不使用$unwind操作符,只使用管道中的两个阶段。相反,我们可以使用$mergeObjects和$objectToArray操作符。

在$group阶段,我们使用$mergeObjects操作符返回一个文档,其中键/值来自集合中的所有文档。

然后是$project,我们使用$map和$objectToArray返回键。

let allTopLevelKeys =  [
    {
        "$group": {
            "_id": null,
            "array": {
                "$mergeObjects": "$$ROOT"
            }
        }
    },
    {
        "$project": {
            "keys": {
                "$map": {
                    "input": { "$objectToArray": "$array" },
                    "in": "$$this.k"
                }
            }
        }
    }
];

现在,如果我们有一个嵌套的文档,想要获得键,这是可行的。为了简单起见,让我们考虑一个包含简单嵌入式文档的文档,它看起来像这样:

{field1: {field2: "abc"}, field3: "def"}
{field1: {field3: "abc"}, field4: "def"}

下面的管道会生成所有的键(field1, field2, field3, field4)。

let allFistSecondLevelKeys = [
    {
        "$group": {
            "_id": null,
            "array": {
                "$mergeObjects": "$$ROOT"
            }
        }
    },
    {
        "$project": {
            "keys": {
                "$setUnion": [
                    {
                        "$map": {
                            "input": {
                                "$reduce": {
                                    "input": {
                                        "$map": {
                                            "input": {
                                                "$objectToArray": "$array"
                                            },
                                            "in": {
                                                "$cond": [
                                                    {
                                                        "$eq": [
                                                            {
                                                                "$type": "$$this.v"
                                                            },
                                                            "object"
                                                        ]
                                                    },
                                                    {
                                                        "$objectToArray": "$$this.v"
                                                    },
                                                    [
                                                        "$$this"
                                                    ]
                                                ]
                                            }
                                        }
                                    },
                                    "initialValue": [

                                    ],
                                    "in": {
                                        "$concatArrays": [
                                            "$$this",
                                            "$$value"
                                        ]
                                    }
                                }
                            },
                            "in": "$$this.k"
                        }
                    }
                ]
            }
        }
    }
]

只需稍加努力,我们就可以获得数组字段中所有子文档的键,其中元素也是object。

基于@Wolkenarchitekt的回答:https://stackoverflow.com/a/48117846/8808983,我写了一个脚本,可以在db中找到所有键的模式,我认为它可以帮助其他人阅读这个线程:

"""
Python 3
This script get list of patterns and print the collections that contains fields with this patterns.
"""

import argparse

import pymongo
from bson import Code


# initialize mongo connection:
def get_db():
    client = pymongo.MongoClient("172.17.0.2")
    db = client["Data"]
    return db


def get_commandline_options():
    description = "To run use: python db_fields_pattern_finder.py -p <list_of_patterns>"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-p', '--patterns', nargs="+", help='List of patterns to look for in the db.', required=True)
    return parser.parse_args()


def report_matching_fields(relevant_fields_by_collection):
    print("Matches:")

    for collection_name in relevant_fields_by_collection:
        if relevant_fields_by_collection[collection_name]:
            print(f"{collection_name}: {relevant_fields_by_collection[collection_name]}")

    # pprint(relevant_fields_by_collection)


def get_collections_names(db):
    """
    :param pymongo.database.Database db:
    :return list: collections names
    """
    return db.list_collection_names()


def get_keys(db, collection):
    """
    See: https://stackoverflow.com/a/48117846/8808983
    :param db:
    :param collection:
    :return:
    """
    map = Code("function() { for (var key in this) { emit(key, null); } }")
    reduce = Code("function(key, stuff) { return null; }")
    result = db[collection].map_reduce(map, reduce, "myresults")
    return result.distinct('_id')


def get_fields(db, collection_names):
    fields_by_collections = {}
    for collection_name in collection_names:
        fields_by_collections[collection_name] = get_keys(db, collection_name)
    return fields_by_collections


def get_matches_fields(fields_by_collections, patterns):
    relevant_fields_by_collection = {}
    for collection_name in fields_by_collections:
        relevant_fields = [field for field in fields_by_collections[collection_name] if
                           [pattern for pattern in patterns if
                            pattern in field]]
        relevant_fields_by_collection[collection_name] = relevant_fields

    return relevant_fields_by_collection


def main(patterns):
    """
    :param list patterns: List of strings to look for in the db.
    """
    db = get_db()

    collection_names = get_collections_names(db)
    fields_by_collections = get_fields(db, collection_names)
    relevant_fields_by_collection = get_matches_fields(fields_by_collections, patterns)

    report_matching_fields(relevant_fields_by_collection)


if __name__ == '__main__':
    args = get_commandline_options()
    main(args.patterns)

如果你正在使用mongodb 3.4.4及以上版本,那么你可以使用$objectToArray和$group聚合来使用下面的聚合

db.collection.aggregate([
  { "$project": {
    "data": { "$objectToArray": "$$ROOT" }
  }},
  { "$project": { "data": "$data.k" }},
  { "$unwind": "$data" },
  { "$group": {
    "_id": null,
    "keys": { "$addToSet": "$data" }
  }}
])

下面是工作示例