我想获得MongoDB集合中所有键的名称。
例如,从这个:
db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : [] } );
我想获得唯一的键:
type, egg, hello
我想获得MongoDB集合中所有键的名称。
例如,从这个:
db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : [] } );
我想获得唯一的键:
type, egg, hello
当前回答
根据mongoldb文档,一个不同的组合
在单个集合或视图中查找指定字段的不同值,并以数组形式返回结果。
索引集合操作将返回给定键或索引的所有可能值:
返回一个数组,其中包含标识和描述集合上现有索引的文档列表
所以在一个给定的方法中,你可以使用下面的方法,为了查询一个集合中所有注册的索引,并返回一个对象,比如一个键的索引(这个例子使用async/await用于NodeJS,但显然你可以使用任何其他异步方法):
async function GetFor(collection, index) {
let currentIndexes;
let indexNames = [];
let final = {};
let vals = [];
try {
currentIndexes = await collection.indexes();
await ParseIndexes();
//Check if a specific index was queried, otherwise, iterate for all existing indexes
if (index && typeof index === "string") return await ParseFor(index, indexNames);
await ParseDoc(indexNames);
await Promise.all(vals);
return final;
} catch (e) {
throw e;
}
function ParseIndexes() {
return new Promise(function (result) {
let err;
for (let ind in currentIndexes) {
let index = currentIndexes[ind];
if (!index) {
err = "No Key For Index "+index; break;
}
let Name = Object.keys(index.key);
if (Name.length === 0) {
err = "No Name For Index"; break;
}
indexNames.push(Name[0]);
}
return result(err ? Promise.reject(err) : Promise.resolve());
})
}
async function ParseFor(index, inDoc) {
if (inDoc.indexOf(index) === -1) throw "No Such Index In Collection";
try {
await DistinctFor(index);
return final;
} catch (e) {
throw e
}
}
function ParseDoc(doc) {
return new Promise(function (result) {
let err;
for (let index in doc) {
let key = doc[index];
if (!key) {
err = "No Key For Index "+index; break;
}
vals.push(new Promise(function (pushed) {
DistinctFor(key)
.then(pushed)
.catch(function (err) {
return pushed(Promise.resolve());
})
}))
}
return result(err ? Promise.reject(err) : Promise.resolve());
})
}
async function DistinctFor(key) {
if (!key) throw "Key Is Undefined";
try {
final[key] = await collection.distinct(key);
} catch (e) {
final[key] = 'failed';
throw e;
}
}
}
因此,使用基本_id索引查询一个集合,将返回以下内容(测试集合在测试时只有一个文档):
Mongo.MongoClient.connect(url, function (err, client) {
assert.equal(null, err);
let collection = client.db('my db').collection('the targeted collection');
GetFor(collection, '_id')
.then(function () {
//returns
// { _id: [ 5ae901e77e322342de1fb701 ] }
})
.catch(function (err) {
//manage your error..
})
});
注意,这使用了NodeJS Driver原生的方法。正如其他一些答案所建议的那样,还有其他方法,例如聚合框架。我个人认为这种方法更灵活,因为您可以轻松地创建和微调如何返回结果。显然,这只处理顶级属性,而不是嵌套属性。 此外,为了保证所有文档都被表示,如果有二级索引(除了主_id索引),这些索引应该根据需要设置。
其他回答
我认为这里提到的最好的方法是在mongod 3.4.4+中,但不使用$unwind操作符,只使用管道中的两个阶段。相反,我们可以使用$mergeObjects和$objectToArray操作符。
在$group阶段,我们使用$mergeObjects操作符返回一个文档,其中键/值来自集合中的所有文档。
然后是$project,我们使用$map和$objectToArray返回键。
let allTopLevelKeys = [
{
"$group": {
"_id": null,
"array": {
"$mergeObjects": "$$ROOT"
}
}
},
{
"$project": {
"keys": {
"$map": {
"input": { "$objectToArray": "$array" },
"in": "$$this.k"
}
}
}
}
];
现在,如果我们有一个嵌套的文档,想要获得键,这是可行的。为了简单起见,让我们考虑一个包含简单嵌入式文档的文档,它看起来像这样:
{field1: {field2: "abc"}, field3: "def"}
{field1: {field3: "abc"}, field4: "def"}
下面的管道会生成所有的键(field1, field2, field3, field4)。
let allFistSecondLevelKeys = [
{
"$group": {
"_id": null,
"array": {
"$mergeObjects": "$$ROOT"
}
}
},
{
"$project": {
"keys": {
"$setUnion": [
{
"$map": {
"input": {
"$reduce": {
"input": {
"$map": {
"input": {
"$objectToArray": "$array"
},
"in": {
"$cond": [
{
"$eq": [
{
"$type": "$$this.v"
},
"object"
]
},
{
"$objectToArray": "$$this.v"
},
[
"$$this"
]
]
}
}
},
"initialValue": [
],
"in": {
"$concatArrays": [
"$$this",
"$$value"
]
}
}
},
"in": "$$this.k"
}
}
]
}
}
}
]
只需稍加努力,我们就可以获得数组字段中所有子文档的键,其中元素也是object。
你可以用MapReduce来做:
mr = db.runCommand({
"mapreduce" : "my_collection",
"map" : function() {
for (var key in this) { emit(key, null); }
},
"reduce" : function(key, stuff) { return null; },
"out": "my_collection" + "_keys"
})
然后在结果集合上单独运行,以便找到所有的键:
db[mr.result].distinct("_id")
["foo", "bar", "baz", "_id", ...]
基于@Wolkenarchitekt的回答:https://stackoverflow.com/a/48117846/8808983,我写了一个脚本,可以在db中找到所有键的模式,我认为它可以帮助其他人阅读这个线程:
"""
Python 3
This script get list of patterns and print the collections that contains fields with this patterns.
"""
import argparse
import pymongo
from bson import Code
# initialize mongo connection:
def get_db():
client = pymongo.MongoClient("172.17.0.2")
db = client["Data"]
return db
def get_commandline_options():
description = "To run use: python db_fields_pattern_finder.py -p <list_of_patterns>"
parser = argparse.ArgumentParser(description=description)
parser.add_argument('-p', '--patterns', nargs="+", help='List of patterns to look for in the db.', required=True)
return parser.parse_args()
def report_matching_fields(relevant_fields_by_collection):
print("Matches:")
for collection_name in relevant_fields_by_collection:
if relevant_fields_by_collection[collection_name]:
print(f"{collection_name}: {relevant_fields_by_collection[collection_name]}")
# pprint(relevant_fields_by_collection)
def get_collections_names(db):
"""
:param pymongo.database.Database db:
:return list: collections names
"""
return db.list_collection_names()
def get_keys(db, collection):
"""
See: https://stackoverflow.com/a/48117846/8808983
:param db:
:param collection:
:return:
"""
map = Code("function() { for (var key in this) { emit(key, null); } }")
reduce = Code("function(key, stuff) { return null; }")
result = db[collection].map_reduce(map, reduce, "myresults")
return result.distinct('_id')
def get_fields(db, collection_names):
fields_by_collections = {}
for collection_name in collection_names:
fields_by_collections[collection_name] = get_keys(db, collection_name)
return fields_by_collections
def get_matches_fields(fields_by_collections, patterns):
relevant_fields_by_collection = {}
for collection_name in fields_by_collections:
relevant_fields = [field for field in fields_by_collections[collection_name] if
[pattern for pattern in patterns if
pattern in field]]
relevant_fields_by_collection[collection_name] = relevant_fields
return relevant_fields_by_collection
def main(patterns):
"""
:param list patterns: List of strings to look for in the db.
"""
db = get_db()
collection_names = get_collections_names(db)
fields_by_collections = get_fields(db, collection_names)
relevant_fields_by_collection = get_matches_fields(fields_by_collections, patterns)
report_matching_fields(relevant_fields_by_collection)
if __name__ == '__main__':
args = get_commandline_options()
main(args.patterns)
试试这个:
doc=db.thinks.findOne();
for (key in doc) print(key);
如果你的目标集合不是很大,你可以在mongo shell客户端下尝试:
var allKeys = {};
db.YOURCOLLECTION.find().forEach(function(doc){Object.keys(doc).forEach(function(key){allKeys[key]=1})});
allKeys;