我想获得MongoDB集合中所有键的名称。
例如,从这个:
// Sample data: four documents whose top-level keys (besides the automatic _id)
// are `type`, `egg`, `type` and `hello`.
db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : [] } );
我想获得唯一的键:
type, egg, hello
我想获得MongoDB集合中所有键的名称。
例如,从这个:
// Sample data: four documents whose top-level keys (besides the automatic _id)
// are `type`, `egg`, `type` and `hello`.
db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : [] } );
我想获得唯一的键:
type, egg, hello
当前回答
基于@Wolkenarchitekt的回答:https://stackoverflow.com/a/48117846/8808983,我写了一个脚本,用于在数据库所有集合的键名中查找给定的模式,希望能帮到阅读本帖的其他人:
"""
Python 3
This script takes a list of patterns and prints the collections that contain fields matching these patterns.
"""
import argparse
import pymongo
from bson import Code
# initialize mongo connection:
def get_db(host="172.17.0.2", db_name="Data"):
    """
    Open a MongoDB connection and return a handle to one database.

    :param str host: Hostname, IP address or MongoDB URI of the server.
        Defaults to the address that used to be hard-coded here.
    :param str db_name: Name of the database to select. Defaults to "Data".
    :return pymongo.database.Database: handle to the selected database
    """
    # MongoClient connects lazily, so an unreachable host only surfaces on
    # the first real operation, not here.
    client = pymongo.MongoClient(host)
    return client[db_name]
def get_commandline_options():
    """
    Parse the command line of the script.

    :return argparse.Namespace: parsed options; ``patterns`` holds the list
        of strings given after ``-p`` / ``--patterns`` (at least one).
    """
    usage_hint = "To run use: python db_fields_pattern_finder.py -p <list_of_patterns>"
    arg_parser = argparse.ArgumentParser(description=usage_hint)
    arg_parser.add_argument(
        '-p',
        '--patterns',
        required=True,
        nargs="+",
        help='List of patterns to look for in the db.',
    )
    options = arg_parser.parse_args()
    return options
def report_matching_fields(relevant_fields_by_collection):
    """
    Print every collection that has at least one matching field.

    :param dict relevant_fields_by_collection: collection name -> list of
        matching field names; collections with an empty list are skipped.
    """
    print("Matches:")
    for name, matched in relevant_fields_by_collection.items():
        if not matched:
            continue
        print(f"{name}: {matched}")
def get_collections_names(db):
    """
    List the collections of a database.

    :param pymongo.database.Database db: database to inspect
    :return list: names of the collections in ``db``
    """
    names = db.list_collection_names()
    return names
def get_keys(db, collection):
    """
    Return the distinct top-level field names used in a collection.

    The original implementation relied on ``Collection.map_reduce``, which
    was removed in PyMongo 4.0 and also left a permanent ``myresults``
    collection behind in the database. An aggregation pipeline gives the same
    answer without either problem (``$objectToArray`` needs MongoDB 3.4.4+).

    See: https://stackoverflow.com/a/48117846/8808983
    :param pymongo.database.Database db: database holding the collection
    :param str collection: name of the collection to inspect
    :return list: sorted, de-duplicated field names (``_id`` included)
    """
    pipeline = [
        # Turn each document into a list of {k: <field name>, v: <value>} pairs.
        {"$project": {"pairs": {"$objectToArray": "$$ROOT"}}},
        {"$unwind": "$pairs"},
        # Grouping on the name collapses duplicates across documents.
        {"$group": {"_id": "$pairs.k"}},
    ]
    return sorted(doc["_id"] for doc in db[collection].aggregate(pipeline))
def get_fields(db, collection_names):
    """
    Look up the field names of several collections.

    :param db: database handle, passed through to ``get_keys``
    :param collection_names: iterable of collection names to inspect
    :return dict: collection name -> whatever ``get_keys`` returns for it
    """
    return {name: get_keys(db, name) for name in collection_names}
def get_matches_fields(fields_by_collections, patterns):
    """
    Keep, per collection, only the fields containing one of the patterns.

    A field matches when at least one pattern occurs in it as a plain
    substring (no regex, case-sensitive).

    :param dict fields_by_collections: collection name -> list of field names
    :param list patterns: substrings to look for
    :return dict: collection name -> list of matching field names, in their
        original order; collections without a match map to an empty list
    """
    return {
        collection_name: [
            field for field in fields
            # any() stops at the first hit instead of building a throwaway list.
            if any(pattern in field for pattern in patterns)
        ]
        for collection_name, fields in fields_by_collections.items()
    }
def main(patterns):
    """
    Run the whole search: connect, collect field names, filter, report.

    :param list patterns: List of strings to look for in the db.
    """
    database = get_db()
    all_fields = get_fields(database, get_collections_names(database))
    matches = get_matches_fields(all_fields, patterns)
    report_matching_fields(matches)
if __name__ == '__main__':
    # Script entry point: forward the -p/--patterns values to main().
    main(get_commandline_options().patterns)
其他回答
基于@Wolkenarchitekt的回答:https://stackoverflow.com/a/48117846/8808983,我写了一个脚本,用于在数据库所有集合的键名中查找给定的模式,希望能帮到阅读本帖的其他人:
"""
Python 3
This script takes a list of patterns and prints the collections that contain fields matching these patterns.
"""
import argparse
import pymongo
from bson import Code
# initialize mongo connection:
def get_db(host="172.17.0.2", db_name="Data"):
    """
    Open a MongoDB connection and return a handle to one database.

    :param str host: Hostname, IP address or MongoDB URI of the server.
        Defaults to the address that used to be hard-coded here.
    :param str db_name: Name of the database to select. Defaults to "Data".
    :return pymongo.database.Database: handle to the selected database
    """
    # MongoClient connects lazily, so an unreachable host only surfaces on
    # the first real operation, not here.
    client = pymongo.MongoClient(host)
    return client[db_name]
def get_commandline_options():
    """
    Parse the command line of the script.

    :return argparse.Namespace: parsed options; ``patterns`` holds the list
        of strings given after ``-p`` / ``--patterns`` (at least one).
    """
    usage_hint = "To run use: python db_fields_pattern_finder.py -p <list_of_patterns>"
    arg_parser = argparse.ArgumentParser(description=usage_hint)
    arg_parser.add_argument(
        '-p',
        '--patterns',
        required=True,
        nargs="+",
        help='List of patterns to look for in the db.',
    )
    options = arg_parser.parse_args()
    return options
def report_matching_fields(relevant_fields_by_collection):
    """
    Print every collection that has at least one matching field.

    :param dict relevant_fields_by_collection: collection name -> list of
        matching field names; collections with an empty list are skipped.
    """
    print("Matches:")
    for name, matched in relevant_fields_by_collection.items():
        if not matched:
            continue
        print(f"{name}: {matched}")
def get_collections_names(db):
    """
    List the collections of a database.

    :param pymongo.database.Database db: database to inspect
    :return list: names of the collections in ``db``
    """
    names = db.list_collection_names()
    return names
def get_keys(db, collection):
    """
    Return the distinct top-level field names used in a collection.

    The original implementation relied on ``Collection.map_reduce``, which
    was removed in PyMongo 4.0 and also left a permanent ``myresults``
    collection behind in the database. An aggregation pipeline gives the same
    answer without either problem (``$objectToArray`` needs MongoDB 3.4.4+).

    See: https://stackoverflow.com/a/48117846/8808983
    :param pymongo.database.Database db: database holding the collection
    :param str collection: name of the collection to inspect
    :return list: sorted, de-duplicated field names (``_id`` included)
    """
    pipeline = [
        # Turn each document into a list of {k: <field name>, v: <value>} pairs.
        {"$project": {"pairs": {"$objectToArray": "$$ROOT"}}},
        {"$unwind": "$pairs"},
        # Grouping on the name collapses duplicates across documents.
        {"$group": {"_id": "$pairs.k"}},
    ]
    return sorted(doc["_id"] for doc in db[collection].aggregate(pipeline))
def get_fields(db, collection_names):
    """
    Look up the field names of several collections.

    :param db: database handle, passed through to ``get_keys``
    :param collection_names: iterable of collection names to inspect
    :return dict: collection name -> whatever ``get_keys`` returns for it
    """
    return {name: get_keys(db, name) for name in collection_names}
def get_matches_fields(fields_by_collections, patterns):
    """
    Keep, per collection, only the fields containing one of the patterns.

    A field matches when at least one pattern occurs in it as a plain
    substring (no regex, case-sensitive).

    :param dict fields_by_collections: collection name -> list of field names
    :param list patterns: substrings to look for
    :return dict: collection name -> list of matching field names, in their
        original order; collections without a match map to an empty list
    """
    return {
        collection_name: [
            field for field in fields
            # any() stops at the first hit instead of building a throwaway list.
            if any(pattern in field for pattern in patterns)
        ]
        for collection_name, fields in fields_by_collections.items()
    }
def main(patterns):
    """
    Run the whole search: connect, collect field names, filter, report.

    :param list patterns: List of strings to look for in the db.
    """
    database = get_db()
    all_fields = get_fields(database, get_collections_names(database))
    matches = get_matches_fields(all_fields, patterns)
    report_matching_fields(matches)
if __name__ == '__main__':
    # Script entry point: forward the -p/--patterns values to main().
    main(get_commandline_options().patterns)
我试着用nodejs写,最后想到了这个:
db.collection('collectionName').mapReduce(
    // Map: emit every top-level field name of each document.
    function() {
        for (var key in this) {
            emit(key, null);
        }
    },
    // Reduce: only the emitted keys matter, so the values are discarded.
    function(key, stuff) {
        return null;
    }, {
        "out": "allFieldNames"
    },
    function(err, results) {
        // The original ignored `err` and queried the output collection even
        // when mapReduce itself had failed; report the failure instead.
        // NOTE(review): `res`, `db` and `delteCollection` come from the
        // enclosing scope, which is not part of this snippet.
        if (err) {
            res.send(err);
            delteCollection(db, 'allFieldNames');
            return;
        }
        // The _id values of the output collection are the distinct field names.
        db.collection('allFieldNames').distinct('_id')
            .then(function(data) {
                res.send({
                    "status": "success",
                    "fields": data
                });
                delteCollection(db, 'allFieldNames');
            })
            .catch(function(err) {
                res.send(err);
                delteCollection(db, 'allFieldNames');
            });
    });
读取新创建的集合“allFieldNames”后,删除它。
// Empty the temporary "allFieldNames" collection, then close the connection.
// NOTE(review): remove({}) deletes the documents but leaves the (empty)
// collection in place; drop() would remove it entirely — confirm intent.
var tempFieldNames = db.collection("allFieldNames");
tempFieldNames.remove({}, function (removeErr, removeResult) {
    db.close();
});
要获得除 _id 之外所有键的列表,可以考虑运行以下聚合管道:
// Collect the distinct top-level field names of the collection, minus "_id".
var result = db.collection.aggregate([
    // Turn every document into an array of {k, v} pairs.
    { "$project": {
        "hashmaps": { "$objectToArray": "$$ROOT" }
    } },
    // Gather the distinct per-document key lists into a single document.
    { "$group": {
        "_id": null,
        "fields": { "$addToSet": "$hashmaps.k" }
    } },
    // Flatten the key lists into one set and drop "_id".
    { "$project": {
        "keys": {
            "$setDifference": [
                {
                    "$reduce": {
                        "input": "$fields",
                        "initialValue": [],
                        "in": { "$setUnion" : ["$$value", "$$this"] }
                    }
                },
                ["_id"]
            ]
        }
    } }
]).toArray();
// An empty collection produces no $group output, so fall back to an empty
// list instead of failing on result[0].
var keys = result.length > 0 ? result[0]["keys"] : [];
我很惊讶,这里没有人使用简单的 JavaScript 和 Set 逻辑来自动过滤重复的值,下面是 mongo shell 中的一个简单例子:
// Accumulate every top-level key seen in any document; the Set de-duplicates.
var allKeys = new Set();
db.collectionName.find().forEach(function (doc) {
    // `var` keeps the loop variable local instead of leaking a global `key`.
    for (var key in doc) {
        allKeys.add(key);
    }
});
for (let key of allKeys) print(key);
这将打印名为 collectionName 的集合中所有可能的唯一键。
我扩展了Carlos LM的解决方案,使其更加详细。
一个模式的例子:
// Example document: scalar fields plus `ls`, an array of sub-documents that
// each contain a nested object `p`.
var schema = {
_id: 123,
id: 12,
t: 'title',
p: 4.5,
ls: [{
l: 'lemma',
p: {
pp: 8.9
}
},
{
l: 'lemma2',
p: {
pp: 8.3
}
}
]
};
在控制台输入:
/**
 * Print the structure of a document: one line per key with its type,
 * recursing into nested objects and arrays with one extra space of
 * indentation per level.
 *
 * @param {Object} schema  Document (or sub-document) to describe.
 * @param {number} [i=1]   Nesting depth; the indent is i-1 spaces.
 * @param {boolean} [limit=false]  When true, stop after the first key
 *     (passed as true for arrays so only the first element is described).
 */
var schemafy = function(schema, i, limit) {
    i = (typeof i !== 'undefined') ? i : 1;
    limit = (typeof limit !== 'undefined') ? limit : false;
    // `var key` keeps the loop variable local; without it every recursive
    // call would share one implicit global.
    for (var key in schema) {
        var value = schema[key];
        var type = typeof value;
        var isArray = value instanceof Array;
        var indent = Array(i).join(' ');
        if (type === 'object') {
            print(indent + key + ' <' + (isArray ? 'array' : type) + '>:');
            schemafy(value, i + 1, isArray);
        } else {
            print(indent + key + ' <' + type + '>');
        }
        if (limit) {
            break;
        }
    }
};
Run:
// Describe the structure of a single document fetched from the collection.
schemafy(db.collection.findOne());
输出
_id <number>
id <number>
t <string>
p <number>
ls <array>:
 0 <object>:
  l <string>
  p <object>:
   pp <number>