mongodb 何时使用索引,使用怎么样的索引?
发布于 8 年前 作者 haozxuan 7148 次浏览 来自 问答

下面是一个简单的demo操作,可以在本地有mongodb环境尝试

db.test.insert({price:45,category:'normal',brand:'GE'});
db.createIndex({price:-1});
db.createIndex({category:1,brand:1});

query1: db.test.find({brand:'GE'});
query2: db.test.find({brand:'GE'}).sort({price:1});
query3: db.test.find({$and:[{price:{$lt:50}}, {price:{$gt:30}}]}).sort({brand:1});
query4: db.test.find({ brand:'GE'}).sort({category:1, brand:-1});

场景还原:假设按照如上操作,插入一条数据,并建立索引,分别执行query,问有哪些使用了索引,为什么使用这样的索引? 提示:可以使用explain查看详细信息,以下是四条语句的executionStats;

query1:"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 1,
		"executionTimeMillis" : 0,
		"totalKeysExamined" : 0,
		"totalDocsExamined" : 1,
		"executionStages" : {
			"stage" : "COLLSCAN",
			"filter" : {
				"brand" : {
					"$eq" : "GE"
				}
			},
			"nReturned" : 1,
			"executionTimeMillisEstimate" : 0,
			"works" : 3,
			"advanced" : 1,
			"needTime" : 1,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"direction" : "forward",
			"docsExamined" : 1
		},
		"allPlansExecution" : [ ]
	}

query2:	"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 1,
		"executionTimeMillis" : 0,
		"totalKeysExamined" : 1,
		"totalDocsExamined" : 1,
		"executionStages" : {
			"stage" : "FETCH",
			"filter" : {
				"brand" : {
					"$eq" : "GE"
				}
			},
			"nReturned" : 1,
			"executionTimeMillisEstimate" : 0,
			"works" : 2,
			"advanced" : 1,
			"needTime" : 0,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"docsExamined" : 1,
			"alreadyHasObj" : 0,
			"inputStage" : {
				"stage" : "IXSCAN",
				"nReturned" : 1,
				"executionTimeMillisEstimate" : 0,
				"works" : 2,
				"advanced" : 1,
				"needTime" : 0,
				"needYield" : 0,
				"saveState" : 0,
				"restoreState" : 0,
				"isEOF" : 1,
				"invalidates" : 0,
				"keyPattern" : {
					"price" : -1
				},
				"indexName" : "price_-1",
				"isMultiKey" : false,
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 1,
				"direction" : "backward",
				"indexBounds" : {
					"price" : [
						"[MinKey, MaxKey]"
					]
				},
				"keysExamined" : 1,
				"dupsTested" : 0,
				"dupsDropped" : 0,
				"seenInvalidated" : 0
			}
		},
		"allPlansExecution" : [ ]
	}

query3:"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 1,
		"executionTimeMillis" : 8,
		"totalKeysExamined" : 1,
		"totalDocsExamined" : 1,
		"executionStages" : {
			"stage" : "SORT",
			"nReturned" : 1,
			"executionTimeMillisEstimate" : 0,
			"works" : 5,
			"advanced" : 1,
			"needTime" : 3,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"sortPattern" : {
				"brand" : 1
			},
			"memUsage" : 78,
			"memLimit" : 33554432,
			"inputStage" : {
				"stage" : "KEEP_MUTATIONS",
				"nReturned" : 1,
				"executionTimeMillisEstimate" : 0,
				"works" : 3,
				"advanced" : 1,
				"needTime" : 1,
				"needYield" : 0,
				"saveState" : 0,
				"restoreState" : 0,
				"isEOF" : 1,
				"invalidates" : 0,
				"inputStage" : {
					"stage" : "SORT_KEY_GENERATOR",
					"nReturned" : 0,
					"executionTimeMillisEstimate" : 0,
					"works" : 3,
					"advanced" : 0,
					"needTime" : 1,
					"needYield" : 0,
					"saveState" : 0,
					"restoreState" : 0,
					"isEOF" : 1,
					"invalidates" : 0,
					"inputStage" : {
						"stage" : "FETCH",
						"nReturned" : 1,
						"executionTimeMillisEstimate" : 0,
						"works" : 2,
						"advanced" : 1,
						"needTime" : 0,
						"needYield" : 0,
						"saveState" : 0,
						"restoreState" : 0,
						"isEOF" : 1,
						"invalidates" : 0,
						"docsExamined" : 1,
						"alreadyHasObj" : 0,
						"inputStage" : {
							"stage" : "IXSCAN",
							"nReturned" : 1,
							"executionTimeMillisEstimate" : 0,
							"works" : 2,
							"advanced" : 1,
							"needTime" : 0,
							"needYield" : 0,
							"saveState" : 0,
							"restoreState" : 0,
							"isEOF" : 1,
							"invalidates" : 0,
							"keyPattern" : {
								"price" : -1
							},
							"indexName" : "price_-1",
							"isMultiKey" : false,
							"isUnique" : false,
							"isSparse" : false,
							"isPartial" : false,
							"indexVersion" : 1,
							"direction" : "forward",
							"indexBounds" : {
								"price" : [
									"(50.0, 30.0)"
								]
							},
							"keysExamined" : 1,
							"dupsTested" : 0,
							"dupsDropped" : 0,
							"seenInvalidated" : 0
						}
					}
				}
			}
		},
		"allPlansExecution" : [ ]
	}

query4:"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 1,
		"executionTimeMillis" : 0,
		"totalKeysExamined" : 0,
		"totalDocsExamined" : 1,
		"executionStages" : {
			"stage" : "SORT",
			"nReturned" : 1,
			"executionTimeMillisEstimate" : 0,
			"works" : 6,
			"advanced" : 1,
			"needTime" : 4,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"sortPattern" : {
				"category" : 1,
				"brand" : -1
			},
			"memUsage" : 78,
			"memLimit" : 33554432,
			"inputStage" : {
				"stage" : "SORT_KEY_GENERATOR",
				"nReturned" : 0,
				"executionTimeMillisEstimate" : 0,
				"works" : 4,
				"advanced" : 0,
				"needTime" : 2,
				"needYield" : 0,
				"saveState" : 0,
				"restoreState" : 0,
				"isEOF" : 1,
				"invalidates" : 0,
				"inputStage" : {
					"stage" : "COLLSCAN",
					"filter" : {
						"brand" : {
							"$eq" : "GE"
						}
					},
					"nReturned" : 1,
					"executionTimeMillisEstimate" : 0,
					"works" : 3,
					"advanced" : 1,
					"needTime" : 1,
					"needYield" : 0,
					"saveState" : 0,
					"restoreState" : 0,
					"isEOF" : 1,
					"invalidates" : 0,
					"direction" : "forward",
					"docsExamined" : 1
				}
			}
		},
		"allPlansExecution" : [ ]
	}	

###分析: query1毋庸置疑,由于brand并没有索引,所以stage为[COLLSCAN](http://docs.mongoing.com/manual-zh/reference/explain-results.html" title="COLLSCAN) 全collection扫描

query2 因为查询语句执行优先级为,等值条件 > 排序条件 > 范围条件 索引stage为FETCH,进行了取值的查询,问题1 ,TETCH和COLLSCAN有什么区别?;

query3 对于该查询语句则是一脸懵逼,因为stage为SORT,并非四种常见的stage。 在此官方文档给出了解释,由于本人英语比较渣,所以个人的理解是:当客户端请求将结果排序返回时,如果mongodb不能使用索引字段进行排序,那么stage将是SORT,如果能则不包含stage为SORT;这个当然不是重点,重点是stage还有KEEP_MUTATIONS SORT_KEY_GENERATOR 这个不仅官方没有找到资料,Google也无能为了,只能希冀大神帮助解释一下了, 问题2 为何query3的executionStages有4层inputStage嵌套,而且最底层才使用price索引;

query4 同理query3的查询中出现的stage为SORT,因为mongodb并不能根据已有索引进行排序,所以也出现了stage为SORT,当然对于SORT_KEY_GENERATOR 还是一脸懵逼;问题3 为何query4不能使用category_1_brand_1这个索引,或者换个角度,category_1_brand_1 和category_1_brand_-1有区别吗?如果有,那么category_1_brand_1 和 brand_1_category_1有区别吗?当然对于该问题,本质上是不太了解mongodb的索引结构(虽然知道是B-Tree,但是仍旧不清楚);

希望大神不吝赐教

5 回复

问题3,category_1_brand_1索引并不是brand_1_category_1索引,因此find by brand是无法使用的,sort当然也就无法使用了。 https://docs.mongodb.org/manual/tutorial/sort-results-with-indexes/

@xltank 非常感谢,这个文档回答了我对compound index 的疑惑。不过对于multi index 为何只有 { a: 1 } { a: 1, b: 1 } { a: 1, b: 1, c: 1 }

却不包含

{ a: 1, c: 1 }

还有对于non-prefix 怎么理解An index can support sort operations on a non-prefix subset of the index key pattern. To do so, the query must include equality conditions on all the prefix keys that precede the sort keys. 这句话中的 equality

联合索引是依次排序的,按照{a:1, c:1}去查询索引时,先查到了a:1,然后和a索引紧密联系的是b,而不是c。找完a后找b时你发现容易找,但是c对a来说是无序的,因此没办法用索引。 联合索引的优化方式中,面对跳跃的情况,可以手工补全b的所有可能值,也因此这种优化方式是有条件的,就是b的值不会太多。 关于联合索引的b-tree形式,我没找到,但我理解为单线联系的关系:a1-b1-c1, a1-b2-c1, …

prefix说的是索引的前缀,比如a就是b,c的前缀;a,b又是c的前缀。non-prefix,就是说可以按照c排序,但是在排序前的查询阶段,需要“补全”整个前缀。这个equality应该是“等价”的意思,也就是和query by a,b,c等价的条件,比如query by a, sort by b,c, 或者query by a,b, sort by c

@xltank 非常感谢,现在对索引的使用有了新的理解;

欠楼主10块钱的路过。。

回到顶部