• MongoDB 索引相关知识


    背景:

          MongoDB和MySQL一样,都会产生慢查询,所以都需要对其进行优化:包括创建索引、重构查询等。现在就说明在MongoDB下的索引相关知识点,可以通过这篇文章MongoDB 查询优化分析了解MongoDB慢查询的一些特点。

    执行计划分析:

          因为MongoDB也是BTree索引,所以使用上和MySQL大致一样。通过explain查看一个query的执行计划,来判断如何加索引,explain在3.0版本的时候做了一些改进,现在针对这2个版本进行分析:

    3.0之前:

    zjy:PRIMARY> db.newtask.find({"b":"CYHS1301942"}).explain()
    {
        "cursor" : "BtreeCursor b_1_date_1", #游标类型:BasicCursor(全表扫描)、BtreeCursor(BTree索引扫描)、GeoSearchCursor(地理空间索引扫描)。
        "isMultiKey" : false,
        "n" : 324,  #返回的结果数,count()。
        "nscannedObjects" : 324, #扫描的对象
        "nscanned" : 324,        #扫描的索引数
        "nscannedObjectsAllPlans" : 324, #代表所有尝试执行的计划所扫描的对象
        "nscannedAllPlans" : 324,        #代表所有尝试执行的计划所扫描的索引
        "scanAndOrder" : false,          #True:对文档进行排序,false:对索引进行排序
        "indexOnly" : false,             #对查询的结果进行排序不需要搜索其他文档,查询和返回字段使用同一索引
        "nYields" : 0,                   #为了让写操作执行而让出读锁的次数
        "nChunkSkips" : 0,               #忽略文档数
        "millis" : 1,                    #执行查询消耗的时间
        "indexBounds" : {   #索引扫描中使用的最大/小值。
            "b" : [
                [
                    "CYHS1301942",
                    "CYHS1301942"
                ]
            ],
            "date" : [
                [
                    {
                        "$minElement" : 1
                    },
                    {
                        "$maxElement" : 1
                    }
                ]
            ]
        },
        "server" : "db-mongo1:27017"
    }

    3.0之后:在explain()里有三个参数:"queryPlanner", "executionStats", and "allPlansExecution",默认是:queryPlanner。具体的含义见官方文档

    zjy:PRIMARY> db.newtask.find({"b":"CYHS1301942"}).explain()
    {
        "queryPlanner" : {
            "plannerVersion" : 1,
            "namespace" : "cde.newtask",    #集合
            "indexFilterSet" : false,
            "parsedQuery" : {
                "b" : {
                    "$eq" : "CYHS1301942"
                }
            },
            "winningPlan" : {
                "stage" : "FETCH",
                "inputStage" : {
                    "stage" : "IXSCAN",     #索引扫描,COLLSCAN表示全表扫描。
                    "keyPattern" : {
                        "b" : 1,
                        "date" : 1
                    },
                    "indexName" : "b_1_date_1", #索引名
                    "isMultiKey" : false,
                    "direction" : "forward",
                    "indexBounds" : {
                        "b" : [
                            "["CYHS1301942", "CYHS1301942"]"
                        ],
                        "date" : [
                            "[MinKey, MaxKey]"
                        ]
                    }
                }
            },
            "rejectedPlans" : [ ]
        },
        "serverInfo" : {
            "host" : "mongo1",
            "port" : 27017,
            "version" : "3.0.4",
            "gitVersion" : "0481c958daeb2969800511e7475dc66986fa9ed5"
        },
        "ok" : 1
    }

    3.0要是查看更详细的执行计划请看其他2个参数:

    zjy:PRIMARY> db.newtask.find({"b":"CYHS1301942"}).explain("allPlansExecution")
    {
        "queryPlanner" : {
            "plannerVersion" : 1,
            "namespace" : "cde.newtask",
            "indexFilterSet" : false,
            "parsedQuery" : {
                "b" : {
                    "$eq" : "CYHS1301942"
                }
            },
            "winningPlan" : {
                "stage" : "FETCH",
                "inputStage" : {
                    "stage" : "IXSCAN",
                    "keyPattern" : {
                        "b" : 1,
                        "date" : 1
                    },
                    "indexName" : "b_1_date_1",
                    "isMultiKey" : false,
                    "direction" : "forward",
                    "indexBounds" : {
                        "b" : [
                            "["CYHS1301942", "CYHS1301942"]"
                        ],
                        "date" : [
                            "[MinKey, MaxKey]"
                        ]
                    }
                }
            },
            "rejectedPlans" : [ ]
        },
        "executionStats" : {
            "executionSuccess" : true,
            "nReturned" : 1,
            "executionTimeMillis" : 0,
            "totalKeysExamined" : 1,
            "totalDocsExamined" : 1,
            "executionStages" : {
                "stage" : "FETCH",
                "nReturned" : 1,
                "executionTimeMillisEstimate" : 0,
                "works" : 2,
                "advanced" : 1,
                "needTime" : 0,
                "needFetch" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "docsExamined" : 1,
                "alreadyHasObj" : 0,
                "inputStage" : {
                    "stage" : "IXSCAN",
                    "nReturned" : 1,
                    "executionTimeMillisEstimate" : 0,
                    "works" : 2,
                    "advanced" : 1,
                    "needTime" : 0,
                    "needFetch" : 0,
                    "saveState" : 0,
                    "restoreState" : 0,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "keyPattern" : {
                        "b" : 1,
                        "date" : 1
                    },
                    "indexName" : "b_1_date_1",
                    "isMultiKey" : false,
                    "direction" : "forward",
                    "indexBounds" : {
                        "b" : [
                            "["CYHS1301942", "CYHS1301942"]"
                        ],
                        "date" : [
                            "[MinKey, MaxKey]"
                        ]
                    },
                    "keysExamined" : 1,
                    "dupsTested" : 0,
                    "dupsDropped" : 0,
                    "seenInvalidated" : 0,
                    "matchTested" : 0
                }
            },
            "allPlansExecution" : [ ]
        },
        "serverInfo" : {
            "host" : "mongo1",
            "port" : 27017,
            "version" : "3.0.4",
            "gitVersion" : "0481c958daeb2969800511e7475dc66986fa9ed5"
        },
        "ok" : 1
    }
    View Code
    zjy:PRIMARY> db.newtask.find({"b":"CYHS1301942"}).explain("executionStats")
    {
        "queryPlanner" : {
            "plannerVersion" : 1,
            "namespace" : "cde.newtask",
            "indexFilterSet" : false,
            "parsedQuery" : {
                "b" : {
                    "$eq" : "CYHS1301942"
                }
            },
            "winningPlan" : {
                "stage" : "FETCH",
                "inputStage" : {
                    "stage" : "IXSCAN",
                    "keyPattern" : {
                        "b" : 1,
                        "date" : 1
                    },
                    "indexName" : "b_1_date_1",
                    "isMultiKey" : false,
                    "direction" : "forward",
                    "indexBounds" : {
                        "b" : [
                            "["CYHS1301942", "CYHS1301942"]"
                        ],
                        "date" : [
                            "[MinKey, MaxKey]"
                        ]
                    }
                }
            },
            "rejectedPlans" : [ ]
        },
        "executionStats" : {
            "executionSuccess" : true,
            "nReturned" : 1,
            "executionTimeMillis" : 0,
            "totalKeysExamined" : 1,
            "totalDocsExamined" : 1,
            "executionStages" : {
                "stage" : "FETCH",
                "nReturned" : 1,
                "executionTimeMillisEstimate" : 0,
                "works" : 2,
                "advanced" : 1,
                "needTime" : 0,
                "needFetch" : 0,
                "saveState" : 0,
                "restoreState" : 0,
                "isEOF" : 1,
                "invalidates" : 0,
                "docsExamined" : 1,
                "alreadyHasObj" : 0,
                "inputStage" : {
                    "stage" : "IXSCAN",
                    "nReturned" : 1,
                    "executionTimeMillisEstimate" : 0,
                    "works" : 2,
                    "advanced" : 1,
                    "needTime" : 0,
                    "needFetch" : 0,
                    "saveState" : 0,
                    "restoreState" : 0,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "keyPattern" : {
                        "b" : 1,
                        "date" : 1
                    },
                    "indexName" : "b_1_date_1",
                    "isMultiKey" : false,
                    "direction" : "forward",
                    "indexBounds" : {
                        "b" : [
                            "["CYHS1301942", "CYHS1301942"]"
                        ],
                        "date" : [
                            "[MinKey, MaxKey]"
                        ]
                    },
                    "keysExamined" : 1,
                    "dupsTested" : 0,
                    "dupsDropped" : 0,
                    "seenInvalidated" : 0,
                    "matchTested" : 0
                }
            }
        },
        "serverInfo" : {
            "host" : "mongo1",
            "port" : 27017,
            "version" : "3.0.4",
            "gitVersion" : "0481c958daeb2969800511e7475dc66986fa9ed5"
        },
        "ok" : 1
    }
    View Code

    上面介绍了如何查看执行计划,那么下面介绍下如何管理索引。

    索引管理具体请看[权威指南第5章]

    1)查看/显示集合的索引:db.collectionName.getIndexes()或则db.system.indexes.find()

    zjy:PRIMARY> db.data.getIndexes()
    [
        {
            "v" : 1,
            "key" : {
                "_id" : 1
            },
            "name" : "_id_",       #索引名
            "ns" : "survey.data"   #集合名
        },
        {
            "v" : 1,
            "unique" : true,       #唯一索引
            "key" : {
                "sid" : 1,
                "user" : 1
            },
            "name" : "sid_1_user_1",
            "ns" : "survey.data"
        },
        {
            "v" : 1,
            "key" : {
                "sid" : 1,
                "cdate" : -1
            },
            "name" : "sid_1_cdate_-1",
            "ns" : "survey.data"
        },
        {
            "v" : 1,
            "key" : {
                "sid" : 1,
                "created" : -1
            },
            "name" : "sid_1_created_-1",
            "ns" : "survey.data"
        },
        {
            "v" : 1,
            "key" : {
                "sid" : 1,
                "user" : 1,
                "modified" : 1
            },
            "name" : "sid_1_user_1_modified_1",
            "ns" : "survey.data"
        }
    ]
    zjy:PRIMARY> db.system.indexes.find({"ns":"survey.data"})
    { "v" : 1, "key" : { "_id" : 1 }, "name" : "_id_", "ns" : "survey.data" }
    { "v" : 1, "unique" : true, "key" : { "sid" : 1, "user" : 1 }, "name" : "sid_1_user_1", "ns" : "survey.data" }
    { "v" : 1, "key" : { "sid" : 1, "cdate" : -1 }, "name" : "sid_1_cdate_-1", "ns" : "survey.data" }
    { "v" : 1, "key" : { "sid" : 1, "created" : -1 }, "name" : "sid_1_created_-1", "ns" : "survey.data" }
    { "v" : 1, "key" : { "sid" : 1, "user" : 1, "modified" : 1 }, "name" : "sid_1_user_1_modified_1", "ns" : "survey.data" }

    2)创建索引:db.collections.ensureIndex({...})

    普通索引

    zjy:PRIMARY> db.comments.ensureIndex({"name":1})  #name字段上创建索引,升序。倒序为-1。
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 2,
        "numIndexesAfter" : 3,
        "ok" : 1
    }
    
    zjy:PRIMARY> db.comments.ensureIndex({"account.name":1}) #内嵌文档上创建索引。
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 3,
        "numIndexesAfter" : 4,
        "ok" : 1
    }
    
    zjy:PRIMARY> db.comments.ensureIndex({"age":1},{"name":"idx_name"}) #指定索引名称
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 4,
        "numIndexesAfter" : 5,
        "ok" : 1
    }
    
    zjy:PRIMARY> db.comments.ensureIndex({"name":1,"age":1},{"name":"idx_name_age","background":true}) #后台创建复合索引
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 5,
        "numIndexesAfter" : 6,
        "ok" : 1
    }
    
    zjy:PRIMARY> db.comments.ensureIndex({"name":1,"age":1},{"name":"uk_name_age","background":true,"unique":true}) #后台创建唯一索引
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
    }
    zjy:PRIMARY> db.comments.ensureIndex({"name":1,"age":1},{"unique":true,"dropDups":true,"name":"uk_name_age"})   #删除重复数据创建唯一索引,dropDups在3.0里废弃。
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
    }

    哈希索引hashed

    zjy:PRIMARY> db.abc.ensureIndex({"a":"hashed"})
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
    }
    zjy:PRIMARY> db.abc.getIndexes()
    [
        {
            "v" : 1,
            "key" : {
                "_id" : 1
            },
            "name" : "_id_",
            "ns" : "test.abc"
        },
        {
            "v" : 1,
            "key" : {
                "a" : "hashed"
            },
            "name" : "a_hashed",
            "ns" : "test.abc"
        }
    ]

    这里还有2个比较特殊的索引:稀疏索引(sparse)和TTL索引(expireAfterSeconds)

    TTL索引是一种特定的数据块,请求赋予时间范围的方式,它指定一个时间点,超过该时间点数据变成无效。

    zjy:PRIMARY> db.comments.find()
    { "_id" : ObjectId("55ae6b99313fd7b879b5296c"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:09.651Z") }
    { "_id" : ObjectId("55ae6b9a313fd7b879b5296d"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:10.739Z") }
    { "_id" : ObjectId("55ae6b9b313fd7b879b5296e"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:11.555Z") }
    { "_id" : ObjectId("55ae6b9c313fd7b879b5296f"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:12.267Z") }
    { "_id" : ObjectId("55ae6b9c313fd7b879b52970"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:12.899Z") }
    zjy:PRIMARY> db.comments.ensureIndex({"ts":1},{expireAfterSeconds:60})  #创建TTL索引,过期时间60秒,即60秒时间生成的数据会被删除。
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
    }
    zjy:PRIMARY> db.comments.find()
    { "_id" : ObjectId("55ae6b99313fd7b879b5296c"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:09.651Z") }
    { "_id" : ObjectId("55ae6b9a313fd7b879b5296d"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:10.739Z") }
    { "_id" : ObjectId("55ae6b9b313fd7b879b5296e"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:11.555Z") }
    { "_id" : ObjectId("55ae6b9c313fd7b879b5296f"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:12.267Z") }
    { "_id" : ObjectId("55ae6b9c313fd7b879b52970"), "name" : "zhoujy", "age" : 22, "ts" : ISODate("2015-07-21T15:56:12.899Z") }
    
    zjy:PRIMARY> db.comments.getIndexes()
    [
        {
            "v" : 1,
            "key" : {
                "_id" : 1
            },
            "name" : "_id_",
            "ns" : "test.comments"
        },
        {
            "v" : 1,
            "key" : {
                "ts" : 1
            },
            "name" : "ts_1",
            "ns" : "test.comments",
            "expireAfterSeconds" : 60
        }
    ]
    
    zjy:PRIMARY> db.comments.find() #60秒之后查看,数据已经没有

    最后有一类索引是text index 文本索引:更多的信息见 [MongoDB大数据处理权威指南第八章]和这里

    测试数据:

    db.comments.insert({"name":"abc","mem":"You can create a text index on the field or fields whose value is a string or an array of string elements","ts":new Date()})
    
    db.comments.insert({"name":"def","mem":"When creating a text index on multiple fields, you can specify the individual fields or you can use wildcard specifier ($**)","ts":new Date()})
    
    db.comments.insert({"name":"ghi","mem":"This text index catalogs all string data in the subject field and the content field, where the field value is either a string or an array of string elements.","ts":new Date()})
    
    db.comments.insert({"name":"jkl","mem":"To allow for text search on all fields with string content, use the wildcard specifier ($**) to index all fields that contain string content.","ts":new Date()})
    
    db.comments.insert({"name":"mno","mem":"The following example indexes any string value in the data of every field of every document in collection and names the index TextIndex:","ts":new Date()})
    View Code

    创建:

    > db.comments.ensureIndex({"mem":"text"})   #创建text索引
    {
        "createdCollectionAutomatically" : false,
        "numIndexesBefore" : 1,
        "numIndexesAfter" : 2,
        "ok" : 1
    }

    使用:$text 操作符

    > db.comments.find({$text:{$search:"specifier"}}).pretty()
    {
        "_id" : ObjectId("55aee886a782f35b366926ef"),
        "name" : "jkl",
        "mem" : "To allow for text search on all fields with string content, use the wildcard specifier ($**) to index all fields that contain string content.",
        "ts" : ISODate("2015-07-22T00:49:10.350Z")
    }
    {
        "_id" : ObjectId("55aee886a782f35b366926ed"),
        "name" : "def",
        "mem" : "When creating a text index on multiple fields, you can specify the individual fields or you can use wildcard specifier ($**)",
        "ts" : ISODate("2015-07-22T00:49:10.346Z")
    }
    
    
    > db.comments.runCommand("text",{search:"specifier"})  #3.0之前可以使用,之后无效。
    {
        "results" : [
            {
                "score" : 0.8653846153846153,
                "obj" : {
                    "_id" : ObjectId("55aee886a782f35b366926ed"),
                    "name" : "def",
                    "mem" : "When creating a text index on multiple fields, you can specify the individual fields or you can use wildcard specifier ($**)",
                    "ts" : ISODate("2015-07-22T00:49:10.346Z")
                }
            },
            {
                "score" : 0.5357142857142857,
                "obj" : {
                    "_id" : ObjectId("55aee886a782f35b366926ef"),
                    "name" : "jkl",
                    "mem" : "To allow for text search on all fields with string content, use the wildcard specifier ($**) to index all fields that contain string content.",
                    "ts" : ISODate("2015-07-22T00:49:10.350Z")
                }
            }
        ],
        "stats" : {
            "nscanned" : NumberLong(2),
            "nscannedObjects" : NumberLong(2),
            "n" : 2,
            "timeMicros" : 173
        },
        "ok" : 1
    }

    上面大致介绍了各类索引的介绍和使用,具体的信息和注意事项可以找官方文档里查看,特别是要注意text和ttl索引的使用。

    3)删除索引:dropIndex

    zjy:PRIMARY> db.abc.getIndexes()    #查看索引
    [
        {
            "v" : 1,
            "key" : {
                "_id" : 1
            },
            "name" : "_id_",
            "ns" : "test.abc"
        },
        {
            "v" : 1,
            "key" : {               #索引字段
                "a" : "hashed"
            },
            "name" : "a_hashed",    #索引名
            "ns" : "test.abc"
        },
        {
            "v" : 1,
            "key" : {
                "b" : 1
            },
            "name" : "b_1",
            "ns" : "test.abc"
        },
        {
            "v" : 1,
            "key" : {
                "c" : 1
            },
            "name" : "idx_c",
            "ns" : "test.abc"
        }
    ]
    zjy:PRIMARY> db.abc.dropIndex({"a" : "hashed"})  #删除索引,指定"key"
    { "nIndexesWas" : 4, "ok" : 1 }
    zjy:PRIMARY> db.abc.dropIndex({"b" : 1})         #删除索引,指定"key"
    { "nIndexesWas" : 3, "ok" : 1 }
    zjy:PRIMARY> db.abc.dropIndex("idx_c")           #删除索引,指定"name"
    { "nIndexesWas" : 2, "ok" : 1 }
    zjy:PRIMARY> db.abc.getIndexes()
    [
        {
            "v" : 1,
            "key" : {
                "_id" : 1
            },
            "name" : "_id_",
            "ns" : "test.abc"
        }
    ]
    
    zjy:PRIMARY> db.abc.dropIndex("*")              #删除索引,删除集合的全部索引
    {
        "nIndexesWas" : 4,
        "msg" : "non-_id indexes dropped for collection",
        "ok" : 1
    }

    4)重建索引:索引出现损坏需要重建。reindex

    zjy:PRIMARY> db.abc.reIndex()   #执行
    {
        "nIndexesWas" : 1,
        "nIndexes" : 1,
        "indexes" : [
            {
                "key" : {
                    "_id" : 1
                },
                "name" : "_id_",
                "ns" : "test.abc"
            }
        ],
        "ok" : 1
    }

    5)强制使用指定索引。hint

    db.abc.find({"c":1,"b":2}).hint("b_1")  #hint里面是"索引字段"或则"索引名"

    总结:

          索引可以加快检索、排序等操作的效率,但是对于增删改的操作却有一定的开销,所以不要一味的加索引,在必要的字段上加合适的索引才是需要的。更多的信息请参考官方文档

  • 相关阅读:
    firefox和ie下面的初始化checkbox
    全球宽带排名出炉 韩国第一中国未入榜(附表)
    逆向查询所有父栏目
    js的点点滴滴
    Treeview绑定数据源 层叠结构数据源的应用
    asp.net读取服务器端文件夹列表
    Treeview绑定数据源 层叠结构数据源的应用(续--完善篇)
    VC数据类型
    jQuery核心文档(翻译中)
    iscroll 下拉刷新,上拉加载
  • 原文地址:https://www.cnblogs.com/zhoujinyi/p/4665903.html
Copyright © 2020-2023  润新知