Indexes
Before we get acquainted with indexes, let's create a collection and insert 2,000,000 documents into it.
// test.js
// Generate a random integer between min and max (inclusive)
function GetRandomNum(min, max){
    let range = max - min;    // size of the interval
    let rand = Math.random(); // random value in [0, 1)
    return (min + Math.round(rand * range)); // min plus a rounded random offset
}
//console.log(GetRandomNum(10000,99999));
// Generate a random username
function GetRandomUserName(min, max){
    let tempStringArray = "123456789qwertyuiopasdfghjklzxcvbnm".split(""); // character pool to draw from
    let outPuttext = ""; // the string we will return
    // Pick the username length once, then draw that many random characters
    let nameLength = GetRandomNum(min, max);
    for (let i = 0; i < nameLength; i++){
        // Append a randomly chosen character (length - 1 keeps the index in bounds)
        outPuttext = outPuttext + tempStringArray[GetRandomNum(0, tempStringArray.length - 1)];
    }
    return outPuttext;
}
var db = connect("company");
db.randomInfo.drop();
var tempInfo = [];
for (let i = 0; i < 2000000; i++){
    tempInfo.push({
        username: GetRandomUserName(7, 16),
        regeditTime: new Date(),
        randNum0: GetRandomNum(100000, 999999),
        randNum1: GetRandomNum(100000, 999999),
        randNum2: GetRandomNum(100000, 999999),
        randNum3: GetRandomNum(100000, 999999),
        randNum4: GetRandomNum(100000, 999999),
        randNum5: GetRandomNum(100000, 999999),
        randNum6: GetRandomNum(100000, 999999),
        randNum7: GetRandomNum(100000, 999999),
        randNum8: GetRandomNum(100000, 999999)
    })
}
db.randomInfo.insert(tempInfo);
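One caveat: the stats() output below reports "count" : 1835000, not the full 2,000,000, so it appears part of this giant single insert() call was not persisted. A safer pattern (a minimal sketch that reuses the helper functions above; insertMany is available from MongoDB 3.2 on) loads the data in fixed-size batches:
// batched-load.js: same data, inserted 1,000 documents at a time
var db = connect("company");
db.randomInfo.drop();
var batch = [];
for (let i = 0; i < 2000000; i++){
    batch.push({
        username: GetRandomUserName(7, 16),
        regeditTime: new Date(),
        randNum0: GetRandomNum(100000, 999999)
        // ...plus the remaining randNum1 to randNum8 fields, as in test.js
    });
    if (batch.length === 1000){
        db.randomInfo.insertMany(batch); // one bounded bulk call per batch
        batch = [];
    }
}
if (batch.length > 0) db.randomInfo.insertMany(batch); // flush the remainder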
$ mongo
> load("./test.js")
connecting to: mongodb://127.0.0.1:27017/company
MongoDB server version: 3.4.10
...
// This process can take around two minutes
> use company
switched to db company
> db.randomInfo.stats() // check how many documents made it in
{
"ns" : "company.randomInfo",
"size" : 421908971,
"count" : 1835000,
"avgObjSize" : 229,
"storageSize" : 188686336,
"capped" : false,
"wiredTiger" : {
"metadata" : {
"formatVersion" : 1
},
"creationString" : "access_pattern_hint=none,allocation_size=4KB,app_metadata=(formatVersion=1),block_allocation=best,block_compressor=snappy,cache_resident=false,checksum=on,colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false,extractor=,format=btree,huffman_key=,huffman_value=,ignore_in_memory_cache_size=false,immutable=false,internal_item_max=0,internal_key_max=0,internal_key_truncate=true,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=64MB,log=(enabled=true),lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,prefix_compression_min=4,source=,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,type=file,value_format=u",
"type" : "file",
"uri" : "statistics:table:collection-0-5869292382622143333",
"LSM" : {
"bloom filter false positives" : 0,
"bloom filter hits" : 0,
"bloom filter misses" : 0,
"bloom filter pages evicted from cache" : 0,
"bloom filter pages read into cache" : 0,
"bloom filters in the LSM tree" : 0,
"chunks in the LSM tree" : 0,
"highest merge generation in the LSM tree" : 0,
"queries that could have benefited from a Bloom filter that did not exist" : 0,
"sleep for LSM checkpoint throttle" : 0,
"sleep for LSM merge throttle" : 0,
"total size of bloom filters" : 0
},
"block-manager" : {
"allocations requiring file extension" : 15471,
"blocks allocated" : 15475,
"blocks freed" : 39,
"checkpoint size" : 188481536,
"file allocation unit size" : 4096,
"file bytes available for reuse" : 188416,
"file magic number" : 120897,
"file major version number" : 1,
"file size in bytes" : 188686336,
"minor version number" : 0
},
"btree" : {
"btree checkpoint generation" : 20,
"column-store fixed-size leaf pages" : 0,
"column-store internal pages" : 0,
"column-store variable-size RLE encoded values" : 0,
"column-store variable-size deleted values" : 0,
"column-store variable-size leaf pages" : 0,
"fixed-record size" : 0,
"maximum internal page key size" : 368,
"maximum internal page size" : 4096,
"maximum leaf page key size" : 2867,
"maximum leaf page size" : 32768,
"maximum leaf page value size" : 67108864,
"maximum tree depth" : 4,
"number of key/value pairs" : 0,
"overflow pages" : 0,
"pages rewritten by compaction" : 0,
"row-store internal pages" : 0,
"row-store leaf pages" : 0
},
"cache" : {
"bytes currently in the cache" : 502018875,
"bytes read into cache" : 0,
"bytes written from cache" : 437640755,
"checkpoint blocked page eviction" : 0,
"data source pages selected for eviction unable to be evicted" : 12,
"hazard pointer blocked page eviction" : 0,
"in-memory page passed criteria to be split" : 130,
"in-memory page splits" : 62,
"internal pages evicted" : 0,
"internal pages split during eviction" : 1,
"leaf pages split during eviction" : 56,
"modified pages evicted" : 56,
"overflow pages read into cache" : 0,
"overflow values cached in memory" : 0,
"page split during eviction deepened the tree" : 1,
"page written requiring lookaside records" : 0,
"pages read into cache" : 0,
"pages read into cache requiring lookaside entries" : 0,
"pages requested from the cache" : 2232017,
"pages written from cache" : 15472,
"pages written requiring in-memory restoration" : 0,
"tracked dirty bytes in the cache" : 0,
"unmodified pages evicted" : 0
},
"cache_walk" : {
"Average difference between current eviction generation when the page was last considered" : 0,
"Average on-disk page image size seen" : 0,
"Clean pages currently in cache" : 0,
"Current eviction generation" : 0,
"Dirty pages currently in cache" : 0,
"Entries in the root page" : 0,
"Internal pages currently in cache" : 0,
"Leaf pages currently in cache" : 0,
"Maximum difference between current eviction generation when the page was last considered" : 0,
"Maximum page size seen" : 0,
"Minimum on-disk page image size seen" : 0,
"On-disk page image sizes smaller than a single allocation unit" : 0,
"Pages created in memory and never written" : 0,
"Pages currently queued for eviction" : 0,
"Pages that could not be queued for eviction" : 0,
"Refs skipped during cache traversal" : 0,
"Size of the root page" : 0,
"Total number of pages currently in cache" : 0
},
"compression" : {
"compressed pages read" : 0,
"compressed pages written" : 15312,
"page written failed to compress" : 0,
"page written was too small to compress" : 158,
"raw compression call failed, additional data available" : 0,
"raw compression call failed, no additional data available" : 0,
"raw compression call succeeded" : 0
},
"cursor" : {
"bulk-loaded cursor-insert calls" : 0,
"create calls" : 3,
"cursor-insert key and value bytes inserted" : 429166606,
"cursor-remove key bytes removed" : 0,
"cursor-update value bytes updated" : 0,
"insert calls" : 1835000,
"next calls" : 162051,
"prev calls" : 1,
"remove calls" : 0,
"reset calls" : 30748,
"restarted searches" : 0,
"search calls" : 0,
"search near calls" : 1227,
"truncate calls" : 0,
"update calls" : 0
},
"reconciliation" : {
"dictionary matches" : 0,
"fast-path pages deleted" : 0,
"internal page key bytes discarded using suffix compression" : 31112,
"internal page multi-block writes" : 4,
"internal-page overflow keys" : 0,
"leaf page key bytes discarded using prefix compression" : 0,
"leaf page multi-block writes" : 66,
"leaf-page overflow keys" : 0,
"maximum blocks required for a page" : 242,
"overflow values written" : 0,
"page checksum matches" : 209,
"page reconciliation calls" : 171,
"page reconciliation calls for eviction" : 57,
"pages deleted" : 1
},
"session" : {
"object compaction" : 0,
"open cursor count" : 3
},
"transaction" : {
"update conflicts" : 0
}
},
"nindexes" : 1,
"totalIndexSize" : 18272256,
"indexSizes" : {
"_id_" : 18272256
},
"ok" : 1
}
// List the current indexes
> db.randomInfo.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "company.randomInfo"
}
]
// This is the default index on _id; we normally won't query against it directly
Creating an index
> db.randomInfo.ensureIndex({username: 1})
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
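Incidentally, ensureIndex is a deprecated alias for createIndex in MongoDB 3.x. A sketch of the modern equivalent, with the optional background flag so building on a large collection doesn't block other operations:
// Same index via the current API; background:true avoids locking out other work during the build
> db.randomInfo.createIndex({username: 1}, {background: true})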
> db.randomInfo.getIndexes() // listing again now shows two indexes
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "company.randomInfo"
},
{
"v" : 2,
"key" : {
"username" : 1
},
"name" : "username_1",
"ns" : "company.randomInfo"
}
]
>
//test1.js
var startTime = new Date().getTime() // record when the script starts
var db = connect("company") // connect to the database
var rs = db.randomInfo.find({username: "od2umr6kec"}) // look the user up by username
rs.forEach(rs => { printjson(rs) }) // print each matching document
var runTime = new Date().getTime() - startTime; // elapsed time in milliseconds
print("[SUCCESS]This run time is:" + runTime + "ms") // report it
// Run the query
> load("./test1.js")
connecting to: mongodb://127.0.0.1:27017/company
MongoDB server version: 3.4.10
{
"_id" : ObjectId("5ac8b73b5646d96c6db3e1a8"),
"username" : "od2umr6kec",
"regeditTime" : ISODate("2018-04-07T12:18:44.292Z"),
"randNum0" : 577322,
"randNum1" : 961443,
"randNum2" : 999621,
"randNum3" : 968291,
"randNum4" : 834839,
"randNum5" : 637084,
"randNum6" : 172311,
"randNum7" : 219693,
"randNum8" : 617081
}
[SUCCESS]This run time is:11ms // the key line: the run time has dropped dramatically
true
>
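To confirm the query was served by the new index rather than a full collection scan, you can inspect the plan with explain(); a quick sketch (exact output fields vary by server version):
// winningPlan should show an IXSCAN stage on username_1 instead of a COLLSCAN
> var plan = db.randomInfo.find({username: "od2umr6kec"}).explain("executionStats")
> printjson(plan.queryPlanner.winningPlan)
> print("docs examined: " + plan.executionStats.totalDocsExamined)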
Whether you work with a relational database or a document database, building the right indexes is very important. As noted earlier, indexes consume disk and memory resources, so create them according to what the application actually needs. MongoDB also imposes a limit here: a single collection may have at most 64 indexes.
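That cost is easy to observe: stats() breaks storage down per index, and totalIndexSize() sums it. A quick check (values are in bytes):
> printjson(db.randomInfo.stats().indexSizes) // storage used by each index
> print(db.randomInfo.totalIndexSize()) // all indexes combined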
Compound indexes
A compound index covers two or more fields. (Strictly speaking, the walkthrough below adds a second single-field index next to username_1; a true compound index is sketched right after this paragraph.)
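For reference, a true compound index puts both fields into a single definition, as in this sketch (field order matters: it determines which query shapes the index can serve):
// One index covering username first, then randNum0
> db.randomInfo.ensureIndex({username: 1, randNum0: 1})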
// Create one more index
> db.randomInfo.ensureIndex({randNum0: 1});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 2,
"numIndexesAfter" : 3,
"ok" : 1
}
> db.randomInfo.getIndexes();
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "company.randomInfo"
},
{
"v" : 2,
"key" : {
"username" : 1
},
"name" : "username_1",
"ns" : "company.randomInfo"
},
{
"v" : 2,
"key" : {
"randNum0" : 1
},
"name" : "randNum0_1",
"ns" : "company.randomInfo"
}
]
>
Now let's query on both indexed fields at once and see how it performs.
//
var startTime = new Date().getTime();
var db = connect("company");
var rs = db.randomInfo.find({username: "7xwb8y3", randNum0: 565509});
rs.forEach(rs => { printjson(rs) });
var runTime = new Date().getTime() - startTime;
print("[Demo]this run time is " + runTime + "ms");
// Performance-wise there is no dramatic change; the query still completes in roughly 10 ms. When more than one index could serve a query, MongoDB's query planner settles on a single winning index rather than using both; the candidates are exactly the indexes listed by db.randomInfo.getIndexes().
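To see which index the planner actually picked for this two-field query, explain() shows the winning plan; a sketch:
// The IXSCAN stage inside winningPlan names the index the planner selected
> var plan = db.randomInfo.find({username: "7xwb8y3", randNum0: 565509}).explain()
> printjson(plan.queryPlanner.winningPlan)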
Querying with a specified index
// hint() forces the query to use the index you name
var rs = db.randomInfo.find({username: "7xwb8y3", randNum0: 565509}).hint({randNum0: 1});
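Wrapped in the same timing pattern as test1.js, the hinted query can be compared against the planner's own choice; a sketch (the file name is just for illustration):
// hinted.js: time the same query forced onto randNum0_1
var startTime = new Date().getTime();
var db = connect("company");
var rs = db.randomInfo.find({username: "7xwb8y3", randNum0: 565509}).hint({randNum0: 1});
rs.forEach(rs => { printjson(rs) });
print("[Demo]hinted run time is " + (new Date().getTime() - startTime) + "ms");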
Dropping an index
db.randomInfo.dropIndex("randNum0_1"); // pass the index's generated name
Note that the value you pass is not the field name (the key) but the name value from the index listing, which is an easy pitfall.
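dropIndex also accepts the index's key pattern, which saves you from remembering the generated name; either form removes the same index:
db.randomInfo.dropIndex({randNum0: 1}); // equivalent: drop by key pattern instead of by name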
Full-text indexes
Sometimes you need to search long articles for keywords. For instance, every post I write runs over ten thousand characters, and finding a keyword by hand is very hard. For this, MongoDB provides full-text indexes.
// Insert two documents
db.info.insert({contextInfo:"I am a programmer, I love life, love family. Every day after work, I write a diary."})
db.info.insert({contextInfo:"I am a programmer, I love PlayGame, love drink. Every day after work, I playGame and drink."})
Creating a full-text index
db.info.ensureIndex({contextInfo: "text"});
// Note the text keyword here, which marks this as a full-text index; we won't define a data model for this example.
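Keep in mind that a collection can have at most one text index, though that single index may span several fields. A sketch, assuming a hypothetical title field alongside contextInfo (this would be rejected here because contextInfo is already text-indexed; try it on a fresh collection):
// One text index covering two fields; title is a hypothetical extra field
db.info.ensureIndex({title: "text", contextInfo: "text"});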
Querying a full-text index
// $text: search against the full-text index (here, the one covering contextInfo)
// $search: followed by the content to look for
db.info.find({$text:{$search:"programmer"}}); // find documents whose contextInfo contains "programmer"
Searching for multiple words
// Say we want documents containing programmer, family, diary, or drink (an OR relationship), so both documents will match.
db.info.find({$text:{$search:"programmer family diary drink"}})
// To exclude records containing the word drink, prefix it with a minus sign.
db.info.find({$text:{$search:"programmer family diary -drink"}})
// Full-text search also supports exact phrases: to search for the phrase "love PlayGame" together with the word drink, wrap the phrase in escaped double quotes.
db.info.find({$text:{$search:"\"love PlayGame\" drink"}})
Full-text indexes come up constantly in real work: searching blog posts, finding keywords in long documents, and so on.
That wraps up the MongoDB fundamentals; in the next section we'll learn how to administer MongoDB.