PostgreSQL DBA(47) - Index(Btree)

husthxd發表於2019-07-02

本節簡單介紹了PostgreSQL中的Btree索引,包括Btree的基本結構/NULL值處理/自定義型別支援等相關資訊.

結構
Btree是常見的資料結構,有以下特性:
1.Btree是平衡樹,以root節點為分界,左右兩邊的中間節點數目一樣,也就是說查詢任意一個值,時間都是一樣的
2.Btree有多個分支,每個page(8KB)可以有數百個TIDs,也就是說Btree只需要不多的幾個層次就可以支援行數巨大的表
3.索引中的資料Page之間和Page內部都是有序的,相同層次的Page通過雙向連結串列彼此連線
其結構如下:

第一個page是metapage,指向root節點.root節點指向N個Internal節點,最下面的是Leaf節點.
下面是等值搜尋的過程(查詢值49):

NULLs
PostgreSQL在建立索引時會儲存NULLs,因此條件為IS NULL和IS NOT NULL時可以支援索引掃描.


testdb=# insert into t_null select x,'c1'||x from generate_series(1,10000) as x;
INSERT 0 10000
testdb=# insert into t_null values(null,null);
INSERT 0 1
testdb=# 
testdb=# create index idx_t_null_id on t_null(id);
CREATE INDEX
testdb=# analyze t_null;
ANALYZE
testdb=# 
testdb=# explain verbose select * from t_null where id is null;
                                     QUERY PLAN                                     
------------------------------------------------------------------------------------
 Index Scan using idx_t_null_id on public.t_null  (cost=0.29..8.30 rows=1 width=10)
   Output: id, c1
   Index Cond: (t_null.id IS NULL)
(3 rows)
testdb=# explain verbose select * from t_null where id is not null;
                             QUERY PLAN                             
--------------------------------------------------------------------
 Seq Scan on public.t_null  (cost=0.00..155.01 rows=10000 width=10)
   Output: id, c1
   Filter: (t_null.id IS NOT NULL)
(3 rows)
testdb=# 
testdb=# truncate t_null;
TRUNCATE TABLE
testdb=# insert into t_null select null,null from generate_series(1,10000);
INSERT 0 10000
testdb=# insert into t_null values(1,'1');
INSERT 0 1
testdb=# analyze t_null;
ANALYZE
testdb=# 
testdb=# explain verbose select * from t_null where id is null;
                            QUERY PLAN                             
-------------------------------------------------------------------
 Seq Scan on public.t_null  (cost=0.00..135.01 rows=10000 width=6)
   Output: id, c1
   Filter: (t_null.id IS NULL)
(3 rows)
testdb=# explain verbose select * from t_null where id is not null;
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Index Scan using idx_t_null_id on public.t_null  (cost=0.29..8.30 rows=1 width=6)
   Output: id, c1
   Index Cond: (t_null.id IS NOT NULL)
(3 rows)
testdb=#

NULLs可以儲存在Index的最前面,也可以儲存在最後面,可通過FIRST/LAST關鍵字指定,這對排序會有所影響.


testdb=# create table t_null_sort(id int,c1 varchar(20));
CREATE TABLE
testdb=# 
testdb=# insert into t_null_sort select x,'c1'||x from generate_series(1,10000) as x;
INSERT 0 10000
testdb=# insert into t_null_sort values(null,null);
INSERT 0 1
testdb=# 
testdb=# create index idx_t_null_id_first on t_null_sort(id nulls first);
CREATE INDEX
testdb=# create index idx_t_null_id_last on t_null_sort(id nulls last);
CREATE INDEX
testdb=# 
testdb=# analyze t_null_sort;
ANALYZE
testdb=# 
testdb=# explain verbose select * from t_null_sort order by id nulls first;
                                             QUERY PLAN                                              
-----------------------------------------------------------------------------------------------------
 Index Scan using idx_t_null_id_first on public.t_null_sort  (cost=0.29..328.30 rows=10001 width=10)
   Output: id, c1
(2 rows)
testdb=# explain verbose select * from t_null_sort order by id nulls last;
                                             QUERY PLAN                                             
----------------------------------------------------------------------------------------------------
 Index Scan using idx_t_null_id_last on public.t_null_sort  (cost=0.29..328.30 rows=10001 width=10)
   Output: id, c1
(2 rows)
testdb=# 
testdb=#

INCLUDE
建立索引時,通過使用INCLUDE可以把非索引欄位加入到該索引中,在通過索引掃描時如投影列只包含索引列和INCLUDE列,那麼可以通過INDEX ONLY SCAN掃描Fetch資料.


testdb=# create table t_include(id int,c1 varchar(20),c2 varchar(20),c3 varchar(20));
CREATE TABLE
testdb=# 
testdb=# insert into t_include(id,c1,c2) select x,'c1'||x,'c2'||x from generate_series(1,10000) as x;
INSERT 0 10000
testdb=# 
testdb=# create index idx_t_include_id on t_include(id) include (c1);
CREATE INDEX
testdb=# 
testdb=# analyze t_include;
ANALYZE
testdb=# explain verbose select id,c1 from t_include;
                              QUERY PLAN                               
-----------------------------------------------------------------------
 Seq Scan on public.t_include  (cost=0.00..163.00 rows=10000 width=10)
   Output: id, c1
(2 rows)
testdb=# 
testdb=# explain verbose select id,c1 from t_include where id = 1;
                                          QUERY PLAN                                           
-----------------------------------------------------------------------------------------------
 Index Only Scan using idx_t_include_id on public.t_include  (cost=0.29..8.30 rows=1 width=10)
   Output: id, c1
   Index Cond: (t_include.id = 1)
(3 rows)
testdb=#

New Data Type
建立型別complex以及資料表


testdb=# create type complex as (re float, im float);
CREATE TYPE
testdb=# create table numbers(x complex);
CREATE TABLE
testdb=# insert into numbers values ((0.0, 10.0)), ((1.0, 3.0)), ((1.0, 1.0));
INSERT 0 3
testdb=# select * from numbers order by x;
   x    
--------
 (0,10)
 (1,1)
 (1,3)
(3 rows)

建立比較函式


testdb=# 
testdb=# create function modulus(a complex) returns float as $$
testdb$#     select sqrt(a.re*a.re + a.im*a.im);
testdb$# $$ immutable language sql;
CREATE FUNCTION
testdb=# 
testdb=# create function complex_lt(a complex, b complex) returns boolean as $$
testdb$#     select modulus(a) < modulus(b);
testdb$# $$ immutable language sql;
CREATE FUNCTION
testdb=# 
testdb=# create function complex_le(a complex, b complex) returns boolean as $$
testdb$#     select modulus(a) <= modulus(b);
testdb$# $$ immutable language sql;
CREATE FUNCTION
testdb=# 
testdb=# create function complex_eq(a complex, b complex) returns boolean as $$
testdb$#     select modulus(a) = modulus(b);
testdb$# $$ immutable language sql;
CREATE FUNCTION
testdb=# 
testdb=# create function complex_ge(a complex, b complex) returns boolean as $$
testdb$#     select modulus(a) >= modulus(b);
testdb$# $$ immutable language sql;
CREATE FUNCTION
testdb=# 
testdb=# create function complex_gt(a complex, b complex) returns boolean as $$
testdb$#     select modulus(a) > modulus(b);
testdb$# $$ immutable language sql;
CREATE FUNCTION

建立operator


testdb=# create operator <(leftarg=complex, rightarg=complex, procedure=complex_lt);
CREATE OPERATOR
testdb=# 
testdb=# create operator <=(leftarg=complex, rightarg=complex, procedure=complex_le);
arg=complex, rightarg=complex, procedure=complex_gt);
CREATE OPERATOR
testdb=# 
testdb=# create operator =(leftarg=complex, rightarg=complex, procedure=complex_eq);
CREATE OPERATOR
testdb=# 
testdb=# create operator >=(leftarg=complex, rightarg=complex, procedure=complex_ge);
CREATE OPERATOR
testdb=# 
testdb=# create operator >(leftarg=complex, rightarg=complex, procedure=complex_gt);
CREATE OPERATOR
testdb=#

現在可以對complex進行比較了:


testdb=# select (1.0,1.0)::complex < (1.0,3.0)::complex;
 ?column? 
----------
 t
(1 row)

建立比較函式和opc,在建立opc的時候,pg會自動建立同名的opf


testdb=# create function complex_cmp(a complex, b complex) returns integer as $$
testdb$#     select case when modulus(a) < modulus(b) then -1
testdb$#                 when modulus(a) > modulus(b) then 1 
testdb$#                 else 0
testdb$#            end;
testdb$# $$ language sql;
CREATE FUNCTION
testdb=# create operator class complex_ops
testdb-# default for type complex
testdb-# using btree as
testdb-#     operator 1 <,
testdb-#     operator 2 <=,
testdb-#     operator 3 =,
testdb-#     operator 4 >=,
testdb-#     operator 5 >,
testdb-#     function 1 complex_cmp(complex,complex);
CREATE OPERATOR CLASS
testdb=# select * from pg_opfamily where opfname = 'complex_ops';
  oid   | opfmethod |   opfname   | opfnamespace | opfowner 
--------+-----------+-------------+--------------+----------
 106585 |       403 | complex_ops |         2200 |       10
(1 row)

現在可以建立資料型別為complex的Btree索引


testdb=# select amp.amprocnum,
testdb-#        amp.amproc,
testdb-#        amp.amproclefttype::regtype,
testdb-#        amp.amprocrighttype::regtype
testdb-# from   pg_opfamily opf,
testdb-#        pg_am am,
testdb-#        pg_amproc amp
testdb-# where  opf.opfname = 'complex_ops'
testdb-# and    opf.opfmethod = am.oid
testdb-# and    am.amname = 'btree'
testdb-# and    amp.amprocfamily = opf.oid;
 amprocnum |   amproc    | amproclefttype | amprocrighttype 
-----------+-------------+----------------+-----------------
         1 | complex_cmp | complex        | complex
(1 row)
testdb=# create index idx_numbers_x on numbers(x);
CREATE INDEX
testdb=# analyze numbers;
ANALYZE
testdb=# explain select * from numbers order by x;
                          QUERY PLAN                          
--------------------------------------------------------------
 Sort  (cost=1.14..1.15 rows=6 width=37)
   Sort Key: x
   ->  Seq Scan on numbers  (cost=0.00..1.06 rows=6 width=37)
(3 rows)
testdb=# set enable_seqscan=off;
SET
testdb=# explain select * from numbers order by x;
                                     QUERY PLAN                                     
------------------------------------------------------------------------------------
 Index Only Scan using idx_numbers_x on numbers  (cost=0.13..12.22 rows=6 width=37)
(1 row)

參考資料
Indexes in PostgreSQL — 4 (Btree)
Interfacing Extensions To Indexes

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/6906/viewspace-2649312/,如需轉載,請註明出處,否則將追究法律責任。

相關文章