Apache Kylin1.5.2.1之訂單案例詳細構建流程
一.Hive訂單資料倉儲構建
1. 建立事實表並插入資料
DROP TABLE IF EXISTS default.fact_order ;
create table default.fact_order (
time_key string,
product_key string,
salesperson_key string,
custom_key string,
quantity_ordered bigint,
order_dollars bigint,
cost_dollars bigint
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
load data local inpath '/root/kylinsample/fact_order.txt' overwrite into table default.fact_order;
##load data local inpath '/root/kylinsample/fact_order.txt' into table default.fact_order;
fact_order.txt
2016-05-01,pd001,sp001,ct001,100,2000,1000
2016-05-01,pd001,sp002,ct002,100,2000,1000
2016-05-01,pd001,sp003,ct002,100,2000,1000
2016-05-01,pd002,sp002,ct002,100,2000,1000
2016-05-01,pd003,sp003,ct001,100,2000,1000
2016-05-01,pd001,sp003,ct001,100,2000,1000
2016-05-01,pd001,sp002,ct001,100,2000,1000
2016-05-01,pd001,sp003,ct002,100,2000,1000
2016-05-01,pd002,sp001,ct001,100,2000,1000
2016-05-01,pd003,sp001,ct001,100,2000,1000
2016-05-01,pd004,sp001,ct001,50,1000,600
2016-05-02,pd001,sp001,ct001,50,1000,600
2016-05-02,pd001,sp002,ct002,100,2000,1000
2016-05-02,pd001,sp003,ct002,100,2000,1000
2016-05-02,pd002,sp001,ct001,50,1000,600
2016-05-02,pd003,sp001,ct001,50,1000,600
2016-05-02,pd004,sp001,ct001,50,1000,600
2016-05-03,pd001,sp001,ct001,50,1000,600
2016-05-03,pd001,sp002,ct002,100,2000,1000
2016-05-03,pd001,sp003,ct002,100,2000,1000
2016-05-04,pd002,sp001,ct001,700,14000,10000
2016-05-04,pd003,sp001,ct001,700,14000,10000
2016-05-04,pd004,sp001,ct001,100,2000,1000
2016-05-05,pd001,sp001,ct001,100,2000,1000
2016-05-05,pd001,sp002,ct002,700,14000,10000
2016-05-05,pd001,sp003,ct002,700,14000,10000
2016-05-05,pd002,sp001,ct001,100,2000,1000
2016-05-05,pd003,sp001,ct001,100,2000,1000
2016-05-05,pd004,sp001,ct001,100,2000,1000
2016-05-06,pd001,sp001,ct001,100,2000,1000
2016-05-06,pd001,sp002,ct002,100,2000,1000
2016-05-06,pd001,sp003,ct002,100,2000,1000
2016-05-07,pd002,sp001,ct001,100,2000,1000
2016-05-07,pd003,sp001,ct001,100,2000,1000
2016-05-07,pd004,sp001,ct001,50,1000,600
2016-05-07,pd002,sp001,ct001,100,2000,1000
2016-05-07,pd003,sp001,ct001,100,2000,1000
2016-05-07,pd004,sp001,ct001,50,1000,600
2016-05-08,pd001,sp001,ct001,50,1000,600
2016-05-08,pd001,sp002,ct002,100,2000,1000
2016-05-08,pd001,sp003,ct002,100,2000,1000
2016-05-08,pd001,sp001,ct001,50,1000,600
2016-05-08,pd001,sp002,ct002,100,2000,1000
2016-05-08,pd001,sp003,ct002,100,2000,1000
2016-05-08,pd001,sp001,ct001,50,1000,600
2016-05-08,pd001,sp002,ct002,100,2000,1000
2016-05-08,pd001,sp003,ct002,100,2000,1000
2016-05-09,pd002,sp001,ct001,50,1000,600
2016-05-09,pd003,sp001,ct001,50,1000,600
2016-05-09,pd004,sp001,ct001,50,1000,600
2016-05-09,pd001,sp001,ct001,50,1000,600
2016-05-09,pd002,sp001,ct001,50,1000,600
2016-05-09,pd003,sp001,ct001,50,1000,600
2016-05-09,pd004,sp001,ct001,50,1000,600
2016-05-09,pd001,sp001,ct001,50,1000,600
2016-05-09,pd001,sp002,ct002,100,2000,1000
2016-05-09,pd004,sp003,ct002,100,2000,1000
2016-05-09,pd002,sp001,ct001,700,14000,10000
2016-05-09,pd003,sp003,ct001,700,14000,10000
2016-05-09,pd004,sp003,ct001,100,2000,1000
2016-05-10,pd001,sp001,ct001,100,2000,1000
2016-05-10,pd001,sp002,ct002,700,14000,10000
2016-05-10,pd001,sp003,ct002,700,14000,10000
2016-05-10,pd002,sp001,ct001,100,2000,1000
2016-05-11,pd003,sp003,ct001,100,2000,1000
2016-05-11,pd004,sp001,ct001,100,2000,1000
2016-05-12,pd001,sp001,ct001,100,2000,1000
2016-05-12,pd004,sp002,ct002,100,2000,1000
2016-05-12,pd001,sp003,ct002,100,2000,1000
2016-05-12,pd001,sp001,ct001,100,2000,1000
2016-05-12,pd004,sp002,ct002,100,2000,1000
2016-05-12,pd001,sp003,ct002,100,2000,1000
2016-05-13,pd002,sp001,ct001,100,2000,1000
2016-05-13,pd003,sp001,ct001,100,2000,1000
2016-05-13,pd004,sp001,ct001,50,1000,600
2016-05-14,pd001,sp001,ct001,50,1000,600
2016-05-14,pd001,sp002,ct002,100,2000,1000
2016-05-14,pd001,sp003,ct002,100,2000,1000
2016-05-15,pd002,sp001,ct001,50,1000,600
2016-05-15,pd003,sp001,ct001,50,1000,600
2016-05-15,pd004,sp001,ct001,50,1000,600
2016-05-15,pd002,sp001,ct001,50,1000,600
2016-05-15,pd003,sp001,ct001,50,1000,600
2016-05-15,pd004,sp001,ct001,50,1000,600
2016-05-15,pd002,sp001,ct001,50,1000,600
2016-05-15,pd003,sp001,ct001,50,1000,600
2016-05-15,pd004,sp001,ct001,50,1000,600
2016-05-16,pd001,sp001,ct001,50,1000,600
2016-05-16,pd001,sp002,ct002,100,2000,1000
2016-05-16,pd001,sp003,ct002,100,2000,1000
2016-05-16,pd001,sp001,ct001,50,1000,600
2016-05-16,pd001,sp002,ct002,100,2000,1000
2016-05-16,pd001,sp003,ct002,100,2000,1000
2016-05-17,pd002,sp001,ct001,700,14000,10000
2016-05-17,pd003,sp001,ct001,700,14000,10000
2016-05-17,pd004,sp001,ct001,100,2000,1000
2016-05-17,pd002,sp001,ct001,700,14000,10000
2016-05-17,pd003,sp001,ct001,700,14000,10000
2016-05-17,pd004,sp001,ct001,100,2000,1000
2016-05-18,pd001,sp001,ct001,100,2000,1000
2016-05-18,pd003,sp002,ct001,700,14000,10000
2016-05-18,pd001,sp003,ct002,700,14000,10000
2016-05-19,pd002,sp001,ct001,100,2000,1000
2016-05-19,pd003,sp001,ct002,100,2000,1000
2016-05-20,pd001,sp001,ct001,100,2000,1000
2016-05-20,pd002,sp002,ct002,100,2000,1000
2016-05-20,pd003,sp003,ct001,100,2000,1000
2016-05-20,pd004,sp001,ct001,100,2000,1000
2016-05-20,pd001,sp002,ct002,100,2000,1000
2016-05-20,pd002,sp001,ct002,100,2000,1000
2. 建立天維度表dim_day
DROP TABLE IF EXISTS default.dim_day ;
create table default.dim_day (
day_key string,
full_day string,
month_name string,
quarter string,
year string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
load data local inpath '/root/kylinsample/dim_day.txt' overwrite into table default.dim_day;
dim_day.txt
2016-05-01,2016-05-01,201605,2016q2,2016
2016-05-02,2016-05-02,201605,2016q2,2016
2016-05-03,2016-05-03,201605,2016q2,2016
2016-05-04,2016-05-04,201605,2016q2,2016
2016-05-05,2016-05-05,201605,2016q2,2016
2016-05-06,2016-05-06,201605,2016q2,2016
2016-05-07,2016-05-07,201605,2016q2,2016
2016-05-08,2016-05-08,201605,2016q2,2016
2016-05-09,2016-05-09,201605,2016q2,2016
2016-05-10,2016-05-10,201605,2016q2,2016
2016-05-11,2016-05-11,201605,2016q2,2016
2016-05-12,2016-05-12,201605,2016q2,2016
2016-05-13,2016-05-13,201605,2016q2,2016
2016-05-14,2016-05-14,201605,2016q2,2016
2016-05-15,2016-05-15,201605,2016q2,2016
2016-05-16,2016-05-16,201605,2016q2,2016
2016-05-17,2016-05-17,201605,2016q2,2016
2016-05-18,2016-05-18,201605,2016q2,2016
2016-05-19,2016-05-19,201605,2016q2,2016
2016-05-20,2016-05-20,201605,2016q2,2016
3. 建立售賣員的維度表salesperson_dim
DROP TABLE IF EXISTS default.dim_salesperson ;
create table default.dim_salesperson (
salesperson_key string,
salesperson string,
salesperson_id string,
region string,
region_code string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
load data local inpath '/root/kylinsample/dim_salesperson.txt' overwrite into table default.dim_salesperson;
dim_salesperson.txt
sp001,hongbin,sp001,beijing,10086
sp002,hongming,sp002,beijing,10086
sp003,hongmei,sp003,beijing,10086
4. 建立客戶維度 custom_dim
DROP TABLE IF EXISTS default.dim_custom ;
create table default.dim_custom (
custom_key string,
custom_name string,
custorm_id string,
headquarter_states string,
billing_address string,
billing_city string,
billing_state string,
industry_name string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
load data local inpath '/root/kylinsample/dim_custom.txt' overwrite into table default.dim_custom;
dim_custom.txt
ct001,custom_john,ct001,beijing,zgx-beijing,beijing,beijing,internet
ct002,custom_herry,ct002,henan,shlinjie,shangdang,henan,internet
5. 建立產品維度表並插入資料
DROP TABLE IF EXISTS default.dim_product ;
create table default.dim_product (
product_key string,
product_name string,
product_id string,
product_desc string,
sku string,
brand string,
brand_code string,
brand_manager string,
category string,
category_code string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
load data local inpath '/root/kylinsample/dim_product.txt' overwrite into table default.dim_product;
dim_product.txt
pd001,Box-Large,pd001,Box-Large-des,large1.0,brand001,brandcode001,brandmanager001,Packing,cate001
pd002,Box-Medium,pd001,Box-Medium-des,medium1.0,brand001,brandcode001,brandmanager001,Packing,cate001
pd003,Box-small,pd001,Box-small-des,small1.0,brand001,brandcode001,brandmanager001,Packing,cate001
pd004,Evelope,pd001,Evelope_des,large3.0,brand001,brandcode001,brandmanager001,Pens,cate002
這樣一個星型的結構表在hive中建立完畢, 實際上一個離線的資料倉儲已經完成, 它包含一個主題, 即商品訂單.
三.Kylin的Project建立與資料同步
1.單擊"Manage Project"
2.單擊"New Project"
3.輸入"Project Name", WareHouse_01
4.Submit
1.選擇WareHouse_01,選擇"Data Source" tab頁
2.單擊"Load Hive Table"
3.輸入需要同步的表
"DEFAULT.FACT_ORDER,DEFAULT.DIM_DAY,DEFAULT.DIM_PRODUCT,DEFAULT.DIM_SALESPERSON,DEFAULT.DIM_CUSTOM"
4.Sync
四.Kylin的Model建立
1.選擇"Models" tab頁,單擊"New Model"
2."Model Name"輸入,WareHouse_01_Model
3.選擇"Fact Table"為 DEFAULT.FACT_ORDER;再 新增Lookup Table;
4.選取每張表的哪些列欄位作為Dimensions
ID Table Name Columns
1 DEFAULT.FACT_ORDER TIME_KEY PRODUCT_KEY SALESPERSON_KEY CUSTOM_KEY
2 DEFAULT.DIM_DAY FULL_DAY
3 DEFAULT.DIM_PRODUCT PRODUCT_NAME
4 DEFAULT.DIM_SALESPERSON SALESPERSON
5 DEFAULT.DIM_CUSTOM CUSTOM_NAME
5.選取DEFAULT.FACT_ORDER表的哪些列欄位作為measures
QUANTITY_ORDERED ORDER_DOLLARS COST_DOLLARS
6.a.選取 "Partition Date Column"為DEFAULT.FACT_ORDER.TIME_KEY,格式 yyyy-MM-dd
b.對於"Filter"條件,由於沒有要過濾的條件,故不填寫
7.Save
五.Kylin的Cube建立
1.選擇"Models" tab頁,單擊"New Cube“
2.Cube Info:
"Model Name"選擇,WareHouse_01_Model
"Cube Name"輸入,cube01
3.Dismensions:
單擊"Auto Generator",依據情況選擇維度的列,全選
4.Measures:
a.單擊"+Measure",新增要聚合計算的度量,比如 sum(QUANTITY_ORDERED)
b.Expression: SUM/MIN/MAX/COUNT/COUNT_DISTINCT/TOP_N/RAW
5.Refresh Setting:
a.Auto Merge Thresholds,自動合併閾值,7~28 days
b.Retention Threshold,保留天數,60
c.Partition Start Date,非常重要,是後面build cube的開始日期
6.Advanced Setting:
--Aggregation Groups:
a.Includes: TIME_KEY ,PRODUCT_KEY ,SALESPERSON_KEY , CUSTOM_KEY
b.Mandatory Dimensions: TIME_KEY
c.Hierarchy Dimensions: PRODUCT_KEY ,SALESPERSON_KEY ,CUSTOM_KEY
d.Joint Dimensions: 無
--Rowkeys:
TIME_KEY ,PRODUCT_KEY ,SALESPERSON_KEY ,CUSTOM_KEY 4個欄位為dict字典編碼
7.Configuration Overwrites: 無
8.Overview:
儲存cube
五.Cube Build
1.選擇 cube01,單擊”Action”,選擇Build
2.填寫End Date,Submit
3.單擊”Monitor”,觀察Job
4.等待Process100% (Any Errors)
5.返回cube01,檢視 cube size 和 Source Records等欄位更新
六.Hive* kyin 查詢對比
點選(此處)摺疊或開啟
-
1.2016-05-01到2016-05-15期間的每天的訂單數量,訂單金額,訂單成本
-
-
Hive: 65.816 s
-
select fact.time_key, sum(fact.quantity_ordered), sum(fact.order_dollars), sum(fact.cost_dollars) from fact_order as fact
-
where fact.time_key >= "2016-05-01" and fact.time_key <= "2016-05-15"
-
group by fact.time_key order by fact.time_key;
-
-
Kylin: 0.32s-->0.27s
-
select fact.time_key, sum(fact.quantity_ordered), sum(fact.order_dollars), sum(fact.cost_dollars) from fact_order as fact
-
where fact.time_key between '2016-05-01' and '2016-05-15'
- group by fact.time_key order by fact.time_key
點選(此處)摺疊或開啟
-
2.2016-05-01到2016-05-15期間的每天的產品的訂單量
-
-
Hive: 100.336s
-
select dday.full_day,dsp.product_name, sum(fact.quantity_ordered) from fact_order as fact
-
inner join dim_day as dday on fact.time_key = dday.day_key
-
inner join dim_product as dsp on fact.product_key = dsp.product_key
-
where dday.full_day >= "2016-05-01" and dday.full_day <= "2016-05-15"
-
group by dday.full_day,dsp.product_name
-
order by dday.full_day,dsp.product_name;
-
-
Kylin:0.93s-->0.39s
-
select dday.full_day,dsp.product_name, sum(fact.quantity_ordered) from fact_order as fact
-
inner join dim_day as dday on fact.time_key = dday.day_key
-
inner join dim_product as dsp on fact.product_key = dsp.product_key
-
where dday.full_day >= '2016-05-01' and dday.full_day <= '2016-05-15'
-
group by dday.full_day,dsp.product_name
- order by dday.full_day,dsp.product_name
七. Hive ? Kylin查詢對比
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/30089851/viewspace-2122586/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- SiriKit框架詳細解析(九) —— 構建Siri Shortcuts簡單示例(三)框架
- 合約跟單開發案例,合約量化跟單系統開發技術詳細流程
- MVC 三層架構案例詳細講解MVC架構
- 製造業訂單流程
- 電商產品之訂單拆分規則與流程
- SAP建立採購訂單流程
- React 小案例 訂單列表評價React
- KingbaseES RAC部署案例之---SAN環境構建RAC
- WGCNA構建基因共表達網路詳細教程GC
- ElementUI的構建流程UI
- webpack的構建流程Web
- SpringBoot整合Mybatis超詳細流程Spring BootMyBatis
- Apache RocketMQ + Hudi 快速構建 LakehouseApacheMQ
- 構建靜態頁面 之 [ 表單 ]
- 初探webpack之單應用多端構建Web
- erp 專案訂單重構--修改訂單狀態部分
- webpack構建流程及梳理Web
- 使用Apache Spark和Apache Hudi構建分析資料湖ApacheSpark
- OpenIddict 登入及詳細流程解析
- SpringMVC工作流程 --通透較詳細SpringMVC
- Apache Kylin 入門 5 - 構建 CubeApache
- Apache Kylin 入門 4 – 構建 ModelApache
- Apache Kylin 入門 4 - 構建 ModelApache
- Apache Kylin 入門 5 – 構建 CubeApache
- 如何在AWS上構建Apache DolphinSchedulerApache
- apache虛擬web主機構建ApacheWeb
- 泛微oa流程表單之js操作明細資料1JS
- SAP訂單編排和流程增強概述
- 實驗案例1構建簡單的金融知識圖譜
- 爬蟲抓取網頁的詳細流程爬蟲網頁
- spdk nvme盤probe的流程詳細分析
- Apache Flink X Apache Doris 構建極速易用的實時數倉架構Apache架構
- 基於Amazon ECS Fargate構建Apache SupersetApache
- 詳細講解:零知識證明 之 ZCash 完整的匿名交易流程
- 超詳細!Apache Maven下載安裝使用教程ApacheMaven
- Python 內建logging 使用詳細講Python
- 申請微信支付詳細流程(最新2018)
- C#開發Windows服務詳細流程C#Windows
- Spring中Bean的例項化詳細流程SpringBean