基本概念
後臺啟動AsterixDB
cd ~/asterixdb/asterixdb/asterix-server/target/asterix-server-0.9.10-SNAPSHOT-binary-assembly/apache-asterixdb-0.9.10-SNAPSHOT/opt/local/bin
./start-sample-cluster.sh
修改AsterixDB程式碼後,除錯AsterixDB;啟動/asterixdb/asterixdb/asterix-app/src/test/java/org/apache/asterix/api/common/AsterixHyracksIntegrationUtil.java
AsterixDB操作流程
-
建立Dataverse
CREATE DATAVERSE Traffic IF NOT EXISTS;
-
使用Dataverse【每一條語句之前執行】
USE Traffic;
-
設定儲存型別(B-tree, R-tree等)
CREATE TYPE TrafficRecordType AS { Vehicle_ID: int, v_Vel: float, Location: string };
-
建立資料庫
// 設定單一主鍵 CREATE DATASET TrafficRecords(TrafficRecordType) PRIMARY KEY Vehicle_ID; // Choose an appropriate primary key // 設定複合主鍵 CREATE DATASET traffic_CSV_set(TrafficRecordType) PRIMARY KEY Vehicle_ID, Frame_ID;
-
建立索引(default: B-tree;可指定為R-tree)
// 在TrafficRecords【dataset】上的GeoLocation欄位,建立R-tree索引,名為TrafficRecords_Rtree create index TrafficRecords_Rtree on TrafficRecords(GeoLocation) type RTREE;
-
匯入資料(三種方式:直接插入,匯入csv檔案,轉化為ADM格式資料匯入)
// insert 資料 insert into TestPoint ({ "Vehicle_ID":1, "v_Vel": 2.2, "Location": "this is a String." }); // 匯入csv資料,csv中僅能使用int、double等基本型別 LOAD DATASET TrafficRecords USING localfs (("path"="127.0.0.1:///path/data.csv"), ("format"="delimited-text"), ("delimiter"=","), ("quote"="\""), ("header"="true")); // 匯入ADM格式資料 LOAD DATASET Exper2 USING localfs (("path"="127.0.0.1:///path/data.adm"), ("format"="adm"));
-
查詢,驗證資料是否匯入
SELECT * FROM TrafficRecords;
基本操作
-
刪除資料型別或dataset
- dataType 由 dataset 使用,因此在刪除時需要先刪除 dataset , 再刪除 dataType
- 刪除dataset
drop dataset TrafficRecords;
- 刪除dataType
drop type TrafficRecordType;
-
upsert語法,更新
upsert into TrafficLSMRTree_1 ({ "Vehicle_ID": 1, "Frame_ID": 271, // 將該值由270更新為271 "Total_Frames": 569 });
-
delete記錄
delete from TrafficLSMRTree_1 where Combined_ID_Time = "1,1118847869000";
-
插入空間資料
... // datatype create type pointType AS{ Vehicle_ID: int, GeoLocation: point }; create dataset TestPoint(pointType) primary key Vehicle_ID; insert into TestPoint ({ "Vehicle_ID":1, "GeoLocation":point("5.2,10.2") }); ... ----------------------------------- GeoLocation格式: "GeoLocation":point(5.2,10.2) N "GeoLocation":point("5.2","10.2") N "GeoLocation":point("5.2,10.2") Y ----------------------------------- 不能匯入含有point資料的csv檔案
-
ADM格式資料(JSON串)
// adm資料示例: [{ "Vehicle_ID": 809, "v_Vel": 53.5, "Combined_ID_Time": "809,1118847220800", "GeoLocation": point("6452727.119,1871874.94") }]
資料讀取
-
按照資料寫入時的時間戳進行讀取資料
USE TrafficUS101; SELECT * FROM TrafficLSMRTree_1 WHERE Global_Time >= 1118847890000 AND Global_Time <= 1118847900000; // success
-
按照空間位置資訊,即空間範圍GeoLocation進行劃分
USE TrafficUS101; SELECT * FROM TrafficLSMRTree_1 WHERE spatial_intersect(GeoLocation, create_rectangle(create_point(6451060.0, 1871870.0), create_point(6452745.0, 1873415.0)));
-
使用R-tree索引進行資料查詢
[AsterixDB中使用函式](AsterixDB – Builtin Functions (apache.org))
// 點查詢 SELECT * FROM TrafficRecordsWithGeo WHERE spatial_intersect(GeoLocation, create_point(5.2, 10.3)); // 範圍查詢(支援矩形、圓形等) SELECT * FROM TestGeoPointCSV1 WHERE spatial_intersect(GeoLocation, create_rectangle(create_point(5.0,10.0), create_point(6.0,11.0))); // 距離查詢,這條查詢會返回距離點 (5.2, 10.3) 最近的1個記錄。 SELECT * FROM TestGeoPointCSV1 ORDER BY spatial_distance(GeoLocation, create_point(5.2, 10.3)) LIMIT 1;