Apache Phoenix自定義函式(UDF)實踐

gucapg發表於2019-01-07

Apache Phoenix自定義函式,文章包含程式碼、配置、以及自己遇到的一些坑分享三大部分。
環境:JDK1.8、CDH5.14.4。
開發語言:java
官網連結:http://phoenix.apache.org/udf.html
自定義函式功能描述:根據一個日期,返回所屬季度

一. 程式碼

1. 建立maven J2SE工程,配置pom檔案

 <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hbase.version>1.2.0-cdh5.14.4</hbase.version><!-- hbase版本 -->
    </properties>

    <dependencies>
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>1.6</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
            <scope>provided</scope>
            <exclusions>
                <exclusion>
                    <groupId>commons-io</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-lang</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <!---  此處版本12.0.1 -->
                    <groupId>com.google.guava</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-codec</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.phoenix</groupId>
            <artifactId>phoenix-core</artifactId>
            <version>4.14.0-cdh5.14.2</version>
            <scope>provided</scope>
            <exclusions>
                <exclusion>
                    <groupId>jline</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <!-- 此處原生版本為13.0.1 -->
                    <groupId>com.google.guava</groupId>
                    <artifactId>guava</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-collections</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-codec</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-io</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-lang</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.hbase</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

    </dependencies>

2. 建立DfQuarter類

package cn.cdp.dope.phoenix.pefl;

import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.expression.LiteralExpression;
import org.apache.phoenix.expression.function.DateScalarFunction;
import org.apache.phoenix.expression.function.ScalarFunction;
import org.apache.phoenix.parse.CurrentDateParseNode;
import org.apache.phoenix.parse.FunctionParseNode;
import org.apache.phoenix.schema.tuple.Tuple;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.schema.types.PDate;
import org.apache.phoenix.schema.types.PTimestamp;
import org.apache.phoenix.schema.types.PVarchar;
import org.joda.time.DateTime;

import java.sql.SQLException;
import java.util.List;

/*
 * @Description: 季節自定義函式,根據一個日期型別日期,返回一個字串型別季節
 * @Auther: gucp
 * @Date: 2019/1/2 16:11
 *
 */
@FunctionParseNode.BuiltInFunction(
        name = "QUARTER",
        args = {@FunctionParseNode.Argument(
                allowedTypes = {PTimestamp.class}
        )}
)
public class DfQuarter extends DateScalarFunction {
    public static final String NAME = "QUARTER";

    public DfQuarter() {
    }

    public DfQuarter(List<Expression> children) throws SQLException {
        super(children);
    }

    public boolean evaluate(Tuple tuple, org.apache.hadoop.hbase.io.ImmutableBytesWritable immutableBytesWritable) {
        Expression expression = this.getChildren().get(0);
        if (!expression.evaluate(tuple, immutableBytesWritable)) {
            return false;
        }
        if (immutableBytesWritable.getLength() == 0) {
            return true;
        }

        long dateTime = this.inputCodec.decodeLong(immutableBytesWritable, expression.getSortOrder());
        DateTime dt = new DateTime(dateTime);
        int month = dt.getMonthOfYear();
        PDataType returnType = this.getDataType();


        byte[] byteValue = (dt.getYear() + "第" + getQuarterByMonth(month) + "季度").getBytes();


        immutableBytesWritable.set(byteValue);
        return true;


    }

    private String getQuarterByMonth(int month) {
        String quarter = "";
        if (month <= 3) {
            quarter = "1";
        } else if (month <= 6) {
            quarter = "2";
        } else if (month <= 9) {
            quarter = "3";
        } else {
            quarter = "4";
        }

        return quarter;
    }


    public String getName() {
        return "QUARTER";
    }

    public PDataType getDataType() {
        return PVarchar.INSTANCE;
    }


}

3. 打jar包

	注意:打jar時,需要把joda-time1.6.jar 包含進去

二. CDH配置

1. hbase-site.xml服務端和客戶端都需要配置

<property>
  <name>phoenix.functions.allowUserDefinedFunctions</name>
  <value>true</value>
</property>
<property>
  <name>fs.hdfs.impl</name>
  <value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
</property>
<property>
  <name>hbase.rootdir</name>
  <value>hdfs://填寫自己的/hbase</value>
</property>	
	
<!-- 自定義函式,儲存jar的hdfs目錄 -- >
<property>
  <name>hbase.dynamic.jars.dir</name>
  <value>hdfs://填寫自己的/hbase/lib</value>
</property>

注意事項
後面兩個配置,需要和服務端的配置一樣。不然會提示找不到函式。

2. 登陸phoenix客戶端建立函式

CREATE FUNCTION QUARTER(TIMESTAMP) returns varchar as 'cn.cdp.dope.phoenix.pefl.DfQuarter' using jar 'hdfs://填寫自己的/hbase/lib/phoenix-udf-1.4-SNAPSHOT.jar'

3. 驗證

在這裡插入圖片描述

三. 錯誤摘要

1、jar 路徑不寫,則執行讀取配置hbase.dynamic.jars.dir
2、驗證函式是否可行,需要新增from
不行:select QUARTER(to_date(‘2019-01-02’))
行:select QUARTER(to_date(‘2019-01-02’)) from table
3、如果一直報函式未定義,有以下幾種可能
在這裡插入圖片描述

  • 退出phoenix客戶端,重新登陸試試
  • 如果是程式中,如java/scala/… JDBC中使用到了該函式,需要在hbase-site.xml中新增引數
<property>
  <name>hbase.local.dir</name>
  <value>/tmp/hbase-hbase/local/</value>
  <description>hbase本地檔案系統路徑</description>
</property>

修改JDBC配置

 Properties props = new Properties()
 props.setProperty("phoenix.functions.allowUserDefinedFunctions", "true")
    DriverManager.getConnection("jdbc:phoenix:10.20.127.68,10.20.127.74:2181",props)
  • 在oozie任務中,提示找不到
    把該jar包,上傳至oozie共享庫或者私有庫中

相關文章