Import dependencies
<dependencies>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>3.1.2</version>
    </dependency>
</dependencies>
Code
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
public class UDTFTest01 extends GenericUDTF {

    // Reused buffer holding the single output column of each forwarded row
    private final ArrayList<String> list = new ArrayList<>();

    @Override
    public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
        // Declare one output column named "word" of type string
        List<String> names = new ArrayList<>();
        names.add("word");
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs);
    }

    @Override
    public void process(Object[] objects) throws HiveException {
        // First argument: the string to split; second argument: the separator (treated as a regex by String.split)
        String input = objects[0].toString();
        String separator = objects[1].toString();
        String[] words = input.split(separator);
        for (String word : words) {
            list.clear();
            list.add(word);
            // Emit one output row per token
            forward(list);
        }
    }

    @Override
    public void close() throws HiveException {
        // Nothing to clean up
    }
}
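Once the jar is built and the function is registered (the steps are in the next section), the UDTF turns one input string into one row per token. A minimal sketch of how it could be called, assuming the function has been registered under the hypothetical name udtf_test01 and that some_table with columns id and csv_col exists only for illustration:

-- Calling the UDTF directly produces one row per token in a column named word:
select udtf_test01('hello,world,hive', ',');
-- hello
-- world
-- hive

-- More commonly a UDTF is applied to a table column via lateral view:
select t.id, w.word
from some_table t
lateral view udtf_test01(t.csv_col, ',') w as word;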
Create the function
- Upload the jar to the cluster
- Start Hive and add the jar (if the jar is placed directly in Hive's lib directory, this step can be skipped)
add jar jar_path
- Create the function in Hive
create function function_name as 'class_path'
Here function_name is the name you choose for your custom function, and class_path is the fully qualified class name of the UDTF (package name plus class name), not a file path; a complete example is shown after these steps.
- If you no longer want to use the function, you can drop it
drop function function_name
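For reference, the whole sequence of Hive commands might look like the following; the jar path and function name are placeholders and the class name assumes the example class above has no package, so substitute your own values:

add jar /opt/jars/udtf-test01.jar;            -- skip if the jar already sits in Hive's lib directory
create function udtf_test01 as 'UDTFTest01';  -- use the fully qualified name if the class is in a package
select udtf_test01('a,b,c', ',');             -- returns three rows: a, b, c
drop function udtf_test01;                    -- remove the function when it is no longer needed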