Using Flink to consume messages from Kafka, write aggregated results to MySQL, and write the messages to Hive

Published by 玄痛大師 on 2020-12-31

Use Flink to consume messages from a Kerberos-secured Kafka cluster and compute metrics every minute, such as the number of records and the total amount received per minute, carrying forward the cumulative totals of the previous minute. The aggregated values are written to MySQL for front-end display, and the raw messages are also stored on the Hadoop platform, with a table created over them in Hive.
The main logic is shown below:
1. Consume data from the specified Kafka topic.
2. Write the aggregated statistics to MySQL.
3. Write the received data to HDFS and expose it through an external table in Hive, which is faster.
4. Outside the program, a scheduled task adds partitions to the external table (see the sketch below).
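
The load_parition.sh script itself is not included in this post. Purely as an illustration, the same daily partition could also be added from Java through Hive JDBC (the hive-jdbc dependency is already in the pom below); the connection URL, table name, partition directory and the pexchangedate partition column here are placeholders, not the actual script:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

public class AddDailyPartition {
    public static void main(String[] args) throws Exception {
        //Register the Hive JDBC driver
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String day = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
        //Hypothetical HiveServer2 address and external table name
        String url = "jdbc:hive2://hiveserver:10000/default";
        //The partition directory layout is an assumption; adjust it to match the HdfsBucketer output
        String sql = "ALTER TABLE union_pay ADD IF NOT EXISTS PARTITION (pexchangedate='" + day + "')"
                + " LOCATION '/user/hive/warehouse/union_pay/" + day + "'";
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement()) {
            stmt.execute(sql);
        }
    }
}

In the actual project this step is driven by a cron-style scheduler running the shell script, as noted in step 4.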

 
/**
     * 1. Consume data from the specified Kafka topic.
     * 2. Write the aggregated statistics to MySQL.
     * 3. Write the received data to HDFS and expose it through an external table in Hive, which is faster.
     * Two sinks are added: one writes to MySQL, the other writes to HDFS.
     * A scheduled task outside the program adds partitions to the table; the script is in resources/load_parition.sh.
     * @param tableName name of the Hive table to write to
     */
    public void writePayByKey(String tableName) {
        try {
            //Aggregation interval in minutes
            Integer intevalTime = commonProperties.getInterValTime();
            //Initialize the execution environment and configure checkpointing
            StreamExecutionEnvironment bsEnv = createEnv(checkpointUrl);
            StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv);

            //Kafka uses Kerberos authentication, so the krb5 file and kafka.jaas are required (see my other post for the format of kafka.jaas)
            Properties properties = getProperties();
            //If Kerberos authentication is enabled in the configuration file
            if (commonProperties.getKafkaKerberosFlag()) {
                //Get the location of the kafka.jaas file from the configuration
                String kerberosConf = commonProperties.getKafkaKerberosConfig();
                //Get krb5.conf from the configuration, i.e. the /etc/krb5.conf file on the KDC server; it can live anywhere as long as its location is set in the configuration
                String krb5Conf=commonProperties.getKerberoseKrb5Conf();
                log.info("kerberosConf:"+kerberosConf);
                log.info("krb5Conf:"+krb5Conf);
                System.setProperty("java.security.auth.login.config", kerberosConf);
                System.setProperty("java.security.krb5.conf", krb5Conf);
                properties.put("security.protocol", "SASL_PLAINTEXT");
                properties.put("sasl.mechanism", "GSSAPI");
                properties.put("sasl.kerberos.service.name", "kafka");

            }
            //Deserialize the JSON data from Kafka into objects
            FlinkKafkaConsumer<UnionPay> kafkaConsumer011 = new FlinkKafkaConsumer<>(topicName, new ConsumerDeserializationSchema<>(UnionPay.class), properties);
            kafkaConsumer011.setStartFromLatest();
            DataStream<UnionPay> dataStreamSource = bsEnv.addSource(kafkaConsumer011);
            //Key the stream and aggregate it per key in a tumbling processing-time window
            WindowedStream<UnionPay, String, TimeWindow> timeWindowWindowedStream = dataStreamSource.keyBy(new KeySelector<UnionPay, String>() {
                @Override
                public String getKey(UnionPay unionPay) throws Exception {
                   String mrchNo=MyUtils.getMrchNo(unionPay.getMrchno());
                    return mrchNo;
                }
            }).window(TumblingProcessingTimeWindows.of(Time.minutes(intevalTime)));
            SingleOutputStreamOperator<MonitorTranDetail> resultWin = timeWindowWindowedStream.aggregate(new RsesultAggregateFunc(), new ProcessWindowFunction<MonitorTranDetail, MonitorTranDetail, String, TimeWindow>() {
                @Override
                public void process(String s, Context context, Iterable<MonitorTranDetail> iterable, Collector<MonitorTranDetail> collector) {
                    try {
                        Long lend = context.window().getEnd();
                        String etltime = MyUtils.getDateFromformatter("yyyyMMddHHmmss", lend);
                        String datadate = MyUtils.getDateFromformatter("yyyyMMdd", lend);
                        //String txntime = MyUtils.getDateFromformatter("HHmmss", lend);
                        String txntime=etltime;
                        //Set the data time on each aggregated record, using the window end time
                        for (MonitorTranDetail monitorTranDetail : iterable) {
                            monitorTranDetail.setEtlTIme(etltime);
                            monitorTranDetail.setDataDate(datadate);
                            monitorTranDetail.setTxnTime(txntime);
                            collector.collect(monitorTranDetail);
                        }
                    } catch (Exception ex) {
                        ex.printStackTrace();
                    }

                }
            });
            //Add the sink that writes the aggregated statistics to MySQL
            resultWin.addSink(new DbSinkFunction());
            //Write the received data to HDFS
            //Parse the table's field schema from XML and append the partition column
            List<FieldSchema> fieldSchemas = xmlParser.parseField(tableName);
            fieldSchemas.add(new FieldSchema("pexchangeDate", "string", "yyyyMMdd"));
            SingleOutputStreamOperator<String> singleOutputUnionPay = timeWindowWindowedStream.process(new HdfsAllWindowFunction(fieldSchemas));
            //Get the HDFS directory of the database
            String dataBasedfsdir = commonProperties.getDfsdatabaseDir();
            if (!dataBasedfsdir.endsWith("/")) {
                dataBasedfsdir = dataBasedfsdir + "/";
            }
            String hdfsDir = dataBasedfsdir + tableName.toLowerCase() + "/";
            BucketingSink<String> hdfsSink = new BucketingSink<>(hdfsDir);
            // Split output directories by yyyyMMdd, similar to a Hive date partition
            String strSplitformatter = getSplitChar();
            hdfsSink.setBucketer(new HdfsBucketer(strSplitformatter));
            // Set the batch size to 128 MB; once a file exceeds 128 MB it is closed and a new one is opened
            hdfsSink.setBatchSize(1024 * 1024 * 128L);
            Configuration conf = new Configuration();
            String url = commonProperties.getDfsdefaultName();
            conf.set("fs.default.name", url);
            hdfsSink.setFSConfig(conf);
            // Roll over to a new file every hour
            hdfsSink.setBatchRolloverInterval(60 * 60 * 1000L);
            // Close buckets that have received no data for longer than this threshold (ms)
            hdfsSink.setInactiveBucketThreshold(1000L);
            // How often to check for inactive buckets (ms)
            hdfsSink.setInactiveBucketCheckInterval(1000L);
            // Prefix for pending files; the default is _
            hdfsSink.setPendingPrefix("");
            // Suffix for pending files; the default is .pending
            hdfsSink.setPendingSuffix("");
            //Prefix for in-progress files; the default is _
            hdfsSink.setInProgressPrefix(".");
            //Add the HDFS sink
            singleOutputUnionPay.addSink(hdfsSink);
            HiveCatalog hive = new HiveCatalog(hiveCtalog, hiveDatabase, hiveConf, hiveVersion);
            bsTableEnv.registerCatalog(hiveCtalog, hive);
            bsTableEnv.useCatalog(hiveCtalog);
            bsTableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
            bsTableEnv.useDatabase(hiveDatabase);
         
            //Check whether the table already exists
            List<String> listObj = Arrays.asList(bsTableEnv.listTables());
            List<String> listExist = listObj.stream().filter(r -> r.equalsIgnoreCase(tableName)).collect(Collectors.toList());
            //Create the table if it does not exist
            if (listExist.size() == 0) {
                String sinkDdl = xmlParser.parseExternalTable(tableName);
                log.info(sinkDdl);
                bsTableEnv.executeSql(sinkDdl);
            }
            bsEnv.execute("Insert_stats_to_MySQL_and_Hive");
        } catch (Exception e) {
            log.error("writePayByKey:{}", e);
        }
    }
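
The DbSinkFunction and HdfsAllWindowFunction referenced above are project classes that are not listed in this post. To give a rough idea of the MySQL side, here is a minimal sketch of what such a sink could look like, assuming a monitor_tran_detail table whose columns match the MonitorTranDetail fields; the connection URL, credentials and column names are placeholders, not the project's actual implementation:

import com.tcloudata.model.MonitorTranDetail;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class DbSinkFunctionSketch extends RichSinkFunction<MonitorTranDetail> {
    private transient Connection conn;
    private transient PreparedStatement stmt;

    @Override
    public void open(Configuration parameters) throws Exception {
        //Placeholder connection info; in the real project this would come from configuration
        conn = DriverManager.getConnection(
                "jdbc:mysql://mysql-host:3306/monitor?useSSL=false", "user", "password");
        stmt = conn.prepareStatement(
                "INSERT INTO monitor_tran_detail "
                        + "(zone_cd, data_date, txn_time, tottxn_cntd, tottxn_amountd) "
                        + "VALUES (?, ?, ?, ?, ?)");
    }

    @Override
    public void invoke(MonitorTranDetail value, Context context) throws Exception {
        //One row per aggregated record emitted by the window
        stmt.setString(1, value.getZoneCd());
        stmt.setString(2, value.getDataDate());
        stmt.setString(3, value.getTxnTime());
        stmt.setInt(4, value.getTottxnCntd());
        stmt.setDouble(5, value.getTottxnAmountd());
        stmt.executeUpdate();
    }

    @Override
    public void close() throws Exception {
        if (stmt != null) stmt.close();
        if (conn != null) conn.close();
    }
}

A production sink would additionally batch the inserts and reconnect on failure.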

1. Converting the JSON messages in Kafka into objects

ConsumerDeserializationSchema.java

import com.alibaba.fastjson.JSONObject;
import com.tcloudata.utils.MyUtils;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;


public class ConsumerDeserializationSchema<T> implements DeserializationSchema<T> {
    private Class<T> clazz;
    public ConsumerDeserializationSchema(Class<T> clazz) {
        this.clazz = clazz;
    }
    @Override
    public T deserialize(byte[] message) throws IOException {
        ByteBuffer buffer = ByteBuffer.wrap(message).order(ByteOrder.LITTLE_ENDIAN);
        String mess = MyUtils.byteBuffertoString(buffer);
        //Deserialize into the target POJO class
        T objTarget=null;
        String className = clazz.getTypeName().toLowerCase();
        //The JSON for UnionPay is nested, so it needs special handling
        if (className.contains("UnionPay".toLowerCase())) {
            objTarget = (T) MyUtils.convertUnionPay(mess, clazz);
        } else {
            objTarget = JSONObject.parseObject(mess, clazz);
        }
        return objTarget;
    }
    @Override
    public boolean isEndOfStream(T t) {
        return false;
    }

    @Override
    public TypeInformation<T> getProducedType() {
        return TypeExtractor.getForClass(clazz);
    }

}
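
The UnionPay model deserialized above is not listed in this post. Judging from the accessors used here and in the aggregate function in the next section (getMrchno() and getAmount()), it is roughly a POJO along the following lines; this is an inferred sketch (using the Lombok dependency from the pom), and the remaining fields of the nested JSON are omitted:

import lombok.Data;

//Inferred sketch of the UnionPay message model, not the original class
@Data
public class UnionPay {
    //Merchant number; MyUtils.getMrchNo() derives the grouping key from it
    private String mrchno;
    //Transaction amount, bucketed by RsesultAggregateFunc
    private Double amount;
    //...the remaining fields of the nested JSON message are omitted here
}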

2. Aggregating each incoming message

RsesultAggregateFunc.java

import com.tcloudata.model.MonitorTranDetail;
import com.tcloudata.model.UnionPay;
import com.tcloudata.utils.MyUtils;
import org.apache.flink.api.common.functions.AggregateFunction;

import java.math.BigDecimal;

public class RsesultAggregateFunc implements AggregateFunction<UnionPay, MonitorTranDetail, MonitorTranDetail> {
    @Override
    public MonitorTranDetail createAccumulator() {
        MonitorTranDetail resultInfo = new MonitorTranDetail();
        return resultInfo;
    }

    @Override
    public MonitorTranDetail add(UnionPay unionPay, MonitorTranDetail monitorTranDetail) {

        String mrchNo = MyUtils.getMrchNo(unionPay.getMrchno());
        Double amount = unionPay.getAmount();
        Integer txnCnt1 = 0;
        Integer txnCnt2 = 0;
        Integer txnCnt3 = 0;
        Integer txnCnt4 = 0;
        Integer txnCnt5 = 0;
        Integer txnCnt6 = 0;
        //Total number of transactions for the day
        Integer tottxnCnt = 1;
        //Number of transactions with amount of 10000 or more
        if (amount >= 10000) {
            txnCnt1 = 1;
        }
        //Number of transactions with amount between 3000 and 10000
        if (amount >= 3000 && amount < 10000) {
            txnCnt2 = 1;
        }
        //Number of transactions with amount between 1000 and 3000
        if (amount >= 1000 && amount < 3000) {
            txnCnt3 = 1;
        }
        // Number of transactions with amount between 500 and 1000
        if (amount >= 500 && amount < 1000) {
            txnCnt4 = 1;
        }
        // Number of transactions with amount between 100 and 500
        if (amount >= 100 && amount < 500) {
            txnCnt5 = 1;
        }
        // Number of transactions with amount under 100
        if (amount > 0 && amount < 100) {
            txnCnt6 = 1;
        }
        //Total transaction amount for the day
        BigDecimal tmpBigdecimal = new BigDecimal(amount);
        if (mrchNo.equals(monitorTranDetail.getZoneCd())) {
            txnCnt1 = monitorTranDetail.getTxnCnt1() + txnCnt1;
            txnCnt2 = monitorTranDetail.getTxnCnt2() + txnCnt2;
            txnCnt3 = monitorTranDetail.getTxnCnt3() + txnCnt3;
            txnCnt4 = monitorTranDetail.getTxnCnt4() + txnCnt4;
            txnCnt5 = monitorTranDetail.getTxnCnt5() + txnCnt5;
            txnCnt6 = monitorTranDetail.getTxnCnt6() + txnCnt6;
            tottxnCnt = monitorTranDetail.getTottxnCntd() + 1;
            BigDecimal d1 = new BigDecimal(amount);
            BigDecimal d2 = new BigDecimal(monitorTranDetail.getTottxnAmountd());
            tmpBigdecimal = d1.add(d2);
        } else {
            monitorTranDetail.setZoneCd(mrchNo);
        }
        monitorTranDetail.setTxnCnt1(txnCnt1);
        monitorTranDetail.setTxnCnt2(txnCnt2);
        monitorTranDetail.setTxnCnt3(txnCnt3);
        monitorTranDetail.setTxnCnt4(txnCnt4);
        monitorTranDetail.setTxnCnt5(txnCnt5);
        monitorTranDetail.setTxnCnt6(txnCnt6);
        monitorTranDetail.setTottxnCntd(tottxnCnt);
        monitorTranDetail.setTottxnAmountd(tmpBigdecimal.doubleValue());
        return monitorTranDetail;
    }

    @Override
    public MonitorTranDetail getResult(MonitorTranDetail monitorTranDetail) {
        return monitorTranDetail;
    }

    @Override
    public MonitorTranDetail merge(MonitorTranDetail acc1, MonitorTranDetail acc2) {
        acc2.setTxnCnt1(acc1.getTxnCnt1() + acc2.getTxnCnt1());
        acc2.setTxnCnt2(acc1.getTxnCnt2() + acc2.getTxnCnt2());
        acc2.setTxnCnt3(acc1.getTxnCnt3() + acc2.getTxnCnt3());
        acc2.setTxnCnt4(acc1.getTxnCnt4() + acc2.getTxnCnt4());
        acc2.setTxnCnt5(acc1.getTxnCnt5() + acc2.getTxnCnt5());
        acc2.setTxnCnt6(acc1.getTxnCnt6() + acc2.getTxnCnt6());
        acc2.setTottxnCntd(acc1.getTottxnCntd() + acc2.getTottxnCntd());
        BigDecimal d1 = new BigDecimal(acc1.getTottxnAmountd());
        BigDecimal d2 = new BigDecimal(acc2.getTottxnAmountd());
        BigDecimal tmpBigdecimal = d1.add(d2);
        acc2.setTottxnAmountd(tmpBigdecimal.doubleValue());
        return acc2;
    }
}
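
MonitorTranDetail, the accumulator and result type above, is also not listed. From the getters and setters used in the code, it presumably contains at least the following fields; this is an inferred sketch (again relying on the Lombok dependency from the pom), and the zero initializers are an assumption added to keep merge() null-safe:

import lombok.Data;

//Inferred sketch of the aggregation model, not the original class
@Data
public class MonitorTranDetail {
    private String zoneCd;              //merchant key derived from the merchant number
    private String dataDate;            //yyyyMMdd, taken from the window end
    private String txnTime;             //yyyyMMddHHmmss, taken from the window end
    private String etlTIme;             //yyyyMMddHHmmss; the original setter is spelled setEtlTIme
    private Integer txnCnt1 = 0;        //transactions of 10000 or more
    private Integer txnCnt2 = 0;        //3000 - 10000
    private Integer txnCnt3 = 0;        //1000 - 3000
    private Integer txnCnt4 = 0;        //500 - 1000
    private Integer txnCnt5 = 0;        //100 - 500
    private Integer txnCnt6 = 0;        //under 100
    private Integer tottxnCntd = 0;     //total transaction count
    private Double tottxnAmountd = 0.0; //total transaction amount
}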

3. The pom file

<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.tcloudata</groupId>
    <artifactId>flinkhr</artifactId>
    <version>1.0</version>
    <name>flinkhr</name>
    <!-- FIXME change it to the project's website -->
    <properties>
        <flink.version>1.11.2</flink.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <scala.binary.version>2.11</scala.binary.version>
        <hive-jdbc.version>1.2.1</hive-jdbc.version>
        <hadoop-common.version>2.6.5</hadoop-common.version>
        <kafka.version>2.4.1</kafka.version>
    </properties>
    <dependencies>
        <!-- Developing with the Java API -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Use the Blink planner -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>2.4.1</version>
        </dependency>
        <!-- Messages in Kafka are in JSON format -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Dependencies required to submit the job, e.g. LocalExecutorFactory -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
<!--            <scope>provided</scope>-->
        </dependency>
        <!-- Logging, for easier debugging -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.7</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.54</version>
        </dependency>
        <dependency>
            <groupId>org.yaml</groupId>
            <artifactId>snakeyaml</artifactId>
            <version>1.27</version>
        </dependency>
        <dependency>
            <groupId>com.oracle</groupId>
            <artifactId>ojdbc6</artifactId>
            <version>11.2.0.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
            <version>1.0</version>
        </dependency>
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>1.6.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-hadoop-2-uber</artifactId>
            <version>2.6.5-8.0</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.48</version>
        </dependency>
        <!-- Hive Metastore -->
        <dependency>
            <groupId>org.apache.thrift</groupId>
            <artifactId>libfb303</artifactId>
            <version>0.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-cli</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-cli</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.google</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.calcite</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.6</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive-jdbc.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.eclipse.jetty.aggregate</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.8.0</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
            <!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <executions>
                    <!-- Run shade goal on package phase -->
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <!-- Do not copy the signatures in the META-INF folder.
                                    Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.tcloudata.App</mainClass>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Java Compiler -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
