Building and Installing Griffin

Posted by 柒小韩 on 2024-05-24

Compiling and Installing Apache Griffin

1. Prerequisites

  • Maven (Apache Maven 3.6.3)

  • MySQL database (PostgreSQL also works; MySQL 5.7 used here)

  • npm (version 6.14.6; 6.0.0+ is required to build the ui module, and this version is recommended)

  • Scala (version 2.11.8)

  • Hadoop (version 3.0.0 or later; local: 2.6.0)

  • Hive (version 2.1.1; local: 1.1.0)

  • Spark (version 2.4.0; local: 2.4.0)

  • Livy (version 0.5.0; this version is recommended)

  • ElasticSearch (version 5.0 or later)

  • Zookeeper (version 3.4.5)
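
Before building, a quick sanity check of the locally installed versions (a minimal sketch; it assumes all tools are on PATH and that Elasticsearch runs locally):

# Print the versions of the main build/runtime dependencies
mvn -version | head -n 1
node -v && npm -v
scala -version
hadoop version | head -n 1
hive --version | head -n 1
spark-submit --version 2>&1 | head -n 3
curl -s http://localhost:9200 | grep '"number"'   # Elasticsearch version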

2. Download the source package

Download: wget https://github.com/apache/griffin/archive/griffin-0.5.0.tar.gz
Extract:  tar -zxf griffin-0.5.0.tar.gz -C /opt/software/

3. Edit the configuration files

3.1 Configure MySQL

mysql -uroot -e "create database quartz" -p123456 -hhadoop-node3
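
A quick check that the database was created (same credentials and host as the command above; adjust if yours differ):

mysql -uroot -p123456 -hhadoop-node3 -e "show databases like 'quartz';"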

3.2 Configure environment variables

vim /etc/profile
export HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_COMMON_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=/opt/cloudera/parcels/CDH/lib/hadoop/lib/native
export HADOOP_HDFS_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-hdfs
export HADOOP_INSTALL=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_MAPRED_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
export HADOOP_USER_CLASSPATH_FIRST=true
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_HOME=/opt/cloudera/parcels/SPARK2/lib/spark2
export LIVY_HOME=/opt/software/livy-0.5.0
export HIVE_HOME=/opt/cloudera/parcels/CDH/lib/hive
export YARN_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-yarn
export SCALA_HOME=/opt/software/scala-2.11.12
export PATH=$PATH:$HIVE_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME/bin:$LIVY_HOME/bin:$SCALA_HOME/bin


export GRIFFIN_HOME=/opt/software/griffin-0.5.0/
export PATH=$PATH:$GRIFFIN_HOME/bin
source /etc/profile
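
A quick check that the new variables resolve in the current shell:

echo $GRIFFIN_HOME
echo $SPARK_HOME $HIVE_HOME $LIVY_HOME $SCALA_HOME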

3.3 Hive configuration

# Upload hive-site.xml to HDFS

sudo -u hdfs hadoop fs -mkdir -p /home/spark_conf
sudo -u hdfs hadoop fs -put /opt/cloudera/parcels/CDH/lib/hive/conf/hive-site.xml /home/spark_conf
useradd griffin
sudo -u hdfs hadoop fs -chown -R griffin /home/spark_conf
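
Verify the upload and the ownership change:

sudo -u hdfs hadoop fs -ls /home/spark_conf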

3.4 Configure Griffin's application.properties

vim /opt/software/griffin-0.5.0/service/src/main/resources/application.properties
spring.application.name=griffin_service
server.port=8081


spring.datasource.url=jdbc:mysql://hadoop-node3:3306/quartz?autoReconnect=true&useSSL=false
spring.datasource.username=root
spring.datasource.password=123456
spring.jpa.generate-ddl=true
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.jpa.show-sql=true

# Hive metastore
hive.metastore.uris=thrift://node01:9083
hive.metastore.dbname=hive
hive.hmshandler.retry.attempts=15
hive.hmshandler.retry.interval=2000ms
# Hive cache time
cache.evict.hive.fixedRate.in.milliseconds=900000

# Kafka schema registry
kafka.schema.registry.url=http://localhost:8081
# Update job instance state at regular intervals
jobInstance.fixedDelay.in.milliseconds=60000
# Job instance expiry time: 7 days = 604800000 milliseconds (only milliseconds are supported)
jobInstance.expired.milliseconds=604800000
# schedule predicate job every 5 minutes and repeat 12 times at most
#interval time unit s:second m:minute h:hour d:day,only support these four units
predicate.job.interval=5m
predicate.job.repeat.count=12
# external properties directory location
external.config.location=
# external BATCH or STREAMING env
external.env.location=
# login strategy ("default" or "ldap")
login.strategy=default
# ldap
ldap.url=ldap://hostname:port
ldap.email=@example.com
ldap.searchBase=DC=org,DC=example
ldap.searchPattern=(sAMAccountName={0})
# hdfs default name
fs.defaultFS=hdfs://node01:8020
# elasticsearch
elasticsearch.host=node01
elasticsearch.port=9200
elasticsearch.scheme=http
# elasticsearch.user = user
# elasticsearch.password = password
# livy
livy.uri=http://node01:8998/batches
livy.need.queue=false
livy.task.max.concurrent.count=20
livy.task.submit.interval.second=3
livy.task.appId.retry.count=3
# yarn url
yarn.uri=http://node01:8088
# griffin event listener
internal.event.listeners=GriffinJobEventHook
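
Before building, it can help to confirm that the endpoints referenced above are reachable (the hostnames and ports below are the ones used in this example configuration):

nc -vz node01 9083                                                   # Hive metastore thrift port
curl -s http://node01:8998/batches | head -c 200; echo               # Livy batches API
curl -s http://node01:9200                                           # Elasticsearch
curl -s http://node01:8088/ws/v1/cluster/info | head -c 200; echo    # YARN ResourceManager REST API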

3.5 Configure Griffin's quartz.properties

vim /opt/software/griffin-0.5.0/service/src/main/resources/quartz.properties
org.quartz.scheduler.instanceName=spring-boot-quartz
org.quartz.scheduler.instanceId=AUTO
org.quartz.threadPool.threadCount=5
org.quartz.jobStore.class=org.quartz.impl.jdbcjobstore.JobStoreTX
# If you use postgresql as your database,set this property value to org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
# If you use mysql as your database,set this property value to org.quartz.impl.jdbcjobstore.StdJDBCDelegate
# If you use h2 as your database, it's ok to set this property value to StdJDBCDelegate, PostgreSQLDelegate or others
#org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
org.quartz.jobStore.useProperties=true
org.quartz.jobStore.misfireThreshold=60000
org.quartz.jobStore.tablePrefix=QRTZ_
org.quartz.jobStore.isClustered=true
org.quartz.jobStore.clusterCheckinInterval=20000
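
The Quartz JDBC job store does not create its tables automatically. If the QRTZ_ tables are not yet present in the quartz database, they can be created from the init script that ships with the Griffin source (the path below assumes this source layout):

mysql -uroot -p123456 -hhadoop-node3 quartz < /opt/software/griffin-0.5.0/service/src/main/resources/Init_quartz_mysql_innodb.sql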

3.6 Configure Griffin's sparkProperties.json

vim /opt/software/griffin-0.5.0/service/src/main/resources/sparkProperties.json
{
  "file": "hdfs:///griffin/griffin-measure.jar",
  "className": "org.apache.griffin.measure.Application",
  "name": "griffin",
  "queue": "default",
  "numExecutors": 2,
  "executorCores": 1,
  "driverMemory": "1g",
  "executorMemory": "1g",
  "conf": {
    "spark.yarn.dist.files": "hdfs:///home/spark_conf/hive-site.xml"
  },
  "files": [
  ]
}

3.7 Configure Griffin's env_batch.json

vim /opt/software/griffin-0.5.0/service/src/main/resources/env/env_batch.json
{
  "spark": {
    "log.level": "WARN"
  },
  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 10
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs:///griffin/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://node01:9200/griffin/accuracy",
        "connection.timeout": "1m",
        "retry": 10
      }
    }
  ],
  "griffin.checkpoint": []
}

3.8 Configure Griffin's env_streaming.json

vim /opt/software/griffin-0.5.0/service/src/main/resources/env/env_streaming.json
{
  "spark": {
    "log.level": "WARN",
    "checkpoint.dir": "hdfs:///griffin/checkpoint/${JOB_NAME}",
    "init.clear": true,
    "batch.interval": "1m",
    "process.interval": "5m",
    "config": {
      "spark.default.parallelism": 4,
      "spark.task.maxFailures": 5,
      "spark.streaming.kafkaMaxRatePerPartition": 1000,
      "spark.streaming.concurrentJobs": 4,
      "spark.yarn.maxAppAttempts": 5,
      "spark.yarn.am.attemptFailuresValidityInterval": "1h",
      "spark.yarn.max.executor.failures": 120,
      "spark.yarn.executor.failuresValidityInterval": "1h",
      "spark.hadoop.fs.hdfs.impl.disable.cache": true
    }
  },
  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 100
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs:///griffin/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://node01:9200/griffin/accuracy"
      }
    }
  ],
  "griffin.checkpoint": [
    {
      "type": "zk",
      "config": {
        "hosts": "node01:2181,node02:2181,node03:2181,node04:2181,node05:2181",
        "namespace": "griffin/infocache",
        "lock.path": "lock",
        "mode": "persist",
        "init.clear": true,
        "close.clear": false
      }
    }
  ]
}

3.9 Elasticsearch setup

Create the Elasticsearch index ahead of time, so that the number of shards, the number of replicas, and other settings can be configured to the desired values:

curl -k -H "Content-Type: application/json" -X PUT http://localhost:9200/griffin?pretty \
 -d '{
    "aliases": {},
    "mappings": {
        "accuracy": {
            "properties": {
                "name": {
                    "fields": {
                        "keyword": {
                            "ignore_above": 256,
                            "type": "keyword"
                        }
                    },
                    "type": "text"
                },
                "tmst": {
                    "type": "date"
                }
            }
        }
    },
    "settings": {
        "index": {
            "number_of_replicas": "0",
            "number_of_shards": "1"
        }
    }
}'

# The following response indicates the index was created successfully
{
	"acknowledged":true,
	"shards_acknowledged":true,
	"index":"griffin"
}
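
Optionally confirm that the settings and mapping were applied:

curl -s "http://localhost:9200/griffin?pretty"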

3.10 JDBC driver dependency: edit service/pom.xml and uncomment the MySQL connector

vim /opt/software/griffin-0.5.0/service/pom.xml
<!-- lines 114-118 -->
	<dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>${mysql.java.version}</version>
    </dependency>

3.11 Configure the measure module's env-batch.json

vim /opt/software/griffin-0.5.0/measure/src/main/resources/env-batch.json 
{
  "spark": {
    "log.level": "WARN",
    "config": {
      "spark.master": "local[*]"
    }
  },

  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 10
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs:///griffin/batch/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://node01:9200/griffin/accuracy",
        "connection.timeout": "1m",
        "retry": 10
      }
    }
  ],

  "griffin.checkpoint": []
}

3.12 Configure the measure module's env-streaming.json

vim /opt/software/griffin-0.5.0/measure/src/main/resources/env-streaming.json 
{
  "spark": {
    "log.level": "WARN",
    "checkpoint.dir": "hdfs:///griffin/cp",
    "batch.interval": "2s",
    "process.interval": "10s",
    "init.clear": true,
    "config": {
      "spark.master": "local[*]",
      "spark.task.maxFailures": 5,
      "spark.streaming.kafkaMaxRatePerPartition": 1000,
      "spark.streaming.concurrentJobs": 4,
      "spark.yarn.maxAppAttempts": 5,
      "spark.yarn.am.attemptFailuresValidityInterval": "1h",
      "spark.yarn.max.executor.failures": 120,
      "spark.yarn.executor.failuresValidityInterval": "1h",
      "spark.hadoop.fs.hdfs.impl.disable.cache": true
    }
  },

  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 100
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs:///griffin/streaming/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://node01:9200/griffin/accuracy"
      }
    }
  ],

  "griffin.checkpoint": [
    {
      "type": "zk",
      "config": {
        "hosts": "node01:2181,node02:2181,node03:2181,node04:2181,node05:2181",
        "namespace": "griffin/infocache",
        "lock.path": "lock",
        "mode": "persist",
        "init.clear": true,
        "close.clear": false
      }
    }
  ]
}

3.13 Create the HDFS directories

sudo -u hdfs hadoop fs -mkdir -p /griffin
sudo -u hdfs hdfs dfs -chown -R griffin:griffin /griffin

sudo -u griffin hadoop fs -mkdir -p /griffin/persist
sudo -u griffin hadoop fs -mkdir -p /griffin/checkpoint
sudo -u griffin hadoop fs -mkdir -p /griffin/cp
sudo -u griffin hadoop fs -mkdir -p /griffin/streaming/dump/source
sudo -u griffin hadoop fs -mkdir -p /griffin/streaming/persist
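
Confirm the layout and ownership:

sudo -u griffin hadoop fs -ls -R /griffin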

4. Build

4.1 Step 1: build the measure module

cd /opt/software/griffin-0.5.0/measure/ && mvn -Dmaven.test.skip=true clean install

4.1.1 Patch the source Spark uses to read Hive tables

# Works around Hive tables whose partition values use the "YYYY-MM-dd" style; by default only partition values without special characters (e.g. YYYYMMdd) are supported. With the patch, a where clause such as dt=2024-05-24 and hour=12 is rewritten to WHERE dt = '2024-05-24' AND hour = '12'.

vim /opt/software/griffin-0.5.0/measure/src/main/scala/org/apache/griffin/measure/datasource/connector/batch/HiveBatchDataConnector.scala
private def dataSql(): String = {
    val tableClause = s"SELECT * FROM ${concreteTableName}"
    if (wheres.length > 0) {
      val clauses = wheres.map { w =>
        s"${tableClause} WHERE ${w}"
      }
      clauses.mkString(" UNION ALL ")
    } else tableClause
 }


// Replace it with the following: split each where clause on AND, quote every
// partition value, and recombine, so values like 2024-05-24 remain valid SQL.
  private def dataSql(): String = {
    val tableClause = s"SELECT * FROM ${concreteTableName}"
    if (wheres.length > 0) {
      val clauses = wheres.map { item =>
        // e.g. "dt=2024-05-24 and hour=12" -> Array("dt=2024-05-24", "hour=12")
        val sw: Array[String] = item.split("and|AND").map(_.trim).filter(_.nonEmpty)
        val whereList = sw.map { w =>
          val nw = w.split("=").map(_.trim)
          s"${nw(0)} = '${nw(1)}'" // quote the partition value
        }
        s"${tableClause} WHERE ${whereList.mkString(" AND ")}"
      }
      clauses.mkString(" UNION ALL ")
    } else tableClause
  }

4.2 Step 2: build the service module

cd /opt/software/griffin-0.5.0/service/ && mvn -Dmaven.test.skip=true clean install

4.3 Step 3: build the ui module

# Edit /opt/software/griffin-0.5.0/ui/angular/src/environments/environment.ts
export const environment = {
  production: false,
  BACKEND_SERVER: 'http://node01:8081',
};

cd /opt/software/griffin-0.5.0/ui/ && mvn -Dmaven.test.skip=true clean install

# Common issue: ui build error

# Note: the offending file only appears during the build (node_modules does not exist beforehand)

[root@hadoop02 griffin-0.5.0]# mvn -Dmaven.test.skip=true clean install
# Build error:
[ERROR] Failed to execute goal com.github.eirslett:frontend-maven-plugin:1.6:npm (npm build) on project ui: Failed to run task: 'npm run build' failed. org.apache.commons.exec.ExecuteException: Process exited with an error: 1 (Exit value: 1) -> [Help 1]
[ERROR] ERROR in /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts (4137,26): Cannot find name 'SVGElementTagNameMap'.
[ERROR] ERROR in /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts (4137,89): Cannot find name 'SVGElementTagNameMap'.

# Fix

[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts
# Delete line 4137 and line 8705:
4137     find<K extends keyof SVGElementTagNameMap>(selector_element: K | JQuery<K>): JQuery<SVGElementTagNameMap[K]>;
8705     parents<K extends keyof SVGElementTagNameMap>(selector: K | JQuery<K>): JQuery<SVGElementTagNameMap[K]>;

5. Install

5.1 Upload measure-0.5.0.jar to /griffin on HDFS

mv /opt/software/griffin-0.5.0/measure/target/measure-0.5.0.jar /opt/software/griffin-0.5.0/griffin-measure.jar

# Spark loads griffin-measure.jar from the /griffin directory on HDFS when it runs the job on the YARN cluster
sudo -u hdfs hadoop fs -put /opt/software/griffin-0.5.0/griffin-measure.jar /griffin/

sudo -u hdfs hdfs dfs -chown -R griffin:griffin /griffin
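
Confirm the jar is in place:

sudo -u hdfs hadoop fs -ls /griffin/griffin-measure.jar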

5.2 Start service.jar to run the Griffin admin service

mv /opt/software/griffin-0.5.0/service/target/service-0.5.0.jar /opt/software/griffin-0.5.0/griffin-service.jar

nohup java -jar $GRIFFIN_HOME/griffin-service.jar > $GRIFFIN_HOME/service.out 2>&1 &


# After starting, check the startup log; if it contains no errors, the service started successfully
tail -f $GRIFFIN_HOME/service.out
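
Once the log settles, a quick reachability check on the configured service port (8081, per application.properties above) prints the HTTP status code if the service is listening:

curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8081/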

5.3 Start the frontend

cd /opt/software/griffin-0.5.0/ui/angular/

node_modules/.bin/ng serve --host cdh04 --port 8081

# Access URL; default user: admin, the password is empty
# http://localhost:8081
