PySpark 解析 Kafka 陣列結構資料

Young_Mo 發表於 2024-11-07
from pyspark.sql.functions import get_json_object, col,from_unixtime, instr, length, regexp_replace, explode, from_json
from pyspark.sql.types import * 
# Define the array schema: the message value is parsed as a JSON array of
# objects, each with four fields that are all read as strings.
schema = ArrayType(
    StructType(
        [
            StructField("home", StringType()),
            StructField("room", StringType()),
            StructField("operation", StringType()),
            StructField("time", StringType()),
        ]
    )
)

# Left commented out in the original post: selects the raw "value" column
# cast to a string (aliased "data") without parsing it.
# kafka = kafka.select(col("value").cast("string").alias("data"))

# NOTE(review): `kafka` is not defined in this snippet; it is presumably a
# DataFrame read from a Kafka source earlier in the script -- confirm.
#
# Parse the value with from_json, then use explode to split the array into
# one row per element, and finally flatten the struct fields into columns.
kafka = (
    kafka.select(from_json(col("value").cast("string"), schema).alias("data"))
    .select(explode("data").alias("data"))
    .selectExpr("data.home", "data.room", "data.operation", "data.time")
)

  

相關文章