Eclipse Java Spark local 模式

BusyMonkey發表於2019-03-06

https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java

 

1、新建工程,並引入 Spark 2.4.0 的 lib 依賴

2、測試程式碼:

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

/**
 * Minimal word-count job that runs Spark in {@code local} mode, intended to be
 * launched directly from an IDE.
 *
 * <p>The job reads a text file, splits every line on single space characters,
 * counts how many times each token occurs and prints {@code token: count}
 * pairs to standard output.
 */
public class SparkWordCount {
	/** Token separator: exactly one space character. */
	private static final Pattern SPACE = Pattern.compile(" ");

	/** Input file used when no path is passed on the command line. */
	private static final String DEFAULT_INPUT_PATH = "f:/wordCount.txt";

	/** How long the driver is kept alive after the job finishes, in milliseconds. */
	private static final long UI_KEEP_ALIVE_MILLIS = 500000L;

	/**
	 * Runs the word count.
	 *
	 * @param args optional; {@code args[0]} is the path of the text file to
	 *             count. When absent, {@link #DEFAULT_INPUT_PATH} is used.
	 * @throws Exception if the Spark job fails or the keep-alive sleep is
	 *                   interrupted
	 */
	public static void main(String[] args) throws Exception {
		String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;

		SparkConf conf = new SparkConf().setMaster("local");

		SparkSession spark = SparkSession.builder().appName("JavaWordCount").config(conf).getOrCreate();

		// Stop the session in a finally block so it is released even when the
		// job fails (e.g. missing input file) or the sleep is interrupted.
		try {
			JavaRDD<String> lines = spark.read().textFile(inputPath).javaRDD();

			JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());

			JavaPairRDD<String, Integer> ones = words.mapToPair(s -> new Tuple2<>(s, 1));

			JavaPairRDD<String, Integer> counts = ones.reduceByKey(Integer::sum);

			List<Tuple2<String, Integer>> output = counts.collect();
			for (Tuple2<String, Integer> tuple : output) {
				System.out.println(tuple._1() + ": " + tuple._2());
			}

			// Keep the driver alive so the Spark UI can be inspected at
			// http://localhost:4040
			Thread.sleep(UI_KEEP_ALIVE_MILLIS);
		} finally {
			spark.stop();
		}
	}
}

 

相關文章