spark-core源码学习记录 2 应用提交submit 及driver注册流程,以javawordcount为例
时间: 2023-05-02 11:02:29
浏览: 129
当你提交Spark Core源码学习笔记的时候,并注册Driver的流程,以Java的WordCount为例。
首先,编写提交Job的代码:
```java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
public class WordCount {
public static void main([string](https://geek.csdn.net/edu/8802d631b97a4a6af1f4d0bbf8527465?dp_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6NDQ0MDg2MiwiZXhwIjoxNzA3MzcxOTM4LCJpYXQiOjE3MDY3NjcxMzgsInVzZXJuYW1lIjoid2VpeGluXzY4NjQ1NjQ1In0.RrTYEnMNYPC7AQdoij4SBb0kKEgHoyvF-bZOG2eGQvc&spm=1055.2569.3001.10083)[] args) {
SparkConf conf = new SparkConf().setAppName("WordCount");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<[string](https://geek.csdn.net/edu/8802d631b97a4a6af1f4d0bbf8527465?dp_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6NDQ0MDg2MiwiZXhwIjoxNzA3MzcxOTM4LCJpYXQiOjE3MDY3NjcxMzgsInVzZXJuYW1lIjoid2VpeGluXzY4NjQ1NjQ1In0.RrTYEnMNYPC7AQdoij4SBb0kKEgHoyvF-bZOG2eGQvc&spm=1055.2569.3001.10083)> lines = sc.textFile(args[0]);
JavaRDD<[string](https://geek.csdn.net/edu/8802d631b97a4a6af1f4d0bbf8527465?dp_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6NDQ0MDg2MiwiZXhwIjoxNzA3MzcxOTM4LCJpYXQiOjE3MDY3NjcxMzgsInVzZXJuYW1lIjoid2VpeGluXzY4NjQ1NjQ1In0.RrTYEnMNYPC7AQdoij4SBb0kKEgHoyvF-bZOG2eGQvc&spm=1055.2569.3001.10083)> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
JavaPairRDD<[string](https://geek.csdn.net/edu/8802d631b97a4a6af1f4d0bbf8527465?dp_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6NDQ0MDg2MiwiZXhwIjoxNzA3MzcxOTM4LCJpYXQiOjE3MDY3NjcxMzgsInVzZXJuYW1lIjoid2VpeGluXzY4NjQ1NjQ1In0.RrTYEnMNYPC7AQdoij4SBb0kKEgHoyvF-bZOG2eGQvc&spm=1055.2569.3001.10083), Integer> pairs = words.mapToPair(word -> new Tuple2<>(word, 1));
JavaPairRDD<[string](https://geek.csdn.net/edu/8802d631b97a4a6af1f4d0bbf8527465?dp_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6NDQ0MDg2MiwiZXhwIjoxNzA3MzcxOTM4LCJpYXQiOjE3MDY3NjcxMzgsInVzZXJuYW1lIjoid2VpeGluXzY4NjQ1NjQ1In0.RrTYEnMNYPC7AQdoij4SBb0kKEgHoyvF-bZOG2eGQvc&spm=1055.2569.3001.10083), Integer> counts = pairs.reduceByKey((a, b) -> a + b);
counts.saveAsTextFile(args[1]);
```