Spark Action operators -> countByKey, countByValue

countByKey: applies to RDDs in K,V format and counts, for each key, how many elements in the dataset share that key.
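countByKey collects the per-key counts into a Map on the driver, so it is best suited to RDDs with a modest number of distinct keys. As a rough sketch (assuming a local SparkContext named context, as in the examples below), an equivalent count can be kept distributed with reduceByKey, which is approximately how countByKey computes its result internally:

// Hedged sketch: roughly equivalent to countByKey, but the per-key counts stay
// in an RDD on the cluster instead of being collected into a driver-side Map.
val perKeyCounts = context.parallelize(Array(("科比", 24), ("科比", 8), ("詹姆斯", 23)))
  .mapValues(_ => 1L)     // replace each value with a count of 1
  .reduceByKey(_ + _)     // sum the counts per key across partitions
perKeyCounts.collect().foreach(println) // e.g. (科比,2), (詹姆斯,1)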

  1. Java
package action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * @Author yqq
 * @Date 2021/12/10 12:27
 * @Version 1.0
 */
public class CountByKeyTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("CountByKeyTest")
        );
        context.setLogLevel("Error");
        context.parallelizePairs(Arrays.asList(
                new Tuple2<>("科比", 24),
                new Tuple2<>("詹姆斯", 23),
                new Tuple2<>("詹姆斯", 6),
                new Tuple2<>("杜兰特", 45),
                new Tuple2<>("科比", 8)
        )).countByKey().entrySet().forEach(e -> {
            System.out.println("key=" + e.getKey() + ",value=" + e.getValue());
        });
    }
}

  2. Scala

package action

import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Author yqq
 * @Date 2021/12/10 13:11
 * @Version 1.0
 */
object CountByKeyTest1 {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("CountByKeyTest1")
    )
    context.setLogLevel("Error")
    context.parallelize(Array[(String, Int)](
      ("科比", 24),
      ("科比", 8),
      ("詹姆斯", 23),
      ("詹姆斯", 6),
      ("乔丹", 23)
    )).countByKey().foreach(println)
  }
}


countByValue: counts dataset elements by their content, returning the number of occurrences of each distinct element.
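Note that, unlike countByKey, countByValue does not require a K,V RDD: each element is compared as a whole (for a pair RDD, the entire (key, value) tuple), and a Map of element -> count is returned to the driver. A minimal sketch with made-up data on a plain string RDD (again assuming a local SparkContext named context):

// Hedged sketch: countByValue on a non-pair RDD counts whole elements.
val words = context.parallelize(Array("spark", "spark", "flink", "spark"))
words.countByValue().foreach(println) // e.g. (spark,3), (flink,1)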

  1. Java
package action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * @Author yqq
 * @Date 2021/12/10 13:39
 * @Version 1.0
 */
public class CountByValueTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("CountByValueTest")
        );
        context.setLogLevel("Error");
        context.parallelizePairs(Arrays.asList(
                new Tuple2<>("科比", 24),
                new Tuple2<>("詹姆斯", 23),
                new Tuple2<>("詹姆斯", 6),
                new Tuple2<>("杜兰特", 45),
                new Tuple2<>("科比", 24)   // identical to the first tuple, so it is counted twice
        )).countByValue().entrySet().forEach(e -> {
            System.out.println("key=" + e.getKey() + ",value=" + e.getValue());
        });
    }
}

  2. Scala

package action

import org.apache.spark.{SparkConf, SparkContext}

/**
 * @Author yqq
 * @Date 2021/12/10 13:46
 * @Version 1.0
 */
object CountByValueTest1 {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("CountByValueTest1")
    )
    context.setLogLevel("Error")
    context.parallelize(Array[(String, Int)](
      ("科比", 24),
      ("科比", 24),
      ("詹姆斯", 23),
      ("詹姆斯", 6),
      ("乔丹", 23)
    )).countByValue().foreach(println)
  }
}


