countByKey:作用到 K,V 格式的 RDD 上,根据 Key 计数相同 Key 的数据集元素。
1. java
package action;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.Arrays;
/**
* @Author yqq
* @Date 2021/12/10 12:27
* @Version 1.0
*/
/**
 * Demonstrates the countByKey action: on a pair RDD it returns a
 * Map&lt;K, Long&gt; with the number of elements observed per key
 * (the tuple values themselves are ignored — only keys are counted).
 */
public class CountByKeyTest {
    public static void main(String[] args) {
        // Local-mode context for the demo; "Error" suppresses Spark's INFO chatter.
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("CountByKeyTest")
        );
        context.setLogLevel("Error");
        try {
            // countByKey() is an action: it triggers the job and collects the
            // per-key counts to the driver as a java.util.Map<String, Long>.
            context.parallelizePairs(Arrays.asList(
                    new Tuple2<>("科比", 24),
                    new Tuple2<>("詹姆斯", 23),
                    new Tuple2<>("詹姆斯", 6),
                    new Tuple2<>("杜兰特", 45),
                    new Tuple2<>("科比", 8)
            )).countByKey().entrySet().forEach(e -> {
                System.out.println("key=" + e.getKey() + ",value=" + e.getValue());
            });
        } finally {
            // Release the SparkContext so local resources are freed and the JVM exits cleanly.
            context.stop();
        }
    }
}
2. scala
package action
import org.apache.spark.{SparkConf, SparkContext}
/**
* @Author yqq
* @Date 2021/12/10 13:11
* @Version 1.0
*/
/**
 * Demonstrates the countByKey action in Scala: on an RDD of pairs it
 * returns a Map[K, Long] with the element count per key (values ignored).
 */
object CountByKeyTest1 {
  def main(args: Array[String]): Unit = {
    // Local-mode context for the demo; "Error" suppresses Spark's INFO chatter.
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("CountByKeyTest1")
    )
    context.setLogLevel("Error")
    try {
      // countByKey() is an action: it runs the job and returns the
      // per-key counts to the driver as a Map[String, Long].
      context.parallelize(Array[(String, Int)](
        ("科比", 24),
        ("科比", 8),
        ("詹姆斯", 23),
        ("詹姆斯", 6),
        ("乔丹", 23)
      )).countByKey().foreach(println)
    } finally {
      // Release the SparkContext so local resources are freed and the JVM exits cleanly.
      context.stop()
    }
  }
}
countByValue:根据数据集每个元素相同的内容来计数。返回相同内容的元素对应的条数。
1. java
package action;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.Arrays;
/**
* @Author yqq
* @Date 2021/12/10 13:39
* @Version 1.0
*/
/**
 * Demonstrates the countByValue action: it counts how many times each
 * distinct element occurs in the dataset. On a pair RDD the "element" is
 * the whole Tuple2, so identical (key, value) pairs are counted together.
 */
public class CountByValueTest {
    public static void main(String[] args) {
        // Local-mode context for the demo; "Error" suppresses Spark's INFO chatter.
        // Fixed: appName previously said "CountByKeyTest" (copy-paste error).
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("CountByValueTest")
        );
        context.setLogLevel("Error");
        try {
            // countByValue() is an action: it returns a Map<Tuple2<String,Integer>, Long>
            // to the driver — here ("科比",24) appears twice, so its count is 2.
            context.parallelizePairs(Arrays.asList(
                    new Tuple2<>("科比", 24),
                    new Tuple2<>("詹姆斯", 23),
                    new Tuple2<>("詹姆斯", 6),
                    new Tuple2<>("杜兰特", 45),
                    new Tuple2<>("科比", 24)
            )).countByValue().entrySet().forEach(e -> {
                System.out.println("key=" + e.getKey() + ",value=" + e.getValue());
            });
        } finally {
            // Release the SparkContext so local resources are freed and the JVM exits cleanly.
            context.stop();
        }
    }
}
2. scala
package action
import org.apache.spark.{SparkConf, SparkContext}
/**
* @Author yqq
* @Date 2021/12/10 13:46
* @Version 1.0
*/
/**
 * Demonstrates the countByValue action in Scala: it counts occurrences of
 * each distinct element. For an RDD of pairs the element is the whole
 * tuple, so identical (key, value) pairs are counted together.
 */
object CountByValueTest1 {
  def main(args: Array[String]): Unit = {
    // Local-mode context for the demo; "Error" suppresses Spark's INFO chatter.
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("CountByValueTest1")
    )
    context.setLogLevel("Error")
    try {
      // countByValue() is an action returning Map[(String, Int), Long] —
      // here ("科比", 24) appears twice, so its count is 2.
      context.parallelize(Array[(String, Int)](
        ("科比", 24),
        ("科比", 24),
        ("詹姆斯", 23),
        ("詹姆斯", 6),
        ("乔丹", 23)
      )).countByValue().foreach(println)
    } finally {
      // Release the SparkContext so local resources are freed and the JVM exits cleanly.
      context.stop()
    }
  }
}