countByKey:作用到 K,V 格式的 RDD 上,根据 Key 计数相同 Key 的数据集元素。
 1. java
package action;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.Arrays;
/**
 * @Author yqq
 * @Date 2021/12/10 12:27
 * @Version 1.0
 */
/**
 * Demonstrates the {@code countByKey} action: for a (K, V) pair RDD it returns
 * a {@code Map<K, Long>} with the number of elements sharing each key.
 */
public class CountByKeyTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("CountByKeyTest")
        );
        try {
            // Use the documented upper-case level name (valid values: ALL, DEBUG,
            // ERROR, FATAL, INFO, OFF, TRACE, WARN).
            context.setLogLevel("ERROR");
            // countByKey is an action: it collects the per-key counts to the driver.
            context.parallelizePairs(Arrays.asList(
                    new Tuple2<>("科比", 24),
                    new Tuple2<>("詹姆斯", 23),
                    new Tuple2<>("詹姆斯", 6),
                    new Tuple2<>("杜兰特", 45),
                    new Tuple2<>("科比", 8)
            )).countByKey().entrySet().forEach(e ->
                    System.out.println("key=" + e.getKey() + ",value=" + e.getValue()));
        } finally {
            // Stop the SparkContext so resources are released and the JVM exits cleanly.
            context.stop();
        }
    }
}
 2. scala
package action
import org.apache.spark.{SparkConf, SparkContext}
/**
 * @Author yqq
 * @Date 2021/12/10 13:11
 * @Version 1.0
 */
/**
 * Demonstrates the `countByKey` action: for a (K, V) pair RDD it returns a
 * `Map[K, Long]` with the number of elements sharing each key.
 */
object CountByKeyTest1 {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("CountByKeyTest1")
    )
    try {
      // Use the documented upper-case level name.
      context.setLogLevel("ERROR")
      // countByKey is an action: it collects the per-key counts to the driver.
      context.parallelize(Array[(String, Int)](
        ("科比", 24),
        ("科比", 8),
        ("詹姆斯", 23),
        ("詹姆斯", 6),
        ("乔丹", 23)
      )).countByKey().foreach(println)
    } finally {
      // Stop the SparkContext so resources are released and the JVM exits cleanly.
      context.stop()
    }
  }
}
countByValue:根据数据集每个元素相同的内容来计数。返回相同内容的元素对应的条数。
 1. java
package action;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.Arrays;
/**
 * @Author yqq
 * @Date 2021/12/10 13:39
 * @Version 1.0
 */
/**
 * Demonstrates the {@code countByValue} action: it returns a {@code Map} from
 * each distinct element of the RDD to the number of times it occurs.
 */
public class CountByValueTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        // Fixed copy-paste: this example is CountByValueTest, not CountByKeyTest.
                        .setAppName("CountByValueTest")
        );
        try {
            // Use the documented upper-case level name.
            context.setLogLevel("ERROR");
            // countByValue counts whole elements, so the duplicated ("科比", 24)
            // tuple yields a count of 2 for that pair.
            context.parallelizePairs(Arrays.asList(
                    new Tuple2<>("科比", 24),
                    new Tuple2<>("詹姆斯", 23),
                    new Tuple2<>("詹姆斯", 6),
                    new Tuple2<>("杜兰特", 45),
                    new Tuple2<>("科比", 24)
            )).countByValue().entrySet().forEach(e ->
                    System.out.println("key=" + e.getKey() + ",value=" + e.getValue()));
        } finally {
            // Stop the SparkContext so resources are released and the JVM exits cleanly.
            context.stop();
        }
    }
}
 2. scala
package action
import org.apache.spark.{SparkConf, SparkContext}
/**
 * @Author yqq
 * @Date 2021/12/10 13:46
 * @Version 1.0
 */
/**
 * Demonstrates the `countByValue` action: it returns a `Map` from each distinct
 * element of the RDD to the number of times it occurs.
 */
object CountByValueTest1 {
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("CountByValueTest1")
    )
    try {
      // Use the documented upper-case level name.
      context.setLogLevel("ERROR")
      // countByValue counts whole tuples, so the duplicated ("科比", 24)
      // element yields a count of 2 for that pair.
      context.parallelize(Array[(String, Int)](
        ("科比", 24),
        ("科比", 24),
        ("詹姆斯", 23),
        ("詹姆斯", 6),
        ("乔丹", 23)
      )).countByValue().foreach(println)
    } finally {
      // Stop the SparkContext so resources are released and the JVM exits cleanly.
      context.stop()
    }
  }
}
                
                










