takeOrdered(num):针对RDD,获取RDD中最小的前num个值(按升序排列),返回一个集合,放在Driver端。
top(num):针对RDD,获取RDD中最大的前num个值(按降序排列),返回一个集合,放在Driver端。
1. scala
package spark
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Small demo of the `takeOrdered` RDD action (with `top` shown as a commented-out alternative).
 *
 * @Author yqq
 * @Date 2021/12/10 12:22
 * @Version 1.0
 */
object TopAndOrderedTest1 {

  /**
   * Creates a local SparkContext, prints the 5 smallest values of the
   * collection 1..9 (one per line), and then stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setMaster("local")
        .setAppName("TopAndOrderedTest1")
    )
    // Always release the context, even if the job throws; otherwise the
    // local Spark runtime is left running until the JVM exits.
    try {
      context.setLogLevel("Error")
      context.parallelize(Array[Int](1, 2, 3, 4, 5, 6, 7, 8, 9))
        .takeOrdered(5).foreach(println)
      // Alternative: the 5 largest values instead of the 5 smallest.
      //  .top(5).foreach(println)
    } finally {
      context.stop()
    }
  }
}
2. java
package action;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;
/**
 * Small demo of the {@code takeOrdered} RDD action (with {@code top} shown as a commented-out alternative).
 *
 * @Author yqq
 * @Date 2021/12/10 12:16
 * @Version 1.0
 */
public class TopAndOrderedTest {

    /**
     * Creates a local JavaSparkContext, prints the 3 smallest values of the
     * list 1..9 (one per line), and closes the context when done.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // JavaSparkContext is Closeable: try-with-resources guarantees it is
        // shut down even if the job throws.
        try (JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("TopAndOrderedTest")
        )) {
            context.setLogLevel("Error");
            context.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9))
                    .takeOrdered(3).forEach(System.out::println); // the three smallest values
            // Alternative: the three largest values.
            //      .top(3).forEach(System.out::println);
        }
    }
}