Packaging and running the program in IDEA works much the same as for Spark Core; a new dependency needs to be added to the Maven dependencies:
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
The program is as follows:
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object Spark03_Transform {
  def main(args: Array[String]): Unit = {
    // Create the SparkConf
    val conf = new SparkConf().setMaster("local[*]").setAppName("Test")
    // Create the SparkSession
    val spark = SparkSession.builder().config(conf).getOrCreate()
    // Import the implicit conversions; "spark" here is not a package
    // but the name of the SparkSession object created above
    import spark.implicits._
    // Create an RDD
    val rdd = spark.sparkContext.makeRDD(List((1, "zhangsan", 23), (2, "lisi", 24), (3, "wangwu", 25)))
    // Convert the RDD to a DataFrame (adds structure)
    val df = rdd.toDF("id", "name", "age")
    // Convert the DataFrame to a DataSet (adds objects);
    // this requires a case class, defined below as User
    val ds = df.as[User]
    // Convert the DataSet back to a DataFrame
    val df1 = ds.toDF()
    // Convert the DataFrame back to an RDD (an RDD[Row])
    val rdd1 = df1.rdd
    // Print the "name" column (index 1) of each Row
    rdd1.foreach(row => println(row.getString(1)))
    // Release resources
    spark.stop()
  }
}

case class User(id: Int, name: String, age: Int)
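The program above always goes through a DataFrame in both directions. With the same implicits in scope, an RDD of case-class instances can also be converted to a DataSet directly, and back again, without naming columns. The following is a minimal sketch, not part of the original program; the object name Spark04_TransformDS is illustrative, and it reuses the User case class defined above (assumed to be in the same file or package).

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object Spark04_TransformDS {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("Test")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    // RDD of case-class instances -> DataSet directly; the schema
    // (id, name, age) is derived from the User case class
    val rdd = spark.sparkContext.makeRDD(List(User(1, "zhangsan", 23), User(2, "lisi", 24)))
    val ds = rdd.toDS()

    // DataSet -> RDD; elements come back as User objects, not Rows,
    // so fields are accessed by name instead of by index
    val rdd1 = ds.rdd
    rdd1.foreach(user => println(user.name))

    spark.stop()
  }
}

Because the DataSet is typed, ds.rdd yields RDD[User] rather than RDD[Row], which avoids the positional getString(1) access used in the DataFrame version.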