0
点赞
收藏
分享

微信扫一扫

spark大数据分析:spark SQL (12) 数据读取

RockYoungTalk 2022-02-10 阅读 97



文章目录


  • ​​读取json , csv 文件​​
  • ​​读取parquet文件​​
  • ​​读取代码中数据​​
  • ​​读取Mysql中数据​​
  • ​​读取hive数据​​


读取json , csv 文件

import org.apache.spark.sql.SparkSession

object TestSQL2 {
  def main(args: Array[String]): Unit = {
    // Local SparkSession for demonstrating JSON / CSV file reading.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("test")
      .getOrCreate()
    import spark.implicits._

    // JSON: column names are inferred from the JSON keys.
    val df = spark.read.json("d:/test/a.json")

    // CSV: the original read "d:/test/b.json" here, which was a path typo.
    // CSV columns default to _c0, _c1, ... until renamed.
    val df2 = spark.read.csv("d:/test/b.csv")

    // BUG FIX: toDF returns a NEW DataFrame — the original discarded the
    // result, so the column rename was silently lost. Capture it, and apply
    // it to the CSV frame (the JSON frame already has real column names).
    val named = df2.toDF("name", "age", "sex")
    named.show()

    spark.stop()
  }
}

toDF 相当于给列取别名,它返回一个新的 DataFrame(原 DataFrame 不变,必须用变量接收返回值才能生效);若无 toDF 操作,CSV 读入的列名默认为 _c0、_c1,依此类推

读取parquet文件

import org.apache.spark.sql.SparkSession

object TestSQL2 {
  def main(args: Array[String]): Unit = {
    // Build a local session and load a Parquet file.
    // Parquet is the default data source, so spark.read.load(path)
    // reads Parquet without an explicit format() call.
    val session = SparkSession.builder()
      .appName("test")
      .master("local[*]")
      .getOrCreate()
    import session.implicits._

    session.read.load("d:/test/a.parquet")

    session.stop()
  }
}

读取代码中数据

// Build a DataFrame straight from an in-memory list of tuples;
// toDF supplies the column names for the resulting frame.
// NOTE(review): relies on a `spark` session defined earlier in the article.
val people = List(
  ("Alice", "Female", "20"),
  ("Tom", "Male", "25"),
  ("Boris", "Male", "18")
)
val df5_1 = spark.createDataFrame(people).toDF("name", "sex", "age")
df5_1.show()

自定义schema

// Explicit schema: each StructField is (column name, type, nullable).
val schema = StructType(
  StructField("name", StringType, true) ::
    StructField("age", IntegerType, true) ::
    StructField("sex", StringType, true) ::
    Nil
)

// createDataFrame accepts a java.util.List[Row] paired with a schema.
val rows = new java.util.ArrayList[Row]()
Seq(
  Row("Alice", 20, "Female"),
  Row("Tom", 18, "Male"),
  Row("Boris", 30, "Male")
).foreach(rows.add)

val df5_2 = spark.createDataFrame(rows, schema)
df5_2.show

读取Mysql中数据

// JDBC read: supply the connection parameters one by one via option()
// (equivalent to passing a Map to options()).
val df6 = spark.read
  .format("jdbc")
  .option("url", "jdbc:mysql://linux01:3306/syllabus")
  .option("driver", "com.mysql.jdbc.Driver")
  .option("user", "root")
  .option("password", "123456")
  .option("dbtable", "person")
  .load()
df6.show

读取hive数据

import org.apache.spark.sql.SparkSession

object TestSQL2 {
  def main(args: Array[String]): Unit = {
    // enableHiveSupport() wires the session to the Hive metastore so
    // Hive tables become queryable through spark.sql.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("test")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._

    // BUG FIX: the original query was `select (*) from app.test;` —
    // `(*)` is not valid SQL, and spark.sql takes a single statement,
    // so the trailing semicolon causes a ParseException.
    // Also capture and show the result instead of discarding it.
    val result = spark.sql(
      """
        |select * from app.test
        |""".stripMargin
    )
    result.show()

    spark.stop()
  }
}



举报

相关推荐

0 条评论