文章目录
概述
- JSON(JavaScript Object Notation,JS 对象简谱)是一种轻量级的数据交换格式
- 它采用完全独立于编程语言的文本格式来存储和表示数据
使用 fastjson 把 JSON 字符串解析为 JSONObject 对象
MAVEN依赖
<properties>
    <!-- fastjson 1.2.47 and earlier are affected by publicly known autoType
         deserialization vulnerabilities; 1.2.83 is the patched 1.x release.
         The parseObject/getString/getJSONObject/getInteger calls used in the
         examples below are unchanged in that release. -->
    <fastjson.version>1.2.83</fastjson.version>
</properties>
<dependencies>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>${fastjson.version}</version>
    </dependency>
</dependencies>
Java版
import com.alibaba.fastjson.JSONObject;
/**
 * Minimal fastjson example: parses a JSON string into a {@link JSONObject}
 * and prints the source text, the parsed object, its runtime class and two
 * values read from it.
 */
public class JsonDemo {

    /**
     * Runs the demo; every result is written to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // String -> JSON
        final String rawText = "{\"b\": \"BB\",\"c\":{\"cc\":43 }}";
        final JSONObject parsed = JSONObject.parseObject(rawText);

        // The original text is printed untouched, whitespace included.
        System.out.println(rawText);
        // {"b": "BB","c":{"cc":43 }}

        // The parsed object prints in compact form.
        System.out.println(parsed);
        // {"b":"BB","c":{"cc":43}}

        final Class<?> runtimeType = parsed.getClass();
        System.out.println(runtimeType);
        // class com.alibaba.fastjson.JSONObject

        // Top-level string field.
        final String topLevelValue = parsed.getString("b");
        System.out.println(topLevelValue);
        // BB

        // Nested object first, then the integer field inside it.
        final JSONObject nested = parsed.getJSONObject("c");
        final Integer nestedValue = nested.getInteger("cc");
        System.out.println(nestedValue);
        // 43
    }
}
Scala版
import com.alibaba.fastjson.{JSON, JSONObject}
/**
 * Minimal fastjson example in Scala: parses a JSON string into a `JSONObject`
 * and prints the source text, the runtime class, the parsed object and two
 * values read from it.
 */
object JsDemo {

  /**
   * Runs the demo; every result is written to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // String -> JSON
    val rawText: String = "{\"b\": \"BB\",\"c\":{\"cc\":43 }}"
    val parsed: JSONObject = JSON.parseObject(rawText)

    // The original text is printed untouched, whitespace included.
    println(rawText)
    // {"b": "BB","c":{"cc":43 }}

    val runtimeType = parsed.getClass
    println(runtimeType)
    // class com.alibaba.fastjson.JSONObject

    // The parsed object prints in compact form.
    println(parsed)
    // {"b":"BB","c":{"cc":43}}

    // Top-level string field.
    val topLevelValue = parsed.getString("b")
    println(topLevelValue)
    // BB

    // Nested object first, then the integer field inside it.
    val nested = parsed.getJSONObject("c")
    val nestedValue = nested.getInteger("cc")
    println(nestedValue)
    // 43
  }
}
Spark版
pom.xml
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>3.0.0</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <!-- fastjson 1.2.47 and earlier are affected by publicly known autoType
             deserialization vulnerabilities; 1.2.83 is the patched 1.x release. -->
        <version>1.2.83</version>
    </dependency>
</dependencies>
SparkDemo.scala
import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
/**
 * One record built from a line of the JSON input.
 *
 * @param user_id   value of the `user_id` JSON field
 * @param page_id   value of the `page_id` JSON field
 * @param timestamp value of the `timestamp` JSON field (13-digit integers in the
 *                  sample data, presumably epoch milliseconds; confirm with the producer)
 * @param hms       value of the `hms` JSON field (the sample data uses
 *                  `yyyy-MM-dd HH:mm:ss` text)
 */
case class UP(
  user_id: String,
  page_id: String,
  timestamp: BigInt,
  hms: String
)
/**
 * Spark demo: reads a JSON-lines text file, parses every line with fastjson
 * into a `UP` record and prints the records.
 */
object SparkDemo {

  /**
   * Entry point.
   *
   * Creates a `SparkContext` with master `local[2]`, loads
   * `src/main/resources/a.txt`, converts each non-blank line into a `UP`
   * and prints it. The context is always stopped before returning, even when
   * the job fails.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Create the SparkContext
    val conf: SparkConf = new SparkConf().setAppName("a0").setMaster("local[2]")
    val sc: SparkContext = new SparkContext(conf)
    try {
      // Read the input, one JSON document per line
      val lines: RDD[String] = sc.textFile("src/main/resources/a.txt")

      // Parse the JSON
      val records: RDD[UP] = lines
        // A blank line (e.g. a trailing empty line) carries no record and does
        // not yield a usable JSONObject, so drop it before parsing.
        .filter(_.trim.nonEmpty)
        .map { line =>
          val j: JSONObject = JSON.parseObject(line)
          UP(
            j.getString("user_id"),
            j.getString("page_id"),
            // java.math.BigInteger is converted to scala.math.BigInt implicitly
            j.getBigInteger("timestamp"),
            j.getString("hms")
          )
        }

      records.foreach(println)
    } finally {
      // Release the context's resources whether or not the job succeeded.
      sc.stop()
    }
  }
}
a.txt
{"user_id": "u102", "page_id": "p7", "timestamp": 1649997187106, "hms": "2022-04-15 12:33:07"}
{"user_id": "u0", "page_id": "p7", "timestamp": 1649997307206, "hms": "2022-04-15 12:35:07"}
{"user_id": "u216", "page_id": "p6", "timestamp": 1649997427304, "hms": "2022-04-15 12:37:07"}
{"user_id": "u51", "page_id": "p7", "timestamp": 1649997547403, "hms": "2022-04-15 12:39:07"}
{"user_id": "u75", "page_id": "p6", "timestamp": 1649997667504, "hms": "2022-04-15 12:41:07"}
{"user_id": "u53", "page_id": "p3", "timestamp": 1649997787603, "hms": "2022-04-15 12:43:07"}
{"user_id": "u93", "page_id": "p8", "timestamp": 1649997907704, "hms": "2022-04-15 12:45:07"}
{"user_id": "u119", "page_id": "p5", "timestamp": 1649998027803, "hms": "2022-04-15 12:47:07"}
{"user_id": "u177", "page_id": "p7", "timestamp": 1649998147902, "hms": "2022-04-15 12:49:07"}