Intro
A few operations with time functions, noted down here for future reference.
Current time
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType, IntegerType, StringType

def get_or_create(app_name):
    spark = (
        SparkSession.builder.appName(app_name)
        .config("spark.driver.maxResultSize", "10g")
        .config("spark.sql.execution.arrow.enabled", "true")
        .config("spark.dynamicAllocation.enabled", "false")
        .config("spark.sql.crossJoin.enabled", "true")
        .config("spark.kryoserializer.buffer.max", "512m")
        .getOrCreate()
    )
    spark.sparkContext.setLogLevel("ERROR")
    return spark

spark = get_or_create("spark")
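One caveat before the examples: unix_timestamp() and from_unixtime() convert between epoch seconds and local time using the Spark session time zone, so the outputs below depend on that setting. A minimal sketch; "Asia/Shanghai" is an assumption that matches the sample outputs in this post:

# Session-level time zone; all string/timestamp conversions below honor it.
# "Asia/Shanghai" (UTC+8) is assumed here; substitute your own zone.
spark.conf.set("spark.sql.session.timeZone", "Asia/Shanghai")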
Timestamp (seconds)
spark.sql("select unix_timestamp() as now_timestamp").show()
+-------------+
|now_timestamp|
+-------------+
| 1690342345|
+-------------+
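For reference, the same query through the DataFrame API, using the F alias imported above; a minimal sketch, where spark.range(1) is just a one-row dummy DataFrame to evaluate the expression against:

spark.range(1).select(F.unix_timestamp().alias("now_timestamp")).show()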
Date
sql = """
select from_unixtime(unix_timestamp(),"yyyy-MM-dd HH:mm:ss") as date_time,
from_unixtime(unix_timestamp(),"yyyyMMdd") as ds,
current_timestamp() as t, -- current date and time, millisecond precision
current_date() as t1 -- current date
"""

spark.sql(sql).show(truncate=False)
+-------------------+--------+-----------------------+----------+
|date_time |ds |t |t1 |
+-------------------+--------+-----------------------+----------+
|2022-08-05 19:01:32|20220805|2022-08-05 19:01:32.239|2022-08-05|
+-------------------+--------+-----------------------+----------+
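The DataFrame-API spelling of the same expressions; a sketch under the same session:

spark.range(1).select(
    F.from_unixtime(F.unix_timestamp(), "yyyy-MM-dd HH:mm:ss").alias("date_time"),
    F.from_unixtime(F.unix_timestamp(), "yyyyMMdd").alias("ds"),
    F.current_timestamp().alias("t"),  # millisecond precision
    F.current_date().alias("t1"),      # current date
).show(truncate=False)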
Date to timestamp
spark.sql("select to_unix_timestamp('20170101','yyyyMMdd') as timestamp, from_unixtime(to_unix_timestamp('20170101','yyyyMMdd'),'yyyy-MM-dd HH:mm:ss') as date_time").show(truncate=False)
+----------+-------------------+
|timestamp |date_time |
+----------+-------------------+
|1483200000|2017-01-01 00:00:00|
+----------+-------------------+
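The same parse through the DataFrame API; a sketch (the one-row df and the ds column name are mine):

df = spark.createDataFrame([("20170101",)], ["ds"])
df.select(
    F.unix_timestamp("ds", "yyyyMMdd").alias("timestamp"),
    F.from_unixtime(F.unix_timestamp("ds", "yyyyMMdd"), "yyyy-MM-dd HH:mm:ss").alias("date_time"),
).show(truncate=False)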
First day of the month
spark.sql("SELECT trunc(substring('2022-03-18 00:00:00',1,10),'MM') as month_first_day").show(truncate=False)
+---------------+
|month_first_day|
+---------------+
|2022-03-01 |
+---------------+
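DataFrame-API version; a sketch where to_date replaces the substring trick by casting the timestamp string down to a date:

df = spark.createDataFrame([("2022-03-18 00:00:00",)], ["t"])
df.select(F.trunc(F.to_date("t"), "MM").alias("month_first_day")).show()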
Month difference
spark.sql("SELECT to_unix_timestamp('2023-07-26 12:00:00')-to_unix_timestamp('2023-07-21 23:01:00') as diff_seconds").show(truncate=False)
+------------+
|diff_seconds|
+------------+
|392340 |
+------------+

392340/60/60/24/31
0.14648297491039425

spark.sql("SELECT MONTHS_BETWEEN('2023-07-26 12:00:00','2023-07-21 23:01:00') as diff_months").show(truncate=False)
+-----------+
|diff_months|
+-----------+
|0.14648297 |
+-----------+

spark.sql("SELECT to_unix_timestamp('2023-06-26 00:00:00')-to_unix_timestamp('2023-06-21 00:00:00') as diff_seconds").show(truncate=False)
+------------+
|diff_seconds|
+------------+
|432000 |
+------------+

spark.sql("SELECT MONTHS_BETWEEN('2023-06-26','2023-06-21') as diff_months").show(truncate=False)
+-----------+
|diff_months|
+-----------+
|0.16129032 |
+-----------+

432000/60/60/24/31
0.16129032258064516

So the month-difference logic here takes the difference between the two timestamps in seconds, converts it to days, and divides by a fixed 31-day month. This matches the documented MONTHS_BETWEEN behavior: when the two dates do not fall on the same day of the month, the fractional part is computed assuming 31 days per month.
If you only want the difference in calendar months, first truncate both dates to the first day of their month, then take the difference.
spark.sql("SELECT to_unix_timestamp('2023-07-01 00:00:00')-to_unix_timestamp('2023-06-01 00:00:00') as diff_seconds").show(truncate=False)
+------------+
|diff_seconds|
+------------+
|2592000 |
+------------+

2592000/60/60/24/31
0.967741935483871

A month difference between two month-first days follows a different branch, so the denominator is no longer a fixed 31 days: per the Spark docs, when both dates fall on the same day of the month (or both are the last day of their month), MONTHS_BETWEEN ignores the time of day and returns a whole number of months.
spark.sql("SELECT MONTHS_BETWEEN('2023-08-01','2023-06-01') as diff_months").show(truncate=False)
+-----------+
|diff_months|
+-----------+
|2.0 |
+-----------+

spark.sql("SELECT MONTHS_BETWEEN(trunc('2023-08-01','MM'),trunc('2023-07-31','MM')) as diff_months").show(truncate=False)
+-----------+
|diff_months|
+-----------+
|1.0 |
+-----------+
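The calendar-month trick in DataFrame form; a sketch (df, d1, d2 are my names):

df = spark.createDataFrame([("2023-08-01", "2023-07-31")], ["d1", "d2"])
df.select(
    F.months_between(F.trunc("d1", "MM"), F.trunc("d2", "MM")).alias("diff_months")
).show()  # 1.0, same as the SQL above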
Date difference
datediff returns an integer number of days; as the second example shows, the time-of-day parts are ignored.
spark.sql("SELECT datediff('2023-07-26','2023-07-21') as diff_days").show(truncate=False)
+---------+
|diff_days|
+---------+
|5 |
+---------+

spark.sql("SELECT datediff('2023-07-26 12:00:00','2023-07-21 23:56:00') as diff_days").show(truncate=False)
+---------+
|diff_days|
+---------+
|5 |
+---------+
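Equivalent DataFrame call; a sketch (the end/start column names are mine):

df = spark.createDataFrame([("2023-07-26 12:00:00", "2023-07-21 23:56:00")], ["end", "start"])
df.select(F.datediff("end", "start").alias("diff_days")).show()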
Date addition and subtraction
spark.sql("SELECT date_add('2023-07-26 12:00:00',10) as add_days").show(truncate=False)
+----------+
|add_days |
+----------+
|2023-08-05|
+----------+

spark.sql("SELECT date_add('2023-07-26 12:00:00',-10) as minus_days").show(truncate=False)
+----------+
|minus_days|
+----------+
|2023-07-16|
+----------+
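And the DataFrame-API form; a sketch (F.date_sub is the idiomatic alternative to a negative offset):

df = spark.createDataFrame([("2023-07-26 12:00:00",)], ["t"])
df.select(
    F.date_add("t", 10).alias("add_days"),
    F.date_add("t", -10).alias("minus_days"),  # or F.date_sub("t", 10)
).show()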
2023-07-26, overcast, Jiangning District, Nanjing