0
点赞
收藏
分享

微信扫一扫

python 解析恶心的时间

eelq 2022-01-20 阅读 80

python 解析恶心的时间

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
# import date_util
from date_util import DateUtil

# Driver script: read a CSV of member records, normalise the free-form
# "加入协会日期" (date of joining the association) column with
# DateUtil.parse_date, and write the result to a new CSV.

# Use the SimHei font for matplotlib's sans-serif family
# (presumably so that Chinese labels render; no plot is drawn in this script).
plt.rcParams['font.sans-serif'] = [u'SimHei']

# Earlier input candidates (Excel workbooks), kept for reference:
# filename=r"G:\edgeDownload\作家数据模板 (1).xlsx"
# filename=r"G:\project\springbootProj\writer\writer_iot\gitignore\入会时间格式恶心3055.xlsx"
# Input file actually used: the CSV export of the workbook above.
filename=r"G:\project\springbootProj\writer\writer_iot\gitignore\入会时间格式恶心3055.csv"
# Other read variants that were tried; the file is read as GBK below.
# data=pd.read_csv(filename,encoding="utf-8")
# data=pd.read_csv(filename)
data=pd.read_csv(filename,encoding="gbk")
# data = pd.read_excel(filename)

# Earlier version that called a module-level parse_date and discarded the result:
# data["加入协会日期"].apply(lambda x:parse_date(x))


# Replace every cell of the join-date column with DateUtil.parse_date's result
# (a datetime, or None when the value cannot be parsed).
data["加入协会日期"]=data["加入协会日期"].apply(lambda x:DateUtil.parse_date(x))

# Earlier output targets (Excel), kept for reference:
# out_file_name=r"G:\file\学校\python数据分析\writer_date_format.xlsx"
# out_file_name=r"G:\project\springbootProj\writer\writer_iot\gitignore\修改掉恶心的时间.xlsx"
out_file_name=r"G:\project\springbootProj\writer\writer_iot\gitignore\修改掉恶心的时间.csv"
# pandas: saving as xlsx (disabled; CSV is written instead)
# data.to_excel(out_file_name,sheet_name="sheetname",index=False)
# data.to_csv(out_file_name,sheet_name="sheetname",index=False)
# Write the cleaned table without the DataFrame index column.
data.to_csv(out_file_name,index=False)

# Report where the output was written.
print("write here",out_file_name)

date_util.py


# def parse_date(date_str):
# #     retrun time.strftime("%Y.%m.%d",date_str)
#     date_str=str(date_str)
#     if date_str=="nan":
#         return ""
#     try:
#         date=time.strptime(date_str,"%Y.%m.%d")
#         date_str_ok=time.strftime("%Y/%m/%d",date)
#         return date_str_ok
#     except ValueError:
#         return date_str
#     return time.strptime(date_str,"%Y.%m.%d")

# https://www.cnpython.com/qa/118360
import datetime

def split_dot(date_str: str):
    """Print *date_str* when its third dot-separated field is a day <= 31.

    Debugging aid for dotted date strings such as ``"1995.12.20"``: values
    whose third field is larger than 31 are silently skipped, everything
    else is echoed to stdout.  Always returns ``None``.

    Raises:
        IndexError: if the string has fewer than three dot-separated fields.
        ValueError: if the third field is not an integer.
    """
    fields = date_str.split(".")
    # Index first, then convert, so the IndexError/ValueError order is kept.
    if int(fields[2]) <= 31:
        print(date_str)

class DateUtil:
    """Best-effort parser for free-form date strings.

    The class is only a namespace: every member is a static method or a
    class-level constant.  ``parse_date`` is the main entry point; the other
    public helpers are the building blocks it uses.
    """

    dotYmd = "%Y.%m.%d"

    # The bare name ``dotYmd`` is used here on purpose: ``DateUtil.dotYmd``
    # does not work inside the class body because the class name is not
    # bound until the body has finished executing.
    # Formats are tried in order; the first one that matches wins.
    DATE_FORMATS = [
        dotYmd, '%Y年%m月%d日', '%m/%d/%Y %I:%M:%S %p',
        '%Y/%m/%d %H:%M:%S', '%d/%m/%Y %H:%M', '%m/%d/%Y', '%Y/%m/%d',
        "%Y%m%d", "%Y、%m", "%Y.%m", "%Y", "%Y.%m.", "%Y.%m.%d.",
    ]

    @staticmethod
    def parse_date_of_fmt_str(date_str, fmt_str):
        """Parse *date_str* with the single ``strptime`` format *fmt_str*.

        Returns:
            A ``datetime.datetime`` on success, ``None`` when the string does
            not match the format (or is not a string at all).
        """
        try:
            return datetime.datetime.strptime(date_str, fmt_str)
        except (ValueError, TypeError):
            return None

    @staticmethod
    def rm_not_num_or_dot(string):
        """Return *string* with every character except digits and ``.`` removed.

        ``str.isdigit`` is used rather than ``str.isnumeric`` because the
        latter also accepts Chinese numerals such as "四".
        """
        return "".join(ch for ch in string if ch.isdigit() or ch == ".")

    @staticmethod
    def parse_date_of_many_fmt(date_str):
        """Try every format in ``DATE_FORMATS`` in order.

        Returns:
            The first successful ``datetime.datetime``, or ``None`` when no
            format matches.
        """
        for fmt in DateUtil.DATE_FORMATS:
            parsed = DateUtil.parse_date_of_fmt_str(date_str, fmt)
            if parsed is not None:
                return parsed
        return None

    @staticmethod
    def _report_unparsed(cleaned):
        """Print a cleaned string that could not be parsed, for manual review.

        Nothing is printed for an empty string, nor when the third
        dot-separated field is a number larger than 31.
        """
        if not cleaned:
            return
        fields = cleaned.split(".")
        try:
            if int(fields[2]) > 31:
                return
        except (IndexError, ValueError):
            pass  # fewer than three fields, or an empty third field
        print(cleaned)

    @staticmethod
    def parse_date(date_str):
        """Convert one raw cell value into a ``datetime.datetime``.

        Strategies, in order:

        1. Missing values (``None``, NaN, empty text) give ``None``.
        2. ``"<year>年"`` gives 1 January of that year.
        3. All-digit text is read as ``YYYYMMDD``, else as a bare year.
        4. ``pandas.to_datetime`` on the text.
        5. Every format in ``DATE_FORMATS`` on the text.
        6. Every format in ``DATE_FORMATS`` on the text reduced to digits
           and dots.

        Args:
            date_str: the raw value; anything that is not a string is
                converted with ``str()`` first.

        Returns:
            A ``datetime.datetime``, or ``None`` when the value is missing or
            cannot be parsed.  Unparseable values are printed for review.
        """
        # Missing cells must be detected *before* str(): afterwards None
        # would be the text "None" and NaN the text "nan".
        if date_str is None:
            return None
        if isinstance(date_str, float) and date_str != date_str:  # NaN
            return None

        text = str(date_str).strip()
        if not text or text.lower() in ("nan", "nat", "none"):
            return None

        # Year-only value such as "1995年": drop exactly the one trailing
        # character (int() tolerates surrounding whitespace).
        if text.endswith("年"):
            try:
                return datetime.datetime(int(text[:-1]), 1, 1)
            except (ValueError, OverflowError):
                pass  # e.g. "约1995年": let the generic strategies try

        if text.isdigit():
            parsed = DateUtil.parse_date_of_fmt_str(text, "%Y%m%d")
            if parsed is not None:
                return parsed
            try:
                return datetime.datetime(int(text), 1, 1)
            except (ValueError, OverflowError):
                # Not a valid year (e.g. "43943"): give up on this cell
                # instead of aborting the whole column conversion.
                return None

        # Imported locally so this helper also works when the class is used
        # from its own module (the driver script imports it from date_util).
        import pandas as pd

        try:
            parsed = pd.to_datetime(text)
        except (ValueError, TypeError, OverflowError):
            parsed = None
        else:
            if pd.isna(parsed):
                return None
            # Return a plain datetime like every other branch does.
            return parsed.to_pydatetime()

        parsed = DateUtil.parse_date_of_many_fmt(text)
        if parsed is not None:
            return parsed

        # Last resort: strip everything but digits and dots, then retry.
        cleaned = DateUtil.rm_not_num_or_dot(text)
        parsed = DateUtil.parse_date_of_many_fmt(cleaned)
        if parsed is not None:
            return parsed

        DateUtil._report_unparsed(cleaned)
        return None

# data['首播日期'].apply(lambda x:parse_date(x))

# data['首播日期']=data['首播日期'].apply(lambda x:datetime.datetime.strptime(move_not_date_form(x),'%Y年%m月%d日'))


# str1="2017/4/1"
# date1= pd.to_datetime(str1)
# date1


import re

# https://www.cnblogs.com/guxingy/p/12890053.html
# UnboundLocalError: local variable 'strdate' referenced before assignment

def move_not_date_form(string):
    """Strip a parenthesised remark from a Chinese date string.

    Example: ``"2001年7月28日 (Fantasia International Film Festiv..."``
    becomes ``"2001年7月28日"``.

    Args:
        string: the raw text.

    Returns:
        *string* unchanged when it contains no ``"("``; otherwise the first
        ``YYYY年M月D日`` date found in it.

    Raises:
        AttributeError: when the text contains ``"("`` but no such date
            (the offending text is printed first).
    """
    # str.find returns -1 (truthy) when the character is absent and 0
    # (falsy) when it is the first character, so ``not string.find("(")``
    # tests the wrong thing; use a membership test instead.
    if "(" not in string:
        return string

    match = re.search(r"(\d{4}年\d{1,2}月\d{1,2}日)", string)
    if match is None:
        print(string)
        raise AttributeError(
            "no 'YYYY年M月D日' date found in {!r}".format(string)
        )
    return match.group(1)

# str1="2001年7月28日 (Fantasia International Film Festiv...	"
# print(move_not_date_form(str1))


# data[data["首播日期"]>=y_2000]

# pd 两个条件
# https://blog.csdn.net/GeekLeee/article/details/75268762
# ani_00_to_10=data[(2000<data["首播日期"])&(data["首播日期"]<=2010)]

# G:\file\学校\python数据分析\大作业代码\bangumi.ipynb

# print(parse_date("1995.12.20"))

# print(parse_date("2020年4月22日"))


# print(DateUtil. parse_date("1995.12.20"))

# print(DateUtil.parse_date("2020年4月22日"))

# str1=DateUtil.rm_not_num_or_dot("2011、12")
# print(str1)
举报

相关推荐

0 条评论