-- Assign every player a bucket in the range 0-99, derived from a salted MD5
-- digest of the player id.
-- NOTE(review): md5_str is salted with 'f4ju8' while hash_int / bucket_id are
-- salted with 'fme7q', so md5_str is NOT the digest the bucket is computed
-- from. Confirm that the two different salts are intentional.
WITH hashed_player AS (
    SELECT
        player_id,
        TO_HEX(MD5(CONCAT('f4ju8', CAST(player_id AS STRING)))) AS md5_str,
        -- First 8 hex digits (32 bits) of the 'fme7q'-salted digest, turned
        -- into an integer by casting the '0x'-prefixed hex string.
        -- SUBSTR positions are 1-based; the previous position 0 was silently
        -- clamped to 1, so the selected characters are unchanged.
        CAST(
            CONCAT(
                '0x',
                SUBSTR(TO_HEX(MD5(CONCAT('fme7q', CAST(player_id AS STRING)))), 1, 8)
            ) AS INT64
        ) AS hash_int
    FROM
        mafia1.create_player
)

SELECT
    player_id,
    md5_str,
    hash_int,
    -- hash_int is never negative (at most 0xFFFFFFFF), so MOD yields 0-99.
    MOD(hash_int, 100) AS bucket_id
FROM
    hashed_player
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 31 15:46:08 2019
@author: lg
"""
import pandas as pd
import datetime
from dateutil.parser import parse
import time
# Wall-clock start of the script; the elapsed time since this point is printed
# after data.csv has been written.
t1=time.time()
def now_date():
    """Return the current local date formatted as a 'YYYY-MM-DD' string."""
    return datetime.datetime.now().strftime('%Y-%m-%d')
def before_time(dt, n):
    """Return the calendar date `n` days before `dt` as a 'YYYY-MM-DD' string.

    Args:
        dt: Reference date. Either a string accepted by
            `dateutil.parser.parse`, or a `datetime.date` /
            `datetime.datetime` object, which is used as-is without parsing.
        n: Number of days to go back. It is passed to
            `datetime.timedelta(days=n)`, so a negative value moves the
            result forward instead.

    Returns:
        The shifted date formatted with '%Y-%m-%d'.
    """
    # datetime.datetime is a subclass of datetime.date, so one check covers both.
    reference = dt if isinstance(dt, datetime.date) else parse(dt)
    earlier = reference - datetime.timedelta(days=n)
    return earlier.strftime('%Y-%m-%d')
def train_data(start, end, limit=60000):
    """Load the most recent `charge_status_2` rows whose date lies in a range.

    The query keeps rows with `start <= DATE(timestamp) <= end` (both bounds
    inclusive), orders them by `timestamp` descending and returns at most
    `limit` of them.

    Args:
        start: Lower date bound, substituted into the query text inside
            single quotes (the callers in this script pass 'YYYY-MM-DD').
        end: Upper date bound, substituted the same way.
        limit: Maximum number of rows to return. Defaults to 60000, the
            value that used to be hard-coded in the query.

    Returns:
        Whatever `pd.read_gbq` returns for the query, run with the standard
        SQL dialect.

    Raises:
        ValueError / TypeError: if `limit` cannot be converted with `int()`.
    """
    # NOTE(review): start/end are spliced into the SQL text; only pass trusted,
    # date-like values here.
    query = """
SELECT * FROM `heidao-market.mafia1.charge_status_2` WHERE DATE(timestamp) >= '{}' and
DATE(timestamp) <='{}'
order by timestamp desc
limit {}
"""
    # int() guarantees that only a plain integer literal reaches the LIMIT clause.
    sql = query.format(start, end, int(limit))
    return pd.read_gbq(sql, dialect='standard')
# Query window: from two days before today up to today (train_data treats both
# bounds as inclusive).
end=now_date()
start= before_time(end,2)
df=train_data(start,end)
# Disabled alternatives kept for reference: running a query string directly,
# or loading a previously saved data.csv instead of querying.
#df= pd.read_gbq(sql, dialect='standard')
#import pandas as pd
#
#df=pd.read_csv('data.csv')
# c2 lists the columns taken from the query result; c1 lists the labels they
# are written under, matched by position (seq_id -> id, timestamp -> created_at,
# the other three names are unchanged).
c1= ['id', 'player_id','giftbag_id', 'status','created_at']
c2=['seq_id','player_id','giftbag_id', 'status', 'timestamp']
df1=df[c2]
# Going through the raw value array discards the c2 column labels so that the
# c1 labels can be attached positionally to a fresh DataFrame.
df2=df1.values
df3=pd.DataFrame(df2,columns=c1)
# Write the relabelled columns to data.csv without the DataFrame index.
df3.to_csv('data.csv',index=False)
# Elapsed seconds since t1 was recorded at the start of the script.
print(time.time() -t1)
# Rebinds `df` to the contents of a CSV file.
# NOTE(review): `path` is not assigned anywhere in the visible code — confirm
# where it is defined before this line runs.
df = pd.read_csv(path)