1. Writing a small amount of data to Greenplum
from sqlalchemy import types, create_engine

# Specify the column types for the target table
dtype = {
    'name': types.VARCHAR(length=255),
    'age': types.INT,
}
# Create the database connection (replace username, password, host and port)
engine = create_engine('postgresql://username:password@host:port/postgres')
# data is a DataFrame; append its rows to the target table
data.to_sql('table_name', con=engine, if_exists='append', index=False, dtype=dtype)
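
For reference, a minimal sketch of the data DataFrame assumed above; the column names match the dtype mapping, and the sample rows are purely illustrative:

import pandas as pd

# Hypothetical sample data whose columns match the dtype mapping above
data = pd.DataFrame({
    'name': ['alice', 'bob'],
    'age': [30, 25],
})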
2. Writing a large amount of data to Greenplum (millions, tens of millions, or hundreds of millions of rows)
import io
import pandas as pd
from sqlalchemy import types, create_engine

# Specify the column types for the target table
dtype = {
    'name': types.VARCHAR(length=255),
    'age': types.INT,
}
# In-memory text buffer that will hold the CSV dump
string_data_io = io.StringIO()
# data is a DataFrame; write it to the buffer with '|' as the delimiter
data.to_csv(string_data_io, sep='|', index=False)
# Initialize the database connection (replace username, password, host and port)
engine = create_engine('postgresql://username:password@host:port/postgres')
pd_sql_engine = pd.io.sql.pandasSQL_builder(engine)
table = pd.io.sql.SQLTable('table_name', pd_sql_engine, frame=data, index=False,
                           if_exists='replace', schema=None, dtype=dtype)
# Create the target table (dropped and recreated because if_exists='replace')
table.create()
# Rewind the buffer and bulk-load it into the table with COPY
string_data_io.seek(0)
with engine.connect() as connection:
    with connection.connection.cursor() as cursor:
        copy_cmd = "COPY table_name FROM STDIN HEADER DELIMITER '|' CSV"
        cursor.copy_expert(copy_cmd, string_data_io)
    connection.connection.commit()
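
COPY streams the whole buffer to the server in a single command instead of issuing one INSERT per row, which is why this path scales to tens of millions of rows. As a quick sanity check after the load, the same engine can be reused to count the rows that landed in the table; table_name is the same placeholder used above:

import pandas as pd

# Row count of the freshly loaded table (assumes the engine created above)
row_count = pd.read_sql('SELECT count(*) AS n FROM table_name', con=engine)
print(row_count['n'].iloc[0])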