0
点赞
收藏
分享

微信扫一扫

向 PostgreSQL 插入数据

import psycopg2
import logging, time
import requests as req

# libpq-style connection URI: user "postgres" with an empty password on port
# 5432, tagged with application_name=json_parse. Host and database name are
# filled in below; the '***' values look like redacted placeholders and must
# be replaced before running -- TODO confirm.
_DB_URI_TEMPLATE = 'postgresql://postgres:@{dbhost}:5432/{dbname}?application_name=json_parse'
DB_URI = _DB_URI_TEMPLATE.format(dbhost='***', dbname='***')


class ConnectDB(object):
    """Context manager that opens a psycopg2 connection and closes it on exit.

    ``__enter__`` returns the raw connection object. ``__exit__`` only closes
    the connection -- it never commits -- so callers must call ``commit()``
    themselves before leaving the ``with`` block. Exceptions raised inside the
    block are not suppressed.
    """

    def __init__(self, url):
        """Remember the connection string; nothing is opened until ``__enter__``.

        Args:
            url: Connection string passed unchanged to ``psycopg2.connect``.
        """
        self.url = url
        # Defined up front so __exit__ is safe to call even when __enter__
        # never ran (previously this raised AttributeError).
        self.conn = None

    def __enter__(self):
        """Open the connection and return it."""
        self.conn = psycopg2.connect(self.url)
        return self.conn

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection, if one is open, and forget it."""
        # Clear the attribute first so it is reset even if close() raises.
        conn, self.conn = self.conn, None
        if conn is not None:
            conn.close()


def update_db(sql, value):
    """Execute ``sql`` once per parameter tuple in ``value`` and commit.

    A new connection to ``DB_URI`` is opened for the call and closed
    afterwards.

    Args:
        sql: Parameterised SQL statement (``%s`` placeholders).
        value: Sequence of parameter tuples handed to ``cursor.executemany``.
            An empty sequence is a no-op and does not touch the database.
    """
    # Nothing to write: skip the connection round-trip entirely.
    if not value:
        return

    with ConnectDB(DB_URI) as db_con:
        with db_con.cursor() as cur:
            cur.executemany(sql, value)
        # ConnectDB only closes the connection, so commit explicitly.
        db_con.commit()


def get_ids():
    """Page through NCBI ESearch for the ``gene`` database and store the ids.

    Requests pages of ``step`` ids (``retstart`` = 0, step, 2*step, ...) for
    the query ``"all"[filter]`` and inserts every returned id into the
    ``gene`` table through ``update_db``. Each page is attempted up to
    ``max_tries`` times with a one-second pause after a failure; a page that
    still fails is logged at ERROR level and skipped.

    Logging is configured here to write to ``/home/zcy/ncbi/NCBI.log``.
    """
    logging.basicConfig(filename='/home/zcy/ncbi/NCBI.log', level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(lineno)s: %(message)s')
    email = 'zzzz'
    api_key = 'zzzz'
    rooturl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    sql = 'INSERT INTO gene (gene_id) VALUES(%s);'

    step = 10000
    max_tries = 3
    # Seconds to wait for the server; without a timeout requests can block
    # forever on a stalled connection. The value itself is a judgement call.
    timeout = 60
    item = 0
    # NOTE(review): 71786370 is presumably the total number of gene records
    # when the script was written -- confirm, or read it from the response.
    while item < 71786370:
        logging.info('item=%s', item)
        # The URL depends only on the page offset, so build it once per page.
        url = f'{rooturl}?db=gene&term="all"[filter]&retstart={item}&retmax={step}&retmode=json&sort=name&usehistory=y&email={email}&api_key={api_key}'
        # The retry budget is per page: a fresh range() for every offset, so
        # earlier failures cannot exhaust the attempts of later pages.
        for _ in range(max_tries):
            logging.debug(url)
            try:
                rsp = req.get(url, timeout=timeout)
            except req.RequestException as exc:
                # Connection errors and timeouts count as a failed attempt
                # instead of aborting the whole run.
                logging.warning('call %s failed, error=%s', url, exc)
                time.sleep(1)
                continue
            if rsp.status_code == 200:
                logging.info('call success')
                id_lst = rsp.json().get('esearchresult', {}).get('idlist', [])
                # executemany expects one parameter tuple per row.
                update_db(sql, [(int(i),) for i in id_lst])
                break
            logging.warning('call %s failed, rsp=%s', url, rsp.text)
            time.sleep(1)
        else:
            # Every attempt failed; record the offset so it can be re-run.
            logging.error('retstart= %s failed', item)
        item += step

# Start the full download only when run as a script, not when imported.
if __name__ == '__main__':
    get_ids()

举报

相关推荐

0 条评论