DataFrame使用示例-CFANZ编程社区

class S2TShow2(S2TShowMeta):
    """Resource that returns the regulatory neighbourhood of one gene as a graph.

    Starting from the ``knockout`` gene, rows of ``tb_grn`` are followed for
    up to two hops downstream (``tf`` -> ``tg``) and up to two hops upstream
    (``tg`` -> ``tf``). The edges found are returned as ``links`` and the
    genes they touch as ``nodes``.
    """

    # Column order used for every frame built from a ``tb_grn`` result set.
    _EDGE_COLUMNS = ['tg', 'score', 'tf']
    # Column order of the node table before the ``name`` column is added.
    _NODE_COLUMNS = ['id', 'colorkey', 'val']

    @staticmethod
    def _concat_frames(frames, columns):
        """Concatenate the non-empty *frames*; return an empty frame if none.

        ``pd.concat`` raises ``ValueError`` on an empty list, so the
        "nothing found" case is handled explicitly here.
        """
        non_empty = [frame for frame in frames if not frame.empty]
        if not non_empty:
            return DataFrame(columns=columns)
        return pd.concat(non_empty, ignore_index=True)

    @classmethod
    def _collect_edges(cls, sql, params, seed, next_column, hops=2):
        """Run *sql* repeatedly, widening the gene set by one hop each time.

        Args:
            sql: Query selecting ``tg, score, tf`` and filtering on ``%(tf)s``.
            params: Query parameters shared by every hop (not modified).
            seed: Gene the traversal starts from.
            next_column: Result column (``'tg'`` or ``'tf'``) whose distinct
                values become the ``%(tf)s`` filter of the following hop.
            hops: Maximum number of queries to run.

        Returns:
            A DataFrame with columns ``tg, score, tf`` holding all rows found;
            empty when the very first hop returns nothing.
        """
        query_params = dict(params, tf=(seed,))
        frames = []
        for _ in range(hops):
            rows = fetch_all(sql, query_params)
            frame = DataFrame(rows, columns=cls._EDGE_COLUMNS)
            if frame.empty:
                break
            frames.append(frame)
            query_params['tf'] = tuple(frame[next_column].drop_duplicates().tolist())
        return cls._concat_frames(frames, cls._EDGE_COLUMNS)

    def get(self):
        """Build the graph payload for the requested knockout gene.

        Request arguments (read from ``self.parser``): ``celltype``,
        ``knockout`` (defaults to ``'Cdx2'`` when falsy), ``light`` and
        ``num`` (row limit applied to each query).

        Returns:
            A JSON response with ``nodes`` (``id``, ``colorkey``, ``val``,
            ``name``) and ``links`` (``source``, ``target``, ``name``), plus
            ``topN`` when ``light`` is set and matching genes are found.
            On a bad request or a non-integer ``num`` the response is
            ``{'code': -1, 'msg': 'Parameter error'}``.
        """
        yellow_color = '#ffca00'  # the knockout gene itself
        blue_color = '#00b0f0'    # genes reached downstream
        green_color = '#00ffff'   # genes reached upstream
        red_color = '#ff0000'     # genes highlighted when ``light`` is set

        try:
            args = self.parser.parse_args()
        except BadRequest:
            return jsonify({'code': -1, 'msg': 'Parameter error'})

        cell_type = args["celltype"]
        knockout = args["knockout"] or 'Cdx2'
        light = args["light"]
        try:
            # The limit is bound as a query parameter below; coercing to int
            # first rejects anything that is not a plain number.
            limit = int(args["num"])
        except (TypeError, ValueError):
            return jsonify({'code': -1, 'msg': 'Parameter error'})

        # NOTE(review): all four filters receive the same ``celltype`` value,
        # exactly as in the original code - confirm this is intended.
        params = {
            'species': cell_type,
            'tissue': cell_type,
            'cell_type': cell_type,
            'built_mode': cell_type,
            'limit': limit,
        }
        sql_template = (
            ''' select  tg,score,tf from tb_grn where species =%(species)s  and tissue =%(tissue)s '''
            ''' and cell_type =%(cell_type)s and built_mode =%(built_mode)s '''
            ''' and {column} in %(tf)s order by score desc limit %(limit)s '''
        )

        # Descendants: rows whose ``tf`` is in the current set; continue from their ``tg``.
        down_edges = self._collect_edges(
            sql_template.format(column='tf'), params, knockout, 'tg')
        # Ancestors: rows whose ``tg`` is in the current set; continue from their ``tf``.
        up_edges = self._collect_edges(
            sql_template.format(column='tg'), params, knockout, 'tf')

        down_nodes = down_edges[['tg']].rename(columns={'tg': 'id'})
        down_nodes['colorkey'] = blue_color
        down_nodes['val'] = 30

        up_nodes = up_edges[['tf']].rename(columns={'tf': 'id'})
        up_nodes['colorkey'] = green_color
        up_nodes['val'] = 30

        # Downstream nodes come first so that, for a gene found in both
        # directions, drop_duplicates keeps the blue entry.
        nodes_df = self._concat_frames([down_nodes, up_nodes], self._NODE_COLUMNS)
        nodes_df = nodes_df.drop_duplicates(subset=['id'])

        is_knockout = nodes_df['id'] == knockout
        if is_knockout.any():
            nodes_df.loc[is_knockout, 'colorkey'] = yellow_color
        else:
            knockout_node = DataFrame(
                {'id': [knockout], 'colorkey': [yellow_color], 'val': [30]})
            nodes_df = self._concat_frames(
                [nodes_df, knockout_node], self._NODE_COLUMNS)
        nodes_df['name'] = nodes_df['id']

        payload = {'nodes': [], 'links': []}

        if light:
            sql = ''' select input_path , dataset  from knock_out where cell_type = %(cell_type)s;  '''
            record = fetch_one(sql, {'cell_type': cell_type})
            # Without a knock_out row there is no file to read; skip highlighting.
            if record:
                rel_path = record[0]
                # NOTE(review): takes the first element of ``dataset``;
                # presumably that column holds a list of file names - confirm.
                out_fname = record[1][0]
                out_f = os.path.join(
                    current_app.config.get('FILE_STORE'), rel_path, out_fname)
                light_df = pd.read_csv(out_f, usecols=['Gene', knockout])
                light_df = pd.merge(light_df, nodes_df, left_on='Gene', right_on='id')
                if not light_df.empty:
                    # Rank graph genes by absolute value in the knockout column; keep the top 10.
                    light_df[knockout] = light_df[knockout].abs()
                    light_df = light_df.sort_values(knockout, ascending=False).head(10)
                    top_genes = light_df['Gene'].tolist()
                    payload['topN'] = top_genes
                    nodes_df.loc[nodes_df['id'].isin(top_genes), 'colorkey'] = red_color

        payload['nodes'] = nodes_df.to_dict(orient='records')

        links_df = self._concat_frames([down_edges, up_edges], self._EDGE_COLUMNS)
        links_df = links_df.rename(columns={"tf": "source", "tg": "target"})
        links_df['name'] = (
            links_df['source'] + '->' + links_df['target'] + ':'
            + links_df['score'].astype('string')
        )
        links_df = links_df.drop('score', axis=1)
        payload['links'] = links_df.to_dict(orient='records')
        return jsonify(payload)