0
点赞
收藏
分享

微信扫一扫

效果一般般的重入检测算法——基于blockchair的raw data

import json

# block_id	transaction_hash	index	depth	time
# failed	fail_reason	type	sender	recipient	child_call_count
# value	value_usd	transferred	input_hex	output_hex
import networkx as nx
import pandas as pd
from tqdm import trange

#


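# (A lone ''' stood here in the original paste with no closing delimiter; it is
# commented out so the definitions below are not swallowed by a string literal.)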


def generateGraph(callTxs, A, B):
    """Build a directed sender -> recipient graph from a set of call rows.

    Args:
        callTxs: DataFrame providing ``sender`` and ``recipient`` columns;
            every row contributes one directed edge.
        A: Node that is always present in the result.
        B: Node that is always present in the result.

    Returns:
        A ``networkx.DiGraph`` containing ``A``, ``B`` and one edge per row.
    """
    graph = nx.DiGraph()

    # A and B are added explicitly so they exist even when no row mentions them.
    graph.add_node(A)
    graph.add_node(B)

    # Walk both columns in lockstep; row order is preserved, labels are irrelevant.
    for caller, callee in zip(callTxs['sender'], callTxs['recipient']):
        graph.add_edge(caller, callee)

    return graph


def _is_sub_call(parent_index, child_index):
    """Return True when ``child_index`` names a call nested below ``parent_index``.

    Both arguments are the string form of the ``index`` column. A plain prefix
    test is not enough: "0.1" is a textual prefix of "0.10", which is a sibling
    rather than a descendant. The character following the shared prefix must
    therefore be a separator, i.e. anything that is not a digit.
    NOTE(review): assumes index components are digits joined by a non-digit
    separator (e.g. "0.8.1") - confirm against the data source.
    """
    if not child_index.startswith(parent_index):
        return False
    remainder = child_index[len(parent_index):]
    return bool(remainder) and not remainder[0].isdigit()


def getABAB(txs, hash):
    """Scan one transaction's calls for the A->X ... A->Y re-entrancy shape.

    Two rows ``i < j`` with at least one row between them form a hit when:

    * row ``i`` is not a self-call (sender != recipient);
    * both rows have the same sender ``A``;
    * the first 8 characters of ``input_hex`` are equal (presumably the
      function selector - confirm whether the data carries a "0x" prefix);
    * row ``j``'s index is nested below row ``i``'s index;
    * the rows strictly between them, viewed as a directed
      sender -> recipient graph, contain a path from ``X`` (row ``i``'s
      recipient) back to ``A``.

    Every hit is appended to ``test.txt`` as a "," followed by the JSON list
    ``[[A, X], [index_i, index_j], [prefix_i, prefix_j], path, hash]`` where
    ``path`` is a shortest X -> A path in that graph.

    Args:
        txs: DataFrame with ``sender``, ``recipient``, ``input_hex`` and
            ``index`` columns. The nesting test relies on rows being listed
            from shallow to deep, as the original author notes for this data.
        hash: Transaction hash; only copied into the output record.

    Returns:
        None. Results are written to ``test.txt``.
    """
    txs = txs.reset_index(drop=True)

    # Extract the columns once. ``tolist()`` avoids slow per-cell pandas access
    # inside the quadratic loop and yields native Python scalars, so integer
    # indices stay JSON-serialisable.
    senders = txs['sender'].tolist()
    recipients = txs['recipient'].tolist()
    selectors = [data[0:8] for data in txs['input_hex'].tolist()]
    indices = txs['index'].tolist()
    index_keys = [str(value) for value in indices]
    total = len(senders)

    for i in range(total):
        caller = senders[i]
        callee = recipients[i]
        # A calling itself cannot start the pattern.
        if caller == callee:
            continue

        # Start at i + 2: at least one call must lie between the two rows.
        for j in range(i + 2, total):
            if senders[j] != caller or selectors[j] != selectors[i]:
                continue
            # The second call has to happen inside the first one's call tree.
            if not _is_sub_call(index_keys[i], index_keys[j]):
                continue

            # Graph of everything that happened between the two calls.
            between = generateGraph(txs.iloc[i + 1:j], caller, callee)

            # A path X -> A means control came back to A before it called out
            # again. One search yields both the yes/no answer and the path.
            try:
                shortest_path = nx.shortest_path(between, callee, caller)
            except nx.NetworkXNoPath:
                continue

            record = [
                [caller, callee],
                [indices[i], indices[j]],
                [selectors[i], selectors[j]],
                shortest_path,
                hash,
            ]
            # ``with`` guarantees the handle is closed even if dumping fails.
            with open('test.txt', 'a') as f:
                f.write(",")
                f.write(json.dumps(record))


if __name__ == '__main__':
    # Script entry point: load a blockchair calls dump (TSV), then run the
    # re-entrancy scan on every transaction that has more than two usable calls.
    from tqdm import tqdm  # same package as the file-level ``trange`` import

    file_path = r'blockchair_ethereum_calls_20210917.tsv'
    datas = pd.read_csv(file_path, sep='\t', header=0, index_col=None)

    # Group once instead of re-filtering the whole frame for every hash.
    # ``sort=False`` keeps transactions in order of first appearance; rows whose
    # hash is NaN are dropped by groupby and therefore never reach the scan.
    grouped = datas.groupby('transaction_hash', sort=False)

    seen_blocks = set()
    for tx_hash, txs in tqdm(grouped, total=grouped.ngroups):
        # Print each block number the first time one of its transactions shows up.
        blockNum = txs['block_id'].iloc[0]
        if blockNum not in seen_blocks:
            seen_blocks.add(blockNum)
            print(blockNum)

        # Data cleaning: only 64-character string hashes are processed.
        if not isinstance(tx_hash, str) or len(tx_hash) != 64:
            continue

        # Keep only rows whose type is "call".
        callTxs = txs[txs['type'] == "call"]
        # Drop rows with a missing hash, sender or recipient.
        callTxs = callTxs.dropna(axis=0, subset=["transaction_hash", "sender", "recipient"])
        # Give empty input_hex a placeholder so it can be sliced later; other
        # columns are left untouched.
        callTxs = callTxs.fillna({'input_hex': 'ABCDEFGH'})

        # The pattern needs two matching calls with at least one call between them.
        if len(callTxs) > 2:
            getABAB(callTxs, tx_hash)
举报

相关推荐

0 条评论