0
点赞
收藏
分享

微信扫一扫

smith_waterman算法的python实现

非凡兔 2022-04-29 阅读 40
python算法
import numpy as np
seq1='CGUATTGA'
seq2='CGATGA'

def compare(m, n, match, n_match):
    if m == n:
        return match
    else:
        return n_match

def Smith_Waterman(seq1, seq2, mS, mmS, w1):
    path = {}
    S = np.zeros([len(seq1) + 1, len(seq2) + 1], int)

    for i in range(0, len(seq1) + 1):
        for j in range(0, len(seq2) + 1):
            if i == 0 or j == 0:
                path['[' + str(i) + ', ' + str(j) + ']'] = []
            else:
                if seq1[i - 1] == seq2[j - 1]:
                    s = mS
                else:
                    s = mmS
                L = S[i - 1, j - 1] + s
                P = S[i - 1, j] - w1
                Q = S[i, j - 1] - w1
                S[i, j] = max(L, P, Q, 0)
                path['[' + str(i) + ', ' + str(j) + ']'] = []
                if L == S[i, j]:
                    path['[' + str(i) + ', ' + str(j) + ']'].append('[' + str(i - 1) + ', ' + str(j - 1) + ']')
                if P == S[i, j]:
                    path['[' + str(i) + ', ' + str(j) + ']'].append('[' + str(i - 1) + ', ' + str(j) + ']')
                if Q == S[i, j]:
                    path['[' + str(i) + ', ' + str(j) + ']'].append('[' + str(i) + ', ' + str(j - 1) + ']')

    print("S = ", S)
    end = np.argwhere(S == S.max())
    for i in end:
        key = str(list(i))
        value = path[key]
        result = [key]
        traceback(path, S, value, result, seq1, seq2)

def traceback(path, S, value, result, seq1, seq2):
    if value != []:
        key = value[0]
        result.append(key)
        value = path[key]
        i = int((key.split(',')[0]).strip('['))
        j = int((key.split(',')[1]).strip(']'))
    if S[i, j] == 0:
        x = 0
        y = 0
        s1 = ''
        s2 = ''
        md = ''
        for n in range(len(result)-2, -1, -1):
            point = result[n]
            i = int((point.split(',')[0]).strip('['))
            j = int((point.split(',')[1]).strip(']'))
            if i == x:
                s1 += '-'
                s2 += seq2[j-1]
                md += ' '
            elif j == y:
                s1 += seq1[i-1]
                s2 += '-'
                md += ' '
            else:
                s1 += seq1[i-1]
                s2 += seq2[j-1]
                md += '|'
            x = i
            y = j
        print ('alignment result:')
        print ('s1: %s'%s1)
        print ('    '+md)
        print ('s2: %s'%s2)
    else:
        traceback(path, S, value, result, seq1, seq2)
Smith_Waterman(seq1, seq2, 1, -1/3, 1)
-------------------------------------------------------
S =  [[0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0]
 [0 0 2 1 0 1 0]
 [0 0 1 1 0 0 0]
 [0 0 0 2 1 0 1]
 [0 0 0 1 3 2 1]
 [0 0 0 0 2 2 1]
 [0 0 1 0 1 3 2]
 [0 0 0 2 1 2 4]]
alignment result:
s1: CGUATTGA
    || | |||
s2: CG-A-TGA

举报

相关推荐

0 条评论