import numpy as np
seq1='CGUATTGA'
seq2='CGATGA'
def compare(m, n, match, n_match):
if m == n:
return match
else:
return n_match
def Smith_Waterman(seq1, seq2, mS, mmS, w1):
path = {}
S = np.zeros([len(seq1) + 1, len(seq2) + 1], int)
for i in range(0, len(seq1) + 1):
for j in range(0, len(seq2) + 1):
if i == 0 or j == 0:
path['[' + str(i) + ', ' + str(j) + ']'] = []
else:
if seq1[i - 1] == seq2[j - 1]:
s = mS
else:
s = mmS
L = S[i - 1, j - 1] + s
P = S[i - 1, j] - w1
Q = S[i, j - 1] - w1
S[i, j] = max(L, P, Q, 0)
path['[' + str(i) + ', ' + str(j) + ']'] = []
if L == S[i, j]:
path['[' + str(i) + ', ' + str(j) + ']'].append('[' + str(i - 1) + ', ' + str(j - 1) + ']')
if P == S[i, j]:
path['[' + str(i) + ', ' + str(j) + ']'].append('[' + str(i - 1) + ', ' + str(j) + ']')
if Q == S[i, j]:
path['[' + str(i) + ', ' + str(j) + ']'].append('[' + str(i) + ', ' + str(j - 1) + ']')
print("S = ", S)
end = np.argwhere(S == S.max())
for i in end:
key = str(list(i))
value = path[key]
result = [key]
traceback(path, S, value, result, seq1, seq2)
def traceback(path, S, value, result, seq1, seq2):
if value != []:
key = value[0]
result.append(key)
value = path[key]
i = int((key.split(',')[0]).strip('['))
j = int((key.split(',')[1]).strip(']'))
if S[i, j] == 0:
x = 0
y = 0
s1 = ''
s2 = ''
md = ''
for n in range(len(result)-2, -1, -1):
point = result[n]
i = int((point.split(',')[0]).strip('['))
j = int((point.split(',')[1]).strip(']'))
if i == x:
s1 += '-'
s2 += seq2[j-1]
md += ' '
elif j == y:
s1 += seq1[i-1]
s2 += '-'
md += ' '
else:
s1 += seq1[i-1]
s2 += seq2[j-1]
md += '|'
x = i
y = j
print ('alignment result:')
print ('s1: %s'%s1)
print (' '+md)
print ('s2: %s'%s2)
else:
traceback(path, S, value, result, seq1, seq2)
Smith_Waterman(seq1, seq2, 1, -1/3, 1)
-------------------------------------------------------
S = [[0 0 0 0 0 0 0]
[0 1 0 0 0 0 0]
[0 0 2 1 0 1 0]
[0 0 1 1 0 0 0]
[0 0 0 2 1 0 1]
[0 0 0 1 3 2 1]
[0 0 0 0 2 2 1]
[0 0 1 0 1 3 2]
[0 0 0 2 1 2 4]]
alignment result:
s1: CGUATTGA
|| | |||
s2: CG-A-TGA