- Here we use a loop-free (vectorized) method instead of a two-loop method to calculate the distance matrix.
- We find that the vectorized method is about 11 times faster than the traditional two-loop approach.
These are our running environment and results:
This is our Python code:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 22 09:22:52 2017
@author: brucelau
"""
import datetime
import time

import matplotlib.pyplot as plt
import numpy as np
# define the loop-free (vectorized) distance function
def no_loops(M,N):
    """Return the pairwise Euclidean distances between the rows of M and N.

    The matrix is computed without Python loops by expanding
    ``||m - n||^2 = ||m||^2 + ||n||^2 - 2 * m.n`` and letting broadcasting
    combine a column of squared row norms of ``M`` with a row of squared
    row norms of ``N``.

    Parameters
    ----------
    M : array_like, shape (r1, d)
        One sample per row.
    N : array_like, shape (r2, d)
        One sample per row; must have the same number of columns as ``M``.

    Returns
    -------
    numpy.ndarray, shape (r1, r2), dtype float64
        Entry ``[i, j]`` is the Euclidean distance between ``M[i]`` and
        ``N[j]``.
    """
    # Work in float64: squaring narrow integer dtypes (e.g. uint8 image
    # data) would silently wrap around and corrupt the result.
    M = np.asarray(M, dtype=np.float64)
    N = np.asarray(N, dtype=np.float64)

    m_sq = np.sum(M**2, axis=1).reshape([M.shape[0], 1])  # column vector
    n_sq = np.sum(N**2, axis=1).reshape([1, N.shape[0]])  # row vector
    cross = M.dot(N.T)

    sq_dists = m_sq + n_sq - 2 * cross
    # Floating-point rounding can leave tiny negative values for (nearly)
    # identical rows; clamp them so the square root never yields NaN.
    np.maximum(sq_dists, 0, out=sq_dists)
    return np.sqrt(sq_dists)
# define the two-loop reference function
def two_loops(M,N):
    """Return the pairwise Euclidean distances between the rows of M and N.

    Reference implementation that visits every (row of M, row of N) pair
    with two nested Python loops and calls ``np.linalg.norm`` on the
    difference of the two rows.

    Parameters
    ----------
    M : array_like, shape (r1, d)
        One sample per row.
    N : array_like, shape (r2, d)
        One sample per row; must have the same number of columns as ``M``.

    Returns
    -------
    numpy.ndarray, shape (r1, r2), dtype float64
        Entry ``[i, j]`` is the Euclidean distance between ``M[i]`` and
        ``N[j]``.
    """
    # Work in float64: subtracting unsigned integer rows (e.g. uint8 image
    # data) would wrap around instead of producing negative differences.
    M = np.asarray(M, dtype=np.float64)
    N = np.asarray(N, dtype=np.float64)

    r1 = M.shape[0]
    r2 = N.shape[0]
    # Every cell is overwritten below, so no initial fill value is needed.
    DM = np.empty((r1, r2))
    for i, row_m in enumerate(M):
        # row_m is invariant for the inner loop, so it is taken once per i.
        for j, row_n in enumerate(N):
            DM[i, j] = np.linalg.norm(row_m - row_n)
    return DM
# Generate random integer test data with values in [1, 10]:
# 600 rows for M_test and 100 rows for N_test, 784 columns each.
M_test = np.random.randint(1, 11, size=(600, 784))
N_test = np.random.randint(1, 11, size=(100, 784))

# Data visualization: show both matrices as images.
plt.matshow(M_test)
plt.matshow(N_test)

# Time comparison. perf_counter is a monotonic, high-resolution clock, so the
# measurement is not affected by wall-clock adjustments.
t_start = time.perf_counter()
dists_1 = no_loops(M_test, N_test)
t_mid = time.perf_counter()
dists_2 = two_loops(M_test, N_test)
t_end = time.perf_counter()

# Keep the durations as timedelta objects so they print as H:MM:SS.ffffff.
t1 = datetime.timedelta(seconds=t_mid - t_start)
t2 = datetime.timedelta(seconds=t_end - t_mid)
print('#########################################################')
print('Time for calculating no__loop is ',t1)
print('Time for calculating twoloops is ',t2)

# Floor the denominator at one nanosecond so an unmeasurably fast vectorized
# run cannot raise ZeroDivisionError.
times = (t_end - t_mid) / max(t_mid - t_start, 1e-9)
print('no loop function is about %d times faster than two loops function'%times)

# Error between the two methods: use the largest absolute difference, because
# a plain sum lets positive and negative deviations cancel each other out.
print('The error is ',np.max(np.abs(dists_1-dists_2)),'between two methods')