0
点赞
收藏
分享

微信扫一扫

Numpy 距离公式,获取重复元素


统计列表重复元素

from collections import defaultdict


# Map every distinct value in ``cc`` to the list of indices where it occurs.
dd = defaultdict(list)

cc = [1, 2, 3, 2, 4]
for index, value in enumerate(cc):
    dd[value].append(index)
print('dd:', dd)


# Show, per value, the indices at which it appears; a value that is repeated
# in ``cc`` has more than one index. Iterating ``dd`` directly visits the
# values in order of first appearance.
for k, indices in dd.items():
    print(f'dd[{k}]:', indices)


'''
输出:
dd: defaultdict(<class 'list'>, {1: [0], 2: [1, 3], 3: [2], 4: [4]})
dd[1]: [0]
dd[2]: [1, 3]
dd[3]: [2]
dd[4]: [4]
'''

获取重复元素

a = [1, 2, 3, 2, 1, 5, 6, 5, 5, 5]

# Collect every value that occurs more than once in ``a``. A single pass with
# a "seen" set avoids re-scanning the tail of the list for each element.
seen = set()
duplicated = set()
for item in a:
    if item in seen:
        duplicated.add(item)
    else:
        seen.add(item)
print('duplicated:', duplicated)

'''
输出:
duplicated: {1, 2, 5}
'''

# 统计二维数组中某一列的重复值,并获取该数组中包含这些重复值的行
def Duplicate_elements(array, axis=0):
    """Return the rows of ``array`` whose entry at position ``axis`` is repeated.

    A row is kept when the value it holds at index ``axis`` also appears at
    that index in at least one other row. Rows are returned in their original
    order and are the same objects as in ``array`` (no copies are made).

    :param array: sequence of indexable rows, e.g. a list of ``[x, y]`` pairs;
        it is iterated twice, so it must not be a one-shot iterator
    :param axis: index inside each row used as the comparison key
    :return: list of the rows whose key occurs more than once
    """
    # First pass: count how many rows carry each key.
    occurrences = defaultdict(int)
    for row in array:
        occurrences[row[axis]] += 1
    # Second pass: keep only the rows whose key was counted at least twice.
    return [row for row in array if occurrences[row[axis]] > 1]

# Sample rows: the first column repeats 1 and 3, the second repeats 2 and 5.
array = [[1, 2], [3, 2], [4, 5], [1, 3], [2, 5], [3, 8]]
# Run the duplicate search once per column.
X_Duplicates, Y_Duplicates = (
    Duplicate_elements(array, axis=column) for column in (0, 1)
)
print(f'X_Duplicates: {X_Duplicates}')
print(f'Y_Duplicates: {Y_Duplicates}')


'''
X_Duplicates: [[1, 2], [3, 2], [1, 3], [3, 8]]
Y_Duplicates: [[1, 2], [3, 2], [4, 5], [2, 5]]
'''

 

八大距离公式


import numpy as np
from scipy.spatial.distance import pdist

def minkowski_distance(vec1, vec2, p=3):
    """Return the Minkowski distance of order ``p`` between two vectors.

    Special cases of the order:
    ``p=1`` is the Manhattan distance, ``p=2`` the Euclidean distance and
    ``p=np.inf`` the Chebyshev distance.

    :param vec1: first vector, a 1-D array-like of numbers
    :param vec2: second vector, same length as ``vec1``
    :param p: order handed to ``np.linalg.norm`` as ``ord``
    :return: the ``p``-norm of ``vec1 - vec2``
    """
    # Convert first so plain lists/tuples work; list - list is not defined.
    difference = np.asarray(vec1) - np.asarray(vec2)
    return np.linalg.norm(difference, ord=p)

def cosine_distance(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Despite the function name, the returned value is the cosine
    *similarity* (dot product divided by the product of the norms), not
    ``1 - similarity``: it is 1 for vectors pointing the same way, 0 for
    orthogonal vectors and -1 for opposite vectors.

    :param vec1: first vector, a 1-D array-like of numbers
    :param vec2: second vector, same length as ``vec1``
    :return: ``vec1 . vec2 / (|vec1| * |vec2|)``; if either vector has zero
        norm the denominator is zero
    """
    # Convert first so plain lists/tuples work as well as ndarrays.
    first = np.asarray(vec1)
    second = np.asarray(vec2)
    norm_product = np.linalg.norm(first) * np.linalg.norm(second)
    return np.dot(first, second) / norm_product

def euclidean_distance(vec1, vec2):
    """Return the Euclidean (straight-line) distance between two vectors.

    :param vec1: first vector, a 1-D array-like of numbers
    :param vec2: second vector, same length as ``vec1``
    :return: the 2-norm of ``vec1 - vec2``
    """
    # Convert first so plain lists/tuples work; list - list is not defined.
    difference = np.asarray(vec1) - np.asarray(vec2)
    return np.linalg.norm(difference, ord=2)

def manhattan_distance(vec1, vec2):
    """Return the Manhattan (city-block) distance between two vectors.

    :param vec1: first vector, a 1-D array-like of numbers
    :param vec2: second vector, same length as ``vec1``
    :return: the 1-norm of ``vec1 - vec2``, i.e. the sum of the absolute
        coordinate differences
    """
    # Convert first so plain lists/tuples work; list - list is not defined.
    difference = np.asarray(vec1) - np.asarray(vec2)
    return np.linalg.norm(difference, ord=1)

def chebyshev_distance(vec1, vec2):
    """Return the Chebyshev distance between two vectors.

    :param vec1: first vector, a 1-D array-like of numbers
    :param vec2: second vector, same length as ``vec1``
    :return: the infinity-norm of ``vec1 - vec2``, i.e. the largest absolute
        coordinate difference
    """
    # Convert first so plain lists/tuples work; list - list is not defined.
    difference = np.asarray(vec1) - np.asarray(vec2)
    return np.linalg.norm(difference, ord=np.inf)

def hamming_distance(vec1, vec2):
    """Return the Hamming distance: the number of positions that differ.

    :param vec1: first vector, an array-like (numeric or boolean)
    :param vec2: second vector, same shape as ``vec1``
    :return: count of positions at which the two inputs are not equal,
        as a plain ``int``
    """
    # Compare with != rather than subtracting: NumPy refuses to subtract
    # boolean arrays, and lists do not support "-" at all.
    mismatches = np.asarray(vec1) != np.asarray(vec2)
    return int(np.count_nonzero(mismatches))

def jaccard_similarity_coefficient(vec1, vec2):
    """Return the Jaccard distance between two vectors.

    Despite the function name, the value comes from SciPy's ``'jaccard'``
    metric, which is a dissimilarity (0 means identical), not a similarity.

    :param vec1: first vector, a 1-D array-like (typically boolean / 0-1)
    :param vec2: second vector, same length as ``vec1``
    :return: the condensed distance array produced by ``pdist`` for the two
        stacked vectors, i.e. a 1-element array holding the distance
    """
    # Call the pdist imported at file level; the previous ``dist.pdist``
    # referenced a name ``dist`` that is never defined in this file.
    return pdist(np.array([vec1, vec2]), 'jaccard')

求两点之间的距离

import numpy as np

# Two equivalent ways to get the Euclidean distance between a pair of points.
vector1 = np.array([3, 4])
vector2 = np.array([0, 0])

# Explicit formula: square root of the summed squared coordinate differences.
op1 = np.sqrt(np.square(vector1 - vector2).sum())
# Same quantity through the default norm of the difference vector.
op2 = np.linalg.norm(vector1 - vector2)
print(op1, op2, sep='\n')
# Output:
# 5.0
# 5.0

求多个点之间两两的距离

import numpy as np
from scipy.spatial.distance import pdist, squareform



# Five sample points on the line y = x.
points = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]

# pdist yields the pairwise distances in condensed form; squareform expands
# them into the full symmetric matrix printed below.
distance = squareform(pdist(points))
distance_1_3 = np.triu(distance, 0)  # upper triangle; entries below the diagonal become 0
# NOTE: corr_coef and Det are computed but never printed or used afterwards.
corr_coef = np.corrcoef(distance, distance)  # Pearson correlation coefficients
Det = np.linalg.det(corr_coef)  # determinant of the correlation matrix

print('============== 矩阵距离===================\n',distance)
print('============== 右上三角===================\n',distance_1_3)

# Flatten the entries strictly above the diagonal, row by row. tolist()
# converts them to plain Python floats so the printed list shows bare numbers.
array_list1 = distance_1_3[np.triu_indices_from(distance_1_3, k=1)].tolist()
print('==============获取右上三角:==============\n',array_list1)

'''输出:
============== 矩阵距离===================
[[0. 1.41421356 2.82842712 4.24264069 5.65685425]
[1.41421356 0. 1.41421356 2.82842712 4.24264069]
[2.82842712 1.41421356 0. 1.41421356 2.82842712]
[4.24264069 2.82842712 1.41421356 0. 1.41421356]
[5.65685425 4.24264069 2.82842712 1.41421356 0. ]]
============== 右上三角===================
[[0. 1.41421356 2.82842712 4.24264069 5.65685425]
[0. 0. 1.41421356 2.82842712 4.24264069]
[0. 0. 0. 1.41421356 2.82842712]
[0. 0. 0. 0. 1.41421356]
[0. 0. 0. 0. 0. ]]

==============获取右上三角:==============
[1.4142135623730951, 2.8284271247461903, 4.242640687119285, 5.656854249492381, 1.4142135623730951,
2.8284271247461903, 4.242640687119285, 1.4142135623730951, 2.8284271247461903, 1.4142135623730951]
'''

 

举报

相关推荐

0 条评论