利用BERT whitening可以将embedding 比如768维降到256维
def compute_kernel_bias(vecs, n_components=256):
"""计算kernel和bias
vecs.shape = [num_samples, embedding_size],
最后的变换:y = (x + bias).dot(kernel)
"""
mu = vecs.mean(axis=0, keepdims=True)
cov = np.cov(vecs.T)
u, s, vh = np.linalg.svd(cov)
W = np.dot(u, np.diag(1 / np.sqrt(s)))
return W[:, :n_components], -mu
def transform_and_normalize(vecs, kernel=None, bias=None):
""" 最终向量标准化
"""
if not (kernel is None or bias is None):
vecs = (vecs + bias).dot(kernel)
return vecs / (vecs**2).sum(axis=1, keepdims=True)**0.5
v_data = np.array(v_data)
kernel,bias=compute_kernel_bias(v_data,256)
v_data=transform_and_normalize(v_data, kernel=kernel, bias=bias)
参考:苏剑林. (Jan. 11, 2021). 《你可能不需要BERT-flow:一个线性变换媲美BERT-flow 》[Blog post]. Retrieved from https://spaces.ac.cn/archives/8069