I wrote a script that computes distances; it covers most of the common ones.
Geodesic distance depends on an underlying surface, the Hausdorff distance was implemented in an earlier post, and the Wasserstein distance can be solved with the Sinkhorn method (hedged sketches of the latter two follow the script below).
The code is as follows:
import numpy as np

# Euclidean (L2) distance
def Euclidean(a, b):
    return np.sqrt(np.sum((a - b) * (a - b)))

# Manhattan (L1) distance
def Manhattan(a, b):
    return np.sum(np.abs(a - b))

# Minkowski (Lp) distance; p=1 gives Manhattan, p=2 gives Euclidean
def Minkowski(a, b, p):
    return np.float_power(np.sum(np.power(np.abs(a - b), p)), 1.0 / p)

# Chebyshev (L-infinity) distance, the limit of Minkowski as p -> infinity
def Chebyshev(a, b):
    return np.max(np.abs(a - b))

# standardized Euclidean: each component scaled by its sample variance
def StdEuclidean(a, b):
    c = np.vstack((a, b))
    s = np.var(c, 0)
    return np.sqrt(np.sum((a - b) * (a - b) / s))

# one-dimensional Mahalanobis distance of a scalar sample a to the sample set b
def Mahalanobis(a, b):
    return np.sqrt((a - np.mean(b)).T @ (a - np.mean(b)) / np.cov(b))

# Lance-Williams distance; assumes positive components
def Lance(a, b):
    return np.sum(np.abs(a - b) / np.abs(a + b)) / len(a)

# cosine similarity (cosine distance is 1 minus this);
# the original divided by only the first norm due to missing parentheses
def Cosine(a, b):
    return np.sum(a * b) / (np.sqrt(np.sum(a * a)) * np.sqrt(np.sum(b * b)))

# Bray-Curtis dissimilarity
def BrayCurtis(a, b):
    return np.sum(np.abs(a - b)) / np.sum(a + b)

# Hamming distance between equal-length sequences
def Hamming(a, b):
    return sum(1 for x, y in zip(a, b) if x != y)

# edit (Levenshtein) distance via dynamic programming;
# rows are now indexed by a and columns by b, so unequal lengths are safe
# (the original swapped the dimensions and could index out of range)
def Edit(a, b):
    matrix = [[i + j for j in range(len(b) + 1)] for i in range(len(a) + 1)]
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            d = 0 if a[i - 1] == b[j - 1] else 1
            matrix[i][j] = min(matrix[i - 1][j] + 1,
                               matrix[i][j - 1] + 1,
                               matrix[i - 1][j - 1] + d)
    return matrix[len(a)][len(b)]

# Jaccard distance between sets
def Jaccard(a, b):
    return 1 - len(a.intersection(b)) / len(a.union(b))

# Ochiai distance between sets (a cosine analogue for sets)
def Ochiai(a, b):
    return 1 - len(a.intersection(b)) / np.sqrt(len(a) * len(b))

# Dice distance between sets
def Dice(a, b):
    return 1 - 2 * len(a.intersection(b)) / (len(a) + len(b))

# Pearson correlation coefficient;
# the original divided by only the first norm due to missing parentheses
def Pearson(a, b):
    a = a - np.mean(a)
    b = b - np.mean(b)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# chi-square distance
def ChiSquare(a, b):
    return np.sum(np.square(a - b) / b)

# cross entropy between distributions a and b
def CrossEntropy(a, b):
    return -np.sum(a * np.log(b))

# Kullback-Leibler divergence
def KL_Divergence(a, b):
    return np.sum(a * np.log(a / b))

# Jensen-Shannon divergence (symmetrized KL against the midpoint)
def JS_Divergence(a, b):
    m = (a + b) / 2
    return 0.5 * KL_Divergence(a, m) + 0.5 * KL_Divergence(b, m)

# Hellinger distance: sqrt(1 - Bhattacharyya coefficient);
# the original omitted the outer square root
def Hellinger(a, b):
    return np.sqrt(1 - np.sum(np.sqrt(a * b)))

# alpha-divergence family (recovers KL as alpha -> 1)
def alpha_Divergence(a, b, alpha):
    return (1.0 / (alpha * (1 - alpha))) * (1 - np.sum((a ** alpha) * (b ** (1 - alpha))))

# f-divergence with F(x) = x*log(x), which recovers KL
def F_Divergence(a, b):
    def F(x):
        return x * np.log(x)
    return np.sum(b * F(a / b))

# Bregman divergence with f(x) = ||x||^2, which recovers squared Euclidean
def Bregman(a, b):
    def f(x):
        return np.sum(x ** 2)
    def df(x):
        return 2 * x
    return np.sqrt(f(a) - f(b) - np.dot(df(b), a - b))

# Bhattacharyya distance: -ln of the Bhattacharyya coefficient;
# the original returned the coefficient itself
def Bhattacharyya(a, b):
    return -np.log(np.sum(np.sqrt(a * b)))

# squared maximum mean discrepancy with an RBF kernel (biased estimator)
def MMD(a, b):
    from sklearn.metrics.pairwise import rbf_kernel
    Kaa = rbf_kernel(a, a, 1.0)
    Kbb = rbf_kernel(b, b, 1.0)
    Kab = rbf_kernel(a, b, 1.0)
    return np.mean(Kaa) + np.mean(Kbb) - 2 * np.mean(Kab)

# pointwise mutual information of x=1 and y=2 over a toy dataset
def PMI():
    x = 1
    y = 2
    dataset = [[1, 2, 3], [2, 4, 5], [6, 7, 8], [2, 3, 4]]
    count_x = sum(1 for seq in dataset if x in seq)
    count_y = sum(1 for seq in dataset if y in seq)
    count_xy = sum(1 for seq in dataset if x in seq and y in seq)
    px = count_x / len(dataset)
    py = count_y / len(dataset)
    pxy = count_xy / len(dataset)
    return np.log(pxy / (px * py))

# geodesic distance depends on a surface
# def Geodesic(a,b):

# https://www.cnblogs.com/tiandsp/p/12623603.html
# def Hausdorff(a,b):

# https://www.cnblogs.com/tiandsp/p/18276246
# def Wasserstein(a,b):

if __name__ == '__main__':
    a = np.random.rand(6)
    b = np.random.rand(6)
    print(a)
    print(b)
    # normalized copies for the probability-based measures below,
    # which assume their inputs sum to 1
    p = a / np.sum(a)
    q = b / np.sum(b)
    print("Euclidean:", Euclidean(a, b))
    print("Manhattan:", Manhattan(a, b))
    print("Minkowski p3:", Minkowski(a, b, 3))
    # a large p makes Minkowski approach Chebyshev
    print("Chebyshev:", Chebyshev(a, b), Minkowski(a, b, 300))
    print("StdEuclidean:", StdEuclidean(a, b))
    print("Mahalanobis:", Mahalanobis(np.random.rand(1), b))
    print("Lance:", Lance(a, b))
    print("Cosine:", Cosine(a, b))
    print("BrayCurtis:", BrayCurtis(a, b))
    print("Hamming:", Hamming("1000111", "1111111"))
    print("Edit:", Edit("1000111", "1111111"))
    print("Jaccard:", Jaccard(set([1, 2, 3]), set([3, 4, 5, 6])))
    print("Ochiai:", Ochiai(set([1, 2, 3]), set([3, 4, 5, 6])))
    print("Dice:", Dice(set([1, 2, 3]), set([3, 4, 5, 6])))
    print("Pearson:", Pearson(a, b))
    print("ChiSquare:", ChiSquare(np.array([1, 2, 3, 4, 5, 6]), np.array([6, 5, 4, 3, 2, 1])))
    print("CrossEntropy:", CrossEntropy(p, q))
    print("KL_Divergence:", KL_Divergence(p, q))
    print("JS_Divergence:", JS_Divergence(p, q))
    print("Hellinger:", Hellinger(p, q))
    print("alpha_Divergence:", alpha_Divergence(p, q, 0.1))
    print("F_Divergence:", F_Divergence(p, q))
    print("Bregman:", Bregman(a, b))
    print("Bhattacharyya:", Bhattacharyya(p, q))
    print("MMD:", MMD(a.reshape(-1, 1), b.reshape(-1, 1)))
    print("PMI:", PMI())
Reference: https://blog.csdn.net/hy592070616/article/details/121723169?spm=1001.2014.3001.5501