【Python】距離

Dsp Tian發表於2024-08-17

寫了一個計算距離的指令碼,常見距離基本都有。

其中測地距離需要依賴曲面,Hausdorff距離之前有實現,而Wasserstein距離可以用sinkhorn方法求解。

程式碼如下:

import numpy as np

def Euclidean(a, b):
    """Euclidean (L2) distance between vectors a and b."""
    diff = a - b
    return np.sqrt((diff ** 2).sum())

def Manhattan(a, b):
    """Manhattan (L1) distance: sum of absolute coordinate differences."""
    return np.abs(a - b).sum()

def Minkowski(a, b, p):
    """Minkowski (L_p) distance; p=1 gives Manhattan, p=2 gives Euclidean."""
    diff = np.abs(a - b)
    return np.float_power(np.power(diff, p).sum(), 1.0 / p)

def Chebyshev(a, b):
    """Chebyshev (L_inf) distance: the largest absolute coordinate difference."""
    return np.abs(a - b).max()

def StdEuclidean(a, b):
    """Standardized Euclidean distance.

    Each component of the squared difference is scaled by the per-component
    variance of the two points stacked together (population variance, ddof=0).
    NOTE(review): divides by zero for any component where a[i] == b[i].
    """
    variances = np.var(np.vstack((a, b)), axis=0)
    diff = a - b
    return np.sqrt((diff * diff / variances).sum())

def Mahalanobis(a, b):
    """Mahalanobis-style distance of point a from sample b.

    NOTE(review): np.mean(b) and np.cov(b) collapse b to scalars, so this
    only handles the univariate case (as used in the demo with a 1-element a).
    np.cov uses the unbiased estimator (ddof=1) on a 1-D sample.
    """
    deviation = a - np.mean(b)
    return np.sqrt(np.dot(deviation, deviation) / np.cov(b))

def Lance(a, b):
    """Lance-Williams (Canberra-style) distance, averaged over dimensions."""
    ratios = np.abs(a - b) / np.abs(a + b)
    return ratios.sum() / len(a)

def Cosine(a, b):
    """Cosine similarity of vectors a and b: dot(a,b) / (||a|| * ||b||).

    Bug fix: the original expression `x / np.sqrt(...) * np.sqrt(...)` divided
    by ||a|| but then *multiplied* by ||b|| due to left-to-right precedence;
    the denominator must be the product of both norms.
    """
    return np.sum(a * b) / (np.sqrt(np.sum(a * a)) * np.sqrt(np.sum(b * b)))

def BrayCurtis(a, b):
    """Bray-Curtis dissimilarity: sum|a-b| / sum(a+b) (assumes non-negative data)."""
    return np.abs(a - b).sum() / (a + b).sum()

def Hamming(a, b):
    """Hamming distance: number of positions where a and b differ.

    Iterates in lockstep, so any tail beyond the shorter sequence is ignored.
    """
    return sum(x != y for x, y in zip(a, b))

def Edit(a, b):
    """Levenshtein (edit) distance between sequences a and b.

    Classic dynamic program over a (len(a)+1) x (len(b)+1) table where
    matrix[i][j] is the distance between a[:i] and b[:j].

    Bug fix: the original built the table as (len(b)+1) rows by (len(a)+1)
    columns but indexed it as matrix[i<=len(a)][j<=len(b)], raising
    IndexError whenever len(a) != len(b). The table now matches the indexing.
    """
    matrix = [[i + j for j in range(len(b) + 1)] for i in range(len(a) + 1)]
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            # Substitution is free when the current elements already match.
            cost = 0 if a[i - 1] == b[j - 1] else 1
            matrix[i][j] = min(matrix[i - 1][j] + 1,      # deletion
                               matrix[i][j - 1] + 1,      # insertion
                               matrix[i - 1][j - 1] + cost)  # substitution
    return matrix[len(a)][len(b)]

def Jaccard(a, b):
    """Jaccard distance between sets: 1 - |a ∩ b| / |a ∪ b|."""
    overlap = a & b
    combined = a | b
    return 1 - len(overlap) / len(combined)

def Ochiia(a, b):
    """Ochiai distance between sets: 1 - |a ∩ b| / sqrt(|a|*|b|).

    NOTE(review): the function name looks like a typo of "Ochiai"; kept
    unchanged so existing callers keep working.
    """
    overlap = len(a & b)
    return 1 - overlap / np.sqrt(len(a) * len(b))

def Dice(a, b):
    """Dice distance between sets: 1 - 2*|a ∩ b| / (|a| + |b|)."""
    overlap = len(a & b)
    return 1 - (2 * overlap) / (len(a) + len(b))

def Pearson(a, b):
    """Pearson correlation coefficient between vectors a and b.

    Centers both vectors, then returns dot(a,b) / (||a|| * ||b||).

    Bug fix: the original `np.dot(a,b) / np.linalg.norm(a)*np.linalg.norm(b)`
    multiplied by ||b|| instead of dividing (left-to-right precedence); the
    denominator must be the product of both norms.
    """
    a = a - np.mean(a)
    b = b - np.mean(b)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def ChiSquare(a, b):
    """Chi-square statistic: sum of (a-b)^2 / b (b plays the expected counts)."""
    diff = a - b
    return (diff * diff / b).sum()

def CrossEntropy(a, b):
    """Cross entropy H(a, b) = -sum a*log(b) (natural log; expects b > 0)."""
    return -(a * np.log(b)).sum()

def KL_Divergence(a, b):
    """Kullback-Leibler divergence KL(a || b) = sum a*log(a/b); expects a, b > 0."""
    return (a * np.log(a / b)).sum()

def JS_Divergence(a, b):
    """Jensen-Shannon divergence: the symmetrized KL of a and b against
    their midpoint m = (a+b)/2 (KL written out inline)."""
    m = (a + b) / 2
    kl_am = np.sum(a * np.log(a / m))
    kl_bm = np.sum(b * np.log(b / m))
    return 0.5 * kl_am + 0.5 * kl_bm

def Hellinger(a, b):
    """Returns 1 - sum(sqrt(a*b)), i.e. one minus the Bhattacharyya coefficient.

    NOTE(review): for probability vectors this is the *squared* Hellinger
    distance; the Hellinger distance proper is its square root. Kept as-is
    to preserve the original behavior.
    """
    return 1 - np.sqrt(a * b).sum()

def alpha_Divergence(a, b, alpha):
    """Amari alpha-divergence: (1 - sum(a^alpha * b^(1-alpha))) / (alpha*(1-alpha)).

    Undefined at alpha = 0 or alpha = 1 (division by zero).
    """
    scale = 1.0 / (alpha * (1 - alpha))
    return scale * (1 - np.sum((a ** alpha) * (b ** (1 - alpha))))

# f-divergence; with generator f(t) = t*log(t) it reproduces KL(a || b).
def F_Divergence(a, b):
    """f-divergence sum(b * f(a/b)) with f(t) = t*log(t) (equals KL(a||b))."""
    ratio = a / b
    return np.sum(b * (ratio * np.log(ratio)))

# Bregman divergence; with f(x) = ||x||^2 its square root is the Euclidean distance.
def Bregman(a, b):
    """Bregman divergence D_f(a,b) = f(a) - f(b) - <grad f(b), a - b>,
    with f(x) = sum(x^2); the square root of that equals Euclidean(a, b)."""
    def f(x):
        return np.sum(x ** 2)

    def grad_f(x):
        return 2 * x

    return np.sqrt(f(a) - f(b) - np.dot(grad_f(b), a - b))

def Bhattacharyya(a, b):
    """Bhattacharyya coefficient sum(sqrt(a*b)).

    NOTE(review): this is the overlap *coefficient* (1 for identical
    probability vectors), not the Bhattacharyya distance -log(coefficient).
    """
    return np.sqrt(a * b).sum()

def MMD(a, b):
    """Squared maximum mean discrepancy between samples a and b (rows = points).

    Uses an RBF kernel k(x, y) = exp(-gamma * ||x - y||^2) with gamma = 1.0,
    matching sklearn's rbf_kernel(X, Y, 1.0) that the original called, but
    implemented directly with numpy so the script only depends on numpy.
    """
    def _rbf(x, y, gamma=1.0):
        # Pairwise squared distances via ||x||^2 + ||y||^2 - 2 x.y^T;
        # clamp at 0 to guard against tiny negative rounding residues.
        sq = (np.sum(x * x, axis=1)[:, None]
              + np.sum(y * y, axis=1)[None, :]
              - 2.0 * (x @ y.T))
        return np.exp(-gamma * np.maximum(sq, 0.0))

    Kaa = _rbf(a, a)
    Kbb = _rbf(b, b)
    Kab = _rbf(a, b)
    return np.mean(Kaa) + np.mean(Kbb) - 2 * np.mean(Kab)

def PMI(x=1, y=2, dataset=None):
    """Pointwise mutual information of items x and y over a dataset of sequences.

    PMI = log( p(x,y) / (p(x) * p(y)) ), where probabilities are the fraction
    of sequences containing the item(s).

    Generalization: the original hard-coded x, y, and the dataset inside the
    body; they are now parameters whose defaults reproduce the original
    behavior exactly, so PMI() is unchanged for existing callers.

    Args:
        x: first item (default 1, as in the original demo).
        y: second item (default 2).
        dataset: iterable of sequences; None selects the original demo data.
          (None sentinel avoids a mutable default argument.)
    """
    if dataset is None:
        dataset = [[1, 2, 3], [2, 4, 5], [6, 7, 8], [2, 3, 4]]
    n = len(dataset)
    count_x = sum(1 for seq in dataset if x in seq)
    count_y = sum(1 for seq in dataset if y in seq)
    count_xy = sum(1 for seq in dataset if x in seq and y in seq)
    px = count_x / n
    py = count_y / n
    pxy = count_xy / n
    return np.log(pxy / (px * py))

# 測地距離,依賴曲面
# def Geodesic(a,b):

# https://www.cnblogs.com/tiandsp/p/12623603.html
# def Hausdorff(a,b):

# https://www.cnblogs.com/tiandsp/p/18276246
# def Wasserstein(a,b):

if __name__ == '__main__':
    # Demo: print every distance on two random 6-vectors plus fixed examples.
    a = np.random.rand(6)
    b = np.random.rand(6)

    print(a)
    print(b)

    s1 = set([1, 2, 3])
    s2 = set([3, 4, 5, 6])
    # (label, value, ...) rows; values are computed eagerly in the same order
    # as the original script, which keeps the RNG draw sequence identical.
    rows = [
        ("Euclidean:", Euclidean(a, b)),
        ("Manhattan:", Manhattan(a, b)),
        ("Minkowski p3:", Minkowski(a, b, 3)),
        ("Chebyshev:", Chebyshev(a, b), Minkowski(a, b, 300)),
        ("StdEuclidean:", StdEuclidean(a, b)),
        ("Mahalanobis:", Mahalanobis(np.random.rand(1), b)),
        ("Lance:", Lance(a, b)),
        ("Cosine:", Cosine(a, b)),
        ("BrayCurtis:", BrayCurtis(a, b)),
        ("Hamming:", Hamming("1000111", "1111111")),
        ("Edit:", Edit("1000111", "1111111")),
        ("Jaccard:", Jaccard(s1, s2)),
        ("Ochiia:", Ochiia(s1, s2)),
        ("Dice:", Dice(s1, s2)),
        ("Pearson:", Pearson(a, b)),
        ("ChiSquare:", ChiSquare(np.array([1, 2, 3, 4, 5, 6]), np.array([6, 5, 4, 3, 2, 1]))),
        ("CrossEntropy:", CrossEntropy(a, b)),
        ("KL_Divergence:", KL_Divergence(a, b)),
        ("JS_Divergence:", JS_Divergence(a, b)),
        ("Hellinger:", Hellinger(a, b)),
        ("alpha_Divergence:", alpha_Divergence(a, b, 0.1)),
        ("F_Divergence:", F_Divergence(a, b)),
        ("Bregman:", Bregman(a, b)),
        ("Bhattacharyya:", Bhattacharyya(a, b)),
        ("MMD:", MMD(a.reshape(-1, 1), b.reshape(-1, 1))),
        ("PMI:", PMI()),
    ]
    for row in rows:
        print(*row)

參考:https://blog.csdn.net/hy592070616/article/details/121723169?spm=1001.2014.3001.5501

相關文章