Pacific-Design.com

    
Home Index

1. Machine Learning

2. 1 Similarity

Machine Learning / 1 Similarity /

Measures of Similarity and Dissimilarity

%------------------------------------------------------------------------
%  Cosine Similarity, Correlation and Euclidean Distance Computation
%
%  Computes four (dis)similarity measures between two equal-length
%  numeric vectors x and y, writing every formula out explicitly so the
%  arithmetic can be followed step by step. All formulas are vectorised,
%  so x and y below may be replaced with vectors of any common length.
%
%  Lines without a trailing semicolon intentionally echo their result
%  to the command window.
%------------------------------------------------------------------------
clc
x = [1,0,0];
y = [1,0,1];

% Every measure below pairs x(i) with y(i), so the lengths must agree.
assert(numel(x) == numel(y), 'x and y must have the same number of elements');

n = numel(x);   % number of observations; the sample statistics divide by n-1

%--- Cosine Similarity ---
% cos(x,y) = (x . y) / (||x|| * ||y||)
xy = dot(x, y);         % inner product of x and y
x1 = norm(x);           % Euclidean (L2) length of x
y1 = norm(y);           % Euclidean (L2) length of y
CosineSimilarity = xy/(x1*y1)

%--- Covariance computation ---
% Sample covariance: sum of products of the deviations from the mean,
% divided by n-1.
dx = x - mean(x);       % deviations of x from its mean
dy = y - mean(y);       % deviations of y from its mean
Covariance = sum(dx .* dy) / (n - 1)

% Sample standard deviations, derived from the data itself (rather than
% from hand-typed constants) so they remain correct if x or y is edited.
StandardDev1= sqrt( sum(dx.^2) / (n - 1) )
StandardDev2= sqrt( sum(dy.^2) / (n - 1) )

%--- Correlation Similarity computation ---
% Pearson correlation = covariance / (std(x) * std(y)). The built-in std
% uses the same n-1 normalisation as the hand-written values above.
CorrelationSimilarity = Covariance / (std(x)*std(y))

%--- Euclidean Distance computation ---
% L2 norm of the element-wise difference.
EuclideanDistance = norm(x - y)

Result:

CosineSimilarity =

    0.7071


Covariance =

    0.1667


StandardDev1 =

    0.5774


StandardDev2 =

    0.5774


CorrelationSimilarity =

    0.5000


EuclideanDistance =

     1