% Journal article (Information Systems, vol. 91, 2020); `pages` holds the
% article number. `doi` is stored bare, without a resolver prefix.
% NOTE(review): the exponent in the abstract was reconstructed as n^{3/2}
% from the garbled export text "n3/2" -- confirm against the published abstract.
@article{FERRADA2020101510,
  author   = {Ferrada, Sebasti{\'a}n and Bustos, Benjamin and Reyes, Nora},
  title    = {An efficient algorithm for approximated self-similarity joins in metric spaces},
  journal  = {Information Systems},
  volume   = {91},
  pages    = {101510},
  year     = {2020},
  issn     = {0306-4379},
  doi      = {10.1016/j.is.2020.101510},
  url      = {http://www.sciencedirect.com/science/article/pii/S0306437920300211},
  keywords = {Similarity joins, kNN, Approximated nearest neighbors, Algorithms, Metric spaces},
  abstract = {Similarity join is a key operation in metric databases. It retrieves all pairs of elements that are similar. Solving such a problem usually requires comparing every pair of objects of the datasets, even when indexing and ad hoc algorithms are used. We propose a simple and efficient algorithm for the computation of the approximated k nearest neighbor self-similarity join. This algorithm computes $\Theta(n^{3/2})$ distances and it is empirically shown that it reaches an empirical precision of 46\% in real-world datasets. We provide a comparison to other common techniques such as Quickjoin and Locality-Sensitive Hashing and argue that our proposal has a better execution time and average precision.},
}