From a995862970764ff45ef153a5bb47cff01ad42f0f Mon Sep 17 00:00:00 2001
From: Brian Formento
Date: Thu, 19 Sep 2024 10:46:26 +0800
Subject: [PATCH] Fix NaN in get_angular_sim by clamping cosine similarity

If we compare two equal embeddings, emb1 == emb2, the cosine similarity
should be 1. However, due to floating point precision, we can end up with
a value slightly greater than 1, such as 1.00004. torch.acos(cos_sim) is
undefined outside [-1, 1] and returns NaN, so get_angular_sim returns NaN
instead of 1.

Clamping with cos_sim = torch.clamp(cos_sim, -1.0, 1.0) keeps the value
within the valid range expected by torch.acos(cos_sim).
---
 .../constraints/semantics/sentence_encoders/sentence_encoder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py
index a1eee10c7..baea87925 100644
--- a/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py
+++ b/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py
@@ -215,6 +215,7 @@ def get_angular_sim(emb1, emb2):
     """Returns the _angular_ similarity between a batch of vector and a
     batch of vectors."""
     cos_sim = torch.nn.CosineSimilarity(dim=1)(emb1, emb2)
+    cos_sim = torch.clamp(cos_sim, -1.0, 1.0)
     return 1 - (torch.acos(cos_sim) / math.pi)
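
A minimal reproduction sketch for reviewers (the embedding width of 768 and
the random values are illustrative; it assumes only that PyTorch is
installed):

    import math

    import torch

    # Two identical embeddings; their cosine similarity can overshoot 1.0
    # by a tiny amount due to floating point rounding.
    emb1 = torch.randn(1, 768)
    emb2 = emb1.clone()

    cos_sim = torch.nn.CosineSimilarity(dim=1)(emb1, emb2)
    # acos is undefined outside [-1, 1], so this may already print nan.
    print(1 - (torch.acos(cos_sim) / math.pi))

    # With the clamp added in this patch the result stays defined and
    # prints a value at (or extremely close to) 1.
    cos_sim = torch.clamp(cos_sim, -1.0, 1.0)
    print(1 - (torch.acos(cos_sim) / math.pi))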