From a995862970764ff45ef153a5bb47cff01ad42f0f Mon Sep 17 00:00:00 2001
From: Brian Formento
Date: Thu, 19 Sep 2024 10:46:26 +0800
Subject: [PATCH] Fix NaN in get_angular_sim by clamping cosine similarity

If we compare two equal embeddings, emb1 == emb2, the cosine similarity
should be 1. However, due to floating point precision, we can end up with
a value slightly greater than 1, such as 1.00004. torch.acos(cos_sim) is
undefined outside [-1, 1] and returns NaN, so get_angular_sim returns NaN
instead of 1.

Clamping with cos_sim = torch.clamp(cos_sim, -1.0, 1.0) keeps the value
within the valid range expected by torch.acos(cos_sim).
---
 .../constraints/semantics/sentence_encoders/sentence_encoder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py
index a1eee10c7..baea87925 100644
--- a/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py
+++ b/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py
@@ -215,6 +215,7 @@ def get_angular_sim(emb1, emb2):
     """Returns the _angular_ similarity between a batch of vector and a
     batch of vectors."""
     cos_sim = torch.nn.CosineSimilarity(dim=1)(emb1, emb2)
+    cos_sim = torch.clamp(cos_sim, -1.0, 1.0)
     return 1 - (torch.acos(cos_sim) / math.pi)
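
A minimal reproduction sketch for reviewers (the embedding width of 768 and
the random values are illustrative; it assumes only that PyTorch is
installed):

    import math

    import torch

    # Two identical embeddings; their cosine similarity can overshoot 1.0
    # by a tiny amount due to floating point rounding.
    emb1 = torch.randn(1, 768)
    emb2 = emb1.clone()

    cos_sim = torch.nn.CosineSimilarity(dim=1)(emb1, emb2)
    # acos is undefined outside [-1, 1], so this may already print nan.
    print(1 - (torch.acos(cos_sim) / math.pi))

    # With the clamp added in this patch the result stays defined and
    # prints a value at (or extremely close to) 1.
    cos_sim = torch.clamp(cos_sim, -1.0, 1.0)
    print(1 - (torch.acos(cos_sim) / math.pi))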