From cac9bbe67b038a79915ad16a0e762ef13bf1108b Mon Sep 17 00:00:00 2001
From: Martin Balao <mbalao@openjdk.org>
Date: Tue, 30 Jan 2024 08:41:36 +0000
Subject: [PATCH] 8320548: Improved loop handling

Backport-of: 72eed2f709caa48c82f58fe75c7e94d2c45947e1
---
 src/hotspot/share/opto/superword.cpp | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index 07b07571d6e..db214da125f 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -3590,20 +3590,39 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
   _igvn.register_new_node_with_optimizer(N);
   _phase->set_ctrl(N, pre_ctrl);
 
+  // The computation of the new pre-loop limit could overflow or underflow the int range. This is problematic in
+  // combination with Range Check Elimination (RCE), which determines a "safe" range where a RangeCheck will always
+  // succeed. RCE adjusts the pre-loop limit such that we only enter the main-loop once we have reached the "safe"
+  // range, and adjusts the main-loop limit so that we exit the main-loop before we leave the "safe" range. After RCE,
+  // the range of the main-loop can only be safely narrowed, and should never be widened. Hence, the pre-loop limit
+  // may only increase for stride > 0 (an add overflow might decrease it) and only decrease for stride < 0 (a sub
+  // underflow might increase it). To prevent that, we perform the Sub / Add and Max / Min with long operations.
+  lim0       = new ConvI2LNode(lim0);
+  N          = new ConvI2LNode(N);
+  orig_limit = new ConvI2LNode(orig_limit);
+  _igvn.register_new_node_with_optimizer(lim0);
+  _igvn.register_new_node_with_optimizer(N);
+  _igvn.register_new_node_with_optimizer(orig_limit);
+
   //   substitute back into (1), so that new limit
   //     lim = lim0 + N
   Node* lim;
   if (stride < 0) {
-    lim = new SubINode(lim0, N);
+    lim = new SubLNode(lim0, N);
   } else {
-    lim = new AddINode(lim0, N);
+    lim = new AddLNode(lim0, N);
   }
   _igvn.register_new_node_with_optimizer(lim);
   _phase->set_ctrl(lim, pre_ctrl);
   Node* constrained =
-    (stride > 0) ? (Node*) new MinINode(lim, orig_limit)
-                 : (Node*) new MaxINode(lim, orig_limit);
+    (stride > 0) ? (Node*) new MinLNode(_phase->C, lim, orig_limit)
+                 : (Node*) new MaxLNode(_phase->C, lim, orig_limit);
   _igvn.register_new_node_with_optimizer(constrained);
+
+  // We know that the result is in the int range, so the conversion back to int never truncates.
+  constrained = new ConvL2INode(constrained);
+  _igvn.register_new_node_with_optimizer(constrained);
+
   _phase->set_ctrl(constrained, pre_ctrl);
   _igvn.replace_input_of(pre_opaq, 1, constrained);
 }
-- 
GitLab