From fdf897f03f426aa6cb5f93d88cbb841f6f253edd Mon Sep 17 00:00:00 2001
From: Martin Balao <mbalao@openjdk.org>
Date: Wed, 14 Aug 2024 21:12:00 +0000
Subject: [PATCH] 8328544: Improve handling of vectorization

Reviewed-by: roland, yan
Backport-of: b5174c9159fbffdf335ee6835267ba0e674cf432
---
 src/hotspot/share/opto/superword.cpp | 472 ++++++++++++++++++++++++++-
 src/hotspot/share/opto/superword.hpp |  90 ++++-
 2 files changed, 553 insertions(+), 9 deletions(-)

diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index db214da125f..8c1e0351649 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -3742,6 +3742,10 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
   _mem(mem), _slp(slp),  _base(nullptr),  _adr(nullptr),
   _scale(0), _offset(0), _invar(nullptr), _negate_invar(false),
   _invar_scale(nullptr),
+  _has_int_index_after_convI2L(false),
+  _int_index_after_convI2L_offset(0),
+  _int_index_after_convI2L_invar(nullptr),
+  _int_index_after_convI2L_scale(0),
   _nstack(nstack), _analyze_only(analyze_only),
   _stack_idx(0)
 #ifndef PRODUCT
@@ -3800,6 +3804,11 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
   NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)
   NOT_PRODUCT(_tracer.ctor_6(mem);)
 
+  if (!is_safe_to_use_as_simple_form(base, adr)) {
+    assert(!valid(), "does not have simple form");
+    return;
+  }
+
   _base = base;
   _adr  = adr;
   assert(valid(), "Usable");
@@ -3811,6 +3820,10 @@ SWPointer::SWPointer(SWPointer* p) :
   _mem(p->_mem), _slp(p->_slp),  _base(nullptr),  _adr(nullptr),
   _scale(0), _offset(0), _invar(nullptr), _negate_invar(false),
   _invar_scale(nullptr),
+  _has_int_index_after_convI2L(false),
+  _int_index_after_convI2L_offset(0),
+  _int_index_after_convI2L_invar(nullptr),
+  _int_index_after_convI2L_scale(0),
   _nstack(p->_nstack), _analyze_only(p->_analyze_only),
   _stack_idx(p->_stack_idx)
   #ifndef PRODUCT
@@ -3818,6 +3831,354 @@ SWPointer::SWPointer(SWPointer* p) :
   #endif
 {}
 
+// We would like to make decisions about aliasing (i.e. removing memory edges) and adjacency
+// (i.e. which loads/stores can be packed) based on the simple form:
+//
+//   s_pointer = adr + offset + invar + scale * ConvI2L(iv)
+//
+// However, we parse the compound-long-int form:
+//
+//   c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
+//   int_index =       int_offset  + int_invar  + int_scale  * iv
+//
+// In general, the simple and the compound-long-int form do not always compute the same pointer
+// at runtime. For example, the simple form would give a different result due to an overflow
+// in the int_index.
+//
+// Example:
+//   For both forms, we have:
+//     iv = 0
+//     scale = 1
+//
+//   We now account the offset and invar once to the long part and once to the int part:
+//     Pointer 1 (long offset and long invar):
+//       long_offset = min_int
+//       long_invar  = min_int
+//       int_offset  = 0
+//       int_invar   = 0
+//
+//     Pointer 2 (int offset and int invar):
+//       long_offset = 0
+//       long_invar  = 0
+//       int_offset  = min_int
+//       int_invar   = min_int
+//
+//   This gives us the following pointers:
+//     Compound-long-int form pointers:
+//       Form:
+//         c_pointer   = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
+//
+//       Pointers:
+//         c_pointer1  = adr + min_int     + min_int    + 1          * ConvI2L(0          + 0         + 1         * 0)
+//                     = adr + min_int + min_int
+//                     = adr - 2^32
+//
+//         c_pointer2  = adr + 0           + 0          + 1          * ConvI2L(min_int    + min_int   + 1         * 0)
+//                     = adr + ConvI2L(min_int + min_int)
+//                     = adr + 0
+//                     = adr
+//
+//     Simple form pointers:
+//       Form:
+//         s_pointer  = adr + offset                     + invar                     + scale                    * ConvI2L(iv)
+//         s_pointer  = adr + (long_offset + int_offset) + (long_invar  + int_invar) + (long_scale * int_scale) * ConvI2L(iv)
+//
+//       Pointers:
+//         s_pointer1 = adr + (min_int     + 0         ) + (min_int     + 0        ) + 1                        * 0
+//                    = adr + min_int + min_int
+//                    = adr - 2^32
+//         s_pointer2 = adr + (0           + min_int   ) + (0           + min_int  ) + 1                        * 0
+//                    = adr + min_int + min_int
+//                    = adr - 2^32
+//
+//   We see that the two addresses are actually 2^32 bytes apart (derived from the c_pointers), but their simple form look identical.
+//
+// Hence, we need to determine in which cases it is safe to make decisions based on the simple
+// form, rather than the compound-long-int form. If we cannot prove that using the simple form
+// is safe (i.e. equivalent to the compound-long-int form), then we do not get a valid SWPointer,
+// and the associated memop cannot be vectorized.
+bool SWPointer::is_safe_to_use_as_simple_form(Node* base, Node* adr) const {
+#ifndef _LP64
+  // On 32-bit platforms, there is never an explicit int_index with ConvI2L for the iv. Thus, the
+  // parsed pointer form is always the simple form, with int operations:
+  //
+  //   pointer = adr + offset + invar + scale * iv
+  //
+  assert(!_has_int_index_after_convI2L, "32-bit never has an int_index with ConvI2L for the iv");
+  return true;
+#else
+
+  // Array accesses that are not Unsafe always have a RangeCheck which ensures that there is no
+  // int_index overflow. This implies that the conversion to long can be done separately:
+  //
+  //   ConvI2L(int_index) = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(scale) * ConvI2L(iv)
+  //
+  // And hence, the simple form is guaranteed to be identical to the compound-long-int form at
+  // runtime and the SWPointer is safe/valid to be used.
+  const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr();
+  if (ary_ptr_t != nullptr) {
+    if (!_mem->is_unsafe_access()) {
+      return true;
+    }
+  }
+
+  // We did not find the int_index. Just to be safe, reject this SWPointer.
+  if (!_has_int_index_after_convI2L) {
+    return false;
+  }
+
+  int int_offset  = _int_index_after_convI2L_offset;
+  Node* int_invar = _int_index_after_convI2L_invar;
+  int int_scale   = _int_index_after_convI2L_scale;
+  int long_scale  = _scale / int_scale;
+
+  // If "int_index = iv", then the simple form is identical to the compound-long-int form.
+  //
+  //   int_index = int_offset + int_invar + int_scale * iv
+  //             = 0            0           1         * iv
+  //             =                                      iv
+  if (int_offset == 0 && int_invar == nullptr && int_scale == 1) {
+    return true;
+  }
+
+  // Intuition: What happens if the int_index overflows? Let us look at two pointers on the "overflow edge":
+  //
+  //              pointer1 = adr + ConvI2L(int_index1)
+  //              pointer2 = adr + ConvI2L(int_index2)
+  //
+  //              int_index1 = max_int + 0 = max_int  -> very close to but before the overflow
+  //              int_index2 = max_int + 1 = min_int  -> just enough to get the overflow
+  //
+  //            When looking at the difference of pointer1 and pointer2, we notice that it is very large
+  //            (almost 2^32). Since arrays have at most 2^31 elements, chances are high that pointer2 is
+  //            an actual out-of-bounds access at runtime. These would normally be prevented by range checks
+  //            at runtime. However, if the access was done by using Unsafe, where range checks are omitted,
+  //            then an out-of-bounds access constitutes undefined behavior. This means that we are allowed to
+  //            do anything, including changing the behavior.
+  //
+  //            If we can set the right conditions, we have a guarantee that an overflow is either impossible
+  //            (no overflow or range checks preventing that) or undefined behavior. In both cases, we are
+  //            safe to do a vectorization.
+  //
+  // Approach:  We want to prove a lower bound for the distance between these two pointers, and an
+  //            upper bound for the size of a memory object. We can derive such an upper bound for
+  //            arrays. We know they have at most 2^31 elements. If we know the size of the elements
+  //            in bytes, we have:
+  //
+  //              array_element_size_in_bytes * 2^31 >= max_possible_array_size_in_bytes
+  //                                                 >= array_size_in_bytes                      (ARR)
+  //
+  //            If some small difference "delta" leads to an int_index overflow, we know that the
+  //            int_index1 before overflow must have been close to max_int, and the int_index2 after
+  //            the overflow must be close to min_int:
+  //
+  //              pointer1 =        adr + long_offset + long_invar + long_scale * ConvI2L(int_index1)
+  //                       =approx  adr + long_offset + long_invar + long_scale * max_int
+  //
+  //              pointer2 =        adr + long_offset + long_invar + long_scale * ConvI2L(int_index2)
+  //                       =approx  adr + long_offset + long_invar + long_scale * min_int
+  //
+  //            We realize that the pointer difference is very large:
+  //
+  //              difference =approx  long_scale * 2^32
+  //
+  //            Hence, if we set the right condition for long_scale and array_element_size_in_bytes,
+  //            we can prove that an overflow is impossible (or would imply undefined behaviour).
+  //
+  // We must now take this intuition, and develop a rigorous proof. We start by stating the problem
+  // more precisely, with the help of some definitions and the Statement we are going to prove.
+  //
+  // Definition:
+  //   Two SWPointers are "comparable" (i.e. SWPointer::comparable is true, set with SWPointer::cmp()),
+  //   iff all of these conditions apply for the simple form:
+  //     1) Both SWPointers are valid.
+  //     2) The adr are identical, or both are array bases of different arrays.
+  //     3) They have identical scale.
+  //     4) They have identical invar.
+  //     5) The difference in offsets is limited: abs(offset1 - offset2) < 2^31.                 (DIFF)
+  //
+  // For the Vectorization Optimization, we pair-wise compare SWPointers and determine if they are:
+  //   1) "not comparable":
+  //        We do not optimize them (assume they alias, not assume adjacency).
+  //
+  //        Whenever we chose this option based on the simple form, it is also correct based on the
+  //        compound-long-int form, since we make no optimizations based on it.
+  //
+  //   2) "comparable" with different array bases at runtime:
+  //        We assume they do not alias (remove memory edges), but not assume adjacency.
+  //
+  //        Whenever we have two different array bases for the simple form, we also have different
+  //        array bases for the compound-long-form. Since SWPointers provably point to different
+  //        memory objects, they can never alias.
+  //
+  //   3) "comparable" with the same base address:
+  //        We compute the relative pointer difference, and based on the load/store size we can
+  //        compute aliasing and adjacency.
+  //
+  //        We must find a condition under which the pointer difference of the simple form is
+  //        identical to the pointer difference of the compound-long-form. We do this with the
+  //        Statement below, which we then proceed to prove.
+  //
+  // Statement:
+  //   If two SWPointers satisfy these 3 conditions:
+  //     1) They are "comparable".
+  //     2) They have the same base address.
+  //     3) Their long_scale is a multiple of the array element size in bytes:
+  //
+  //          abs(long_scale) % array_element_size_in_bytes = 0                                     (A)
+  //
+  //   Then their pointer difference of the simple form is identical to the pointer difference
+  //   of the compound-long-int form.
+  //
+  //   More precisely:
+  //     Such two SWPointers by definition have identical adr, invar, and scale.
+  //     Their simple form is:
+  //
+  //       s_pointer1 = adr + offset1 + invar + scale * ConvI2L(iv)                                 (B1)
+  //       s_pointer2 = adr + offset2 + invar + scale * ConvI2L(iv)                                 (B2)
+  //
+  //     Thus, the pointer difference of the simple forms collapses to the difference in offsets:
+  //
+  //       s_difference = s_pointer1 - s_pointer2 = offset1 - offset2                               (C)
+  //
+  //     Their compound-long-int form for these SWPointer is:
+  //
+  //       c_pointer1 = adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)        (D1)
+  //       int_index1 = int_offset1 + int_invar1 + int_scale1 * iv                                  (D2)
+  //
+  //       c_pointer2 = adr + long_offset2 + long_invar2 + long_scale2 * ConvI2L(int_index2)        (D3)
+  //       int_index2 = int_offset2 + int_invar2 + int_scale2 * iv                                  (D4)
+  //
+  //     And these are the offset1, offset2, invar and scale from the simple form (B1) and (B2):
+  //
+  //       offset1 = long_offset1 + long_scale1 * ConvI2L(int_offset1)                              (D5)
+  //       offset2 = long_offset2 + long_scale2 * ConvI2L(int_offset2)                              (D6)
+  //
+  //       invar   = long_invar1 + long_scale1 * ConvI2L(int_invar1)
+  //               = long_invar2 + long_scale2 * ConvI2L(int_invar2)                                (D7)
+  //
+  //       scale   = long_scale1 * ConvI2L(int_scale1)
+  //               = long_scale2 * ConvI2L(int_scale2)                                              (D8)
+  //
+  //     The pointer difference of the compound-long-int form is defined as:
+  //
+  //       c_difference = c_pointer1 - c_pointer2
+  //
+  //   Thus, the statement claims that for the two SWPointer we have:
+  //
+  //     s_difference = c_difference                                                                (Statement)
+  //
+  // We prove the Statement with the help of a Lemma:
+  //
+  // Lemma:
+  //   There is some integer x, such that:
+  //
+  //     c_difference = s_difference + array_element_size_in_bytes * x * 2^32                       (Lemma)
+  //
+  // From condition (DIFF), we can derive:
+  //
+  //   abs(s_difference) < 2^31                                                                     (E)
+  //
+  // Assuming the Lemma, we prove the Statement:
+  //   If "x = 0" (intuitively: the int_index does not overflow), then:
+  //     c_difference = s_difference
+  //     and hence the simple form computes the same pointer difference as the compound-long-int form.
+  //   If "x != 0" (intuitively: the int_index overflows), then:
+  //     abs(c_difference) >= abs(s_difference + array_element_size_in_bytes * x * 2^32)
+  //                       >= array_element_size_in_bytes * 2^32 - abs(s_difference)
+  //                                                               --  apply (E)  --
+  //                       >  array_element_size_in_bytes * 2^32 - 2^31
+  //                       >= array_element_size_in_bytes * 2^31
+  //                              --  apply (ARR)  --
+  //                       >= max_possible_array_size_in_bytes
+  //                       >= array_size_in_bytes
+  //
+  //     This shows that c_pointer1 and c_pointer2 have a distance that exceeds the maximum array size.
+  //     Thus, at least one of the two pointers must be outside of the array bounds. But we can assume
+  //     that out-of-bounds accesses do not happen. If they still do, it is undefined behavior. Hence,
+  //     we are allowed to do anything. We can also "safely" use the simple form in this case even though
+  //     it might not match the compound-long-int form at runtime.
+  // QED Statement.
+  //
+  // We must now prove the Lemma.
+  //
+  // ConvI2L always truncates by some power of 2^32, i.e. there is some integer y such that:
+  //
+  //   ConvI2L(y1 + y2) = ConvI2L(y1) + ConvI2L(y2) + 2^32 * y                                  (F)
+  //
+  // It follows, that there is an integer y1 such that:
+  //
+  //   ConvI2L(int_index1) =  ConvI2L(int_offset1 + int_invar1 + int_scale1 * iv)
+  //                          -- apply (F) --
+  //                       =  ConvI2L(int_offset1)
+  //                        + ConvI2L(int_invar1)
+  //                        + ConvI2L(int_scale1) * ConvI2L(iv)
+  //                        + y1 * 2^32                                                         (G)
+  //
+  // Thus, we can write the compound-long-int form (D1) as:
+  //
+  //   c_pointer1 =   adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)
+  //                  -- apply (G) --
+  //              =   adr
+  //                + long_offset1
+  //                + long_invar1
+  //                + long_scale1 * ConvI2L(int_offset1)
+  //                + long_scale1 * ConvI2L(int_invar1)
+  //                + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)
+  //                + long_scale1 * y1 * 2^32                                                    (H)
+  //
+  // And we can write the simple form as:
+  //
+  //   s_pointer1 =   adr + offset1 + invar + scale * ConvI2L(iv)
+  //                  -- apply (D5, D7, D8) --
+  //              =   adr
+  //                + long_offset1
+  //                + long_scale1 * ConvI2L(int_offset1)
+  //                + long_invar1
+  //                + long_scale1 * ConvI2L(int_invar1)
+  //                + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)                            (K)
+  //
+  // We now compute the pointer difference between the simple (K) and compound-long-int form (H).
+  // Most terms cancel out immediately:
+  //
+  //   sc_difference1 = c_pointer1 - s_pointer1 = long_scale1 * y1 * 2^32                        (L)
+  //
+  // Rearranging the equation (L), we get:
+  //
+  //   c_pointer1 = s_pointer1 + long_scale1 * y1 * 2^32                                         (M)
+  //
+  // And since long_scale1 is a multiple of array_element_size_in_bytes, there is some integer
+  // x1, such that (M) implies:
+  //
+  //   c_pointer1 = s_pointer1 + array_element_size_in_bytes * x1 * 2^32                         (N)
+  //
+  // With an analogue equation for c_pointer2, we can now compute the pointer difference for
+  // the compound-long-int form:
+  //
+  //   c_difference =  c_pointer1 - c_pointer2
+  //                   -- apply (N) --
+  //                =  s_pointer1 + array_element_size_in_bytes * x1 * 2^32
+  //                 -(s_pointer2 + array_element_size_in_bytes * x2 * 2^32)
+  //                   -- where "x = x1 - x2" --
+  //                =  s_pointer1 - s_pointer2 + array_element_size_in_bytes * x * 2^32
+  //                   -- apply (C) --
+  //                =  s_difference            + array_element_size_in_bytes * x * 2^32
+  // QED Lemma.
+  if (ary_ptr_t != nullptr) {
+    BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type();
+    if (is_java_primitive(array_element_bt)) {
+      int array_element_size_in_bytes = type2aelembytes(array_element_bt);
+      if (abs(long_scale) % array_element_size_in_bytes == 0) {
+        return true;
+      }
+    }
+  }
+
+  // General case: we do not know if it is safe to use the simple form.
+  return false;
+#endif
+}
+
 bool SWPointer::is_main_loop_member(Node* n) const {
   Node* n_c = phase()->get_ctrl(n);
   return lpt()->is_member(phase()->get_loop(n_c));
@@ -3918,6 +4279,37 @@ bool SWPointer::scaled_iv(Node* n) {
       NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
       return true;
     }
+  } else if (opc == Op_ConvI2L && !has_iv()) {
+    // So far we have not found the iv yet, and are about to enter a ConvI2L subgraph,
+    // which may be the int index (that might overflow) for the memory access, of the form:
+    //
+    //   int_index = int_offset + int_invar + int_scale * iv
+    //
+    // If we simply continue parsing with the current SWPointer, then the int_offset and
+    // int_invar simply get added to the long offset and invar. But for the checks in
+    // SWPointer::is_safe_to_use_as_simple_form() we need to have explicit access to the
+    // int_index. Thus, we must parse it explicitly here. For this, we use a temporary
+    // SWPointer, to pattern match the int_index sub-expression of the address.
+
+    NOT_PRODUCT(Tracer::Depth dddd;)
+    SWPointer tmp(this);
+    NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)
+
+    if (tmp.scaled_iv_plus_offset(n->in(1)) && tmp.has_iv()) {
+      // We successfully matched an integer index, of the form:
+      //   int_index = int_offset + int_invar + int_scale * iv
+      _has_int_index_after_convI2L = true;
+      _int_index_after_convI2L_offset = tmp._offset;
+      _int_index_after_convI2L_invar  = tmp._invar;
+      _int_index_after_convI2L_scale  = tmp._scale;
+    }
+
+    // Now parse it again for the real SWPointer. This makes sure that the int_offset, int_invar,
+    // and int_scale are properly added to the final SWPointer's offset, invar, and scale.
+    if (scaled_iv_plus_offset(n->in(1))) {
+      NOT_PRODUCT(_tracer.scaled_iv_7(n);)
+      return true;
+    }
   } else if (opc == Op_ConvI2L || opc == Op_CastII) {
     if (scaled_iv_plus_offset(n->in(1))) {
       NOT_PRODUCT(_tracer.scaled_iv_7(n);)
@@ -3934,13 +4326,29 @@ bool SWPointer::scaled_iv(Node* n) {
 
       if (tmp.scaled_iv_plus_offset(n->in(1))) {
         int scale = n->in(2)->get_int();
+        // Accumulate scale.
         _scale   = tmp._scale  << scale;
-        _offset += tmp._offset << scale;
+        // Accumulate offset.
+        int shifted_offset = 0;
+        if (!try_LShiftI_no_overflow(tmp._offset, scale, shifted_offset)) {
+          return false; // shift overflow.
+        }
+        if (!try_AddI_no_overflow(_offset, shifted_offset, _offset)) {
+          return false; // add overflow.
+        }
+        // Accumulate invar.
         _invar = tmp._invar;
         if (_invar != nullptr) {
           _negate_invar = tmp._negate_invar;
           _invar_scale = n->in(2);
         }
+
+        // Forward info about the int_index:
+        _has_int_index_after_convI2L = tmp._has_int_index_after_convI2L;
+        _int_index_after_convI2L_offset = tmp._int_index_after_convI2L_offset;
+        _int_index_after_convI2L_invar  = tmp._int_index_after_convI2L_invar;
+        _int_index_after_convI2L_scale  = tmp._int_index_after_convI2L_scale;
+
         NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar);)
         return true;
       }
@@ -3959,7 +4367,9 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
 
   int opc = n->Opcode();
   if (opc == Op_ConI) {
-    _offset += negate ? -(n->get_int()) : n->get_int();
+    if (!try_AddSubI_no_overflow(_offset, n->get_int(), negate, _offset)) {
+      return false; // add/sub overflow.
+    }
     NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
     return true;
   } else if (opc == Op_ConL) {
@@ -3968,7 +4378,9 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
     if (t->higher_equal(TypeLong::INT)) {
       jlong loff = n->get_long();
       jint  off  = (jint)loff;
-      _offset += negate ? -off : loff;
+      if (!try_AddSubI_no_overflow(_offset, off, negate, _offset)) {
+        return false; // add/sub overflow.
+      }
       NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
       return true;
     }
@@ -3987,11 +4399,15 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
     if (n->in(2)->is_Con() && invariant(n->in(1))) {
       _negate_invar = negate;
       _invar = n->in(1);
-      _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+      if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), negate, _offset)) {
+        return false; // add/sub overflow.
+      }
       NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);)
       return true;
     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
-      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
+      if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
+        return false; // add/sub overflow.
+      }
       _negate_invar = negate;
       _invar = n->in(2);
       NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);)
@@ -4002,11 +4418,15 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
     if (n->in(2)->is_Con() && invariant(n->in(1))) {
       _negate_invar = negate;
       _invar = n->in(1);
-      _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+      if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), !negate, _offset)) {
+        return false; // add/sub overflow.
+      }
       NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);)
       return true;
     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
-      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
+      if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
+        return false; // add/sub overflow.
+      }
       _negate_invar = !negate;
       _invar = n->in(2);
       NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);)
@@ -4037,6 +4457,44 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
   return false;
 }
 
+bool SWPointer::try_AddI_no_overflow(int offset1, int offset2, int& result) {
+  jlong long_offset = java_add((jlong)(offset1), (jlong)(offset2));
+  jint  int_offset  = java_add((jint)(offset1), (jint)(offset2));
+  if (long_offset != int_offset) {
+    return false;
+  }
+  result = int_offset;
+  return true;
+}
+
+bool SWPointer::try_SubI_no_overflow(int offset1, int offset2, int& result) {
+  jlong long_offset = java_subtract((jlong)(offset1), (jlong)(offset2));
+  jint  int_offset  = java_subtract((jint)(offset1), (jint)(offset2));
+  if (long_offset != int_offset) {
+    return false;
+  }
+  result = int_offset;
+  return true;
+}
+
+bool SWPointer::try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result) {
+  if (is_sub) {
+    return try_SubI_no_overflow(offset1, offset2, result);
+  } else {
+    return try_AddI_no_overflow(offset1, offset2, result);
+  }
+}
+
+bool SWPointer::try_LShiftI_no_overflow(int offset, int shift, int& result) {
+  jlong long_offset = java_shift_left((jlong)(offset), (julong)((jlong)(shift)));
+  jint  int_offset  = java_shift_left((jint)(offset), (juint)((jint)(shift)));
+  if (long_offset != int_offset) {
+    return false;
+  }
+  result = int_offset;
+  return true;
+}
+
 //----------------------------print------------------------
 void SWPointer::print() {
 #ifndef PRODUCT
diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp
index f53a25c5bfb..bb7f8b72997 100644
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@@ -572,13 +572,51 @@ class SuperWord : public ResourceObj {
 
 //------------------------------SWPointer---------------------------
 // Information about an address for dependence checking and vector alignment
+//
+// We parse and represent pointers of the simple form:
+//
+//   pointer   = adr + offset + invar + scale * ConvI2L(iv)
+//
+// Where:
+//
+//   adr: the base address of an array (base = adr)
+//        OR
+//        some address to off-heap memory (base = TOP)
+//
+//   offset: a constant offset
+//   invar:  a runtime variable, which is invariant during the loop
+//   scale:  scaling factor
+//   iv:     loop induction variable
+//
+// But more precisely, we parse the composite-long-int form:
+//
+//   pointer   = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + inv_invar + int_scale * iv)
+//
+//   pointer   = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
+//   int_index =       int_offset  + int_invar  + int_scale  * iv
+//
+// However, for aliasing and adjacency checks (e.g. SWPointer::cmp()) we always use the simple form to make
+// decisions. Hence, we must make sure to only create a "valid" SWPointer if the optimisations based on the
+// simple form produce the same result as the compound-long-int form would. Intuitively, this depends on
+// if the int_index overflows, but the precise conditions are given in SWPointer::is_safe_to_use_as_simple_form().
+//
+//   ConvI2L(int_index) = ConvI2L(int_offset  + int_invar  + int_scale  * iv)
+//                      = Convi2L(int_offset) + ConvI2L(int_invar) + ConvI2L(int_scale) * ConvI2L(iv)
+//
+//   scale  = long_scale * ConvI2L(int_scale)
+//   offset = long_offset + long_scale * ConvI2L(int_offset)
+//   invar  = long_invar  + long_scale * ConvI2L(int_invar)
+//
+//   pointer   = adr + offset + invar + scale * ConvI2L(iv)
+//
 class SWPointer {
  protected:
   MemNode*   _mem;           // My memory reference node
   SuperWord* _slp;           // SuperWord class
 
-  Node* _base;               // null if unsafe nonheap reference
-  Node* _adr;                // address pointer
+  // Components of the simple form:
+  Node* _base;               // Base address of an array OR null if some off-heap memory.
+  Node* _adr;                // Same as _base if an array pointer OR some off-heap memory pointer.
   int   _scale;              // multiplier for iv (in bytes), 0 if no loop iv
   int   _offset;             // constant offset (in bytes)
 
@@ -586,6 +624,13 @@ class SWPointer {
   bool  _negate_invar;       // if true then use: (0 - _invar)
   Node* _invar_scale;        // multiplier for invariant
 
+  // The int_index components of the compound-long-int form. Used to decide if it is safe to use the
+  // simple form rather than the compound-long-int form that was parsed.
+  bool  _has_int_index_after_convI2L;
+  int   _int_index_after_convI2L_offset;
+  Node* _int_index_after_convI2L_invar;
+  int   _int_index_after_convI2L_scale;
+
   Node_Stack* _nstack;       // stack used to record a swpointer trace of variants
   bool        _analyze_only; // Used in loop unrolling only for swpointer trace
   uint        _stack_idx;    // Used in loop unrolling only for swpointer trace
@@ -604,6 +649,8 @@ class SWPointer {
   // Match: offset is (k [+/- invariant])
   bool offset_plus_k(Node* n, bool negate = false);
 
+  bool is_safe_to_use_as_simple_form(Node* base, Node* adr) const;
+
  public:
   enum CMP {
     Less          = 1,
@@ -639,10 +686,43 @@ class SWPointer {
               _negate_invar == q._negate_invar);
   }
 
+  // We compute if and how two SWPointers can alias at runtime, i.e. if the two addressed regions of memory can
+  // ever overlap. There are essentially 3 relevant return states:
+  //  - NotComparable:  Synonymous to "unknown aliasing".
+  //                    We have no information about how the two SWPointers can alias. They could overlap, refer
+  //                    to another location in the same memory object, or point to a completely different object.
+  //                    -> Memory edge required. Aliasing unlikely but possible.
+  //
+  //  - Less / Greater: Synonymous to "never aliasing".
+  //                    The two SWPointers may point into the same memory object, but be non-aliasing (i.e. we
+  //                    know both address regions inside the same memory object, but these regions are non-
+  //                    overlapping), or the SWPointers point to entirely different objects.
+  //                    -> No memory edge required. Aliasing impossible.
+  //
+  //  - Equal:          Synonymous to "overlap, or point to different memory objects".
+  //                    The two SWPointers either overlap on the same memory object, or point to two different
+  //                    memory objects.
+  //                    -> Memory edge required. Aliasing likely.
+  //
+  // In a future refactoring, we can simplify to two states:
+  //  - NeverAlias:     instead of Less / Greater
+  //  - MayAlias:       instead of Equal / NotComparable
+  //
+  // Two SWPointer are "comparable" (Less / Greater / Equal), iff all of these conditions apply:
+  //   1) Both are valid, i.e. expressible in the compound-long-int or simple form.
+  //   2) The adr are identical, or both are array bases of different arrays.
+  //   3) They have identical scale.
+  //   4) They have identical invar.
+  //   5) The difference in offsets is limited: abs(offset0 - offset1) < 2^31.
   int cmp(SWPointer& q) {
     if (valid() && q.valid() &&
         (_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
         _scale == q._scale   && invar_equals(q)) {
+      jlong difference = abs(java_subtract((jlong)_offset, (jlong)q._offset));
+      jlong max_diff = (jlong)1 << 31;
+      if (difference >= max_diff) {
+        return NotComparable;
+      }
       bool overlap = q._offset <   _offset +   memory_size() &&
                        _offset < q._offset + q.memory_size();
       return overlap ? Equal : (_offset < q._offset ? Less : Greater);
@@ -729,6 +809,12 @@ class SWPointer {
 
   } _tracer;//TRacer;
 #endif
+
+  static bool try_AddI_no_overflow(int offset1, int offset2, int& result);
+  static bool try_SubI_no_overflow(int offset1, int offset2, int& result);
+  static bool try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result);
+  static bool try_LShiftI_no_overflow(int offset1, int offset2, int& result);
+
 };
 
 #endif // SHARE_OPTO_SUPERWORD_HPP
-- 
GitLab