pmaports-a71/cross/gcc-x86_64/fix-arm64.patch
2024-08-10 23:30:06 +09:00

545 lines
21 KiB
Diff

From 2894660df1292153632edbc2a5b66eaf6a864660 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 13 Jun 2024 12:48:21 +0100
Subject: [PATCH 1/4] aarch64: Fix invalid nested subregs [PR115464]
The testcase extracts one arm_neon.h vector from a pair (one subreg)
and then reinterprets the result as an SVE vector (another subreg).
Each subreg makes sense individually, but we can't fold them together
into a single subreg: it's 32 bytes -> 16 bytes -> 16*N bytes,
but the interpretation of 32 bytes -> 16*N bytes depends on
whether N==1 or N>1.
Since the second subreg makes sense individually, simplify_subreg
should bail out rather than ICE on it. simplify_gen_subreg will
then do the same (because it already checks validate_subreg).
This leaves simplify_gen_subreg returning null, requiring the
caller to take appropriate action.
I think this is relatively likely to occur elsewhere, so the patch
adds a helper for forcing a subreg, allowing a temporary pseudo to
be created where necessary.
I'll follow up by using force_subreg in more places. This patch
is intended to be a minimal backportable fix for the PR.
gcc/
PR target/115464
* simplify-rtx.cc (simplify_context::simplify_subreg): Don't try
to fold two subregs together if their relationship isn't known
at compile time.
* explow.h (force_subreg): Declare.
* explow.cc (force_subreg): New function.
* config/aarch64/aarch64-sve-builtins-base.cc
(svset_neonq_impl::expand): Use it instead of simplify_gen_subreg.
gcc/testsuite/
PR target/115464
* gcc.target/aarch64/sve/acle/general/pr115464.c: New test.
(cherry picked from commit 0970ff46ba6330fc80e8736fc05b2eaeeae0b6a0)
---
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
gcc/explow.cc | 15 +++++++++++++++
gcc/explow.h | 2 ++
gcc/simplify-rtx.cc | 5 +++++
.../aarch64/sve/acle/general/pr115464.c | 13 +++++++++++++
5 files changed, 36 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 0d2edf3f19e..c9182594bc1 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1174,7 +1174,7 @@ public:
Advanced SIMD argument as an SVE vector. */
if (!BYTES_BIG_ENDIAN
&& is_undef (CALL_EXPR_ARG (e.call_expr, 0)))
- return simplify_gen_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
+ return force_subreg (mode, e.args[1], GET_MODE (e.args[1]), 0);
rtx_vector_builder builder (VNx16BImode, 16, 2);
for (unsigned int i = 0; i < 16; i++)
diff --git a/gcc/explow.cc b/gcc/explow.cc
index 8e5f6b8e680..f6843398c4b 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -745,6 +745,21 @@ force_reg (machine_mode mode, rtx x)
return temp;
}
+/* Like simplify_gen_subreg, but force OP into a new register if the
+ subreg cannot be formed directly. */
+
+rtx
+force_subreg (machine_mode outermode, rtx op,
+ machine_mode innermode, poly_uint64 byte)
+{
+ rtx x = simplify_gen_subreg (outermode, op, innermode, byte);
+ if (x)
+ return x;
+
+ op = copy_to_mode_reg (innermode, op);
+ return simplify_gen_subreg (outermode, op, innermode, byte);
+}
+
/* If X is a memory ref, copy its contents to a new temp reg and return
that reg. Otherwise, return X. */
diff --git a/gcc/explow.h b/gcc/explow.h
index 16aa02cfb68..cbd1fcb7eb3 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -42,6 +42,8 @@ extern rtx copy_to_suggested_reg (rtx, rtx, machine_mode);
Args are mode (in case value is a constant) and the value. */
extern rtx force_reg (machine_mode, rtx);
+extern rtx force_subreg (machine_mode, rtx, machine_mode, poly_uint64);
+
/* Return given rtx, copied into a new temp reg if it was in memory. */
extern rtx force_not_mem (rtx);
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index dceaa13333c..729d408aa55 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7612,6 +7612,11 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode);
rtx newx;
+ /* Make sure that the relationship between the two subregs is
+ known at compile time. */
+ if (!ordered_p (outersize, innermostsize))
+ return NULL_RTX;
+
if (outermode == innermostmode
&& known_eq (byte, 0U)
&& known_eq (SUBREG_BYTE (op), 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
new file mode 100644
index 00000000000..d728d1325ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+#include <arm_neon_sve_bridge.h>
+
+svuint16_t
+convolve4_4_x (uint16x8x2_t permute_tbl)
+{
+ return svset_neonq_u16 (svundef_u16 (), permute_tbl.val[1]);
+}
+
+/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */
--
2.46.0
From 7edecb21012d88902c60d38f5865bc254eaa9c55 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 18 Jun 2024 12:22:30 +0100
Subject: [PATCH 2/4] aarch64: Use force_subreg in more places
This patch makes the aarch64 code use force_subreg instead of
simplify_gen_subreg in more places. The criteria were:
(1) The code is obviously specific to expand (where new pseudos
can be created).
(2) The value is obviously an rvalue rather than an lvalue.
(3) The offset wasn't a simple lowpart or highpart calculation;
a later patch will deal with those.
gcc/
* config/aarch64/aarch64-builtins.cc (aarch64_expand_fcmla_builtin):
Use force_subreg instead of simplify_gen_subreg.
* config/aarch64/aarch64-simd.md (ctz<mode>2): Likewise.
* config/aarch64/aarch64-sve-builtins-base.cc
(svget_impl::expand): Likewise.
(svget_neonq_impl::expand): Likewise.
* config/aarch64/aarch64-sve-builtins-functions.h
(multireg_permute::expand): Likewise.
(cherry picked from commit 1474a8eead4ab390e59ee014befa8c40346679f4)
---
gcc/config/aarch64/aarch64-builtins.cc | 4 ++--
gcc/config/aarch64/aarch64-simd.md | 4 ++--
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 8 +++-----
gcc/config/aarch64/aarch64-sve-builtins-functions.h | 6 +++---
4 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 75d21de1401..b2e46a073a8 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2510,12 +2510,12 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
rtx temp2 = gen_reg_rtx (DImode);
temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
subreg_lowpart_offset (d->mode, quadmode));
- temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
+ temp1 = force_subreg (V2DImode, temp1, d->mode, 0);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
else
emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
- op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
+ op2 = force_subreg (d->mode, temp2, GET_MODE (temp2), 0);
/* And recalculate the index. */
lane -= nunits / 4;
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 33ab0741e87..5b9efe0b165 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -412,8 +412,8 @@
"TARGET_SIMD"
{
emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
- rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
- <MODE>mode, 0);
+ rtx op0_castsi2qi = force_subreg (<VS:VSI2QI>mode, operands[0],
+ <MODE>mode, 0);
emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
DONE;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index c9182594bc1..2c95da79572 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1121,9 +1121,8 @@ public:
expand (function_expander &e) const override
{
/* Fold the access into a subreg rvalue. */
- return simplify_gen_subreg (e.vector_mode (0), e.args[0],
- GET_MODE (e.args[0]),
- INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
+ return force_subreg (e.vector_mode (0), e.args[0], GET_MODE (e.args[0]),
+ INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
}
};
@@ -1157,8 +1156,7 @@ public:
e.add_fixed_operand (indices);
return e.generate_insn (icode);
}
- return simplify_gen_subreg (e.result_mode (), e.args[0],
- GET_MODE (e.args[0]), 0);
+ return force_subreg (e.result_mode (), e.args[0], GET_MODE (e.args[0]), 0);
}
};
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 3b8e575e98e..7d06a57ff83 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -639,9 +639,9 @@ public:
{
machine_mode elt_mode = e.vector_mode (0);
rtx arg = e.args[0];
- e.args[0] = simplify_gen_subreg (elt_mode, arg, GET_MODE (arg), 0);
- e.args.safe_push (simplify_gen_subreg (elt_mode, arg, GET_MODE (arg),
- GET_MODE_SIZE (elt_mode)));
+ e.args[0] = force_subreg (elt_mode, arg, GET_MODE (arg), 0);
+ e.args.safe_push (force_subreg (elt_mode, arg, GET_MODE (arg),
+ GET_MODE_SIZE (elt_mode)));
}
return e.use_exact_insn (icode);
}
--
2.46.0
From 0ab664f41762803a72e5e99025b0512cad493985 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 18 Jun 2024 12:22:31 +0100
Subject: [PATCH 3/4] aarch64: Add some uses of force_lowpart_subreg
This patch makes more use of force_lowpart_subreg, similarly
to the recent patch for force_subreg. The criteria were:
(1) The code is obviously specific to expand (where new pseudos
can be created).
(2) The value is obviously an rvalue rather than an lvalue.
gcc/
PR target/115464
* config/aarch64/aarch64-builtins.cc (aarch64_expand_fcmla_builtin)
(aarch64_expand_rwsr_builtin): Use force_lowpart_subreg instead of
simplify_gen_subreg and lowpart_subreg.
* config/aarch64/aarch64-sve-builtins-base.cc
(svset_neonq_impl::expand): Likewise.
* config/aarch64/aarch64-sve-builtins-sme.cc
(add_load_store_slice_operand): Likewise.
* config/aarch64/aarch64.cc (aarch64_sve_reinterpret): Likewise.
(aarch64_addti_scratch_regs, aarch64_subvti_scratch_regs): Likewise.
gcc/testsuite/
PR target/115464
* gcc.target/aarch64/sve/acle/general/pr115464_2.c: New test.
(cherry picked from commit 6bd4fbae45d11795a9a6f54b866308d4d7134def)
---
gcc/config/aarch64/aarch64-builtins.cc | 11 +++++------
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
gcc/config/aarch64/aarch64-sve-builtins-sme.cc | 2 +-
gcc/config/aarch64/aarch64.cc | 14 +++++---------
.../aarch64/sve/acle/general/pr115464_2.c | 11 +++++++++++
5 files changed, 23 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index b2e46a073a8..264b9560709 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2497,8 +2497,7 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
int lane = INTVAL (lane_idx);
if (lane < nunits / 4)
- op2 = simplify_gen_subreg (d->mode, op2, quadmode,
- subreg_lowpart_offset (d->mode, quadmode));
+ op2 = force_lowpart_subreg (d->mode, op2, quadmode);
else
{
/* Select the upper 64 bits, either a V2SF or V4HF, this however
@@ -2508,8 +2507,7 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
gen_highpart_mode generates code that isn't optimal. */
rtx temp1 = gen_reg_rtx (d->mode);
rtx temp2 = gen_reg_rtx (DImode);
- temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
- subreg_lowpart_offset (d->mode, quadmode));
+ temp1 = force_lowpart_subreg (d->mode, op2, quadmode);
temp1 = force_subreg (V2DImode, temp1, d->mode, 0);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
@@ -2754,7 +2752,7 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
case AARCH64_WSR64:
case AARCH64_WSRF64:
case AARCH64_WSR128:
- subreg = lowpart_subreg (sysreg_mode, input_val, mode);
+ subreg = force_lowpart_subreg (sysreg_mode, input_val, mode);
break;
case AARCH64_WSRF:
subreg = gen_lowpart_SUBREG (SImode, input_val);
@@ -2789,7 +2787,8 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
case AARCH64_RSR64:
case AARCH64_RSRF64:
case AARCH64_RSR128:
- return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, sysreg_mode);
+ return force_lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)),
+ target, sysreg_mode);
case AARCH64_RSRF:
subreg = gen_lowpart_SUBREG (SImode, target);
return gen_lowpart_SUBREG (SFmode, subreg);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 2c95da79572..3c970e9c5f8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1183,7 +1183,7 @@ public:
if (BYTES_BIG_ENDIAN)
return e.use_exact_insn (code_for_aarch64_sve_set_neonq (mode));
insn_code icode = code_for_vcond_mask (mode, mode);
- e.args[1] = lowpart_subreg (mode, e.args[1], GET_MODE (e.args[1]));
+ e.args[1] = force_lowpart_subreg (mode, e.args[1], GET_MODE (e.args[1]));
e.add_output_operand (icode);
e.add_input_operand (icode, e.args[1]);
e.add_input_operand (icode, e.args[0]);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
index f4c91bcbb95..b66b35ae60b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
@@ -112,7 +112,7 @@ add_load_store_slice_operand (function_expander &e, insn_code icode,
rtx base = e.args[argno];
if (e.mode_suffix_id == MODE_vnum)
{
- rtx vnum = lowpart_subreg (SImode, e.args[vnum_argno], DImode);
+ rtx vnum = force_lowpart_subreg (SImode, e.args[vnum_argno], DImode);
base = simplify_gen_binary (PLUS, SImode, base, vnum);
}
e.add_input_operand (icode, base);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 1beec94629d..a064aeecbc0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3284,7 +3284,7 @@ aarch64_sve_reinterpret (machine_mode mode, rtx x)
/* can_change_mode_class must only return true if subregs and svreinterprets
have the same semantics. */
if (targetm.can_change_mode_class (GET_MODE (x), mode, FP_REGS))
- return lowpart_subreg (mode, x, GET_MODE (x));
+ return force_lowpart_subreg (mode, x, GET_MODE (x));
rtx res = gen_reg_rtx (mode);
x = force_reg (GET_MODE (x), x);
@@ -26979,9 +26979,8 @@ aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
rtx *high_in2)
{
*low_dest = gen_reg_rtx (DImode);
- *low_in1 = gen_lowpart (DImode, op1);
- *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
- subreg_lowpart_offset (DImode, TImode));
+ *low_in1 = force_lowpart_subreg (DImode, op1, TImode);
+ *low_in2 = force_lowpart_subreg (DImode, op2, TImode);
*high_dest = gen_reg_rtx (DImode);
*high_in1 = gen_highpart (DImode, op1);
*high_in2 = simplify_gen_subreg (DImode, op2, TImode,
@@ -27013,11 +27012,8 @@ aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
rtx *high_in2)
{
*low_dest = gen_reg_rtx (DImode);
- *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
- subreg_lowpart_offset (DImode, TImode));
-
- *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
- subreg_lowpart_offset (DImode, TImode));
+ *low_in1 = force_lowpart_subreg (DImode, op1, TImode);
+ *low_in2 = force_lowpart_subreg (DImode, op2, TImode);
*high_dest = gen_reg_rtx (DImode);
*high_in1 = simplify_gen_subreg (DImode, op1, TImode,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c
new file mode 100644
index 00000000000..f561c34f732
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+#include <arm_neon_sve_bridge.h>
+
+svuint16_t
+convolve4_4_x (uint16x8x2_t permute_tbl, svuint16_t a)
+{
+ return svset_neonq_u16 (a, permute_tbl.val[1]);
+}
--
2.46.0
From e21a377dea6edfaaa494f07974135e58ff66eef1 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 18 Jun 2024 12:22:31 +0100
Subject: [PATCH 4/4] Add force_lowpart_subreg
optabs had a local function called lowpart_subreg_maybe_copy
that is very similar to the lowpart version of force_subreg.
This patch adds a force_lowpart_subreg wrapper around
force_subreg and uses it in optabs.cc.
The only difference between the old and new functions is that
the old one asserted success while the new one doesn't.
It's common not to assert elsewhere when taking subregs;
normally a null result is enough.
Later patches will make more use of the new function.
gcc/
* explow.h (force_lowpart_subreg): Declare.
* explow.cc (force_lowpart_subreg): New function.
* optabs.cc (lowpart_subreg_maybe_copy): Delete.
(expand_absneg_bit): Use force_lowpart_subreg instead of
lowpart_subreg_maybe_copy.
(expand_copysign_bit): Likewise.
(cherry picked from commit 5f40d1c0cc6ce91ef28d326b8707b3f05e6f239c)
---
gcc/explow.cc | 14 ++++++++++++++
gcc/explow.h | 1 +
gcc/optabs.cc | 24 ++----------------------
3 files changed, 17 insertions(+), 22 deletions(-)
diff --git a/gcc/explow.cc b/gcc/explow.cc
index f6843398c4b..5fdfa81f69b 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -760,6 +760,20 @@ force_subreg (machine_mode outermode, rtx op,
return simplify_gen_subreg (outermode, op, innermode, byte);
}
+/* Try to return an rvalue expression for the OUTERMODE lowpart of OP,
+ which has mode INNERMODE. Allow OP to be forced into a new register
+ if necessary.
+
+ Return null on failure. */
+
+rtx
+force_lowpart_subreg (machine_mode outermode, rtx op,
+ machine_mode innermode)
+{
+ auto byte = subreg_lowpart_offset (outermode, innermode);
+ return force_subreg (outermode, op, innermode, byte);
+}
+
/* If X is a memory ref, copy its contents to a new temp reg and return
that reg. Otherwise, return X. */
diff --git a/gcc/explow.h b/gcc/explow.h
index cbd1fcb7eb3..dd654649b06 100644
--- a/gcc/explow.h
+++ b/gcc/explow.h
@@ -43,6 +43,7 @@ extern rtx copy_to_suggested_reg (rtx, rtx, machine_mode);
extern rtx force_reg (machine_mode, rtx);
extern rtx force_subreg (machine_mode, rtx, machine_mode, poly_uint64);
+extern rtx force_lowpart_subreg (machine_mode, rtx, machine_mode);
/* Return given rtx, copied into a new temp reg if it was in memory. */
extern rtx force_not_mem (rtx);
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index ce91f94ed43..804c0dc73ba 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -3096,26 +3096,6 @@ expand_ffs (scalar_int_mode mode, rtx op0, rtx target)
return 0;
}
-/* Extract the OMODE lowpart from VAL, which has IMODE. Under certain
- conditions, VAL may already be a SUBREG against which we cannot generate
- a further SUBREG. In this case, we expect forcing the value into a
- register will work around the situation. */
-
-static rtx
-lowpart_subreg_maybe_copy (machine_mode omode, rtx val,
- machine_mode imode)
-{
- rtx ret;
- ret = lowpart_subreg (omode, val, imode);
- if (ret == NULL)
- {
- val = force_reg (imode, val);
- ret = lowpart_subreg (omode, val, imode);
- gcc_assert (ret != NULL);
- }
- return ret;
-}
-
/* Expand a floating point absolute value or negation operation via a
logical operation on the sign bit. */
@@ -3204,7 +3184,7 @@ expand_absneg_bit (enum rtx_code code, scalar_float_mode mode,
gen_lowpart (imode, op0),
immed_wide_int_const (mask, imode),
gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN);
- target = lowpart_subreg_maybe_copy (mode, temp, imode);
+ target = force_lowpart_subreg (mode, temp, imode);
set_dst_reg_note (get_last_insn (), REG_EQUAL,
gen_rtx_fmt_e (code, mode, copy_rtx (op0)),
@@ -4043,7 +4023,7 @@ expand_copysign_bit (scalar_float_mode mode, rtx op0, rtx op1, rtx target,
temp = expand_binop (imode, ior_optab, op0, op1,
gen_lowpart (imode, target), 1, OPTAB_LIB_WIDEN);
- target = lowpart_subreg_maybe_copy (mode, temp, imode);
+ target = force_lowpart_subreg (mode, temp, imode);
}
return target;
--
2.46.0