gcc-4.6: Bring in latest linaro patches

Adjust existing patches for latest FSF gcc-4_6-branch

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Acked-by: Martin Jansa <Martin.Jansa@gmail.com>
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
This commit is contained in:
Khem Raj
2011-09-29 22:12:52 +00:00
committed by Koen Kooi
parent 9f2e55480d
commit b9b5306e55
26 changed files with 10094 additions and 1784 deletions
@@ -17,8 +17,10 @@
for STORE_FLAG_VALUE==-1 case. for STORE_FLAG_VALUE==-1 case.
=== modified file 'gcc/combine.c' === modified file 'gcc/combine.c'
--- old/gcc/combine.c 2011-02-15 19:46:26 +0000 Index: gcc-4_6-branch/gcc/combine.c
+++ new/gcc/combine.c 2011-04-26 17:03:58 +0000 ===================================================================
--- gcc-4_6-branch.orig/gcc/combine.c 2011-09-16 19:58:21.000000000 -0700
+++ gcc-4_6-branch/gcc/combine.c 2011-09-16 20:05:36.626650681 -0700
@@ -391,8 +391,8 @@ @@ -391,8 +391,8 @@
static void undo_all (void); static void undo_all (void);
static void undo_commit (void); static void undo_commit (void);
@@ -30,7 +32,7 @@
static rtx simplify_if_then_else (rtx); static rtx simplify_if_then_else (rtx);
static rtx simplify_set (rtx); static rtx simplify_set (rtx);
static rtx simplify_logical (rtx); static rtx simplify_logical (rtx);
@@ -3086,12 +3086,12 @@ @@ -3112,12 +3112,12 @@
if (i1) if (i1)
{ {
subst_low_luid = DF_INSN_LUID (i1); subst_low_luid = DF_INSN_LUID (i1);
@@ -45,7 +47,7 @@
} }
} }
@@ -3103,7 +3103,7 @@ @@ -3129,7 +3129,7 @@
self-referential RTL when we will be substituting I1SRC for I1DEST self-referential RTL when we will be substituting I1SRC for I1DEST
later. Likewise if I0 feeds into I2, either directly or indirectly later. Likewise if I0 feeds into I2, either directly or indirectly
through I1, and I0DEST is in I0SRC. */ through I1, and I0DEST is in I0SRC. */
@@ -54,7 +56,7 @@
(i1_feeds_i2_n && i1dest_in_i1src) (i1_feeds_i2_n && i1dest_in_i1src)
|| ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n)) || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
&& i0dest_in_i0src)); && i0dest_in_i0src));
@@ -3142,7 +3142,7 @@ @@ -3168,7 +3168,7 @@
copy of I1SRC each time we substitute it, in order to avoid creating copy of I1SRC each time we substitute it, in order to avoid creating
self-referential RTL when we will be substituting I0SRC for I0DEST self-referential RTL when we will be substituting I0SRC for I0DEST
later. */ later. */
@@ -63,7 +65,7 @@
i0_feeds_i1_n && i0dest_in_i0src); i0_feeds_i1_n && i0dest_in_i0src);
substed_i1 = 1; substed_i1 = 1;
@@ -3172,7 +3172,7 @@ @@ -3198,7 +3198,7 @@
n_occurrences = 0; n_occurrences = 0;
subst_low_luid = DF_INSN_LUID (i0); subst_low_luid = DF_INSN_LUID (i0);
@@ -72,7 +74,7 @@
substed_i0 = 1; substed_i0 = 1;
} }
@@ -3234,7 +3234,7 @@ @@ -3260,7 +3260,7 @@
{ {
rtx t = i1pat; rtx t = i1pat;
if (i0_feeds_i1_n) if (i0_feeds_i1_n)
@@ -81,7 +83,7 @@
XVECEXP (newpat, 0, --total_sets) = t; XVECEXP (newpat, 0, --total_sets) = t;
} }
@@ -3242,10 +3242,10 @@ @@ -3268,10 +3268,10 @@
{ {
rtx t = i2pat; rtx t = i2pat;
if (i1_feeds_i2_n) if (i1_feeds_i2_n)
@@ -94,7 +96,7 @@
XVECEXP (newpat, 0, --total_sets) = t; XVECEXP (newpat, 0, --total_sets) = t;
} }
@@ -4914,11 +4914,13 @@ @@ -4943,11 +4943,13 @@
IN_DEST is nonzero if we are processing the SET_DEST of a SET. IN_DEST is nonzero if we are processing the SET_DEST of a SET.
@@ -109,7 +111,7 @@
{ {
enum rtx_code code = GET_CODE (x); enum rtx_code code = GET_CODE (x);
enum machine_mode op0_mode = VOIDmode; enum machine_mode op0_mode = VOIDmode;
@@ -4979,7 +4981,7 @@ @@ -5008,7 +5010,7 @@
&& GET_CODE (XVECEXP (x, 0, 0)) == SET && GET_CODE (XVECEXP (x, 0, 0)) == SET
&& GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS) && GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS)
{ {
@@ -118,7 +120,7 @@
/* If this substitution failed, this whole thing fails. */ /* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER if (GET_CODE (new_rtx) == CLOBBER
@@ -4996,7 +4998,7 @@ @@ -5025,7 +5027,7 @@
&& GET_CODE (dest) != CC0 && GET_CODE (dest) != CC0
&& GET_CODE (dest) != PC) && GET_CODE (dest) != PC)
{ {
@@ -127,7 +129,7 @@
/* If this substitution failed, this whole thing fails. */ /* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER if (GET_CODE (new_rtx) == CLOBBER
@@ -5042,8 +5044,8 @@ @@ -5071,8 +5073,8 @@
} }
else else
{ {
@@ -138,7 +140,7 @@
/* If this substitution failed, this whole thing /* If this substitution failed, this whole thing
fails. */ fails. */
@@ -5120,7 +5122,9 @@ @@ -5149,7 +5151,9 @@
&& (code == SUBREG || code == STRICT_LOW_PART && (code == SUBREG || code == STRICT_LOW_PART
|| code == ZERO_EXTRACT)) || code == ZERO_EXTRACT))
|| code == SET) || code == SET)
@@ -149,7 +151,7 @@
/* If we found that we will have to reject this combination, /* If we found that we will have to reject this combination,
indicate that by returning the CLOBBER ourselves, rather than indicate that by returning the CLOBBER ourselves, rather than
@@ -5177,7 +5181,7 @@ @@ -5206,7 +5210,7 @@
/* If X is sufficiently simple, don't bother trying to do anything /* If X is sufficiently simple, don't bother trying to do anything
with it. */ with it. */
if (code != CONST_INT && code != REG && code != CLOBBER) if (code != CONST_INT && code != REG && code != CLOBBER)
@@ -158,7 +160,7 @@
if (GET_CODE (x) == code) if (GET_CODE (x) == code)
break; break;
@@ -5197,10 +5201,12 @@ @@ -5226,10 +5230,12 @@
expression. expression.
OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero
@@ -173,7 +175,7 @@
{ {
enum rtx_code code = GET_CODE (x); enum rtx_code code = GET_CODE (x);
enum machine_mode mode = GET_MODE (x); enum machine_mode mode = GET_MODE (x);
@@ -5255,8 +5261,8 @@ @@ -5284,8 +5290,8 @@
false arms to store-flag values. Be careful to use copy_rtx false arms to store-flag values. Be careful to use copy_rtx
here since true_rtx or false_rtx might share RTL with x as a here since true_rtx or false_rtx might share RTL with x as a
result of the if_then_else_cond call above. */ result of the if_then_else_cond call above. */
@@ -184,16 +186,16 @@
/* If true_rtx and false_rtx are not general_operands, an if_then_else /* If true_rtx and false_rtx are not general_operands, an if_then_else
is unlikely to be simpler. */ is unlikely to be simpler. */
@@ -5600,7 +5606,7 @@ @@ -5629,7 +5635,7 @@
{ {
/* Try to simplify the expression further. */ /* Try to simplify the expression further. */
rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1)); rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1));
- temp = combine_simplify_rtx (tor, mode, in_dest); - temp = combine_simplify_rtx (tor, VOIDmode, in_dest);
+ temp = combine_simplify_rtx (tor, mode, in_dest, 0); + temp = combine_simplify_rtx (tor, VOIDmode, in_dest, 0);
/* If we could, great. If not, do not go ahead with the IOR /* If we could, great. If not, do not go ahead with the IOR
replacement, since PLUS appears in many special purpose replacement, since PLUS appears in many special purpose
@@ -5693,7 +5699,16 @@ @@ -5722,7 +5728,16 @@
ZERO_EXTRACT is indeed appropriate, it will be placed back by ZERO_EXTRACT is indeed appropriate, it will be placed back by
the call to make_compound_operation in the SET case. */ the call to make_compound_operation in the SET case. */
@@ -211,7 +213,7 @@
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx && op1 == const0_rtx
&& mode == GET_MODE (op0) && mode == GET_MODE (op0)
@@ -5739,7 +5754,10 @@ @@ -5768,7 +5783,10 @@
/* If STORE_FLAG_VALUE is -1, we have cases similar to /* If STORE_FLAG_VALUE is -1, we have cases similar to
those above. */ those above. */
@@ -223,7 +225,7 @@
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx && op1 == const0_rtx
&& (num_sign_bit_copies (op0, mode) && (num_sign_bit_copies (op0, mode)
@@ -5937,11 +5955,11 @@ @@ -5966,11 +5984,11 @@
if (reg_mentioned_p (from, true_rtx)) if (reg_mentioned_p (from, true_rtx))
true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code, true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code,
from, true_val), from, true_val),
@@ -237,7 +239,7 @@
SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx); SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx);
SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx); SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx);
@@ -6158,11 +6176,11 @@ @@ -6187,11 +6205,11 @@
{ {
temp = subst (simplify_gen_relational (true_code, m, VOIDmode, temp = subst (simplify_gen_relational (true_code, m, VOIDmode,
cond_op0, cond_op1), cond_op0, cond_op1),
@@ -251,4 +253,3 @@
temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp); temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp);
if (extend_op != UNKNOWN) if (extend_op != UNKNOWN)
File diff suppressed because it is too large Load Diff
@@ -1,15 +1,3 @@
2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
LP 791327
gcc/
2011-06-09 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/49335
* config/arm/predicates.md (add_operator): New.
* config/arm/arm.md ("*arith_shiftsi"): Fix for SP reg usage
in Thumb2.
2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> 2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline. Backport from mainline.
@@ -19,69 +7,10 @@
PR target/49385 PR target/49385
* config/arm/thumb2.md (*thumb2_movhi_insn): Make sure atleast * config/arm/thumb2.md (*thumb2_movhi_insn): Make sure atleast
one of the operands is a register. one of the operands is a register.
Index: gcc-4_6-branch/gcc/config/arm/thumb2.md
=== modified file 'gcc/config/arm/arm.md' ===================================================================
--- old/gcc/config/arm/arm.md 2011-06-27 22:14:07 +0000 --- gcc-4_6-branch.orig/gcc/config/arm/thumb2.md 2011-09-16 20:22:40.000000000 -0700
+++ new/gcc/config/arm/arm.md 2011-06-28 12:02:27 +0000 +++ gcc-4_6-branch/gcc/config/arm/thumb2.md 2011-09-16 20:28:47.648690433 -0700
@@ -8584,18 +8584,22 @@
;; Patterns to allow combination of arithmetic, cond code and shifts
(define_insn "*arith_shiftsi"
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
(match_operator:SI 1 "shiftable_operator"
[(match_operator:SI 3 "shift_operator"
- [(match_operand:SI 4 "s_register_operand" "r,r")
- (match_operand:SI 5 "shift_amount_operand" "M,r")])
- (match_operand:SI 2 "s_register_operand" "rk,rk")]))]
+ [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
+ (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
"TARGET_32BIT"
"%i1%?\\t%0, %2, %4%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "4")
- (set_attr "arch" "32,a")
- ;; We have to make sure to disable the second alternative if
+ (set_attr "arch" "a,t2,t2,a")
+ ;; Thumb2 doesn't allow the stack pointer to be used for
+ ;; operand1 for all operations other than add and sub. In this case
+ ;; the minus operation is a candidate for an rsub and hence needs
+ ;; to be disabled.
+ ;; We have to make sure to disable the fourth alternative if
;; the shift_operator is MULT, since otherwise the insn will
;; also match a multiply_accumulate pattern and validate_change
;; will allow a replacement of the constant with a register
@@ -8603,9 +8607,13 @@
(set_attr_alternative "insn_enabled"
[(const_string "yes")
(if_then_else
+ (match_operand:SI 1 "add_operator" "")
+ (const_string "yes") (const_string "no"))
+ (const_string "yes")
+ (if_then_else
(match_operand:SI 3 "mult_operator" "")
(const_string "no") (const_string "yes"))])
- (set_attr "type" "alu_shift,alu_shift_reg")])
+ (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")])
(define_split
[(set (match_operand:SI 0 "s_register_operand" "")
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-05-03 15:14:56 +0000
+++ new/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000
@@ -687,3 +687,6 @@
(define_special_predicate "neon_struct_operand"
(and (match_code "mem")
(match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
+
+(define_special_predicate "add_operator"
+ (match_code "plus"))
=== modified file 'gcc/config/arm/thumb2.md'
--- old/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000
+++ new/gcc/config/arm/thumb2.md 2011-06-20 12:18:27 +0000
@@ -207,7 +207,9 @@ @@ -207,7 +207,9 @@
(define_insn "*thumb2_movhi_insn" (define_insn "*thumb2_movhi_insn"
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
@@ -93,4 +22,3 @@
"@ "@
mov%?\\t%0, %1\\t%@ movhi mov%?\\t%0, %1\\t%@ movhi
movw%?\\t%0, %L1\\t%@ movhi movw%?\\t%0, %L1\\t%@ movhi
@@ -50,8 +50,10 @@
* gcc.dg/vect/vect-widen-mult-half.c: New test. * gcc.dg/vect/vect-widen-mult-half.c: New test.
=== added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c' === added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 1970-01-01 00:00:00 +0000 Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+++ new/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2011-07-06 12:04:10 +0000 ===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2011-09-16 20:32:57.279056697 -0700
@@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_int } */
+ +
@@ -105,10 +107,10 @@
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */
+ +
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c' ===================================================================
--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 1970-01-01 00:00:00 +0000 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2011-07-06 12:04:10 +0000 +++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2011-09-16 20:32:57.279056697 -0700
@@ -0,0 +1,59 @@ @@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_int } */
+ +
@@ -169,10 +171,10 @@
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */
+ +
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c' ===================================================================
--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 1970-01-01 00:00:00 +0000 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2011-07-06 12:04:10 +0000 +++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2011-09-16 20:32:57.279056697 -0700
@@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_int } */
+ +
@@ -223,10 +225,10 @@
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */
+ +
Index: gcc-4_6-branch/gcc/tree-vect-loop.c
=== modified file 'gcc/tree-vect-loop.c' ===================================================================
--- old/gcc/tree-vect-loop.c 2011-07-04 11:13:51 +0000 --- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2011-09-16 20:31:52.000000000 -0700
+++ new/gcc/tree-vect-loop.c 2011-07-11 11:02:55 +0000 +++ gcc-4_6-branch/gcc/tree-vect-loop.c 2011-09-16 20:32:57.289056641 -0700
@@ -181,6 +181,8 @@ @@ -181,6 +181,8 @@
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
int i; int i;
@@ -246,7 +248,6 @@
- tree vf_vectype; - tree vf_vectype;
- gimple stmt = gsi_stmt (si), pattern_stmt; - gimple stmt = gsi_stmt (si), pattern_stmt;
- stmt_info = vinfo_for_stmt (stmt); - stmt_info = vinfo_for_stmt (stmt);
-
+ tree vf_vectype; + tree vf_vectype;
+ +
+ if (analyze_pattern_stmt) + if (analyze_pattern_stmt)
@@ -256,7 +257,7 @@
+ } + }
+ else + else
+ stmt = gsi_stmt (si); + stmt = gsi_stmt (si);
+
+ stmt_info = vinfo_for_stmt (stmt); + stmt_info = vinfo_for_stmt (stmt);
+ +
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
@@ -376,10 +377,10 @@
} /* stmts in BB */ } /* stmts in BB */
} /* BBs in loop */ } /* BBs in loop */
Index: gcc-4_6-branch/gcc/tree-vect-patterns.c
=== modified file 'gcc/tree-vect-patterns.c' ===================================================================
--- old/gcc/tree-vect-patterns.c 2011-06-22 12:10:44 +0000 --- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2011-09-16 20:31:52.000000000 -0700
+++ new/gcc/tree-vect-patterns.c 2011-07-06 12:04:10 +0000 +++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2011-09-16 20:32:57.289056641 -0700
@@ -39,10 +39,13 @@ @@ -39,10 +39,13 @@
#include "diagnostic-core.h" #include "diagnostic-core.h"
@@ -552,21 +553,6 @@
S5 prod_T = a_T * CONST; S5 prod_T = a_T * CONST;
- Input: - Input:
-
- * LAST_STMT: A stmt from which the pattern search begins. In the example,
- when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
- detected.
-
- Output:
-
- * TYPE_IN: The type of the input arguments to the pattern.
-
- * TYPE_OUT: The type of the output of this pattern.
-
- * Return value: A new stmt that will be used to replace the sequence of
- stmts that constitute the pattern. In this case it will be:
- WIDEN_MULT <a_t, b_t>
- */
+ A special case of multiplication by constants is when 'TYPE' is 4 times + A special case of multiplication by constants is when 'TYPE' is 4 times
+ bigger than 'type', but CONST fits an intermediate type 2 times smaller + bigger than 'type', but CONST fits an intermediate type 2 times smaller
+ than 'TYPE'. In that case we create an additional pattern stmt for S3 + than 'TYPE'. In that case we create an additional pattern stmt for S3
@@ -584,20 +570,30 @@
+ '--> prod_T' = a_it w* CONST; + '--> prod_T' = a_it w* CONST;
+ +
+ Input/Output: + Input/Output:
+
- * LAST_STMT: A stmt from which the pattern search begins. In the example,
- when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
- detected.
+ * STMTS: Contains a stmt from which the pattern search begins. In the + * STMTS: Contains a stmt from which the pattern search begins. In the
+ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} + example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
+ is detected. In case of unsigned widen-mult, the original stmt (S5) is + is detected. In case of unsigned widen-mult, the original stmt (S5) is
+ replaced with S6 in STMTS. In case of multiplication by a constant + replaced with S6 in STMTS. In case of multiplication by a constant
+ of an intermediate type (the last case above), STMTS also contains S3 + of an intermediate type (the last case above), STMTS also contains S3
+ (inserted before S5). + (inserted before S5).
+
- Output:
+ Output: + Output:
+
- * TYPE_IN: The type of the input arguments to the pattern.
+ * TYPE_IN: The type of the input arguments to the pattern. + * TYPE_IN: The type of the input arguments to the pattern.
+
- * TYPE_OUT: The type of the output of this pattern.
+ * TYPE_OUT: The type of the output of this pattern. + * TYPE_OUT: The type of the output of this pattern.
+
- * Return value: A new stmt that will be used to replace the sequence of
- stmts that constitute the pattern. In this case it will be:
- WIDEN_MULT <a_t, b_t>
- */
+ * Return value: A new stmt that will be used to replace the sequence of + * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be: + stmts that constitute the pattern. In this case it will be:
+ WIDEN_MULT <a_t, b_t> + WIDEN_MULT <a_t, b_t>
@@ -932,10 +928,10 @@
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_pattern_recog ==="); fprintf (vect_dump, "=== vect_pattern_recog ===");
Index: gcc-4_6-branch/gcc/tree-vect-slp.c
=== modified file 'gcc/tree-vect-slp.c' ===================================================================
--- old/gcc/tree-vect-slp.c 2011-06-19 10:59:13 +0000 --- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2011-09-16 20:31:52.000000000 -0700
+++ new/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000 +++ gcc-4_6-branch/gcc/tree-vect-slp.c 2011-09-16 20:32:57.289056641 -0700
@@ -152,7 +152,9 @@ @@ -152,7 +152,9 @@
if (loop && def_stmt && gimple_bb (def_stmt) if (loop && def_stmt && gimple_bb (def_stmt)
&& flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
@@ -947,10 +943,10 @@
{ {
if (!*first_stmt_dt0) if (!*first_stmt_dt0)
*pattern0 = true; *pattern0 = true;
Index: gcc-4_6-branch/gcc/tree-vect-stmts.c
=== modified file 'gcc/tree-vect-stmts.c' ===================================================================
--- old/gcc/tree-vect-stmts.c 2011-06-22 06:21:13 +0000 --- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2011-09-16 20:31:52.000000000 -0700
+++ new/gcc/tree-vect-stmts.c 2011-07-06 12:04:10 +0000 +++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2011-09-16 20:32:57.289056641 -0700
@@ -126,33 +126,72 @@ @@ -126,33 +126,72 @@
static void static void
@@ -974,21 +970,6 @@
if (STMT_VINFO_IN_PATTERN_P (stmt_info)) if (STMT_VINFO_IN_PATTERN_P (stmt_info))
{ {
- gimple pattern_stmt; - gimple pattern_stmt;
-
- /* This is the last stmt in a sequence that was detected as a
- pattern that can potentially be vectorized. Don't mark the stmt
- as relevant/live because it's not going to be vectorized.
- Instead mark the pattern-stmt that replaces it. */
-
- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
- stmt_info = vinfo_for_stmt (pattern_stmt);
- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
- save_relevant = STMT_VINFO_RELEVANT (stmt_info);
- save_live_p = STMT_VINFO_LIVE_P (stmt_info);
- stmt = pattern_stmt;
+ bool found = false; + bool found = false;
+ if (!used_in_pattern) + if (!used_in_pattern)
+ { + {
@@ -1026,7 +1007,21 @@
+ pattern that can potentially be vectorized. Don't mark the stmt + pattern that can potentially be vectorized. Don't mark the stmt
+ as relevant/live because it's not going to be vectorized. + as relevant/live because it's not going to be vectorized.
+ Instead mark the pattern-stmt that replaces it. */ + Instead mark the pattern-stmt that replaces it. */
+
- /* This is the last stmt in a sequence that was detected as a
- pattern that can potentially be vectorized. Don't mark the stmt
- as relevant/live because it's not going to be vectorized.
- Instead mark the pattern-stmt that replaces it. */
-
- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
- stmt_info = vinfo_for_stmt (pattern_stmt);
- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
- save_relevant = STMT_VINFO_RELEVANT (stmt_info);
- save_live_p = STMT_VINFO_LIVE_P (stmt_info);
- stmt = pattern_stmt;
+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ +
+ if (vect_print_dump_info (REPORT_DETAILS)) + if (vect_print_dump_info (REPORT_DETAILS))
@@ -1119,9 +1114,6 @@
- VEC_free (gimple, heap, worklist); - VEC_free (gimple, heap, worklist);
- return false; - return false;
- } - }
- }
- else
- return false;
+ break; + break;
+ +
+ case GIMPLE_BINARY_RHS: + case GIMPLE_BINARY_RHS:
@@ -1154,7 +1146,9 @@
+ +
+ default: + default:
+ return false; + return false;
+ } }
- else
- return false;
} }
else if (is_gimple_call (stmt)) else if (is_gimple_call (stmt))
{ {
@@ -1173,7 +1167,7 @@
gcc_assert (vec_stmt); gcc_assert (vec_stmt);
if (gimple_code (vec_stmt) == GIMPLE_PHI) if (gimple_code (vec_stmt) == GIMPLE_PHI)
vec_oprnd = PHI_RESULT (vec_stmt); vec_oprnd = PHI_RESULT (vec_stmt);
@@ -4886,6 +4946,7 @@ @@ -4894,6 +4954,7 @@
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok; bool ok;
tree scalar_type, vectype; tree scalar_type, vectype;
@@ -1181,14 +1175,13 @@
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
@@ -4907,16 +4968,22 @@ @@ -4915,16 +4976,22 @@
- any LABEL_EXPRs in the loop - any LABEL_EXPRs in the loop
- computations that are used only for array indexing or loop control. - computations that are used only for array indexing or loop control.
In basic blocks we only analyze statements that are a part of some SLP In basic blocks we only analyze statements that are a part of some SLP
- instance, therefore, all the statements are relevant. */ - instance, therefore, all the statements are relevant. */
-
+ instance, therefore, all the statements are relevant. + instance, therefore, all the statements are relevant.
+
+ Pattern statement need to be analyzed instead of the original statement + Pattern statement need to be analyzed instead of the original statement
+ if the original statement is not relevant. Otherwise, we analyze both + if the original statement is not relevant. Otherwise, we analyze both
+ statements. */ + statements. */
@@ -1207,7 +1200,7 @@
stmt = pattern_stmt; stmt = pattern_stmt;
stmt_info = vinfo_for_stmt (pattern_stmt); stmt_info = vinfo_for_stmt (pattern_stmt);
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
@@ -4933,6 +5000,21 @@ @@ -4941,6 +5008,21 @@
return true; return true;
} }
} }
@@ -1229,42 +1222,7 @@
switch (STMT_VINFO_DEF_TYPE (stmt_info)) switch (STMT_VINFO_DEF_TYPE (stmt_info))
{ {
@@ -5066,7 +5148,6 @@ @@ -5605,8 +5687,12 @@
bool is_store = false;
gimple vec_stmt = NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- gimple orig_stmt_in_pattern, orig_scalar_stmt = stmt;
bool done;
switch (STMT_VINFO_TYPE (stmt_info))
@@ -5205,25 +5286,7 @@
}
if (vec_stmt)
- {
- STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
- if (orig_stmt_in_pattern)
- {
- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
- /* STMT was inserted by the vectorizer to replace a computation idiom.
- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
- computed this idiom. We need to record a pointer to VEC_STMT in
- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
- documentation of vect_pattern_recog. */
- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
- {
- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo)
- == orig_scalar_stmt);
- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
- }
- }
- }
+ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
return is_store;
}
@@ -5601,8 +5664,12 @@
|| *dt == vect_nested_cycle) || *dt == vect_nested_cycle)
{ {
stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
@@ -1278,10 +1236,10 @@
*vectype = STMT_VINFO_VECTYPE (stmt_info); *vectype = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (*vectype != NULL_TREE); gcc_assert (*vectype != NULL_TREE);
} }
Index: gcc-4_6-branch/gcc/tree-vectorizer.h
=== modified file 'gcc/tree-vectorizer.h' ===================================================================
--- old/gcc/tree-vectorizer.h 2011-07-04 11:13:51 +0000 --- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2011-09-16 20:31:52.000000000 -0700
+++ new/gcc/tree-vectorizer.h 2011-07-11 11:02:55 +0000 +++ gcc-4_6-branch/gcc/tree-vectorizer.h 2011-09-16 20:32:57.299056515 -0700
@@ -890,7 +890,7 @@ @@ -890,7 +890,7 @@
/* Pattern recognition functions. /* Pattern recognition functions.
Additional pattern recognition functions can (and will) be added Additional pattern recognition functions can (and will) be added
@@ -1291,4 +1249,3 @@
#define NUM_PATTERNS 4 #define NUM_PATTERNS 4
void vect_pattern_recog (loop_vec_info); void vect_pattern_recog (loop_vec_info);
@@ -0,0 +1,62 @@
2011-07-31 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r176970:
* modulo-sched.c: Change comment.
(reset_sched_times): Fix print message.
(print_partial_schedule): Add print info.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000
+++ new/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000
@@ -84,13 +84,14 @@
II cycles (i.e. use register copies to prevent a def from overwriting
itself before reaching the use).
- SMS works with countable loops whose loop count can be easily
- adjusted. This is because we peel a constant number of iterations
- into a prologue and epilogue for which we want to avoid emitting
- the control part, and a kernel which is to iterate that constant
- number of iterations less than the original loop. So the control
- part should be a set of insns clearly identified and having its
- own iv, not otherwise used in the loop (at-least for now), which
+ SMS works with countable loops (1) whose control part can be easily
+ decoupled from the rest of the loop and (2) whose loop count can
+ be easily adjusted. This is because we peel a constant number of
+ iterations into a prologue and epilogue for which we want to avoid
+ emitting the control part, and a kernel which is to iterate that
+ constant number of iterations less than the original loop. So the
+ control part should be a set of insns clearly identified and having
+ its own iv, not otherwise used in the loop (at-least for now), which
initializes a register before the loop to the number of iterations.
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
@@ -598,8 +599,8 @@
/* Print the scheduling times after the rotation. */
fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
"crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
- INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
- normalized_time);
+ INSN_UID (crr_insn->node->insn), normalized_time,
+ new_min_cycle);
if (JUMP_P (crr_insn->node->insn))
fprintf (dump_file, " (branch)");
fprintf (dump_file, "\n");
@@ -2550,8 +2551,13 @@
fprintf (dump, "\n[ROW %d ]: ", i);
while (ps_i)
{
- fprintf (dump, "%d, ",
- INSN_UID (ps_i->node->insn));
+ if (JUMP_P (ps_i->node->insn))
+ fprintf (dump, "%d (branch), ",
+ INSN_UID (ps_i->node->insn));
+ else
+ fprintf (dump, "%d, ",
+ INSN_UID (ps_i->node->insn));
+
ps_i = ps_i->next_in_row;
}
}
@@ -0,0 +1,458 @@
2011-08-09 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r177235.
* modulo-sched.c (calculate_stage_count,
calculate_must_precede_follow, get_sched_window,
try_scheduling_node_in_cycle, remove_node_from_ps):
Add declaration.
(update_node_sched_params, set_must_precede_follow, optimize_sc):
New functions.
(reset_sched_times): Call update_node_sched_params.
(sms_schedule): Call optimize_sc.
(get_sched_window): Change function arguments.
(sms_schedule_by_order): Update call to get_sched_window.
Call set_must_precede_follow.
(calculate_stage_count): Add function argument.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000
+++ new/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000
@@ -203,7 +203,16 @@
rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
int, int, int, rtx);
-static int calculate_stage_count (partial_schedule_ptr ps);
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+ int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
+ sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
+ int, int, sbitmap, int *, sbitmap,
+ sbitmap);
+static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
@@ -577,6 +586,36 @@
}
}
+/* Update the sched_params (time, row and stage) for node U using the II,
+ the CYCLE of U and MIN_CYCLE.
+ We're not simply taking the following
+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
+ because the stages may not be aligned on cycle 0. */
+static void
+update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
+{
+ int sc_until_cycle_zero;
+ int stage;
+
+ SCHED_TIME (u) = cycle;
+ SCHED_ROW (u) = SMODULO (cycle, ii);
+
+ /* The calculation of stage count is done adding the number
+ of stages before cycle zero and after cycle zero. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
+}
+
/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
SCHED_ROW and SCHED_STAGE. */
static void
@@ -592,7 +631,6 @@
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
- int sc_until_cycle_zero, stage;
if (dump_file)
{
@@ -608,23 +646,9 @@
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = SMODULO (normalized_time, ii);
-
- /* The calculation of stage count is done adding the number
- of stages before cycle zero and after cycle zero. */
- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
-
- if (SCHED_TIME (u) < 0)
- {
- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
- SCHED_STAGE (u) = sc_until_cycle_zero - stage;
- }
- else
- {
- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
- }
+
+ crr_insn->cycle = normalized_time;
+ update_node_sched_params (u, ii, normalized_time, new_min_cycle);
}
}
@@ -661,6 +685,206 @@
PREV_INSN (last));
}
+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
+ respectively only if cycle C falls on the border of the scheduling
+ window boundaries marked by START and END cycles. STEP is the
+ direction of the window. */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+ sbitmap *tmp_precede, sbitmap must_precede, int c,
+ int start, int end, int step)
+{
+ *tmp_precede = NULL;
+ *tmp_follow = NULL;
+
+ if (c == start)
+ {
+ if (step == 1)
+ *tmp_precede = must_precede;
+ else /* step == -1. */
+ *tmp_follow = must_follow;
+ }
+ if (c == end - step)
+ {
+ if (step == 1)
+ *tmp_follow = must_follow;
+ else /* step == -1. */
+ *tmp_precede = must_precede;
+ }
+
+}
+
+/* Return True if the branch can be moved to row ii-1 while
+ normalizing the partial schedule PS to start from cycle zero and thus
+ optimize the SC. Otherwise return False. */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+ int amount = PS_MIN_CYCLE (ps);
+ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+ int start, end, step;
+ int ii = ps->ii;
+ bool ok = false;
+ int stage_count, stage_count_curr;
+
+ /* Compare the SC after normalization and SC after bringing the branch
+ to row ii-1. If they are equal just bail out. */
+ stage_count = calculate_stage_count (ps, amount);
+ stage_count_curr =
+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1));
+
+ if (stage_count == stage_count_curr)
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS SC already optimized.\n");
+
+ ok = false;
+ goto clear;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS Trying to optimize branch location\n");
+ fprintf (dump_file, "SMS partial schedule before trial:\n");
+ print_partial_schedule (ps, dump_file);
+ }
+
+ /* First, normalize the partial scheduling. */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+ ii, stage_count);
+ print_partial_schedule (ps, dump_file);
+ }
+
+ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
+ {
+ ok = true;
+ goto clear;
+ }
+
+ sbitmap_ones (sched_nodes);
+
+ /* Calculate the new placement of the branch. It should be in row
+ ii-1 and fall into it's scheduling window. */
+ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+ &step, &end) == 0)
+ {
+ bool success;
+ ps_insn_ptr next_ps_i;
+ int branch_cycle = SCHED_TIME (g->closing_branch);
+ int row = SMODULO (branch_cycle, ps->ii);
+ int num_splits = 0;
+ sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+ int c;
+
+ if (dump_file)
+ fprintf (dump_file, "\nTrying to schedule node %d "
+ "INSN = %d in (%d .. %d) step %d\n",
+ g->closing_branch->cuid,
+ (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+ if (step == 1)
+ {
+ c = start + ii - SMODULO (start, ii) - 1;
+ gcc_assert (c >= start);
+ if (c >= end)
+ {
+ ok = false;
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ goto clear;
+ }
+ }
+ else
+ {
+ c = start - SMODULO (start, ii) - 1;
+ gcc_assert (c <= start);
+
+ if (c <= end)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ ok = false;
+ goto clear;
+ }
+ }
+
+ must_precede = sbitmap_alloc (g->num_nodes);
+ must_follow = sbitmap_alloc (g->num_nodes);
+
+ /* Try to schedule the branch is it's new cycle. */
+ calculate_must_precede_follow (g->closing_branch, start, end,
+ step, ii, sched_nodes,
+ must_precede, must_follow);
+
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, c, start, end, step);
+
+ /* Find the element in the partial schedule related to the closing
+ branch so we can remove it from it's current cycle. */
+ for (next_ps_i = ps->rows[row];
+ next_ps_i; next_ps_i = next_ps_i->next_in_row)
+ if (next_ps_i->node->cuid == g->closing_branch->cuid)
+ break;
+
+ gcc_assert (next_ps_i);
+ gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid, c,
+ sched_nodes, &num_splits,
+ tmp_precede, tmp_follow);
+ gcc_assert (num_splits == 0);
+ if (!success)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d, "
+ "bringing it back to cycle %d\n", c, branch_cycle);
+
+ /* The branch was failed to be placed in row ii - 1.
+ Put it back in it's original place in the partial
+ schedualing. */
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, branch_cycle, start, end,
+ step);
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid,
+ branch_cycle, sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ gcc_assert (success && (num_splits == 0));
+ ok = false;
+ }
+ else
+ {
+ /* The branch is placed in row ii - 1. */
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS success in moving branch to cycle %d\n", c);
+
+ update_node_sched_params (g->closing_branch, ii, c,
+ PS_MIN_CYCLE (ps));
+ ok = true;
+ }
+
+ free (must_precede);
+ free (must_follow);
+ }
+
+clear:
+ free (sched_nodes);
+ return ok;
+}
+
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int to_stage, int for_prolog, rtx count_reg)
@@ -1116,6 +1340,7 @@
int mii, rec_mii;
unsigned stage_count = 0;
HOST_WIDEST_INT loop_count = 0;
+ bool opt_sc_p = false;
if (! (g = g_arr[loop->num]))
continue;
@@ -1197,14 +1422,32 @@
set_node_sched_params (g);
ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
- if (ps)
- {
- stage_count = calculate_stage_count (ps);
- gcc_assert(stage_count >= 1);
- PS_STAGE_COUNT(ps) = stage_count;
- }
-
+
+ if (ps)
+ {
+ /* Try to achieve optimized SC by normalizing the partial
+ schedule (having the cycles start from cycle zero).
+ The branch location must be placed in row ii-1 in the
+ final scheduling. If failed, shift all instructions to
+ position the branch in row ii-1. */
+ opt_sc_p = optimize_sc (ps, g);
+ if (opt_sc_p)
+ stage_count = calculate_stage_count (ps, 0);
+ else
+ {
+ /* Bring the branch to cycle ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ if (dump_file)
+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
+
+ stage_count = calculate_stage_count (ps, amount);
+ }
+
+ gcc_assert (stage_count >= 1);
+ PS_STAGE_COUNT (ps) = stage_count;
+ }
+
/* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1 means that there is no interleaving between iterations thus
we let the scheduling passes do the job in this case. */
@@ -1225,12 +1468,16 @@
else
{
struct undo_replace_buff_elem *reg_move_replaces;
- int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ if (!opt_sc_p)
+ {
+ /* Rotate the partial schedule to have the branch in row ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ }
- /* Set the stage boundaries. The closing_branch was scheduled
- and should appear in the last (ii-1) row. */
- reset_sched_times (ps, amount);
- rotate_partial_schedule (ps, amount);
set_columns_for_ps (ps);
canon_loop (loop);
@@ -1382,13 +1629,11 @@
scheduling window is empty and zero otherwise. */
static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
int start, step, end;
ddg_edge_ptr e;
- int u = nodes_order [i];
- ddg_node_ptr u_node = &ps->g->nodes[u];
sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
@@ -1800,7 +2045,7 @@
/* Try to get non-empty scheduling window. */
success = 0;
- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+ if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
&step, &end) == 0)
{
if (dump_file)
@@ -1817,24 +2062,11 @@
for (c = start; c != end; c += step)
{
- sbitmap tmp_precede = NULL;
- sbitmap tmp_follow = NULL;
-
- if (c == start)
- {
- if (step == 1)
- tmp_precede = must_precede;
- else /* step == -1. */
- tmp_follow = must_follow;
- }
- if (c == end - step)
- {
- if (step == 1)
- tmp_follow = must_follow;
- else /* step == -1. */
- tmp_precede = must_precede;
- }
-
+ sbitmap tmp_precede, tmp_follow;
+
+ set_must_precede_follow (&tmp_follow, must_follow,
+ &tmp_precede, must_precede,
+ c, start, end, step);
success =
try_scheduling_node_in_cycle (ps, u_node, u, c,
sched_nodes,
@@ -2899,12 +3131,10 @@
}
/* Calculate the stage count of the partial schedule PS. The calculation
- takes into account the rotation to bring the closing branch to row
- ii-1. */
+ takes into account the rotation amount passed in ROTATION_AMOUNT. */
int
-calculate_stage_count (partial_schedule_ptr ps)
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
{
- int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
@@ -0,0 +1,39 @@
2011-08-09 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r176972:
* ddg.c (create_ddg_dep_from_intra_loop_link): Remove
the creation of anti-dep edge from a branch.
(add_cross_iteration_register_deps):
Create anti-dep edge from a branch.
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-07-04 11:00:06 +0000
+++ new/gcc/ddg.c 2011-07-31 11:29:10 +0000
@@ -197,11 +197,6 @@
}
}
- /* If a true dep edge enters the branch create an anti edge in the
- opposite direction to prevent the creation of reg-moves. */
- if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn))
- create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1);
-
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e);
@@ -306,8 +301,11 @@
gcc_assert (first_def_node);
+ /* Always create the edge if the use node is a branch in
+ order to prevent the creation of reg-moves. */
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
- || !flag_modulo_sched_allow_regmoves)
+ || !flag_modulo_sched_allow_regmoves
+ || JUMP_P (use_node->insn))
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
@@ -0,0 +1,94 @@
2011-08-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
Backport from mainline:
2011-07-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/vfp.md ("*movdf_vfp"): Handle the VFP constraints
before the core constraints. Adjust attributes.
(*thumb2_movdf_vfp"): Likewise.
=== modified file 'gcc/config/arm/vfp.md'
--- old/gcc/config/arm/vfp.md 2011-01-20 22:03:29 +0000
+++ new/gcc/config/arm/vfp.md 2011-07-27 12:59:19 +0000
@@ -401,8 +401,8 @@
;; DFmode moves
(define_insn "*movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -418,9 +418,9 @@
gcc_assert (TARGET_VFP_DOUBLE);
return \"fconstd%?\\t%P0, #%G1\";
case 3: case 4:
+ return output_move_vfp (operands);
+ case 5: case 6:
return output_move_double (operands);
- case 5: case 6:
- return output_move_vfp (operands);
case 7:
if (TARGET_VFP_SINGLE)
return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
@@ -435,7 +435,7 @@
"
[(set_attr "type"
"r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
(eq_attr "alternative" "7")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -449,8 +449,8 @@
)
(define_insn "*thumb2_movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
"*
{
@@ -463,10 +463,10 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"fconstd%?\\t%P0, #%G1\";
- case 3: case 4: case 8:
+ case 3: case 4:
+ return output_move_vfp (operands);
+ case 5: case 6: case 8:
return output_move_double (operands);
- case 5: case 6:
- return output_move_vfp (operands);
case 7:
if (TARGET_VFP_SINGLE)
return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
@@ -478,8 +478,8 @@
}
"
[(set_attr "type"
- "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
+ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
(eq_attr "alternative" "7")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -487,8 +487,8 @@
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
+ (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
)
@@ -0,0 +1,30 @@
2011-08-15 Michael Hope <michael.hope@linaro.org>
Backport from mainline r177357
gcc/testsuite/
2011-08-04 Ian Bolton <ian.bolton@arm.com>
* gcc.target/arm/vfp-1.c: no large negative offsets on Thumb2.
=== modified file 'gcc/testsuite/gcc.target/arm/vfp-1.c'
--- old/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-01-01 08:52:03 +0000
+++ new/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-08-09 23:22:51 +0000
@@ -127,13 +127,13 @@
void test_ldst (float f[], double d[]) {
/* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */
- /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */
+ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
/* { dg-final { scan-assembler "add.+ r0, #1024" } } */
- /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */
+ /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */
f[256] = f[255] + f[-255];
/* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */
- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */
+ /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
/* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */
d[32] = d[127] + d[-127];
}
@@ -0,0 +1,33 @@
2011-08-15 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* config/rs6000/rs6000.c (paired_expand_vector_init): Don't create
CONST_VECTORs with symbolic elements.
(rs6000_expand_vector_init): Likewise.
=== modified file 'gcc/config/rs6000/rs6000.c'
--- old/gcc/config/rs6000/rs6000.c 2011-07-27 18:17:15 +0000
+++ new/gcc/config/rs6000/rs6000.c 2011-08-16 08:59:36 +0000
@@ -5134,7 +5134,9 @@
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
++n_var;
}
if (n_var == 0)
@@ -5286,7 +5288,9 @@
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
++n_var, one_var = i;
else if (x != CONST0_RTX (inner_mode))
all_const_zero = false;
@@ -0,0 +1,61 @@
2011-08-18 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-08-12 Richard Sandiford <rdsandiford@googlemail.com>
* config/arm/arm.c (get_label_padding): New function.
(create_fix_barrier, arm_reorg): Use it.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/arm.c 2011-08-12 08:08:31 +0000
@@ -11769,6 +11769,19 @@
return 0;
}
+/* Return the maximum amount of padding that will be inserted before
+ label LABEL. */
+
+static HOST_WIDE_INT
+get_label_padding (rtx label)
+{
+ HOST_WIDE_INT align, min_insn_size;
+
+ align = 1 << label_to_alignment (label);
+ min_insn_size = TARGET_THUMB ? 2 : 4;
+ return align > min_insn_size ? align - min_insn_size : 0;
+}
+
/* Move a minipool fix MP from its current location to before MAX_MP.
If MAX_MP is NULL, then MP doesn't need moving, but the addressing
constraints may need updating. */
@@ -12315,8 +12328,12 @@
within range. */
gcc_assert (GET_CODE (from) != BARRIER);
- /* Count the length of this insn. */
- count += get_attr_length (from);
+ /* Count the length of this insn. This must stay in sync with the
+ code that pushes minipool fixes. */
+ if (LABEL_P (from))
+ count += get_label_padding (from);
+ else
+ count += get_attr_length (from);
/* If there is a jump table, add its length. */
tmp = is_jump_table (from);
@@ -12736,6 +12753,11 @@
insn = table;
}
}
+ else if (LABEL_P (insn))
+ /* Add the worst-case padding due to alignment. We don't add
+ the _current_ padding because the minipool insertions
+ themselves might change it. */
+ address += get_label_padding (insn);
}
fix = minipool_fix_head;
@@ -0,0 +1,34 @@
2011-08-18 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-08-18 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/arm.c (arm_rtx_costs_1): Don't modify the costs of SET.
(arm_size_rtx_costs): Likewise.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-08-12 08:08:31 +0000
+++ new/gcc/config/arm/arm.c 2011-08-18 13:53:37 +0000
@@ -7464,6 +7464,9 @@
*total = COSTS_N_INSNS (4);
return true;
+ case SET:
+ return false;
+
default:
*total = COSTS_N_INSNS (4);
return false;
@@ -7811,6 +7814,9 @@
*total = COSTS_N_INSNS (1) + 1;
return true;
+ case SET:
+ return false;
+
default:
if (mode != VOIDmode)
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,23 @@
2011-08-26 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-08-26 Richard Sandiford <richard.sandiford@linaro.org>
* df-problems.c (df_note_bb_compute): Pass uses rather than defs
to df_set_dead_notes_for_mw.
=== modified file 'gcc/df-problems.c'
--- old/gcc/df-problems.c 2011-07-07 19:10:01 +0000
+++ new/gcc/df-problems.c 2011-08-26 14:32:47 +0000
@@ -3375,7 +3375,7 @@
while (*mws_rec)
{
struct df_mw_hardreg *mws = *mws_rec;
- if ((DF_MWS_REG_DEF_P (mws))
+ if (DF_MWS_REG_USE_P (mws)
&& !df_ignore_stack_reg (mws->start_regno))
{
bool really_add_notes = debug_insn != 0;
@@ -0,0 +1,23 @@
2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
2011-08-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/48328
* config/arm/arm.h (CASE_VECTOR_SHORTEN_MODE): Fix distance
for tbh instructions.
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-08-24 17:35:16 +0000
+++ new/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000
@@ -1961,7 +1961,7 @@
: min >= -4096 && max < 4096 \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \
: SImode) \
- : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \
+ : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \
: (max >= 0x200) ? HImode \
: QImode))
@@ -0,0 +1,75 @@
2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/arm/cortex-a9.md ("cortex_a9_mult_long"): New.
("cortex_a9_multiply_long"): New and use above. Handle all
long multiply cases.
("cortex_a9_multiply"): Handle smmul and smmulr.
("cortex_a9_mac"): Handle smmla.
=== modified file 'gcc/config/arm/cortex-a9.md'
--- old/gcc/config/arm/cortex-a9.md 2011-01-18 15:28:08 +0000
+++ new/gcc/config/arm/cortex-a9.md 2011-08-26 08:52:15 +0000
@@ -68,7 +68,8 @@
"cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")
(define_reservation "cortex_a9_mac"
"cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")
-
+(define_reservation "cortex_a9_mult_long"
+ "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb")
;; Issue at the same time along the load store pipeline and
;; the VFP / Neon pipeline is not possible.
@@ -139,29 +140,35 @@
(eq_attr "insn" "smlaxy"))
"cortex_a9_mac16")
-
(define_insn_reservation "cortex_a9_multiply" 4
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "mul"))
+ (eq_attr "insn" "mul,smmul,smmulr"))
"cortex_a9_mult")
(define_insn_reservation "cortex_a9_mac" 4
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "mla"))
+ (eq_attr "insn" "mla,smmla"))
"cortex_a9_mac")
+(define_insn_reservation "cortex_a9_multiply_long" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
+ "cortex_a9_mult_long")
+
;; An instruction with a result in E2 can be forwarded
;; to E2 or E1 or M1 or the load store unit in the next cycle.
(define_bypass 1 "cortex_a9_dp"
"cortex_a9_dp_shift, cortex_a9_multiply,
cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
+ cortex_a9_multiply_long")
(define_bypass 2 "cortex_a9_dp_shift"
"cortex_a9_dp_shift, cortex_a9_multiply,
cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
+ cortex_a9_multiply_long")
;; An instruction in the load store pipeline can provide
;; read access to a DP instruction in the P0 default pipeline
@@ -212,7 +219,7 @@
(define_bypass 1
"cortex_a9_fps"
- "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
+ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long")
;; Scheduling on the FP_ADD pipeline.
(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,948 @@
2011-09-12 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF mainline:
2011-08-30 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2,
b3 and b4 unsigned.
2011-08-30 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_gen_constant): Set can_negate correctly
when code is SET.
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (struct four_ints): New type.
(count_insns_for_constant): Delete function.
(find_best_start): Delete function.
(optimal_immediate_sequence): New function.
(optimal_immediate_sequence_1): New function.
(arm_gen_constant): Move constant splitting code to
optimal_immediate_sequence.
Rewrite constant negation/invertion code.
gcc/testsuite/
* gcc.target/arm/thumb2-replicated-constant1.c: New file.
* gcc.target/arm/thumb2-replicated-constant2.c: New file.
* gcc.target/arm/thumb2-replicated-constant3.c: New file.
* gcc.target/arm/thumb2-replicated-constant4.c: New file.
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
* config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
Remove prototype. Remove static function type.
* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
Add arch attribute.
* config/arm/constraints.md (Pj, PJ): New constraints.
2011-04-20 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_gen_constant): Move mowv support ....
(const_ok_for_op): ... to here.
2011-04-20 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_gen_constant): Remove redundant can_invert.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000
@@ -46,6 +46,7 @@
extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT);
+extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000
+++ new/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000
@@ -63,6 +63,11 @@
void (*arm_lang_output_object_attributes_hook)(void);
+struct four_ints
+{
+ int i[4];
+};
+
/* Forward function declarations. */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
@@ -81,7 +86,6 @@
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
@@ -129,7 +133,13 @@
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
-static int count_insns_for_constant (HOST_WIDE_INT, int);
+static int optimal_immediate_sequence (enum rtx_code code,
+ unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence);
+static int optimal_immediate_sequence_1 (enum rtx_code code,
+ unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence,
+ int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
@@ -2525,7 +2535,7 @@
}
/* Return true if I is a valid constant for the operation CODE. */
-static int
+int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
if (const_ok_for_arm (i))
@@ -2533,7 +2543,21 @@
switch (code)
{
+ case SET:
+ /* See if we can use movw. */
+ if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
+ return 1;
+ else
+ return 0;
+
case PLUS:
+ /* See if we can use addw or subw. */
+ if (TARGET_THUMB2
+ && ((i & 0xfffff000) == 0
+ || ((-i) & 0xfffff000) == 0))
+ return 1;
+ /* else fall through. */
+
case COMPARE:
case EQ:
case NE:
@@ -2649,68 +2673,41 @@
1);
}
-/* Return the number of instructions required to synthesize the given
- constant, if we start emitting them from bit-position I. */
-static int
-count_insns_for_constant (HOST_WIDE_INT remainder, int i)
-{
- HOST_WIDE_INT temp1;
- int step_size = TARGET_ARM ? 2 : 1;
- int num_insns = 0;
-
- gcc_assert (TARGET_ARM || i == 0);
-
- do
- {
- int end;
-
- if (i <= 0)
- i += 32;
- if (remainder & (((1 << step_size) - 1) << (i - step_size)))
- {
- end = i - 8;
- if (end < 0)
- end += 32;
- temp1 = remainder & ((0x0ff << end)
- | ((i < end) ? (0xff >> (32 - end)) : 0));
- remainder &= ~temp1;
- num_insns++;
- i -= 8 - step_size;
- }
- i -= step_size;
- } while (remainder);
- return num_insns;
-}
-
-static int
-find_best_start (unsigned HOST_WIDE_INT remainder)
+/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
+ ARM/THUMB2 immediates, and add up to VAL.
+ Thr function return value gives the number of insns required. */
+static int
+optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence)
{
int best_consecutive_zeros = 0;
int i;
int best_start = 0;
+ int insns1, insns2;
+ struct four_ints tmp_sequence;
/* If we aren't targetting ARM, the best place to start is always at
- the bottom. */
- if (! TARGET_ARM)
- return 0;
-
- for (i = 0; i < 32; i += 2)
+ the bottom, otherwise look more closely. */
+ if (TARGET_ARM)
{
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
+ for (i = 0; i < 32; i += 2)
{
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
- {
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
- }
- i -= 2;
+ int consecutive_zeros = 0;
+
+ if (!(val & (3 << i)))
+ {
+ while ((i < 32) && !(val & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
+ }
}
}
@@ -2737,13 +2734,161 @@
the constant starting from `best_start', and also starting from
zero (i.e. with bit 31 first to be output). If `best_start' doesn't
yield a shorter sequence, we may as well use zero. */
+ insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
-
- return best_start;
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
+ {
+ insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
+ if (insns2 <= insns1)
+ {
+ *return_sequence = tmp_sequence;
+ insns1 = insns2;
+ }
+ }
+
+ return insns1;
+}
+
+/* As for optimal_immediate_sequence, but starting at bit-position I. */
+static int
+optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence, int i)
+{
+ int remainder = val & 0xffffffff;
+ int insns = 0;
+
+ /* Try and find a way of doing the job in either two or three
+ instructions.
+
+ In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
+ location. We start at position I. This may be the MSB, or
+ optimial_immediate_sequence may have positioned it at the largest block
+ of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
+ wrapping around to the top of the word when we drop off the bottom.
+ In the worst case this code should produce no more than four insns.
+
+ In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
+ constants, shifted to any arbitrary location. We should always start
+ at the MSB. */
+ do
+ {
+ int end;
+ unsigned int b1, b2, b3, b4;
+ unsigned HOST_WIDE_INT result;
+ int loc;
+
+ gcc_assert (insns < 4);
+
+ if (i <= 0)
+ i += 32;
+
+ /* First, find the next normal 12/8-bit shifted/rotated immediate. */
+ if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
+ {
+ loc = i;
+ if (i <= 12 && TARGET_THUMB2 && code == PLUS)
+ /* We can use addw/subw for the last 12 bits. */
+ result = remainder;
+ else
+ {
+ /* Use an 8-bit shifted/rotated immediate. */
+ end = i - 8;
+ if (end < 0)
+ end += 32;
+ result = remainder & ((0x0ff << end)
+ | ((i < end) ? (0xff >> (32 - end))
+ : 0));
+ i -= 8;
+ }
+ }
+ else
+ {
+ /* Arm allows rotates by a multiple of two. Thumb-2 allows
+ arbitrary shifts. */
+ i -= TARGET_ARM ? 2 : 1;
+ continue;
+ }
+
+ /* Next, see if we can do a better job with a thumb2 replicated
+ constant.
+
+ We do it this way around to catch the cases like 0x01F001E0 where
+ two 8-bit immediates would work, but a replicated constant would
+ make it worse.
+
+ TODO: 16-bit constants that don't clear all the bits, but still win.
+ TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
+ if (TARGET_THUMB2)
+ {
+ b1 = (remainder & 0xff000000) >> 24;
+ b2 = (remainder & 0x00ff0000) >> 16;
+ b3 = (remainder & 0x0000ff00) >> 8;
+ b4 = remainder & 0xff;
+
+ if (loc > 24)
+ {
+ /* The 8-bit immediate already found clears b1 (and maybe b2),
+ but must leave b3 and b4 alone. */
+
+ /* First try to find a 32-bit replicated constant that clears
+ almost everything. We can assume that we can't do it in one,
+ or else we wouldn't be here. */
+ unsigned int tmp = b1 & b2 & b3 & b4;
+ unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
+ + (tmp << 24);
+ unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
+ + (tmp == b3) + (tmp == b4);
+ if (tmp
+ && (matching_bytes >= 3
+ || (matching_bytes == 2
+ && const_ok_for_op (remainder & ~tmp2, code))))
+ {
+ /* At least 3 of the bytes match, and the fourth has at
+ least as many bits set, or two of the bytes match
+ and it will only require one more insn to finish. */
+ result = tmp2;
+ i = tmp != b1 ? 32
+ : tmp != b2 ? 24
+ : tmp != b3 ? 16
+ : 8;
+ }
+
+ /* Second, try to find a 16-bit replicated constant that can
+ leave three of the bytes clear. If b2 or b4 is already
+ zero, then we can. If the 8-bit from above would not
+ clear b2 anyway, then we still win. */
+ else if (b1 == b3 && (!b2 || !b4
+ || (remainder & 0x00ff0000 & ~result)))
+ {
+ result = remainder & 0xff00ff00;
+ i = 24;
+ }
+ }
+ else if (loc > 16)
+ {
+ /* The 8-bit immediate already found clears b2 (and maybe b3)
+ and we don't get here unless b1 is alredy clear, but it will
+ leave b4 unchanged. */
+
+ /* If we can clear b2 and b4 at once, then we win, since the
+ 8-bits couldn't possibly reach that far. */
+ if (b2 == b4)
+ {
+ result = remainder & 0x00ff00ff;
+ i = 16;
+ }
+ }
+ }
+
+ return_sequence->i[insns++] = result;
+ remainder &= ~result;
+
+ if (code == SET || code == MINUS)
+ code = PLUS;
+ }
+ while (remainder);
+
+ return insns;
}
/* Emit an instruction with the indicated PATTERN. If COND is
@@ -2760,7 +2905,6 @@
/* As above, but extra parameter GENERATE which, if clear, suppresses
RTL generation. */
-/* ??? This needs more work for thumb2. */
static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
@@ -2772,15 +2916,15 @@
int final_invert = 0;
int can_negate_initial = 0;
int i;
- int num_bits_set = 0;
int set_sign_bit_copies = 0;
int clear_sign_bit_copies = 0;
int clear_zero_bit_copies = 0;
int set_zero_bit_copies = 0;
- int insns = 0;
+ int insns = 0, neg_insns, inv_insns;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
- int step_size = TARGET_ARM ? 2 : 1;
+ struct four_ints *immediates;
+ struct four_ints pos_immediates, neg_immediates, inv_immediates;
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
@@ -2789,7 +2933,6 @@
{
case SET:
can_invert = 1;
- can_negate = 1;
break;
case PLUS:
@@ -2817,9 +2960,6 @@
gen_rtx_SET (VOIDmode, target, source));
return 1;
}
-
- if (TARGET_THUMB2)
- can_invert = 1;
break;
case AND:
@@ -2861,6 +3001,7 @@
gen_rtx_NOT (mode, source)));
return 1;
}
+ final_invert = 1;
break;
case MINUS:
@@ -2883,7 +3024,6 @@
source)));
return 1;
}
- can_negate = 1;
break;
@@ -2892,9 +3032,7 @@
}
/* If we can do it in one insn get out quickly. */
- if (const_ok_for_arm (val)
- || (can_negate_initial && const_ok_for_arm (-val))
- || (can_invert && const_ok_for_arm (~val)))
+ if (const_ok_for_op (val, code))
{
if (generate)
emit_constant_insn (cond,
@@ -2947,15 +3085,6 @@
switch (code)
{
case SET:
- /* See if we can use movw. */
- if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
- {
- if (generate)
- emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
- GEN_INT (val)));
- return 1;
- }
-
/* See if we can do this by sign_extending a constant that is known
to be negative. This is a good, way of doing it, since the shift
may well merge into a subsequent insn. */
@@ -3306,121 +3435,97 @@
break;
}
- for (i = 0; i < 32; i++)
- if (remainder & (1 << i))
- num_bits_set++;
-
- if ((code == AND)
- || (code != IOR && can_invert && num_bits_set > 16))
- remainder ^= 0xffffffff;
- else if (code == PLUS && num_bits_set > 16)
- remainder = (-remainder) & 0xffffffff;
-
- /* For XOR, if more than half the bits are set and there's a sequence
- of more than 8 consecutive ones in the pattern then we can XOR by the
- inverted constant and then invert the final result; this may save an
- instruction and might also lead to the final mvn being merged with
- some other operation. */
- else if (code == XOR && num_bits_set > 16
- && (count_insns_for_constant (remainder ^ 0xffffffff,
- find_best_start
- (remainder ^ 0xffffffff))
- < count_insns_for_constant (remainder,
- find_best_start (remainder))))
- {
- remainder ^= 0xffffffff;
- final_invert = 1;
+ /* Calculate what the instruction sequences would be if we generated it
+ normally, negated, or inverted. */
+ if (code == AND)
+ /* AND cannot be split into multiple insns, so invert and use BIC. */
+ insns = 99;
+ else
+ insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
+
+ if (can_negate)
+ neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
+ &neg_immediates);
+ else
+ neg_insns = 99;
+
+ if (can_invert || final_invert)
+ inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
+ &inv_immediates);
+ else
+ inv_insns = 99;
+
+ immediates = &pos_immediates;
+
+ /* Is the negated immediate sequence more efficient? */
+ if (neg_insns < insns && neg_insns <= inv_insns)
+ {
+ insns = neg_insns;
+ immediates = &neg_immediates;
+ }
+ else
+ can_negate = 0;
+
+ /* Is the inverted immediate sequence more efficient?
+ We must allow for an extra NOT instruction for XOR operations, although
+ there is some chance that the final 'mvn' will get optimized later. */
+ if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
+ {
+ insns = inv_insns;
+ immediates = &inv_immediates;
}
else
{
can_invert = 0;
- can_negate = 0;
+ final_invert = 0;
}
- /* Now try and find a way of doing the job in either two or three
- instructions.
- We start by looking for the largest block of zeros that are aligned on
- a 2-bit boundary, we then fill up the temps, wrapping around to the
- top of the word when we drop off the bottom.
- In the worst case this code should produce no more than four insns.
- Thumb-2 constants are shifted, not rotated, so the MSB is always the
- best place to start. */
-
- /* ??? Use thumb2 replicated constants when the high and low halfwords are
- the same. */
- {
- /* Now start emitting the insns. */
- i = find_best_start (remainder);
- do
- {
- int end;
-
- if (i <= 0)
- i += 32;
- if (remainder & (3 << (i - 2)))
- {
- end = i - 8;
- if (end < 0)
- end += 32;
- temp1 = remainder & ((0x0ff << end)
- | ((i < end) ? (0xff >> (32 - end)) : 0));
- remainder &= ~temp1;
-
- if (generate)
- {
- rtx new_src, temp1_rtx;
-
- if (code == SET || code == MINUS)
- {
- new_src = (subtargets ? gen_reg_rtx (mode) : target);
- if (can_invert && code != MINUS)
- temp1 = ~temp1;
- }
- else
- {
- if ((final_invert || remainder) && subtargets)
- new_src = gen_reg_rtx (mode);
- else
- new_src = target;
- if (can_invert)
- temp1 = ~temp1;
- else if (can_negate)
- temp1 = -temp1;
- }
-
- temp1 = trunc_int_for_mode (temp1, mode);
- temp1_rtx = GEN_INT (temp1);
-
- if (code == SET)
- ;
- else if (code == MINUS)
- temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
- else
- temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
-
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, new_src,
- temp1_rtx));
- source = new_src;
- }
-
- if (code == SET)
- {
- can_invert = 0;
- code = PLUS;
- }
- else if (code == MINUS)
+ /* Now output the chosen sequence as instructions. */
+ if (generate)
+ {
+ for (i = 0; i < insns; i++)
+ {
+ rtx new_src, temp1_rtx;
+
+ temp1 = immediates->i[i];
+
+ if (code == SET || code == MINUS)
+ new_src = (subtargets ? gen_reg_rtx (mode) : target);
+ else if ((final_invert || i < (insns - 1)) && subtargets)
+ new_src = gen_reg_rtx (mode);
+ else
+ new_src = target;
+
+ if (can_invert)
+ temp1 = ~temp1;
+ else if (can_negate)
+ temp1 = -temp1;
+
+ temp1 = trunc_int_for_mode (temp1, mode);
+ temp1_rtx = GEN_INT (temp1);
+
+ if (code == SET)
+ ;
+ else if (code == MINUS)
+ temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
+ else
+ temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
+
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ temp1_rtx));
+ source = new_src;
+
+ if (code == SET)
+ {
+ can_negate = can_invert;
+ can_invert = 0;
code = PLUS;
-
- insns++;
- i -= 8 - step_size;
- }
- /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
- shifts. */
- i -= step_size;
- }
- while (remainder);
- }
+ }
+ else if (code == MINUS)
+ code = PLUS;
+ }
+ }
if (final_invert)
{
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000
+++ new/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000
@@ -701,21 +701,24 @@
;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
;; put the duplicated register first, and not try the commutative version.
(define_insn_and_split "*arm_addsi3"
- [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r")
- (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")
- (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
+ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
"TARGET_32BIT"
"@
add%?\\t%0, %1, %2
add%?\\t%0, %1, %2
add%?\\t%0, %2, %1
- sub%?\\t%0, %1, #%n2
- sub%?\\t%0, %1, #%n2
+ addw%?\\t%0, %1, %2
+ addw%?\\t%0, %1, %2
+ sub%?\\t%0, %1, #%n2
+ sub%?\\t%0, %1, #%n2
+ subw%?\\t%0, %1, #%n2
+ subw%?\\t%0, %1, #%n2
#"
"TARGET_32BIT
&& GET_CODE (operands[2]) == CONST_INT
- && !(const_ok_for_arm (INTVAL (operands[2]))
- || const_ok_for_arm (-INTVAL (operands[2])))
+ && !const_ok_for_op (INTVAL (operands[2]), PLUS)
&& (reload_completed || !arm_eliminable_register (operands[1]))"
[(clobber (const_int 0))]
"
@@ -724,8 +727,9 @@
operands[1], 0);
DONE;
"
- [(set_attr "length" "4,4,4,4,4,16")
- (set_attr "predicable" "yes")]
+ [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+ (set_attr "predicable" "yes")
+ (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
)
(define_insn_and_split "*thumb1_addsi3"
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-01-03 20:52:22 +0000
+++ new/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000
@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -74,6 +74,18 @@
(and (match_code "const_int")
(match_test "(ival & 0xffff0000) == 0")))))
+(define_constraint "Pj"
+ "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
+ (and (match_code "const_int")
+ (and (match_test "TARGET_THUMB2")
+ (match_test "(ival & 0xfffff000) == 0"))))
+
+(define_constraint "PJ"
+ "@internal A constant that satisfies the Pj constrant if negated."
+ (and (match_code "const_int")
+ (and (match_test "TARGET_THUMB2")
+ (match_test "((-ival) & 0xfffff000) == 0"))))
+
(define_register_constraint "k" "STACK_REG"
"@internal The stack register.")
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,27 @@
+/* Ensure simple replicated constant immediates work. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a + 0xfefefefe;
+}
+
+/* { dg-final { scan-assembler "add.*#-16843010" } } */
+
+int
+foo2 (int a)
+{
+ return a - 0xab00ab00;
+}
+
+/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
+
+int
+foo3 (int a)
+{
+ return a & 0x00cd00cd;
+}
+
+/* { dg-final { scan-assembler "and.*#13435085" } } */
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,75 @@
+/* Ensure split constants can use replicated patterns. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a + 0xfe00fe01;
+}
+
+/* { dg-final { scan-assembler "add.*#-33489408" } } */
+/* { dg-final { scan-assembler "add.*#1" } } */
+
+int
+foo2 (int a)
+{
+ return a + 0xdd01dd00;
+}
+
+/* { dg-final { scan-assembler "add.*#-587145984" } } */
+/* { dg-final { scan-assembler "add.*#65536" } } */
+
+int
+foo3 (int a)
+{
+ return a + 0x00443344;
+}
+
+/* { dg-final { scan-assembler "add.*#4456516" } } */
+/* { dg-final { scan-assembler "add.*#13056" } } */
+
+int
+foo4 (int a)
+{
+ return a + 0x77330033;
+}
+
+/* { dg-final { scan-assembler "add.*#1996488704" } } */
+/* { dg-final { scan-assembler "add.*#3342387" } } */
+
+int
+foo5 (int a)
+{
+ return a + 0x11221122;
+}
+
+/* { dg-final { scan-assembler "add.*#285217024" } } */
+/* { dg-final { scan-assembler "add.*#2228258" } } */
+
+int
+foo6 (int a)
+{
+ return a + 0x66666677;
+}
+
+/* { dg-final { scan-assembler "add.*#1717986918" } } */
+/* { dg-final { scan-assembler "add.*#17" } } */
+
+int
+foo7 (int a)
+{
+ return a + 0x99888888;
+}
+
+/* { dg-final { scan-assembler "add.*#-2004318072" } } */
+/* { dg-final { scan-assembler "add.*#285212672" } } */
+
+int
+foo8 (int a)
+{
+ return a + 0xdddddfff;
+}
+
+/* { dg-final { scan-assembler "add.*#-572662307" } } */
+/* { dg-final { scan-assembler "addw.*#546" } } */
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,28 @@
+/* Ensure negated/inverted replicated constant immediates work. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a | 0xffffff00;
+}
+
+/* { dg-final { scan-assembler "orn.*#255" } } */
+
+int
+foo2 (int a)
+{
+ return a & 0xffeeffee;
+}
+
+/* { dg-final { scan-assembler "bic.*#1114129" } } */
+
+int
+foo3 (int a)
+{
+ return a & 0xaaaaaa00;
+}
+
+/* { dg-final { scan-assembler "and.*#-1431655766" } } */
+/* { dg-final { scan-assembler "bic.*#170" } } */
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,22 @@
+/* Ensure replicated constants don't make things worse. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ /* It might be tempting to use 0x01000100, but it wouldn't help. */
+ return a + 0x01f001e0;
+}
+
+/* { dg-final { scan-assembler "add.*#32505856" } } */
+/* { dg-final { scan-assembler "add.*#480" } } */
+
+int
+foo2 (int a)
+{
+ return a + 0x0f100e10;
+}
+
+/* { dg-final { scan-assembler "add.*#252706816" } } */
+/* { dg-final { scan-assembler "add.*#3600" } } */
@@ -0,0 +1,201 @@
2011-09-12 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
PR target/49030
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
reusing the old code from get_arm_condition_code. Return ARM_NV
for invalid comparison codes.
(get_arm_condition_code): Redefine in terms of
maybe_get_arm_condition_code.
* config/arm/predicates.md (arm_comparison_operator): Use
maybe_get_arm_condition_code.
gcc/testsuite/
PR target/49030
* gcc.dg/torture/pr49030.c: New test.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000
@@ -179,6 +179,7 @@
#endif
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
#ifdef RTX_CODE
+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
extern void thumb1_final_prescan_insn (rtx);
extern void thumb2_final_prescan_insn (rtx);
extern const char *thumb_load_double_from_address (rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000
+++ new/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000
@@ -17494,10 +17494,10 @@
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
- `arm_condition_codes'. COMPARISON should be an rtx like
- `(eq (...) (...))'. */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
+ `arm_condition_codes', or ARM_NV if the comparison is invalid.
+ COMPARISON should be an rtx like `(eq (...) (...))'. */
+enum arm_cond_code
+maybe_get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
@@ -17521,11 +17521,11 @@
case CC_DLTUmode: code = ARM_CC;
dominance:
- gcc_assert (comp_code == EQ || comp_code == NE);
-
if (comp_code == EQ)
return ARM_INVERSE_CONDITION_CODE (code);
- return code;
+ if (comp_code == NE)
+ return code;
+ return ARM_NV;
case CC_NOOVmode:
switch (comp_code)
@@ -17534,7 +17534,7 @@
case EQ: return ARM_EQ;
case GE: return ARM_PL;
case LT: return ARM_MI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Zmode:
@@ -17542,7 +17542,7 @@
{
case NE: return ARM_NE;
case EQ: return ARM_EQ;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Nmode:
@@ -17550,7 +17550,7 @@
{
case NE: return ARM_MI;
case EQ: return ARM_PL;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCFPEmode:
@@ -17575,7 +17575,7 @@
/* UNEQ and LTGT do not have a representation. */
case UNEQ: /* Fall through. */
case LTGT: /* Fall through. */
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_SWPmode:
@@ -17591,7 +17591,7 @@
case GTU: return ARM_CC;
case LEU: return ARM_CS;
case LTU: return ARM_HI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Cmode:
@@ -17599,7 +17599,7 @@
{
case LTU: return ARM_CS;
case GEU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_CZmode:
@@ -17611,7 +17611,7 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_NCVmode:
@@ -17621,7 +17621,7 @@
case LT: return ARM_LT;
case GEU: return ARM_CS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCmode:
@@ -17637,13 +17637,22 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
default: gcc_unreachable ();
}
}
+/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
+{
+ enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
+ gcc_assert (code != ARM_NV);
+ return code;
+}
+
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-08-13 08:40:36 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000
@@ -242,10 +242,9 @@
;; True for integer comparisons and, if FP is active, for comparisons
;; other than LTGT or UNEQ.
(define_special_predicate "arm_comparison_operator"
- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
- && (TARGET_FPA || TARGET_VFP)")
- (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
+ (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
+ unordered,ordered,unlt,unle,unge,ungt")
+ (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
(define_special_predicate "lt_ge_comparison_operator"
(match_code "lt,ge"))
=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000
@@ -0,0 +1,19 @@
+void
+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
+ unsigned long dst_skip)
+{
+ long long y;
+ while (nsamples--)
+ {
+ y = (long long) (*src * 8388608.0f) << 8;
+ if (y > 2147483647) {
+ *(int *) dst = 2147483647;
+ } else if (y < -2147483647 - 1) {
+ *(int *) dst = -2147483647 - 1;
+ } else {
+ *(int *) dst = (int) y;
+ }
+ dst += dst_skip;
+ src++;
+ }
+}
@@ -0,0 +1,38 @@
2011-09-01 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/predicates.md (shift_amount_operand): Ensure shift
amount is positive.
gcc/testsuite/
* gcc.dg/pr50193-1.c: New file.
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000
@@ -132,7 +132,8 @@
(define_predicate "shift_amount_operand"
(ior (and (match_test "TARGET_ARM")
(match_operand 0 "s_register_operand"))
- (match_operand 0 "const_int_operand")))
+ (and (match_operand 0 "const_int_operand")
+ (match_test "INTVAL (op) > 0"))))
(define_predicate "arm_add_operand"
(ior (match_operand 0 "arm_rhs_operand")
=== added file 'gcc/testsuite/gcc.dg/pr50193-1.c'
--- old/gcc/testsuite/gcc.dg/pr50193-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/pr50193-1.c 2011-09-01 12:22:14 +0000
@@ -0,0 +1,10 @@
+/* PR 50193: ARM: ICE on a | (b << negative-constant) */
+/* Ensure that the compiler doesn't ICE. */
+
+/* { dg-options "-O2" } */
+
+int
+foo(int a, int b)
+{
+ return a | (b << -3); /* { dg-warning "left shift count is negative" } */
+}
@@ -0,0 +1,47 @@
2011-09-12 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF mainline:
2011-09-08 Andrew Stubbs <ams@codesourcery.com>
PR tree-optimization/50318
gcc/
* tree-ssa-math-opts.c (convert_plusminus_to_widen): Correct
typo in use of mult_rhs1 and mult_rhs2.
gcc/testsuite/
* gcc.target/arm/pr50318-1.c: New file.
=== added file 'gcc/testsuite/gcc.target/arm/pr50318-1.c'
--- old/gcc/testsuite/gcc.target/arm/pr50318-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/pr50318-1.c 2011-09-08 20:11:43 +0000
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_dsp } */
+
+long long test (unsigned int sec, unsigned long long nsecs)
+{
+ return (long long)(long)sec * 1000000000L + (long long)(unsigned
+ long)nsecs;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
=== modified file 'gcc/tree-ssa-math-opts.c'
--- old/gcc/tree-ssa-math-opts.c 2011-08-09 10:26:48 +0000
+++ new/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000
@@ -1699,9 +1699,9 @@
/* Handle constants. */
if (TREE_CODE (mult_rhs1) == INTEGER_CST)
- rhs1 = fold_convert (type1, mult_rhs1);
+ mult_rhs1 = fold_convert (type1, mult_rhs1);
if (TREE_CODE (mult_rhs2) == INTEGER_CST)
- rhs2 = fold_convert (type2, mult_rhs2);
+ mult_rhs2 = fold_convert (type2, mult_rhs2);
gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
add_rhs);
@@ -0,0 +1,92 @@
2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/testsuite/
* gcc.target/arm/pr50099.c: Fix testcase from previous commit.
2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
LP:838994
gcc/
Backport from mainline.
2011-09-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/50099
* config/arm/iterators.md (qhs_zextenddi_cstr): New.
(qhs_zextenddi_op): New.
* config/arm/arm.md ("zero_extend<mode>di2"): Use them.
* config/arm/predicates.md ("arm_extendqisi_mem_op"):
Distinguish between ARM and Thumb2 states.
gcc/testsuite/
* gcc.target/arm/pr50099.c: New test.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000
+++ new/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000
@@ -4136,8 +4136,8 @@
(define_insn "zero_extend<mode>di2"
[(set (match_operand:DI 0 "s_register_operand" "=r")
- (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
- "<qhs_extenddi_cstr>")))]
+ (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
+ "<qhs_zextenddi_cstr>")))]
"TARGET_32BIT <qhs_zextenddi_cond>"
"#"
[(set_attr "length" "8")
=== modified file 'gcc/config/arm/iterators.md'
--- old/gcc/config/arm/iterators.md 2011-05-03 15:14:56 +0000
+++ new/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000
@@ -379,10 +379,14 @@
(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
(QI "&& arm_arch6")])
+(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand")
+ (HI "nonimmediate_operand")
+ (QI "nonimmediate_operand")])
(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
(HI "nonimmediate_operand")
- (QI "nonimmediate_operand")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+ (QI "arm_reg_or_extendqisi_mem_op")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
;;----------------------------------------------------------------------------
;; Code attributes
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000
@@ -289,8 +289,11 @@
(define_special_predicate "arm_extendqisi_mem_op"
(and (match_operand 0 "memory_operand")
- (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0),
- SIGN_EXTEND, 0)")))
+ (match_test "TARGET_ARM ? arm_legitimate_address_outer_p (mode,
+ XEXP (op, 0),
+ SIGN_EXTEND,
+ 0)
+ : memory_address_p (QImode, XEXP (op, 0))")))
(define_special_predicate "arm_reg_or_extendqisi_mem_op"
(ior (match_operand 0 "arm_extendqisi_mem_op")
=== added file 'gcc/testsuite/gcc.target/arm/pr50099.c'
--- old/gcc/testsuite/gcc.target/arm/pr50099.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/pr50099.c 2011-09-09 16:42:45 +0000
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo (signed char * arg)
+{
+ long long temp_1;
+
+ temp_1 = arg[256];
+ return temp_1;
+}
@@ -0,0 +1,767 @@
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from FSF mainline:
2011-04-06 Wei Guozhi <carrot@google.com>
PR target/47855
gcc/
* config/arm/arm.md (arm_cmpsi_insn): Compute attr "length".
(arm_cond_branch): Likewise.
(arm_cond_branch_reversed): Likewise.
(arm_jump): Likewise.
(push_multi): Likewise.
* config/arm/constraints.md (Py): New constraint.
2011-04-08 Wei Guozhi <carrot@google.com>
PR target/47855
* config/arm/arm-protos.h (arm_attr_length_push_multi): New prototype.
* config/arm/arm.c (arm_attr_length_push_multi): New function.
* config/arm/arm.md (*push_multi): Change the length computation to
call a C function.
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from FSF mainline:
2011-08-18 Jiangning Liu <jiangning.liu@arm.com>
gcc/
* config/arm/arm.md (*ior_scc_scc): Enable for Thumb2 as well.
(*ior_scc_scc_cmp): Likewise
(*and_scc_scc): Likewise.
(*and_scc_scc_cmp): Likewise.
(*and_scc_scc_nodom): Likewise.
(*cmp_ite0, *cmp_ite1, *cmp_and, *cmp_ior): Handle Thumb2.
gcc/testsuite
* gcc.target/arm/thumb2-cond-cmp-1.c: New. Make sure conditional
compare can be generated.
* gcc.target/arm/thumb2-cond-cmp-2.c: Likewise.
* gcc.target/arm/thumb2-cond-cmp-3.c: Likewise.
* gcc.target/arm/thumb2-cond-cmp-4.c: Likewise.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000
@@ -156,6 +156,7 @@
extern const char *arm_output_memory_barrier (rtx *);
extern const char *arm_output_sync_insn (rtx, rtx *);
extern unsigned int arm_sync_loop_insns (rtx , rtx *);
+extern int arm_attr_length_push_multi(rtx, rtx);
#if defined TREE_CODE
extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000
+++ new/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000
@@ -24391,4 +24391,30 @@
return NO_REGS;
}
+/* Compute the atrribute "length" of insn "*push_multi".
+ So this function MUST be kept in sync with that insn pattern. */
+int
+arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
+{
+ int i, regno, hi_reg;
+ int num_saves = XVECLEN (parallel_op, 0);
+
+ /* ARM mode. */
+ if (TARGET_ARM)
+ return 4;
+
+ /* Thumb2 mode. */
+ regno = REGNO (first_op);
+ hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
+ for (i = 1; i < num_saves && !hi_reg; i++)
+ {
+ regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
+ hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
+ }
+
+ if (!hi_reg)
+ return 2;
+ return 4;
+}
+
#include "gt-arm.h"
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000
+++ new/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
@@ -48,6 +48,15 @@
(DOM_CC_X_OR_Y 2)
]
)
+;; conditional compare combination
+(define_constants
+ [(CMP_CMP 0)
+ (CMN_CMP 1)
+ (CMP_CMN 2)
+ (CMN_CMN 3)
+ (NUM_OF_COND_CMP 4)
+ ]
+)
;; UNSPEC Usage:
;; Note: sin and cos are no-longer used.
@@ -7198,13 +7207,17 @@
(define_insn "*arm_cmpsi_insn"
[(set (reg:CC CC_REGNUM)
- (compare:CC (match_operand:SI 0 "s_register_operand" "r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L")))]
+ (compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r")
+ (match_operand:SI 1 "arm_add_operand" "Py,r,rI,L")))]
"TARGET_32BIT"
"@
cmp%?\\t%0, %1
+ cmp%?\\t%0, %1
+ cmp%?\\t%0, %1
cmn%?\\t%0, #%n1"
- [(set_attr "conds" "set")]
+ [(set_attr "conds" "set")
+ (set_attr "arch" "t2,t2,any,any")
+ (set_attr "length" "2,2,4,4")]
)
(define_insn "*cmpsi_shiftsi"
@@ -7375,7 +7388,14 @@
return \"b%d1\\t%l0\";
"
[(set_attr "conds" "use")
- (set_attr "type" "branch")]
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else
+ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+ (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+ (le (minus (match_dup 0) (pc)) (const_int 256))))
+ (const_int 2)
+ (const_int 4)))]
)
(define_insn "*arm_cond_branch_reversed"
@@ -7394,7 +7414,14 @@
return \"b%D1\\t%l0\";
"
[(set_attr "conds" "use")
- (set_attr "type" "branch")]
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else
+ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+ (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+ (le (minus (match_dup 0) (pc)) (const_int 256))))
+ (const_int 2)
+ (const_int 4)))]
)
@@ -7846,7 +7873,14 @@
return \"b%?\\t%l0\";
}
"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set (attr "length")
+ (if_then_else
+ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+ (and (ge (minus (match_dup 0) (pc)) (const_int -2044))
+ (le (minus (match_dup 0) (pc)) (const_int 2048))))
+ (const_int 2)
+ (const_int 4)))]
)
(define_insn "*thumb_jump"
@@ -8931,40 +8965,85 @@
(set_attr "length" "8,12")]
)
-;; ??? Is it worth using these conditional patterns in Thumb-2 mode?
(define_insn "*cmp_ite0"
[(set (match_operand 6 "dominant_cc_register" "")
(compare
(if_then_else:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
(const_int 0))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
{
- static const char * const opcodes[4][2] =
- {
- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
- };
+ static const char * const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%d5\\t%0, %1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmp%d5\\t%0, %1\",
+ \"cmn%d4\\t%2, #%n3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmn%d4\\t%2, #%n3\"}
+ };
+ static const char * const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%2, %3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmp\\t%2, %3\",
+ \"cmn\\t%0, #%n1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmn\\t%0, #%n1\"}
+ };
+ static const char * const ite[2] =
+ {
+ \"it\\t%d5\",
+ \"it\\t%d4\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
int swap =
comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
- return opcodes[which_alternative][swap];
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
}"
[(set_attr "conds" "set")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn "*cmp_ite1"
@@ -8972,35 +9051,81 @@
(compare
(if_then_else:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
(const_int 1))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
{
- static const char * const opcodes[4][2] =
- {
- {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
- {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
- };
+ static const char * const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%0, %1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmp\\t%0, %1\",
+ \"cmn\\t%2, #%n3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmn\\t%2, #%n3\"}
+ };
+ static const char * const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%d4\\t%2, %3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmp%d4\\t%2, %3\",
+ \"cmn%D5\\t%0, #%n1\"},
+ {\"cmn%d4\\t%2, #%n3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmn%d4\\t%2, #%n3\",
+ \"cmn%D5\\t%0, #%n1\"}
+ };
+ static const char * const ite[2] =
+ {
+ \"it\\t%d4\",
+ \"it\\t%D5\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
int swap =
comparison_dominates_p (GET_CODE (operands[5]),
reverse_condition (GET_CODE (operands[4])));
- return opcodes[which_alternative][swap];
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
}"
[(set_attr "conds" "set")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn "*cmp_and"
@@ -9008,34 +9133,80 @@
(compare
(and:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
{
- static const char *const opcodes[4][2] =
- {
- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
- };
+ static const char *const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%d5\\t%0, %1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmp%d5\\t%0, %1\",
+ \"cmn%d4\\t%2, #%n3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmn%d4\\t%2, #%n3\"}
+ };
+ static const char *const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%2, %3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmp\\t%2, %3\",
+ \"cmn\\t%0, #%n1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmn\\t%0, #%n1\"}
+ };
+ static const char *const ite[2] =
+ {
+ \"it\\t%d5\",
+ \"it\\t%d4\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
int swap =
comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
- return opcodes[which_alternative][swap];
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
}"
[(set_attr "conds" "set")
(set_attr "predicable" "no")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn "*cmp_ior"
@@ -9043,34 +9214,80 @@
(compare
(ior:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
-{
- static const char *const opcodes[4][2] =
{
- {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
- {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
- };
- int swap =
- comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
+ static const char *const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%0, %1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmp\\t%0, %1\",
+ \"cmn\\t%2, #%n3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmn\\t%2, #%n3\"}
+ };
+ static const char *const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%D4\\t%2, %3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmp%D4\\t%2, %3\",
+ \"cmn%D5\\t%0, #%n1\"},
+ {\"cmn%D4\\t%2, #%n3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmn%D4\\t%2, #%n3\",
+ \"cmn%D5\\t%0, #%n1\"}
+ };
+ static const char *const ite[2] =
+ {
+ \"it\\t%D4\",
+ \"it\\t%D5\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
+ int swap =
+ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
- return opcodes[which_alternative][swap];
-}
-"
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
+ }
+ "
[(set_attr "conds" "set")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn_and_split "*ior_scc_scc"
@@ -9082,11 +9299,11 @@
[(match_operand:SI 4 "s_register_operand" "r")
(match_operand:SI 5 "arm_add_operand" "rIL")])))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_ARM
+ "TARGET_32BIT
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y)
!= CCmode)"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 7)
(compare
(ior:SI
@@ -9115,9 +9332,9 @@
(set (match_operand:SI 7 "s_register_operand" "=r")
(ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0)
(compare
(ior:SI
@@ -9138,11 +9355,11 @@
[(match_operand:SI 4 "s_register_operand" "r")
(match_operand:SI 5 "arm_add_operand" "rIL")])))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_ARM
+ "TARGET_32BIT
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
!= CCmode)"
"#"
- "TARGET_ARM && reload_completed
+ "TARGET_32BIT && reload_completed
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
!= CCmode)"
[(set (match_dup 7)
@@ -9173,9 +9390,9 @@
(set (match_operand:SI 7 "s_register_operand" "=r")
(and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0)
(compare
(and:SI
@@ -9200,11 +9417,11 @@
[(match_operand:SI 4 "s_register_operand" "r,r,r")
(match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")])))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_ARM
+ "TARGET_32BIT
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
== CCmode)"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
(clobber (reg:CC CC_REGNUM))])
@@ -10314,6 +10531,8 @@
;; Push multiple registers to the stack. Registers are in parallel (use ...)
;; expressions. For simplicity, the first register is also in the unspec
;; part.
+;; To avoid the usage of GNU extension, the length attribute is computed
+;; in a C function arm_attr_length_push_multi.
(define_insn "*push_multi"
[(match_parallel 2 "multi_register_push"
[(set (match_operand:BLK 0 "memory_operand" "=m")
@@ -10353,7 +10572,9 @@
return \"\";
}"
- [(set_attr "type" "store4")]
+ [(set_attr "type" "store4")
+ (set (attr "length")
+ (symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))]
)
(define_insn "stack_tie"
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000
+++ new/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000
@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -201,6 +201,11 @@
(and (match_code "const_int")
(match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+(define_constraint "Py"
+ "@internal In Thumb-2 state a constant in the range 0 to 255"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,13 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpne" } } */
+
+int f(int i, int j)
+{
+ if ( (i == '+') || (j == '-') ) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,13 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpeq" } } */
+
+int f(int i, int j)
+{
+ if ( (i == '+') && (j == '-') ) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,12 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpgt" } } */
+
+int f(int i, int j)
+{
+ if ( (i >= '+') ? (j > '-') : 0)
+ return 1;
+ else
+ return 0;
+}
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,12 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpgt" } } */
+
+int f(int i, int j)
+{
+ if ( (i >= '+') ? (j <= '-') : 1)
+ return 1;
+ else
+ return 0;
+}
@@ -0,0 +1,203 @@
2011-09-15 Richard Sandiford <richard.sandiford@linaro.org>
Revert:
gcc/
PR target/49030
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
reusing the old code from get_arm_condition_code. Return ARM_NV
for invalid comparison codes.
(get_arm_condition_code): Redefine in terms of
maybe_get_arm_condition_code.
* config/arm/predicates.md (arm_comparison_operator): Use
maybe_get_arm_condition_code.
gcc/testsuite/
PR target/49030
* gcc.dg/torture/pr49030.c: New test.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000
@@ -180,7 +180,6 @@
#endif
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
#ifdef RTX_CODE
-extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
extern void thumb1_final_prescan_insn (rtx);
extern void thumb2_final_prescan_insn (rtx);
extern const char *thumb_load_double_from_address (rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000
@@ -17494,10 +17494,10 @@
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
- `arm_condition_codes', or ARM_NV if the comparison is invalid.
- COMPARISON should be an rtx like `(eq (...) (...))'. */
-enum arm_cond_code
-maybe_get_arm_condition_code (rtx comparison)
+ `arm_condition_codes'. COMPARISON should be an rtx like
+ `(eq (...) (...))'. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
@@ -17521,11 +17521,11 @@
case CC_DLTUmode: code = ARM_CC;
dominance:
+ gcc_assert (comp_code == EQ || comp_code == NE);
+
if (comp_code == EQ)
return ARM_INVERSE_CONDITION_CODE (code);
- if (comp_code == NE)
- return code;
- return ARM_NV;
+ return code;
case CC_NOOVmode:
switch (comp_code)
@@ -17534,7 +17534,7 @@
case EQ: return ARM_EQ;
case GE: return ARM_PL;
case LT: return ARM_MI;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_Zmode:
@@ -17542,7 +17542,7 @@
{
case NE: return ARM_NE;
case EQ: return ARM_EQ;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_Nmode:
@@ -17550,7 +17550,7 @@
{
case NE: return ARM_MI;
case EQ: return ARM_PL;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CCFPEmode:
@@ -17575,7 +17575,7 @@
/* UNEQ and LTGT do not have a representation. */
case UNEQ: /* Fall through. */
case LTGT: /* Fall through. */
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_SWPmode:
@@ -17591,7 +17591,7 @@
case GTU: return ARM_CC;
case LEU: return ARM_CS;
case LTU: return ARM_HI;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_Cmode:
@@ -17599,7 +17599,7 @@
{
case LTU: return ARM_CS;
case GEU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_CZmode:
@@ -17611,7 +17611,7 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_NCVmode:
@@ -17621,7 +17621,7 @@
case LT: return ARM_LT;
case GEU: return ARM_CS;
case LTU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CCmode:
@@ -17637,22 +17637,13 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
default: gcc_unreachable ();
}
}
-/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
-{
- enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
- gcc_assert (code != ARM_NV);
- return code;
-}
-
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000
@@ -243,9 +243,10 @@
;; True for integer comparisons and, if FP is active, for comparisons
;; other than LTGT or UNEQ.
(define_special_predicate "arm_comparison_operator"
- (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
- unordered,ordered,unlt,unle,unge,ungt")
- (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
+ (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
+ (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
+ && (TARGET_FPA || TARGET_VFP)")
+ (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
(define_special_predicate "lt_ge_comparison_operator"
(match_code "lt,ge"))
=== removed file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
@@ -1,19 +0,0 @@
-void
-sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
- unsigned long dst_skip)
-{
- long long y;
- while (nsamples--)
- {
- y = (long long) (*src * 8388608.0f) << 8;
- if (y > 2147483647) {
- *(int *) dst = 2147483647;
- } else if (y < -2147483647 - 1) {
- *(int *) dst = -2147483647 - 1;
- } else {
- *(int *) dst = (int) y;
- }
- dst += dst_skip;
- src++;
- }
-}
@@ -36,4 +36,25 @@ file://linaro/gcc-4.6-linaro-r106777.patch \
file://linaro/gcc-4.6-linaro-r106778.patch \ file://linaro/gcc-4.6-linaro-r106778.patch \
file://linaro/gcc-4.6-linaro-r106781.patch \ file://linaro/gcc-4.6-linaro-r106781.patch \
file://linaro/gcc-4.6-linaro-r106782.patch \ file://linaro/gcc-4.6-linaro-r106782.patch \
file://linaro/gcc-4.6-linaro-r106783.patch \
file://linaro/gcc-4.6-linaro-r106784.patch \
file://linaro/gcc-4.6-linaro-r106785.patch \
file://linaro/gcc-4.6-linaro-r106786.patch \
file://linaro/gcc-4.6-linaro-r106787.patch \
file://linaro/gcc-4.6-linaro-r106789.patch \
file://linaro/gcc-4.6-linaro-r106792.patch \
file://linaro/gcc-4.6-linaro-r106793.patch \
file://linaro/gcc-4.6-linaro-r106794.patch \
file://linaro/gcc-4.6-linaro-r106796.patch \
file://linaro/gcc-4.6-linaro-r106797.patch \
file://linaro/gcc-4.6-linaro-r106798.patch \
file://linaro/gcc-4.6-linaro-r106799.patch \
file://linaro/gcc-4.6-linaro-r106800.patch \
file://linaro/gcc-4.6-linaro-r106802.patch \
file://linaro/gcc-4.6-linaro-r106803.patch \
file://linaro/gcc-4.6-linaro-r106804.patch \
file://linaro/gcc-4.6-linaro-r106805.patch \
file://linaro/gcc-4.6-linaro-r106806.patch \
file://linaro/gcc-4.6-linaro-r106807.patch \
file://linaro/gcc-4.6-linaro-r106811.patch \
" "