gcc-4.6: Bring in latest linaro patches

I have tested it on angstrom by successfully building console-image
and systemd-gnome-image for all supported qemu targets.

Signed-off-by: Khem Raj <raj.khem@gmail.com>
This commit is contained in:
Khem Raj
2011-11-22 07:29:59 -08:00
parent a2fbb83f95
commit 2378ee8f21
24 changed files with 10087 additions and 1 deletions
@@ -0,0 +1,80 @@
2011-09-22 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r178804:
modulo-sched.c (remove_node_from_ps): Return void
instead of bool.
(optimize_sc): Adjust call to remove_node_from_ps.
(sms_schedule): Add print info.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000
+++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
@@ -211,7 +211,7 @@
static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
int, int, sbitmap, int *, sbitmap,
sbitmap);
-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
@@ -834,8 +834,7 @@
if (next_ps_i->node->cuid == g->closing_branch->cuid)
break;
- gcc_assert (next_ps_i);
- gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ remove_node_from_ps (ps, next_ps_i);
success =
try_scheduling_node_in_cycle (ps, g->closing_branch,
g->closing_branch->cuid, c,
@@ -1485,8 +1484,8 @@
if (dump_file)
{
fprintf (dump_file,
- "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
- stage_count);
+ "%s:%d SMS succeeded %d %d (with ii, sc)\n",
+ insn_file (tail), insn_line (tail), ps->ii, stage_count);
print_partial_schedule (ps, dump_file);
}
@@ -2810,22 +2809,18 @@
}
-/* Removes the given PS_INSN from the partial schedule. Returns false if the
- node is not found in the partial schedule, else returns true. */
-static bool
+/* Removes the given PS_INSN from the partial schedule. */
+static void
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
{
int row;
- if (!ps || !ps_i)
- return false;
-
+ gcc_assert (ps && ps_i);
+
row = SMODULO (ps_i->cycle, ps->ii);
if (! ps_i->prev_in_row)
{
- if (ps_i != ps->rows[row])
- return false;
-
+ gcc_assert (ps_i == ps->rows[row]);
ps->rows[row] = ps_i->next_in_row;
if (ps->rows[row])
ps->rows[row]->prev_in_row = NULL;
@@ -2839,7 +2834,7 @@
ps->rows_length[row] -= 1;
free (ps_i);
- return true;
+ return;
}
/* Unlike what literature describes for modulo scheduling (which focuses
@@ -0,0 +1,528 @@
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
Replace check_effective_target_arm_neon with
check_effective_target_arm_neon_ok.
Backport from mainline:
2011-09-06 Ira Rosen <ira.rosen@linaro.org>
gcc/
* config/arm/arm.c (arm_preferred_simd_mode): Check
TARGET_NEON_VECTORIZE_DOUBLE instead of
TARGET_NEON_VECTORIZE_QUAD.
(arm_autovectorize_vector_sizes): Likewise.
* config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
mask of mvectorize-with-neon-double. Add RejectNegative.
(mvectorize-with-neon-double): New.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
New procedure.
(add_options_for_quad_vectors): Replace with ...
(add_options_for_double_vectors): ... this.
* gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
support multiple vector sizes since the vectorizer attempts to
vectorize with both vector sizes.
* gcc.dg/vect/no-vfa-vect-79.c,
gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
* gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
* gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
gcc.dg/vect/vect-40.c: Likewise.
* gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
redundant.
* gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
Likewise.
* gcc.dg/vect/vect-peel-4.c: Make ia global.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
@@ -22974,7 +22974,7 @@
return false;
}
-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+/* Use the option -mvectorize-with-neon-double to override the use of quardword
registers when autovectorizing for Neon, at least until multiple vector
widths are supported properly by the middle-end. */
@@ -22985,15 +22985,15 @@
switch (mode)
{
case SFmode:
- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
case SImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
case HImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
case QImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
case DImode:
- if (TARGET_NEON_VECTORIZE_QUAD)
+ if (!TARGET_NEON_VECTORIZE_DOUBLE)
return V2DImode;
break;
@@ -24226,7 +24226,7 @@
static unsigned int
arm_autovectorize_vector_sizes (void)
{
- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000
+++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
@@ -158,9 +158,13 @@
Assume big endian bytes, little endian words
mvectorize-with-neon-quad
-Target Report Mask(NEON_VECTORIZE_QUAD)
+Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE)
Use Neon quad-word (rather than double-word) registers for vectorization
+mvectorize-with-neon-double
+Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE)
+Use Neon double-word (rather than quad-word) registers for vectorization
+
mword-relocations
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
Only generate absolute relocations on word sized values.
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000
@@ -45,6 +45,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000
@@ -53,6 +53,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000
@@ -53,6 +53,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000
@@ -58,5 +58,6 @@
If/when the aliasing problems are resolved, unalignment may
prevent vectorization on some targets. */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000
@@ -46,5 +46,6 @@
If/when the aliasing problems are resolved, unalignment may
prevent vectorization on some targets. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000
@@ -64,6 +64,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000
@@ -22,5 +22,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000
@@ -20,5 +20,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000
@@ -22,5 +22,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000
@@ -37,5 +37,6 @@
return 0;
}
-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000
@@ -49,5 +49,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000
@@ -49,5 +49,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_float } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include <signal.h>
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000
@@ -6,12 +6,12 @@
#define N 128
int ib[N+7];
+int ia[N+1];
__attribute__ ((noinline))
int main1 ()
{
int i;
- int ia[N+1];
/* Don't peel keeping one load and the store aligned. */
for (i = 0; i <= N; i++)
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000
@@ -58,7 +58,8 @@
}
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90'
--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000
+++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000
@@ -19,6 +19,7 @@
end
! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } }
-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } }
! { dg-final { cleanup-tree-dump "vect" } }
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
@@ -3265,6 +3265,24 @@
}]
}
+# Return 1 if the target supports multiple vector sizes
+
+proc check_effective_target_vect_multiple_sizes { } {
+ global et_vect_multiple_sizes
+
+ if [info exists et_vect_multiple_sizes_saved] {
+ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
+ } else {
+ set et_vect_multiple_sizes_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+ set et_vect_multiple_sizes_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2
+ return $et_vect_multiple_sizes_saved
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
@@ -3648,11 +3666,11 @@
return $flags
}
-# Add to FLAGS the flags needed to enable 128-bit vectors.
+# Add to FLAGS the flags needed to enable 64-bit vectors.
-proc add_options_for_quad_vectors { flags } {
+proc add_options_for_double_vectors { flags } {
if [is-effective-target arm_neon_ok] {
- return "$flags -mvectorize-with-neon-quad"
+ return "$flags -mvectorize-with-neon-double"
}
return $flags
@@ -0,0 +1,387 @@
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
(neon_move_hi_quad_<mode>): Likewise.
(move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-27 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi)
(neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di)
(neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si)
(neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands
that produce subreg moves. Define using VQX iterators.
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-14 Richard Sandiford <richard.sandiford@linaro.org>
* simplify-rtx.c (simplify_subreg): Check that the inner mode is
a scalar integer before applying integer-only optimisations to
inner arithmetic.
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
@@ -1235,66 +1235,14 @@
(const_string "neon_int_1") (const_string "neon_int_5")))]
)
-; FIXME: We wouldn't need the following insns if we could write subregs of
-; vector registers. Make an attempt at removing unnecessary moves, though
-; we're really at the mercy of the register allocator.
-
-(define_insn "neon_move_lo_quad_<mode>"
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
- (vec_concat:ANY128
- (match_operand:<V_HALF> 1 "s_register_operand" "w")
- (vec_select:<V_HALF>
- (match_dup 0)
- (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%e0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_move_hi_quad_<mode>"
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
- (vec_concat:ANY128
- (vec_select:<V_HALF>
- (match_dup 0)
- (match_operand:ANY128 2 "vect_par_constant_low" ""))
- (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
-
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%f0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
(define_expand "move_hi_quad_<mode>"
[(match_operand:ANY128 0 "s_register_operand" "")
(match_operand:<V_HALF> 1 "s_register_operand" "")]
"TARGET_NEON"
{
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
- rtx t1;
- int i;
-
- for (i=0; i < (<V_mode_nunits>/2); i++)
- RTVEC_ELT (v, i) = GEN_INT (i);
-
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
-
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
+ GET_MODE_SIZE (<V_HALF>mode)),
+ operands[1]);
DONE;
})
@@ -1303,16 +1251,9 @@
(match_operand:<V_HALF> 1 "s_register_operand" "")]
"TARGET_NEON"
{
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
- rtx t1;
- int i;
-
- for (i=0; i < (<V_mode_nunits>/2); i++)
- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
-
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
-
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
+ <MODE>mode, 0),
+ operands[1]);
DONE;
})
@@ -2950,183 +2891,27 @@
(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vget_highv16qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
- (parallel [(const_int 8) (const_int 9)
- (const_int 10) (const_int 11)
- (const_int 12) (const_int 13)
- (const_int 14) (const_int 15)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv8hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4si"
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4sf"
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv2di"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
- (parallel [(const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv16qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)
- (const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv8hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4si"
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4sf"
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv2di"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
- (parallel [(const_int 0)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
+(define_expand "neon_vget_high<mode>"
+ [(match_operand:<V_HALF> 0 "s_register_operand")
+ (match_operand:VQX 1 "s_register_operand")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0],
+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+ GET_MODE_SIZE (<V_HALF>mode)));
+ DONE;
+})
+
+(define_expand "neon_vget_low<mode>"
+ [(match_operand:<V_HALF> 0 "s_register_operand")
+ (match_operand:VQX 1 "s_register_operand")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0],
+ simplify_gen_subreg (<V_HALF>mode, operands[1],
+ <MODE>mode, 0));
+ DONE;
+})
(define_insn "neon_vcvt<mode>"
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000
+++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000
@@ -5567,6 +5567,7 @@
/* Optimize SUBREG truncations of zero and sign extended values. */
if ((GET_CODE (op) == ZERO_EXTEND
|| GET_CODE (op) == SIGN_EXTEND)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode))
{
unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
@@ -5605,6 +5606,7 @@
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
&& SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
/* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
to avoid the possibility that an outer LSHIFTRT shifts by more
than the sign extension's sign_bit_copies and introduces zeros
@@ -5624,6 +5626,7 @@
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
&& SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
&& CONST_INT_P (XEXP (op, 1))
&& GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5638,6 +5641,7 @@
the outer subreg is effectively a truncation to the original mode. */
if (GET_CODE (op) == ASHIFT
&& SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
&& CONST_INT_P (XEXP (op, 1))
&& (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5651,7 +5655,7 @@
/* Recognize a word extraction from a multi-word subreg. */
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
- && SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD
&& GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode))
&& CONST_INT_P (XEXP (op, 1))
@@ -5673,6 +5677,7 @@
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
+ && SCALAR_INT_MODE_P (innermode)
&& MEM_P (XEXP (op, 0))
&& CONST_INT_P (XEXP (op, 1))
&& GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))
@@ -0,0 +1,290 @@
2011-10-01 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from mainline -r179380 and -r179381
* ddg.c (autoinc_var_is_used_p): New function.
(create_ddg_dep_from_intra_loop_link,
add_cross_iteration_register_deps): Call it.
* ddg.h (autoinc_var_is_used_p): Declare.
* modulo-sched.c (sms_schedule): Handle instructions with REG_INC.
(generate_reg_moves): Call autoinc_var_is_used_p. Skip
instructions that do not set a register and verify no regmoves
are created for !single_set instructions.
gcc/testsuite/
* gcc.dg/sms-10.c: New file
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000
+++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000
@@ -145,6 +145,27 @@
return rtx_mem_access_p (PATTERN (insn));
}
+/* Return true if DEF_INSN contains address being auto-inc or auto-dec
+ which is used in USE_INSN. Otherwise return false. The result is
+ being used to decide whether to remove the edge between def_insn and
+ use_insn when -fmodulo-sched-allow-regmoves is set. This function
+ doesn't need to consider the specific address register; no reg_moves
+ will be allowed for any life range defined by def_insn and used
+ by use_insn, if use_insn uses an address register auto-inc'ed by
+ def_insn. */
+bool
+autoinc_var_is_used_p (rtx def_insn, rtx use_insn)
+{
+ rtx note;
+
+ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1))
+ if (REG_NOTE_KIND (note) == REG_INC
+ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn)))
+ return true;
+
+ return false;
+}
+
/* Computes the dependence parameters (latency, distance etc.), creates
a ddg_edge and adds it to the given DDG. */
static void
@@ -173,10 +194,15 @@
compensate for that by generating reg-moves based on the life-range
analysis. The anti-deps that will be deleted are the ones which
have true-deps edges in the opposite direction (in other words
- the kernel has only one def of the relevant register). TODO:
- support the removal of all anti-deps edges, i.e. including those
+ the kernel has only one def of the relevant register).
+ If the address that is being auto-inc or auto-dec in DEST_NODE
+ is used in SRC_NODE then do not remove the edge to make sure
+ reg-moves will not be created for this address.
+ TODO: support the removal of all anti-deps edges, i.e. including those
whose register has multiple defs in the loop. */
- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
+ if (flag_modulo_sched_allow_regmoves
+ && (t == ANTI_DEP && dt == REG_DEP)
+ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn))
{
rtx set;
@@ -302,10 +328,14 @@
gcc_assert (first_def_node);
/* Always create the edge if the use node is a branch in
- order to prevent the creation of reg-moves. */
+ order to prevent the creation of reg-moves.
+ If the address that is being auto-inc or auto-dec in LAST_DEF
+ is used in USE_INSN then do not remove the edge to make sure
+ reg-moves will not be created for that address. */
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
|| !flag_modulo_sched_allow_regmoves
- || JUMP_P (use_node->insn))
+ || JUMP_P (use_node->insn)
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn))
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
=== modified file 'gcc/ddg.h'
--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000
+++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000
@@ -186,4 +186,6 @@
int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
+bool autoinc_var_is_used_p (rtx, rtx);
+
#endif /* GCC_DDG_H */
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
+++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000
@@ -477,7 +477,12 @@
sbitmap *uses_of_defs;
rtx last_reg_move;
rtx prev_reg, old_reg;
-
+ rtx set = single_set (u->insn);
+
+ /* Skip instructions that do not set a register. */
+ if ((set && !REG_P (SET_DEST (set))))
+ continue;
+
/* Compute the number of reg_moves needed for u, by looking at life
ranges started at u (excluding self-loops). */
for (e = u->out; e; e = e->next_out)
@@ -494,6 +499,20 @@
&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
nreg_moves4e--;
+ if (nreg_moves4e >= 1)
+ {
+ /* !single_set instructions are not supported yet and
+ thus we do not except to encounter them in the loop
+ except from the doloop part. For the latter case
+ we assume no regmoves are generated as the doloop
+ instructions are tied to the branch with an edge. */
+ gcc_assert (set);
+ /* If the instruction contains auto-inc register then
+ validate that the regmov is being generated for the
+ target regsiter rather then the inc'ed register. */
+ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
+ }
+
nreg_moves = MAX (nreg_moves, nreg_moves4e);
}
@@ -1266,12 +1285,10 @@
continue;
}
- /* Don't handle BBs with calls or barriers or auto-increment insns
- (to avoid creating invalid reg-moves for the auto-increment insns),
+ /* Don't handle BBs with calls or barriers
or !single_set with the exception of instructions that include
count_reg---these instructions are part of the control part
that do-loop recognizes.
- ??? Should handle auto-increment insns.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
{
@@ -1282,7 +1299,6 @@
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
&& !reg_mentioned_p (count_reg, insn))
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
break;
@@ -1296,8 +1312,6 @@
fprintf (dump_file, "SMS loop-with-call\n");
else if (BARRIER_P (insn))
fprintf (dump_file, "SMS loop-with-barrier\n");
- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
- fprintf (dump_file, "SMS reg inc\n");
else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
fprintf (dump_file, "SMS loop-with-not-single-set\n");
=== added file 'gcc/testsuite/gcc.dg/sms-10.c'
--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000
@@ -0,0 +1,118 @@
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
+
+
+typedef __SIZE_TYPE__ size_t;
+extern void *malloc (size_t);
+extern void free (void *);
+extern void abort (void);
+
+struct regstat_n_sets_and_refs_t
+{
+ int sets;
+ int refs;
+};
+
+struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs;
+
+struct df_reg_info
+{
+ unsigned int n_refs;
+};
+
+struct df_d
+{
+ struct df_reg_info **def_regs;
+ struct df_reg_info **use_regs;
+};
+struct df_d *df;
+
+static inline int
+REG_N_SETS (int regno)
+{
+ return regstat_n_sets_and_refs[regno].sets;
+}
+
+__attribute__ ((noinline))
+ int max_reg_num (void)
+{
+ return 100;
+}
+
+__attribute__ ((noinline))
+ void regstat_init_n_sets_and_refs (void)
+{
+ unsigned int i;
+ unsigned int max_regno = max_reg_num ();
+
+ for (i = 0; i < max_regno; i++)
+ {
+ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs));
+ (regstat_n_sets_and_refs[i].refs =
+ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i));
+ }
+}
+
+int a_sets[100] =
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
+ 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+};
+
+int a_refs[100] =
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
+ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
+ 78, 80, 82,
+ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116,
+ 118, 120,
+ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150,
+ 152, 154, 156,
+ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
+ 188, 190, 192,
+ 194, 196, 198
+};
+
+int
+main ()
+{
+ struct df_reg_info *b[100], *c[100];
+ struct df_d df1;
+ size_t s = sizeof (struct df_reg_info);
+ struct regstat_n_sets_and_refs_t a[100];
+
+ df = &df1;
+ regstat_n_sets_and_refs = a;
+ int i;
+
+ for (i = 0; i < 100; i++)
+ {
+ b[i] = (struct df_reg_info *) malloc (s);
+ b[i]->n_refs = i;
+ c[i] = (struct df_reg_info *) malloc (s);
+ c[i]->n_refs = i;
+ }
+
+ df1.def_regs = b;
+ df1.use_regs = c;
+ regstat_init_n_sets_and_refs ();
+
+ for (i = 0; i < 100; i++)
+ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i]))
+ abort ();
+
+ for (i = 0; i < 100; i++)
+ {
+ free (b[i]);
+ free (c[i]);
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "sms" } } */
@@ -0,0 +1,105 @@
2011-10-03 Michael Hope <michael.hope@linaro.org>
Backport from mainline:
2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru>
gcc/
* config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New
define_insn patterns for combine.
gcc/testsuite/
* gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test.
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
+++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000
@@ -5428,3 +5428,32 @@
emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
DONE;
})
+
+(define_insn "neon_vabd<mode>_2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabd<mode>_3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")]
+ UNSPEC_VSUB)))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c'
--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+float32x2_t f_sub_abs_to_vabd_32()
+{
+ float32x2_t val1 = vdup_n_f32 (10);
+ float32x2_t val2 = vdup_n_f32 (30);
+ float32x2_t sres = vsub_f32(val1, val2);
+ float32x2_t res = vabs_f32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
+
+#include <arm_neon.h>
+int8x8_t sub_abs_to_vabd_8()
+{
+ int8x8_t val1 = vdup_n_s8 (10);
+ int8x8_t val2 = vdup_n_s8 (30);
+ int8x8_t sres = vsub_s8(val1, val2);
+ int8x8_t res = vabs_s8 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
+
+int16x4_t sub_abs_to_vabd_16()
+{
+ int16x4_t val1 = vdup_n_s16 (10);
+ int16x4_t val2 = vdup_n_s16 (30);
+ int16x4_t sres = vsub_s16(val1, val2);
+ int16x4_t res = vabs_s16 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
+
+int32x2_t sub_abs_to_vabd_32()
+{
+ int32x2_t val1 = vdup_n_s32 (10);
+ int32x2_t val2 = vdup_n_s32 (30);
+ int32x2_t sres = vsub_s32(val1, val2);
+ int32x2_t res = vabs_s32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s32" } }*/
@@ -0,0 +1,436 @@
2011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/predicates.md (expandable_comparison_operator): New
predicate, extracted from...
(arm_comparison_operator): ...here.
* config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4)
(cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc)
(movdfcc): Use expandable_comparison_operator.
gcc/testsuite/
Backport from mainline:
2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/arm/cmp-1.c: New test.
* gcc.target/arm/cmp-2.c: Likewise.
2011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
PR target/49030
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
reusing the old code from get_arm_condition_code. Return ARM_NV
for invalid comparison codes.
(get_arm_condition_code): Redefine in terms of
maybe_get_arm_condition_code.
* config/arm/predicates.md (arm_comparison_operator): Use
maybe_get_arm_condition_code.
gcc/testsuite/
Backport from mainline:
2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
PR target/49030
* gcc.dg/torture/pr49030.c: New test.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
@@ -180,6 +180,7 @@
#endif
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
#ifdef RTX_CODE
+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
extern void thumb1_final_prescan_insn (rtx);
extern void thumb2_final_prescan_insn (rtx);
extern const char *thumb_load_double_from_address (rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
+++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
@@ -17494,10 +17494,10 @@
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
- `arm_condition_codes'. COMPARISON should be an rtx like
- `(eq (...) (...))'. */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
+ `arm_condition_codes', or ARM_NV if the comparison is invalid.
+ COMPARISON should be an rtx like `(eq (...) (...))'. */
+enum arm_cond_code
+maybe_get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
@@ -17521,11 +17521,11 @@
case CC_DLTUmode: code = ARM_CC;
dominance:
- gcc_assert (comp_code == EQ || comp_code == NE);
-
if (comp_code == EQ)
return ARM_INVERSE_CONDITION_CODE (code);
- return code;
+ if (comp_code == NE)
+ return code;
+ return ARM_NV;
case CC_NOOVmode:
switch (comp_code)
@@ -17534,7 +17534,7 @@
case EQ: return ARM_EQ;
case GE: return ARM_PL;
case LT: return ARM_MI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Zmode:
@@ -17542,7 +17542,7 @@
{
case NE: return ARM_NE;
case EQ: return ARM_EQ;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Nmode:
@@ -17550,7 +17550,7 @@
{
case NE: return ARM_MI;
case EQ: return ARM_PL;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCFPEmode:
@@ -17575,7 +17575,7 @@
/* UNEQ and LTGT do not have a representation. */
case UNEQ: /* Fall through. */
case LTGT: /* Fall through. */
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_SWPmode:
@@ -17591,7 +17591,7 @@
case GTU: return ARM_CC;
case LEU: return ARM_CS;
case LTU: return ARM_HI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Cmode:
@@ -17599,7 +17599,7 @@
{
case LTU: return ARM_CS;
case GEU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_CZmode:
@@ -17611,7 +17611,7 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_NCVmode:
@@ -17621,7 +17621,7 @@
case LT: return ARM_LT;
case GEU: return ARM_CS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCmode:
@@ -17637,13 +17637,22 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
default: gcc_unreachable ();
}
}
+/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
+{
+ enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
+ gcc_assert (code != ARM_NV);
+ return code;
+}
+
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
@@ -6543,7 +6543,7 @@
(define_expand "cbranchsi4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:SI 1 "s_register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -6594,7 +6594,7 @@
(define_expand "cbranchsf4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:SF 1 "s_register_operand" "")
(match_operand:SF 2 "arm_float_compare_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -6606,7 +6606,7 @@
(define_expand "cbranchdf4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:DF 1 "s_register_operand" "")
(match_operand:DF 2 "arm_float_compare_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -6618,7 +6618,7 @@
(define_expand "cbranchdi4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:DI 1 "cmpdi_operand" "")
(match_operand:DI 2 "cmpdi_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -7473,7 +7473,7 @@
(define_expand "cstoresi4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:SI 2 "s_register_operand" "")
(match_operand:SI 3 "reg_or_int_operand" "")]))]
"TARGET_32BIT || TARGET_THUMB1"
@@ -7609,7 +7609,7 @@
(define_expand "cstoresf4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:SF 2 "s_register_operand" "")
(match_operand:SF 3 "arm_float_compare_operand" "")]))]
"TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -7619,7 +7619,7 @@
(define_expand "cstoredf4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:DF 2 "s_register_operand" "")
(match_operand:DF 3 "arm_float_compare_operand" "")]))]
"TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
@@ -7629,7 +7629,7 @@
(define_expand "cstoredi4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:DI 2 "cmpdi_operand" "")
(match_operand:DI 3 "cmpdi_operand" "")]))]
"TARGET_32BIT"
@@ -7749,7 +7749,7 @@
(define_expand "movsicc"
[(set (match_operand:SI 0 "s_register_operand" "")
- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
+ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "")
(match_operand:SI 2 "arm_not_operand" "")
(match_operand:SI 3 "arm_not_operand" "")))]
"TARGET_32BIT"
@@ -7769,7 +7769,7 @@
(define_expand "movsfcc"
[(set (match_operand:SF 0 "s_register_operand" "")
- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
+ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
(match_operand:SF 2 "s_register_operand" "")
(match_operand:SF 3 "nonmemory_operand" "")))]
"TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -7795,7 +7795,7 @@
(define_expand "movdfcc"
[(set (match_operand:DF 0 "s_register_operand" "")
- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
+ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
(match_operand:DF 2 "s_register_operand" "")
(match_operand:DF 3 "arm_float_add_operand" "")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
@@ -242,11 +242,15 @@
;; True for integer comparisons and, if FP is active, for comparisons
;; other than LTGT or UNEQ.
+(define_special_predicate "expandable_comparison_operator"
+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
+ unordered,ordered,unlt,unle,unge,ungt"))
+
+;; Likewise, but only accept comparisons that are directly supported
+;; by ARM condition codes.
(define_special_predicate "arm_comparison_operator"
- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
- && (TARGET_FPA || TARGET_VFP)")
- (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
+ (and (match_operand 0 "expandable_comparison_operator")
+ (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
(define_special_predicate "lt_ge_comparison_operator"
(match_code "lt,ge"))
=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000
@@ -0,0 +1,19 @@
+void
+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
+ unsigned long dst_skip)
+{
+ long long y;
+ while (nsamples--)
+ {
+ y = (long long) (*src * 8388608.0f) << 8;
+ if (y > 2147483647) {
+ *(int *) dst = 2147483647;
+ } else if (y < -2147483647 - 1) {
+ *(int *) dst = -2147483647 - 1;
+ } else {
+ *(int *) dst = (int) y;
+ }
+ dst += dst_skip;
+ src++;
+ }
+}
=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c'
--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
+/* { dg-final { scan-assembler-not "__aeabi" } } */
+int x, y;
+
+#define TEST_EXPR(NAME, ARGS, EXPR) \
+ int NAME##1 ARGS { return (EXPR); } \
+ int NAME##2 ARGS { return !(EXPR); } \
+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
+ void NAME##4 ARGS { if (EXPR) x++; } \
+ void NAME##5 ARGS { if (!(EXPR)) x++; }
+
+#define TEST(NAME, TYPE, OPERATOR) \
+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \
+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \
+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \
+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \
+ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \
+ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1)
+
+#define TEST_OP(NAME, OPERATOR) \
+ TEST (sc_##NAME, signed char, OPERATOR) \
+ TEST (uc_##NAME, unsigned char, OPERATOR) \
+ TEST (ss_##NAME, short, OPERATOR) \
+ TEST (us_##NAME, unsigned short, OPERATOR) \
+ TEST (si_##NAME, int, OPERATOR) \
+ TEST (ui_##NAME, unsigned int, OPERATOR) \
+ TEST (sll_##NAME, long long, OPERATOR) \
+ TEST (ull_##NAME, unsigned long long, OPERATOR)
+
+TEST_OP (eq, ==)
+TEST_OP (ne, !=)
+TEST_OP (lt, <)
+TEST_OP (gt, >)
+TEST_OP (le, <=)
+TEST_OP (ge, >=)
=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c'
--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_vfp_ok } */
+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
+/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
+/* { dg-final { scan-assembler-not "__aeabi" } } */
+int x, y;
+
+#define EQ(X, Y) ((X) == (Y))
+#define NE(X, Y) ((X) != (Y))
+#define LT(X, Y) ((X) < (Y))
+#define GT(X, Y) ((X) > (Y))
+#define LE(X, Y) ((X) <= (Y))
+#define GE(X, Y) ((X) >= (Y))
+
+#define TEST_EXPR(NAME, ARGS, EXPR) \
+ int NAME##1 ARGS { return (EXPR); } \
+ int NAME##2 ARGS { return !(EXPR); } \
+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
+ void NAME##4 ARGS { if (EXPR) x++; } \
+ void NAME##5 ARGS { if (!(EXPR)) x++; }
+
+#define TEST(NAME, TYPE, OPERATOR) \
+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \
+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \
+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \
+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \
+ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \
+ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1))
+
+#define TEST_OP(NAME, OPERATOR) \
+ TEST (f_##NAME, float, OPERATOR) \
+ TEST (d_##NAME, double, OPERATOR) \
+ TEST (ld_##NAME, long double, OPERATOR)
+
+TEST_OP (eq, EQ)
+TEST_OP (ne, NE)
+TEST_OP (lt, LT)
+TEST_OP (gt, GT)
+TEST_OP (le, LE)
+TEST_OP (ge, GE)
+TEST_OP (blt, __builtin_isless)
+TEST_OP (bgt, __builtin_isgreater)
+TEST_OP (ble, __builtin_islessequal)
+TEST_OP (bge, __builtin_isgreaterequal)
+/* This one should be expanded into separate ordered and equality
+ comparisons. */
+TEST_OP (blg, __builtin_islessgreater)
+TEST_OP (bun, __builtin_isunordered)
@@ -0,0 +1,378 @@
2011-10-06 Ira Rosen <ira.rosen@linaro.org>
Backport from mainline:
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-data-ref.c (dr_analyze_innermost): Add new argument.
Allow not simple iv if analyzing basic block.
(create_data_ref): Update call to dr_analyze_innermost.
(stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise.
* tree-loop-distribution.c (generate_memset_zero): Likewise.
* tree-predcom.c (find_looparound_phi): Likewise.
* tree-data-ref.h (dr_analyze_innermost): Add new argument.
gcc/testsuite/
* gcc.dg/vect/bb-slp-24.c: New.
2011-09-15 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow
read-after-read dependencies in basic block SLP.
gcc/testsuite/
* gcc.dg/vect/bb-slp-25.c: New.
2011-04-21 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use
operand_equal_p to compare DR_BASE_ADDRESSes.
(vect_check_interleaving): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-119.c: New test.
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
+ int stride, int dummy)
+{
+ int i;
+ h /= 8;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] += A*src[0];
+ dst[1] += A*src[1];
+ dst[2] += A*src[2];
+ dst[3] += A*src[3];
+ dst[4] += A*src[4];
+ dst[5] += A*src[5];
+ dst[6] += A*src[6];
+ dst[7] += A*src[7];
+ dst += stride;
+ src += stride;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N; i++)
+ {
+ if (dst[i] != A * i)
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
+{
+ int i;
+ h /= 16;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] += A*src[0] + src[stride];
+ dst[1] += A*src[1] + src[1+stride];
+ dst[2] += A*src[2] + src[2+stride];
+ dst[3] += A*src[3] + src[3+stride];
+ dst[4] += A*src[4] + src[4+stride];
+ dst[5] += A*src[5] + src[5+stride];
+ dst[6] += A*src[6] + src[6+stride];
+ dst[7] += A*src[7] + src[7+stride];
+ dst += 8;
+ src += 8;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N/2; i++)
+ {
+ if (dst[i] != A * i + i + 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+
+#define OUTER 32
+#define INNER 40
+
+static unsigned int
+bar (const unsigned int x[INNER][2], unsigned int sum)
+{
+ int i;
+
+ for (i = 0; i < INNER; i++)
+ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1];
+ return sum;
+}
+
+unsigned int foo (const unsigned int x[OUTER][INNER][2])
+{
+ int i;
+ unsigned int sum;
+
+ sum = 0.0f;
+ for (i = 0; i < OUTER; i++)
+ sum = bar (x[i], sum);
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/tree-data-ref.c'
--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000
+++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000
@@ -721,11 +721,11 @@
}
/* Analyzes the behavior of the memory reference DR in the innermost loop or
- basic block that contains it. Returns true if analysis succeed or false
+ basic block that contains it. Returns true if analysis succeed or false
otherwise. */
bool
-dr_analyze_innermost (struct data_reference *dr)
+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
{
gimple stmt = DR_STMT (dr);
struct loop *loop = loop_containing_stmt (stmt);
@@ -768,14 +768,25 @@
}
else
base = build_fold_addr_expr (base);
+
if (in_loop)
{
if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
false))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
+ if (nest)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of base is not"
+ " affine.\n");
+ return false;
+ }
+ else
+ {
+ base_iv.base = base;
+ base_iv.step = ssize_int (0);
+ base_iv.no_overflow = true;
+ }
}
}
else
@@ -800,10 +811,18 @@
else if (!simple_iv (loop, loop_containing_stmt (stmt),
poffset, &offset_iv, false))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not"
- " affine.\n");
- return false;
+ if (nest)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of offset is not"
+ " affine.\n");
+ return false;
+ }
+ else
+ {
+ offset_iv.base = poffset;
+ offset_iv.step = ssize_int (0);
+ }
}
}
@@ -967,7 +986,7 @@
DR_REF (dr) = memref;
DR_IS_READ (dr) = is_read;
- dr_analyze_innermost (dr);
+ dr_analyze_innermost (dr, nest);
dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
@@ -5185,7 +5204,7 @@
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
- res = dr_analyze_innermost (dr)
+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
&& stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
free_data_ref (dr);
@@ -5225,7 +5244,7 @@
DR_STMT (dr) = stmt;
DR_REF (dr) = *ref->pos;
- dr_analyze_innermost (dr);
+ dr_analyze_innermost (dr, loop_containing_stmt (stmt));
base_address = DR_BASE_ADDRESS (dr);
if (!base_address)
=== modified file 'gcc/tree-data-ref.h'
--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
+++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000
@@ -386,7 +386,7 @@
DEF_VEC_ALLOC_O (data_ref_loc, heap);
bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **);
-bool dr_analyze_innermost (struct data_reference *);
+bool dr_analyze_innermost (struct data_reference *, struct loop *);
extern bool compute_data_dependences_for_loop (struct loop *, bool,
VEC (loop_p, heap) **,
VEC (data_reference_p, heap) **,
=== modified file 'gcc/tree-loop-distribution.c'
--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000
+++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000
@@ -267,7 +267,7 @@
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
- res = dr_analyze_innermost (dr);
+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
=== modified file 'gcc/tree-predcom.c'
--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000
+++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000
@@ -1114,7 +1114,7 @@
memset (&init_dr, 0, sizeof (struct data_reference));
DR_REF (&init_dr) = init_ref;
DR_STMT (&init_dr) = phi;
- if (!dr_analyze_innermost (&init_dr))
+ if (!dr_analyze_innermost (&init_dr, loop))
return NULL;
if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
=== modified file 'gcc/tree-vect-data-refs.c'
--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000
+++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000
@@ -353,11 +353,7 @@
/* Check that the data-refs have same bases and offsets. If not, we can't
determine if they are dependent. */
- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
|| !dr_equal_offsets_p (dra, drb))
return true;
@@ -403,11 +399,7 @@
/* Check that the data-refs have same first location (except init) and they
are both either store or load (not load and store). */
- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
|| !dr_equal_offsets_p (dra, drb)
|| !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
|| DR_IS_READ (dra) != DR_IS_READ (drb))
@@ -615,6 +607,11 @@
if (vect_check_interleaving (dra, drb))
return false;
+ /* Read-read is OK (we need this check here, after checking for
+ interleaving). */
+ if (DR_IS_READ (dra) && DR_IS_READ (drb))
+ return false;
+
if (vect_print_dump_info (REPORT_DR_DETAILS))
{
fprintf (vect_dump, "can't determine dependence between ");
@@ -0,0 +1,240 @@
2011-10-06 Ira Rosen <ira.rosen@linaro.org>
gcc/testsuite/
* gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block
vectorizable.
Backport from mainline:
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
of vect_analyze_bb here.
(vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect64): New.
* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
of multiple vector sizes.
* gcc.dg/vect/bb-slp-26.c: New.
=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
@@ -49,6 +49,7 @@
}
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+char src[N], dst[N];
+
+void foo (char * __restrict__ dst, char * __restrict__ src, int h,
+ int stride, int dummy)
+{
+ int i;
+ h /= 16;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] += A*src[0];
+ dst[1] += A*src[1];
+ dst[2] += A*src[2];
+ dst[3] += A*src[3];
+ dst[4] += A*src[4];
+ dst[5] += A*src[5];
+ dst[6] += A*src[6];
+ dst[7] += A*src[7];
+ dst += 8;
+ src += 8;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i/8;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N/2; i++)
+ {
+ if (dst[i] != A * src[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
@@ -3283,6 +3283,24 @@
return $et_vect_multiple_sizes_saved
}
+# Return 1 if the target supports vectors of 64 bits.
+
+proc check_effective_target_vect64 { } {
+ global et_vect64
+
+ if [info exists et_vect64_saved] {
+ verbose "check_effective_target_vect64: using cached result" 2
+ } else {
+ set et_vect64_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+ set et_vect64_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+ return $et_vect64_saved
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
=== modified file 'gcc/tree-vect-slp.c'
--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000
+++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
@@ -1664,42 +1664,18 @@
/* Check if the basic block can be vectorized. */
-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
{
bb_vec_info bb_vinfo;
VEC (ddr_p, heap) *ddrs;
VEC (slp_instance, heap) *slp_instances;
slp_instance instance;
- int i, insns = 0;
- gimple_stmt_iterator gsi;
+ int i;
int min_vf = 2;
int max_vf = MAX_VECTORIZATION_FACTOR;
bool data_dependence_in_bb = false;
- current_vector_size = 0;
-
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!is_gimple_debug (stmt)
- && !gimple_nop_p (stmt)
- && gimple_code (stmt) != GIMPLE_LABEL)
- insns++;
- }
-
- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
- {
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
- fprintf (vect_dump, "not vectorized: too many instructions in basic "
- "block.\n");
-
- return NULL;
- }
-
bb_vinfo = new_bb_vec_info (bb);
if (!bb_vinfo)
return NULL;
@@ -1819,6 +1795,61 @@
}
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+ bb_vec_info bb_vinfo;
+ int insns = 0;
+ gimple_stmt_iterator gsi;
+ unsigned int vector_sizes;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!is_gimple_debug (stmt)
+ && !gimple_nop_p (stmt)
+ && gimple_code (stmt) != GIMPLE_LABEL)
+ insns++;
+ }
+
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
+ "block.\n");
+
+ return NULL;
+ }
+
+ /* Autodetect first vector size we try. */
+ current_vector_size = 0;
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+ while (1)
+ {
+ bb_vinfo = vect_slp_analyze_bb_1 (bb);
+ if (bb_vinfo)
+ return bb_vinfo;
+
+ destroy_bb_vec_info (bb_vinfo);
+
+ vector_sizes &= ~current_vector_size;
+ if (vector_sizes == 0
+ || current_vector_size == 0)
+ return NULL;
+
+ /* Try the next biggest vector size. */
+ current_vector_size = 1 << floor_log2 (vector_sizes);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "***** Re-trying analysis with "
+ "vector size %d\n", current_vector_size);
+ }
+}
+
+
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor).
However, the actual number of vector stmts for every SLP node depends on
@@ -0,0 +1,124 @@
2011-10-13 Andrew Stubbs <ams@codesourcery.com>
Backport from mainline:
2011-10-07 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/predicates.md (shift_amount_operand): Remove constant
range check.
(shift_operator): Check range of constants for all shift operators.
gcc/testsuite/
* gcc.dg/pr50193-1.c: New file.
* gcc.target/arm/shiftable.c: New file.
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
+++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000
@@ -129,11 +129,12 @@
(ior (match_operand 0 "arm_rhs_operand")
(match_operand 0 "memory_operand")))
+;; This doesn't have to do much because the constant is already checked
+;; in the shift_operator predicate.
(define_predicate "shift_amount_operand"
(ior (and (match_test "TARGET_ARM")
(match_operand 0 "s_register_operand"))
- (and (match_operand 0 "const_int_operand")
- (match_test "INTVAL (op) > 0"))))
+ (match_operand 0 "const_int_operand")))
(define_predicate "arm_add_operand"
(ior (match_operand 0 "arm_rhs_operand")
@@ -219,13 +220,20 @@
(match_test "mode == GET_MODE (op)")))
;; True for shift operators.
+;; Notes:
+;; * mult is only permitted with a constant shift amount
+;; * patterns that permit register shift amounts only in ARM mode use
+;; shift_amount_operand, patterns that always allow registers do not,
+;; so we don't have to worry about that sort of thing here.
(define_special_predicate "shift_operator"
(and (ior (ior (and (match_code "mult")
(match_test "power_of_two_operand (XEXP (op, 1), mode)"))
(and (match_code "rotate")
(match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
- (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
+ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
(match_test "mode == GET_MODE (op)")))
;; True for MULT, to identify which variant of shift_operator is in use.
=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c'
--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm32 } */
+
+/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some
+ of these as a left shift, others as a multiply. Check that we match the
+ right one. */
+
+int
+plus (int a, int b)
+{
+ return (a * 64) + b;
+}
+
+/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */
+
+int
+minus (int a, int b)
+{
+ return a - (b * 64);
+}
+
+/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */
+
+int
+ior (int a, int b)
+{
+ return (a * 64) | b;
+}
+
+/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */
+
+int
+xor (int a, int b)
+{
+ return (a * 64) ^ b;
+}
+
+/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */
+
+int
+and (int a, int b)
+{
+ return (a * 64) & b;
+}
+
+/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */
+
+int
+rsb (int a, int b)
+{
+ return (a * 64) - b;
+}
+
+/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */
+
+int
+mvn (int a, int b)
+{
+ return ~(a * 64);
+}
+
+/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */
@@ -0,0 +1,362 @@
2011-10-16 Ira Rosen <ira.rosen@linaro.org>
Backport from mainline:
2011-09-27 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
vectorization.
(vectorizable_type_promotion): Likewise.
(vect_analyze_stmt): Call vectorizable_type_demotion and
vectorizable_type_promotion for basic blocks.
(supportable_widening_operation): Don't assume loop vectorization.
* tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
basic blocks. Update vectorization factor for basic block
vectorization.
(vect_analyze_slp_instance): Allow multiple types for basic block
vectorization. Recheck unrolling factor after construction of SLP
instance.
gcc/testsuite/
* gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit
vectors.
* gcc.dg/vect/bb-slp-27.c: New.
* gcc.dg/vect/bb-slp-28.c: New.
2011-10-04 Ira Rosen <ira.rosen@linaro.org>
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
Make et_vect_multiple_sizes_saved global.
(check_effective_target_vect64): Make et_vect64_saved global.
=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000
@@ -48,8 +48,6 @@
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 16
+
+short src[N], dst[N];
+
+void foo (int a)
+{
+ dst[0] += a*src[0];
+ dst[1] += a*src[1];
+ dst[2] += a*src[2];
+ dst[3] += a*src[3];
+ dst[4] += a*src[4];
+ dst[5] += a*src[5];
+ dst[6] += a*src[6];
+ dst[7] += a*src[7];
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (A);
+
+ for (i = 0; i < 8; i++)
+ {
+ if (dst[i] != A * i)
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000
@@ -0,0 +1,71 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 300
+#define N 16
+
+char src[N];
+short dst[N];
+short src1[N], dst1[N];
+
+void foo (int a)
+{
+ dst[0] = (short) (a * (int) src[0]);
+ dst[1] = (short) (a * (int) src[1]);
+ dst[2] = (short) (a * (int) src[2]);
+ dst[3] = (short) (a * (int) src[3]);
+ dst[4] = (short) (a * (int) src[4]);
+ dst[5] = (short) (a * (int) src[5]);
+ dst[6] = (short) (a * (int) src[6]);
+ dst[7] = (short) (a * (int) src[7]);
+ dst[8] = (short) (a * (int) src[8]);
+ dst[9] = (short) (a * (int) src[9]);
+ dst[10] = (short) (a * (int) src[10]);
+ dst[11] = (short) (a * (int) src[11]);
+ dst[12] = (short) (a * (int) src[12]);
+ dst[13] = (short) (a * (int) src[13]);
+ dst[14] = (short) (a * (int) src[14]);
+ dst[15] = (short) (a * (int) src[15]);
+
+ dst1[0] += src1[0];
+ dst1[1] += src1[1];
+ dst1[2] += src1[2];
+ dst1[3] += src1[3];
+ dst1[4] += src1[4];
+ dst1[5] += src1[5];
+ dst1[6] += src1[6];
+ dst1[7] += src1[7];
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 2;
+ dst1[i] = 0;
+ src[i] = i;
+ src1[i] = i+2;
+ }
+
+ foo (A);
+
+ for (i = 0; i < N; i++)
+ {
+ if (dst[i] != A * i
+ || (i < N/2 && dst1[i] != i + 2))
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000
@@ -3268,7 +3268,7 @@
# Return 1 if the target supports multiple vector sizes
proc check_effective_target_vect_multiple_sizes { } {
- global et_vect_multiple_sizes
+ global et_vect_multiple_sizes_saved
if [info exists et_vect_multiple_sizes_saved] {
verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
@@ -3286,7 +3286,7 @@
# Return 1 if the target supports vectors of 64 bits.
proc check_effective_target_vect64 { } {
- global et_vect64
+ global et_vect64_saved
if [info exists et_vect64_saved] {
verbose "check_effective_target_vect64: using cached result" 2
=== modified file 'gcc/tree-vect-slp.c'
--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
+++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
@@ -386,20 +386,15 @@
return false;
}
- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (ncopies != 1)
- {
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP with multiple types ");
-
- /* FORNOW: multiple types are unsupported in BB SLP. */
- if (bb_vinfo)
- return false;
- }
-
/* In case of multiple types we need to detect the smallest type. */
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ {
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (bb_vinfo)
+ vectorization_factor = *max_nunits;
+ }
+
+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
if (is_gimple_call (stmt))
rhs_code = CALL_EXPR;
@@ -1183,7 +1178,6 @@
if (loop_vinfo)
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
else
- /* No multitypes in BB SLP. */
vectorization_factor = nunits;
/* Calculate the unrolling factor. */
@@ -1246,16 +1240,23 @@
&max_nunits, &load_permutation, &loads,
vectorization_factor))
{
+ /* Calculate the unrolling factor based on the smallest type. */
+ if (max_nunits > nunits)
+ unrolling_factor = least_common_multiple (max_nunits, group_size)
+ / group_size;
+
+ if (unrolling_factor != 1 && !loop_vinfo)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
+ " block SLP");
+ return false;
+ }
+
/* Create a new SLP instance. */
new_instance = XNEW (struct _slp_instance);
SLP_INSTANCE_TREE (new_instance) = node;
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
- /* Calculate the unrolling factor based on the smallest type in the
- loop. */
- if (max_nunits > nunits)
- unrolling_factor = least_common_multiple (max_nunits, group_size)
- / group_size;
-
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
=== modified file 'gcc/tree-vect-stmts.c'
--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000
+++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
@@ -3081,11 +3081,9 @@
VEC (tree, heap) *vec_oprnds0 = NULL;
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
tree last_oprnd, intermediate_type;
-
- /* FORNOW: not supported by basic block SLP vectorization. */
- gcc_assert (loop_vinfo);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
@@ -3113,7 +3111,7 @@
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
&& CONVERT_EXPR_CODE_P (code))))
return false;
- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
&def_stmt, &def, &dt[0], &vectype_in))
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -3360,11 +3358,9 @@
int multi_step_cvt = 0;
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
-
- /* FORNOW: not supported by basic block SLP vectorization. */
- gcc_assert (loop_vinfo);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
@@ -3393,7 +3389,7 @@
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
&& CONVERT_EXPR_CODE_P (code))))
return false;
- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
&def_stmt, &def, &dt[0], &vectype_in))
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -5153,7 +5149,9 @@
else
{
if (bb_vinfo)
- ok = (vectorizable_shift (stmt, NULL, NULL, node)
+ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
+ || vectorizable_type_demotion (stmt, NULL, NULL, node)
+ || vectorizable_shift (stmt, NULL, NULL, node)
|| vectorizable_operation (stmt, NULL, NULL, node)
|| vectorizable_assignment (stmt, NULL, NULL, node)
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
@@ -5780,7 +5778,7 @@
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+ struct loop *vect_loop = NULL;
bool ordered_p;
enum machine_mode vec_mode;
enum insn_code icode1, icode2;
@@ -5789,6 +5787,9 @@
tree wide_vectype = vectype_out;
enum tree_code c1, c2;
+ if (loop_info)
+ vect_loop = LOOP_VINFO_LOOP (loop_info);
+
/* The result of a vectorized widening operation usually requires two vectors
(because the widened results do not fit int one vector). The generated
vector results would normally be expected to be generated in the same
@@ -5809,7 +5810,8 @@
iterations in parallel). We therefore don't allow to change the order
of the computation in the inner-loop during outer-loop vectorization. */
- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+ if (vect_loop
+ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
&& !nested_in_vect_loop_p (vect_loop, stmt))
ordered_p = false;
else
@@ -0,0 +1,628 @@
2011-10-17 Michael Hope <michael.hope@linaro.org>
Backport from mainline r178852:
2011-09-14 Julian Brown <julian@codesourcery.com>
gcc/
* config/arm/arm.c (arm_override_options): Add unaligned_access
support.
(arm_file_start): Emit attribute for unaligned access as appropriate.
* config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
(UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
(insv, extzv): Add unaligned-access support.
(extv): Change to expander. Likewise.
(extzv_t1, extv_regsi): Add helpers.
(unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
(unaligned_storesi, unaligned_storehi): New.
(*extv_reg): New (previous extv implementation).
* config/arm/arm.opt (munaligned_access): Add option.
* config/arm/constraints.md (Uw): New constraint.
* expmed.c (store_bit_field_1): Adjust bitfield numbering according
to size of access, not size of unit, when BITS_BIG_ENDIAN !=
BYTES_BIG_ENDIAN. Don't use bitfield accesses for
volatile accesses when -fstrict-volatile-bitfields is in effect.
(extract_bit_field_1): Likewise.
Backport from mainline r172697:
2011-04-19 Wei Guozhi <carrot@google.com>
PR target/47855
gcc/
* config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype.
* config/arm/arm.c (thumb1_legitimate_address_p): Remove the static
linkage.
* config/arm/constraints.md (Uu): New constraint.
* config/arm/arm.md (*arm_movqi_insn): Compute attr "length".
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-10-11 01:56:19 +0000
@@ -59,6 +59,7 @@
int);
extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
int);
+extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
extern int arm_const_double_rtx (rtx);
extern int neg_const_double_rtx_ok_for_fpa (rtx);
extern int vfp3_const_double_rtx (rtx);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
+++ new/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
@@ -2065,6 +2065,28 @@
fix_cm3_ldrd = 0;
}
+ /* Enable -munaligned-access by default for
+ - all ARMv6 architecture-based processors
+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+ Disable -munaligned-access by default for
+ - all pre-ARMv6 architecture-based processors
+ - ARMv6-M architecture-based processors. */
+
+ if (unaligned_access == 2)
+ {
+ if (arm_arch6 && (arm_arch_notm || arm_arch7))
+ unaligned_access = 1;
+ else
+ unaligned_access = 0;
+ }
+ else if (unaligned_access == 1
+ && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
+ {
+ warning (0, "target CPU does not support unaligned accesses");
+ unaligned_access = 0;
+ }
+
if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
@@ -6106,7 +6128,7 @@
addresses based on the frame pointer or arg pointer until the
reload pass starts. This is so that eliminating such addresses
into stack based ones won't produce impossible code. */
-static int
+int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
/* ??? Not clear if this is right. Experiment. */
@@ -22226,6 +22248,10 @@
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+ /* Tag_CPU_unaligned_access. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
+ unaligned_access);
+
/* Tag_ABI_FP_16bit_format. */
if (arm_fp16_format)
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
+++ new/gcc/config/arm/arm.md 2011-10-11 02:31:01 +0000
@@ -113,6 +113,10 @@
(UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
; another symbolic address.
(UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
+ (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access
+ ; unaligned locations, on architectures which support
+ ; that.
+ (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh.
]
)
@@ -2463,10 +2467,10 @@
;;; this insv pattern, so this pattern needs to be reevalutated.
(define_expand "insv"
- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
- (match_operand:SI 1 "general_operand" "")
- (match_operand:SI 2 "general_operand" ""))
- (match_operand:SI 3 "reg_or_int_operand" ""))]
+ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+ (match_operand 1 "general_operand" "")
+ (match_operand 2 "general_operand" ""))
+ (match_operand 3 "reg_or_int_operand" ""))]
"TARGET_ARM || arm_arch_thumb2"
"
{
@@ -2477,35 +2481,70 @@
if (arm_arch_thumb2)
{
- bool use_bfi = TRUE;
-
- if (GET_CODE (operands[3]) == CONST_INT)
- {
- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
-
- if (val == 0)
- {
- emit_insn (gen_insv_zero (operands[0], operands[1],
- operands[2]));
+ if (unaligned_access && MEM_P (operands[0])
+ && s_register_operand (operands[3], GET_MODE (operands[3]))
+ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
+ {
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)
+ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width
+ - start_bit;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[0], SImode,
+ start_bit / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
+ }
+ else
+ {
+ rtx tmp = gen_reg_rtx (HImode);
+
+ base_addr = adjust_address (operands[0], HImode,
+ start_bit / BITS_PER_UNIT);
+ emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
+ emit_insn (gen_unaligned_storehi (base_addr, tmp));
+ }
+ DONE;
+ }
+ else if (s_register_operand (operands[0], GET_MODE (operands[0])))
+ {
+ bool use_bfi = TRUE;
+
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+
+ if (val == 0)
+ {
+ emit_insn (gen_insv_zero (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ /* See if the set can be done with a single orr instruction. */
+ if (val == mask && const_ok_for_arm (val << start_bit))
+ use_bfi = FALSE;
+ }
+
+ if (use_bfi)
+ {
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = force_reg (SImode, operands[3]);
+
+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
DONE;
}
-
- /* See if the set can be done with a single orr instruction. */
- if (val == mask && const_ok_for_arm (val << start_bit))
- use_bfi = FALSE;
- }
-
- if (use_bfi)
- {
- if (GET_CODE (operands[3]) != REG)
- operands[3] = force_reg (SImode, operands[3]);
-
- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
- }
+ }
+ else
+ FAIL;
}
+ if (!s_register_operand (operands[0], GET_MODE (operands[0])))
+ FAIL;
+
target = copy_rtx (operands[0]);
/* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
subreg as the final target. */
@@ -3697,12 +3736,10 @@
;; to reduce register pressure later on.
(define_expand "extzv"
- [(set (match_dup 4)
- (ashift:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "const_int_operand" "")))
- (set (match_operand:SI 0 "register_operand" "")
- (lshiftrt:SI (match_dup 4)
- (match_operand:SI 3 "const_int_operand" "")))]
+ [(set (match_operand 0 "s_register_operand" "")
+ (zero_extract (match_operand 1 "nonimmediate_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
"TARGET_THUMB1 || arm_arch_thumb2"
"
{
@@ -3711,10 +3748,57 @@
if (arm_arch_thumb2)
{
- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
+ HOST_WIDE_INT width = INTVAL (operands[2]);
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+ if (unaligned_access && MEM_P (operands[1])
+ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
+ {
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)
+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width
+ - bitpos;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[1], SImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+ }
+ else
+ {
+ rtx dest = operands[0];
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* We may get a paradoxical subreg here. Strip it off. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == SImode
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
+ dest = SUBREG_REG (dest);
+
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+ FAIL;
+
+ base_addr = adjust_address (operands[1], HImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
+ }
+ DONE;
+ }
+ else if (s_register_operand (operands[1], GET_MODE (operands[1])))
+ {
+ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
}
+
+ if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+ FAIL;
operands[3] = GEN_INT (rshift);
@@ -3724,12 +3808,154 @@
DONE;
}
- operands[2] = GEN_INT (lshift);
- operands[4] = gen_reg_rtx (SImode);
+ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift),
+ operands[3], gen_reg_rtx (SImode)));
+ DONE;
}"
)
-(define_insn "extv"
+;; Helper for extzv, for the Thumb-1 register-shifts case.
+
+(define_expand "extzv_t1"
+ [(set (match_operand:SI 4 "s_register_operand" "")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (lshiftrt:SI (match_dup 4)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_THUMB1"
+ "")
+
+(define_expand "extv"
+ [(set (match_operand 0 "s_register_operand" "")
+ (sign_extract (match_operand 1 "nonimmediate_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "arm_arch_thumb2"
+{
+ HOST_WIDE_INT width = INTVAL (operands[2]);
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
+ && (bitpos % BITS_PER_UNIT) == 0)
+ {
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)
+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[1], SImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+ }
+ else
+ {
+ rtx dest = operands[0];
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* We may get a paradoxical subreg here. Strip it off. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == SImode
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
+ dest = SUBREG_REG (dest);
+
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+ FAIL;
+
+ base_addr = adjust_address (operands[1], HImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadhis (tmp, base_addr));
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
+ }
+
+ DONE;
+ }
+ else if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+ FAIL;
+ else if (GET_MODE (operands[0]) == SImode
+ && GET_MODE (operands[1]) == SImode)
+ {
+ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+
+ FAIL;
+})
+
+; Helper to expand register forms of extv with the proper modes.
+
+(define_expand "extv_regsi"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ ""
+{
+})
+
+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
+
+(define_insn "unaligned_loadsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD))]
+ "unaligned_access && TARGET_32BIT"
+ "ldr%?\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load1")])
+
+(define_insn "unaligned_loadhis"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (sign_extend:SI
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD)))]
+ "unaligned_access && TARGET_32BIT"
+ "ldr%(sh%)\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_loadhiu"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (zero_extend:SI
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD)))]
+ "unaligned_access && TARGET_32BIT"
+ "ldr%(h%)\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_storesi"
+ [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"
+ "str%?\t%1, %0\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store1")])
+
+(define_insn "unaligned_storehi"
+ [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
+ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"
+ "str%(h%)\t%1, %0\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store1")])
+
+(define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "const_int_operand" "M")
@@ -6038,8 +6264,8 @@
(define_insn "*arm_movqi_insn"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
- (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m")
+ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))]
"TARGET_32BIT
&& ( register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode))"
@@ -6047,10 +6273,14 @@
mov%?\\t%0, %1
mvn%?\\t%0, #%B1
ldr%(b%)\\t%0, %1
+ str%(b%)\\t%1, %0
+ ldr%(b%)\\t%0, %1
str%(b%)\\t%1, %0"
- [(set_attr "type" "*,*,load1,store1")
- (set_attr "insn" "mov,mvn,*,*")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "*,*,load1,store1,load1,store1")
+ (set_attr "insn" "mov,mvn,*,*,*,*")
+ (set_attr "predicable" "yes")
+ (set_attr "arch" "any,any,t2,t2,any,any")
+ (set_attr "length" "4,4,2,2,4,4")]
)
(define_insn "*thumb1_movqi_insn"
=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
+++ new/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
@@ -173,3 +173,7 @@
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
that may trigger Cortex-M3 errata.
+
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned word and halfword accesses to packed data.
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/constraints.md 2011-10-11 02:31:01 +0000
@@ -36,6 +36,7 @@
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
;; in ARM state: Uq
+;; in Thumb state: Uu, Uw
(define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
@@ -344,6 +345,27 @@
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+(define_memory_constraint "Uu"
+ "@internal
+ In Thumb state an address that is valid in 16bit encoding."
+ (and (match_code "mem")
+ (match_test "TARGET_THUMB
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+ 0)")))
+
+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
+; are actually LDM/STM instructions, so cannot be used to access unaligned
+; data.
+(define_memory_constraint "Uw"
+ "@internal
+ In Thumb state an address that is valid in 16bit encoding, and that can be
+ used for unaligned accesses."
+ (and (match_code "mem")
+ (match_test "TARGET_THUMB
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+ 0)
+ && GET_CODE (XEXP (op, 0)) != POST_INC")))
+
;; We used to have constraint letters for S and R in ARM state, but
;; all uses of these now appear to have been removed.
=== modified file 'gcc/expmed.c'
--- old/gcc/expmed.c 2011-05-22 19:02:59 +0000
+++ new/gcc/expmed.c 2011-10-11 02:31:01 +0000
@@ -657,6 +657,10 @@
&& GET_MODE (value) != BLKmode
&& bitsize > 0
&& GET_MODE_BITSIZE (op_mode) >= bitsize
+ /* Do not use insv for volatile bitfields when
+ -fstrict-volatile-bitfields is in effect. */
+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
+ && flag_strict_volatile_bitfields > 0)
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
&& (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
&& insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
@@ -700,19 +704,21 @@
copy_back = true;
}
- /* On big-endian machines, we count bits from the most significant.
- If the bit field insn does not, we must invert. */
-
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
- xbitpos = unit - bitsize - xbitpos;
-
/* We have been counting XBITPOS within UNIT.
Count instead within the size of the register. */
- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
unit = GET_MODE_BITSIZE (op_mode);
+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
+ "backwards" from the size of the unit we are inserting into.
+ Otherwise, we count bits from the most significant on a
+ BYTES/BITS_BIG_ENDIAN machine. */
+
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
/* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
value1 = value;
if (GET_MODE (value) != op_mode)
@@ -1528,6 +1534,10 @@
if (ext_mode != MAX_MACHINE_MODE
&& bitsize > 0
&& GET_MODE_BITSIZE (ext_mode) >= bitsize
+ /* Do not use extv/extzv for volatile bitfields when
+ -fstrict-volatile-bitfields is in effect. */
+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
+ && flag_strict_volatile_bitfields > 0)
/* If op0 is a register, we need it in EXT_MODE to make it
acceptable to the format of ext(z)v. */
&& !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
@@ -1552,17 +1562,20 @@
/* Get ref to first byte containing part of the field. */
xop0 = adjust_address (xop0, byte_mode, xoffset);
- /* On big-endian machines, we count bits from the most significant.
- If the bit field insn does not, we must invert. */
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
- xbitpos = unit - bitsize - xbitpos;
-
/* Now convert from counting within UNIT to counting in EXT_MODE. */
- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
unit = GET_MODE_BITSIZE (ext_mode);
+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
+ "backwards" from the size of the unit we are extracting from.
+ Otherwise, we count bits from the most significant on a
+ BYTES/BITS_BIG_ENDIAN machine. */
+
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
if (xtarget == 0)
xtarget = xspec_target = gen_reg_rtx (tmode);
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,147 @@
2011-10-19 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-09-09 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm-cores.def (generic-armv7-a): New architecture.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* config/arm/arm.c (arm_file_start): Output .arch directive when
user passes -mcpu=generic-*.
(arm_issue_rate): Add genericv7a support.
* config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec.
(ASM_CPU_SPEC): New define.
* config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec).
* config/arm/semi.h (ASM_SPEC): Likewise.
* doc/invoke.texi (ARM Options): Document -mcpu=generic-*
and -mtune=generic-*.
=== modified file 'gcc/config/arm/arm-cores.def'
--- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000
+++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000
@@ -124,6 +124,7 @@
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2)
ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
+ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
@@ -135,3 +136,4 @@
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex)
ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex)
+
=== modified file 'gcc/config/arm/arm-tune.md'
--- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000
+++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
+++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000
@@ -22185,6 +22185,8 @@
const char *fpu_name;
if (arm_selected_arch)
asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
+ else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
+ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
else
asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
@@ -23717,6 +23719,7 @@
case cortexr4:
case cortexr4f:
case cortexr5:
+ case genericv7a:
case cortexa5:
case cortexa8:
case cortexa9:
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000
+++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000
@@ -198,6 +198,7 @@
Do not define this macro if it does not need to do anything. */
#define EXTRA_SPECS \
{ "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ { "asm_cpu_spec", ASM_CPU_SPEC }, \
SUBTARGET_EXTRA_SPECS
#ifndef SUBTARGET_EXTRA_SPECS
@@ -2278,4 +2279,8 @@
instruction. */
#define MAX_LDM_STM_OPS 4
+#define ASM_CPU_SPEC \
+ " %{mcpu=generic-*:-march=%*;" \
+ " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
+
#endif /* ! GCC_ARM_H */
=== modified file 'gcc/config/arm/elf.h'
--- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000
+++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000
@@ -56,8 +56,7 @@
#define ASM_SPEC "\
%{mbig-endian:-EB} \
%{mlittle-endian:-EL} \
-%{mcpu=*:-mcpu=%*} \
-%{march=*:-march=%*} \
+%(asm_cpu_spec) \
%{mapcs-*:-mapcs-%*} \
%(subtarget_asm_float_spec) \
%{mthumb-interwork:-mthumb-interwork} \
=== modified file 'gcc/config/arm/semi.h'
--- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000
+++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000
@@ -65,8 +65,7 @@
#define ASM_SPEC "\
%{fpic|fpie: -k} %{fPIC|fPIE: -k} \
%{mbig-endian:-EB} \
-%{mcpu=*:-mcpu=%*} \
-%{march=*:-march=%*} \
+%(arm_cpu_spec) \
%{mapcs-float:-mfloat} \
%{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \
%{mfloat-abi=*} %{mfpu=*} \
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000
+++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000
@@ -10215,6 +10215,10 @@
@samp{cortex-m0},
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
+@option{-mcpu=generic-@var{arch}} is also permissible, and is
+equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
+See @option{-mtune} for more information.
+
@item -mtune=@var{name}
@opindex mtune
This option is very similar to the @option{-mcpu=} option, except that
@@ -10226,6 +10230,13 @@
For some ARM implementations better performance can be obtained by using
this option.
+@option{-mtune=generic-@var{arch}} specifies that GCC should tune the
+performance for a blend of processors within architecture @var{arch}.
+The aim is to generate code that run well on the current most popular
+processors, balancing between optimizations that benefit some CPUs in the
+range, and avoiding performance pitfalls of other CPUs. The effects of
+this option may change in future GCC versions as CPU models come and go.
+
@item -march=@var{name}
@opindex march
This specifies the name of the target ARM architecture. GCC uses this
@@ -0,0 +1,304 @@
2011-10-19 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-10-18 Andrew Stubbs <ams@codesourcery.com>
* config/arm/driver-arm.c (host_detect_local_cpu): Close the file
before exiting.
2011-10-18 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config.host (arm*-*-linux*): Add driver-arm.o and x-arm.
* config/arm/arm.opt: Add 'native' processor_type and
arm_arch enum values.
* config/arm/arm.h (host_detect_local_cpu): New prototype.
(EXTRA_SPEC_FUNCTIONS): New define.
(MCPU_MTUNE_NATIVE_SPECS): New define.
(DRIVER_SELF_SPECS): New define.
* config/arm/driver-arm.c: New file.
* config/arm/x-arm: New file.
* doc/invoke.texi (ARM Options): Document -mcpu=native,
-mtune=native and -march=native.
=== modified file 'gcc/config.host'
--- old/gcc/config.host 2011-02-15 09:49:14 +0000
+++ new/gcc/config.host 2011-10-19 17:01:50 +0000
@@ -100,6 +100,14 @@
esac
case ${host} in
+ arm*-*-linux*)
+ case ${target} in
+ arm*-*-*)
+ host_extra_gcc_objs="driver-arm.o"
+ host_xmake_file="${host_xmake_file} arm/x-arm"
+ ;;
+ esac
+ ;;
alpha*-*-linux*)
case ${target} in
alpha*-*-linux*)
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000
+++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000
@@ -2283,4 +2283,21 @@
" %{mcpu=generic-*:-march=%*;" \
" :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
+/* -mcpu=native handling only makes sense with compiler running on
+ an ARM chip. */
+#if defined(__arm__)
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+# define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+
+# define MCPU_MTUNE_NATIVE_SPECS \
+ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \
+ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \
+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#else
+# define MCPU_MTUNE_NATIVE_SPECS ""
+#endif
+
+#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
+
#endif /* ! GCC_ARM_H */
=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
+++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000
@@ -48,6 +48,11 @@
Target RejectNegative Joined
Specify the name of the target architecture
+; Other arm_arch values are loaded from arm-tables.opt
+; but that is a generated file and this is an odd-one-out.
+EnumValue
+Enum(arm_arch) String(native) Value(-1) DriverOnly
+
marm
Target RejectNegative InverseMask(THUMB) Undocumented
@@ -153,6 +158,11 @@
Target RejectNegative Joined
Tune code for the given processor
+; Other processor_type values are loaded from arm-tables.opt
+; but that is a generated file and this is an odd-one-out.
+EnumValue
+Enum(processor_type) String(native) Value(-1) DriverOnly
+
mwords-little-endian
Target Report RejectNegative Mask(LITTLE_WORDS)
Assume big endian bytes, little endian words
=== added file 'gcc/config/arm/driver-arm.c'
--- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000
+++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000
@@ -0,0 +1,149 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "configargs.h"
+
+struct vendor_cpu {
+ const char *part_no;
+ const char *arch_name;
+ const char *cpu_name;
+};
+
+static struct vendor_cpu arm_cpu_table[] = {
+ {"0x926", "armv5te", "arm926ej-s"},
+ {"0xa26", "armv5te", "arm1026ej-s"},
+ {"0xb02", "armv6k", "mpcore"},
+ {"0xb36", "armv6j", "arm1136j-s"},
+ {"0xb56", "armv6t2", "arm1156t2-s"},
+ {"0xb76", "armv6zk", "arm1176jz-s"},
+ {"0xc05", "armv7-a", "cortex-a5"},
+ {"0xc08", "armv7-a", "cortex-a8"},
+ {"0xc09", "armv7-a", "cortex-a9"},
+ {"0xc0f", "armv7-a", "cortex-a15"},
+ {"0xc14", "armv7-r", "cortex-r4"},
+ {"0xc15", "armv7-r", "cortex-r5"},
+ {"0xc20", "armv6-m", "cortex-m0"},
+ {"0xc21", "armv6-m", "cortex-m1"},
+ {"0xc23", "armv7-m", "cortex-m3"},
+ {"0xc24", "armv7e-m", "cortex-m4"},
+ {NULL, NULL, NULL}
+};
+
+struct {
+ const char *vendor_no;
+ const struct vendor_cpu *vendor_parts;
+} vendors[] = {
+ {"0x41", arm_cpu_table},
+ {NULL, NULL}
+};
+
+/* This will be called by the spec parser in gcc.c when it sees
+ a %:local_cpu_detect(args) construct. Currently it will be called
+ with either "arch", "cpu" or "tune" as argument depending on if
+ -march=native, -mcpu=native or -mtune=native is to be substituted.
+
+ It returns a string containing new command line parameters to be
+ put at the place of the above two options, depending on what CPU
+ this is executed. E.g. "-march=armv7-a" on a Cortex-A8 for
+ -march=native. If the routine can't detect a known processor,
+ the -march or -mtune option is discarded.
+
+ ARGC and ARGV are set depending on the actual arguments given
+ in the spec. */
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *val = NULL;
+ char buf[128];
+ FILE *f = NULL;
+ bool arch;
+ const struct vendor_cpu *cpu_table = NULL;
+
+ if (argc < 1)
+ goto not_found;
+
+ arch = strcmp (argv[0], "arch") == 0;
+ if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune"))
+ goto not_found;
+
+ f = fopen ("/proc/cpuinfo", "r");
+ if (f == NULL)
+ goto not_found;
+
+ while (fgets (buf, sizeof (buf), f) != NULL)
+ {
+ /* Ensure that CPU implementer is ARM (0x41). */
+ if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0)
+ {
+ int i;
+ for (i = 0; vendors[i].vendor_no != NULL; i++)
+ if (strstr (buf, vendors[i].vendor_no) != NULL)
+ {
+ cpu_table = vendors[i].vendor_parts;
+ break;
+ }
+ }
+
+ /* Detect arch/cpu. */
+ if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0)
+ {
+ int i;
+
+ if (cpu_table == NULL)
+ goto not_found;
+
+ for (i = 0; cpu_table[i].part_no != NULL; i++)
+ if (strstr (buf, cpu_table[i].part_no) != NULL)
+ {
+ val = arch ? cpu_table[i].arch_name : cpu_table[i].cpu_name;
+ break;
+ }
+ break;
+ }
+ }
+
+ fclose (f);
+
+ if (val == NULL)
+ goto not_found;
+
+ return concat ("-m", argv[0], "=", val, NULL);
+
+not_found:
+ {
+ unsigned int i;
+ unsigned int opt;
+ const char *search[] = {NULL, "arch"};
+
+ if (f)
+ fclose (f);
+
+ search[0] = argv[0];
+ for (opt = 0; opt < ARRAY_SIZE (search); opt++)
+ for (i = 0; i < ARRAY_SIZE (configure_default_options); i++)
+ if (strcmp (configure_default_options[i].name, search[opt]) == 0)
+ return concat ("-m", search[opt], "=",
+ configure_default_options[i].value, NULL);
+ return NULL;
+ }
+}
=== added file 'gcc/config/arm/x-arm'
--- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000
+++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000
@@ -0,0 +1,3 @@
+driver-arm.o: $(srcdir)/config/arm/driver-arm.c \
+ $(CONFIG_H) $(SYSTEM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000
+++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000
@@ -10215,10 +10215,16 @@
@samp{cortex-m0},
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
+
@option{-mcpu=generic-@var{arch}} is also permissible, and is
equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
See @option{-mtune} for more information.
+@option{-mcpu=native} causes the compiler to auto-detect the CPU
+of the build computer. At present, this feature is only supported on
+Linux, and not all architectures are recognised. If the auto-detect is
+unsuccessful the option has no effect.
+
@item -mtune=@var{name}
@opindex mtune
This option is very similar to the @option{-mcpu=} option, except that
@@ -10237,6 +10243,11 @@
range, and avoiding performance pitfalls of other CPUs. The effects of
this option may change in future GCC versions as CPU models come and go.
+@option{-mtune=native} causes the compiler to auto-detect the CPU
+of the build computer. At present, this feature is only supported on
+Linux, and not all architectures are recognised. If the auto-detect is
+unsuccessful the option has no effect.
+
@item -march=@var{name}
@opindex march
This specifies the name of the target ARM architecture. GCC uses this
@@ -10250,6 +10261,11 @@
@samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m},
@samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
+@option{-march=native} causes the compiler to auto-detect the architecture
+of the build computer. At present, this feature is only supported on
+Linux, and not all architectures are recognised. If the auto-detect is
+unsuccessful the option has no effect.
+
@item -mfpu=@var{name}
@itemx -mfpe=@var{number}
@itemx -mfp=@var{number}
@@ -0,0 +1,123 @@
2011-10-19 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-10-18 Andrew Stubbs <ams@codesourcery.com>
PR tree-optimization/50717
gcc/
* tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type'
parameter. Calculate 'type' from stmt.
(convert_mult_to_widen): Update call the is_widening_mult_p.
(convert_plusminus_to_widen): Likewise.
gcc/testsuite/
* gcc.dg/pr50717-1.c: New file.
* gcc.target/arm/wmul-12.c: Correct types.
* gcc.target/arm/wmul-8.c: Correct types.
=== added file 'gcc/testsuite/gcc.dg/pr50717-1.c'
--- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000
@@ -0,0 +1,26 @@
+/* PR tree-optimization/50717 */
+/* Ensure that widening multiply-and-accumulate is not used where integer
+ type promotion or users' casts should prevent it. */
+
+/* { dg-options "-O2 -fdump-tree-widening_mul" } */
+
+long long
+f (unsigned int a, char b, long long c)
+{
+ return (a * b) + c;
+}
+
+int
+g (short a, short b, int c)
+{
+ return (short)(a * b) + c;
+}
+
+int
+h (char a, char b, int c)
+{
+ return (char)(a * b) + c;
+}
+
+/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */
+/* { dg-final { cleanup-tree-dump "widening_mul" } } */
=== modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c'
--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000
+++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000
@@ -4,8 +4,8 @@
long long
foo (int *b, int *c)
{
- int tmp = *b * *c;
- return 10 + (long long)tmp;
+ long long tmp = (long long)*b * *c;
+ return 10 + tmp;
}
/* { dg-final { scan-assembler "smlal" } } */
=== modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c'
--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000
+++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000
@@ -4,7 +4,7 @@
long long
foo (long long a, int *b, int *c)
{
- return a + *b * *c;
+ return a + (long long)*b * *c;
}
/* { dg-final { scan-assembler "smlal" } } */
=== modified file 'gcc/tree-ssa-math-opts.c'
--- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000
+++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000
@@ -1351,10 +1351,12 @@
and *TYPE2_OUT would give the operands of the multiplication. */
static bool
-is_widening_mult_p (tree type, gimple stmt,
+is_widening_mult_p (gimple stmt,
tree *type1_out, tree *rhs1_out,
tree *type2_out, tree *rhs2_out)
{
+ tree type = TREE_TYPE (gimple_assign_lhs (stmt));
+
if (TREE_CODE (type) != INTEGER_TYPE
&& TREE_CODE (type) != FIXED_POINT_TYPE)
return false;
@@ -1416,7 +1418,7 @@
if (TREE_CODE (type) != INTEGER_TYPE)
return false;
- if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2))
+ if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
return false;
to_mode = TYPE_MODE (type);
@@ -1592,7 +1594,7 @@
if (code == PLUS_EXPR
&& (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
{
- if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1,
+ if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
&type2, &mult_rhs2))
return false;
add_rhs = rhs2;
@@ -1600,7 +1602,7 @@
}
else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
{
- if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1,
+ if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
&type2, &mult_rhs2))
return false;
add_rhs = rhs1;
@@ -0,0 +1,24 @@
2011-10-21 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF mainline:
2011-10-21 Andrew Stubbs <ams@codesourcery.com>
PR target/50809
gcc/
* config/arm/driver-arm.c (vendors): Make static.
=== modified file 'gcc/config/arm/driver-arm.c'
--- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000
+++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000
@@ -49,7 +49,7 @@
{NULL, NULL, NULL}
};
-struct {
+static struct {
const char *vendor_no;
const struct vendor_cpu *vendor_parts;
} vendors[] = {
@@ -0,0 +1,453 @@
2011-10-27 Ira Rosen <ira.rosen@linaro.org>
Backport from mainline:
2011-10-16 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-stmts.c (vectorizable_load): For SLP without permutation
treat the first load of the node as the first element in its
interleaving chain.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
necessary and possible.
(vect_build_slp_tree): Add new argument. Allow load groups of any size
in basic blocks. Keep all the loads for further permutation check.
Use the new argument to determine if there is a permutation. Update
the recursive calls.
(vect_supported_load_permutation_p): Allow subchains of interleaving
chains in basic block vectorization.
(vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
Check load permutation based on the new parameter.
(vect_schedule_slp_instance): Don't start from the first element in
interleaving chain unless the loads are permuted.
gcc/testsuite/
* gcc.dg/vect/bb-slp-29.c: New test.
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
+{
+ int i;
+ h /= 16;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] = A*src[0] + B*src[1];
+ dst[1] = A*src[1] + B*src[2];
+ dst[2] = A*src[2] + B*src[3];
+ dst[3] = A*src[3] + B*src[4];
+ dst[4] = A*src[4] + B*src[5];
+ dst[5] = A*src[5] + B*src[6];
+ dst[6] = A*src[6] + B*src[7];
+ dst[7] = A*src[7] + B*src[8];
+ dst += stride;
+ src += stride;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N/2; i++)
+ {
+ if (dst[i] != A * src[i] + B * src[i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== modified file 'gcc/tree-vect-slp.c'
--- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
+++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000
@@ -115,13 +115,15 @@
{
tree oprnd;
unsigned int i, number_of_oprnds;
- tree def;
+ tree def[2];
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
stmt_vec_info stmt_info =
vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
enum gimple_rhs_class rhs_class;
struct loop *loop = NULL;
+ enum tree_code rhs_code;
+ bool different_types = false;
if (loop_vinfo)
loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -133,7 +135,7 @@
{
oprnd = gimple_op (stmt, i + 1);
- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
&dt[i])
|| (!def_stmt && dt[i] != vect_constant_def))
{
@@ -188,11 +190,11 @@
switch (gimple_code (def_stmt))
{
case GIMPLE_PHI:
- def = gimple_phi_result (def_stmt);
+ def[i] = gimple_phi_result (def_stmt);
break;
case GIMPLE_ASSIGN:
- def = gimple_assign_lhs (def_stmt);
+ def[i] = gimple_assign_lhs (def_stmt);
break;
default:
@@ -206,8 +208,8 @@
{
/* op0 of the first stmt of the group - store its info. */
*first_stmt_dt0 = dt[i];
- if (def)
- *first_stmt_def0_type = TREE_TYPE (def);
+ if (def[i])
+ *first_stmt_def0_type = TREE_TYPE (def[i]);
else
*first_stmt_const_oprnd = oprnd;
@@ -227,8 +229,8 @@
{
/* op1 of the first stmt of the group - store its info. */
*first_stmt_dt1 = dt[i];
- if (def)
- *first_stmt_def1_type = TREE_TYPE (def);
+ if (def[i])
+ *first_stmt_def1_type = TREE_TYPE (def[i]);
else
{
/* We assume that the stmt contains only one constant
@@ -249,22 +251,53 @@
the def-stmt/s of the first stmt. */
if ((i == 0
&& (*first_stmt_dt0 != dt[i]
- || (*first_stmt_def0_type && def
+ || (*first_stmt_def0_type && def[0]
&& !types_compatible_p (*first_stmt_def0_type,
- TREE_TYPE (def)))))
+ TREE_TYPE (def[0])))))
|| (i == 1
&& (*first_stmt_dt1 != dt[i]
- || (*first_stmt_def1_type && def
+ || (*first_stmt_def1_type && def[1]
&& !types_compatible_p (*first_stmt_def1_type,
- TREE_TYPE (def)))))
- || (!def
+ TREE_TYPE (def[1])))))
+ || (!def[i]
&& !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
- TREE_TYPE (oprnd))))
+ TREE_TYPE (oprnd)))
+ || different_types)
{
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "Build SLP failed: different types ");
+ if (i != number_of_oprnds - 1)
+ different_types = true;
+ else
+ {
+ if (is_gimple_assign (stmt)
+ && (rhs_code = gimple_assign_rhs_code (stmt))
+ && TREE_CODE_CLASS (rhs_code) == tcc_binary
+ && commutative_tree_code (rhs_code)
+ && *first_stmt_dt0 == dt[1]
+ && *first_stmt_dt1 == dt[0]
+ && def[0] && def[1]
+ && !(*first_stmt_def0_type
+ && !types_compatible_p (*first_stmt_def0_type,
+ TREE_TYPE (def[1])))
+ && !(*first_stmt_def1_type
+ && !types_compatible_p (*first_stmt_def1_type,
+ TREE_TYPE (def[0]))))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Swapping operands of ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+ gimple_assign_rhs2_ptr (stmt));
+ }
+ else
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: different types ");
- return false;
+ return false;
+ }
+ }
}
}
}
@@ -278,10 +311,10 @@
case vect_internal_def:
case vect_reduction_def:
- if (i == 0)
+ if ((i == 0 && !different_types) || (i == 1 && different_types))
VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
else
- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
+ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
break;
default:
@@ -289,7 +322,7 @@
if (vect_print_dump_info (REPORT_SLP))
{
fprintf (vect_dump, "Build SLP failed: illegal type of def ");
- print_generic_expr (vect_dump, def, TDF_SLIM);
+ print_generic_expr (vect_dump, def[i], TDF_SLIM);
}
return false;
@@ -312,7 +345,7 @@
int ncopies_for_cost, unsigned int *max_nunits,
VEC (int, heap) **load_permutation,
VEC (slp_tree, heap) **loads,
- unsigned int vectorization_factor)
+ unsigned int vectorization_factor, bool *loads_permuted)
{
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
@@ -523,7 +556,9 @@
/* Check that the size of interleaved loads group is not
greater than the SLP group size. */
- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+ if (loop_vinfo
+ && DR_GROUP_SIZE (vinfo_for_stmt (stmt))
+ > ncopies * group_size)
{
if (vect_print_dump_info (REPORT_SLP))
{
@@ -644,19 +679,22 @@
/* Strided loads were reached - stop the recursion. */
if (stop_recursion)
{
+ VEC_safe_push (slp_tree, heap, *loads, *node);
if (permutation)
{
- VEC_safe_push (slp_tree, heap, *loads, *node);
+
+ *loads_permuted = true;
*inside_cost
+= targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
* group_size;
}
else
- {
- /* We don't check here complex numbers chains, so we keep them in
- LOADS for further check in vect_supported_load_permutation_p. */
+ {
+ /* We don't check here complex numbers chains, so we set
+ LOADS_PERMUTED for further check in
+ vect_supported_load_permutation_p. */
if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
- VEC_safe_push (slp_tree, heap, *loads, *node);
+ *loads_permuted = true;
}
return true;
@@ -675,7 +713,7 @@
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
max_nunits, load_permutation, loads,
- vectorization_factor))
+ vectorization_factor, loads_permuted))
return false;
SLP_TREE_LEFT (*node) = left_node;
@@ -693,7 +731,7 @@
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
max_nunits, load_permutation, loads,
- vectorization_factor))
+ vectorization_factor, loads_permuted))
return false;
SLP_TREE_RIGHT (*node) = right_node;
@@ -879,8 +917,10 @@
bool supported, bad_permutation = false;
sbitmap load_index;
slp_tree node, other_complex_node;
- gimple stmt, first = NULL, other_node_first;
+ gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
unsigned complex_numbers = 0;
+ struct data_reference *dr;
+ bb_vec_info bb_vinfo;
/* FORNOW: permutations are only supported in SLP. */
if (!slp_instn)
@@ -1040,6 +1080,76 @@
}
}
+ /* In basic block vectorization we allow any subchain of an interleaving
+ chain.
+ FORNOW: not supported in loop SLP because of realignment compications. */
+ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
+ bad_permutation = false;
+ /* Check that for every node in the instance teh loads form a subchain. */
+ if (bb_vinfo)
+ {
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+ {
+ next_load = NULL;
+ first_load = NULL;
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
+ {
+ if (!first_load)
+ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load));
+ else if (first_load
+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load)))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ if (j != 0 && next_load != load)
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load));
+ }
+
+ if (bad_permutation)
+ break;
+ }
+
+ /* Check that the alignment of the first load in every subchain, i.e.,
+ the first statement in every load node, is supported. */
+ if (!bad_permutation)
+ {
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+ {
+ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ if (first_load
+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load)))
+ {
+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
+ if (vect_supportable_dr_alignment (dr, false)
+ == dr_unaligned_unsupported)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "unsupported unaligned load ");
+ print_gimple_stmt (vect_dump, first_load, 0,
+ TDF_SLIM);
+ }
+ bad_permutation = true;
+ break;
+ }
+ }
+ }
+
+ if (!bad_permutation)
+ {
+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
+ return true;
+ }
+ }
+ }
+
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
well (unless it's reduction). */
@@ -1149,6 +1259,7 @@
VEC (int, heap) *load_permutation;
VEC (slp_tree, heap) *loads;
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ bool loads_permuted = false;
if (dr)
{
@@ -1238,7 +1349,7 @@
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
&inside_cost, &outside_cost, ncopies_for_cost,
&max_nunits, &load_permutation, &loads,
- vectorization_factor))
+ vectorization_factor, &loads_permuted))
{
/* Calculate the unrolling factor based on the smallest type. */
if (max_nunits > nunits)
@@ -1263,7 +1374,8 @@
SLP_INSTANCE_LOADS (new_instance) = loads;
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
- if (VEC_length (slp_tree, loads))
+
+ if (loads_permuted)
{
if (!vect_supported_load_permutation_p (new_instance, group_size,
load_permutation))
@@ -2542,10 +2654,11 @@
/* Loads should be inserted before the first load. */
if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
&& STMT_VINFO_STRIDED_ACCESS (stmt_info)
- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
+ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
+ && SLP_INSTANCE_LOAD_PERMUTATION (instance))
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
else if (is_pattern_stmt_p (stmt_info))
- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
else
si = gsi_for_stmt (stmt);
=== modified file 'gcc/tree-vect-stmts.c'
--- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
+++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000
@@ -4285,6 +4285,11 @@
if (strided_load)
{
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+ if (slp
+ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
+ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
+ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
+
/* Check if the chain of loads is already vectorized. */
if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
{
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,61 @@
2011-11-04 Revital Eres <revital.eres@linaro.org>
Backport from mainline -r180673:
gcc/
* modulo-sched.c (generate_prolog_epilog): Mark prolog
and epilog as BB_DISABLE_SCHEDULE.
(mark_loop_unsched): New function.
(sms_schedule): Call it.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000
+++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000
@@ -1173,6 +1173,8 @@
/* Put the prolog on the entry edge. */
e = loop_preheader_edge (loop);
split_edge_and_insert (e, get_insns ());
+ if (!flag_resched_modulo_sched)
+ e->dest->flags |= BB_DISABLE_SCHEDULE;
end_sequence ();
@@ -1186,9 +1188,24 @@
gcc_assert (single_exit (loop));
e = single_exit (loop);
split_edge_and_insert (e, get_insns ());
+ if (!flag_resched_modulo_sched)
+ e->dest->flags |= BB_DISABLE_SCHEDULE;
+
end_sequence ();
}
+/* Mark LOOP as software pipelined so the later
+ scheduling passes don't touch it. */
+static void
+mark_loop_unsched (struct loop *loop)
+{
+ unsigned i;
+ basic_block *bbs = get_loop_body (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ bbs[i]->flags |= BB_DISABLE_SCHEDULE;
+}
+
/* Return true if all the BBs of the loop are empty except the
loop header. */
static bool
@@ -1714,9 +1731,10 @@
permute_partial_schedule (ps, g->closing_branch->first_note);
/* Mark this loop as software pipelined so the later
- scheduling passes doesn't touch it. */
+ scheduling passes don't touch it. */
if (! flag_resched_modulo_sched)
- g->bb->flags |= BB_DISABLE_SCHEDULE;
+ mark_loop_unsched (loop);
+
/* The life-info is not valid any more. */
df_set_bb_dirty (g->bb);
@@ -0,0 +1,23 @@
2011-11-02 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF mainline:
2011-11-01 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7 tuning.
=== modified file 'gcc/config/arm/bpabi.h'
--- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000
+++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000
@@ -56,7 +56,8 @@
"|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
+ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\
+ ":%{!r:--be8}}}"
/* Tell the assembler to build BPABI binaries. */
#undef SUBTARGET_EXTRA_ASM_SPEC
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,515 @@
2011-11-21 Michael Hope <michael.hope@linaro.org>
Backport from mainline r180131:
2011-10-18 Julian Brown <julian@codesourcery.com>
gcc/
* config/arm/arm.c (arm_block_move_unaligned_straight)
(arm_adjust_block_mem, arm_block_move_unaligned_loop)
(arm_movmemqi_unaligned): New.
(arm_gen_movmemqi): Support unaligned block copies.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_arm_unaligned): New.
* gcc.target/arm/unaligned-memcpy-1.c: New.
* gcc.target/arm/unaligned-memcpy-2.c: New.
* gcc.target/arm/unaligned-memcpy-3.c: New.
* gcc.target/arm/unaligned-memcpy-4.c: New.
2011-09-15 James Greenhalgh <james.greenhalgh@arm.com>
gcc/
* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000
+++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000
@@ -10803,6 +10803,335 @@
return true;
}
+/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
+ unaligned copies on processors which support unaligned semantics for those
+ instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
+ (using more registers) by doing e.g. load/load/store/store for a factor of 2.
+ An interleave factor of 1 (the minimum) will perform no interleaving.
+ Load/store multiple are used for aligned addresses where possible. */
+
+static void
+arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
+ HOST_WIDE_INT length,
+ unsigned int interleave_factor)
+{
+ rtx *regs = XALLOCAVEC (rtx, interleave_factor);
+ int *regnos = XALLOCAVEC (int, interleave_factor);
+ HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
+ HOST_WIDE_INT i, j;
+ HOST_WIDE_INT remaining = length, words;
+ rtx halfword_tmp = NULL, byte_tmp = NULL;
+ rtx dst, src;
+ bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
+ bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
+ HOST_WIDE_INT srcoffset, dstoffset;
+ HOST_WIDE_INT src_autoinc, dst_autoinc;
+ rtx mem, addr;
+
+ gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
+
+ /* Use hard registers if we have aligned source or destination so we can use
+ load/store multiple with contiguous registers. */
+ if (dst_aligned || src_aligned)
+ for (i = 0; i < interleave_factor; i++)
+ regs[i] = gen_rtx_REG (SImode, i);
+ else
+ for (i = 0; i < interleave_factor; i++)
+ regs[i] = gen_reg_rtx (SImode);
+
+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
+ src = copy_addr_to_reg (XEXP (srcbase, 0));
+
+ srcoffset = dstoffset = 0;
+
+ /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
+ For copying the last bytes we want to subtract this offset again. */
+ src_autoinc = dst_autoinc = 0;
+
+ for (i = 0; i < interleave_factor; i++)
+ regnos[i] = i;
+
+ /* Copy BLOCK_SIZE_BYTES chunks. */
+
+ for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
+ {
+ /* Load words. */
+ if (src_aligned && interleave_factor > 1)
+ {
+ emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
+ TRUE, srcbase, &srcoffset));
+ src_autoinc += UNITS_PER_WORD * interleave_factor;
+ }
+ else
+ {
+ for (j = 0; j < interleave_factor; j++)
+ {
+ addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
+ - src_autoinc);
+ mem = adjust_automodify_address (srcbase, SImode, addr,
+ srcoffset + j * UNITS_PER_WORD);
+ emit_insn (gen_unaligned_loadsi (regs[j], mem));
+ }
+ srcoffset += block_size_bytes;
+ }
+
+ /* Store words. */
+ if (dst_aligned && interleave_factor > 1)
+ {
+ emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
+ TRUE, dstbase, &dstoffset));
+ dst_autoinc += UNITS_PER_WORD * interleave_factor;
+ }
+ else
+ {
+ for (j = 0; j < interleave_factor; j++)
+ {
+ addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
+ - dst_autoinc);
+ mem = adjust_automodify_address (dstbase, SImode, addr,
+ dstoffset + j * UNITS_PER_WORD);
+ emit_insn (gen_unaligned_storesi (mem, regs[j]));
+ }
+ dstoffset += block_size_bytes;
+ }
+
+ remaining -= block_size_bytes;
+ }
+
+ /* Copy any whole words left (note these aren't interleaved with any
+ subsequent halfword/byte load/stores in the interests of simplicity). */
+
+ words = remaining / UNITS_PER_WORD;
+
+ gcc_assert (words < interleave_factor);
+
+ if (src_aligned && words > 1)
+ {
+ emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
+ &srcoffset));
+ src_autoinc += UNITS_PER_WORD * words;
+ }
+ else
+ {
+ for (j = 0; j < words; j++)
+ {
+ addr = plus_constant (src,
+ srcoffset + j * UNITS_PER_WORD - src_autoinc);
+ mem = adjust_automodify_address (srcbase, SImode, addr,
+ srcoffset + j * UNITS_PER_WORD);
+ emit_insn (gen_unaligned_loadsi (regs[j], mem));
+ }
+ srcoffset += words * UNITS_PER_WORD;
+ }
+
+ if (dst_aligned && words > 1)
+ {
+ emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
+ &dstoffset));
+ dst_autoinc += words * UNITS_PER_WORD;
+ }
+ else
+ {
+ for (j = 0; j < words; j++)
+ {
+ addr = plus_constant (dst,
+ dstoffset + j * UNITS_PER_WORD - dst_autoinc);
+ mem = adjust_automodify_address (dstbase, SImode, addr,
+ dstoffset + j * UNITS_PER_WORD);
+ emit_insn (gen_unaligned_storesi (mem, regs[j]));
+ }
+ dstoffset += words * UNITS_PER_WORD;
+ }
+
+ remaining -= words * UNITS_PER_WORD;
+
+ gcc_assert (remaining < 4);
+
+ /* Copy a halfword if necessary. */
+
+ if (remaining >= 2)
+ {
+ halfword_tmp = gen_reg_rtx (SImode);
+
+ addr = plus_constant (src, srcoffset - src_autoinc);
+ mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
+ emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
+
+ /* Either write out immediately, or delay until we've loaded the last
+ byte, depending on interleave factor. */
+ if (interleave_factor == 1)
+ {
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
+ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
+ emit_insn (gen_unaligned_storehi (mem,
+ gen_lowpart (HImode, halfword_tmp)));
+ halfword_tmp = NULL;
+ dstoffset += 2;
+ }
+
+ remaining -= 2;
+ srcoffset += 2;
+ }
+
+ gcc_assert (remaining < 2);
+
+ /* Copy last byte. */
+
+ if ((remaining & 1) != 0)
+ {
+ byte_tmp = gen_reg_rtx (SImode);
+
+ addr = plus_constant (src, srcoffset - src_autoinc);
+ mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
+ emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
+
+ if (interleave_factor == 1)
+ {
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
+ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
+ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
+ byte_tmp = NULL;
+ dstoffset++;
+ }
+
+ remaining--;
+ srcoffset++;
+ }
+
+ /* Store last halfword if we haven't done so already. */
+
+ if (halfword_tmp)
+ {
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
+ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
+ emit_insn (gen_unaligned_storehi (mem,
+ gen_lowpart (HImode, halfword_tmp)));
+ dstoffset += 2;
+ }
+
+ /* Likewise for last byte. */
+
+ if (byte_tmp)
+ {
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
+ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
+ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
+ dstoffset++;
+ }
+
+ gcc_assert (remaining == 0 && srcoffset == dstoffset);
+}
+
+/* From mips_adjust_block_mem:
+
+ Helper function for doing a loop-based block operation on memory
+ reference MEM. Each iteration of the loop will operate on LENGTH
+ bytes of MEM.
+
+ Create a new base register for use within the loop and point it to
+ the start of MEM. Create a new memory reference that uses this
+ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
+
+static void
+arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
+ rtx *loop_mem)
+{
+ *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+ /* Although the new mem does not refer to a known location,
+ it does keep up to LENGTH bytes of alignment. */
+ *loop_mem = change_address (mem, BLKmode, *loop_reg);
+ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* From mips_block_move_loop:
+
+ Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
+ the memory regions do not overlap. */
+
+static void
+arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+ unsigned int interleave_factor,
+ HOST_WIDE_INT bytes_per_iter)
+{
+ rtx label, src_reg, dest_reg, final_src, test;
+ HOST_WIDE_INT leftover;
+
+ leftover = length % bytes_per_iter;
+ length -= leftover;
+
+ /* Create registers and memory references for use within the loop. */
+ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+ arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+
+ /* Calculate the value that SRC_REG should have after the last iteration of
+ the loop. */
+ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+ 0, 0, OPTAB_WIDEN);
+
+ /* Emit the start of the loop. */
+ label = gen_label_rtx ();
+ emit_label (label);
+
+ /* Emit the loop body. */
+ arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
+ interleave_factor);
+
+ /* Move on to the next block. */
+ emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
+ emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
+
+ /* Emit the loop condition. */
+ test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+
+ /* Mop up any left-over bytes. */
+ if (leftover)
+ arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
+}
+
+/* Emit a block move when either the source or destination is unaligned (not
+ aligned to a four-byte boundary). This may need further tuning depending on
+ core type, optimize_size setting, etc. */
+
+static int
+arm_movmemqi_unaligned (rtx *operands)
+{
+ HOST_WIDE_INT length = INTVAL (operands[2]);
+
+ if (optimize_size)
+ {
+ bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
+ bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
+ /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
+ size of code if optimizing for size. We'll use ldm/stm if src_aligned
+ or dst_aligned though: allow more interleaving in those cases since the
+ resulting code can be smaller. */
+ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
+ HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
+
+ if (length > 12)
+ arm_block_move_unaligned_loop (operands[0], operands[1], length,
+ interleave_factor, bytes_per_iter);
+ else
+ arm_block_move_unaligned_straight (operands[0], operands[1], length,
+ interleave_factor);
+ }
+ else
+ {
+ /* Note that the loop created by arm_block_move_unaligned_loop may be
+ subject to loop unrolling, which makes tuning this condition a little
+ redundant. */
+ if (length > 32)
+ arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
+ else
+ arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
+ }
+
+ return 1;
+}
+
int
arm_gen_movmemqi (rtx *operands)
{
@@ -10815,8 +11144,13 @@
if (GET_CODE (operands[2]) != CONST_INT
|| GET_CODE (operands[3]) != CONST_INT
- || INTVAL (operands[2]) > 64
- || INTVAL (operands[3]) & 3)
+ || INTVAL (operands[2]) > 64)
+ return 0;
+
+ if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
+ return arm_movmemqi_unaligned (operands);
+
+ if (INTVAL (operands[3]) & 3)
return 0;
dstbase = operands[0];
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000
+++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000
@@ -47,6 +47,8 @@
{ \
if (TARGET_DSP_MULTIPLY) \
builtin_define ("__ARM_FEATURE_DSP"); \
+ if (unaligned_access) \
+ builtin_define ("__ARM_FEATURE_UNALIGNED"); \
/* Define __arm__ even when in thumb mode, for \
consistency with armcc. */ \
builtin_define ("__arm__"); \
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+void unknown_alignment (char *dest, char *src)
+{
+ memcpy (dest, src, 15);
+}
+
+/* We should see three unaligned word loads and store pairs, one unaligned
+ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */
+
+/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char dest[16];
+
+void aligned_dest (char *src)
+{
+ memcpy (dest, src, 15);
+}
+
+/* Expect a multi-word store for the main part of the copy, but subword
+ loads/stores for the remainder. */
+
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char src[16];
+
+void aligned_src (char *dest)
+{
+ memcpy (dest, src, 15);
+}
+
+/* Expect a multi-word load for the main part of the copy, but subword
+ loads/stores for the remainder. */
+
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c'
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char src[16];
+char dest[16];
+
+void aligned_both (void)
+{
+ memcpy (dest, src, 15);
+}
+
+/* We know both src and dest to be aligned: expect multiword loads/stores. */
+
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000
@@ -1894,6 +1894,18 @@
}]
}
+# Return 1 if this is an ARM target that supports unaligned word/halfword
+# load/store instructions.
+
+proc check_effective_target_arm_unaligned { } {
+ return [check_no_compiler_messages arm_unaligned assembly {
+ #ifndef __ARM_FEATURE_UNALIGNED
+ #error no unaligned support
+ #endif
+ int i;
+ }]
+}
+
# Add the options needed for NEON. We need either -mfloat-abi=softfp
# or -mfloat-abi=hard, but if one is already specified by the
# multilib, use it. Similarly, if a -mfpu option already enables
@@ -53,4 +53,26 @@ file://linaro/gcc-4.6-linaro-r106805.patch \
file://linaro/gcc-4.6-linaro-r106806.patch \
file://linaro/gcc-4.6-linaro-r106807.patch \
file://linaro/gcc-4.6-linaro-r106811.patch \
file://linaro/gcc-4.6-linaro-r106814.patch \
file://linaro/gcc-4.6-linaro-r106815.patch \
file://linaro/gcc-4.6-linaro-r106816.patch \
file://linaro/gcc-4.6-linaro-r106817.patch \
file://linaro/gcc-4.6-linaro-r106818.patch \
file://linaro/gcc-4.6-linaro-r106819.patch \
file://linaro/gcc-4.6-linaro-r106820.patch \
file://linaro/gcc-4.6-linaro-r106821.patch \
file://linaro/gcc-4.6-linaro-r106825.patch \
file://linaro/gcc-4.6-linaro-r106826.patch \
file://linaro/gcc-4.6-linaro-r106827.patch \
file://linaro/gcc-4.6-linaro-r106828.patch \
file://linaro/gcc-4.6-linaro-r106829.patch \
file://linaro/gcc-4.6-linaro-r106830.patch \
file://linaro/gcc-4.6-linaro-r106831.patch \
file://linaro/gcc-4.6-linaro-r106832.patch \
file://linaro/gcc-4.6-linaro-r106833.patch \
file://linaro/gcc-4.6-linaro-r106834.patch \
file://linaro/gcc-4.6-linaro-r106836.patch \
file://linaro/gcc-4.6-linaro-r106839.patch \
file://linaro/gcc-4.6-linaro-r106840.patch \
file://linaro/gcc-4.6-linaro-r106841.patch \
"
@@ -1,4 +1,4 @@
# this will prepend this layer to FILESPATH
FILESEXTRAPATHS := "${THISDIR}/gcc-4.6"
PRINC = "2"
PRINC = "3"
ARM_INSTRUCTION_SET = "arm"