mirror of
https://github.com/openembedded/meta-openembedded.git
synced 2026-05-08 05:29:22 +00:00
gcc-4.6: Bring in latest linaro patches
I have tested it on Angstrom by successfully building console-image and systemd-gnome-image for all supported QEMU targets. Signed-off-by: Khem Raj <raj.khem@gmail.com>
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
2011-09-22 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from trunk -r178804:
|
||||
* modulo-sched.c (remove_node_from_ps): Return void
|
||||
instead of bool.
|
||||
(optimize_sc): Adjust call to remove_node_from_ps.
|
||||
(sms_schedule): Add print info.
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000
|
||||
+++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
|
||||
@@ -211,7 +211,7 @@
|
||||
static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
|
||||
int, int, sbitmap, int *, sbitmap,
|
||||
sbitmap);
|
||||
-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
|
||||
+static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
|
||||
|
||||
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
|
||||
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
|
||||
@@ -834,8 +834,7 @@
|
||||
if (next_ps_i->node->cuid == g->closing_branch->cuid)
|
||||
break;
|
||||
|
||||
- gcc_assert (next_ps_i);
|
||||
- gcc_assert (remove_node_from_ps (ps, next_ps_i));
|
||||
+ remove_node_from_ps (ps, next_ps_i);
|
||||
success =
|
||||
try_scheduling_node_in_cycle (ps, g->closing_branch,
|
||||
g->closing_branch->cuid, c,
|
||||
@@ -1485,8 +1484,8 @@
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file,
|
||||
- "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
|
||||
- stage_count);
|
||||
+ "%s:%d SMS succeeded %d %d (with ii, sc)\n",
|
||||
+ insn_file (tail), insn_line (tail), ps->ii, stage_count);
|
||||
print_partial_schedule (ps, dump_file);
|
||||
}
|
||||
|
||||
@@ -2810,22 +2809,18 @@
|
||||
}
|
||||
|
||||
|
||||
-/* Removes the given PS_INSN from the partial schedule. Returns false if the
|
||||
- node is not found in the partial schedule, else returns true. */
|
||||
-static bool
|
||||
+/* Removes the given PS_INSN from the partial schedule. */
|
||||
+static void
|
||||
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
|
||||
{
|
||||
int row;
|
||||
|
||||
- if (!ps || !ps_i)
|
||||
- return false;
|
||||
-
|
||||
+ gcc_assert (ps && ps_i);
|
||||
+
|
||||
row = SMODULO (ps_i->cycle, ps->ii);
|
||||
if (! ps_i->prev_in_row)
|
||||
{
|
||||
- if (ps_i != ps->rows[row])
|
||||
- return false;
|
||||
-
|
||||
+ gcc_assert (ps_i == ps->rows[row]);
|
||||
ps->rows[row] = ps_i->next_in_row;
|
||||
if (ps->rows[row])
|
||||
ps->rows[row]->prev_in_row = NULL;
|
||||
@@ -2839,7 +2834,7 @@
|
||||
|
||||
ps->rows_length[row] -= 1;
|
||||
free (ps_i);
|
||||
- return true;
|
||||
+ return;
|
||||
}
|
||||
|
||||
/* Unlike what literature describes for modulo scheduling (which focuses
|
||||
|
||||
@@ -0,0 +1,528 @@
|
||||
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/testsuite/
|
||||
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
|
||||
Replace check_effective_target_arm_neon with
|
||||
check_effective_target_arm_neon_ok.
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-06 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.c (arm_preferred_simd_mode): Check
|
||||
TARGET_NEON_VECTORIZE_DOUBLE instead of
|
||||
TARGET_NEON_VECTORIZE_QUAD.
|
||||
(arm_autovectorize_vector_sizes): Likewise.
|
||||
* config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
|
||||
mask of mvectorize-with-neon-double. Add RejectNegative.
|
||||
(mvectorize-with-neon-double): New.
|
||||
|
||||
gcc/testsuite/
|
||||
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
|
||||
New procedure.
|
||||
(add_options_for_quad_vectors): Replace with ...
|
||||
(add_options_for_double_vectors): ... this.
|
||||
* gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
|
||||
support multiple vector sizes since the vectorizer attempts to
|
||||
vectorize with both vector sizes.
|
||||
* gcc.dg/vect/no-vfa-vect-79.c,
|
||||
gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
|
||||
gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
|
||||
gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
|
||||
gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
|
||||
gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
|
||||
gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
|
||||
* gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
|
||||
* gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
|
||||
gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
|
||||
gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
|
||||
gcc.dg/vect/vect-40.c: Likewise.
|
||||
* gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
|
||||
redundant.
|
||||
* gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
|
||||
gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
|
||||
gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
|
||||
gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
|
||||
Likewise.
|
||||
* gcc.dg/vect/vect-peel-4.c: Make ia global.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
|
||||
@@ -22974,7 +22974,7 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
|
||||
+/* Use the option -mvectorize-with-neon-double to override the use of quadword
|
||||
registers when autovectorizing for Neon, at least until multiple vector
|
||||
widths are supported properly by the middle-end. */
|
||||
|
||||
@@ -22985,15 +22985,15 @@
|
||||
switch (mode)
|
||||
{
|
||||
case SFmode:
|
||||
- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
|
||||
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
|
||||
case SImode:
|
||||
- return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
|
||||
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
|
||||
case HImode:
|
||||
- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
|
||||
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
|
||||
case QImode:
|
||||
- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
|
||||
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
|
||||
case DImode:
|
||||
- if (TARGET_NEON_VECTORIZE_QUAD)
|
||||
+ if (!TARGET_NEON_VECTORIZE_DOUBLE)
|
||||
return V2DImode;
|
||||
break;
|
||||
|
||||
@@ -24226,7 +24226,7 @@
|
||||
static unsigned int
|
||||
arm_autovectorize_vector_sizes (void)
|
||||
{
|
||||
- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
|
||||
+ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.opt'
|
||||
--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000
|
||||
+++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
|
||||
@@ -158,9 +158,13 @@
|
||||
Assume big endian bytes, little endian words
|
||||
|
||||
mvectorize-with-neon-quad
|
||||
-Target Report Mask(NEON_VECTORIZE_QUAD)
|
||||
+Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE)
|
||||
Use Neon quad-word (rather than double-word) registers for vectorization
|
||||
|
||||
+mvectorize-with-neon-double
|
||||
+Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE)
|
||||
+Use Neon double-word (rather than quad-word) registers for vectorization
|
||||
+
|
||||
mword-relocations
|
||||
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
|
||||
Only generate absolute relocations on word sized values.
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000
|
||||
@@ -45,6 +45,7 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000
|
||||
@@ -53,6 +53,7 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000
|
||||
@@ -53,6 +53,7 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000
|
||||
@@ -58,5 +58,6 @@
|
||||
If/when the aliasing problems are resolved, unalignment may
|
||||
prevent vectorization on some targets. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000
|
||||
@@ -46,5 +46,6 @@
|
||||
If/when the aliasing problems are resolved, unalignment may
|
||||
prevent vectorization on some targets. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000
|
||||
@@ -64,6 +64,7 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,4 +1,5 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
+/* { dg-add-options double_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000
|
||||
@@ -22,5 +22,6 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000
|
||||
@@ -20,5 +20,6 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000
|
||||
@@ -22,5 +22,6 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000
|
||||
@@ -37,5 +37,6 @@
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000
|
||||
@@ -49,5 +49,6 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000
|
||||
@@ -49,5 +49,6 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <signal.h>
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000
|
||||
@@ -1,5 +1,4 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
-/* { dg-add-options quad_vectors } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000
|
||||
@@ -6,12 +6,12 @@
|
||||
#define N 128
|
||||
|
||||
int ib[N+7];
|
||||
+int ia[N+1];
|
||||
|
||||
__attribute__ ((noinline))
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
- int ia[N+1];
|
||||
|
||||
/* Don't peel keeping one load and the store aligned. */
|
||||
for (i = 0; i <= N; i++)
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000
|
||||
@@ -58,7 +58,8 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
|
||||
|
||||
=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90'
|
||||
--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000
|
||||
+++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000
|
||||
@@ -19,6 +19,7 @@
|
||||
end
|
||||
|
||||
! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } }
|
||||
-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } }
|
||||
+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } }
|
||||
+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } }
|
||||
! { dg-final { cleanup-tree-dump "vect" } }
|
||||
|
||||
|
||||
=== modified file 'gcc/testsuite/lib/target-supports.exp'
|
||||
--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000
|
||||
+++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
|
||||
@@ -3265,6 +3265,24 @@
|
||||
}]
|
||||
}
|
||||
|
||||
+# Return 1 if the target supports multiple vector sizes (currently ARM targets with usable NEON); the result is cached in the global et_vect_multiple_sizes_saved.
|
||||
+
|
||||
+proc check_effective_target_vect_multiple_sizes { } {
|
||||
+ global et_vect_multiple_sizes_saved
|
||||
+
|
||||
+ if [info exists et_vect_multiple_sizes_saved] {
|
||||
+ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
|
||||
+ } else {
|
||||
+ set et_vect_multiple_sizes_saved 0
|
||||
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
|
||||
+ set et_vect_multiple_sizes_saved 1
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2
|
||||
+ return $et_vect_multiple_sizes_saved
|
||||
+}
|
||||
+
|
||||
# Return 1 if the target supports section-anchors
|
||||
|
||||
proc check_effective_target_section_anchors { } {
|
||||
@@ -3648,11 +3666,11 @@
|
||||
return $flags
|
||||
}
|
||||
|
||||
-# Add to FLAGS the flags needed to enable 128-bit vectors.
|
||||
+# Add to FLAGS the flags needed to enable 64-bit vectors.
|
||||
|
||||
-proc add_options_for_quad_vectors { flags } {
|
||||
+proc add_options_for_double_vectors { flags } {
|
||||
if [is-effective-target arm_neon_ok] {
|
||||
- return "$flags -mvectorize-with-neon-quad"
|
||||
+ return "$flags -mvectorize-with-neon-double"
|
||||
}
|
||||
|
||||
return $flags
|
||||
|
||||
@@ -0,0 +1,387 @@
|
||||
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
|
||||
(neon_move_hi_quad_<mode>): Likewise.
|
||||
(move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.
|
||||
|
||||
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-27 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi)
|
||||
(neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di)
|
||||
(neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si)
|
||||
(neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands
|
||||
that produce subreg moves. Define using VQX iterators.
|
||||
|
||||
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-14 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* simplify-rtx.c (simplify_subreg): Check that the inner mode is
|
||||
a scalar integer before applying integer-only optimisations to
|
||||
inner arithmetic.
|
||||
|
||||
=== modified file 'gcc/config/arm/neon.md'
|
||||
--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000
|
||||
+++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
|
||||
@@ -1235,66 +1235,14 @@
|
||||
(const_string "neon_int_1") (const_string "neon_int_5")))]
|
||||
)
|
||||
|
||||
-; FIXME: We wouldn't need the following insns if we could write subregs of
|
||||
-; vector registers. Make an attempt at removing unnecessary moves, though
|
||||
-; we're really at the mercy of the register allocator.
|
||||
-
|
||||
-(define_insn "neon_move_lo_quad_<mode>"
|
||||
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
|
||||
- (vec_concat:ANY128
|
||||
- (match_operand:<V_HALF> 1 "s_register_operand" "w")
|
||||
- (vec_select:<V_HALF>
|
||||
- (match_dup 0)
|
||||
- (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%e0, %P1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_move_hi_quad_<mode>"
|
||||
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
|
||||
- (vec_concat:ANY128
|
||||
- (vec_select:<V_HALF>
|
||||
- (match_dup 0)
|
||||
- (match_operand:ANY128 2 "vect_par_constant_low" ""))
|
||||
- (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
|
||||
-
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%f0, %P1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
(define_expand "move_hi_quad_<mode>"
|
||||
[(match_operand:ANY128 0 "s_register_operand" "")
|
||||
(match_operand:<V_HALF> 1 "s_register_operand" "")]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
|
||||
- rtx t1;
|
||||
- int i;
|
||||
-
|
||||
- for (i=0; i < (<V_mode_nunits>/2); i++)
|
||||
- RTVEC_ELT (v, i) = GEN_INT (i);
|
||||
-
|
||||
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
|
||||
- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
|
||||
-
|
||||
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
|
||||
+ GET_MODE_SIZE (<V_HALF>mode)),
|
||||
+ operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
@@ -1303,16 +1251,9 @@
|
||||
(match_operand:<V_HALF> 1 "s_register_operand" "")]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
|
||||
- rtx t1;
|
||||
- int i;
|
||||
-
|
||||
- for (i=0; i < (<V_mode_nunits>/2); i++)
|
||||
- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
|
||||
-
|
||||
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
|
||||
- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
|
||||
-
|
||||
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
|
||||
+ <MODE>mode, 0),
|
||||
+ operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
@@ -2950,183 +2891,27 @@
|
||||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
-(define_insn "neon_vget_highv16qi"
|
||||
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
|
||||
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 8) (const_int 9)
|
||||
- (const_int 10) (const_int 11)
|
||||
- (const_int 12) (const_int 13)
|
||||
- (const_int 14) (const_int 15)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src + 2)
|
||||
- return "vmov\t%P0, %f1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_highv8hi"
|
||||
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
|
||||
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 4) (const_int 5)
|
||||
- (const_int 6) (const_int 7)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src + 2)
|
||||
- return "vmov\t%P0, %f1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_highv4si"
|
||||
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
|
||||
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 2) (const_int 3)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src + 2)
|
||||
- return "vmov\t%P0, %f1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_highv4sf"
|
||||
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
|
||||
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 2) (const_int 3)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src + 2)
|
||||
- return "vmov\t%P0, %f1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_highv2di"
|
||||
- [(set (match_operand:DI 0 "s_register_operand" "=w")
|
||||
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 1)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src + 2)
|
||||
- return "vmov\t%P0, %f1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_lowv16qi"
|
||||
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
|
||||
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 0) (const_int 1)
|
||||
- (const_int 2) (const_int 3)
|
||||
- (const_int 4) (const_int 5)
|
||||
- (const_int 6) (const_int 7)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%P0, %e1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_lowv8hi"
|
||||
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
|
||||
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 0) (const_int 1)
|
||||
- (const_int 2) (const_int 3)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%P0, %e1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_lowv4si"
|
||||
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
|
||||
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 0) (const_int 1)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%P0, %e1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_lowv4sf"
|
||||
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
|
||||
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 0) (const_int 1)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%P0, %e1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "neon_vget_lowv2di"
|
||||
- [(set (match_operand:DI 0 "s_register_operand" "=w")
|
||||
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
|
||||
- (parallel [(const_int 0)])))]
|
||||
- "TARGET_NEON"
|
||||
-{
|
||||
- int dest = REGNO (operands[0]);
|
||||
- int src = REGNO (operands[1]);
|
||||
-
|
||||
- if (dest != src)
|
||||
- return "vmov\t%P0, %e1";
|
||||
- else
|
||||
- return "";
|
||||
-}
|
||||
- [(set_attr "neon_type" "neon_bp_simple")]
|
||||
-)
|
||||
+(define_expand "neon_vget_high<mode>"
|
||||
+ [(match_operand:<V_HALF> 0 "s_register_operand")
|
||||
+ (match_operand:VQX 1 "s_register_operand")]
|
||||
+ "TARGET_NEON"
|
||||
+{
|
||||
+ emit_move_insn (operands[0],
|
||||
+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
|
||||
+ GET_MODE_SIZE (<V_HALF>mode)));
|
||||
+ DONE;
|
||||
+})
|
||||
+
|
||||
+(define_expand "neon_vget_low<mode>"
|
||||
+ [(match_operand:<V_HALF> 0 "s_register_operand")
|
||||
+ (match_operand:VQX 1 "s_register_operand")]
|
||||
+ "TARGET_NEON"
|
||||
+{
|
||||
+ emit_move_insn (operands[0],
|
||||
+ simplify_gen_subreg (<V_HALF>mode, operands[1],
|
||||
+ <MODE>mode, 0));
|
||||
+ DONE;
|
||||
+})
|
||||
|
||||
(define_insn "neon_vcvt<mode>"
|
||||
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
|
||||
|
||||
=== modified file 'gcc/simplify-rtx.c'
|
||||
--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000
|
||||
+++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000
|
||||
@@ -5567,6 +5567,7 @@
|
||||
/* Optimize SUBREG truncations of zero and sign extended values. */
|
||||
if ((GET_CODE (op) == ZERO_EXTEND
|
||||
|| GET_CODE (op) == SIGN_EXTEND)
|
||||
+ && SCALAR_INT_MODE_P (innermode)
|
||||
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode))
|
||||
{
|
||||
unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
|
||||
@@ -5605,6 +5606,7 @@
|
||||
if ((GET_CODE (op) == LSHIFTRT
|
||||
|| GET_CODE (op) == ASHIFTRT)
|
||||
&& SCALAR_INT_MODE_P (outermode)
|
||||
+ && SCALAR_INT_MODE_P (innermode)
|
||||
/* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
|
||||
to avoid the possibility that an outer LSHIFTRT shifts by more
|
||||
than the sign extension's sign_bit_copies and introduces zeros
|
||||
@@ -5624,6 +5626,7 @@
|
||||
if ((GET_CODE (op) == LSHIFTRT
|
||||
|| GET_CODE (op) == ASHIFTRT)
|
||||
&& SCALAR_INT_MODE_P (outermode)
|
||||
+ && SCALAR_INT_MODE_P (innermode)
|
||||
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
|
||||
&& CONST_INT_P (XEXP (op, 1))
|
||||
&& GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
|
||||
@@ -5638,6 +5641,7 @@
|
||||
the outer subreg is effectively a truncation to the original mode. */
|
||||
if (GET_CODE (op) == ASHIFT
|
||||
&& SCALAR_INT_MODE_P (outermode)
|
||||
+ && SCALAR_INT_MODE_P (innermode)
|
||||
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
|
||||
&& CONST_INT_P (XEXP (op, 1))
|
||||
&& (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
|
||||
@@ -5651,7 +5655,7 @@
|
||||
/* Recognize a word extraction from a multi-word subreg. */
|
||||
if ((GET_CODE (op) == LSHIFTRT
|
||||
|| GET_CODE (op) == ASHIFTRT)
|
||||
- && SCALAR_INT_MODE_P (outermode)
|
||||
+ && SCALAR_INT_MODE_P (innermode)
|
||||
&& GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD
|
||||
&& GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode))
|
||||
&& CONST_INT_P (XEXP (op, 1))
|
||||
@@ -5673,6 +5677,7 @@
|
||||
|
||||
if ((GET_CODE (op) == LSHIFTRT
|
||||
|| GET_CODE (op) == ASHIFTRT)
|
||||
+ && SCALAR_INT_MODE_P (innermode)
|
||||
&& MEM_P (XEXP (op, 0))
|
||||
&& CONST_INT_P (XEXP (op, 1))
|
||||
&& GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))
|
||||
|
||||
@@ -0,0 +1,290 @@
|
||||
2011-10-01 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline -r179380 and -r179381
|
||||
|
||||
* ddg.c (autoinc_var_is_used_p): New function.
|
||||
(create_ddg_dep_from_intra_loop_link,
|
||||
add_cross_iteration_register_deps): Call it.
|
||||
* ddg.h (autoinc_var_is_used_p): Declare.
|
||||
* modulo-sched.c (sms_schedule): Handle instructions with REG_INC.
|
||||
(generate_reg_moves): Call autoinc_var_is_used_p. Skip
|
||||
instructions that do not set a register and verify no regmoves
|
||||
are created for !single_set instructions.
|
||||
|
||||
gcc/testsuite/
|
||||
|
||||
* gcc.dg/sms-10.c: New file
|
||||
|
||||
=== modified file 'gcc/ddg.c'
|
||||
--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000
|
||||
+++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000
|
||||
@@ -145,6 +145,27 @@
|
||||
return rtx_mem_access_p (PATTERN (insn));
|
||||
}
|
||||
|
||||
+/* Return true if DEF_INSN contains address being auto-inc or auto-dec
|
||||
+ which is used in USE_INSN. Otherwise return false. The result is
|
||||
+ being used to decide whether to remove the edge between def_insn and
|
||||
+ use_insn when -fmodulo-sched-allow-regmoves is set. This function
|
||||
+ doesn't need to consider the specific address register; no reg_moves
|
||||
+ will be allowed for any life range defined by def_insn and used
|
||||
+ by use_insn, if use_insn uses an address register auto-inc'ed by
|
||||
+ def_insn. */
|
||||
+bool
|
||||
+autoinc_var_is_used_p (rtx def_insn, rtx use_insn)
|
||||
+{
|
||||
+ rtx note;
|
||||
+
|
||||
+ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1))
|
||||
+ if (REG_NOTE_KIND (note) == REG_INC
|
||||
+ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn)))
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
/* Computes the dependence parameters (latency, distance etc.), creates
|
||||
a ddg_edge and adds it to the given DDG. */
|
||||
static void
|
||||
@@ -173,10 +194,15 @@
|
||||
compensate for that by generating reg-moves based on the life-range
|
||||
analysis. The anti-deps that will be deleted are the ones which
|
||||
have true-deps edges in the opposite direction (in other words
|
||||
- the kernel has only one def of the relevant register). TODO:
|
||||
- support the removal of all anti-deps edges, i.e. including those
|
||||
+ the kernel has only one def of the relevant register).
|
||||
+ If the address that is being auto-inc or auto-dec in DEST_NODE
|
||||
+ is used in SRC_NODE then do not remove the edge to make sure
|
||||
+ reg-moves will not be created for this address.
|
||||
+ TODO: support the removal of all anti-deps edges, i.e. including those
|
||||
whose register has multiple defs in the loop. */
|
||||
- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
|
||||
+ if (flag_modulo_sched_allow_regmoves
|
||||
+ && (t == ANTI_DEP && dt == REG_DEP)
|
||||
+ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn))
|
||||
{
|
||||
rtx set;
|
||||
|
||||
@@ -302,10 +328,14 @@
|
||||
gcc_assert (first_def_node);
|
||||
|
||||
/* Always create the edge if the use node is a branch in
|
||||
- order to prevent the creation of reg-moves. */
|
||||
+ order to prevent the creation of reg-moves.
|
||||
+ If the address that is being auto-inc or auto-dec in LAST_DEF
|
||||
+ is used in USE_INSN then do not remove the edge to make sure
|
||||
+ reg-moves will not be created for that address. */
|
||||
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
|
||||
|| !flag_modulo_sched_allow_regmoves
|
||||
- || JUMP_P (use_node->insn))
|
||||
+ || JUMP_P (use_node->insn)
|
||||
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn))
|
||||
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
||||
REG_DEP, 1);
|
||||
|
||||
|
||||
=== modified file 'gcc/ddg.h'
|
||||
--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000
|
||||
+++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000
|
||||
@@ -186,4 +186,6 @@
|
||||
int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
|
||||
int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
|
||||
|
||||
+bool autoinc_var_is_used_p (rtx, rtx);
|
||||
+
|
||||
#endif /* GCC_DDG_H */
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
|
||||
+++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000
|
||||
@@ -477,7 +477,12 @@
|
||||
sbitmap *uses_of_defs;
|
||||
rtx last_reg_move;
|
||||
rtx prev_reg, old_reg;
|
||||
-
|
||||
+ rtx set = single_set (u->insn);
|
||||
+
|
||||
+ /* Skip instructions that do not set a register. */
|
||||
+ if ((set && !REG_P (SET_DEST (set))))
|
||||
+ continue;
|
||||
+
|
||||
/* Compute the number of reg_moves needed for u, by looking at life
|
||||
ranges started at u (excluding self-loops). */
|
||||
for (e = u->out; e; e = e->next_out)
|
||||
@@ -494,6 +499,20 @@
|
||||
&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
|
||||
nreg_moves4e--;
|
||||
|
||||
+ if (nreg_moves4e >= 1)
|
||||
+ {
|
||||
+ /* !single_set instructions are not supported yet and
|
||||
+ thus we do not except to encounter them in the loop
|
||||
+ except from the doloop part. For the latter case
|
||||
+ we assume no regmoves are generated as the doloop
|
||||
+ instructions are tied to the branch with an edge. */
|
||||
+ gcc_assert (set);
|
||||
+ /* If the instruction contains auto-inc register then
|
||||
+ validate that the regmov is being generated for the
|
||||
+ target regsiter rather then the inc'ed register. */
|
||||
+ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
|
||||
+ }
|
||||
+
|
||||
nreg_moves = MAX (nreg_moves, nreg_moves4e);
|
||||
}
|
||||
|
||||
@@ -1266,12 +1285,10 @@
|
||||
continue;
|
||||
}
|
||||
|
||||
- /* Don't handle BBs with calls or barriers or auto-increment insns
|
||||
- (to avoid creating invalid reg-moves for the auto-increment insns),
|
||||
+ /* Don't handle BBs with calls or barriers
|
||||
or !single_set with the exception of instructions that include
|
||||
count_reg---these instructions are part of the control part
|
||||
that do-loop recognizes.
|
||||
- ??? Should handle auto-increment insns.
|
||||
??? Should handle insns defining subregs. */
|
||||
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
|
||||
{
|
||||
@@ -1282,7 +1299,6 @@
|
||||
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
|
||||
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
|
||||
&& !reg_mentioned_p (count_reg, insn))
|
||||
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|
||||
|| (INSN_P (insn) && (set = single_set (insn))
|
||||
&& GET_CODE (SET_DEST (set)) == SUBREG))
|
||||
break;
|
||||
@@ -1296,8 +1312,6 @@
|
||||
fprintf (dump_file, "SMS loop-with-call\n");
|
||||
else if (BARRIER_P (insn))
|
||||
fprintf (dump_file, "SMS loop-with-barrier\n");
|
||||
- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|
||||
- fprintf (dump_file, "SMS reg inc\n");
|
||||
else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
|
||||
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
|
||||
fprintf (dump_file, "SMS loop-with-not-single-set\n");
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/sms-10.c'
|
||||
--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000
|
||||
@@ -0,0 +1,118 @@
|
||||
+ /* { dg-do run } */
|
||||
+ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
|
||||
+
|
||||
+
|
||||
+typedef __SIZE_TYPE__ size_t;
|
||||
+extern void *malloc (size_t);
|
||||
+extern void free (void *);
|
||||
+extern void abort (void);
|
||||
+
|
||||
+struct regstat_n_sets_and_refs_t
|
||||
+{
|
||||
+ int sets;
|
||||
+ int refs;
|
||||
+};
|
||||
+
|
||||
+struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs;
|
||||
+
|
||||
+struct df_reg_info
|
||||
+{
|
||||
+ unsigned int n_refs;
|
||||
+};
|
||||
+
|
||||
+struct df_d
|
||||
+{
|
||||
+ struct df_reg_info **def_regs;
|
||||
+ struct df_reg_info **use_regs;
|
||||
+};
|
||||
+struct df_d *df;
|
||||
+
|
||||
+static inline int
|
||||
+REG_N_SETS (int regno)
|
||||
+{
|
||||
+ return regstat_n_sets_and_refs[regno].sets;
|
||||
+}
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+ int max_reg_num (void)
|
||||
+{
|
||||
+ return 100;
|
||||
+}
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+ void regstat_init_n_sets_and_refs (void)
|
||||
+{
|
||||
+ unsigned int i;
|
||||
+ unsigned int max_regno = max_reg_num ();
|
||||
+
|
||||
+ for (i = 0; i < max_regno; i++)
|
||||
+ {
|
||||
+ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs));
|
||||
+ (regstat_n_sets_and_refs[i].refs =
|
||||
+ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i));
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int a_sets[100] =
|
||||
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
||||
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
|
||||
+ 40, 41, 42,
|
||||
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
|
||||
+ 62, 63, 64,
|
||||
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
|
||||
+ 84, 85, 86,
|
||||
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
|
||||
+};
|
||||
+
|
||||
+int a_refs[100] =
|
||||
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
|
||||
+ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
|
||||
+ 78, 80, 82,
|
||||
+ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116,
|
||||
+ 118, 120,
|
||||
+ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150,
|
||||
+ 152, 154, 156,
|
||||
+ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
|
||||
+ 188, 190, 192,
|
||||
+ 194, 196, 198
|
||||
+};
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ struct df_reg_info *b[100], *c[100];
|
||||
+ struct df_d df1;
|
||||
+ size_t s = sizeof (struct df_reg_info);
|
||||
+ struct regstat_n_sets_and_refs_t a[100];
|
||||
+
|
||||
+ df = &df1;
|
||||
+ regstat_n_sets_and_refs = a;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < 100; i++)
|
||||
+ {
|
||||
+ b[i] = (struct df_reg_info *) malloc (s);
|
||||
+ b[i]->n_refs = i;
|
||||
+ c[i] = (struct df_reg_info *) malloc (s);
|
||||
+ c[i]->n_refs = i;
|
||||
+ }
|
||||
+
|
||||
+ df1.def_regs = b;
|
||||
+ df1.use_regs = c;
|
||||
+ regstat_init_n_sets_and_refs ();
|
||||
+
|
||||
+ for (i = 0; i < 100; i++)
|
||||
+ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i]))
|
||||
+ abort ();
|
||||
+
|
||||
+ for (i = 0; i < 100; i++)
|
||||
+ {
|
||||
+ free (b[i]);
|
||||
+ free (c[i]);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */
|
||||
+/* { dg-final { cleanup-rtl-dump "sms" } } */
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
2011-10-03 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru>
|
||||
|
||||
gcc/
|
||||
* config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New
|
||||
define_insn patterns for combine.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test.
|
||||
|
||||
=== modified file 'gcc/config/arm/neon.md'
|
||||
--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
|
||||
+++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000
|
||||
@@ -5428,3 +5428,32 @@
|
||||
emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
|
||||
DONE;
|
||||
})
|
||||
+
|
||||
+(define_insn "neon_vabd<mode>_2"
|
||||
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
|
||||
+ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
|
||||
+ (match_operand:VDQ 2 "s_register_operand" "w"))))]
|
||||
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
|
||||
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
+ [(set (attr "neon_type")
|
||||
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
|
||||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||||
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
|
||||
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
|
||||
+ (const_string "neon_int_5")))]
|
||||
+)
|
||||
+
|
||||
+(define_insn "neon_vabd<mode>_3"
|
||||
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
|
||||
+ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
|
||||
+ (match_operand:VDQ 2 "s_register_operand" "w")]
|
||||
+ UNSPEC_VSUB)))]
|
||||
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
|
||||
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
+ [(set (attr "neon_type")
|
||||
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
|
||||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||||
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
|
||||
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
|
||||
+ (const_string "neon_int_5")))]
|
||||
+)
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000
|
||||
@@ -0,0 +1,50 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
|
||||
+/* { dg-add-options arm_neon } */
|
||||
+
|
||||
+#include <arm_neon.h>
|
||||
+float32x2_t f_sub_abs_to_vabd_32()
|
||||
+{
|
||||
+ float32x2_t val1 = vdup_n_f32 (10);
|
||||
+ float32x2_t val2 = vdup_n_f32 (30);
|
||||
+ float32x2_t sres = vsub_f32(val1, val2);
|
||||
+ float32x2_t res = vabs_f32 (sres);
|
||||
+
|
||||
+ return res;
|
||||
+}
|
||||
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
|
||||
+
|
||||
+#include <arm_neon.h>
|
||||
+int8x8_t sub_abs_to_vabd_8()
|
||||
+{
|
||||
+ int8x8_t val1 = vdup_n_s8 (10);
|
||||
+ int8x8_t val2 = vdup_n_s8 (30);
|
||||
+ int8x8_t sres = vsub_s8(val1, val2);
|
||||
+ int8x8_t res = vabs_s8 (sres);
|
||||
+
|
||||
+ return res;
|
||||
+}
|
||||
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
|
||||
+
|
||||
+int16x4_t sub_abs_to_vabd_16()
|
||||
+{
|
||||
+ int16x4_t val1 = vdup_n_s16 (10);
|
||||
+ int16x4_t val2 = vdup_n_s16 (30);
|
||||
+ int16x4_t sres = vsub_s16(val1, val2);
|
||||
+ int16x4_t res = vabs_s16 (sres);
|
||||
+
|
||||
+ return res;
|
||||
+}
|
||||
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
|
||||
+
|
||||
+int32x2_t sub_abs_to_vabd_32()
|
||||
+{
|
||||
+ int32x2_t val1 = vdup_n_s32 (10);
|
||||
+ int32x2_t val2 = vdup_n_s32 (30);
|
||||
+ int32x2_t sres = vsub_s32(val1, val2);
|
||||
+ int32x2_t res = vabs_s32 (sres);
|
||||
+
|
||||
+ return res;
|
||||
+}
|
||||
+/* { dg-final { scan-assembler "vabd\.s32" } }*/
|
||||
|
||||
@@ -0,0 +1,436 @@
|
||||
2011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* config/arm/predicates.md (expandable_comparison_operator): New
|
||||
predicate, extracted from...
|
||||
(arm_comparison_operator): ...here.
|
||||
* config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4)
|
||||
(cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc)
|
||||
(movdfcc): Use expandable_comparison_operator.
|
||||
|
||||
gcc/testsuite/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* gcc.target/arm/cmp-1.c: New test.
|
||||
* gcc.target/arm/cmp-2.c: Likewise.
|
||||
|
||||
2011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
PR target/49030
|
||||
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
|
||||
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
|
||||
reusing the old code from get_arm_condition_code. Return ARM_NV
|
||||
for invalid comparison codes.
|
||||
(get_arm_condition_code): Redefine in terms of
|
||||
maybe_get_arm_condition_code.
|
||||
* config/arm/predicates.md (arm_comparison_operator): Use
|
||||
maybe_get_arm_condition_code.
|
||||
|
||||
gcc/testsuite/
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
PR target/49030
|
||||
* gcc.dg/torture/pr49030.c: New test.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-protos.h'
|
||||
--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000
|
||||
+++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
|
||||
@@ -180,6 +180,7 @@
|
||||
#endif
|
||||
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
|
||||
#ifdef RTX_CODE
|
||||
+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
|
||||
extern void thumb1_final_prescan_insn (rtx);
|
||||
extern void thumb2_final_prescan_insn (rtx);
|
||||
extern const char *thumb_load_double_from_address (rtx *);
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
|
||||
@@ -17494,10 +17494,10 @@
|
||||
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
|
||||
|
||||
/* Returns the index of the ARM condition code string in
|
||||
- `arm_condition_codes'. COMPARISON should be an rtx like
|
||||
- `(eq (...) (...))'. */
|
||||
-static enum arm_cond_code
|
||||
-get_arm_condition_code (rtx comparison)
|
||||
+ `arm_condition_codes', or ARM_NV if the comparison is invalid.
|
||||
+ COMPARISON should be an rtx like `(eq (...) (...))'. */
|
||||
+enum arm_cond_code
|
||||
+maybe_get_arm_condition_code (rtx comparison)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
|
||||
enum arm_cond_code code;
|
||||
@@ -17521,11 +17521,11 @@
|
||||
case CC_DLTUmode: code = ARM_CC;
|
||||
|
||||
dominance:
|
||||
- gcc_assert (comp_code == EQ || comp_code == NE);
|
||||
-
|
||||
if (comp_code == EQ)
|
||||
return ARM_INVERSE_CONDITION_CODE (code);
|
||||
- return code;
|
||||
+ if (comp_code == NE)
|
||||
+ return code;
|
||||
+ return ARM_NV;
|
||||
|
||||
case CC_NOOVmode:
|
||||
switch (comp_code)
|
||||
@@ -17534,7 +17534,7 @@
|
||||
case EQ: return ARM_EQ;
|
||||
case GE: return ARM_PL;
|
||||
case LT: return ARM_MI;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CC_Zmode:
|
||||
@@ -17542,7 +17542,7 @@
|
||||
{
|
||||
case NE: return ARM_NE;
|
||||
case EQ: return ARM_EQ;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CC_Nmode:
|
||||
@@ -17550,7 +17550,7 @@
|
||||
{
|
||||
case NE: return ARM_MI;
|
||||
case EQ: return ARM_PL;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CCFPEmode:
|
||||
@@ -17575,7 +17575,7 @@
|
||||
/* UNEQ and LTGT do not have a representation. */
|
||||
case UNEQ: /* Fall through. */
|
||||
case LTGT: /* Fall through. */
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CC_SWPmode:
|
||||
@@ -17591,7 +17591,7 @@
|
||||
case GTU: return ARM_CC;
|
||||
case LEU: return ARM_CS;
|
||||
case LTU: return ARM_HI;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CC_Cmode:
|
||||
@@ -17599,7 +17599,7 @@
|
||||
{
|
||||
case LTU: return ARM_CS;
|
||||
case GEU: return ARM_CC;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CC_CZmode:
|
||||
@@ -17611,7 +17611,7 @@
|
||||
case GTU: return ARM_HI;
|
||||
case LEU: return ARM_LS;
|
||||
case LTU: return ARM_CC;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CC_NCVmode:
|
||||
@@ -17621,7 +17621,7 @@
|
||||
case LT: return ARM_LT;
|
||||
case GEU: return ARM_CS;
|
||||
case LTU: return ARM_CC;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
case CCmode:
|
||||
@@ -17637,13 +17637,22 @@
|
||||
case GTU: return ARM_HI;
|
||||
case LEU: return ARM_LS;
|
||||
case LTU: return ARM_CC;
|
||||
- default: gcc_unreachable ();
|
||||
+ default: return ARM_NV;
|
||||
}
|
||||
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
+/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
|
||||
+static enum arm_cond_code
|
||||
+get_arm_condition_code (rtx comparison)
|
||||
+{
|
||||
+ enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
|
||||
+ gcc_assert (code != ARM_NV);
|
||||
+ return code;
|
||||
+}
|
||||
+
|
||||
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
|
||||
instructions. */
|
||||
void
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
|
||||
@@ -6543,7 +6543,7 @@
|
||||
|
||||
(define_expand "cbranchsi4"
|
||||
[(set (pc) (if_then_else
|
||||
- (match_operator 0 "arm_comparison_operator"
|
||||
+ (match_operator 0 "expandable_comparison_operator"
|
||||
[(match_operand:SI 1 "s_register_operand" "")
|
||||
(match_operand:SI 2 "nonmemory_operand" "")])
|
||||
(label_ref (match_operand 3 "" ""))
|
||||
@@ -6594,7 +6594,7 @@
|
||||
|
||||
(define_expand "cbranchsf4"
|
||||
[(set (pc) (if_then_else
|
||||
- (match_operator 0 "arm_comparison_operator"
|
||||
+ (match_operator 0 "expandable_comparison_operator"
|
||||
[(match_operand:SF 1 "s_register_operand" "")
|
||||
(match_operand:SF 2 "arm_float_compare_operand" "")])
|
||||
(label_ref (match_operand 3 "" ""))
|
||||
@@ -6606,7 +6606,7 @@
|
||||
|
||||
(define_expand "cbranchdf4"
|
||||
[(set (pc) (if_then_else
|
||||
- (match_operator 0 "arm_comparison_operator"
|
||||
+ (match_operator 0 "expandable_comparison_operator"
|
||||
[(match_operand:DF 1 "s_register_operand" "")
|
||||
(match_operand:DF 2 "arm_float_compare_operand" "")])
|
||||
(label_ref (match_operand 3 "" ""))
|
||||
@@ -6618,7 +6618,7 @@
|
||||
|
||||
(define_expand "cbranchdi4"
|
||||
[(set (pc) (if_then_else
|
||||
- (match_operator 0 "arm_comparison_operator"
|
||||
+ (match_operator 0 "expandable_comparison_operator"
|
||||
[(match_operand:DI 1 "cmpdi_operand" "")
|
||||
(match_operand:DI 2 "cmpdi_operand" "")])
|
||||
(label_ref (match_operand 3 "" ""))
|
||||
@@ -7473,7 +7473,7 @@
|
||||
|
||||
(define_expand "cstoresi4"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
- (match_operator:SI 1 "arm_comparison_operator"
|
||||
+ (match_operator:SI 1 "expandable_comparison_operator"
|
||||
[(match_operand:SI 2 "s_register_operand" "")
|
||||
(match_operand:SI 3 "reg_or_int_operand" "")]))]
|
||||
"TARGET_32BIT || TARGET_THUMB1"
|
||||
@@ -7609,7 +7609,7 @@
|
||||
|
||||
(define_expand "cstoresf4"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
- (match_operator:SI 1 "arm_comparison_operator"
|
||||
+ (match_operator:SI 1 "expandable_comparison_operator"
|
||||
[(match_operand:SF 2 "s_register_operand" "")
|
||||
(match_operand:SF 3 "arm_float_compare_operand" "")]))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT"
|
||||
@@ -7619,7 +7619,7 @@
|
||||
|
||||
(define_expand "cstoredf4"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
- (match_operator:SI 1 "arm_comparison_operator"
|
||||
+ (match_operator:SI 1 "expandable_comparison_operator"
|
||||
[(match_operand:DF 2 "s_register_operand" "")
|
||||
(match_operand:DF 3 "arm_float_compare_operand" "")]))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||||
@@ -7629,7 +7629,7 @@
|
||||
|
||||
(define_expand "cstoredi4"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
- (match_operator:SI 1 "arm_comparison_operator"
|
||||
+ (match_operator:SI 1 "expandable_comparison_operator"
|
||||
[(match_operand:DI 2 "cmpdi_operand" "")
|
||||
(match_operand:DI 3 "cmpdi_operand" "")]))]
|
||||
"TARGET_32BIT"
|
||||
@@ -7749,7 +7749,7 @@
|
||||
|
||||
(define_expand "movsicc"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
|
||||
+ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "")
|
||||
(match_operand:SI 2 "arm_not_operand" "")
|
||||
(match_operand:SI 3 "arm_not_operand" "")))]
|
||||
"TARGET_32BIT"
|
||||
@@ -7769,7 +7769,7 @@
|
||||
|
||||
(define_expand "movsfcc"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "")
|
||||
- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
|
||||
+ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
|
||||
(match_operand:SF 2 "s_register_operand" "")
|
||||
(match_operand:SF 3 "nonmemory_operand" "")))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT"
|
||||
@@ -7795,7 +7795,7 @@
|
||||
|
||||
(define_expand "movdfcc"
|
||||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||||
- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
|
||||
+ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
|
||||
(match_operand:DF 2 "s_register_operand" "")
|
||||
(match_operand:DF 3 "arm_float_add_operand" "")))]
|
||||
"TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
|
||||
|
||||
=== modified file 'gcc/config/arm/predicates.md'
|
||||
--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000
|
||||
+++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
|
||||
@@ -242,11 +242,15 @@
|
||||
|
||||
;; True for integer comparisons and, if FP is active, for comparisons
|
||||
;; other than LTGT or UNEQ.
|
||||
+(define_special_predicate "expandable_comparison_operator"
|
||||
+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
|
||||
+ unordered,ordered,unlt,unle,unge,ungt"))
|
||||
+
|
||||
+;; Likewise, but only accept comparisons that are directly supported
|
||||
+;; by ARM condition codes.
|
||||
(define_special_predicate "arm_comparison_operator"
|
||||
- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
|
||||
- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
|
||||
- && (TARGET_FPA || TARGET_VFP)")
|
||||
- (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
|
||||
+ (and (match_operand 0 "expandable_comparison_operator")
|
||||
+ (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
|
||||
|
||||
(define_special_predicate "lt_ge_comparison_operator"
|
||||
(match_code "lt,ge"))
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
|
||||
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000
|
||||
@@ -0,0 +1,19 @@
|
||||
+void
|
||||
+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
|
||||
+ unsigned long dst_skip)
|
||||
+{
|
||||
+ long long y;
|
||||
+ while (nsamples--)
|
||||
+ {
|
||||
+ y = (long long) (*src * 8388608.0f) << 8;
|
||||
+ if (y > 2147483647) {
|
||||
+ *(int *) dst = 2147483647;
|
||||
+ } else if (y < -2147483647 - 1) {
|
||||
+ *(int *) dst = -2147483647 - 1;
|
||||
+ } else {
|
||||
+ *(int *) dst = (int) y;
|
||||
+ }
|
||||
+ dst += dst_skip;
|
||||
+ src++;
|
||||
+ }
|
||||
+}
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O" } */
|
||||
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
|
||||
+/* { dg-final { scan-assembler-not "__aeabi" } } */
|
||||
+int x, y;
|
||||
+
|
||||
+#define TEST_EXPR(NAME, ARGS, EXPR) \
|
||||
+ int NAME##1 ARGS { return (EXPR); } \
|
||||
+ int NAME##2 ARGS { return !(EXPR); } \
|
||||
+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
|
||||
+ void NAME##4 ARGS { if (EXPR) x++; } \
|
||||
+ void NAME##5 ARGS { if (!(EXPR)) x++; }
|
||||
+
|
||||
+#define TEST(NAME, TYPE, OPERATOR) \
|
||||
+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \
|
||||
+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \
|
||||
+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \
|
||||
+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \
|
||||
+ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \
|
||||
+ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1)
|
||||
+
|
||||
+#define TEST_OP(NAME, OPERATOR) \
|
||||
+ TEST (sc_##NAME, signed char, OPERATOR) \
|
||||
+ TEST (uc_##NAME, unsigned char, OPERATOR) \
|
||||
+ TEST (ss_##NAME, short, OPERATOR) \
|
||||
+ TEST (us_##NAME, unsigned short, OPERATOR) \
|
||||
+ TEST (si_##NAME, int, OPERATOR) \
|
||||
+ TEST (ui_##NAME, unsigned int, OPERATOR) \
|
||||
+ TEST (sll_##NAME, long long, OPERATOR) \
|
||||
+ TEST (ull_##NAME, unsigned long long, OPERATOR)
|
||||
+
|
||||
+TEST_OP (eq, ==)
|
||||
+TEST_OP (ne, !=)
|
||||
+TEST_OP (lt, <)
|
||||
+TEST_OP (gt, >)
|
||||
+TEST_OP (le, <=)
|
||||
+TEST_OP (ge, >=)
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000
|
||||
@@ -0,0 +1,49 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_vfp_ok } */
|
||||
+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
|
||||
+/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */
|
||||
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
|
||||
+/* { dg-final { scan-assembler-not "__aeabi" } } */
|
||||
+int x, y;
|
||||
+
|
||||
+#define EQ(X, Y) ((X) == (Y))
|
||||
+#define NE(X, Y) ((X) != (Y))
|
||||
+#define LT(X, Y) ((X) < (Y))
|
||||
+#define GT(X, Y) ((X) > (Y))
|
||||
+#define LE(X, Y) ((X) <= (Y))
|
||||
+#define GE(X, Y) ((X) >= (Y))
|
||||
+
|
||||
+#define TEST_EXPR(NAME, ARGS, EXPR) \
|
||||
+ int NAME##1 ARGS { return (EXPR); } \
|
||||
+ int NAME##2 ARGS { return !(EXPR); } \
|
||||
+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
|
||||
+ void NAME##4 ARGS { if (EXPR) x++; } \
|
||||
+ void NAME##5 ARGS { if (!(EXPR)) x++; }
|
||||
+
|
||||
+#define TEST(NAME, TYPE, OPERATOR) \
|
||||
+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \
|
||||
+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \
|
||||
+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \
|
||||
+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \
|
||||
+ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \
|
||||
+ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1))
|
||||
+
|
||||
+#define TEST_OP(NAME, OPERATOR) \
|
||||
+ TEST (f_##NAME, float, OPERATOR) \
|
||||
+ TEST (d_##NAME, double, OPERATOR) \
|
||||
+ TEST (ld_##NAME, long double, OPERATOR)
|
||||
+
|
||||
+TEST_OP (eq, EQ)
|
||||
+TEST_OP (ne, NE)
|
||||
+TEST_OP (lt, LT)
|
||||
+TEST_OP (gt, GT)
|
||||
+TEST_OP (le, LE)
|
||||
+TEST_OP (ge, GE)
|
||||
+TEST_OP (blt, __builtin_isless)
|
||||
+TEST_OP (bgt, __builtin_isgreater)
|
||||
+TEST_OP (ble, __builtin_islessequal)
|
||||
+TEST_OP (bge, __builtin_isgreaterequal)
|
||||
+/* This one should be expanded into separate ordered and equality
|
||||
+ comparisons. */
|
||||
+TEST_OP (blg, __builtin_islessgreater)
|
||||
+TEST_OP (bun, __builtin_isunordered)
|
||||
|
||||
@@ -0,0 +1,378 @@
|
||||
2011-10-06 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-data-ref.c (dr_analyze_innermost): Add new argument.
|
||||
Allow not simple iv if analyzing basic block.
|
||||
(create_data_ref): Update call to dr_analyze_innermost.
|
||||
(stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise.
|
||||
* tree-loop-distribution.c (generate_memset_zero): Likewise.
|
||||
* tree-predcom.c (find_looparound_phi): Likewise.
|
||||
* tree-data-ref.h (dr_analyze_innermost): Add new argument.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-24.c: New.
|
||||
|
||||
|
||||
2011-09-15 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow
|
||||
read-after-read dependencies in basic block SLP.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-25.c: New.
|
||||
|
||||
|
||||
2011-04-21 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use
|
||||
operand_equal_p to compare DR_BASE_ADDRESSes.
|
||||
(vect_check_interleaving): Likewise.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/vect-119.c: New test.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000
|
||||
@@ -0,0 +1,59 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define A 3
|
||||
+#define N 256
|
||||
+
|
||||
+short src[N], dst[N];
|
||||
+
|
||||
+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
|
||||
+ int stride, int dummy)
|
||||
+{
|
||||
+ int i;
|
||||
+ h /= 8;
|
||||
+ for (i = 0; i < h; i++)
|
||||
+ {
|
||||
+ dst[0] += A*src[0];
|
||||
+ dst[1] += A*src[1];
|
||||
+ dst[2] += A*src[2];
|
||||
+ dst[3] += A*src[3];
|
||||
+ dst[4] += A*src[4];
|
||||
+ dst[5] += A*src[5];
|
||||
+ dst[6] += A*src[6];
|
||||
+ dst[7] += A*src[7];
|
||||
+ dst += stride;
|
||||
+ src += stride;
|
||||
+ if (dummy == 32)
|
||||
+ abort ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ dst[i] = 0;
|
||||
+ src[i] = i;
|
||||
+ }
|
||||
+
|
||||
+ foo (dst, src, N, 8, 0);
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ if (dst[i] != A * i)
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000
|
||||
@@ -0,0 +1,59 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define A 3
|
||||
+#define B 4
|
||||
+#define N 256
|
||||
+
|
||||
+short src[N], dst[N];
|
||||
+
|
||||
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
|
||||
+{
|
||||
+ int i;
|
||||
+ h /= 16;
|
||||
+ for (i = 0; i < h; i++)
|
||||
+ {
|
||||
+ dst[0] += A*src[0] + src[stride];
|
||||
+ dst[1] += A*src[1] + src[1+stride];
|
||||
+ dst[2] += A*src[2] + src[2+stride];
|
||||
+ dst[3] += A*src[3] + src[3+stride];
|
||||
+ dst[4] += A*src[4] + src[4+stride];
|
||||
+ dst[5] += A*src[5] + src[5+stride];
|
||||
+ dst[6] += A*src[6] + src[6+stride];
|
||||
+ dst[7] += A*src[7] + src[7+stride];
|
||||
+ dst += 8;
|
||||
+ src += 8;
|
||||
+ if (dummy == 32)
|
||||
+ abort ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ dst[i] = 0;
|
||||
+ src[i] = i;
|
||||
+ }
|
||||
+
|
||||
+ foo (dst, src, N, 8, 0);
|
||||
+
|
||||
+ for (i = 0; i < N/2; i++)
|
||||
+ {
|
||||
+ if (dst[i] != A * i + i + 8)
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000
|
||||
@@ -0,0 +1,28 @@
|
||||
+/* { dg-do compile } */
|
||||
+
|
||||
+#define OUTER 32
|
||||
+#define INNER 40
|
||||
+
|
||||
+static unsigned int
|
||||
+bar (const unsigned int x[INNER][2], unsigned int sum)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < INNER; i++)
|
||||
+ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1];
|
||||
+ return sum;
|
||||
+}
|
||||
+
|
||||
+unsigned int foo (const unsigned int x[OUTER][INNER][2])
|
||||
+{
|
||||
+ int i;
|
||||
+ unsigned int sum;
|
||||
+
|
||||
+ sum = 0.0f;
|
||||
+ for (i = 0; i < OUTER; i++)
|
||||
+ sum = bar (x[i], sum);
|
||||
+ return sum;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/tree-data-ref.c'
|
||||
--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000
|
||||
+++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000
|
||||
@@ -721,11 +721,11 @@
|
||||
}
|
||||
|
||||
/* Analyzes the behavior of the memory reference DR in the innermost loop or
|
||||
- basic block that contains it. Returns true if analysis succeed or false
|
||||
+ basic block that contains it. Returns true if analysis succeed or false
|
||||
otherwise. */
|
||||
|
||||
bool
|
||||
-dr_analyze_innermost (struct data_reference *dr)
|
||||
+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
|
||||
{
|
||||
gimple stmt = DR_STMT (dr);
|
||||
struct loop *loop = loop_containing_stmt (stmt);
|
||||
@@ -768,14 +768,25 @@
|
||||
}
|
||||
else
|
||||
base = build_fold_addr_expr (base);
|
||||
+
|
||||
if (in_loop)
|
||||
{
|
||||
if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
|
||||
false))
|
||||
{
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
|
||||
- return false;
|
||||
+ if (nest)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "failed: evolution of base is not"
|
||||
+ " affine.\n");
|
||||
+ return false;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ base_iv.base = base;
|
||||
+ base_iv.step = ssize_int (0);
|
||||
+ base_iv.no_overflow = true;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -800,10 +811,18 @@
|
||||
else if (!simple_iv (loop, loop_containing_stmt (stmt),
|
||||
poffset, &offset_iv, false))
|
||||
{
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "failed: evolution of offset is not"
|
||||
- " affine.\n");
|
||||
- return false;
|
||||
+ if (nest)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "failed: evolution of offset is not"
|
||||
+ " affine.\n");
|
||||
+ return false;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ offset_iv.base = poffset;
|
||||
+ offset_iv.step = ssize_int (0);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -967,7 +986,7 @@
|
||||
DR_REF (dr) = memref;
|
||||
DR_IS_READ (dr) = is_read;
|
||||
|
||||
- dr_analyze_innermost (dr);
|
||||
+ dr_analyze_innermost (dr, nest);
|
||||
dr_analyze_indices (dr, nest, loop);
|
||||
dr_analyze_alias (dr);
|
||||
|
||||
@@ -5185,7 +5204,7 @@
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = op0;
|
||||
|
||||
- res = dr_analyze_innermost (dr)
|
||||
+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
|
||||
&& stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
|
||||
|
||||
free_data_ref (dr);
|
||||
@@ -5225,7 +5244,7 @@
|
||||
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = *ref->pos;
|
||||
- dr_analyze_innermost (dr);
|
||||
+ dr_analyze_innermost (dr, loop_containing_stmt (stmt));
|
||||
base_address = DR_BASE_ADDRESS (dr);
|
||||
|
||||
if (!base_address)
|
||||
|
||||
=== modified file 'gcc/tree-data-ref.h'
|
||||
--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
|
||||
+++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000
|
||||
@@ -386,7 +386,7 @@
|
||||
DEF_VEC_ALLOC_O (data_ref_loc, heap);
|
||||
|
||||
bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **);
|
||||
-bool dr_analyze_innermost (struct data_reference *);
|
||||
+bool dr_analyze_innermost (struct data_reference *, struct loop *);
|
||||
extern bool compute_data_dependences_for_loop (struct loop *, bool,
|
||||
VEC (loop_p, heap) **,
|
||||
VEC (data_reference_p, heap) **,
|
||||
|
||||
=== modified file 'gcc/tree-loop-distribution.c'
|
||||
--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000
|
||||
+++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000
|
||||
@@ -267,7 +267,7 @@
|
||||
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = op0;
|
||||
- res = dr_analyze_innermost (dr);
|
||||
+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
|
||||
gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
|
||||
|
||||
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
|
||||
|
||||
=== modified file 'gcc/tree-predcom.c'
|
||||
--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000
|
||||
+++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000
|
||||
@@ -1114,7 +1114,7 @@
|
||||
memset (&init_dr, 0, sizeof (struct data_reference));
|
||||
DR_REF (&init_dr) = init_ref;
|
||||
DR_STMT (&init_dr) = phi;
|
||||
- if (!dr_analyze_innermost (&init_dr))
|
||||
+ if (!dr_analyze_innermost (&init_dr, loop))
|
||||
return NULL;
|
||||
|
||||
if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
|
||||
|
||||
=== modified file 'gcc/tree-vect-data-refs.c'
|
||||
--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000
|
||||
+++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000
|
||||
@@ -353,11 +353,7 @@
|
||||
|
||||
/* Check that the data-refs have same bases and offsets. If not, we can't
|
||||
determine if they are dependent. */
|
||||
- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
|
||||
- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
|
||||
- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|
||||
- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
|
||||
- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
|
||||
+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
|
||||
|| !dr_equal_offsets_p (dra, drb))
|
||||
return true;
|
||||
|
||||
@@ -403,11 +399,7 @@
|
||||
|
||||
/* Check that the data-refs have same first location (except init) and they
|
||||
are both either store or load (not load and store). */
|
||||
- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
|
||||
- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
|
||||
- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|
||||
- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
|
||||
- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
|
||||
+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
|
||||
|| !dr_equal_offsets_p (dra, drb)
|
||||
|| !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
|
||||
|| DR_IS_READ (dra) != DR_IS_READ (drb))
|
||||
@@ -615,6 +607,11 @@
|
||||
if (vect_check_interleaving (dra, drb))
|
||||
return false;
|
||||
|
||||
+ /* Read-read is OK (we need this check here, after checking for
|
||||
+ interleaving). */
|
||||
+ if (DR_IS_READ (dra) && DR_IS_READ (drb))
|
||||
+ return false;
|
||||
+
|
||||
if (vect_print_dump_info (REPORT_DR_DETAILS))
|
||||
{
|
||||
fprintf (vect_dump, "can't determine dependence between ");
|
||||
|
||||
@@ -0,0 +1,240 @@
|
||||
2011-10-06 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block
|
||||
vectorizable.
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
|
||||
of vect_slp_analyze_bb here.
|
||||
(vect_slp_analyze_bb): Loop over vector sizes calling vect_slp_analyze_bb_1.
|
||||
|
||||
gcc/testsuite/
|
||||
* lib/target-supports.exp (check_effective_target_vect64): New.
|
||||
* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
|
||||
of multiple vector sizes.
|
||||
* gcc.dg/vect/bb-slp-26.c: New.
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
|
||||
@@ -49,6 +49,7 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000
|
||||
@@ -0,0 +1,60 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define A 3
|
||||
+#define B 4
|
||||
+#define N 256
|
||||
+
|
||||
+char src[N], dst[N];
|
||||
+
|
||||
+void foo (char * __restrict__ dst, char * __restrict__ src, int h,
|
||||
+ int stride, int dummy)
|
||||
+{
|
||||
+ int i;
|
||||
+ h /= 16;
|
||||
+ for (i = 0; i < h; i++)
|
||||
+ {
|
||||
+ dst[0] += A*src[0];
|
||||
+ dst[1] += A*src[1];
|
||||
+ dst[2] += A*src[2];
|
||||
+ dst[3] += A*src[3];
|
||||
+ dst[4] += A*src[4];
|
||||
+ dst[5] += A*src[5];
|
||||
+ dst[6] += A*src[6];
|
||||
+ dst[7] += A*src[7];
|
||||
+ dst += 8;
|
||||
+ src += 8;
|
||||
+ if (dummy == 32)
|
||||
+ abort ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ dst[i] = 0;
|
||||
+ src[i] = i/8;
|
||||
+ }
|
||||
+
|
||||
+ foo (dst, src, N, 8, 0);
|
||||
+
|
||||
+ for (i = 0; i < N/2; i++)
|
||||
+ {
|
||||
+ if (dst[i] != A * src[i])
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== modified file 'gcc/testsuite/lib/target-supports.exp'
|
||||
--- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
|
||||
+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
|
||||
@@ -3283,6 +3283,24 @@
|
||||
return $et_vect_multiple_sizes_saved
|
||||
}
|
||||
|
||||
+# Return 1 if the target supports vectors of 64 bits.
|
||||
+
|
||||
+proc check_effective_target_vect64 { } {
|
||||
+ global et_vect64
|
||||
+
|
||||
+ if [info exists et_vect64_saved] {
|
||||
+ verbose "check_effective_target_vect64: using cached result" 2
|
||||
+ } else {
|
||||
+ set et_vect64_saved 0
|
||||
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
|
||||
+ set et_vect64_saved 1
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
|
||||
+ return $et_vect64_saved
|
||||
+}
|
||||
+
|
||||
# Return 1 if the target supports section-anchors
|
||||
|
||||
proc check_effective_target_section_anchors { } {
|
||||
|
||||
=== modified file 'gcc/tree-vect-slp.c'
|
||||
--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000
|
||||
+++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
|
||||
@@ -1664,42 +1664,18 @@
|
||||
|
||||
/* Check if the basic block can be vectorized. */
|
||||
|
||||
-bb_vec_info
|
||||
-vect_slp_analyze_bb (basic_block bb)
|
||||
+static bb_vec_info
|
||||
+vect_slp_analyze_bb_1 (basic_block bb)
|
||||
{
|
||||
bb_vec_info bb_vinfo;
|
||||
VEC (ddr_p, heap) *ddrs;
|
||||
VEC (slp_instance, heap) *slp_instances;
|
||||
slp_instance instance;
|
||||
- int i, insns = 0;
|
||||
- gimple_stmt_iterator gsi;
|
||||
+ int i;
|
||||
int min_vf = 2;
|
||||
int max_vf = MAX_VECTORIZATION_FACTOR;
|
||||
bool data_dependence_in_bb = false;
|
||||
|
||||
- current_vector_size = 0;
|
||||
-
|
||||
- if (vect_print_dump_info (REPORT_DETAILS))
|
||||
- fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
|
||||
-
|
||||
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
- {
|
||||
- gimple stmt = gsi_stmt (gsi);
|
||||
- if (!is_gimple_debug (stmt)
|
||||
- && !gimple_nop_p (stmt)
|
||||
- && gimple_code (stmt) != GIMPLE_LABEL)
|
||||
- insns++;
|
||||
- }
|
||||
-
|
||||
- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
|
||||
- {
|
||||
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
|
||||
- fprintf (vect_dump, "not vectorized: too many instructions in basic "
|
||||
- "block.\n");
|
||||
-
|
||||
- return NULL;
|
||||
- }
|
||||
-
|
||||
bb_vinfo = new_bb_vec_info (bb);
|
||||
if (!bb_vinfo)
|
||||
return NULL;
|
||||
@@ -1819,6 +1795,61 @@
|
||||
}
|
||||
|
||||
|
||||
+bb_vec_info
|
||||
+vect_slp_analyze_bb (basic_block bb)
|
||||
+{
|
||||
+ bb_vec_info bb_vinfo;
|
||||
+ int insns = 0;
|
||||
+ gimple_stmt_iterator gsi;
|
||||
+ unsigned int vector_sizes;
|
||||
+
|
||||
+ if (vect_print_dump_info (REPORT_DETAILS))
|
||||
+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
|
||||
+
|
||||
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
+ {
|
||||
+ gimple stmt = gsi_stmt (gsi);
|
||||
+ if (!is_gimple_debug (stmt)
|
||||
+ && !gimple_nop_p (stmt)
|
||||
+ && gimple_code (stmt) != GIMPLE_LABEL)
|
||||
+ insns++;
|
||||
+ }
|
||||
+
|
||||
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
|
||||
+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
|
||||
+ "block.\n");
|
||||
+
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ /* Autodetect first vector size we try. */
|
||||
+ current_vector_size = 0;
|
||||
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
|
||||
+
|
||||
+ while (1)
|
||||
+ {
|
||||
+ bb_vinfo = vect_slp_analyze_bb_1 (bb);
|
||||
+ if (bb_vinfo)
|
||||
+ return bb_vinfo;
|
||||
+
|
||||
+ destroy_bb_vec_info (bb_vinfo);
|
||||
+
|
||||
+ vector_sizes &= ~current_vector_size;
|
||||
+ if (vector_sizes == 0
|
||||
+ || current_vector_size == 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ /* Try the next biggest vector size. */
|
||||
+ current_vector_size = 1 << floor_log2 (vector_sizes);
|
||||
+ if (vect_print_dump_info (REPORT_DETAILS))
|
||||
+ fprintf (vect_dump, "***** Re-trying analysis with "
|
||||
+ "vector size %d\n", current_vector_size);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
|
||||
the number of created vector stmts depends on the unrolling factor).
|
||||
However, the actual number of vector stmts for every SLP node depends on
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
2011-10-13 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-10-07 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/predicates.md (shift_amount_operand): Remove constant
|
||||
range check.
|
||||
(shift_operator): Check range of constants for all shift operators.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/pr50193-1.c: New file.
|
||||
* gcc.target/arm/shiftable.c: New file.
|
||||
|
||||
=== modified file 'gcc/config/arm/predicates.md'
|
||||
--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
|
||||
+++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000
|
||||
@@ -129,11 +129,12 @@
|
||||
(ior (match_operand 0 "arm_rhs_operand")
|
||||
(match_operand 0 "memory_operand")))
|
||||
|
||||
+;; This doesn't have to do much because the constant is already checked
|
||||
+;; in the shift_operator predicate.
|
||||
(define_predicate "shift_amount_operand"
|
||||
(ior (and (match_test "TARGET_ARM")
|
||||
(match_operand 0 "s_register_operand"))
|
||||
- (and (match_operand 0 "const_int_operand")
|
||||
- (match_test "INTVAL (op) > 0"))))
|
||||
+ (match_operand 0 "const_int_operand")))
|
||||
|
||||
(define_predicate "arm_add_operand"
|
||||
(ior (match_operand 0 "arm_rhs_operand")
|
||||
@@ -219,13 +220,20 @@
|
||||
(match_test "mode == GET_MODE (op)")))
|
||||
|
||||
;; True for shift operators.
|
||||
+;; Notes:
|
||||
+;; * mult is only permitted with a constant shift amount
|
||||
+;; * patterns that permit register shift amounts only in ARM mode use
|
||||
+;; shift_amount_operand, patterns that always allow registers do not,
|
||||
+;; so we don't have to worry about that sort of thing here.
|
||||
(define_special_predicate "shift_operator"
|
||||
(and (ior (ior (and (match_code "mult")
|
||||
(match_test "power_of_two_operand (XEXP (op, 1), mode)"))
|
||||
(and (match_code "rotate")
|
||||
(match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
|
||||
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
|
||||
- (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
|
||||
+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
|
||||
+ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
|
||||
+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
|
||||
(match_test "mode == GET_MODE (op)")))
|
||||
|
||||
;; True for MULT, to identify which variant of shift_operator is in use.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000
|
||||
@@ -0,0 +1,63 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+/* { dg-require-effective-target arm32 } */
|
||||
+
|
||||
+/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some
|
||||
+ of these as a left shift, others as a multiply. Check that we match the
|
||||
+ right one. */
|
||||
+
|
||||
+int
|
||||
+plus (int a, int b)
|
||||
+{
|
||||
+ return (a * 64) + b;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */
|
||||
+
|
||||
+int
|
||||
+minus (int a, int b)
|
||||
+{
|
||||
+ return a - (b * 64);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */
|
||||
+
|
||||
+int
|
||||
+ior (int a, int b)
|
||||
+{
|
||||
+ return (a * 64) | b;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */
|
||||
+
|
||||
+int
|
||||
+xor (int a, int b)
|
||||
+{
|
||||
+ return (a * 64) ^ b;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */
|
||||
+
|
||||
+int
|
||||
+and (int a, int b)
|
||||
+{
|
||||
+ return (a * 64) & b;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */
|
||||
+
|
||||
+int
|
||||
+rsb (int a, int b)
|
||||
+{
|
||||
+ return (a * 64) - b;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */
|
||||
+
|
||||
+int
|
||||
+mvn (int a, int b)
|
||||
+{
|
||||
+ return ~(a * 64);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */
|
||||
|
||||
@@ -0,0 +1,362 @@
|
||||
2011-10-16 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-09-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
|
||||
vectorization.
|
||||
(vectorizable_type_promotion): Likewise.
|
||||
(vect_analyze_stmt): Call vectorizable_type_demotion and
|
||||
vectorizable_type_promotion for basic blocks.
|
||||
(supportable_widening_operation): Don't assume loop vectorization.
|
||||
* tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
|
||||
basic blocks. Update vectorization factor for basic block
|
||||
vectorization.
|
||||
(vect_analyze_slp_instance): Allow multiple types for basic block
|
||||
vectorization. Recheck unrolling factor after construction of SLP
|
||||
instance.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit
|
||||
vectors.
|
||||
* gcc.dg/vect/bb-slp-27.c: New.
|
||||
* gcc.dg/vect/bb-slp-28.c: New.
|
||||
|
||||
|
||||
2011-10-04 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/testsuite/
|
||||
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
|
||||
Make et_vect_multiple_sizes_saved global.
|
||||
(check_effective_target_vect64): Make et_vect64_saved global.
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000
|
||||
@@ -48,8 +48,6 @@
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
|
||||
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
|
||||
/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000
|
||||
@@ -0,0 +1,49 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define A 3
|
||||
+#define N 16
|
||||
+
|
||||
+short src[N], dst[N];
|
||||
+
|
||||
+void foo (int a)
|
||||
+{
|
||||
+ dst[0] += a*src[0];
|
||||
+ dst[1] += a*src[1];
|
||||
+ dst[2] += a*src[2];
|
||||
+ dst[3] += a*src[3];
|
||||
+ dst[4] += a*src[4];
|
||||
+ dst[5] += a*src[5];
|
||||
+ dst[6] += a*src[6];
|
||||
+ dst[7] += a*src[7];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ dst[i] = 0;
|
||||
+ src[i] = i;
|
||||
+ }
|
||||
+
|
||||
+ foo (A);
|
||||
+
|
||||
+ for (i = 0; i < 8; i++)
|
||||
+ {
|
||||
+ if (dst[i] != A * i)
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000
|
||||
@@ -0,0 +1,71 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define A 300
|
||||
+#define N 16
|
||||
+
|
||||
+char src[N];
|
||||
+short dst[N];
|
||||
+short src1[N], dst1[N];
|
||||
+
|
||||
+void foo (int a)
|
||||
+{
|
||||
+ dst[0] = (short) (a * (int) src[0]);
|
||||
+ dst[1] = (short) (a * (int) src[1]);
|
||||
+ dst[2] = (short) (a * (int) src[2]);
|
||||
+ dst[3] = (short) (a * (int) src[3]);
|
||||
+ dst[4] = (short) (a * (int) src[4]);
|
||||
+ dst[5] = (short) (a * (int) src[5]);
|
||||
+ dst[6] = (short) (a * (int) src[6]);
|
||||
+ dst[7] = (short) (a * (int) src[7]);
|
||||
+ dst[8] = (short) (a * (int) src[8]);
|
||||
+ dst[9] = (short) (a * (int) src[9]);
|
||||
+ dst[10] = (short) (a * (int) src[10]);
|
||||
+ dst[11] = (short) (a * (int) src[11]);
|
||||
+ dst[12] = (short) (a * (int) src[12]);
|
||||
+ dst[13] = (short) (a * (int) src[13]);
|
||||
+ dst[14] = (short) (a * (int) src[14]);
|
||||
+ dst[15] = (short) (a * (int) src[15]);
|
||||
+
|
||||
+ dst1[0] += src1[0];
|
||||
+ dst1[1] += src1[1];
|
||||
+ dst1[2] += src1[2];
|
||||
+ dst1[3] += src1[3];
|
||||
+ dst1[4] += src1[4];
|
||||
+ dst1[5] += src1[5];
|
||||
+ dst1[6] += src1[6];
|
||||
+ dst1[7] += src1[7];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ dst[i] = 2;
|
||||
+ dst1[i] = 0;
|
||||
+ src[i] = i;
|
||||
+ src1[i] = i+2;
|
||||
+ }
|
||||
+
|
||||
+ foo (A);
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ if (dst[i] != A * i
|
||||
+ || (i < N/2 && dst1[i] != i + 2))
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== modified file 'gcc/testsuite/lib/target-supports.exp'
|
||||
--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
|
||||
+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000
|
||||
@@ -3268,7 +3268,7 @@
|
||||
# Return 1 if the target supports multiple vector sizes
|
||||
|
||||
proc check_effective_target_vect_multiple_sizes { } {
|
||||
- global et_vect_multiple_sizes
|
||||
+ global et_vect_multiple_sizes_saved
|
||||
|
||||
if [info exists et_vect_multiple_sizes_saved] {
|
||||
verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
|
||||
@@ -3286,7 +3286,7 @@
|
||||
# Return 1 if the target supports vectors of 64 bits.
|
||||
|
||||
proc check_effective_target_vect64 { } {
|
||||
- global et_vect64
|
||||
+ global et_vect64_saved
|
||||
|
||||
if [info exists et_vect64_saved] {
|
||||
verbose "check_effective_target_vect64: using cached result" 2
|
||||
|
||||
=== modified file 'gcc/tree-vect-slp.c'
|
||||
--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
|
||||
+++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
|
||||
@@ -386,20 +386,15 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
|
||||
- if (ncopies != 1)
|
||||
- {
|
||||
- if (vect_print_dump_info (REPORT_SLP))
|
||||
- fprintf (vect_dump, "SLP with multiple types ");
|
||||
-
|
||||
- /* FORNOW: multiple types are unsupported in BB SLP. */
|
||||
- if (bb_vinfo)
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
/* In case of multiple types we need to detect the smallest type. */
|
||||
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
|
||||
- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
+ {
|
||||
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
+ if (bb_vinfo)
|
||||
+ vectorization_factor = *max_nunits;
|
||||
+ }
|
||||
+
|
||||
+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
if (is_gimple_call (stmt))
|
||||
rhs_code = CALL_EXPR;
|
||||
@@ -1183,7 +1178,6 @@
|
||||
if (loop_vinfo)
|
||||
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
else
|
||||
- /* No multitypes in BB SLP. */
|
||||
vectorization_factor = nunits;
|
||||
|
||||
/* Calculate the unrolling factor. */
|
||||
@@ -1246,16 +1240,23 @@
|
||||
&max_nunits, &load_permutation, &loads,
|
||||
vectorization_factor))
|
||||
{
|
||||
+ /* Calculate the unrolling factor based on the smallest type. */
|
||||
+ if (max_nunits > nunits)
|
||||
+ unrolling_factor = least_common_multiple (max_nunits, group_size)
|
||||
+ / group_size;
|
||||
+
|
||||
+ if (unrolling_factor != 1 && !loop_vinfo)
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_SLP))
|
||||
+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
|
||||
+ " block SLP");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
/* Create a new SLP instance. */
|
||||
new_instance = XNEW (struct _slp_instance);
|
||||
SLP_INSTANCE_TREE (new_instance) = node;
|
||||
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
|
||||
- /* Calculate the unrolling factor based on the smallest type in the
|
||||
- loop. */
|
||||
- if (max_nunits > nunits)
|
||||
- unrolling_factor = least_common_multiple (max_nunits, group_size)
|
||||
- / group_size;
|
||||
-
|
||||
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
||||
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
|
||||
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
|
||||
|
||||
=== modified file 'gcc/tree-vect-stmts.c'
|
||||
--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000
|
||||
+++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
|
||||
@@ -3081,11 +3081,9 @@
|
||||
VEC (tree, heap) *vec_oprnds0 = NULL;
|
||||
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
|
||||
tree last_oprnd, intermediate_type;
|
||||
-
|
||||
- /* FORNOW: not supported by basic block SLP vectorization. */
|
||||
- gcc_assert (loop_vinfo);
|
||||
-
|
||||
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
+
|
||||
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
|
||||
@@ -3113,7 +3111,7 @@
|
||||
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
|
||||
&& CONVERT_EXPR_CODE_P (code))))
|
||||
return false;
|
||||
- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
|
||||
+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
|
||||
&def_stmt, &def, &dt[0], &vectype_in))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
@@ -3360,11 +3358,9 @@
|
||||
int multi_step_cvt = 0;
|
||||
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
|
||||
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
|
||||
-
|
||||
- /* FORNOW: not supported by basic block SLP vectorization. */
|
||||
- gcc_assert (loop_vinfo);
|
||||
-
|
||||
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
+
|
||||
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
|
||||
@@ -3393,7 +3389,7 @@
|
||||
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
|
||||
&& CONVERT_EXPR_CODE_P (code))))
|
||||
return false;
|
||||
- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
|
||||
+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
|
||||
&def_stmt, &def, &dt[0], &vectype_in))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
@@ -5153,7 +5149,9 @@
|
||||
else
|
||||
{
|
||||
if (bb_vinfo)
|
||||
- ok = (vectorizable_shift (stmt, NULL, NULL, node)
|
||||
+ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
|
||||
+ || vectorizable_type_demotion (stmt, NULL, NULL, node)
|
||||
+ || vectorizable_shift (stmt, NULL, NULL, node)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node)
|
||||
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
|
||||
@@ -5780,7 +5778,7 @@
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+ struct loop *vect_loop = NULL;
|
||||
bool ordered_p;
|
||||
enum machine_mode vec_mode;
|
||||
enum insn_code icode1, icode2;
|
||||
@@ -5789,6 +5787,9 @@
|
||||
tree wide_vectype = vectype_out;
|
||||
enum tree_code c1, c2;
|
||||
|
||||
+ if (loop_info)
|
||||
+ vect_loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+
|
||||
/* The result of a vectorized widening operation usually requires two vectors
|
||||
(because the widened results do not fit int one vector). The generated
|
||||
vector results would normally be expected to be generated in the same
|
||||
@@ -5809,7 +5810,8 @@
|
||||
iterations in parallel). We therefore don't allow to change the order
|
||||
of the computation in the inner-loop during outer-loop vectorization. */
|
||||
|
||||
- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
|
||||
+ if (vect_loop
|
||||
+ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
|
||||
&& !nested_in_vect_loop_p (vect_loop, stmt))
|
||||
ordered_p = false;
|
||||
else
|
||||
|
||||
@@ -0,0 +1,628 @@
|
||||
2011-10-17 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
Backport from mainline r178852:
|
||||
|
||||
2011-09-14 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.c (arm_override_options): Add unaligned_access
|
||||
support.
|
||||
(arm_file_start): Emit attribute for unaligned access as appropriate.
|
||||
* config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
|
||||
(UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
|
||||
(insv, extzv): Add unaligned-access support.
|
||||
(extv): Change to expander. Likewise.
|
||||
(extzv_t1, extv_regsi): Add helpers.
|
||||
(unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
|
||||
(unaligned_storesi, unaligned_storehi): New.
|
||||
(*extv_reg): New (previous extv implementation).
|
||||
* config/arm/arm.opt (munaligned_access): Add option.
|
||||
* config/arm/constraints.md (Uw): New constraint.
|
||||
* expmed.c (store_bit_field_1): Adjust bitfield numbering according
|
||||
to size of access, not size of unit, when BITS_BIG_ENDIAN !=
|
||||
BYTES_BIG_ENDIAN. Don't use bitfield accesses for
|
||||
volatile accesses when -fstrict-volatile-bitfields is in effect.
|
||||
(extract_bit_field_1): Likewise.
|
||||
|
||||
Backport from mainline r172697:
|
||||
|
||||
2011-04-19 Wei Guozhi <carrot@google.com>
|
||||
|
||||
PR target/47855
|
||||
gcc/
|
||||
* config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype.
|
||||
* config/arm/arm.c (thumb1_legitimate_address_p): Remove the static
|
||||
linkage.
|
||||
* config/arm/constraints.md (Uu): New constraint.
|
||||
* config/arm/arm.md (*arm_movqi_insn): Compute attr "length".
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-protos.h'
|
||||
--- old/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
|
||||
+++ new/gcc/config/arm/arm-protos.h 2011-10-11 01:56:19 +0000
|
||||
@@ -59,6 +59,7 @@
|
||||
int);
|
||||
extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
|
||||
int);
|
||||
+extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
|
||||
extern int arm_const_double_rtx (rtx);
|
||||
extern int neg_const_double_rtx_ok_for_fpa (rtx);
|
||||
extern int vfp3_const_double_rtx (rtx);
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
|
||||
@@ -2065,6 +2065,28 @@
|
||||
fix_cm3_ldrd = 0;
|
||||
}
|
||||
|
||||
+ /* Enable -munaligned-access by default for
|
||||
+ - all ARMv6 architecture-based processors
|
||||
+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
|
||||
+
|
||||
+ Disable -munaligned-access by default for
|
||||
+ - all pre-ARMv6 architecture-based processors
|
||||
+ - ARMv6-M architecture-based processors. */
|
||||
+
|
||||
+ if (unaligned_access == 2)
|
||||
+ {
|
||||
+ if (arm_arch6 && (arm_arch_notm || arm_arch7))
|
||||
+ unaligned_access = 1;
|
||||
+ else
|
||||
+ unaligned_access = 0;
|
||||
+ }
|
||||
+ else if (unaligned_access == 1
|
||||
+ && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
|
||||
+ {
|
||||
+ warning (0, "target CPU does not support unaligned accesses");
|
||||
+ unaligned_access = 0;
|
||||
+ }
|
||||
+
|
||||
if (TARGET_THUMB1 && flag_schedule_insns)
|
||||
{
|
||||
/* Don't warn since it's on by default in -O2. */
|
||||
@@ -6106,7 +6128,7 @@
|
||||
addresses based on the frame pointer or arg pointer until the
|
||||
reload pass starts. This is so that eliminating such addresses
|
||||
into stack based ones won't produce impossible code. */
|
||||
-static int
|
||||
+int
|
||||
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
||||
{
|
||||
/* ??? Not clear if this is right. Experiment. */
|
||||
@@ -22226,6 +22248,10 @@
|
||||
val = 6;
|
||||
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
|
||||
|
||||
+ /* Tag_CPU_unaligned_access. */
|
||||
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
|
||||
+ unaligned_access);
|
||||
+
|
||||
/* Tag_ABI_FP_16bit_format. */
|
||||
if (arm_fp16_format)
|
||||
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-10-11 02:31:01 +0000
|
||||
@@ -113,6 +113,10 @@
|
||||
(UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
|
||||
; another symbolic address.
|
||||
(UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
|
||||
+ (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access
|
||||
+ ; unaligned locations, on architectures which support
|
||||
+ ; that.
|
||||
+ (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh.
|
||||
]
|
||||
)
|
||||
|
||||
@@ -2463,10 +2467,10 @@
|
||||
;;; this insv pattern, so this pattern needs to be reevalutated.
|
||||
|
||||
(define_expand "insv"
|
||||
- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
|
||||
- (match_operand:SI 1 "general_operand" "")
|
||||
- (match_operand:SI 2 "general_operand" ""))
|
||||
- (match_operand:SI 3 "reg_or_int_operand" ""))]
|
||||
+ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
|
||||
+ (match_operand 1 "general_operand" "")
|
||||
+ (match_operand 2 "general_operand" ""))
|
||||
+ (match_operand 3 "reg_or_int_operand" ""))]
|
||||
"TARGET_ARM || arm_arch_thumb2"
|
||||
"
|
||||
{
|
||||
@@ -2477,35 +2481,70 @@
|
||||
|
||||
if (arm_arch_thumb2)
|
||||
{
|
||||
- bool use_bfi = TRUE;
|
||||
-
|
||||
- if (GET_CODE (operands[3]) == CONST_INT)
|
||||
- {
|
||||
- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
|
||||
-
|
||||
- if (val == 0)
|
||||
- {
|
||||
- emit_insn (gen_insv_zero (operands[0], operands[1],
|
||||
- operands[2]));
|
||||
+ if (unaligned_access && MEM_P (operands[0])
|
||||
+ && s_register_operand (operands[3], GET_MODE (operands[3]))
|
||||
+ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
|
||||
+ {
|
||||
+ rtx base_addr;
|
||||
+
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width
|
||||
+ - start_bit;
|
||||
+
|
||||
+ if (width == 32)
|
||||
+ {
|
||||
+ base_addr = adjust_address (operands[0], SImode,
|
||||
+ start_bit / BITS_PER_UNIT);
|
||||
+ emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rtx tmp = gen_reg_rtx (HImode);
|
||||
+
|
||||
+ base_addr = adjust_address (operands[0], HImode,
|
||||
+ start_bit / BITS_PER_UNIT);
|
||||
+ emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
|
||||
+ emit_insn (gen_unaligned_storehi (base_addr, tmp));
|
||||
+ }
|
||||
+ DONE;
|
||||
+ }
|
||||
+ else if (s_register_operand (operands[0], GET_MODE (operands[0])))
|
||||
+ {
|
||||
+ bool use_bfi = TRUE;
|
||||
+
|
||||
+ if (GET_CODE (operands[3]) == CONST_INT)
|
||||
+ {
|
||||
+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
|
||||
+
|
||||
+ if (val == 0)
|
||||
+ {
|
||||
+ emit_insn (gen_insv_zero (operands[0], operands[1],
|
||||
+ operands[2]));
|
||||
+ DONE;
|
||||
+ }
|
||||
+
|
||||
+ /* See if the set can be done with a single orr instruction. */
|
||||
+ if (val == mask && const_ok_for_arm (val << start_bit))
|
||||
+ use_bfi = FALSE;
|
||||
+ }
|
||||
+
|
||||
+ if (use_bfi)
|
||||
+ {
|
||||
+ if (GET_CODE (operands[3]) != REG)
|
||||
+ operands[3] = force_reg (SImode, operands[3]);
|
||||
+
|
||||
+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
|
||||
+ operands[3]));
|
||||
DONE;
|
||||
}
|
||||
-
|
||||
- /* See if the set can be done with a single orr instruction. */
|
||||
- if (val == mask && const_ok_for_arm (val << start_bit))
|
||||
- use_bfi = FALSE;
|
||||
- }
|
||||
-
|
||||
- if (use_bfi)
|
||||
- {
|
||||
- if (GET_CODE (operands[3]) != REG)
|
||||
- operands[3] = force_reg (SImode, operands[3]);
|
||||
-
|
||||
- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
|
||||
- operands[3]));
|
||||
- DONE;
|
||||
- }
|
||||
+ }
|
||||
+ else
|
||||
+ FAIL;
|
||||
}
|
||||
|
||||
+ if (!s_register_operand (operands[0], GET_MODE (operands[0])))
|
||||
+ FAIL;
|
||||
+
|
||||
target = copy_rtx (operands[0]);
|
||||
/* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
|
||||
subreg as the final target. */
|
||||
@@ -3697,12 +3736,10 @@
|
||||
;; to reduce register pressure later on.
|
||||
|
||||
(define_expand "extzv"
|
||||
- [(set (match_dup 4)
|
||||
- (ashift:SI (match_operand:SI 1 "register_operand" "")
|
||||
- (match_operand:SI 2 "const_int_operand" "")))
|
||||
- (set (match_operand:SI 0 "register_operand" "")
|
||||
- (lshiftrt:SI (match_dup 4)
|
||||
- (match_operand:SI 3 "const_int_operand" "")))]
|
||||
+ [(set (match_operand 0 "s_register_operand" "")
|
||||
+ (zero_extract (match_operand 1 "nonimmediate_operand" "")
|
||||
+ (match_operand 2 "const_int_operand" "")
|
||||
+ (match_operand 3 "const_int_operand" "")))]
|
||||
"TARGET_THUMB1 || arm_arch_thumb2"
|
||||
"
|
||||
{
|
||||
@@ -3711,10 +3748,57 @@
|
||||
|
||||
if (arm_arch_thumb2)
|
||||
{
|
||||
- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
|
||||
- operands[3]));
|
||||
- DONE;
|
||||
+ HOST_WIDE_INT width = INTVAL (operands[2]);
|
||||
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
|
||||
+
|
||||
+ if (unaligned_access && MEM_P (operands[1])
|
||||
+ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
|
||||
+ {
|
||||
+ rtx base_addr;
|
||||
+
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width
|
||||
+ - bitpos;
|
||||
+
|
||||
+ if (width == 32)
|
||||
+ {
|
||||
+ base_addr = adjust_address (operands[1], SImode,
|
||||
+ bitpos / BITS_PER_UNIT);
|
||||
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rtx dest = operands[0];
|
||||
+ rtx tmp = gen_reg_rtx (SImode);
|
||||
+
|
||||
+ /* We may get a paradoxical subreg here. Strip it off. */
|
||||
+ if (GET_CODE (dest) == SUBREG
|
||||
+ && GET_MODE (dest) == SImode
|
||||
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
|
||||
+ dest = SUBREG_REG (dest);
|
||||
+
|
||||
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
|
||||
+ FAIL;
|
||||
+
|
||||
+ base_addr = adjust_address (operands[1], HImode,
|
||||
+ bitpos / BITS_PER_UNIT);
|
||||
+ emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
|
||||
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
|
||||
+ }
|
||||
+ DONE;
|
||||
+ }
|
||||
+ else if (s_register_operand (operands[1], GET_MODE (operands[1])))
|
||||
+ {
|
||||
+ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
|
||||
+ operands[3]));
|
||||
+ DONE;
|
||||
+ }
|
||||
+ else
|
||||
+ FAIL;
|
||||
}
|
||||
+
|
||||
+ if (!s_register_operand (operands[1], GET_MODE (operands[1])))
|
||||
+ FAIL;
|
||||
|
||||
operands[3] = GEN_INT (rshift);
|
||||
|
||||
@@ -3724,12 +3808,154 @@
|
||||
DONE;
|
||||
}
|
||||
|
||||
- operands[2] = GEN_INT (lshift);
|
||||
- operands[4] = gen_reg_rtx (SImode);
|
||||
+ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift),
|
||||
+ operands[3], gen_reg_rtx (SImode)));
|
||||
+ DONE;
|
||||
}"
|
||||
)
|
||||
|
||||
-(define_insn "extv"
|
||||
+;; Helper for extzv, for the Thumb-1 register-shifts case.
|
||||
+
|
||||
+(define_expand "extzv_t1"
|
||||
+ [(set (match_operand:SI 4 "s_register_operand" "")
|
||||
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
|
||||
+ (match_operand:SI 2 "const_int_operand" "")))
|
||||
+ (set (match_operand:SI 0 "s_register_operand" "")
|
||||
+ (lshiftrt:SI (match_dup 4)
|
||||
+ (match_operand:SI 3 "const_int_operand" "")))]
|
||||
+ "TARGET_THUMB1"
|
||||
+ "")
|
||||
+
|
||||
+(define_expand "extv"
|
||||
+ [(set (match_operand 0 "s_register_operand" "")
|
||||
+ (sign_extract (match_operand 1 "nonimmediate_operand" "")
|
||||
+ (match_operand 2 "const_int_operand" "")
|
||||
+ (match_operand 3 "const_int_operand" "")))]
|
||||
+ "arm_arch_thumb2"
|
||||
+{
|
||||
+ HOST_WIDE_INT width = INTVAL (operands[2]);
|
||||
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
|
||||
+
|
||||
+ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
|
||||
+ && (bitpos % BITS_PER_UNIT) == 0)
|
||||
+ {
|
||||
+ rtx base_addr;
|
||||
+
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos;
|
||||
+
|
||||
+ if (width == 32)
|
||||
+ {
|
||||
+ base_addr = adjust_address (operands[1], SImode,
|
||||
+ bitpos / BITS_PER_UNIT);
|
||||
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rtx dest = operands[0];
|
||||
+ rtx tmp = gen_reg_rtx (SImode);
|
||||
+
|
||||
+ /* We may get a paradoxical subreg here. Strip it off. */
|
||||
+ if (GET_CODE (dest) == SUBREG
|
||||
+ && GET_MODE (dest) == SImode
|
||||
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
|
||||
+ dest = SUBREG_REG (dest);
|
||||
+
|
||||
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
|
||||
+ FAIL;
|
||||
+
|
||||
+ base_addr = adjust_address (operands[1], HImode,
|
||||
+ bitpos / BITS_PER_UNIT);
|
||||
+ emit_insn (gen_unaligned_loadhis (tmp, base_addr));
|
||||
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
|
||||
+ }
|
||||
+
|
||||
+ DONE;
|
||||
+ }
|
||||
+ else if (!s_register_operand (operands[1], GET_MODE (operands[1])))
|
||||
+ FAIL;
|
||||
+ else if (GET_MODE (operands[0]) == SImode
|
||||
+ && GET_MODE (operands[1]) == SImode)
|
||||
+ {
|
||||
+ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2],
|
||||
+ operands[3]));
|
||||
+ DONE;
|
||||
+ }
|
||||
+
|
||||
+ FAIL;
|
||||
+})
|
||||
+
|
||||
+; Helper to expand register forms of extv with the proper modes.
|
||||
+
|
||||
+(define_expand "extv_regsi"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||||
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
|
||||
+ (match_operand 2 "const_int_operand" "")
|
||||
+ (match_operand 3 "const_int_operand" "")))]
|
||||
+ ""
|
||||
+{
|
||||
+})
|
||||
+
|
||||
+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
|
||||
+
|
||||
+(define_insn "unaligned_loadsi"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
|
||||
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
|
||||
+ UNSPEC_UNALIGNED_LOAD))]
|
||||
+ "unaligned_access && TARGET_32BIT"
|
||||
+ "ldr%?\t%0, %1\t@ unaligned"
|
||||
+ [(set_attr "arch" "t2,any")
|
||||
+ (set_attr "length" "2,4")
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "type" "load1")])
|
||||
+
|
||||
+(define_insn "unaligned_loadhis"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
|
||||
+ (sign_extend:SI
|
||||
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
|
||||
+ UNSPEC_UNALIGNED_LOAD)))]
|
||||
+ "unaligned_access && TARGET_32BIT"
|
||||
+ "ldr%(sh%)\t%0, %1\t@ unaligned"
|
||||
+ [(set_attr "arch" "t2,any")
|
||||
+ (set_attr "length" "2,4")
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "type" "load_byte")])
|
||||
+
|
||||
+(define_insn "unaligned_loadhiu"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
|
||||
+ (zero_extend:SI
|
||||
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
|
||||
+ UNSPEC_UNALIGNED_LOAD)))]
|
||||
+ "unaligned_access && TARGET_32BIT"
|
||||
+ "ldr%(h%)\t%0, %1\t@ unaligned"
|
||||
+ [(set_attr "arch" "t2,any")
|
||||
+ (set_attr "length" "2,4")
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "type" "load_byte")])
|
||||
+
|
||||
+(define_insn "unaligned_storesi"
|
||||
+ [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
|
||||
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
|
||||
+ UNSPEC_UNALIGNED_STORE))]
|
||||
+ "unaligned_access && TARGET_32BIT"
|
||||
+ "str%?\t%1, %0\t@ unaligned"
|
||||
+ [(set_attr "arch" "t2,any")
|
||||
+ (set_attr "length" "2,4")
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "type" "store1")])
|
||||
+
|
||||
+(define_insn "unaligned_storehi"
|
||||
+ [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
|
||||
+ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
|
||||
+ UNSPEC_UNALIGNED_STORE))]
|
||||
+ "unaligned_access && TARGET_32BIT"
|
||||
+ "str%(h%)\t%1, %0\t@ unaligned"
|
||||
+ [(set_attr "arch" "t2,any")
|
||||
+ (set_attr "length" "2,4")
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "type" "store1")])
|
||||
+
|
||||
+(define_insn "*extv_reg"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
|
||||
(match_operand:SI 2 "const_int_operand" "M")
|
||||
@@ -6038,8 +6264,8 @@
|
||||
|
||||
|
||||
(define_insn "*arm_movqi_insn"
|
||||
- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
|
||||
- (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
|
||||
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m")
|
||||
+ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))]
|
||||
"TARGET_32BIT
|
||||
&& ( register_operand (operands[0], QImode)
|
||||
|| register_operand (operands[1], QImode))"
|
||||
@@ -6047,10 +6273,14 @@
|
||||
mov%?\\t%0, %1
|
||||
mvn%?\\t%0, #%B1
|
||||
ldr%(b%)\\t%0, %1
|
||||
+ str%(b%)\\t%1, %0
|
||||
+ ldr%(b%)\\t%0, %1
|
||||
str%(b%)\\t%1, %0"
|
||||
- [(set_attr "type" "*,*,load1,store1")
|
||||
- (set_attr "insn" "mov,mvn,*,*")
|
||||
- (set_attr "predicable" "yes")]
|
||||
+ [(set_attr "type" "*,*,load1,store1,load1,store1")
|
||||
+ (set_attr "insn" "mov,mvn,*,*,*,*")
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "arch" "any,any,t2,t2,any,any")
|
||||
+ (set_attr "length" "4,4,2,2,4,4")]
|
||||
)
|
||||
|
||||
(define_insn "*thumb1_movqi_insn"
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.opt'
|
||||
--- old/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
|
||||
+++ new/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
|
||||
@@ -173,3 +173,7 @@
|
||||
Target Report Var(fix_cm3_ldrd) Init(2)
|
||||
Avoid overlapping destination and address registers on LDRD instructions
|
||||
that may trigger Cortex-M3 errata.
|
||||
+
|
||||
+munaligned-access
|
||||
+Target Report Var(unaligned_access) Init(2)
|
||||
+Enable unaligned word and halfword accesses to packed data.
|
||||
|
||||
=== modified file 'gcc/config/arm/constraints.md'
|
||||
--- old/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000
|
||||
+++ new/gcc/config/arm/constraints.md 2011-10-11 02:31:01 +0000
|
||||
@@ -36,6 +36,7 @@
|
||||
;; The following memory constraints have been used:
|
||||
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
|
||||
;; in ARM state: Uq
|
||||
+;; in Thumb state: Uu, Uw
|
||||
|
||||
|
||||
(define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
|
||||
@@ -344,6 +345,27 @@
|
||||
(and (match_code "mem")
|
||||
(match_test "REG_P (XEXP (op, 0))")))
|
||||
|
||||
+(define_memory_constraint "Uu"
|
||||
+ "@internal
|
||||
+ In Thumb state an address that is valid in 16bit encoding."
|
||||
+ (and (match_code "mem")
|
||||
+ (match_test "TARGET_THUMB
|
||||
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
|
||||
+ 0)")))
|
||||
+
|
||||
+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
|
||||
+; are actually LDM/STM instructions, so cannot be used to access unaligned
|
||||
+; data.
|
||||
+(define_memory_constraint "Uw"
|
||||
+ "@internal
|
||||
+ In Thumb state an address that is valid in 16bit encoding, and that can be
|
||||
+ used for unaligned accesses."
|
||||
+ (and (match_code "mem")
|
||||
+ (match_test "TARGET_THUMB
|
||||
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
|
||||
+ 0)
|
||||
+ && GET_CODE (XEXP (op, 0)) != POST_INC")))
|
||||
+
|
||||
;; We used to have constraint letters for S and R in ARM state, but
|
||||
;; all uses of these now appear to have been removed.
|
||||
|
||||
|
||||
=== modified file 'gcc/expmed.c'
|
||||
--- old/gcc/expmed.c 2011-05-22 19:02:59 +0000
|
||||
+++ new/gcc/expmed.c 2011-10-11 02:31:01 +0000
|
||||
@@ -657,6 +657,10 @@
|
||||
&& GET_MODE (value) != BLKmode
|
||||
&& bitsize > 0
|
||||
&& GET_MODE_BITSIZE (op_mode) >= bitsize
|
||||
+ /* Do not use insv for volatile bitfields when
|
||||
+ -fstrict-volatile-bitfields is in effect. */
|
||||
+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
|
||||
+ && flag_strict_volatile_bitfields > 0)
|
||||
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
|
||||
&& (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
|
||||
&& insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
|
||||
@@ -700,19 +704,21 @@
|
||||
copy_back = true;
|
||||
}
|
||||
|
||||
- /* On big-endian machines, we count bits from the most significant.
|
||||
- If the bit field insn does not, we must invert. */
|
||||
-
|
||||
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
|
||||
- xbitpos = unit - bitsize - xbitpos;
|
||||
-
|
||||
/* We have been counting XBITPOS within UNIT.
|
||||
Count instead within the size of the register. */
|
||||
- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
|
||||
+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
|
||||
xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
|
||||
|
||||
unit = GET_MODE_BITSIZE (op_mode);
|
||||
|
||||
+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
|
||||
+ "backwards" from the size of the unit we are inserting into.
|
||||
+ Otherwise, we count bits from the most significant on a
|
||||
+ BYTES/BITS_BIG_ENDIAN machine. */
|
||||
+
|
||||
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
|
||||
+ xbitpos = unit - bitsize - xbitpos;
|
||||
+
|
||||
/* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
|
||||
value1 = value;
|
||||
if (GET_MODE (value) != op_mode)
|
||||
@@ -1528,6 +1534,10 @@
|
||||
if (ext_mode != MAX_MACHINE_MODE
|
||||
&& bitsize > 0
|
||||
&& GET_MODE_BITSIZE (ext_mode) >= bitsize
|
||||
+ /* Do not use extv/extzv for volatile bitfields when
|
||||
+ -fstrict-volatile-bitfields is in effect. */
|
||||
+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
|
||||
+ && flag_strict_volatile_bitfields > 0)
|
||||
/* If op0 is a register, we need it in EXT_MODE to make it
|
||||
acceptable to the format of ext(z)v. */
|
||||
&& !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
|
||||
@@ -1552,17 +1562,20 @@
|
||||
/* Get ref to first byte containing part of the field. */
|
||||
xop0 = adjust_address (xop0, byte_mode, xoffset);
|
||||
|
||||
- /* On big-endian machines, we count bits from the most significant.
|
||||
- If the bit field insn does not, we must invert. */
|
||||
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
|
||||
- xbitpos = unit - bitsize - xbitpos;
|
||||
-
|
||||
/* Now convert from counting within UNIT to counting in EXT_MODE. */
|
||||
- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
|
||||
+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
|
||||
xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
|
||||
|
||||
unit = GET_MODE_BITSIZE (ext_mode);
|
||||
|
||||
+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
|
||||
+ "backwards" from the size of the unit we are extracting from.
|
||||
+ Otherwise, we count bits from the most significant on a
|
||||
+ BYTES/BITS_BIG_ENDIAN machine. */
|
||||
+
|
||||
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
|
||||
+ xbitpos = unit - bitsize - xbitpos;
|
||||
+
|
||||
if (xtarget == 0)
|
||||
xtarget = xspec_target = gen_reg_rtx (tmode);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,147 @@
|
||||
2011-10-19 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-09-09 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm-cores.def (generic-armv7-a): New architecture.
|
||||
* config/arm/arm-tables.opt: Regenerate.
|
||||
* config/arm/arm-tune.md: Regenerate.
|
||||
* config/arm/arm.c (arm_file_start): Output .arch directive when
|
||||
user passes -mcpu=generic-*.
|
||||
(arm_issue_rate): Add genericv7a support.
|
||||
* config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec.
|
||||
(ASM_CPU_SPEC): New define.
|
||||
* config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec).
|
||||
* config/arm/semi.h (ASM_SPEC): Likewise.
|
||||
* doc/invoke.texi (ARM Options): Document -mcpu=generic-*
|
||||
and -mtune=generic-*.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-cores.def'
|
||||
--- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000
|
||||
+++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000
|
||||
@@ -124,6 +124,7 @@
|
||||
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
|
||||
ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2)
|
||||
ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
|
||||
+ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
|
||||
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
|
||||
@@ -135,3 +136,4 @@
|
||||
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex)
|
||||
+
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-tune.md'
|
||||
--- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000
|
||||
+++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000
|
||||
@@ -1,5 +1,5 @@
|
||||
;; -*- buffer-read-only: t -*-
|
||||
;; Generated automatically by gentune.sh from arm-cores.def
|
||||
(define_attr "tune"
|
||||
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
|
||||
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
|
||||
(const (symbol_ref "((enum attr_tune) arm_tune)")))
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000
|
||||
@@ -22185,6 +22185,8 @@
|
||||
const char *fpu_name;
|
||||
if (arm_selected_arch)
|
||||
asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
|
||||
+ else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
|
||||
+ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
|
||||
else
|
||||
asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
|
||||
|
||||
@@ -23717,6 +23719,7 @@
|
||||
case cortexr4:
|
||||
case cortexr4f:
|
||||
case cortexr5:
|
||||
+ case genericv7a:
|
||||
case cortexa5:
|
||||
case cortexa8:
|
||||
case cortexa9:
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.h'
|
||||
--- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000
|
||||
+++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000
|
||||
@@ -198,6 +198,7 @@
|
||||
Do not define this macro if it does not need to do anything. */
|
||||
#define EXTRA_SPECS \
|
||||
{ "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
|
||||
+ { "asm_cpu_spec", ASM_CPU_SPEC }, \
|
||||
SUBTARGET_EXTRA_SPECS
|
||||
|
||||
#ifndef SUBTARGET_EXTRA_SPECS
|
||||
@@ -2278,4 +2279,8 @@
|
||||
instruction. */
|
||||
#define MAX_LDM_STM_OPS 4
|
||||
|
||||
+#define ASM_CPU_SPEC \
|
||||
+ " %{mcpu=generic-*:-march=%*;" \
|
||||
+ " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
|
||||
+
|
||||
#endif /* ! GCC_ARM_H */
|
||||
|
||||
=== modified file 'gcc/config/arm/elf.h'
|
||||
--- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000
|
||||
+++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000
|
||||
@@ -56,8 +56,7 @@
|
||||
#define ASM_SPEC "\
|
||||
%{mbig-endian:-EB} \
|
||||
%{mlittle-endian:-EL} \
|
||||
-%{mcpu=*:-mcpu=%*} \
|
||||
-%{march=*:-march=%*} \
|
||||
+%(asm_cpu_spec) \
|
||||
%{mapcs-*:-mapcs-%*} \
|
||||
%(subtarget_asm_float_spec) \
|
||||
%{mthumb-interwork:-mthumb-interwork} \
|
||||
|
||||
=== modified file 'gcc/config/arm/semi.h'
|
||||
--- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000
|
||||
+++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000
|
||||
@@ -65,8 +65,7 @@
|
||||
#define ASM_SPEC "\
|
||||
%{fpic|fpie: -k} %{fPIC|fPIE: -k} \
|
||||
%{mbig-endian:-EB} \
|
||||
-%{mcpu=*:-mcpu=%*} \
|
||||
-%{march=*:-march=%*} \
|
||||
+%(asm_cpu_spec) \
|
||||
%{mapcs-float:-mfloat} \
|
||||
%{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \
|
||||
%{mfloat-abi=*} %{mfpu=*} \
|
||||
|
||||
=== modified file 'gcc/doc/invoke.texi'
|
||||
--- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000
|
||||
+++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000
|
||||
@@ -10215,6 +10215,10 @@
|
||||
@samp{cortex-m0},
|
||||
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
|
||||
|
||||
+@option{-mcpu=generic-@var{arch}} is also permissible, and is
|
||||
+equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
|
||||
+See @option{-mtune} for more information.
|
||||
+
|
||||
@item -mtune=@var{name}
|
||||
@opindex mtune
|
||||
This option is very similar to the @option{-mcpu=} option, except that
|
||||
@@ -10226,6 +10230,13 @@
|
||||
For some ARM implementations better performance can be obtained by using
|
||||
this option.
|
||||
|
||||
+@option{-mtune=generic-@var{arch}} specifies that GCC should tune the
|
||||
+performance for a blend of processors within architecture @var{arch}.
|
||||
+The aim is to generate code that runs well on the current most popular
|
||||
+processors, balancing between optimizations that benefit some CPUs in the
|
||||
+range, and avoiding performance pitfalls of other CPUs. The effects of
|
||||
+this option may change in future GCC versions as CPU models come and go.
|
||||
+
|
||||
@item -march=@var{name}
|
||||
@opindex march
|
||||
This specifies the name of the target ARM architecture. GCC uses this
|
||||
|
||||
@@ -0,0 +1,304 @@
|
||||
2011-10-19 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-10-18 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
* config/arm/driver-arm.c (host_detect_local_cpu): Close the file
|
||||
before exiting.
|
||||
|
||||
2011-10-18 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config.host (arm*-*-linux*): Add driver-arm.o and x-arm.
|
||||
* config/arm/arm.opt: Add 'native' processor_type and
|
||||
arm_arch enum values.
|
||||
* config/arm/arm.h (host_detect_local_cpu): New prototype.
|
||||
(EXTRA_SPEC_FUNCTIONS): New define.
|
||||
(MCPU_MTUNE_NATIVE_SPECS): New define.
|
||||
(DRIVER_SELF_SPECS): New define.
|
||||
* config/arm/driver-arm.c: New file.
|
||||
* config/arm/x-arm: New file.
|
||||
* doc/invoke.texi (ARM Options): Document -mcpu=native,
|
||||
-mtune=native and -march=native.
|
||||
|
||||
=== modified file 'gcc/config.host'
|
||||
--- old/gcc/config.host 2011-02-15 09:49:14 +0000
|
||||
+++ new/gcc/config.host 2011-10-19 17:01:50 +0000
|
||||
@@ -100,6 +100,14 @@
|
||||
esac
|
||||
|
||||
case ${host} in
|
||||
+ arm*-*-linux*)
|
||||
+ case ${target} in
|
||||
+ arm*-*-*)
|
||||
+ host_extra_gcc_objs="driver-arm.o"
|
||||
+ host_xmake_file="${host_xmake_file} arm/x-arm"
|
||||
+ ;;
|
||||
+ esac
|
||||
+ ;;
|
||||
alpha*-*-linux*)
|
||||
case ${target} in
|
||||
alpha*-*-linux*)
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.h'
|
||||
--- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000
|
||||
+++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000
|
||||
@@ -2283,4 +2283,21 @@
|
||||
" %{mcpu=generic-*:-march=%*;" \
|
||||
" :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
|
||||
|
||||
+/* -mcpu=native handling only makes sense with compiler running on
|
||||
+ an ARM chip. */
|
||||
+#if defined(__arm__)
|
||||
+extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
+# define EXTRA_SPEC_FUNCTIONS \
|
||||
+ { "local_cpu_detect", host_detect_local_cpu },
|
||||
+
|
||||
+# define MCPU_MTUNE_NATIVE_SPECS \
|
||||
+ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \
|
||||
+ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \
|
||||
+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
|
||||
+#else
|
||||
+# define MCPU_MTUNE_NATIVE_SPECS ""
|
||||
+#endif
|
||||
+
|
||||
+#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
|
||||
+
|
||||
#endif /* ! GCC_ARM_H */
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.opt'
|
||||
--- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
|
||||
+++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000
|
||||
@@ -48,6 +48,11 @@
|
||||
Target RejectNegative Joined
|
||||
Specify the name of the target architecture
|
||||
|
||||
+; Other arm_arch values are loaded from arm-tables.opt
|
||||
+; but that is a generated file and this is an odd-one-out.
|
||||
+EnumValue
|
||||
+Enum(arm_arch) String(native) Value(-1) DriverOnly
|
||||
+
|
||||
marm
|
||||
Target RejectNegative InverseMask(THUMB) Undocumented
|
||||
|
||||
@@ -153,6 +158,11 @@
|
||||
Target RejectNegative Joined
|
||||
Tune code for the given processor
|
||||
|
||||
+; Other processor_type values are loaded from arm-tables.opt
|
||||
+; but that is a generated file and this is an odd-one-out.
|
||||
+EnumValue
|
||||
+Enum(processor_type) String(native) Value(-1) DriverOnly
|
||||
+
|
||||
mwords-little-endian
|
||||
Target Report RejectNegative Mask(LITTLE_WORDS)
|
||||
Assume big endian bytes, little endian words
|
||||
|
||||
=== added file 'gcc/config/arm/driver-arm.c'
|
||||
--- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000
|
||||
@@ -0,0 +1,149 @@
|
||||
+/* Subroutines for the gcc driver.
|
||||
+ Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
+
|
||||
+This file is part of GCC.
|
||||
+
|
||||
+GCC is free software; you can redistribute it and/or modify
|
||||
+it under the terms of the GNU General Public License as published by
|
||||
+the Free Software Foundation; either version 3, or (at your option)
|
||||
+any later version.
|
||||
+
|
||||
+GCC is distributed in the hope that it will be useful,
|
||||
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+GNU General Public License for more details.
|
||||
+
|
||||
+You should have received a copy of the GNU General Public License
|
||||
+along with GCC; see the file COPYING3. If not see
|
||||
+<http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include "config.h"
|
||||
+#include "system.h"
|
||||
+#include "coretypes.h"
|
||||
+#include "tm.h"
|
||||
+#include "configargs.h"
|
||||
+
|
||||
+struct vendor_cpu {
|
||||
+ const char *part_no;
|
||||
+ const char *arch_name;
|
||||
+ const char *cpu_name;
|
||||
+};
|
||||
+
|
||||
+static struct vendor_cpu arm_cpu_table[] = {
|
||||
+ {"0x926", "armv5te", "arm926ej-s"},
|
||||
+ {"0xa26", "armv5te", "arm1026ej-s"},
|
||||
+ {"0xb02", "armv6k", "mpcore"},
|
||||
+ {"0xb36", "armv6j", "arm1136j-s"},
|
||||
+ {"0xb56", "armv6t2", "arm1156t2-s"},
|
||||
+ {"0xb76", "armv6zk", "arm1176jz-s"},
|
||||
+ {"0xc05", "armv7-a", "cortex-a5"},
|
||||
+ {"0xc08", "armv7-a", "cortex-a8"},
|
||||
+ {"0xc09", "armv7-a", "cortex-a9"},
|
||||
+ {"0xc0f", "armv7-a", "cortex-a15"},
|
||||
+ {"0xc14", "armv7-r", "cortex-r4"},
|
||||
+ {"0xc15", "armv7-r", "cortex-r5"},
|
||||
+ {"0xc20", "armv6-m", "cortex-m0"},
|
||||
+ {"0xc21", "armv6-m", "cortex-m1"},
|
||||
+ {"0xc23", "armv7-m", "cortex-m3"},
|
||||
+ {"0xc24", "armv7e-m", "cortex-m4"},
|
||||
+ {NULL, NULL, NULL}
|
||||
+};
|
||||
+
|
||||
+struct {
|
||||
+ const char *vendor_no;
|
||||
+ const struct vendor_cpu *vendor_parts;
|
||||
+} vendors[] = {
|
||||
+ {"0x41", arm_cpu_table},
|
||||
+ {NULL, NULL}
|
||||
+};
|
||||
+
|
||||
+/* This will be called by the spec parser in gcc.c when it sees
|
||||
+ a %:local_cpu_detect(args) construct. Currently it will be called
|
||||
+ with either "arch", "cpu" or "tune" as argument depending on if
|
||||
+ -march=native, -mcpu=native or -mtune=native is to be substituted.
|
||||
+
|
||||
+ It returns a string containing new command line parameters to be
|
||||
+   put at the place of the above options, depending on what CPU
|
||||
+   this is executed on. E.g. "-march=armv7-a" on a Cortex-A8 for
|
||||
+ -march=native. If the routine can't detect a known processor,
|
||||
+   the configure-time default (if any) is used, else the option is discarded.
|
||||
+
|
||||
+ ARGC and ARGV are set depending on the actual arguments given
|
||||
+ in the spec. */
|
||||
+const char *
|
||||
+host_detect_local_cpu (int argc, const char **argv)
|
||||
+{
|
||||
+ const char *val = NULL;
|
||||
+ char buf[128];
|
||||
+ FILE *f = NULL;
|
||||
+ bool arch;
|
||||
+ const struct vendor_cpu *cpu_table = NULL;
|
||||
+
|
||||
+ if (argc < 1)
|
||||
+    return NULL;  /* No option name: not_found would read argv[0].  */
|
||||
+
|
||||
+ arch = strcmp (argv[0], "arch") == 0;
|
||||
+ if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune"))
|
||||
+ goto not_found;
|
||||
+
|
||||
+ f = fopen ("/proc/cpuinfo", "r");
|
||||
+ if (f == NULL)
|
||||
+ goto not_found;
|
||||
+
|
||||
+ while (fgets (buf, sizeof (buf), f) != NULL)
|
||||
+ {
|
||||
+ /* Ensure that CPU implementer is ARM (0x41). */
|
||||
+ if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0)
|
||||
+ {
|
||||
+ int i;
|
||||
+ for (i = 0; vendors[i].vendor_no != NULL; i++)
|
||||
+ if (strstr (buf, vendors[i].vendor_no) != NULL)
|
||||
+ {
|
||||
+ cpu_table = vendors[i].vendor_parts;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Detect arch/cpu. */
|
||||
+ if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0)
|
||||
+ {
|
||||
+ int i;
|
||||
+
|
||||
+ if (cpu_table == NULL)
|
||||
+ goto not_found;
|
||||
+
|
||||
+ for (i = 0; cpu_table[i].part_no != NULL; i++)
|
||||
+ if (strstr (buf, cpu_table[i].part_no) != NULL)
|
||||
+ {
|
||||
+ val = arch ? cpu_table[i].arch_name : cpu_table[i].cpu_name;
|
||||
+ break;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ fclose (f);
|
||||
+
|
||||
+ if (val == NULL)
|
||||
+ goto not_found;
|
||||
+
|
||||
+ return concat ("-m", argv[0], "=", val, NULL);
|
||||
+
|
||||
+not_found:
|
||||
+ {
|
||||
+ unsigned int i;
|
||||
+ unsigned int opt;
|
||||
+ const char *search[] = {NULL, "arch"};
|
||||
+
|
||||
+ if (f)
|
||||
+ fclose (f);
|
||||
+
|
||||
+ search[0] = argv[0];
|
||||
+ for (opt = 0; opt < ARRAY_SIZE (search); opt++)
|
||||
+ for (i = 0; i < ARRAY_SIZE (configure_default_options); i++)
|
||||
+ if (strcmp (configure_default_options[i].name, search[opt]) == 0)
|
||||
+ return concat ("-m", search[opt], "=",
|
||||
+ configure_default_options[i].value, NULL);
|
||||
+ return NULL;
|
||||
+ }
|
||||
+}
|
||||
|
||||
=== added file 'gcc/config/arm/x-arm'
|
||||
--- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000
|
||||
@@ -0,0 +1,3 @@
|
||||
+driver-arm.o: $(srcdir)/config/arm/driver-arm.c \
|
||||
+ $(CONFIG_H) $(SYSTEM_H)
|
||||
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||
|
||||
=== modified file 'gcc/doc/invoke.texi'
|
||||
--- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000
|
||||
+++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000
|
||||
@@ -10215,10 +10215,16 @@
|
||||
@samp{cortex-m0},
|
||||
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
|
||||
|
||||
+
|
||||
@option{-mcpu=generic-@var{arch}} is also permissible, and is
|
||||
equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
|
||||
See @option{-mtune} for more information.
|
||||
|
||||
+@option{-mcpu=native} causes the compiler to auto-detect the CPU
|
||||
+of the build computer. At present, this feature is only supported on
|
||||
+Linux, and not all architectures are recognised. If the auto-detect is
|
||||
+unsuccessful the option has no effect.
|
||||
+
|
||||
@item -mtune=@var{name}
|
||||
@opindex mtune
|
||||
This option is very similar to the @option{-mcpu=} option, except that
|
||||
@@ -10237,6 +10243,11 @@
|
||||
range, and avoiding performance pitfalls of other CPUs. The effects of
|
||||
this option may change in future GCC versions as CPU models come and go.
|
||||
|
||||
+@option{-mtune=native} causes the compiler to auto-detect the CPU
|
||||
+of the build computer. At present, this feature is only supported on
|
||||
+Linux, and not all architectures are recognised. If the auto-detect is
|
||||
+unsuccessful the option has no effect.
|
||||
+
|
||||
@item -march=@var{name}
|
||||
@opindex march
|
||||
This specifies the name of the target ARM architecture. GCC uses this
|
||||
@@ -10250,6 +10261,11 @@
|
||||
@samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m},
|
||||
@samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
|
||||
|
||||
+@option{-march=native} causes the compiler to auto-detect the architecture
|
||||
+of the build computer. At present, this feature is only supported on
|
||||
+Linux, and not all architectures are recognised. If the auto-detect is
|
||||
+unsuccessful the option has no effect.
|
||||
+
|
||||
@item -mfpu=@var{name}
|
||||
@itemx -mfpe=@var{number}
|
||||
@itemx -mfp=@var{number}
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
2011-10-19 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-10-18 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
PR tree-optimization/50717
|
||||
|
||||
gcc/
|
||||
* tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type'
|
||||
parameter. Calculate 'type' from stmt.
|
||||
(convert_mult_to_widen): Update call to is_widening_mult_p.
|
||||
(convert_plusminus_to_widen): Likewise.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/pr50717-1.c: New file.
|
||||
* gcc.target/arm/wmul-12.c: Correct types.
|
||||
* gcc.target/arm/wmul-8.c: Correct types.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/pr50717-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000
|
||||
@@ -0,0 +1,26 @@
|
||||
+/* PR tree-optimization/50717 */
|
||||
+/* Ensure that widening multiply-and-accumulate is not used where integer
|
||||
+ type promotion or users' casts should prevent it. */
|
||||
+
|
||||
+/* { dg-options "-O2 -fdump-tree-widening_mul" } */
|
||||
+
|
||||
+long long
|
||||
+f (unsigned int a, char b, long long c)
|
||||
+{
|
||||
+ return (a * b) + c;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+g (short a, short b, int c)
|
||||
+{
|
||||
+ return (short)(a * b) + c;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+h (char a, char b, int c)
|
||||
+{
|
||||
+ return (char)(a * b) + c;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "widening_mul" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000
|
||||
@@ -4,8 +4,8 @@
|
||||
long long
|
||||
foo (int *b, int *c)
|
||||
{
|
||||
- int tmp = *b * *c;
|
||||
- return 10 + (long long)tmp;
|
||||
+ long long tmp = (long long)*b * *c;
|
||||
+ return 10 + tmp;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "smlal" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000
|
||||
@@ -4,7 +4,7 @@
|
||||
long long
|
||||
foo (long long a, int *b, int *c)
|
||||
{
|
||||
- return a + *b * *c;
|
||||
+ return a + (long long)*b * *c;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "smlal" } } */
|
||||
|
||||
=== modified file 'gcc/tree-ssa-math-opts.c'
|
||||
--- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000
|
||||
+++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000
|
||||
@@ -1351,10 +1351,12 @@
|
||||
and *TYPE2_OUT would give the operands of the multiplication. */
|
||||
|
||||
static bool
|
||||
-is_widening_mult_p (tree type, gimple stmt,
|
||||
+is_widening_mult_p (gimple stmt,
|
||||
tree *type1_out, tree *rhs1_out,
|
||||
tree *type2_out, tree *rhs2_out)
|
||||
{
|
||||
+ tree type = TREE_TYPE (gimple_assign_lhs (stmt));
|
||||
+
|
||||
if (TREE_CODE (type) != INTEGER_TYPE
|
||||
&& TREE_CODE (type) != FIXED_POINT_TYPE)
|
||||
return false;
|
||||
@@ -1416,7 +1418,7 @@
|
||||
if (TREE_CODE (type) != INTEGER_TYPE)
|
||||
return false;
|
||||
|
||||
- if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2))
|
||||
+ if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
|
||||
return false;
|
||||
|
||||
to_mode = TYPE_MODE (type);
|
||||
@@ -1592,7 +1594,7 @@
|
||||
if (code == PLUS_EXPR
|
||||
&& (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
|
||||
{
|
||||
- if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1,
|
||||
+ if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
|
||||
&type2, &mult_rhs2))
|
||||
return false;
|
||||
add_rhs = rhs2;
|
||||
@@ -1600,7 +1602,7 @@
|
||||
}
|
||||
else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
|
||||
{
|
||||
- if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1,
|
||||
+ if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
|
||||
&type2, &mult_rhs2))
|
||||
return false;
|
||||
add_rhs = rhs1;
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
2011-10-21 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF mainline:
|
||||
|
||||
2011-10-21 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
PR target/50809
|
||||
|
||||
gcc/
|
||||
* config/arm/driver-arm.c (vendors): Make static.
|
||||
|
||||
=== modified file 'gcc/config/arm/driver-arm.c'
|
||||
--- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000
|
||||
+++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000
|
||||
@@ -49,7 +49,7 @@
|
||||
{NULL, NULL, NULL}
|
||||
};
|
||||
|
||||
-struct {
|
||||
+static struct {
|
||||
const char *vendor_no;
|
||||
const struct vendor_cpu *vendor_parts;
|
||||
} vendors[] = {
|
||||
|
||||
@@ -0,0 +1,453 @@
|
||||
2011-10-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-10-16 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vect-stmts.c (vectorizable_load): For SLP without permutation
|
||||
treat the first load of the node as the first element in its
|
||||
interleaving chain.
|
||||
* tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
|
||||
necessary and possible.
|
||||
(vect_build_slp_tree): Add new argument. Allow load groups of any size
|
||||
in basic blocks. Keep all the loads for further permutation check.
|
||||
Use the new argument to determine if there is a permutation. Update
|
||||
the recursive calls.
|
||||
(vect_supported_load_permutation_p): Allow subchains of interleaving
|
||||
chains in basic block vectorization.
|
||||
(vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
|
||||
Check load permutation based on the new parameter.
|
||||
(vect_schedule_slp_instance): Don't start from the first element in
|
||||
interleaving chain unless the loads are permuted.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-29.c: New test.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000
|
||||
@@ -0,0 +1,59 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define A 3
|
||||
+#define B 4
|
||||
+#define N 256
|
||||
+
|
||||
+short src[N], dst[N];
|
||||
+
|
||||
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
|
||||
+{
|
||||
+ int i;
|
||||
+ h /= 16;
|
||||
+ for (i = 0; i < h; i++)
|
||||
+ {
|
||||
+ dst[0] = A*src[0] + B*src[1];
|
||||
+ dst[1] = A*src[1] + B*src[2];
|
||||
+ dst[2] = A*src[2] + B*src[3];
|
||||
+ dst[3] = A*src[3] + B*src[4];
|
||||
+ dst[4] = A*src[4] + B*src[5];
|
||||
+ dst[5] = A*src[5] + B*src[6];
|
||||
+ dst[6] = A*src[6] + B*src[7];
|
||||
+ dst[7] = A*src[7] + B*src[8];
|
||||
+ dst += stride;
|
||||
+ src += stride;
|
||||
+ if (dummy == 32)
|
||||
+ abort ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ dst[i] = 0;
|
||||
+ src[i] = i;
|
||||
+ }
|
||||
+
|
||||
+ foo (dst, src, N, 8, 0);
|
||||
+
|
||||
+ for (i = 0; i < N/2; i++)
|
||||
+ {
|
||||
+ if (dst[i] != A * src[i] + B * src[i+1])
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== modified file 'gcc/tree-vect-slp.c'
|
||||
--- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
|
||||
+++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000
|
||||
@@ -115,13 +115,15 @@
|
||||
{
|
||||
tree oprnd;
|
||||
unsigned int i, number_of_oprnds;
|
||||
- tree def;
|
||||
+ tree def[2];
|
||||
gimple def_stmt;
|
||||
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
|
||||
stmt_vec_info stmt_info =
|
||||
vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
|
||||
enum gimple_rhs_class rhs_class;
|
||||
struct loop *loop = NULL;
|
||||
+ enum tree_code rhs_code;
|
||||
+ bool different_types = false;
|
||||
|
||||
if (loop_vinfo)
|
||||
loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
@@ -133,7 +135,7 @@
|
||||
{
|
||||
oprnd = gimple_op (stmt, i + 1);
|
||||
|
||||
- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
|
||||
+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
|
||||
&dt[i])
|
||||
|| (!def_stmt && dt[i] != vect_constant_def))
|
||||
{
|
||||
@@ -188,11 +190,11 @@
|
||||
switch (gimple_code (def_stmt))
|
||||
{
|
||||
case GIMPLE_PHI:
|
||||
- def = gimple_phi_result (def_stmt);
|
||||
+ def[i] = gimple_phi_result (def_stmt);
|
||||
break;
|
||||
|
||||
case GIMPLE_ASSIGN:
|
||||
- def = gimple_assign_lhs (def_stmt);
|
||||
+ def[i] = gimple_assign_lhs (def_stmt);
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -206,8 +208,8 @@
|
||||
{
|
||||
/* op0 of the first stmt of the group - store its info. */
|
||||
*first_stmt_dt0 = dt[i];
|
||||
- if (def)
|
||||
- *first_stmt_def0_type = TREE_TYPE (def);
|
||||
+ if (def[i])
|
||||
+ *first_stmt_def0_type = TREE_TYPE (def[i]);
|
||||
else
|
||||
*first_stmt_const_oprnd = oprnd;
|
||||
|
||||
@@ -227,8 +229,8 @@
|
||||
{
|
||||
/* op1 of the first stmt of the group - store its info. */
|
||||
*first_stmt_dt1 = dt[i];
|
||||
- if (def)
|
||||
- *first_stmt_def1_type = TREE_TYPE (def);
|
||||
+ if (def[i])
|
||||
+ *first_stmt_def1_type = TREE_TYPE (def[i]);
|
||||
else
|
||||
{
|
||||
/* We assume that the stmt contains only one constant
|
||||
@@ -249,22 +251,53 @@
|
||||
the def-stmt/s of the first stmt. */
|
||||
if ((i == 0
|
||||
&& (*first_stmt_dt0 != dt[i]
|
||||
- || (*first_stmt_def0_type && def
|
||||
+ || (*first_stmt_def0_type && def[0]
|
||||
&& !types_compatible_p (*first_stmt_def0_type,
|
||||
- TREE_TYPE (def)))))
|
||||
+ TREE_TYPE (def[0])))))
|
||||
|| (i == 1
|
||||
&& (*first_stmt_dt1 != dt[i]
|
||||
- || (*first_stmt_def1_type && def
|
||||
+ || (*first_stmt_def1_type && def[1]
|
||||
&& !types_compatible_p (*first_stmt_def1_type,
|
||||
- TREE_TYPE (def)))))
|
||||
- || (!def
|
||||
+ TREE_TYPE (def[1])))))
|
||||
+ || (!def[i]
|
||||
&& !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
|
||||
- TREE_TYPE (oprnd))))
|
||||
+ TREE_TYPE (oprnd)))
|
||||
+ || different_types)
|
||||
{
|
||||
- if (vect_print_dump_info (REPORT_SLP))
|
||||
- fprintf (vect_dump, "Build SLP failed: different types ");
|
||||
+ if (i != number_of_oprnds - 1)
|
||||
+ different_types = true;
|
||||
+ else
|
||||
+ {
|
||||
+ if (is_gimple_assign (stmt)
|
||||
+ && (rhs_code = gimple_assign_rhs_code (stmt))
|
||||
+ && TREE_CODE_CLASS (rhs_code) == tcc_binary
|
||||
+ && commutative_tree_code (rhs_code)
|
||||
+ && *first_stmt_dt0 == dt[1]
|
||||
+ && *first_stmt_dt1 == dt[0]
|
||||
+ && def[0] && def[1]
|
||||
+ && !(*first_stmt_def0_type
|
||||
+ && !types_compatible_p (*first_stmt_def0_type,
|
||||
+ TREE_TYPE (def[1])))
|
||||
+ && !(*first_stmt_def1_type
|
||||
+ && !types_compatible_p (*first_stmt_def1_type,
|
||||
+ TREE_TYPE (def[0]))))
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_SLP))
|
||||
+ {
|
||||
+ fprintf (vect_dump, "Swapping operands of ");
|
||||
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
+ }
|
||||
+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
|
||||
+ gimple_assign_rhs2_ptr (stmt));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_SLP))
|
||||
+ fprintf (vect_dump, "Build SLP failed: different types ");
|
||||
|
||||
- return false;
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -278,10 +311,10 @@
|
||||
|
||||
case vect_internal_def:
|
||||
case vect_reduction_def:
|
||||
- if (i == 0)
|
||||
+ if ((i == 0 && !different_types) || (i == 1 && different_types))
|
||||
VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
|
||||
else
|
||||
- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
|
||||
+ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -289,7 +322,7 @@
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: illegal type of def ");
|
||||
- print_generic_expr (vect_dump, def, TDF_SLIM);
|
||||
+ print_generic_expr (vect_dump, def[i], TDF_SLIM);
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -312,7 +345,7 @@
|
||||
int ncopies_for_cost, unsigned int *max_nunits,
|
||||
VEC (int, heap) **load_permutation,
|
||||
VEC (slp_tree, heap) **loads,
|
||||
- unsigned int vectorization_factor)
|
||||
+ unsigned int vectorization_factor, bool *loads_permuted)
|
||||
{
|
||||
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
|
||||
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
|
||||
@@ -523,7 +556,9 @@
|
||||
|
||||
/* Check that the size of interleaved loads group is not
|
||||
greater than the SLP group size. */
|
||||
- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
|
||||
+ if (loop_vinfo
|
||||
+ && DR_GROUP_SIZE (vinfo_for_stmt (stmt))
|
||||
+ > ncopies * group_size)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
@@ -644,19 +679,22 @@
|
||||
/* Strided loads were reached - stop the recursion. */
|
||||
if (stop_recursion)
|
||||
{
|
||||
+ VEC_safe_push (slp_tree, heap, *loads, *node);
|
||||
if (permutation)
|
||||
{
|
||||
- VEC_safe_push (slp_tree, heap, *loads, *node);
|
||||
+
|
||||
+ *loads_permuted = true;
|
||||
*inside_cost
|
||||
+= targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
|
||||
* group_size;
|
||||
}
|
||||
else
|
||||
- {
|
||||
- /* We don't check here complex numbers chains, so we keep them in
|
||||
- LOADS for further check in vect_supported_load_permutation_p. */
|
||||
+ {
|
||||
+ /* We don't check here complex numbers chains, so we set
|
||||
+ LOADS_PERMUTED for further check in
|
||||
+ vect_supported_load_permutation_p. */
|
||||
if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
|
||||
- VEC_safe_push (slp_tree, heap, *loads, *node);
|
||||
+ *loads_permuted = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -675,7 +713,7 @@
|
||||
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
|
||||
inside_cost, outside_cost, ncopies_for_cost,
|
||||
max_nunits, load_permutation, loads,
|
||||
- vectorization_factor))
|
||||
+ vectorization_factor, loads_permuted))
|
||||
return false;
|
||||
|
||||
SLP_TREE_LEFT (*node) = left_node;
|
||||
@@ -693,7 +731,7 @@
|
||||
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
|
||||
inside_cost, outside_cost, ncopies_for_cost,
|
||||
max_nunits, load_permutation, loads,
|
||||
- vectorization_factor))
|
||||
+ vectorization_factor, loads_permuted))
|
||||
return false;
|
||||
|
||||
SLP_TREE_RIGHT (*node) = right_node;
|
||||
@@ -879,8 +917,10 @@
|
||||
bool supported, bad_permutation = false;
|
||||
sbitmap load_index;
|
||||
slp_tree node, other_complex_node;
|
||||
- gimple stmt, first = NULL, other_node_first;
|
||||
+ gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
|
||||
unsigned complex_numbers = 0;
|
||||
+ struct data_reference *dr;
|
||||
+ bb_vec_info bb_vinfo;
|
||||
|
||||
/* FORNOW: permutations are only supported in SLP. */
|
||||
if (!slp_instn)
|
||||
@@ -1040,6 +1080,76 @@
|
||||
}
|
||||
}
|
||||
|
||||
+ /* In basic block vectorization we allow any subchain of an interleaving
|
||||
+ chain.
|
||||
+ FORNOW: not supported in loop SLP because of realignment complications. */
|
||||
+ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
|
||||
+ bad_permutation = false;
|
||||
+ /* Check that for every node in the instance the loads form a subchain. */
|
||||
+ if (bb_vinfo)
|
||||
+ {
|
||||
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
|
||||
+ {
|
||||
+ next_load = NULL;
|
||||
+ first_load = NULL;
|
||||
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
|
||||
+ {
|
||||
+ if (!first_load)
|
||||
+ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load));
|
||||
+ else if (first_load
|
||||
+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load)))
|
||||
+ {
|
||||
+ bad_permutation = true;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (j != 0 && next_load != load)
|
||||
+ {
|
||||
+ bad_permutation = true;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load));
|
||||
+ }
|
||||
+
|
||||
+ if (bad_permutation)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ /* Check that the alignment of the first load in every subchain, i.e.,
|
||||
+ the first statement in every load node, is supported. */
|
||||
+ if (!bad_permutation)
|
||||
+ {
|
||||
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
|
||||
+ {
|
||||
+ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
|
||||
+ if (first_load
|
||||
+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load)))
|
||||
+ {
|
||||
+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
|
||||
+ if (vect_supportable_dr_alignment (dr, false)
|
||||
+ == dr_unaligned_unsupported)
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_SLP))
|
||||
+ {
|
||||
+ fprintf (vect_dump, "unsupported unaligned load ");
|
||||
+ print_gimple_stmt (vect_dump, first_load, 0,
|
||||
+ TDF_SLIM);
|
||||
+ }
|
||||
+ bad_permutation = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!bad_permutation)
|
||||
+ {
|
||||
+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
|
||||
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
|
||||
well (unless it's reduction). */
|
||||
@@ -1149,6 +1259,7 @@
|
||||
VEC (int, heap) *load_permutation;
|
||||
VEC (slp_tree, heap) *loads;
|
||||
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
|
||||
+ bool loads_permuted = false;
|
||||
|
||||
if (dr)
|
||||
{
|
||||
@@ -1238,7 +1349,7 @@
|
||||
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
|
||||
&inside_cost, &outside_cost, ncopies_for_cost,
|
||||
&max_nunits, &load_permutation, &loads,
|
||||
- vectorization_factor))
|
||||
+ vectorization_factor, &loads_permuted))
|
||||
{
|
||||
/* Calculate the unrolling factor based on the smallest type. */
|
||||
if (max_nunits > nunits)
|
||||
@@ -1263,7 +1374,8 @@
|
||||
SLP_INSTANCE_LOADS (new_instance) = loads;
|
||||
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
|
||||
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
|
||||
- if (VEC_length (slp_tree, loads))
|
||||
+
|
||||
+ if (loads_permuted)
|
||||
{
|
||||
if (!vect_supported_load_permutation_p (new_instance, group_size,
|
||||
load_permutation))
|
||||
@@ -2542,10 +2654,11 @@
|
||||
/* Loads should be inserted before the first load. */
|
||||
if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
|
||||
&& STMT_VINFO_STRIDED_ACCESS (stmt_info)
|
||||
- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
|
||||
+ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
|
||||
+ && SLP_INSTANCE_LOAD_PERMUTATION (instance))
|
||||
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
|
||||
else if (is_pattern_stmt_p (stmt_info))
|
||||
- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
|
||||
+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
|
||||
else
|
||||
si = gsi_for_stmt (stmt);
|
||||
|
||||
|
||||
=== modified file 'gcc/tree-vect-stmts.c'
|
||||
--- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
|
||||
+++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000
|
||||
@@ -4285,6 +4285,11 @@
|
||||
if (strided_load)
|
||||
{
|
||||
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
|
||||
+ if (slp
|
||||
+ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
|
||||
+ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
|
||||
+ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
|
||||
+
|
||||
/* Check if the chain of loads is already vectorized. */
|
||||
if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,61 @@
|
||||
2011-11-04 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
Backport from mainline -r180673:
|
||||
|
||||
gcc/
|
||||
* modulo-sched.c (generate_prolog_epilog): Mark prolog
|
||||
and epilog as BB_DISABLE_SCHEDULE.
|
||||
(mark_loop_unsched): New function.
|
||||
(sms_schedule): Call it.
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000
|
||||
+++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000
|
||||
@@ -1173,6 +1173,8 @@
|
||||
/* Put the prolog on the entry edge. */
|
||||
e = loop_preheader_edge (loop);
|
||||
split_edge_and_insert (e, get_insns ());
|
||||
+ if (!flag_resched_modulo_sched)
|
||||
+ e->dest->flags |= BB_DISABLE_SCHEDULE;
|
||||
|
||||
end_sequence ();
|
||||
|
||||
@@ -1186,9 +1188,24 @@
|
||||
gcc_assert (single_exit (loop));
|
||||
e = single_exit (loop);
|
||||
split_edge_and_insert (e, get_insns ());
|
||||
+ if (!flag_resched_modulo_sched)
|
||||
+ e->dest->flags |= BB_DISABLE_SCHEDULE;
|
||||
+
|
||||
end_sequence ();
|
||||
}
|
||||
|
||||
+/* Mark LOOP as software pipelined so the later
|
||||
+ scheduling passes don't touch it. */
|
||||
+static void
|
||||
+mark_loop_unsched (struct loop *loop)
|
||||
+{
|
||||
+ unsigned i;
|
||||
+ basic_block *bbs = get_loop_body (loop);
|
||||
+
|
||||
+ for (i = 0; i < loop->num_nodes; i++)
|
||||
+ bbs[i]->flags |= BB_DISABLE_SCHEDULE;
|
||||
+}
|
||||
+
|
||||
/* Return true if all the BBs of the loop are empty except the
|
||||
loop header. */
|
||||
static bool
|
||||
@@ -1714,9 +1731,10 @@
|
||||
permute_partial_schedule (ps, g->closing_branch->first_note);
|
||||
|
||||
/* Mark this loop as software pipelined so the later
|
||||
- scheduling passes doesn't touch it. */
|
||||
+ scheduling passes don't touch it. */
|
||||
if (! flag_resched_modulo_sched)
|
||||
- g->bb->flags |= BB_DISABLE_SCHEDULE;
|
||||
+ mark_loop_unsched (loop);
|
||||
+
|
||||
/* The life-info is not valid any more. */
|
||||
df_set_bb_dirty (g->bb);
|
||||
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
2011-11-02 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF mainline:
|
||||
|
||||
2011-11-01 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7-a tuning.
|
||||
|
||||
=== modified file 'gcc/config/arm/bpabi.h'
|
||||
--- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000
|
||||
+++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000
|
||||
@@ -56,7 +56,8 @@
|
||||
"|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
|
||||
|
||||
#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
|
||||
- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
|
||||
+ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\
|
||||
+ ":%{!r:--be8}}}"
|
||||
|
||||
/* Tell the assembler to build BPABI binaries. */
|
||||
#undef SUBTARGET_EXTRA_ASM_SPEC
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,515 @@
|
||||
2011-11-21 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
Backport from mainline r180131:
|
||||
|
||||
2011-10-18 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.c (arm_block_move_unaligned_straight)
|
||||
(arm_adjust_block_mem, arm_block_move_unaligned_loop)
|
||||
(arm_movmemqi_unaligned): New.
|
||||
(arm_gen_movmemqi): Support unaligned block copies.
|
||||
|
||||
gcc/testsuite/
|
||||
* lib/target-supports.exp (check_effective_target_arm_unaligned): New.
|
||||
* gcc.target/arm/unaligned-memcpy-1.c: New.
|
||||
* gcc.target/arm/unaligned-memcpy-2.c: New.
|
||||
* gcc.target/arm/unaligned-memcpy-3.c: New.
|
||||
* gcc.target/arm/unaligned-memcpy-4.c: New.
|
||||
|
||||
2011-09-15 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000
|
||||
@@ -10803,6 +10803,335 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
|
||||
+ unaligned copies on processors which support unaligned semantics for those
|
||||
+ instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
|
||||
+ (using more registers) by doing e.g. load/load/store/store for a factor of 2.
|
||||
+ An interleave factor of 1 (the minimum) will perform no interleaving.
|
||||
+ Load/store multiple are used for aligned addresses where possible. */
|
||||
+
|
||||
+static void
|
||||
+arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
|
||||
+ HOST_WIDE_INT length,
|
||||
+ unsigned int interleave_factor)
|
||||
+{
|
||||
+ rtx *regs = XALLOCAVEC (rtx, interleave_factor);
|
||||
+ int *regnos = XALLOCAVEC (int, interleave_factor);
|
||||
+ HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
|
||||
+ HOST_WIDE_INT i, j;
|
||||
+ HOST_WIDE_INT remaining = length, words;
|
||||
+ rtx halfword_tmp = NULL, byte_tmp = NULL;
|
||||
+ rtx dst, src;
|
||||
+ bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
|
||||
+ bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
|
||||
+ HOST_WIDE_INT srcoffset, dstoffset;
|
||||
+ HOST_WIDE_INT src_autoinc, dst_autoinc;
|
||||
+ rtx mem, addr;
|
||||
+
|
||||
+ gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
|
||||
+
|
||||
+ /* Use hard registers if we have aligned source or destination so we can use
|
||||
+ load/store multiple with contiguous registers. */
|
||||
+ if (dst_aligned || src_aligned)
|
||||
+ for (i = 0; i < interleave_factor; i++)
|
||||
+ regs[i] = gen_rtx_REG (SImode, i);
|
||||
+ else
|
||||
+ for (i = 0; i < interleave_factor; i++)
|
||||
+ regs[i] = gen_reg_rtx (SImode);
|
||||
+
|
||||
+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
|
||||
+ src = copy_addr_to_reg (XEXP (srcbase, 0));
|
||||
+
|
||||
+ srcoffset = dstoffset = 0;
|
||||
+
|
||||
+ /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
|
||||
+ For copying the last bytes we want to subtract this offset again. */
|
||||
+ src_autoinc = dst_autoinc = 0;
|
||||
+
|
||||
+ for (i = 0; i < interleave_factor; i++)
|
||||
+ regnos[i] = i;
|
||||
+
|
||||
+ /* Copy BLOCK_SIZE_BYTES chunks. */
|
||||
+
|
||||
+ for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
|
||||
+ {
|
||||
+ /* Load words. */
|
||||
+ if (src_aligned && interleave_factor > 1)
|
||||
+ {
|
||||
+ emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
|
||||
+ TRUE, srcbase, &srcoffset));
|
||||
+ src_autoinc += UNITS_PER_WORD * interleave_factor;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ for (j = 0; j < interleave_factor; j++)
|
||||
+ {
|
||||
+ addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
|
||||
+ - src_autoinc);
|
||||
+ mem = adjust_automodify_address (srcbase, SImode, addr,
|
||||
+ srcoffset + j * UNITS_PER_WORD);
|
||||
+ emit_insn (gen_unaligned_loadsi (regs[j], mem));
|
||||
+ }
|
||||
+ srcoffset += block_size_bytes;
|
||||
+ }
|
||||
+
|
||||
+ /* Store words. */
|
||||
+ if (dst_aligned && interleave_factor > 1)
|
||||
+ {
|
||||
+ emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
|
||||
+ TRUE, dstbase, &dstoffset));
|
||||
+ dst_autoinc += UNITS_PER_WORD * interleave_factor;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ for (j = 0; j < interleave_factor; j++)
|
||||
+ {
|
||||
+ addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
|
||||
+ - dst_autoinc);
|
||||
+ mem = adjust_automodify_address (dstbase, SImode, addr,
|
||||
+ dstoffset + j * UNITS_PER_WORD);
|
||||
+ emit_insn (gen_unaligned_storesi (mem, regs[j]));
|
||||
+ }
|
||||
+ dstoffset += block_size_bytes;
|
||||
+ }
|
||||
+
|
||||
+ remaining -= block_size_bytes;
|
||||
+ }
|
||||
+
|
||||
+ /* Copy any whole words left (note these aren't interleaved with any
|
||||
+ subsequent halfword/byte load/stores in the interests of simplicity). */
|
||||
+
|
||||
+ words = remaining / UNITS_PER_WORD;
|
||||
+
|
||||
+ gcc_assert (words < interleave_factor);
|
||||
+
|
||||
+ if (src_aligned && words > 1)
|
||||
+ {
|
||||
+ emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
|
||||
+ &srcoffset));
|
||||
+ src_autoinc += UNITS_PER_WORD * words;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ for (j = 0; j < words; j++)
|
||||
+ {
|
||||
+ addr = plus_constant (src,
|
||||
+ srcoffset + j * UNITS_PER_WORD - src_autoinc);
|
||||
+ mem = adjust_automodify_address (srcbase, SImode, addr,
|
||||
+ srcoffset + j * UNITS_PER_WORD);
|
||||
+ emit_insn (gen_unaligned_loadsi (regs[j], mem));
|
||||
+ }
|
||||
+ srcoffset += words * UNITS_PER_WORD;
|
||||
+ }
|
||||
+
|
||||
+ if (dst_aligned && words > 1)
|
||||
+ {
|
||||
+ emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
|
||||
+ &dstoffset));
|
||||
+ dst_autoinc += words * UNITS_PER_WORD;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ for (j = 0; j < words; j++)
|
||||
+ {
|
||||
+ addr = plus_constant (dst,
|
||||
+ dstoffset + j * UNITS_PER_WORD - dst_autoinc);
|
||||
+ mem = adjust_automodify_address (dstbase, SImode, addr,
|
||||
+ dstoffset + j * UNITS_PER_WORD);
|
||||
+ emit_insn (gen_unaligned_storesi (mem, regs[j]));
|
||||
+ }
|
||||
+ dstoffset += words * UNITS_PER_WORD;
|
||||
+ }
|
||||
+
|
||||
+ remaining -= words * UNITS_PER_WORD;
|
||||
+
|
||||
+ gcc_assert (remaining < 4);
|
||||
+
|
||||
+ /* Copy a halfword if necessary. */
|
||||
+
|
||||
+ if (remaining >= 2)
|
||||
+ {
|
||||
+ halfword_tmp = gen_reg_rtx (SImode);
|
||||
+
|
||||
+ addr = plus_constant (src, srcoffset - src_autoinc);
|
||||
+ mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
|
||||
+ emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
|
||||
+
|
||||
+ /* Either write out immediately, or delay until we've loaded the last
|
||||
+ byte, depending on interleave factor. */
|
||||
+ if (interleave_factor == 1)
|
||||
+ {
|
||||
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
|
||||
+ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
|
||||
+ emit_insn (gen_unaligned_storehi (mem,
|
||||
+ gen_lowpart (HImode, halfword_tmp)));
|
||||
+ halfword_tmp = NULL;
|
||||
+ dstoffset += 2;
|
||||
+ }
|
||||
+
|
||||
+ remaining -= 2;
|
||||
+ srcoffset += 2;
|
||||
+ }
|
||||
+
|
||||
+ gcc_assert (remaining < 2);
|
||||
+
|
||||
+ /* Copy last byte. */
|
||||
+
|
||||
+ if ((remaining & 1) != 0)
|
||||
+ {
|
||||
+ byte_tmp = gen_reg_rtx (SImode);
|
||||
+
|
||||
+ addr = plus_constant (src, srcoffset - src_autoinc);
|
||||
+ mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
|
||||
+ emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
|
||||
+
|
||||
+ if (interleave_factor == 1)
|
||||
+ {
|
||||
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
|
||||
+ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
|
||||
+ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
|
||||
+ byte_tmp = NULL;
|
||||
+ dstoffset++;
|
||||
+ }
|
||||
+
|
||||
+ remaining--;
|
||||
+ srcoffset++;
|
||||
+ }
|
||||
+
|
||||
+ /* Store last halfword if we haven't done so already. */
|
||||
+
|
||||
+ if (halfword_tmp)
|
||||
+ {
|
||||
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
|
||||
+ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
|
||||
+ emit_insn (gen_unaligned_storehi (mem,
|
||||
+ gen_lowpart (HImode, halfword_tmp)));
|
||||
+ dstoffset += 2;
|
||||
+ }
|
||||
+
|
||||
+ /* Likewise for last byte. */
|
||||
+
|
||||
+ if (byte_tmp)
|
||||
+ {
|
||||
+ addr = plus_constant (dst, dstoffset - dst_autoinc);
|
||||
+ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
|
||||
+ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
|
||||
+ dstoffset++;
|
||||
+ }
|
||||
+
|
||||
+ gcc_assert (remaining == 0 && srcoffset == dstoffset);
|
||||
+}
|
||||
+
|
||||
+/* From mips_adjust_block_mem:
|
||||
+
|
||||
+ Helper function for doing a loop-based block operation on memory
|
||||
+ reference MEM. Each iteration of the loop will operate on LENGTH
|
||||
+ bytes of MEM.
|
||||
+
|
||||
+ Create a new base register for use within the loop and point it to
|
||||
+ the start of MEM. Create a new memory reference that uses this
|
||||
+ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
|
||||
+
|
||||
+static void
|
||||
+arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
|
||||
+ rtx *loop_mem)
|
||||
+{
|
||||
+ *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
|
||||
+
|
||||
+ /* Although the new mem does not refer to a known location,
|
||||
+ it does keep up to LENGTH bytes of alignment. */
|
||||
+ *loop_mem = change_address (mem, BLKmode, *loop_reg);
|
||||
+ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
|
||||
+}
|
||||
+
|
||||
+/* From mips_block_move_loop:
|
||||
+
|
||||
+ Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
|
||||
+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
|
||||
+ the memory regions do not overlap. */
|
||||
+
|
||||
+static void
|
||||
+arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
+ unsigned int interleave_factor,
|
||||
+ HOST_WIDE_INT bytes_per_iter)
|
||||
+{
|
||||
+ rtx label, src_reg, dest_reg, final_src, test;
|
||||
+ HOST_WIDE_INT leftover;
|
||||
+
|
||||
+ leftover = length % bytes_per_iter;
|
||||
+ length -= leftover;
|
||||
+
|
||||
+ /* Create registers and memory references for use within the loop. */
|
||||
+ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
|
||||
+ arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
|
||||
+
|
||||
+ /* Calculate the value that SRC_REG should have after the last iteration of
|
||||
+ the loop. */
|
||||
+ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
|
||||
+ 0, 0, OPTAB_WIDEN);
|
||||
+
|
||||
+ /* Emit the start of the loop. */
|
||||
+ label = gen_label_rtx ();
|
||||
+ emit_label (label);
|
||||
+
|
||||
+ /* Emit the loop body. */
|
||||
+ arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
|
||||
+ interleave_factor);
|
||||
+
|
||||
+ /* Move on to the next block. */
|
||||
+ emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
|
||||
+ emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
|
||||
+
|
||||
+ /* Emit the loop condition. */
|
||||
+ test = gen_rtx_NE (VOIDmode, src_reg, final_src);
|
||||
+ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
|
||||
+
|
||||
+ /* Mop up any left-over bytes. */
|
||||
+ if (leftover)
|
||||
+ arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
|
||||
+}
|
||||
+
|
||||
+/* Emit a block move when either the source or destination is unaligned (not
|
||||
+ aligned to a four-byte boundary). This may need further tuning depending on
|
||||
+ core type, optimize_size setting, etc. */
|
||||
+
|
||||
+static int
|
||||
+arm_movmemqi_unaligned (rtx *operands)
|
||||
+{
|
||||
+ HOST_WIDE_INT length = INTVAL (operands[2]);
|
||||
+
|
||||
+ if (optimize_size)
|
||||
+ {
|
||||
+ bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
|
||||
+ bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
|
||||
+ /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
|
||||
+ size of code if optimizing for size. We'll use ldm/stm if src_aligned
|
||||
+ or dst_aligned though: allow more interleaving in those cases since the
|
||||
+ resulting code can be smaller. */
|
||||
+ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
|
||||
+ HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
|
||||
+
|
||||
+ if (length > 12)
|
||||
+ arm_block_move_unaligned_loop (operands[0], operands[1], length,
|
||||
+ interleave_factor, bytes_per_iter);
|
||||
+ else
|
||||
+ arm_block_move_unaligned_straight (operands[0], operands[1], length,
|
||||
+ interleave_factor);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Note that the loop created by arm_block_move_unaligned_loop may be
|
||||
+ subject to loop unrolling, which makes tuning this condition a little
|
||||
+ redundant. */
|
||||
+ if (length > 32)
|
||||
+ arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
|
||||
+ else
|
||||
+ arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
|
||||
+ }
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
int
|
||||
arm_gen_movmemqi (rtx *operands)
|
||||
{
|
||||
@@ -10815,8 +11144,13 @@
|
||||
|
||||
if (GET_CODE (operands[2]) != CONST_INT
|
||||
|| GET_CODE (operands[3]) != CONST_INT
|
||||
- || INTVAL (operands[2]) > 64
|
||||
- || INTVAL (operands[3]) & 3)
|
||||
+ || INTVAL (operands[2]) > 64)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
|
||||
+ return arm_movmemqi_unaligned (operands);
|
||||
+
|
||||
+ if (INTVAL (operands[3]) & 3)
|
||||
return 0;
|
||||
|
||||
dstbase = operands[0];
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.h'
|
||||
--- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000
|
||||
+++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000
|
||||
@@ -47,6 +47,8 @@
|
||||
{ \
|
||||
if (TARGET_DSP_MULTIPLY) \
|
||||
builtin_define ("__ARM_FEATURE_DSP"); \
|
||||
+ if (unaligned_access) \
|
||||
+ builtin_define ("__ARM_FEATURE_UNALIGNED"); \
|
||||
/* Define __arm__ even when in thumb mode, for \
|
||||
consistency with armcc. */ \
|
||||
builtin_define ("__arm__"); \
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000
|
||||
@@ -0,0 +1,19 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_unaligned } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+#include <string.h>
|
||||
+
|
||||
+void unknown_alignment (char *dest, char *src)
|
||||
+{
|
||||
+ memcpy (dest, src, 15);
|
||||
+}
|
||||
+
|
||||
+/* We should see three unaligned word load and store pairs, one unaligned
|
||||
+ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */
|
||||
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "strh" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "strb" 1 } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000
|
||||
@@ -0,0 +1,21 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_unaligned } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+#include <string.h>
|
||||
+
|
||||
+char dest[16];
|
||||
+
|
||||
+void aligned_dest (char *src)
|
||||
+{
|
||||
+ memcpy (dest, src, 15);
|
||||
+}
|
||||
+
|
||||
+/* Expect a multi-word store for the main part of the copy, but subword
|
||||
+ loads/stores for the remainder. */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "strh" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "strb" 1 } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000
|
||||
@@ -0,0 +1,21 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_unaligned } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+#include <string.h>
|
||||
+
|
||||
+char src[16];
|
||||
+
|
||||
+void aligned_src (char *dest)
|
||||
+{
|
||||
+ memcpy (dest, src, 15);
|
||||
+}
|
||||
+
|
||||
+/* Expect a multi-word load for the main part of the copy, but subword
|
||||
+ loads/stores for the remainder. */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "strh" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "strb" 1 } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000
|
||||
@@ -0,0 +1,18 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_unaligned } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+#include <string.h>
|
||||
+
|
||||
+char src[16];
|
||||
+char dest[16];
|
||||
+
|
||||
+void aligned_both (void)
|
||||
+{
|
||||
+ memcpy (dest, src, 15);
|
||||
+}
|
||||
+
|
||||
+/* We know both src and dest to be aligned: expect multiword loads/stores. */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/lib/target-supports.exp'
|
||||
--- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000
|
||||
+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000
|
||||
@@ -1894,6 +1894,18 @@
|
||||
}]
|
||||
}
|
||||
|
||||
+# Return 1 if this is an ARM target that supports unaligned word/halfword
|
||||
+# load/store instructions.
|
||||
+
|
||||
+proc check_effective_target_arm_unaligned { } {
|
||||
+ return [check_no_compiler_messages arm_unaligned assembly {
|
||||
+ #ifndef __ARM_FEATURE_UNALIGNED
|
||||
+ #error no unaligned support
|
||||
+ #endif
|
||||
+ int i;
|
||||
+ }]
|
||||
+}
|
||||
+
|
||||
# Add the options needed for NEON. We need either -mfloat-abi=softfp
|
||||
# or -mfloat-abi=hard, but if one is already specified by the
|
||||
# multilib, use it. Similarly, if a -mfpu option already enables
|
||||
|
||||
@@ -53,4 +53,26 @@ file://linaro/gcc-4.6-linaro-r106805.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106806.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106807.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106811.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106814.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106815.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106816.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106817.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106818.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106819.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106820.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106821.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106825.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106826.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106827.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106828.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106829.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106830.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106831.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106832.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106833.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106834.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106836.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106839.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106840.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106841.patch \
|
||||
"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# this will prepend this layer to FILESPATH
|
||||
FILESEXTRAPATHS := "${THISDIR}/gcc-4.6"
|
||||
-PRINC = "2"
|
||||
+PRINC = "3"
|
||||
ARM_INSTRUCTION_SET = "arm"
|
||||
|
||||
Reference in New Issue
Block a user