gcc-4.6: Adjust to match gcc 4.6.3 from OE-Core

Update linaro patches

Signed-off-by: Khem Raj <raj.khem@gmail.com>
This commit is contained in:
Khem Raj
2012-03-06 17:21:34 +00:00
parent 8a728abad7
commit f256ccfb85
18 changed files with 3197 additions and 780 deletions
@@ -0,0 +1,45 @@
Index: gcc-4_6-branch/gcc/config/arm/arm.c
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 17:14:09.901129286 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 17:18:23.061141606 -0800
@@ -17525,6 +17525,13 @@
}
return;
+ case 'v':
+ {
+ gcc_assert (GET_CODE (x) == CONST_DOUBLE);
+ fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
+ return;
+ }
+
/* Register specifier for vld1.16/vst1.16. Translate the S register
number into a D register number and element index. */
case 'z':
@@ -24925,4 +24932,26 @@
return 4;
}
+int
+vfp3_const_double_for_fract_bits (rtx operand)
+{
+ REAL_VALUE_TYPE r0;
+
+ if (GET_CODE (operand) != CONST_DOUBLE)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
+ if (exact_real_inverse (DFmode, &r0))
+ {
+ if (exact_real_truncate (DFmode, &r0))
+ {
+ HOST_WIDE_INT value = real_to_integer (&r0);
+ value = value & 0xffffffff;
+ if ((value != 0) && ( (value & (value - 1)) == 0))
+ return int_log2 (value);
+ }
+ }
+ return 0;
+}
+
#include "gt-arm.h"
@@ -1,294 +0,0 @@
2011-05-04 Richard Sandiford <richard.sandiford@linaro.org>
Backport from mainline:
2011-03-29 Richard Sandiford <richard.sandiford@linaro.org>
PR debug/48190
* dwarf2out.c (dw_loc_list_node): Add resolved_addr and replaced.
(cached_dw_loc_list_def): New structure.
(cached_dw_loc_list): New typedef.
(cached_dw_loc_list_table): New variable.
(cached_dw_loc_list_table_hash): New function.
(cached_dw_loc_list_table_eq): Likewise.
(add_location_or_const_value_attribute): Take a bool cache_p.
Cache the list when the parameter is true.
(gen_formal_parameter_die): Update caller.
(gen_variable_die): Likewise.
(dwarf2out_finish): Likewise.
(dwarf2out_abstract_function): Nullify cached_dw_loc_list_table
while generating debug info for the decl.
(dwarf2out_function_decl): Clear cached_dw_loc_list_table.
(dwarf2out_init): Initialize cached_dw_loc_list_table.
(resolve_addr): Cache the result of resolving a chain of
location lists.
=== modified file 'gcc/dwarf2out.c'
--- old/gcc/dwarf2out.c 2011-03-29 22:47:59 +0000
+++ new/gcc/dwarf2out.c 2011-05-04 13:20:12 +0000
@@ -4427,6 +4427,11 @@
const char *section; /* Section this loclist is relative to */
dw_loc_descr_ref expr;
hashval_t hash;
+ /* True if all addresses in this and subsequent lists are known to be
+ resolved. */
+ bool resolved_addr;
+ /* True if this list has been replaced by dw_loc_next. */
+ bool replaced;
bool emitted;
} dw_loc_list_node;
@@ -6087,6 +6092,19 @@
/* Table of decl location linked lists. */
static GTY ((param_is (var_loc_list))) htab_t decl_loc_table;
+/* A cached location list. */
+struct GTY (()) cached_dw_loc_list_def {
+ /* The DECL_UID of the decl that this entry describes. */
+ unsigned int decl_id;
+
+ /* The cached location list. */
+ dw_loc_list_ref loc_list;
+};
+typedef struct cached_dw_loc_list_def cached_dw_loc_list;
+
+/* Table of cached location lists. */
+static GTY ((param_is (cached_dw_loc_list))) htab_t cached_dw_loc_list_table;
+
/* A pointer to the base of a list of references to DIE's that
are uniquely identified by their tag, presence/absence of
children DIE's, and list of attribute/value pairs. */
@@ -6434,7 +6452,7 @@
static void insert_double (double_int, unsigned char *);
static void insert_float (const_rtx, unsigned char *);
static rtx rtl_for_decl_location (tree);
-static bool add_location_or_const_value_attribute (dw_die_ref, tree,
+static bool add_location_or_const_value_attribute (dw_die_ref, tree, bool,
enum dwarf_attribute);
static bool tree_add_const_value_attribute (dw_die_ref, tree);
static bool tree_add_const_value_attribute_for_decl (dw_die_ref, tree);
@@ -8168,6 +8186,24 @@
htab_find_with_hash (decl_loc_table, decl, DECL_UID (decl));
}
+/* Returns a hash value for X (which really is a cached_dw_loc_list). */
+
+static hashval_t
+cached_dw_loc_list_table_hash (const void *x)
+{
+ return (hashval_t) ((const cached_dw_loc_list *) x)->decl_id;
+}
+
+/* Return nonzero if decl_id of cached_dw_loc_list X is the same as
+ UID of decl *Y. */
+
+static int
+cached_dw_loc_list_table_eq (const void *x, const void *y)
+{
+ return (((const cached_dw_loc_list *) x)->decl_id
+ == DECL_UID ((const_tree) y));
+}
+
/* Equate a DIE to a particular declaration. */
static void
@@ -16965,15 +17001,22 @@
these things can crop up in other ways also.) Note that one type of
constant value which can be passed into an inlined function is a constant
pointer. This can happen for example if an actual argument in an inlined
- function call evaluates to a compile-time constant address. */
+ function call evaluates to a compile-time constant address.
+
+ CACHE_P is true if it is worth caching the location list for DECL,
+ so that future calls can reuse it rather than regenerate it from scratch.
+ This is true for BLOCK_NONLOCALIZED_VARS in inlined subroutines,
+ since we will need to refer to them each time the function is inlined. */
static bool
-add_location_or_const_value_attribute (dw_die_ref die, tree decl,
+add_location_or_const_value_attribute (dw_die_ref die, tree decl, bool cache_p,
enum dwarf_attribute attr)
{
rtx rtl;
dw_loc_list_ref list;
var_loc_list *loc_list;
+ cached_dw_loc_list *cache;
+ void **slot;
if (TREE_CODE (decl) == ERROR_MARK)
return false;
@@ -17010,7 +17053,33 @@
&& add_const_value_attribute (die, rtl))
return true;
}
- list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2);
+ /* If this decl is from BLOCK_NONLOCALIZED_VARS, we might need its
+ list several times. See if we've already cached the contents. */
+ list = NULL;
+ if (loc_list == NULL || cached_dw_loc_list_table == NULL)
+ cache_p = false;
+ if (cache_p)
+ {
+ cache = (cached_dw_loc_list *)
+ htab_find_with_hash (cached_dw_loc_list_table, decl, DECL_UID (decl));
+ if (cache)
+ list = cache->loc_list;
+ }
+ if (list == NULL)
+ {
+ list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2);
+ /* It is usually worth caching this result if the decl is from
+ BLOCK_NONLOCALIZED_VARS and if the list has at least two elements. */
+ if (cache_p && list && list->dw_loc_next)
+ {
+ slot = htab_find_slot_with_hash (cached_dw_loc_list_table, decl,
+ DECL_UID (decl), INSERT);
+ cache = ggc_alloc_cleared_cached_dw_loc_list ();
+ cache->decl_id = DECL_UID (decl);
+ cache->loc_list = list;
+ *slot = cache;
+ }
+ }
if (list)
{
add_AT_location_description (die, attr, list);
@@ -18702,7 +18771,7 @@
equate_decl_number_to_die (node, parm_die);
if (! DECL_ABSTRACT (node_or_origin))
add_location_or_const_value_attribute (parm_die, node_or_origin,
- DW_AT_location);
+ node == NULL, DW_AT_location);
break;
@@ -18887,6 +18956,7 @@
tree context;
int was_abstract;
htab_t old_decl_loc_table;
+ htab_t old_cached_dw_loc_list_table;
/* Make sure we have the actual abstract inline, not a clone. */
decl = DECL_ORIGIN (decl);
@@ -18901,6 +18971,8 @@
get locations in abstract instantces. */
old_decl_loc_table = decl_loc_table;
decl_loc_table = NULL;
+ old_cached_dw_loc_list_table = cached_dw_loc_list_table;
+ cached_dw_loc_list_table = NULL;
/* Be sure we've emitted the in-class declaration DIE (if any) first, so
we don't get confused by DECL_ABSTRACT. */
@@ -18925,6 +18997,7 @@
current_function_decl = save_fn;
decl_loc_table = old_decl_loc_table;
+ cached_dw_loc_list_table = old_cached_dw_loc_list_table;
pop_cfun ();
}
@@ -19709,9 +19782,8 @@
&& !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl_or_origin)))
defer_location (decl_or_origin, var_die);
else
- add_location_or_const_value_attribute (var_die,
- decl_or_origin,
- DW_AT_location);
+ add_location_or_const_value_attribute (var_die, decl_or_origin,
+ decl == NULL, DW_AT_location);
add_pubname (decl_or_origin, var_die);
}
else
@@ -21498,6 +21570,7 @@
dwarf2out_decl (decl);
htab_empty (decl_loc_table);
+ htab_empty (cached_dw_loc_list_table);
}
/* Output a marker (i.e. a label) for the beginning of the generated code for
@@ -22230,6 +22303,11 @@
decl_loc_table = htab_create_ggc (10, decl_loc_table_hash,
decl_loc_table_eq, NULL);
+ /* Allocate the cached_dw_loc_list_table. */
+ cached_dw_loc_list_table
+ = htab_create_ggc (10, cached_dw_loc_list_table_hash,
+ cached_dw_loc_list_table_eq, NULL);
+
/* Allocate the initial hunk of the decl_scope_table. */
decl_scope_table = VEC_alloc (tree, gc, 256);
@@ -22870,30 +22948,53 @@
{
dw_die_ref c;
dw_attr_ref a;
- dw_loc_list_ref *curr;
+ dw_loc_list_ref *curr, *start, loc;
unsigned ix;
FOR_EACH_VEC_ELT (dw_attr_node, die->die_attr, ix, a)
switch (AT_class (a))
{
case dw_val_class_loc_list:
- curr = AT_loc_list_ptr (a);
- while (*curr)
+ start = curr = AT_loc_list_ptr (a);
+ loc = *curr;
+ gcc_assert (loc);
+ /* The same list can be referenced more than once. See if we have
+ already recorded the result from a previous pass. */
+ if (loc->replaced)
+ *curr = loc->dw_loc_next;
+ else if (!loc->resolved_addr)
{
- if (!resolve_addr_in_expr ((*curr)->expr))
+ /* As things stand, we do not expect or allow one die to
+ reference a suffix of another die's location list chain.
+ References must be identical or completely separate.
+ There is therefore no need to cache the result of this
+ pass on any list other than the first; doing so
+ would lead to unnecessary writes. */
+ while (*curr)
{
- dw_loc_list_ref next = (*curr)->dw_loc_next;
- if (next && (*curr)->ll_symbol)
+ gcc_assert (!(*curr)->replaced && !(*curr)->resolved_addr);
+ if (!resolve_addr_in_expr ((*curr)->expr))
{
- gcc_assert (!next->ll_symbol);
- next->ll_symbol = (*curr)->ll_symbol;
+ dw_loc_list_ref next = (*curr)->dw_loc_next;
+ if (next && (*curr)->ll_symbol)
+ {
+ gcc_assert (!next->ll_symbol);
+ next->ll_symbol = (*curr)->ll_symbol;
+ }
+ *curr = next;
}
- *curr = next;
+ else
+ curr = &(*curr)->dw_loc_next;
}
+ if (loc == *start)
+ loc->resolved_addr = 1;
else
- curr = &(*curr)->dw_loc_next;
+ {
+ loc->replaced = 1;
+ loc->dw_loc_next = *start;
+ }
}
- if (!AT_loc_list (a))
+ if (!*start)
{
remove_AT (die, a->dw_attr);
ix--;
@@ -23322,6 +23423,7 @@
add_location_or_const_value_attribute (
VEC_index (deferred_locations, deferred_locations_list, i)->die,
VEC_index (deferred_locations, deferred_locations_list, i)->variable,
+ false,
DW_AT_location);
}
@@ -19,8 +19,8 @@
=== modified file 'gcc/combine.c'
Index: gcc-4_6-branch/gcc/combine.c
===================================================================
--- gcc-4_6-branch.orig/gcc/combine.c 2011-09-16 19:58:21.000000000 -0700
+++ gcc-4_6-branch/gcc/combine.c 2011-09-16 20:05:36.626650681 -0700
--- gcc-4_6-branch.orig/gcc/combine.c 2012-03-05 00:16:20.000000000 -0800
+++ gcc-4_6-branch/gcc/combine.c 2012-03-05 16:05:01.212928507 -0800
@@ -391,8 +391,8 @@
static void undo_all (void);
static void undo_commit (void);
@@ -32,7 +32,7 @@ Index: gcc-4_6-branch/gcc/combine.c
static rtx simplify_if_then_else (rtx);
static rtx simplify_set (rtx);
static rtx simplify_logical (rtx);
@@ -3112,12 +3112,12 @@
@@ -3119,12 +3119,12 @@
if (i1)
{
subst_low_luid = DF_INSN_LUID (i1);
@@ -47,7 +47,7 @@ Index: gcc-4_6-branch/gcc/combine.c
}
}
@@ -3129,7 +3129,7 @@
@@ -3136,7 +3136,7 @@
self-referential RTL when we will be substituting I1SRC for I1DEST
later. Likewise if I0 feeds into I2, either directly or indirectly
through I1, and I0DEST is in I0SRC. */
@@ -56,7 +56,7 @@ Index: gcc-4_6-branch/gcc/combine.c
(i1_feeds_i2_n && i1dest_in_i1src)
|| ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
&& i0dest_in_i0src));
@@ -3168,7 +3168,7 @@
@@ -3180,7 +3180,7 @@
copy of I1SRC each time we substitute it, in order to avoid creating
self-referential RTL when we will be substituting I0SRC for I0DEST
later. */
@@ -65,7 +65,7 @@ Index: gcc-4_6-branch/gcc/combine.c
i0_feeds_i1_n && i0dest_in_i0src);
substed_i1 = 1;
@@ -3198,7 +3198,7 @@
@@ -3214,7 +3214,7 @@
n_occurrences = 0;
subst_low_luid = DF_INSN_LUID (i0);
@@ -74,16 +74,16 @@ Index: gcc-4_6-branch/gcc/combine.c
substed_i0 = 1;
}
@@ -3260,7 +3260,7 @@
@@ -3276,7 +3276,7 @@
{
rtx t = i1pat;
if (i0_feeds_i1_n)
- t = subst (t, i0dest, i0src, 0, 0);
+ t = subst (t, i0dest, i0src, 0, 0, 0);
- t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0);
+ t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0);
XVECEXP (newpat, 0, --total_sets) = t;
}
@@ -3268,10 +3268,10 @@
@@ -3284,10 +3284,10 @@
{
rtx t = i2pat;
if (i1_feeds_i2_n)
@@ -91,12 +91,12 @@ Index: gcc-4_6-branch/gcc/combine.c
+ t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0,
i0_feeds_i1_n && i0dest_in_i0src);
if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
- t = subst (t, i0dest, i0src, 0, 0);
+ t = subst (t, i0dest, i0src, 0, 0, 0);
- t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0);
+ t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0);
XVECEXP (newpat, 0, --total_sets) = t;
}
@@ -4943,11 +4943,13 @@
@@ -4959,11 +4959,13 @@
IN_DEST is nonzero if we are processing the SET_DEST of a SET.
@@ -111,7 +111,7 @@ Index: gcc-4_6-branch/gcc/combine.c
{
enum rtx_code code = GET_CODE (x);
enum machine_mode op0_mode = VOIDmode;
@@ -5008,7 +5010,7 @@
@@ -5024,7 +5026,7 @@
&& GET_CODE (XVECEXP (x, 0, 0)) == SET
&& GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS)
{
@@ -120,7 +120,7 @@ Index: gcc-4_6-branch/gcc/combine.c
/* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER
@@ -5025,7 +5027,7 @@
@@ -5041,7 +5043,7 @@
&& GET_CODE (dest) != CC0
&& GET_CODE (dest) != PC)
{
@@ -129,7 +129,7 @@ Index: gcc-4_6-branch/gcc/combine.c
/* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER
@@ -5071,8 +5073,8 @@
@@ -5087,8 +5089,8 @@
}
else
{
@@ -140,7 +140,7 @@ Index: gcc-4_6-branch/gcc/combine.c
/* If this substitution failed, this whole thing
fails. */
@@ -5149,7 +5151,9 @@
@@ -5165,7 +5167,9 @@
&& (code == SUBREG || code == STRICT_LOW_PART
|| code == ZERO_EXTRACT))
|| code == SET)
@@ -151,7 +151,7 @@ Index: gcc-4_6-branch/gcc/combine.c
/* If we found that we will have to reject this combination,
indicate that by returning the CLOBBER ourselves, rather than
@@ -5206,7 +5210,7 @@
@@ -5222,7 +5226,7 @@
/* If X is sufficiently simple, don't bother trying to do anything
with it. */
if (code != CONST_INT && code != REG && code != CLOBBER)
@@ -160,7 +160,7 @@ Index: gcc-4_6-branch/gcc/combine.c
if (GET_CODE (x) == code)
break;
@@ -5226,10 +5230,12 @@
@@ -5242,10 +5246,12 @@
expression.
OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero
@@ -175,7 +175,7 @@ Index: gcc-4_6-branch/gcc/combine.c
{
enum rtx_code code = GET_CODE (x);
enum machine_mode mode = GET_MODE (x);
@@ -5284,8 +5290,8 @@
@@ -5300,8 +5306,8 @@
false arms to store-flag values. Be careful to use copy_rtx
here since true_rtx or false_rtx might share RTL with x as a
result of the if_then_else_cond call above. */
@@ -186,7 +186,7 @@ Index: gcc-4_6-branch/gcc/combine.c
/* If true_rtx and false_rtx are not general_operands, an if_then_else
is unlikely to be simpler. */
@@ -5629,7 +5635,7 @@
@@ -5645,7 +5651,7 @@
{
/* Try to simplify the expression further. */
rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1));
@@ -195,7 +195,7 @@ Index: gcc-4_6-branch/gcc/combine.c
/* If we could, great. If not, do not go ahead with the IOR
replacement, since PLUS appears in many special purpose
@@ -5722,7 +5728,16 @@
@@ -5738,7 +5744,16 @@
ZERO_EXTRACT is indeed appropriate, it will be placed back by
the call to make_compound_operation in the SET case. */
@@ -213,7 +213,7 @@ Index: gcc-4_6-branch/gcc/combine.c
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx
&& mode == GET_MODE (op0)
@@ -5768,7 +5783,10 @@
@@ -5784,7 +5799,10 @@
/* If STORE_FLAG_VALUE is -1, we have cases similar to
those above. */
@@ -225,7 +225,7 @@ Index: gcc-4_6-branch/gcc/combine.c
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx
&& (num_sign_bit_copies (op0, mode)
@@ -5966,11 +5984,11 @@
@@ -5982,11 +6000,11 @@
if (reg_mentioned_p (from, true_rtx))
true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code,
from, true_val),
@@ -239,7 +239,7 @@ Index: gcc-4_6-branch/gcc/combine.c
SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx);
SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx);
@@ -6187,11 +6205,11 @@
@@ -6203,11 +6221,11 @@
{
temp = subst (simplify_gen_relational (true_code, m, VOIDmode,
cond_op0, cond_op1),
@@ -37,8 +37,10 @@
* config/arm/arm.md (*arm_movqi_insn): Compute attr "length".
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-10-11 01:56:19 +0000
Index: gcc-4_6-branch/gcc/config/arm/arm-protos.h
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm-protos.h 2012-03-05 16:07:15.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm-protos.h 2012-03-05 16:07:50.392936694 -0800
@@ -59,6 +59,7 @@
int);
extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
@@ -47,10 +49,10 @@
extern int arm_const_double_rtx (rtx);
extern int neg_const_double_rtx_ok_for_fpa (rtx);
extern int vfp3_const_double_rtx (rtx);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
+++ new/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
Index: gcc-4_6-branch/gcc/config/arm/arm.c
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 16:07:15.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 16:07:50.400936694 -0800
@@ -2065,6 +2065,28 @@
fix_cm3_ldrd = 0;
}
@@ -80,7 +82,7 @@
if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
@@ -6106,7 +6128,7 @@
@@ -6123,7 +6145,7 @@
addresses based on the frame pointer or arg pointer until the
reload pass starts. This is so that eliminating such addresses
into stack based ones won't produce impossible code. */
@@ -89,7 +91,7 @@
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
/* ??? Not clear if this is right. Experiment. */
@@ -22226,6 +22248,10 @@
@@ -22251,6 +22273,10 @@
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
@@ -100,22 +102,22 @@
/* Tag_ABI_FP_16bit_format. */
if (arm_fp16_format)
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
+++ new/gcc/config/arm/arm.md 2011-10-11 02:31:01 +0000
@@ -113,6 +113,10 @@
(UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
Index: gcc-4_6-branch/gcc/config/arm/arm.md
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.md 2012-03-05 16:07:15.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.md 2012-03-05 16:09:26.284941314 -0800
@@ -114,6 +114,10 @@
; another symbolic address.
(UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
+ (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access
(UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form.
+ (UNSPEC_UNALIGNED_LOAD 30) ; Used to represent ldr/ldrh instructions that access
+ ; unaligned locations, on architectures which support
+ ; that.
+ (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh.
+ (UNSPEC_UNALIGNED_STORE 31) ; Same for str/strh.
]
)
@@ -2463,10 +2467,10 @@
@@ -2461,10 +2465,10 @@
;;; this insv pattern, so this pattern needs to be reevalutated.
(define_expand "insv"
@@ -130,51 +132,59 @@
"TARGET_ARM || arm_arch_thumb2"
"
{
@@ -2477,35 +2481,70 @@
@@ -2475,35 +2479,70 @@
if (arm_arch_thumb2)
{
- bool use_bfi = TRUE;
-
- if (GET_CODE (operands[3]) == CONST_INT)
- {
- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
-
- if (val == 0)
- {
- emit_insn (gen_insv_zero (operands[0], operands[1],
- operands[2]));
+ if (unaligned_access && MEM_P (operands[0])
+ && s_register_operand (operands[3], GET_MODE (operands[3]))
+ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
+ {
{
- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)
+ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width
+ - start_bit;
+
- if (val == 0)
+ if (width == 32)
+ {
{
- emit_insn (gen_insv_zero (operands[0], operands[1],
- operands[2]));
- DONE;
+ base_addr = adjust_address (operands[0], SImode,
+ start_bit / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
+ }
}
+ else
+ {
+ rtx tmp = gen_reg_rtx (HImode);
+
- /* See if the set can be done with a single orr instruction. */
- if (val == mask && const_ok_for_arm (val << start_bit))
- use_bfi = FALSE;
+ base_addr = adjust_address (operands[0], HImode,
+ start_bit / BITS_PER_UNIT);
+ emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
+ emit_insn (gen_unaligned_storehi (base_addr, tmp));
+ }
+ DONE;
+ }
}
-
- if (use_bfi)
+ else if (s_register_operand (operands[0], GET_MODE (operands[0])))
+ {
{
- if (GET_CODE (operands[3]) != REG)
- operands[3] = force_reg (SImode, operands[3]);
+ bool use_bfi = TRUE;
+
- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
@@ -198,24 +208,9 @@
+
+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
DONE;
}
-
- /* See if the set can be done with a single orr instruction. */
- if (val == mask && const_ok_for_arm (val << start_bit))
- use_bfi = FALSE;
- }
-
- if (use_bfi)
- {
- if (GET_CODE (operands[3]) != REG)
- operands[3] = force_reg (SImode, operands[3]);
-
- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
- }
+ }
+ DONE;
+ }
}
+ else
+ FAIL;
}
@@ -226,7 +221,7 @@
target = copy_rtx (operands[0]);
/* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
subreg as the final target. */
@@ -3697,12 +3736,10 @@
@@ -3695,12 +3734,10 @@
;; to reduce register pressure later on.
(define_expand "extzv"
@@ -243,7 +238,7 @@
"TARGET_THUMB1 || arm_arch_thumb2"
"
{
@@ -3711,10 +3748,57 @@
@@ -3709,10 +3746,57 @@
if (arm_arch_thumb2)
{
@@ -304,7 +299,7 @@
operands[3] = GEN_INT (rshift);
@@ -3724,12 +3808,154 @@
@@ -3722,12 +3806,154 @@
DONE;
}
@@ -462,7 +457,7 @@
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "const_int_operand" "M")
@@ -6038,8 +6264,8 @@
@@ -6069,8 +6295,8 @@
(define_insn "*arm_movqi_insn"
@@ -473,7 +468,7 @@
"TARGET_32BIT
&& ( register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode))"
@@ -6047,10 +6273,14 @@
@@ -6078,10 +6304,14 @@
mov%?\\t%0, %1
mvn%?\\t%0, #%B1
ldr%(b%)\\t%0, %1
@@ -491,10 +486,10 @@
)
(define_insn "*thumb1_movqi_insn"
=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
+++ new/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
Index: gcc-4_6-branch/gcc/config/arm/arm.opt
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.opt 2012-03-05 16:07:14.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.opt 2012-03-05 16:07:50.404936697 -0800
@@ -173,3 +173,7 @@
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
@@ -503,10 +498,10 @@
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned word and halfword accesses to packed data.
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/constraints.md 2011-10-11 02:31:01 +0000
Index: gcc-4_6-branch/gcc/config/arm/constraints.md
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/constraints.md 2012-03-05 16:07:14.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/constraints.md 2012-03-05 16:07:50.404936697 -0800
@@ -36,6 +36,7 @@
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -543,10 +538,10 @@
;; We used to have constraint letters for S and R in ARM state, but
;; all uses of these now appear to have been removed.
=== modified file 'gcc/expmed.c'
--- old/gcc/expmed.c 2011-05-22 19:02:59 +0000
+++ new/gcc/expmed.c 2011-10-11 02:31:01 +0000
Index: gcc-4_6-branch/gcc/expmed.c
===================================================================
--- gcc-4_6-branch.orig/gcc/expmed.c 2012-01-04 15:37:51.000000000 -0800
+++ gcc-4_6-branch/gcc/expmed.c 2012-03-05 16:07:50.404936697 -0800
@@ -657,6 +657,10 @@
&& GET_MODE (value) != BLKmode
&& bitsize > 0
@@ -625,4 +620,3 @@
if (xtarget == 0)
xtarget = xspec_target = gen_reg_rtx (tmode);
@@ -48,8 +48,10 @@
Return true for NEON.
=== modified file 'gcc/testsuite/gcc.dg/vect/pr30858.c'
--- old/gcc/testsuite/gcc.dg/vect/pr30858.c 2007-02-22 08:16:18 +0000
+++ new/gcc/testsuite/gcc.dg/vect/pr30858.c 2011-11-20 09:11:09 +0000
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/pr30858.c
===================================================================
--- gcc-4_6-branch.orig/gcc/testsuite/gcc.dg/vect/pr30858.c 2012-01-04 15:33:52.000000000 -0800
+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/pr30858.c 2012-03-05 16:23:47.748983031 -0800
@@ -11,5 +11,6 @@
}
@@ -58,10 +60,10 @@
+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cond-8a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2011-11-20 09:11:09 +0000
Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2012-03-05 16:23:47.748983031 -0800
@@ -0,0 +1,75 @@
+/* { dg-require-effective-target vect_condition } */
+
@@ -138,24 +140,10 @@
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-22 16:52:23 +0000
@@ -3150,7 +3150,8 @@
|| [istarget ia64-*-*]
|| [istarget i?86-*-*]
|| [istarget spu-*-*]
- || [istarget x86_64-*-*] } {
+ || [istarget x86_64-*-*]
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
set et_vect_cond_saved 1
}
}
=== modified file 'gcc/tree-vect-patterns.c'
--- old/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000
+++ new/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000
Index: gcc-4_6-branch/gcc/tree-vect-patterns.c
===================================================================
--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-03-05 16:23:10.000000000 -0800
+++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-03-05 16:23:47.748983031 -0800
@@ -50,13 +50,16 @@
tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
@@ -299,14 +287,14 @@
def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
- set_vinfo_for_stmt (def_stmt,
- new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
+ def_stmt_info = vinfo_for_stmt (def_stmt);
- gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
def_stmt_info = vinfo_for_stmt (def_stmt);
+ if (def_stmt_info == NULL)
+ {
+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+ set_vinfo_for_stmt (def_stmt, def_stmt_info);
+ }
gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
- def_stmt_info = vinfo_for_stmt (def_stmt);
+ gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
STMT_VINFO_DEF_TYPE (def_stmt_info)
= STMT_VINFO_DEF_TYPE (orig_stmt_info);
@@ -316,10 +304,10 @@
}
}
=== modified file 'gcc/tree-vect-stmts.c'
--- old/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000
+++ new/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000
Index: gcc-4_6-branch/gcc/tree-vect-stmts.c
===================================================================
--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-03-05 16:23:11.000000000 -0800
+++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-03-05 16:23:47.748983031 -0800
@@ -655,20 +655,40 @@
tree rhs = gimple_assign_rhs1 (stmt);
unsigned int op_num;
@@ -372,10 +360,10 @@
break;
case GIMPLE_BINARY_RHS:
=== modified file 'gcc/tree-vectorizer.h'
--- old/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000
+++ new/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000
Index: gcc-4_6-branch/gcc/tree-vectorizer.h
===================================================================
--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-03-05 16:23:11.000000000 -0800
+++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-03-05 16:23:47.748983031 -0800
@@ -917,7 +917,7 @@
Additional pattern recognition functions can (and will) be added
in the future. */
@@ -385,4 +373,3 @@
void vect_pattern_recog (loop_vec_info);
/* In tree-vectorizer.c. */
@@ -1,354 +0,0 @@
2011-12-30 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-10-12 Richard Sandiford <richard.sandiford@linaro.org>
* expr.h (copy_blkmode_to_reg): Declare.
* expr.c (copy_blkmode_to_reg): New function.
(expand_assignment): Don't expand register RESULT_DECLs before
the lhs. Use copy_blkmode_to_reg to copy BLKmode values into a
RESULT_DECL register.
(expand_expr_real_1): Handle BLKmode decls when looking for promotion.
* stmt.c (expand_return): Move BLKmode-to-register code into
copy_blkmode_to_reg.
=== modified file 'gcc/expr.c'
--- old/gcc/expr.c 2011-10-23 13:33:07 +0000
+++ new/gcc/expr.c 2011-12-30 09:41:30 +0000
@@ -2180,6 +2180,111 @@
return tgtblk;
}
+/* Copy BLKmode value SRC into a register of mode MODE. Return the
+ register, or null if SRC is zero-sized. If MODE is BLKmode, use the
+ smallest integer mode large enough to hold the whole value instead.
+ This is used on targets that return BLKmode values in registers. */
+
+rtx
+copy_blkmode_to_reg (enum machine_mode mode, tree src)
+{
+ int i, n_regs;
+ unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
+ unsigned int bitsize;
+ rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
+ enum machine_mode dst_mode;
+
+ gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode);
+
+ x = expand_normal (src);
+
+ bytes = int_size_in_bytes (TREE_TYPE (src));
+ if (bytes == 0)
+ return NULL_RTX;
+
+ /* If the structure doesn't take up a whole number of words, see
+ whether the register value should be padded on the left or on
+ the right. Set PADDING_CORRECTION to the number of padding
+ bits needed on the left side.
+
+ In most ABIs, the structure will be returned at the least
+ significant end of the register, which translates to right padding
+ on little-endian targets and left padding on big-endian targets.
+ The opposite holds if the structure is returned at the most
+ significant end of the register. */
+ if (bytes % UNITS_PER_WORD != 0
+ && (targetm.calls.return_in_msb (TREE_TYPE (src))
+ ? !BYTES_BIG_ENDIAN
+ : BYTES_BIG_ENDIAN))
+ padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
+ * BITS_PER_UNIT));
+
+ n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ dst_words = XALLOCAVEC (rtx, n_regs);
+ bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
+
+ /* Copy the structure BITSIZE bits at a time. */
+ for (bitpos = 0, xbitpos = padding_correction;
+ bitpos < bytes * BITS_PER_UNIT;
+ bitpos += bitsize, xbitpos += bitsize)
+ {
+ /* We need a new destination pseudo each time xbitpos is
+ on a word boundary and when xbitpos == padding_correction
+ (the first time through). */
+ if (xbitpos % BITS_PER_WORD == 0
+ || xbitpos == padding_correction)
+ {
+ /* Generate an appropriate register. */
+ dst_word = gen_reg_rtx (word_mode);
+ dst_words[xbitpos / BITS_PER_WORD] = dst_word;
+
+ /* Clear the destination before we move anything into it. */
+ emit_move_insn (dst_word, CONST0_RTX (word_mode));
+ }
+
+ /* We need a new source operand each time bitpos is on a word
+ boundary. */
+ if (bitpos % BITS_PER_WORD == 0)
+ src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode);
+
+ /* Use bitpos for the source extraction (left justified) and
+ xbitpos for the destination store (right justified). */
+ store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD, word_mode,
+ extract_bit_field (src_word, bitsize,
+ bitpos % BITS_PER_WORD, 1, false,
+ NULL_RTX, word_mode, word_mode));
+ }
+
+ if (mode == BLKmode)
+ {
+ /* Find the smallest integer mode large enough to hold the
+ entire structure. */
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ mode != VOIDmode;
+ mode = GET_MODE_WIDER_MODE (mode))
+ /* Have we found a large enough mode? */
+ if (GET_MODE_SIZE (mode) >= bytes)
+ break;
+
+ /* A suitable mode should have been found. */
+ gcc_assert (mode != VOIDmode);
+ }
+
+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode))
+ dst_mode = word_mode;
+ else
+ dst_mode = mode;
+ dst = gen_reg_rtx (dst_mode);
+
+ for (i = 0; i < n_regs; i++)
+ emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]);
+
+ if (mode != dst_mode)
+ dst = gen_lowpart (mode, dst);
+
+ return dst;
+}
+
/* Add a USE expression for REG to the (possibly empty) list pointed
to by CALL_FUSAGE. REG must denote a hard register. */
@@ -4382,7 +4487,9 @@
if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from)
&& COMPLETE_TYPE_P (TREE_TYPE (from))
&& TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST
- && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL)
+ && ! (((TREE_CODE (to) == VAR_DECL
+ || TREE_CODE (to) == PARM_DECL
+ || TREE_CODE (to) == RESULT_DECL)
&& REG_P (DECL_RTL (to)))
|| TREE_CODE (to) == SSA_NAME))
{
@@ -4428,12 +4535,15 @@
rtx temp;
push_temp_slots ();
- temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
+ if (REG_P (to_rtx) && TYPE_MODE (TREE_TYPE (from)) == BLKmode)
+ temp = copy_blkmode_to_reg (GET_MODE (to_rtx), from);
+ else
+ temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
if (GET_CODE (to_rtx) == PARALLEL)
emit_group_load (to_rtx, temp, TREE_TYPE (from),
int_size_in_bytes (TREE_TYPE (from)));
- else
+ else if (temp)
emit_move_insn (to_rtx, temp);
preserve_temp_slots (to_rtx);
@@ -8567,10 +8677,15 @@
return temp;
}
- /* If the mode of DECL_RTL does not match that of the decl, it
- must be a promoted value. We return a SUBREG of the wanted mode,
- but mark it so that we know that it was already extended. */
- if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp))
+ /* If the mode of DECL_RTL does not match that of the decl,
+ there are two cases: we are dealing with a BLKmode value
+ that is returned in a register, or we are dealing with
+ a promoted value. In the latter case, return a SUBREG
+ of the wanted mode, but mark it so that we know that it
+ was already extended. */
+ if (REG_P (decl_rtl)
+ && DECL_MODE (exp) != BLKmode
+ && GET_MODE (decl_rtl) != DECL_MODE (exp))
{
enum machine_mode pmode;
=== modified file 'gcc/expr.h'
--- old/gcc/expr.h 2011-03-03 21:56:58 +0000
+++ new/gcc/expr.h 2011-10-12 08:01:43 +0000
@@ -324,6 +324,8 @@
/* Copy BLKmode object from a set of registers. */
extern rtx copy_blkmode_from_reg (rtx, rtx, tree);
+extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
+
/* Mark REG as holding a parameter for the next CALL_INSN. */
extern void use_reg (rtx *, rtx);
=== modified file 'gcc/stmt.c'
--- old/gcc/stmt.c 2011-03-03 21:56:58 +0000
+++ new/gcc/stmt.c 2011-10-12 08:01:43 +0000
@@ -1684,119 +1684,21 @@
expand_value_return (result_rtl);
/* If the result is an aggregate that is being returned in one (or more)
- registers, load the registers here. The compiler currently can't handle
- copying a BLKmode value into registers. We could put this code in a
- more general area (for use by everyone instead of just function
- call/return), but until this feature is generally usable it is kept here
- (and in expand_call). */
+ registers, load the registers here. */
else if (retval_rhs != 0
&& TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode
&& REG_P (result_rtl))
{
- int i;
- unsigned HOST_WIDE_INT bitpos, xbitpos;
- unsigned HOST_WIDE_INT padding_correction = 0;
- unsigned HOST_WIDE_INT bytes
- = int_size_in_bytes (TREE_TYPE (retval_rhs));
- int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- unsigned int bitsize
- = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD);
- rtx *result_pseudos = XALLOCAVEC (rtx, n_regs);
- rtx result_reg, src = NULL_RTX, dst = NULL_RTX;
- rtx result_val = expand_normal (retval_rhs);
- enum machine_mode tmpmode, result_reg_mode;
-
- if (bytes == 0)
- {
- expand_null_return ();
- return;
- }
-
- /* If the structure doesn't take up a whole number of words, see
- whether the register value should be padded on the left or on
- the right. Set PADDING_CORRECTION to the number of padding
- bits needed on the left side.
-
- In most ABIs, the structure will be returned at the least end of
- the register, which translates to right padding on little-endian
- targets and left padding on big-endian targets. The opposite
- holds if the structure is returned at the most significant
- end of the register. */
- if (bytes % UNITS_PER_WORD != 0
- && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs))
- ? !BYTES_BIG_ENDIAN
- : BYTES_BIG_ENDIAN))
- padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
- * BITS_PER_UNIT));
-
- /* Copy the structure BITSIZE bits at a time. */
- for (bitpos = 0, xbitpos = padding_correction;
- bitpos < bytes * BITS_PER_UNIT;
- bitpos += bitsize, xbitpos += bitsize)
- {
- /* We need a new destination pseudo each time xbitpos is
- on a word boundary and when xbitpos == padding_correction
- (the first time through). */
- if (xbitpos % BITS_PER_WORD == 0
- || xbitpos == padding_correction)
- {
- /* Generate an appropriate register. */
- dst = gen_reg_rtx (word_mode);
- result_pseudos[xbitpos / BITS_PER_WORD] = dst;
-
- /* Clear the destination before we move anything into it. */
- emit_move_insn (dst, CONST0_RTX (GET_MODE (dst)));
- }
-
- /* We need a new source operand each time bitpos is on a word
- boundary. */
- if (bitpos % BITS_PER_WORD == 0)
- src = operand_subword_force (result_val,
- bitpos / BITS_PER_WORD,
- BLKmode);
-
- /* Use bitpos for the source extraction (left justified) and
- xbitpos for the destination store (right justified). */
- store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, word_mode,
- extract_bit_field (src, bitsize,
- bitpos % BITS_PER_WORD, 1, false,
- NULL_RTX, word_mode, word_mode));
- }
-
- tmpmode = GET_MODE (result_rtl);
- if (tmpmode == BLKmode)
- {
- /* Find the smallest integer mode large enough to hold the
- entire structure and use that mode instead of BLKmode
- on the USE insn for the return register. */
- for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
- tmpmode != VOIDmode;
- tmpmode = GET_MODE_WIDER_MODE (tmpmode))
- /* Have we found a large enough mode? */
- if (GET_MODE_SIZE (tmpmode) >= bytes)
- break;
-
- /* A suitable mode should have been found. */
- gcc_assert (tmpmode != VOIDmode);
-
- PUT_MODE (result_rtl, tmpmode);
- }
-
- if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode))
- result_reg_mode = word_mode;
+ val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs);
+ if (val)
+ {
+ /* Use the mode of the result value on the return register. */
+ PUT_MODE (result_rtl, GET_MODE (val));
+ expand_value_return (val);
+ }
else
- result_reg_mode = tmpmode;
- result_reg = gen_reg_rtx (result_reg_mode);
-
- for (i = 0; i < n_regs; i++)
- emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode),
- result_pseudos[i]);
-
- if (tmpmode != result_reg_mode)
- result_reg = gen_lowpart (tmpmode, result_reg);
-
- expand_value_return (result_reg);
+ expand_null_return ();
}
else if (retval_rhs != 0
&& !VOID_TYPE_P (TREE_TYPE (retval_rhs))
=== added file 'gcc/testsuite/g++.dg/pr48660.C'
--- old/gcc/testsuite/g++.dg/pr48660.C 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/g++.dg/pr48660.C 2011-10-12 08:01:43 +0000
@@ -0,0 +1,22 @@
+template<int N> struct val { char a[N]; };
+
+class Base
+{
+public:
+ virtual val<1> get1() const = 0;
+ virtual val<2> get2() const = 0;
+ virtual val<3> get3() const = 0;
+ virtual val<4> get4() const = 0;
+};
+
+class Derived : public virtual Base
+{
+public:
+ virtual val<1> get1() const { return foo->get1(); }
+ virtual val<2> get2() const { return foo->get2(); }
+ virtual val<3> get3() const { return foo->get3(); }
+ virtual val<4> get4() const { return foo->get4(); }
+ Base *foo;
+};
+
+Base* make() { return new Derived; }
@@ -0,0 +1,104 @@
2012-01-12 Ulrich Weigand <ulrich.weigand@linaro.org>
LP 879725
Backport from mainline:
2012-01-02 Revital Eres <revital.eres@linaro.org>
gcc/
* ddg.c (def_has_ccmode_p): New function.
(add_cross_iteration_register_deps,
create_ddg_dep_from_intra_loop_link): Call it.
gcc/testsuite/
* gcc.dg/sms-11.c: New file.
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-10-02 06:56:53 +0000
+++ new/gcc/ddg.c 2012-01-10 16:05:14 +0000
@@ -166,6 +166,24 @@
return false;
}
+/* Return true if the register of any definition in INSN has a mode of
+ class MODE_CC. Otherwise return false. */
+static bool
+def_has_ccmode_p (rtx insn)
+{
+ df_ref *def;
+
+ for (def = DF_INSN_DEFS (insn); *def; def++)
+ {
+ enum machine_mode mode = GET_MODE (DF_REF_REG (*def));
+
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return true;
+ }
+
+ return false;
+}
+
/* Computes the dependence parameters (latency, distance etc.), creates
a ddg_edge and adds it to the given DDG. */
static void
@@ -202,6 +220,7 @@
whose register has multiple defs in the loop. */
if (flag_modulo_sched_allow_regmoves
&& (t == ANTI_DEP && dt == REG_DEP)
+ && !def_has_ccmode_p (dest_node->insn)
&& !autoinc_var_is_used_p (dest_node->insn, src_node->insn))
{
rtx set;
@@ -335,7 +354,8 @@
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
|| !flag_modulo_sched_allow_regmoves
|| JUMP_P (use_node->insn)
- || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn))
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
+ || def_has_ccmode_p (DF_REF_INSN (last_def)))
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
=== added file 'gcc/testsuite/gcc.dg/sms-11.c'
--- old/gcc/testsuite/gcc.dg/sms-11.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/sms-11.c 2012-01-10 16:05:14 +0000
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
+
+extern void abort (void);
+
+float out[4][4] = { 6, 6, 7, 5, 6, 7, 5, 5, 6, 4, 4, 4, 6, 2, 3, 4 };
+
+void
+invert (void)
+{
+ int i, j, k = 0, swap;
+ float tmp[4][4] = { 5, 6, 7, 5, 6, 7, 5, 5, 4, 4, 4, 4, 3, 2, 3, 4 };
+
+ for (i = 0; i < 4; i++)
+ {
+ for (j = i + 1; j < 4; j++)
+ if (tmp[j][i] > tmp[i][i])
+ swap = j;
+
+ if (swap != i)
+ tmp[i][k] = tmp[swap][k];
+ }
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ if (tmp[i][j] != out[i][j])
+ abort ();
+}
+
+int
+main ()
+{
+ invert ();
+ return 0;
+}
+
+/* { dg-final { cleanup-rtl-dump "sms" } } */
@@ -0,0 +1,76 @@
2012-01-18 Michael Hope <michael.hope@linaro.org>
Backport from mainline r183126:
2012-01-12 Ira Rosen <irar@il.ibm.com>
gcc/
PR tree-optimization/51799
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Check
that the last operation is a type demotion.
gcc/testsuite/
* gcc.dg/vect/pr51799.c: New test.
* gcc.dg/vect/vect-widen-shift-u8.c: Expect two widening shift
patterns.
=== added file 'gcc/testsuite/gcc.dg/vect/pr51799.c'
--- old/gcc/testsuite/gcc.dg/vect/pr51799.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/pr51799.c 2012-01-18 01:53:19 +0000
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned long uint32_t;
+void
+f0a (uint32_t * __restrict__ result, int8_t * __restrict__ arg1,
+ uint32_t * __restrict__ arg4, int8_t temp_6)
+{
+ int idx;
+ for (idx = 0; idx < 416; idx += 1)
+ {
+ result[idx] = (uint8_t)(((arg1[idx] << 7) + arg4[idx]) * temp_6);
+ }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2012-01-18 01:53:19 +0000
@@ -59,7 +59,6 @@
return 0;
}
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
-
=== modified file 'gcc/tree-vect-patterns.c'
--- old/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000
+++ new/gcc/tree-vect-patterns.c 2012-01-18 01:53:19 +0000
@@ -1224,13 +1224,15 @@
{
use_lhs = gimple_assign_lhs (use_stmt);
use_type = TREE_TYPE (use_lhs);
- /* Support only type promotion or signedess change. Check that USE_TYPE
- is not bigger than the original type. */
+ /* Support only type demotion or signedness change. */
if (!INTEGRAL_TYPE_P (use_type)
- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
- || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
+ || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
return NULL;
+ /* Check that NEW_TYPE is not bigger than the conversion result. */
+ if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
+ return NULL;
+
if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
|| TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
{
@@ -0,0 +1,45 @@
2012-01-16 Michael Hope <michael.hope@linaro.org>
Backport from mainline r183011:
2012-01-09 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
* config/arm/arm-cores.def (cortex-a15): Use cortex_a15_tune for
tuning parameters.
* config/arm/arm.c (arm_cortex_a15_tune): New static variable.
=== modified file 'gcc/config/arm/arm-cores.def'
--- old/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000
+++ new/gcc/config/arm/arm-cores.def 2012-01-15 22:02:31 +0000
@@ -128,7 +128,7 @@
ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2012-01-05 15:35:39 +0000
+++ new/gcc/config/arm/arm.c 2012-01-15 22:02:31 +0000
@@ -983,6 +983,17 @@
arm_default_branch_cost
};
+const struct tune_params arm_cortex_a15_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ 1, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */
+ false, /* Prefer constant pool. */
+ arm_cortex_a5_branch_cost
+};
+
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
@@ -0,0 +1,47 @@
2012-01-16 Michael Hope <michael.hope@linaro.org>
Backport from mainline r182561:
2011-12-20 Richard Henderson <rth@redhat.com>
gcc/
* config/arm/arm.md (*arm_cmpdi_unsigned): Enable for thumb2.
* config/arm/arm.c (arm_select_cc_mode): Use it.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2012-01-15 22:02:31 +0000
+++ new/gcc/config/arm/arm.c 2012-01-23 00:06:27 +0000
@@ -11602,7 +11602,7 @@
return CC_Zmode;
/* We can do an equality test in three Thumb instructions. */
- if (!TARGET_ARM)
+ if (!TARGET_32BIT)
return CC_Zmode;
/* FALLTHROUGH */
@@ -11614,7 +11614,7 @@
/* DImode unsigned comparisons can be implemented by cmp +
cmpeq without a scratch register. Not worth doing in
Thumb-2. */
- if (TARGET_ARM)
+ if (TARGET_32BIT)
return CC_CZmode;
/* FALLTHROUGH */
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2012-01-05 15:35:39 +0000
+++ new/gcc/config/arm/arm.md 2012-01-15 21:02:00 +0000
@@ -7515,8 +7515,8 @@
[(set (reg:CC_CZ CC_REGNUM)
(compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
(match_operand:DI 1 "arm_di_operand" "rDi")))]
- "TARGET_ARM"
- "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1"
+ "TARGET_32BIT"
+ "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
[(set_attr "conds" "set")
(set_attr "length" "8")]
)
@@ -0,0 +1,63 @@
2012-01-16 Michael Hope <michael.hope@linaro.org>
Backport from mainline r181210:
gcc/
2011-11-07 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
* config/arm/arm-cores.def: Add -mcpu=cortex-a7.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Likewise.
* config/arm/bpabi.h (BE8_LINK_SPEC): Add Cortex-A7.
* doc/invoke.texi: Document -mcpu=cortex-a7.
=== modified file 'gcc/config/arm/arm-cores.def'
--- old/gcc/config/arm/arm-cores.def 2012-01-15 22:02:31 +0000
+++ new/gcc/config/arm/arm-cores.def 2012-01-23 00:36:02 +0000
@@ -126,6 +126,7 @@
ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
+ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
=== modified file 'gcc/config/arm/arm-tune.md'
--- old/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000
+++ new/gcc/config/arm/arm-tune.md 2012-01-15 22:43:29 +0000
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
=== modified file 'gcc/config/arm/bpabi.h'
--- old/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000
+++ new/gcc/config/arm/bpabi.h 2012-01-15 22:43:29 +0000
@@ -56,6 +56,7 @@
"|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
+ "|mcpu=cortex-a7"\
"|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\
":%{!r:--be8}}}"
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2012-01-05 15:35:39 +0000
+++ new/gcc/doc/invoke.texi 2012-01-15 22:43:29 +0000
@@ -10202,8 +10202,8 @@
@samp{arm10e}, @samp{arm1020e}, @samp{arm1022e},
@samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp},
@samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s},
-@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15},
-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5},
+@samp{cortex-a5}, @samp{cortex-a7}, @samp{cortex-a8}, @samp{cortex-a9},
+@samp{cortex-a15}, @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5},
@samp{cortex-m4}, @samp{cortex-m3},
@samp{cortex-m1},
@samp{cortex-m0},
@@ -0,0 +1,25 @@
2012-01-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline
2012-01-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/51819
* config/arm/arm.c (arm_print_operand): Correct output of alignment
hints for neon loads and stores.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2012-01-23 00:06:27 +0000
+++ new/gcc/config/arm/arm.c 2012-01-23 18:54:21 +0000
@@ -17463,9 +17463,9 @@
/* Only certain alignment specifiers are supported by the hardware. */
if (memsize == 16 && (align % 32) == 0)
align_bits = 256;
- else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
+ else if (memsize == 16 && (align % 16) == 0)
align_bits = 128;
- else if ((align % 8) == 0)
+ else if (memsize >= 8 && (align % 8) == 0)
align_bits = 64;
else
align_bits = 0;
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,28 @@
2012-02-20 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_print_operand): Avoid null-pointer
dereference from MEM_SIZE.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2012-02-09 00:47:59 +0000
+++ new/gcc/config/arm/arm.c 2012-02-20 15:32:26 +0000
@@ -17446,6 +17446,7 @@
rtx addr;
bool postinc = FALSE;
unsigned align, memsize, align_bits;
+ rtx memsize_rtx;
gcc_assert (GET_CODE (x) == MEM);
addr = XEXP (x, 0);
@@ -17460,7 +17461,8 @@
instruction (for some alignments) as an aid to the memory subsystem
of the target. */
align = MEM_ALIGN (x) >> 3;
- memsize = INTVAL (MEM_SIZE (x));
+ memsize_rtx = MEM_SIZE (x);
+ memsize = memsize_rtx ? INTVAL (memsize_rtx) : 0;
/* Only certain alignment specifiers are supported by the hardware. */
if (memsize == 16 && (align % 32) == 0)
@@ -0,0 +1,126 @@
2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
* config/arm/arm.c (vfp3_const_double_for_fract_bits): Define.
* config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare.
* config/arm/constraints.md ("Dt"): New constraint.
* config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal):
New.
* config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New.
(*arm_combine_vcvt_f32_u32): New.
LP:#900426
2011-12-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/vfp.md (*combine_vcvt_f64_<FCVTI32typename>): Fix
formatting character for vmov.f64 case.
2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
* config/arm/arm.c (arm_print_operand): Remove wrongly merged code.
(vfp3_const_double_for_fract_bits): Likewise.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-12-06 10:42:29 +0000
+++ new/gcc/config/arm/arm-protos.h 2012-02-22 13:31:54 +0000
@@ -238,6 +238,7 @@
};
extern const struct tune_params *current_tune;
+extern int vfp3_const_double_for_fract_bits (rtx);
#endif /* RTX_CODE */
#endif /* ! GCC_ARM_PROTOS_H */
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-12-06 10:42:29 +0000
+++ new/gcc/config/arm/constraints.md 2012-02-22 13:31:54 +0000
@@ -29,7 +29,7 @@
;; in Thumb-1 state: I, J, K, L, M, N, O
;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
@@ -291,6 +291,12 @@
(and (match_code "const_double")
(match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)")))
+(define_constraint "Dt"
+ "@internal
+ In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation"
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)")))
+
(define_memory_constraint "Ut"
"@internal
In ARM/Thumb-2 state an address valid for loading/storing opaque structure
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-12-06 10:42:29 +0000
+++ new/gcc/config/arm/predicates.md 2012-02-22 13:31:54 +0000
@@ -725,6 +725,11 @@
return true;
})
+(define_predicate "const_double_vcvt_power_of_two_reciprocal"
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && TARGET_VFP
+ && vfp3_const_double_for_fract_bits (op)")))
+
(define_special_predicate "neon_struct_operand"
(and (match_code "mem")
(match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
=== modified file 'gcc/config/arm/vfp.md'
--- old/gcc/config/arm/vfp.md 2011-12-06 10:42:29 +0000
+++ new/gcc/config/arm/vfp.md 2012-02-22 13:31:54 +0000
@@ -1131,9 +1131,40 @@
(set_attr "type" "fcmpd")]
)
+;; Fixed point to floating point conversions.
+(define_code_iterator FCVT [unsigned_float float])
+(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")])
+
+(define_insn "*combine_vcvt_f32_<FCVTI32typename>"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0"))
+ (match_operand 2
+ "const_double_vcvt_power_of_two_reciprocal" "Dt")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math"
+ "vcvt.f32.<FCVTI32typename>\\t%0, %1, %v2"
+ [(set_attr "predicable" "no")
+ (set_attr "type" "f_cvt")]
+)
+
+;; Not the ideal way of implementing this. Ideally we would be able to split
+;; this into a move to a DP register and then a vcvt.f64.i32
+(define_insn "*combine_vcvt_f64_<FCVTI32typename>"
+ [(set (match_operand:DF 0 "s_register_operand" "=x,x,w")
+ (mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r"))
+ (match_operand 2
+ "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math
+ && !TARGET_VFP_SINGLE"
+ "@
+ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2
+ vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2
+ vmov.f64\\t%P0, %1, %1\; vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2"
+ [(set_attr "predicable" "no")
+ (set_attr "type" "f_cvt")
+ (set_attr "length" "8")]
+)
;; Store multiple insn used in function prologue.
-
(define_insn "*push_multi_vfp"
[(match_parallel 2 "multi_register_push"
[(set (match_operand:BLK 0 "memory_operand" "=m")
@@ -0,0 +1,80 @@
2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
LP:#922474
gcc/
* config/arm/sync.md (sync_lock_releasedi): Define.
(arm_sync_lock_releasedi): Likewise.
gcc/testsuite
Backport from mainline.
2012-01-30 Greta Yorsh <Greta.Yorsh@arm.com>
* gcc.target/arm/di-longlong64-sync-withldrexd.c: Accept
new code generated for __sync_lock_release.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2012-02-01 14:13:07 +0000
+++ new/gcc/config/arm/arm.md 2012-02-22 18:37:56 +0000
@@ -157,6 +157,7 @@
(VUNSPEC_SYNC_OP 23) ; Represent a sync_<op>
(VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_<op>
(VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_<op>
+ (VUNSPEC_SYNC_RELEASE 26) ; Represent a sync_lock_release.
]
)
=== modified file 'gcc/config/arm/sync.md'
--- old/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000
+++ new/gcc/config/arm/sync.md 2012-02-22 18:37:56 +0000
@@ -494,3 +494,36 @@
(set_attr "conds" "unconditional")
(set_attr "predicable" "no")])
+(define_expand "sync_lock_releasedi"
+ [(match_operand:DI 0 "memory_operand")
+ (match_operand:DI 1 "s_register_operand")]
+ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ rtx tmp1 = gen_reg_rtx (DImode);
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_lock_releasedi;
+ arm_expand_sync (DImode, &generator, operands[1], operands[0], NULL, tmp1);
+ DONE;
+ }
+)
+
+(define_insn "arm_sync_lock_releasedi"
+ [(set (match_operand:DI 2 "s_register_operand" "=&r")
+ (unspec_volatile:DI [(match_operand:DI 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:DI 0 "s_register_operand" "r")]
+ VUNSPEC_SYNC_RELEASE))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_memory" "1")
+ (set_attr "sync_result" "2")
+ (set_attr "sync_t1" "2")
+ (set_attr "sync_t2" "3")
+ (set_attr "sync_new_value" "0")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")]
+)
=== modified file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c'
--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000
+++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2012-02-22 18:37:56 +0000
@@ -10,8 +10,8 @@
#include "../../gcc.dg/di-longlong64-sync-1.c"
/* We should be using ldrexd, strexd and no helpers or shorter ldrex. */
-/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */
-/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */
+/* { dg-final { scan-assembler-times "\tldrexd" 48 } } */
+/* { dg-final { scan-assembler-times "\tstrexd" 48 } } */
/* { dg-final { scan-assembler-not "__sync_" } } */
/* { dg-final { scan-assembler-not "ldrex\t" } } */
/* { dg-final { scan-assembler-not "strex\t" } } */
@@ -0,0 +1,46 @@
2012-02-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
gcc/
2012-02-21 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
Revert r183011
* config/arm/arm-cores.def (cortex-a15): Use generic Cortex tuning
parameters.
* config/arm/arm.c (arm_cortex_a15_tune): Remove.
=== modified file 'gcc/config/arm/arm-cores.def'
--- old/gcc/config/arm/arm-cores.def 2012-01-23 00:36:02 +0000
+++ new/gcc/config/arm/arm-cores.def 2012-02-22 15:53:56 +0000
@@ -129,7 +129,7 @@
ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
+ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2012-02-24 16:20:29 +0000
+++ new/gcc/config/arm/arm.c 2012-02-24 17:33:58 +0000
@@ -988,17 +988,6 @@
arm_default_branch_cost
};
-const struct tune_params arm_cortex_a15_tune =
-{
- arm_9e_rtx_costs,
- NULL,
- 1, /* Constant limit. */
- 1, /* Max cond insns. */
- ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */
- false, /* Prefer constant pool. */
- arm_cortex_a5_branch_cost
-};
-
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
@@ -4,7 +4,6 @@ file://linaro/gcc-4.6-linaro-r106733.patch \
file://linaro/gcc-4.6-linaro-r106737.patch \
file://linaro/gcc-4.6-linaro-r106738.patch \
file://linaro/gcc-4.6-linaro-r106739.patch \
file://linaro/gcc-4.6-linaro-r106740.patch \
file://linaro/gcc-4.6-linaro-r106741.patch \
file://linaro/gcc-4.6-linaro-r106742.patch \
file://linaro/gcc-4.6-linaro-r106744.patch \
@@ -81,6 +80,17 @@ file://linaro/gcc-4.6-linaro-r106845.patch \
file://linaro/gcc-4.6-linaro-r106846.patch \
file://linaro/gcc-4.6-linaro-r106848.patch \
file://linaro/gcc-4.6-linaro-r106853.patch \
file://linaro/gcc-4.6-linaro-r106854.patch \
file://linaro/gcc-4.6-linaro-r106855.patch \
file://linaro/gcc-4.6-linaro-r106860.patch \
file://linaro/gcc-4.6-linaro-r106861.patch \
file://linaro/gcc-4.6-linaro-r106862.patch \
file://linaro/gcc-4.6-linaro-r106863.patch \
file://linaro/gcc-4.6-linaro-r106864.patch \
file://linaro/gcc-4.6-linaro-r106865.patch \
file://linaro/gcc-4.6-linaro-r106869.patch \
file://linaro/gcc-4.6-linaro-r106870.patch \
file://linaro/gcc-4.6-linaro-r106872.patch \
file://linaro/gcc-4.6-linaro-r106873.patch \
file://linaro/gcc-4.6-linaro-r106874.patch \
file://linaro/fix_linaro_106872.patch \
"