mirror of
https://github.com/openembedded/meta-openembedded.git
synced 2026-05-06 16:58:24 +00:00
gcc-4.6: Bring in linaro patches upto 07.2011 release
Signed-off-by: Khem Raj <raj.khem@gmail.com>
This commit is contained in:
@@ -0,0 +1,545 @@
|
||||
2011-06-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline.
|
||||
2011-06-03 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100)
|
||||
(strongarm1110): Use strongarm tuning.
|
||||
* config/arm/arm-protos.h (tune_params): Add max_insns_skipped
|
||||
field.
|
||||
* config/arm/arm.c (arm_strongarm_tune): New.
|
||||
(arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
|
||||
(arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune)
|
||||
(arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field
|
||||
setting, using previous defaults or 1 for Cortex-A5.
|
||||
(arm_option_override): Set max_insns_skipped from current tuning.
|
||||
|
||||
2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline.
|
||||
2011-06-02 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning.
|
||||
* config/arm/arm.c (arm_cortex_a5_branch_cost): New.
|
||||
(arm_cortex_a5_tune): New.
|
||||
|
||||
2011-06-02 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/arm/arm-protos.h (tune_params): Add branch_cost hook.
|
||||
* config/arm/arm.c (arm_default_branch_cost): New.
|
||||
(arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
|
||||
(arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune)
|
||||
(arm_fa726_tune): Set branch_cost field using
|
||||
arm_default_branch_cost.
|
||||
* config/arm/arm.h (BRANCH_COST): Use branch_cost hook from
|
||||
current_tune structure.
|
||||
* dojump.c (tm_p.h): Include file.
|
||||
|
||||
2011-06-02 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/arm/arm-cores.def (arm1156t2-s, arm1156t2f-s): Use v6t2
|
||||
tuning.
|
||||
(cortex-a5, cortex-a8, cortex-a15, cortex-r4, cortex-r4f, cortex-m4)
|
||||
(cortex-m3, cortex-m1, cortex-m0): Use cortex tuning.
|
||||
* config/arm/arm-protos.h (tune_params): Add prefer_constant_pool
|
||||
field.
|
||||
* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
|
||||
(arm_xscale_tune, arm_9e_tune, arm_cortex_a9_tune)
|
||||
(arm_fa726te_tune): Add prefer_constant_pool setting.
|
||||
(arm_v6t2_tune, arm_cortex_tune): New.
|
||||
* config/arm/arm.h (TARGET_USE_MOVT): Make dependent on
|
||||
prefer_constant_pool setting.
|
||||
|
||||
2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline
|
||||
2011-06-01 Paul Brook <paul@cpodesourcery.com>
|
||||
|
||||
* config/arm/arm-cores.def: Add cortex-r5. Add DIV flags to
|
||||
Cortex-A15.
|
||||
* config/arm/arm-tune.md: Regenerate.
|
||||
* config/arm/arm.c (FL_DIV): Rename...
|
||||
(FL_THUMB_DIV): ... to this.
|
||||
(FL_ARM_DIV): Define.
|
||||
(FL_FOR_ARCH7R, FL_FOR_ARCH7M): Use FL_THUMB_DIV.
|
||||
(arm_arch_hwdiv): Remove.
|
||||
(arm_arch_thumb_hwdiv, arm_arch_arm_hwdiv): New variables.
|
||||
(arm_issue_rate): Add cortexr5.
|
||||
* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set
|
||||
__ARM_ARCH_EXT_IDIV__.
|
||||
(TARGET_IDIV): Define.
|
||||
(arm_arch_hwdiv): Remove.
|
||||
(arm_arch_arm_hwdiv, arm_arch_thumb_hwdiv): New prototypes.
|
||||
* config/arm/arm.md (tune_cortexr4): Add cortexr5.
|
||||
(divsi3, udivsi3): New patterns.
|
||||
* config/arm/thumb2.md (divsi3, udivsi3): Remove.
|
||||
* doc/invoke.texi: Document ARM -mcpu=cortex-r5
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-cores.def'
|
||||
--- old/gcc/config/arm/arm-cores.def 2011-01-03 20:52:22 +0000
|
||||
+++ new/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000
|
||||
@@ -70,10 +70,10 @@
|
||||
/* V4 Architecture Processors */
|
||||
ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul)
|
||||
ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul)
|
||||
-ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
|
||||
-ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
|
||||
-ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
|
||||
-ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
|
||||
+ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
|
||||
+ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
|
||||
+ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
|
||||
+ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
|
||||
ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul)
|
||||
ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul)
|
||||
|
||||
@@ -122,15 +122,16 @@
|
||||
ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e)
|
||||
ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e)
|
||||
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
|
||||
-ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e)
|
||||
-ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
|
||||
+ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2)
|
||||
+ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
|
||||
+ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
|
||||
+ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
|
||||
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
|
||||
-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e)
|
||||
-ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e)
|
||||
+ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
|
||||
+ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
|
||||
+ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
|
||||
+ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
|
||||
+ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
|
||||
+ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
|
||||
+ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex)
|
||||
+ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex)
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-protos.h'
|
||||
--- old/gcc/config/arm/arm-protos.h 2011-05-03 15:17:25 +0000
|
||||
+++ new/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000
|
||||
@@ -219,9 +219,14 @@
|
||||
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
|
||||
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
|
||||
int constant_limit;
|
||||
+ /* Maximum number of instructions to conditionalise in
|
||||
+ arm_final_prescan_insn. */
|
||||
+ int max_insns_skipped;
|
||||
int num_prefetch_slots;
|
||||
int l1_cache_size;
|
||||
int l1_cache_line_size;
|
||||
+ bool prefer_constant_pool;
|
||||
+ int (*branch_cost) (bool, bool);
|
||||
};
|
||||
|
||||
extern const struct tune_params *current_tune;
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-tune.md'
|
||||
--- old/gcc/config/arm/arm-tune.md 2010-12-20 17:48:51 +0000
|
||||
+++ new/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000
|
||||
@@ -1,5 +1,5 @@
|
||||
;; -*- buffer-read-only: t -*-
|
||||
;; Generated automatically by gentune.sh from arm-cores.def
|
||||
(define_attr "tune"
|
||||
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
|
||||
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
|
||||
(const (symbol_ref "((enum attr_tune) arm_tune)")))
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-05-11 14:49:48 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000
|
||||
@@ -255,6 +255,8 @@
|
||||
static void arm_conditional_register_usage (void);
|
||||
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
|
||||
static unsigned int arm_autovectorize_vector_sizes (void);
|
||||
+static int arm_default_branch_cost (bool, bool);
|
||||
+static int arm_cortex_a5_branch_cost (bool, bool);
|
||||
|
||||
|
||||
/* Table of machine attributes. */
|
||||
@@ -672,12 +674,13 @@
|
||||
#define FL_THUMB2 (1 << 16) /* Thumb-2. */
|
||||
#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
|
||||
profile. */
|
||||
-#define FL_DIV (1 << 18) /* Hardware divide. */
|
||||
+#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
|
||||
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
|
||||
#define FL_NEON (1 << 20) /* Neon instructions. */
|
||||
#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
|
||||
architecture. */
|
||||
#define FL_ARCH7 (1 << 22) /* Architecture 7. */
|
||||
+#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
|
||||
|
||||
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
|
||||
|
||||
@@ -704,8 +707,8 @@
|
||||
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
|
||||
#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
|
||||
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
|
||||
-#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
|
||||
-#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
|
||||
+#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
|
||||
+#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
|
||||
#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
|
||||
|
||||
/* The bits in this mask specify which
|
||||
@@ -791,7 +794,8 @@
|
||||
int arm_arch_thumb2;
|
||||
|
||||
/* Nonzero if chip supports integer division instruction. */
|
||||
-int arm_arch_hwdiv;
|
||||
+int arm_arch_arm_hwdiv;
|
||||
+int arm_arch_thumb_hwdiv;
|
||||
|
||||
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
|
||||
we must report the mode of the memory reference from
|
||||
@@ -864,48 +868,117 @@
|
||||
{
|
||||
arm_slowmul_rtx_costs,
|
||||
NULL,
|
||||
- 3,
|
||||
- ARM_PREFETCH_NOT_BENEFICIAL
|
||||
+ 3, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ true, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
};
|
||||
|
||||
const struct tune_params arm_fastmul_tune =
|
||||
{
|
||||
arm_fastmul_rtx_costs,
|
||||
NULL,
|
||||
- 1,
|
||||
- ARM_PREFETCH_NOT_BENEFICIAL
|
||||
+ 1, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ true, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
+};
|
||||
+
|
||||
+/* StrongARM has early execution of branches, so a sequence that is worth
|
||||
+ skipping is shorter. Set max_insns_skipped to a lower value. */
|
||||
+
|
||||
+const struct tune_params arm_strongarm_tune =
|
||||
+{
|
||||
+ arm_fastmul_rtx_costs,
|
||||
+ NULL,
|
||||
+ 1, /* Constant limit. */
|
||||
+ 3, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ true, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
};
|
||||
|
||||
const struct tune_params arm_xscale_tune =
|
||||
{
|
||||
arm_xscale_rtx_costs,
|
||||
xscale_sched_adjust_cost,
|
||||
- 2,
|
||||
- ARM_PREFETCH_NOT_BENEFICIAL
|
||||
+ 2, /* Constant limit. */
|
||||
+ 3, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ true, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
};
|
||||
|
||||
const struct tune_params arm_9e_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
NULL,
|
||||
- 1,
|
||||
- ARM_PREFETCH_NOT_BENEFICIAL
|
||||
+ 1, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ true, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
+};
|
||||
+
|
||||
+const struct tune_params arm_v6t2_tune =
|
||||
+{
|
||||
+ arm_9e_rtx_costs,
|
||||
+ NULL,
|
||||
+ 1, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ false, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
+};
|
||||
+
|
||||
+/* Generic Cortex tuning. Use more specific tunings if appropriate. */
|
||||
+const struct tune_params arm_cortex_tune =
|
||||
+{
|
||||
+ arm_9e_rtx_costs,
|
||||
+ NULL,
|
||||
+ 1, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ false, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
+};
|
||||
+
|
||||
+/* Branches can be dual-issued on Cortex-A5, so conditional execution is
|
||||
+ less appealing. Set max_insns_skipped to a low value. */
|
||||
+
|
||||
+const struct tune_params arm_cortex_a5_tune =
|
||||
+{
|
||||
+ arm_9e_rtx_costs,
|
||||
+ NULL,
|
||||
+ 1, /* Constant limit. */
|
||||
+ 1, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ false, /* Prefer constant pool. */
|
||||
+ arm_cortex_a5_branch_cost
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a9_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
cortex_a9_sched_adjust_cost,
|
||||
- 1,
|
||||
- ARM_PREFETCH_BENEFICIAL(4,32,32)
|
||||
+ 1, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_BENEFICIAL(4,32,32),
|
||||
+ false, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
};
|
||||
|
||||
const struct tune_params arm_fa726te_tune =
|
||||
{
|
||||
arm_9e_rtx_costs,
|
||||
fa726te_sched_adjust_cost,
|
||||
- 1,
|
||||
- ARM_PREFETCH_NOT_BENEFICIAL
|
||||
+ 1, /* Constant limit. */
|
||||
+ 5, /* Max cond insns. */
|
||||
+ ARM_PREFETCH_NOT_BENEFICIAL,
|
||||
+ true, /* Prefer constant pool. */
|
||||
+ arm_default_branch_cost
|
||||
};
|
||||
|
||||
|
||||
@@ -1711,7 +1784,8 @@
|
||||
arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
|
||||
arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
|
||||
arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
|
||||
- arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
|
||||
+ arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
|
||||
+ arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
|
||||
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
|
||||
|
||||
/* If we are not using the default (ARM mode) section anchor offset
|
||||
@@ -1991,12 +2065,7 @@
|
||||
max_insns_skipped = 6;
|
||||
}
|
||||
else
|
||||
- {
|
||||
- /* StrongARM has early execution of branches, so a sequence
|
||||
- that is worth skipping is shorter. */
|
||||
- if (arm_tune_strongarm)
|
||||
- max_insns_skipped = 3;
|
||||
- }
|
||||
+ max_insns_skipped = current_tune->max_insns_skipped;
|
||||
|
||||
/* Hot/Cold partitioning is not currently supported, since we can't
|
||||
handle literal pool placement in that case. */
|
||||
@@ -8211,6 +8280,21 @@
|
||||
return cost;
|
||||
}
|
||||
|
||||
+static int
|
||||
+arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
|
||||
+{
|
||||
+ if (TARGET_32BIT)
|
||||
+ return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
|
||||
+ else
|
||||
+ return (optimize > 0) ? 2 : 0;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
|
||||
+{
|
||||
+ return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
|
||||
+}
|
||||
+
|
||||
static int fp_consts_inited = 0;
|
||||
|
||||
/* Only zero is valid for VFP. Other values are also valid for FPA. */
|
||||
@@ -23123,6 +23207,7 @@
|
||||
{
|
||||
case cortexr4:
|
||||
case cortexr4f:
|
||||
+ case cortexr5:
|
||||
case cortexa5:
|
||||
case cortexa8:
|
||||
case cortexa9:
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.h'
|
||||
--- old/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000
|
||||
+++ new/gcc/config/arm/arm.h 2011-06-14 14:53:07 +0000
|
||||
@@ -101,6 +101,8 @@
|
||||
builtin_define ("__ARM_PCS"); \
|
||||
builtin_define ("__ARM_EABI__"); \
|
||||
} \
|
||||
+ if (TARGET_IDIV) \
|
||||
+ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \
|
||||
} while (0)
|
||||
|
||||
/* The various ARM cores. */
|
||||
@@ -282,7 +284,8 @@
|
||||
(TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em))
|
||||
|
||||
/* Should MOVW/MOVT be used in preference to a constant pool. */
|
||||
-#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size)
|
||||
+#define TARGET_USE_MOVT \
|
||||
+ (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
|
||||
|
||||
/* We could use unified syntax for arm mode, but for now we just use it
|
||||
for Thumb-2. */
|
||||
@@ -303,6 +306,10 @@
|
||||
/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */
|
||||
#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7)
|
||||
|
||||
+/* Nonzero if integer division instructions supported. */
|
||||
+#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
|
||||
+ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
|
||||
+
|
||||
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
|
||||
then TARGET_AAPCS_BASED must be true -- but the converse does not
|
||||
hold. TARGET_BPABI implies the use of the BPABI runtime library,
|
||||
@@ -487,8 +494,11 @@
|
||||
/* Nonzero if chip supports Thumb 2. */
|
||||
extern int arm_arch_thumb2;
|
||||
|
||||
-/* Nonzero if chip supports integer division instruction. */
|
||||
-extern int arm_arch_hwdiv;
|
||||
+/* Nonzero if chip supports integer division instruction in ARM mode. */
|
||||
+extern int arm_arch_arm_hwdiv;
|
||||
+
|
||||
+/* Nonzero if chip supports integer division instruction in Thumb mode. */
|
||||
+extern int arm_arch_thumb_hwdiv;
|
||||
|
||||
#ifndef TARGET_DEFAULT
|
||||
#define TARGET_DEFAULT (MASK_APCS_FRAME)
|
||||
@@ -2018,8 +2028,8 @@
|
||||
/* Try to generate sequences that don't involve branches, we can then use
|
||||
conditional instructions */
|
||||
#define BRANCH_COST(speed_p, predictable_p) \
|
||||
- (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \
|
||||
- : (optimize > 0 ? 2 : 0))
|
||||
+ (current_tune->branch_cost (speed_p, predictable_p))
|
||||
+
|
||||
|
||||
/* Position Independent Code. */
|
||||
/* We decide which register to use based on the compilation options and
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000
|
||||
@@ -490,7 +490,7 @@
|
||||
|
||||
(define_attr "tune_cortexr4" "yes,no"
|
||||
(const (if_then_else
|
||||
- (eq_attr "tune" "cortexr4,cortexr4f")
|
||||
+ (eq_attr "tune" "cortexr4,cortexr4f,cortexr5")
|
||||
(const_string "yes")
|
||||
(const_string "no"))))
|
||||
|
||||
@@ -3738,6 +3738,28 @@
|
||||
(set_attr "predicable" "yes")]
|
||||
)
|
||||
|
||||
+
|
||||
+;; Division instructions
|
||||
+(define_insn "divsi3"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
+ (div:SI (match_operand:SI 1 "s_register_operand" "r")
|
||||
+ (match_operand:SI 2 "s_register_operand" "r")))]
|
||||
+ "TARGET_IDIV"
|
||||
+ "sdiv%?\t%0, %1, %2"
|
||||
+ [(set_attr "predicable" "yes")
|
||||
+ (set_attr "insn" "sdiv")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "udivsi3"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
+ (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
|
||||
+ (match_operand:SI 2 "s_register_operand" "r")))]
|
||||
+ "TARGET_IDIV"
|
||||
+ "udiv%?\t%0, %1, %2"
|
||||
+ [(set_attr "predicable" "yes")
|
||||
+ (set_attr "insn" "udiv")]
|
||||
+)
|
||||
+
|
||||
|
||||
;; Unary arithmetic insns
|
||||
|
||||
|
||||
=== modified file 'gcc/config/arm/thumb2.md'
|
||||
--- old/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000
|
||||
+++ new/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000
|
||||
@@ -779,26 +779,6 @@
|
||||
(set_attr "length" "2")]
|
||||
)
|
||||
|
||||
-(define_insn "divsi3"
|
||||
- [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
- (div:SI (match_operand:SI 1 "s_register_operand" "r")
|
||||
- (match_operand:SI 2 "s_register_operand" "r")))]
|
||||
- "TARGET_THUMB2 && arm_arch_hwdiv"
|
||||
- "sdiv%?\t%0, %1, %2"
|
||||
- [(set_attr "predicable" "yes")
|
||||
- (set_attr "insn" "sdiv")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "udivsi3"
|
||||
- [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
- (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
|
||||
- (match_operand:SI 2 "s_register_operand" "r")))]
|
||||
- "TARGET_THUMB2 && arm_arch_hwdiv"
|
||||
- "udiv%?\t%0, %1, %2"
|
||||
- [(set_attr "predicable" "yes")
|
||||
- (set_attr "insn" "udiv")]
|
||||
-)
|
||||
-
|
||||
(define_insn "*thumb2_subsi_short"
|
||||
[(set (match_operand:SI 0 "low_register_operand" "=l")
|
||||
(minus:SI (match_operand:SI 1 "low_register_operand" "l")
|
||||
|
||||
=== modified file 'gcc/doc/invoke.texi'
|
||||
--- old/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000
|
||||
+++ new/gcc/doc/invoke.texi 2011-06-14 14:37:30 +0000
|
||||
@@ -10208,7 +10208,8 @@
|
||||
@samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp},
|
||||
@samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s},
|
||||
@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15},
|
||||
-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3},
|
||||
+@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5},
|
||||
+@samp{cortex-m4}, @samp{cortex-m3},
|
||||
@samp{cortex-m1},
|
||||
@samp{cortex-m0},
|
||||
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
|
||||
|
||||
=== modified file 'gcc/dojump.c'
|
||||
--- old/gcc/dojump.c 2010-05-19 19:09:57 +0000
|
||||
+++ new/gcc/dojump.c 2011-06-14 14:53:07 +0000
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "ggc.h"
|
||||
#include "basic-block.h"
|
||||
#include "output.h"
|
||||
+#include "tm_p.h"
|
||||
|
||||
static bool prefer_and_bit_test (enum machine_mode, int);
|
||||
static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int);
|
||||
|
||||
@@ -0,0 +1,188 @@
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
Chung-Lin Tang <cltang@codesourcery.com>
|
||||
Richard Earnshaw <rearnsha@arm.com>
|
||||
|
||||
PR target/48250
|
||||
* config/arm/arm.c (arm_legitimize_reload_address): Update cases
|
||||
to use sign-magnitude offsets. Reject unsupported unaligned
|
||||
cases. Add detailed description in comments.
|
||||
* config/arm/arm.md (reload_outdf): Disable for ARM mode; change
|
||||
condition from TARGET_32BIT to TARGET_ARM.
|
||||
|
||||
Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
* config/arm/arm.c (arm_legitimize_reload_address): For NEON
|
||||
quad-word modes, reduce to 9-bit index range when above 1016
|
||||
limit.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000
|
||||
@@ -6488,23 +6488,134 @@
|
||||
HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
|
||||
HOST_WIDE_INT low, high;
|
||||
|
||||
- if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
|
||||
- low = ((val & 0xf) ^ 0x8) - 0x8;
|
||||
- else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
|
||||
- /* Need to be careful, -256 is not a valid offset. */
|
||||
- low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
|
||||
- else if (mode == SImode
|
||||
- || (mode == SFmode && TARGET_SOFT_FLOAT)
|
||||
- || ((mode == HImode || mode == QImode) && ! arm_arch4))
|
||||
- /* Need to be careful, -4096 is not a valid offset. */
|
||||
- low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
|
||||
- else if ((mode == HImode || mode == QImode) && arm_arch4)
|
||||
- /* Need to be careful, -256 is not a valid offset. */
|
||||
- low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
|
||||
- else if (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||||
- && TARGET_HARD_FLOAT && TARGET_FPA)
|
||||
- /* Need to be careful, -1024 is not a valid offset. */
|
||||
- low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
|
||||
+ /* Detect coprocessor load/stores. */
|
||||
+ bool coproc_p = ((TARGET_HARD_FLOAT
|
||||
+ && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
|
||||
+ && (mode == SFmode || mode == DFmode
|
||||
+ || (mode == DImode && TARGET_MAVERICK)))
|
||||
+ || (TARGET_REALLY_IWMMXT
|
||||
+ && VALID_IWMMXT_REG_MODE (mode))
|
||||
+ || (TARGET_NEON
|
||||
+ && (VALID_NEON_DREG_MODE (mode)
|
||||
+ || VALID_NEON_QREG_MODE (mode))));
|
||||
+
|
||||
+ /* For some conditions, bail out when lower two bits are unaligned. */
|
||||
+ if ((val & 0x3) != 0
|
||||
+ /* Coprocessor load/store indexes are 8-bits + '00' appended. */
|
||||
+ && (coproc_p
|
||||
+ /* For DI, and DF under soft-float: */
|
||||
+ || ((mode == DImode || mode == DFmode)
|
||||
+ /* Without ldrd, we use stm/ldm, which does not
|
||||
+ fair well with unaligned bits. */
|
||||
+ && (! TARGET_LDRD
|
||||
+ /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
|
||||
+ || TARGET_THUMB2))))
|
||||
+ return false;
|
||||
+
|
||||
+ /* When breaking down a [reg+index] reload address into [(reg+high)+low],
|
||||
+ of which the (reg+high) gets turned into a reload add insn,
|
||||
+ we try to decompose the index into high/low values that can often
|
||||
+ also lead to better reload CSE.
|
||||
+ For example:
|
||||
+ ldr r0, [r2, #4100] // Offset too large
|
||||
+ ldr r1, [r2, #4104] // Offset too large
|
||||
+
|
||||
+ is best reloaded as:
|
||||
+ add t1, r2, #4096
|
||||
+ ldr r0, [t1, #4]
|
||||
+ add t2, r2, #4096
|
||||
+ ldr r1, [t2, #8]
|
||||
+
|
||||
+ which post-reload CSE can simplify in most cases to eliminate the
|
||||
+ second add instruction:
|
||||
+ add t1, r2, #4096
|
||||
+ ldr r0, [t1, #4]
|
||||
+ ldr r1, [t1, #8]
|
||||
+
|
||||
+ The idea here is that we want to split out the bits of the constant
|
||||
+ as a mask, rather than as subtracting the maximum offset that the
|
||||
+ respective type of load/store used can handle.
|
||||
+
|
||||
+ When encountering negative offsets, we can still utilize it even if
|
||||
+ the overall offset is positive; sometimes this may lead to an immediate
|
||||
+ that can be constructed with fewer instructions.
|
||||
+ For example:
|
||||
+ ldr r0, [r2, #0x3FFFFC]
|
||||
+
|
||||
+ This is best reloaded as:
|
||||
+ add t1, r2, #0x400000
|
||||
+ ldr r0, [t1, #-4]
|
||||
+
|
||||
+ The trick for spotting this for a load insn with N bits of offset
|
||||
+ (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
|
||||
+ negative offset that is going to make bit N and all the bits below
|
||||
+ it become zero in the remainder part.
|
||||
+
|
||||
+ The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
|
||||
+ to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
|
||||
+ used in most cases of ARM load/store instructions. */
|
||||
+
|
||||
+#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
|
||||
+ (((VAL) & ((1 << (N)) - 1)) \
|
||||
+ ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
|
||||
+ : 0)
|
||||
+
|
||||
+ if (coproc_p)
|
||||
+ {
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
|
||||
+
|
||||
+ /* NEON quad-word load/stores are made of two double-word accesses,
|
||||
+ so the valid index range is reduced by 8. Treat as 9-bit range if
|
||||
+ we go over it. */
|
||||
+ if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
|
||||
+ }
|
||||
+ else if (GET_MODE_SIZE (mode) == 8)
|
||||
+ {
|
||||
+ if (TARGET_LDRD)
|
||||
+ low = (TARGET_THUMB2
|
||||
+ ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
|
||||
+ : SIGN_MAG_LOW_ADDR_BITS (val, 8));
|
||||
+ else
|
||||
+ /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
|
||||
+ to access doublewords. The supported load/store offsets are
|
||||
+ -8, -4, and 4, which we try to produce here. */
|
||||
+ low = ((val & 0xf) ^ 0x8) - 0x8;
|
||||
+ }
|
||||
+ else if (GET_MODE_SIZE (mode) < 8)
|
||||
+ {
|
||||
+ /* NEON element load/stores do not have an offset. */
|
||||
+ if (TARGET_NEON_FP16 && mode == HFmode)
|
||||
+ return false;
|
||||
+
|
||||
+ if (TARGET_THUMB2)
|
||||
+ {
|
||||
+ /* Thumb-2 has an asymmetrical index range of (-256,4096).
|
||||
+ Try the wider 12-bit range first, and re-try if the result
|
||||
+ is out of range. */
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
|
||||
+ if (low < -255)
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (mode == HImode || mode == HFmode)
|
||||
+ {
|
||||
+ if (arm_arch4)
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
|
||||
+ else
|
||||
+ {
|
||||
+ /* The storehi/movhi_bytes fallbacks can use only
|
||||
+ [-4094,+4094] of the full ldrb/strb index range. */
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
|
||||
+ if (low == 4095 || low == -4095)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
|
||||
+ }
|
||||
+ }
|
||||
else
|
||||
return false;
|
||||
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-06-27 22:14:07 +0000
|
||||
@@ -6267,7 +6267,7 @@
|
||||
[(match_operand:DF 0 "arm_reload_memory_operand" "=o")
|
||||
(match_operand:DF 1 "s_register_operand" "r")
|
||||
(match_operand:SI 2 "s_register_operand" "=&r")]
|
||||
- "TARGET_32BIT"
|
||||
+ "TARGET_THUMB2"
|
||||
"
|
||||
{
|
||||
enum rtx_code code = GET_CODE (XEXP (operands[0], 0));
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,96 @@
|
||||
2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline.
|
||||
LP 791327
|
||||
gcc/
|
||||
2011-06-09 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
PR target/49335
|
||||
* config/arm/predicates.md (add_operator): New.
|
||||
* config/arm/arm.md ("*arith_shiftsi"): Fix for SP reg usage
|
||||
in Thumb2.
|
||||
|
||||
2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline.
|
||||
gcc/
|
||||
2011-06-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
PR target/49385
|
||||
* config/arm/thumb2.md (*thumb2_movhi_insn): Make sure atleast
|
||||
one of the operands is a register.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-06-27 22:14:07 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-06-28 12:02:27 +0000
|
||||
@@ -8584,18 +8584,22 @@
|
||||
;; Patterns to allow combination of arithmetic, cond code and shifts
|
||||
|
||||
(define_insn "*arith_shiftsi"
|
||||
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
|
||||
(match_operator:SI 1 "shiftable_operator"
|
||||
[(match_operator:SI 3 "shift_operator"
|
||||
- [(match_operand:SI 4 "s_register_operand" "r,r")
|
||||
- (match_operand:SI 5 "shift_amount_operand" "M,r")])
|
||||
- (match_operand:SI 2 "s_register_operand" "rk,rk")]))]
|
||||
+ [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
|
||||
+ (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
|
||||
+ (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
|
||||
"TARGET_32BIT"
|
||||
"%i1%?\\t%0, %2, %4%S3"
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "shift" "4")
|
||||
- (set_attr "arch" "32,a")
|
||||
- ;; We have to make sure to disable the second alternative if
|
||||
+ (set_attr "arch" "a,t2,t2,a")
|
||||
+ ;; Thumb2 doesn't allow the stack pointer to be used for
|
||||
+ ;; operand1 for all operations other than add and sub. In this case
|
||||
+ ;; the minus operation is a candidate for an rsub and hence needs
|
||||
+ ;; to be disabled.
|
||||
+ ;; We have to make sure to disable the fourth alternative if
|
||||
;; the shift_operator is MULT, since otherwise the insn will
|
||||
;; also match a multiply_accumulate pattern and validate_change
|
||||
;; will allow a replacement of the constant with a register
|
||||
@@ -8603,9 +8607,13 @@
|
||||
(set_attr_alternative "insn_enabled"
|
||||
[(const_string "yes")
|
||||
(if_then_else
|
||||
+ (match_operand:SI 1 "add_operator" "")
|
||||
+ (const_string "yes") (const_string "no"))
|
||||
+ (const_string "yes")
|
||||
+ (if_then_else
|
||||
(match_operand:SI 3 "mult_operator" "")
|
||||
(const_string "no") (const_string "yes"))])
|
||||
- (set_attr "type" "alu_shift,alu_shift_reg")])
|
||||
+ (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||||
|
||||
=== modified file 'gcc/config/arm/predicates.md'
|
||||
--- old/gcc/config/arm/predicates.md 2011-05-03 15:14:56 +0000
|
||||
+++ new/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000
|
||||
@@ -687,3 +687,6 @@
|
||||
(define_special_predicate "neon_struct_operand"
|
||||
(and (match_code "mem")
|
||||
(match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
|
||||
+
|
||||
+(define_special_predicate "add_operator"
|
||||
+ (match_code "plus"))
|
||||
|
||||
=== modified file 'gcc/config/arm/thumb2.md'
|
||||
--- old/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000
|
||||
+++ new/gcc/config/arm/thumb2.md 2011-06-20 12:18:27 +0000
|
||||
@@ -207,7 +207,9 @@
|
||||
(define_insn "*thumb2_movhi_insn"
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
|
||||
(match_operand:HI 1 "general_operand" "rI,n,r,m"))]
|
||||
- "TARGET_THUMB2"
|
||||
+ "TARGET_THUMB2
|
||||
+ && (register_operand (operands[0], HImode)
|
||||
+ || register_operand (operands[1], HImode))"
|
||||
"@
|
||||
mov%?\\t%0, %1\\t%@ movhi
|
||||
movw%?\\t%0, %L1\\t%@ movhi
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
2011-06-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline.
|
||||
LP 744754
|
||||
2011-04-17 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
* config/arm/arm.c (neon_struct_mem_operand):
|
||||
Support POST_INC/PRE_DEC memory operands.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000
|
||||
@@ -9357,6 +9357,11 @@
|
||||
if (GET_CODE (ind) == REG)
|
||||
return arm_address_register_rtx_p (ind, 0);
|
||||
|
||||
+ /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
|
||||
+ if (GET_CODE (ind) == POST_INC
|
||||
+ || GET_CODE (ind) == PRE_DEC)
|
||||
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
|
||||
+
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
2011-07-03 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from FSF:
|
||||
2011-06-12 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vect-data-refs.c (vect_peeling_hash_get_most_frequent):
|
||||
Take number of iterations to peel into account for equally frequent
|
||||
misalignment values.
|
||||
|
||||
=== modified file 'gcc/tree-vect-data-refs.c'
|
||||
--- old/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000
|
||||
+++ new/gcc/tree-vect-data-refs.c 2011-06-29 11:20:24 +0000
|
||||
@@ -1256,7 +1256,9 @@
|
||||
vect_peel_info elem = (vect_peel_info) *slot;
|
||||
vect_peel_extended_info max = (vect_peel_extended_info) data;
|
||||
|
||||
- if (elem->count > max->peel_info.count)
|
||||
+ if (elem->count > max->peel_info.count
|
||||
+ || (elem->count == max->peel_info.count
|
||||
+ && max->peel_info.npeel > elem->npeel))
|
||||
{
|
||||
max->peel_info.npeel = elem->npeel;
|
||||
max->peel_info.count = elem->count;
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
* builtins.c (get_object_alignment): Fix comment.
|
||||
* fold-const.c (get_pointer_modulus_and_residue): Remove
|
||||
allow_func_align. Use get_object_alignment.
|
||||
(fold_binary_loc): Update caller.
|
||||
|
||||
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-06-29 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
PR tree-optimization/49545
|
||||
* builtins.c (get_object_alignment_1): Update function comment.
|
||||
Do not use DECL_ALIGN for functions, but test
|
||||
TARGET_PTRMEMFUNC_VBIT_LOCATION instead.
|
||||
* fold-const.c (get_pointer_modulus_and_residue): Don't check
|
||||
for functions here.
|
||||
* tree-ssa-ccp.c (get_value_from_alignment): Likewise.
|
||||
|
||||
gcc/testsuite/
|
||||
Backport from mainline:
|
||||
|
||||
2011-06-29 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* gcc.dg/torture/pr49169.c: Restrict to ARM and MIPS targets.
|
||||
|
||||
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-07-27 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/49169
|
||||
* fold-const.c (get_pointer_modulus_and_residue): Don't rely on
|
||||
the alignment of function decls.
|
||||
|
||||
gcc/testsuite/
|
||||
Backport from mainline:
|
||||
|
||||
2011-07-27 Michael Hope <michael.hope@linaro.org>
|
||||
Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
PR tree-optimization/49169
|
||||
* gcc.dg/torture/pr49169.c: New test.
|
||||
|
||||
=== modified file 'gcc/builtins.c'
|
||||
--- old/gcc/builtins.c 2011-03-03 21:56:58 +0000
|
||||
+++ new/gcc/builtins.c 2011-07-04 09:52:27 +0000
|
||||
@@ -264,7 +264,14 @@
|
||||
}
|
||||
|
||||
/* Return the alignment in bits of EXP, an object.
|
||||
- Don't return more than MAX_ALIGN no matter what. */
|
||||
+ Don't return more than MAX_ALIGN no matter what.
|
||||
+
|
||||
+ Note that the address (and thus the alignment) computed here is based
|
||||
+ on the address to which a symbol resolves, whereas DECL_ALIGN is based
|
||||
+ on the address at which an object is actually located. These two
|
||||
+ addresses are not always the same. For example, on ARM targets,
|
||||
+ the address &foo of a Thumb function foo() has the lowest bit set,
|
||||
+ whereas foo() itself starts on an even address. */
|
||||
|
||||
unsigned int
|
||||
get_object_alignment (tree exp, unsigned int max_align)
|
||||
@@ -286,7 +293,21 @@
|
||||
exp = DECL_INITIAL (exp);
|
||||
if (DECL_P (exp)
|
||||
&& TREE_CODE (exp) != LABEL_DECL)
|
||||
- align = DECL_ALIGN (exp);
|
||||
+ {
|
||||
+ if (TREE_CODE (exp) == FUNCTION_DECL)
|
||||
+ {
|
||||
+ /* Function addresses can encode extra information besides their
|
||||
+ alignment. However, if TARGET_PTRMEMFUNC_VBIT_LOCATION
|
||||
+ allows the low bit to be used as a virtual bit, we know
|
||||
+ that the address itself must be 2-byte aligned. */
|
||||
+ if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn)
|
||||
+ align = 2 * BITS_PER_UNIT;
|
||||
+ else
|
||||
+ align = BITS_PER_UNIT;
|
||||
+ }
|
||||
+ else
|
||||
+ align = DECL_ALIGN (exp);
|
||||
+ }
|
||||
else if (CONSTANT_CLASS_P (exp))
|
||||
{
|
||||
align = TYPE_ALIGN (TREE_TYPE (exp));
|
||||
|
||||
=== modified file 'gcc/fold-const.c'
|
||||
--- old/gcc/fold-const.c 2011-05-23 20:37:18 +0000
|
||||
+++ new/gcc/fold-const.c 2011-07-04 09:52:27 +0000
|
||||
@@ -9232,15 +9232,10 @@
|
||||
0 <= N < M as is common. In general, the precise value of P is unknown.
|
||||
M is chosen as large as possible such that constant N can be determined.
|
||||
|
||||
- Returns M and sets *RESIDUE to N.
|
||||
-
|
||||
- If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into
|
||||
- account. This is not always possible due to PR 35705.
|
||||
- */
|
||||
+ Returns M and sets *RESIDUE to N. */
|
||||
|
||||
static unsigned HOST_WIDE_INT
|
||||
-get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue,
|
||||
- bool allow_func_align)
|
||||
+get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue)
|
||||
{
|
||||
enum tree_code code;
|
||||
|
||||
@@ -9270,9 +9265,8 @@
|
||||
}
|
||||
}
|
||||
|
||||
- if (DECL_P (expr)
|
||||
- && (allow_func_align || TREE_CODE (expr) != FUNCTION_DECL))
|
||||
- return DECL_ALIGN_UNIT (expr);
|
||||
+ if (DECL_P (expr))
|
||||
+ return get_object_alignment (expr, ~0U) / BITS_PER_UNIT;
|
||||
}
|
||||
else if (code == POINTER_PLUS_EXPR)
|
||||
{
|
||||
@@ -9282,8 +9276,7 @@
|
||||
|
||||
op0 = TREE_OPERAND (expr, 0);
|
||||
STRIP_NOPS (op0);
|
||||
- modulus = get_pointer_modulus_and_residue (op0, residue,
|
||||
- allow_func_align);
|
||||
+ modulus = get_pointer_modulus_and_residue (op0, residue);
|
||||
|
||||
op1 = TREE_OPERAND (expr, 1);
|
||||
STRIP_NOPS (op1);
|
||||
@@ -11163,8 +11156,7 @@
|
||||
unsigned HOST_WIDE_INT modulus, residue;
|
||||
unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1);
|
||||
|
||||
- modulus = get_pointer_modulus_and_residue (arg0, &residue,
|
||||
- integer_onep (arg1));
|
||||
+ modulus = get_pointer_modulus_and_residue (arg0, &residue);
|
||||
|
||||
/* This works because modulus is a power of 2. If this weren't the
|
||||
case, we'd have to replace it by its greatest power-of-2
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/torture/pr49169.c'
|
||||
--- old/gcc/testsuite/gcc.dg/torture/pr49169.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/torture/pr49169.c 2011-06-29 09:46:06 +0000
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile { target { arm*-*-* || mips*-*-* } } } */
|
||||
+
|
||||
+#include <stdlib.h>
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ void *p = main;
|
||||
+ if ((intptr_t) p & 1)
|
||||
+ abort ();
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "abort" } } */
|
||||
|
||||
=== modified file 'gcc/tree-ssa-ccp.c'
|
||||
--- old/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000
|
||||
+++ new/gcc/tree-ssa-ccp.c 2011-06-29 09:46:06 +0000
|
||||
@@ -522,10 +522,6 @@
|
||||
val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr),
|
||||
TREE_OPERAND (base, 0), TREE_OPERAND (base, 1));
|
||||
else if (base
|
||||
- /* ??? While function decls have DECL_ALIGN their addresses
|
||||
- may encode extra information in the lower bits on some
|
||||
- targets (PR47239). Simply punt for function decls for now. */
|
||||
- && TREE_CODE (base) != FUNCTION_DECL
|
||||
&& ((align = get_object_alignment (base, BIGGEST_ALIGNMENT))
|
||||
> BITS_PER_UNIT))
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,138 @@
|
||||
2011-07-11 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
Backport from mainline -r175090.
|
||||
gcc/
|
||||
* ddg.c (add_intra_loop_mem_dep): New function.
|
||||
(build_intra_loop_deps): Call it.
|
||||
|
||||
gcc/testsuite
|
||||
* gcc.dg/sms-9.c: New file.
|
||||
|
||||
=== modified file 'gcc/ddg.c'
|
||||
--- old/gcc/ddg.c 2011-05-13 16:03:40 +0000
|
||||
+++ new/gcc/ddg.c 2011-07-04 11:00:06 +0000
|
||||
@@ -390,6 +390,33 @@
|
||||
&PATTERN (insn2));
|
||||
}
|
||||
|
||||
+/* Given two nodes, analyze their RTL insns and add intra-loop mem deps
|
||||
+ to ddg G. */
|
||||
+static void
|
||||
+add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
|
||||
+{
|
||||
+
|
||||
+ if ((from->cuid == to->cuid)
|
||||
+ || !insns_may_alias_p (from->insn, to->insn))
|
||||
+ /* Do not create edge if memory references have disjoint alias sets
|
||||
+ or 'to' and 'from' are the same instruction. */
|
||||
+ return;
|
||||
+
|
||||
+ if (mem_write_insn_p (from->insn))
|
||||
+ {
|
||||
+ if (mem_read_insn_p (to->insn))
|
||||
+ create_ddg_dep_no_link (g, from, to,
|
||||
+ DEBUG_INSN_P (to->insn)
|
||||
+ ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
|
||||
+ else
|
||||
+ create_ddg_dep_no_link (g, from, to,
|
||||
+ DEBUG_INSN_P (to->insn)
|
||||
+ ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
|
||||
+ }
|
||||
+ else if (!mem_read_insn_p (to->insn))
|
||||
+ create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
|
||||
+}
|
||||
+
|
||||
/* Given two nodes, analyze their RTL insns and add inter-loop mem deps
|
||||
to ddg G. */
|
||||
static void
|
||||
@@ -477,10 +504,22 @@
|
||||
if (DEBUG_INSN_P (j_node->insn))
|
||||
continue;
|
||||
if (mem_access_insn_p (j_node->insn))
|
||||
- /* Don't bother calculating inter-loop dep if an intra-loop dep
|
||||
- already exists. */
|
||||
+ {
|
||||
+ /* Don't bother calculating inter-loop dep if an intra-loop dep
|
||||
+ already exists. */
|
||||
if (! TEST_BIT (dest_node->successors, j))
|
||||
add_inter_loop_mem_dep (g, dest_node, j_node);
|
||||
+ /* If -fmodulo-sched-allow-regmoves
|
||||
+ is set certain anti-dep edges are not created.
|
||||
+ It might be that these anti-dep edges are on the
|
||||
+ path from one memory instruction to another such that
|
||||
+ removing these edges could cause a violation of the
|
||||
+ memory dependencies. Thus we add intra edges between
|
||||
+ every two memory instructions in this case. */
|
||||
+ if (flag_modulo_sched_allow_regmoves
|
||||
+ && !TEST_BIT (dest_node->predecessors, j))
|
||||
+ add_intra_loop_mem_dep (g, j_node, dest_node);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/sms-9.c'
|
||||
--- old/gcc/testsuite/gcc.dg/sms-9.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/sms-9.c 2011-07-04 11:00:06 +0000
|
||||
@@ -0,0 +1,60 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 -fmodulo-sched-allow-regmoves" } */
|
||||
+
|
||||
+#include <stdlib.h>
|
||||
+#include <stdarg.h>
|
||||
+
|
||||
+struct df_ref_info
|
||||
+{
|
||||
+ unsigned int *begin;
|
||||
+ unsigned int *count;
|
||||
+};
|
||||
+
|
||||
+extern void *memset (void *s, int c, __SIZE_TYPE__ n);
|
||||
+
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+ int
|
||||
+ df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info,
|
||||
+ int num, unsigned int start)
|
||||
+{
|
||||
+ unsigned int m = num;
|
||||
+ unsigned int offset = 77;
|
||||
+ unsigned int r;
|
||||
+
|
||||
+ for (r = start; r < m; r++)
|
||||
+ {
|
||||
+ ref_info->begin[r] = offset;
|
||||
+ offset += ref_info->count[r];
|
||||
+ ref_info->count[r] = 0;
|
||||
+ }
|
||||
+
|
||||
+ return offset;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ struct df_ref_info temp;
|
||||
+ int num = 100;
|
||||
+ unsigned int start = 5;
|
||||
+ int i, offset;
|
||||
+
|
||||
+ temp.begin = malloc (100 * sizeof (unsigned int));
|
||||
+ temp.count = malloc (100 * sizeof (unsigned int));
|
||||
+
|
||||
+ memset (temp.begin, 0, sizeof (unsigned int) * num);
|
||||
+ memset (temp.count, 0, sizeof (unsigned int) * num);
|
||||
+
|
||||
+ for (i = 0; i < num; i++)
|
||||
+ temp.count[i] = i + 1;
|
||||
+
|
||||
+ offset = df_reorganize_refs_by_reg_by_insn (&temp, num, start);
|
||||
+
|
||||
+ if (offset != 5112)
|
||||
+ abort ();
|
||||
+
|
||||
+ free (temp.begin);
|
||||
+ free (temp.count);
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
2011-07-11 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
Backport from mainline -r175091
|
||||
gcc/
|
||||
* modulo-sched.c (struct ps_insn): Remove row_rest_count
|
||||
field.
|
||||
(struct partial_schedule): Add rows_length field.
|
||||
(verify_partial_schedule): Check rows_length.
|
||||
(ps_insert_empty_row): Handle rows_length.
|
||||
(create_partial_schedule): Likewise.
|
||||
(free_partial_schedule): Likewise.
|
||||
(reset_partial_schedule): Likewise.
|
||||
(create_ps_insn): Remove rest_count argument.
|
||||
(remove_node_from_ps): Update rows_length.
|
||||
(add_node_to_ps): Update rows_length and call create_ps_insn without
|
||||
passing row_rest_count.
|
||||
(rotate_partial_schedule): Update rows_length.
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000
|
||||
+++ new/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000
|
||||
@@ -134,8 +134,6 @@
|
||||
ps_insn_ptr next_in_row,
|
||||
prev_in_row;
|
||||
|
||||
- /* The number of nodes in the same row that come after this node. */
|
||||
- int row_rest_count;
|
||||
};
|
||||
|
||||
/* Holds the partial schedule as an array of II rows. Each entry of the
|
||||
@@ -149,6 +147,12 @@
|
||||
/* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
|
||||
ps_insn_ptr *rows;
|
||||
|
||||
+ /* rows_length[i] holds the number of instructions in the row.
|
||||
+ It is used only (as an optimization) to back off quickly from
|
||||
+ trying to schedule a node in a full row; that is, to avoid running
|
||||
+ through futile DFA state transitions. */
|
||||
+ int *rows_length;
|
||||
+
|
||||
/* The earliest absolute cycle of an insn in the partial schedule. */
|
||||
int min_cycle;
|
||||
|
||||
@@ -1907,6 +1911,7 @@
|
||||
int ii = ps->ii;
|
||||
int new_ii = ii + 1;
|
||||
int row;
|
||||
+ int *rows_length_new;
|
||||
|
||||
verify_partial_schedule (ps, sched_nodes);
|
||||
|
||||
@@ -1921,9 +1926,11 @@
|
||||
rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
|
||||
|
||||
rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
|
||||
+ rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
|
||||
for (row = 0; row < split_row; row++)
|
||||
{
|
||||
rows_new[row] = ps->rows[row];
|
||||
+ rows_length_new[row] = ps->rows_length[row];
|
||||
ps->rows[row] = NULL;
|
||||
for (crr_insn = rows_new[row];
|
||||
crr_insn; crr_insn = crr_insn->next_in_row)
|
||||
@@ -1944,6 +1951,7 @@
|
||||
for (row = split_row; row < ii; row++)
|
||||
{
|
||||
rows_new[row + 1] = ps->rows[row];
|
||||
+ rows_length_new[row + 1] = ps->rows_length[row];
|
||||
ps->rows[row] = NULL;
|
||||
for (crr_insn = rows_new[row + 1];
|
||||
crr_insn; crr_insn = crr_insn->next_in_row)
|
||||
@@ -1965,6 +1973,8 @@
|
||||
+ (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
|
||||
free (ps->rows);
|
||||
ps->rows = rows_new;
|
||||
+ free (ps->rows_length);
|
||||
+ ps->rows_length = rows_length_new;
|
||||
ps->ii = new_ii;
|
||||
gcc_assert (ps->min_cycle >= 0);
|
||||
|
||||
@@ -2040,16 +2050,23 @@
|
||||
ps_insn_ptr crr_insn;
|
||||
|
||||
for (row = 0; row < ps->ii; row++)
|
||||
- for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
|
||||
- {
|
||||
- ddg_node_ptr u = crr_insn->node;
|
||||
-
|
||||
- gcc_assert (TEST_BIT (sched_nodes, u->cuid));
|
||||
- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
|
||||
- popcount (sched_nodes) == number of insns in ps. */
|
||||
- gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
|
||||
- gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
|
||||
- }
|
||||
+ {
|
||||
+ int length = 0;
|
||||
+
|
||||
+ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
|
||||
+ {
|
||||
+ ddg_node_ptr u = crr_insn->node;
|
||||
+
|
||||
+ length++;
|
||||
+ gcc_assert (TEST_BIT (sched_nodes, u->cuid));
|
||||
+ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
|
||||
+ popcount (sched_nodes) == number of insns in ps. */
|
||||
+ gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
|
||||
+ gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
|
||||
+ }
|
||||
+
|
||||
+ gcc_assert (ps->rows_length[row] == length);
|
||||
+ }
|
||||
}
|
||||
|
||||
|
||||
@@ -2455,6 +2472,7 @@
|
||||
{
|
||||
partial_schedule_ptr ps = XNEW (struct partial_schedule);
|
||||
ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
|
||||
+ ps->rows_length = (int *) xcalloc (ii, sizeof (int));
|
||||
ps->ii = ii;
|
||||
ps->history = history;
|
||||
ps->min_cycle = INT_MAX;
|
||||
@@ -2493,6 +2511,7 @@
|
||||
return;
|
||||
free_ps_insns (ps);
|
||||
free (ps->rows);
|
||||
+ free (ps->rows_length);
|
||||
free (ps);
|
||||
}
|
||||
|
||||
@@ -2510,6 +2529,8 @@
|
||||
ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
|
||||
* sizeof (ps_insn_ptr));
|
||||
memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
|
||||
+ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
|
||||
+ memset (ps->rows_length, 0, new_ii * sizeof (int));
|
||||
ps->ii = new_ii;
|
||||
ps->min_cycle = INT_MAX;
|
||||
ps->max_cycle = INT_MIN;
|
||||
@@ -2538,14 +2559,13 @@
|
||||
|
||||
/* Creates an object of PS_INSN and initializes it to the given parameters. */
|
||||
static ps_insn_ptr
|
||||
-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
|
||||
+create_ps_insn (ddg_node_ptr node, int cycle)
|
||||
{
|
||||
ps_insn_ptr ps_i = XNEW (struct ps_insn);
|
||||
|
||||
ps_i->node = node;
|
||||
ps_i->next_in_row = NULL;
|
||||
ps_i->prev_in_row = NULL;
|
||||
- ps_i->row_rest_count = rest_count;
|
||||
ps_i->cycle = cycle;
|
||||
|
||||
return ps_i;
|
||||
@@ -2578,6 +2598,8 @@
|
||||
if (ps_i->next_in_row)
|
||||
ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
|
||||
}
|
||||
+
|
||||
+ ps->rows_length[row] -= 1;
|
||||
free (ps_i);
|
||||
return true;
|
||||
}
|
||||
@@ -2734,17 +2756,12 @@
|
||||
sbitmap must_precede, sbitmap must_follow)
|
||||
{
|
||||
ps_insn_ptr ps_i;
|
||||
- int rest_count = 1;
|
||||
int row = SMODULO (cycle, ps->ii);
|
||||
|
||||
- if (ps->rows[row]
|
||||
- && ps->rows[row]->row_rest_count >= issue_rate)
|
||||
+ if (ps->rows_length[row] >= issue_rate)
|
||||
return NULL;
|
||||
|
||||
- if (ps->rows[row])
|
||||
- rest_count += ps->rows[row]->row_rest_count;
|
||||
-
|
||||
- ps_i = create_ps_insn (node, rest_count, cycle);
|
||||
+ ps_i = create_ps_insn (node, cycle);
|
||||
|
||||
/* Finds and inserts PS_I according to MUST_FOLLOW and
|
||||
MUST_PRECEDE. */
|
||||
@@ -2754,6 +2771,7 @@
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+ ps->rows_length[row] += 1;
|
||||
return ps_i;
|
||||
}
|
||||
|
||||
@@ -2909,11 +2927,16 @@
|
||||
for (i = 0; i < backward_rotates; i++)
|
||||
{
|
||||
ps_insn_ptr first_row = ps->rows[0];
|
||||
+ int first_row_length = ps->rows_length[0];
|
||||
|
||||
for (row = 0; row < last_row; row++)
|
||||
- ps->rows[row] = ps->rows[row+1];
|
||||
+ {
|
||||
+ ps->rows[row] = ps->rows[row + 1];
|
||||
+ ps->rows_length[row] = ps->rows_length[row + 1];
|
||||
+ }
|
||||
|
||||
ps->rows[last_row] = first_row;
|
||||
+ ps->rows_length[last_row] = first_row_length;
|
||||
}
|
||||
|
||||
ps->max_cycle -= start_cycle;
|
||||
|
||||
@@ -0,0 +1,350 @@
|
||||
2011-07-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
gcc/
|
||||
2011-06-22 Dmitry Plotnikov <dplotnikov@ispras.ru>
|
||||
Dmitry Melnik <dm@ispras.ru>
|
||||
|
||||
* config/arm/arm.c (neon_immediate_valid_for_shift): New function.
|
||||
(neon_output_shift_immediate): Ditto.
|
||||
* config/arm/arm-protos.h (neon_immediate_valid_for_shift): New
|
||||
prototype.
|
||||
(neon_output_shift_immediate): Ditto.
|
||||
* config/arm/neon.md (vashl<mode>3): Modified constraint.
|
||||
(vashr<mode>3_imm): New insn pattern.
|
||||
(vlshr<mode>3_imm): Ditto.
|
||||
(vashr<mode>3): Modified constraint.
|
||||
(vlshr<mode>3): Ditto.
|
||||
* config/arm/predicates.md (imm_for_neon_lshift_operand): New
|
||||
predicate.
|
||||
(imm_for_neon_rshift_operand): Ditto.
|
||||
(imm_lshift_or_reg_neon): Ditto.
|
||||
(imm_rshift_or_reg_neon): Ditto.
|
||||
|
||||
* optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm-protos.h'
|
||||
--- old/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000
|
||||
+++ new/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000
|
||||
@@ -64,8 +64,12 @@
|
||||
extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
|
||||
extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
|
||||
int *);
|
||||
+extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *,
|
||||
+ int *, bool);
|
||||
extern char *neon_output_logic_immediate (const char *, rtx *,
|
||||
enum machine_mode, int, int);
|
||||
+extern char *neon_output_shift_immediate (const char *, char, rtx *,
|
||||
+ enum machine_mode, int, bool);
|
||||
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
|
||||
rtx (*) (rtx, rtx, rtx));
|
||||
extern rtx neon_make_constant (rtx);
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000
|
||||
@@ -8863,6 +8863,66 @@
|
||||
return 1;
|
||||
}
|
||||
|
||||
+/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
|
||||
+ the immediate is valid, write a constant suitable for using as an operand
|
||||
+ to VSHR/VSHL to *MODCONST and the corresponding element width to
|
||||
+ *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
|
||||
+ because they have different limitations. */
|
||||
+
|
||||
+int
|
||||
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
|
||||
+ rtx *modconst, int *elementwidth,
|
||||
+ bool isleftshift)
|
||||
+{
|
||||
+ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
|
||||
+ unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
|
||||
+ unsigned HOST_WIDE_INT last_elt = 0;
|
||||
+ unsigned HOST_WIDE_INT maxshift;
|
||||
+
|
||||
+ /* Split vector constant out into a byte vector. */
|
||||
+ for (i = 0; i < n_elts; i++)
|
||||
+ {
|
||||
+ rtx el = CONST_VECTOR_ELT (op, i);
|
||||
+ unsigned HOST_WIDE_INT elpart;
|
||||
+
|
||||
+ if (GET_CODE (el) == CONST_INT)
|
||||
+ elpart = INTVAL (el);
|
||||
+ else if (GET_CODE (el) == CONST_DOUBLE)
|
||||
+ return 0;
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+
|
||||
+ if (i != 0 && elpart != last_elt)
|
||||
+ return 0;
|
||||
+
|
||||
+ last_elt = elpart;
|
||||
+ }
|
||||
+
|
||||
+ /* Shift less than element size. */
|
||||
+ maxshift = innersize * 8;
|
||||
+
|
||||
+ if (isleftshift)
|
||||
+ {
|
||||
+ /* Left shift immediate value can be from 0 to <size>-1. */
|
||||
+ if (last_elt >= maxshift)
|
||||
+ return 0;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Right shift immediate value can be from 1 to <size>. */
|
||||
+ if (last_elt == 0 || last_elt > maxshift)
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ if (elementwidth)
|
||||
+ *elementwidth = innersize * 8;
|
||||
+
|
||||
+ if (modconst)
|
||||
+ *modconst = CONST_VECTOR_ELT (op, 0);
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
/* Return a string suitable for output of Neon immediate logic operation
|
||||
MNEM. */
|
||||
|
||||
@@ -8885,6 +8945,28 @@
|
||||
return templ;
|
||||
}
|
||||
|
||||
+/* Return a string suitable for output of Neon immediate shift operation
|
||||
+ (VSHR or VSHL) MNEM. */
|
||||
+
|
||||
+char *
|
||||
+neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
|
||||
+ enum machine_mode mode, int quad,
|
||||
+ bool isleftshift)
|
||||
+{
|
||||
+ int width, is_valid;
|
||||
+ static char templ[40];
|
||||
+
|
||||
+ is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
|
||||
+ gcc_assert (is_valid != 0);
|
||||
+
|
||||
+ if (quad)
|
||||
+ sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
|
||||
+ else
|
||||
+ sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
|
||||
+
|
||||
+ return templ;
|
||||
+}
|
||||
+
|
||||
/* Output a sequence of pairwise operations to implement a reduction.
|
||||
NOTE: We do "too much work" here, because pairwise operations work on two
|
||||
registers-worth of operands in one go. Unfortunately we can't exploit those
|
||||
|
||||
=== modified file 'gcc/config/arm/neon.md'
|
||||
--- old/gcc/config/arm/neon.md 2011-07-01 09:19:21 +0000
|
||||
+++ new/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000
|
||||
@@ -956,15 +956,57 @@
|
||||
; SImode elements.
|
||||
|
||||
(define_insn "vashl<mode>3"
|
||||
- [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
|
||||
- (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
|
||||
- (match_operand:VDQIW 2 "s_register_operand" "w")))]
|
||||
- "TARGET_NEON"
|
||||
- "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
- [(set (attr "neon_type")
|
||||
- (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||||
- (const_string "neon_vshl_ddd")
|
||||
- (const_string "neon_shift_3")))]
|
||||
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
|
||||
+ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
|
||||
+ (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
|
||||
+ "TARGET_NEON"
|
||||
+ {
|
||||
+ switch (which_alternative)
|
||||
+ {
|
||||
+ case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
|
||||
+ case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
|
||||
+ <MODE>mode,
|
||||
+ VALID_NEON_QREG_MODE (<MODE>mode),
|
||||
+ true);
|
||||
+ default: gcc_unreachable ();
|
||||
+ }
|
||||
+ }
|
||||
+ [(set (attr "neon_type")
|
||||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||||
+ (const_string "neon_vshl_ddd")
|
||||
+ (const_string "neon_shift_3")))]
|
||||
+)
|
||||
+
|
||||
+(define_insn "vashr<mode>3_imm"
|
||||
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
|
||||
+ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
|
||||
+ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
|
||||
+ "TARGET_NEON"
|
||||
+ {
|
||||
+ return neon_output_shift_immediate ("vshr", 's', &operands[2],
|
||||
+ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
|
||||
+ false);
|
||||
+ }
|
||||
+ [(set (attr "neon_type")
|
||||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||||
+ (const_string "neon_vshl_ddd")
|
||||
+ (const_string "neon_shift_3")))]
|
||||
+)
|
||||
+
|
||||
+(define_insn "vlshr<mode>3_imm"
|
||||
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
|
||||
+ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
|
||||
+ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
|
||||
+ "TARGET_NEON"
|
||||
+ {
|
||||
+ return neon_output_shift_immediate ("vshr", 'u', &operands[2],
|
||||
+ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
|
||||
+ false);
|
||||
+ }
|
||||
+ [(set (attr "neon_type")
|
||||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||||
+ (const_string "neon_vshl_ddd")
|
||||
+ (const_string "neon_shift_3")))]
|
||||
)
|
||||
|
||||
; Used for implementing logical shift-right, which is a left-shift by a negative
|
||||
@@ -1004,28 +1046,34 @@
|
||||
(define_expand "vashr<mode>3"
|
||||
[(set (match_operand:VDQIW 0 "s_register_operand" "")
|
||||
(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
|
||||
- (match_operand:VDQIW 2 "s_register_operand" "")))]
|
||||
+ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx neg = gen_reg_rtx (<MODE>mode);
|
||||
-
|
||||
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
|
||||
- emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
|
||||
-
|
||||
+ if (REG_P (operands[2]))
|
||||
+ {
|
||||
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
|
||||
+ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vlshr<mode>3"
|
||||
[(set (match_operand:VDQIW 0 "s_register_operand" "")
|
||||
(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
|
||||
- (match_operand:VDQIW 2 "s_register_operand" "")))]
|
||||
+ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx neg = gen_reg_rtx (<MODE>mode);
|
||||
-
|
||||
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
|
||||
- emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
|
||||
-
|
||||
+ if (REG_P (operands[2]))
|
||||
+ {
|
||||
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
|
||||
+ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
||||
=== modified file 'gcc/config/arm/predicates.md'
|
||||
--- old/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000
|
||||
+++ new/gcc/config/arm/predicates.md 2011-07-04 14:03:49 +0000
|
||||
@@ -585,6 +585,26 @@
|
||||
return neon_immediate_valid_for_move (op, mode, NULL, NULL);
|
||||
})
|
||||
|
||||
+(define_predicate "imm_for_neon_lshift_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true);
|
||||
+})
|
||||
+
|
||||
+(define_predicate "imm_for_neon_rshift_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false);
|
||||
+})
|
||||
+
|
||||
+(define_predicate "imm_lshift_or_reg_neon"
|
||||
+ (ior (match_operand 0 "s_register_operand")
|
||||
+ (match_operand 0 "imm_for_neon_lshift_operand")))
|
||||
+
|
||||
+(define_predicate "imm_rshift_or_reg_neon"
|
||||
+ (ior (match_operand 0 "s_register_operand")
|
||||
+ (match_operand 0 "imm_for_neon_rshift_operand")))
|
||||
+
|
||||
(define_predicate "imm_for_neon_logic_operand"
|
||||
(match_code "const_vector")
|
||||
{
|
||||
|
||||
=== modified file 'gcc/optabs.c'
|
||||
--- old/gcc/optabs.c 2011-03-04 10:27:10 +0000
|
||||
+++ new/gcc/optabs.c 2011-07-04 14:03:49 +0000
|
||||
@@ -6171,6 +6171,9 @@
|
||||
init_optab (usashl_optab, US_ASHIFT);
|
||||
init_optab (ashr_optab, ASHIFTRT);
|
||||
init_optab (lshr_optab, LSHIFTRT);
|
||||
+ init_optabv (vashl_optab, ASHIFT);
|
||||
+ init_optabv (vashr_optab, ASHIFTRT);
|
||||
+ init_optabv (vlshr_optab, LSHIFTRT);
|
||||
init_optab (rotl_optab, ROTATE);
|
||||
init_optab (rotr_optab, ROTATERT);
|
||||
init_optab (smin_optab, SMIN);
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 2011-07-04 14:03:49 +0000
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
|
||||
+/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */
|
||||
+
|
||||
+/* Verify that VSHR immediate is used. */
|
||||
+void f1(int n, unsigned int x[], unsigned int y[]) {
|
||||
+ int i;
|
||||
+ for (i = 0; i < n; ++i)
|
||||
+ y[i] = x[i] >> 3;
|
||||
+}
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 2011-07-04 14:03:49 +0000
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
|
||||
+/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */
|
||||
+
|
||||
+/* Verify that VSHR immediate is used. */
|
||||
+void f1(int n, int x[], int y[]) {
|
||||
+ int i;
|
||||
+ for (i = 0; i < n; ++i)
|
||||
+ y[i] = x[i] << 3;
|
||||
+}
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 2011-07-04 14:03:49 +0000
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
|
||||
+/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */
|
||||
+
|
||||
+/* Verify that VSHR immediate is used. */
|
||||
+void f1(int n, int x[], int y[]) {
|
||||
+ int i;
|
||||
+ for (i = 0; i < n; ++i)
|
||||
+ y[i] = x[i] >> 3;
|
||||
+}
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
2011-07-13 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
gcc/
|
||||
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* reload1.c (choose_reload_regs): Use mode sizes to check whether
|
||||
an old reload register completely defines the required value.
|
||||
|
||||
gcc/testsuite/
|
||||
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* gcc.target/arm/neon-modes-3.c: New test.
|
||||
|
||||
=== modified file 'gcc/reload1.c'
|
||||
--- old/gcc/reload1.c 2011-07-01 09:19:21 +0000
|
||||
+++ new/gcc/reload1.c 2011-07-11 10:06:50 +0000
|
||||
@@ -6451,6 +6451,8 @@
|
||||
|
||||
if (regno >= 0
|
||||
&& reg_last_reload_reg[regno] != 0
|
||||
+ && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno]))
|
||||
+ >= GET_MODE_SIZE (mode) + byte)
|
||||
#ifdef CANNOT_CHANGE_MODE_CLASS
|
||||
/* Verify that the register it's in can be used in
|
||||
mode MODE. */
|
||||
@@ -6462,24 +6464,12 @@
|
||||
{
|
||||
enum reg_class rclass = rld[r].rclass, last_class;
|
||||
rtx last_reg = reg_last_reload_reg[regno];
|
||||
- enum machine_mode need_mode;
|
||||
|
||||
i = REGNO (last_reg);
|
||||
i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode);
|
||||
last_class = REGNO_REG_CLASS (i);
|
||||
|
||||
- if (byte == 0)
|
||||
- need_mode = mode;
|
||||
- else
|
||||
- need_mode
|
||||
- = smallest_mode_for_size
|
||||
- (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT,
|
||||
- GET_MODE_CLASS (mode) == MODE_PARTIAL_INT
|
||||
- ? MODE_INT : GET_MODE_CLASS (mode));
|
||||
-
|
||||
- if ((GET_MODE_SIZE (GET_MODE (last_reg))
|
||||
- >= GET_MODE_SIZE (need_mode))
|
||||
- && reg_reloaded_contents[i] == regno
|
||||
+ if (reg_reloaded_contents[i] == regno
|
||||
&& TEST_HARD_REG_BIT (reg_reloaded_valid, i)
|
||||
&& HARD_REGNO_MODE_OK (i, rld[r].mode)
|
||||
&& (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i)
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-modes-3.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-modes-3.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-modes-3.c 2011-07-11 10:06:50 +0000
|
||||
@@ -0,0 +1,61 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O" } */
|
||||
+/* { dg-add-options arm_neon } */
|
||||
+
|
||||
+#include <arm_neon.h>
|
||||
+
|
||||
+void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n)
|
||||
+{
|
||||
+ float32x4x4_t a5, a6, a7, a8, a9;
|
||||
+ int i;
|
||||
+
|
||||
+ a5 = *src;
|
||||
+ a6 = *src;
|
||||
+ a7 = *src;
|
||||
+ a8 = *src;
|
||||
+ a9 = *src;
|
||||
+ while (n--)
|
||||
+ {
|
||||
+ for (i = 0; i < 8; i++)
|
||||
+ {
|
||||
+ float32x4x4_t a0, a1, a2, a3, a4;
|
||||
+
|
||||
+ a0 = *src;
|
||||
+ a1 = *src;
|
||||
+ a2 = *src;
|
||||
+ a3 = *src;
|
||||
+ a4 = *src;
|
||||
+ *src = a0;
|
||||
+ *dest = a0.val[0];
|
||||
+ *dest = a0.val[3];
|
||||
+ *src = a1;
|
||||
+ *dest = a1.val[0];
|
||||
+ *dest = a1.val[3];
|
||||
+ *src = a2;
|
||||
+ *dest = a2.val[0];
|
||||
+ *dest = a2.val[3];
|
||||
+ *src = a3;
|
||||
+ *dest = a3.val[0];
|
||||
+ *dest = a3.val[3];
|
||||
+ *src = a4;
|
||||
+ *dest = a4.val[0];
|
||||
+ *dest = a4.val[3];
|
||||
+ }
|
||||
+ *src = a5;
|
||||
+ *dest = a5.val[0];
|
||||
+ *dest = a5.val[3];
|
||||
+ *src = a6;
|
||||
+ *dest = a6.val[0];
|
||||
+ *dest = a6.val[3];
|
||||
+ *src = a7;
|
||||
+ *dest = a7.val[0];
|
||||
+ *dest = a7.val[3];
|
||||
+ *src = a8;
|
||||
+ *dest = a8.val[0];
|
||||
+ *dest = a8.val[3];
|
||||
+ *src = a9;
|
||||
+ *dest = a9.val[0];
|
||||
+ *dest = a9.val[3];
|
||||
+ }
|
||||
+}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
2011-07-15 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-04-05 Eric Botcazou <ebotcazou@adacore.com>
|
||||
|
||||
* ifcvt.c (cond_exec_process_insns): Disallow converting a block
|
||||
that contains the prologue.
|
||||
|
||||
gcc/testsuite/
|
||||
Backport from mainline:
|
||||
|
||||
2011-04-01 Bernd Schmidt <bernds@codesourcery.com>
|
||||
|
||||
* gcc.c-torture/compile/20110401-1.c: New test.
|
||||
|
||||
=== modified file 'gcc/ifcvt.c'
|
||||
--- old/gcc/ifcvt.c 2010-12-14 00:23:40 +0000
|
||||
+++ new/gcc/ifcvt.c 2011-07-11 04:02:28 +0000
|
||||
@@ -1,5 +1,6 @@
|
||||
/* If-conversion support.
|
||||
- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
|
||||
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010,
|
||||
+ 2011
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
@@ -304,6 +305,10 @@
|
||||
|
||||
for (insn = start; ; insn = NEXT_INSN (insn))
|
||||
{
|
||||
+ /* dwarf2out can't cope with conditional prologues. */
|
||||
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
|
||||
+ return FALSE;
|
||||
+
|
||||
if (NOTE_P (insn) || DEBUG_INSN_P (insn))
|
||||
goto insn_done;
|
||||
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.c-torture/compile/20110401-1.c'
|
||||
--- old/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 2011-07-11 04:02:28 +0000
|
||||
@@ -0,0 +1,22 @@
|
||||
+void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len)
|
||||
+{
|
||||
+ int k;
|
||||
+ unsigned char temp[4];
|
||||
+ if (len < 128) {
|
||||
+ if (ans != ((void *) 0))
|
||||
+ ans[0] = (unsigned char) len;
|
||||
+ *ans_len = 1;
|
||||
+ } else {
|
||||
+ k = 0;
|
||||
+ while (len) {
|
||||
+ temp[k++] = len & 0xFF;
|
||||
+ len = len >> 8;
|
||||
+ }
|
||||
+ *ans_len = k + 1;
|
||||
+ if (ans != ((void *) 0)) {
|
||||
+ ans[0] = ((unsigned char) k & 0x7F) + 128;
|
||||
+ while (k--)
|
||||
+ ans[*ans_len - 1 - k] = temp[k];
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
2011-07-15 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
2011-03-22 Eric Botcazou <ebotcazou@adacore.com>
|
||||
|
||||
* combine.c (simplify_set): Try harder to find the best CC mode when
|
||||
simplifying a nested COMPARE on the RHS.
|
||||
|
||||
=== modified file 'gcc/combine.c'
|
||||
--- old/gcc/combine.c 2011-05-27 14:31:18 +0000
|
||||
+++ new/gcc/combine.c 2011-07-11 03:52:31 +0000
|
||||
@@ -6287,10 +6287,18 @@
|
||||
enum rtx_code new_code;
|
||||
rtx op0, op1, tmp;
|
||||
int other_changed = 0;
|
||||
+ rtx inner_compare = NULL_RTX;
|
||||
enum machine_mode compare_mode = GET_MODE (dest);
|
||||
|
||||
if (GET_CODE (src) == COMPARE)
|
||||
- op0 = XEXP (src, 0), op1 = XEXP (src, 1);
|
||||
+ {
|
||||
+ op0 = XEXP (src, 0), op1 = XEXP (src, 1);
|
||||
+ if (GET_CODE (op0) == COMPARE && op1 == const0_rtx)
|
||||
+ {
|
||||
+ inner_compare = op0;
|
||||
+ op0 = XEXP (inner_compare, 0), op1 = XEXP (inner_compare, 1);
|
||||
+ }
|
||||
+ }
|
||||
else
|
||||
op0 = src, op1 = CONST0_RTX (GET_MODE (src));
|
||||
|
||||
@@ -6332,6 +6340,12 @@
|
||||
need to use a different CC mode here. */
|
||||
if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
|
||||
compare_mode = GET_MODE (op0);
|
||||
+ else if (inner_compare
|
||||
+ && GET_MODE_CLASS (GET_MODE (inner_compare)) == MODE_CC
|
||||
+ && new_code == old_code
|
||||
+ && op0 == XEXP (inner_compare, 0)
|
||||
+ && op1 == XEXP (inner_compare, 1))
|
||||
+ compare_mode = GET_MODE (inner_compare);
|
||||
else
|
||||
compare_mode = SELECT_CC_MODE (new_code, op0, op1);
|
||||
|
||||
|
||||
@@ -0,0 +1,192 @@
|
||||
2011-07-15 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
2011-06-29 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* config/arm/unwind-arm.c (enum __cxa_type_match_result): New.
|
||||
(cxa_type_match): Correct declaration.
|
||||
(__gnu_unwind_pr_common): Reconstruct
|
||||
additional indirection when __cxa_type_match returns
|
||||
succeeded_with_ptr_to_base.
|
||||
|
||||
libstdc++-v3/
|
||||
Backport from mainline:
|
||||
|
||||
2011-06-29 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* libsupc++/eh_arm.c (__cxa_type_match): Construct address of
|
||||
thrown object here. Return succeded_with_ptr_to_base for all
|
||||
pointer cases.
|
||||
|
||||
=== modified file 'gcc/config/arm/unwind-arm.c'
|
||||
--- old/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000
|
||||
+++ new/gcc/config/arm/unwind-arm.c 2011-07-11 03:35:44 +0000
|
||||
@@ -32,13 +32,18 @@
|
||||
typedef unsigned char bool;
|
||||
|
||||
typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */
|
||||
+enum __cxa_type_match_result
|
||||
+ {
|
||||
+ ctm_failed = 0,
|
||||
+ ctm_succeeded = 1,
|
||||
+ ctm_succeeded_with_ptr_to_base = 2
|
||||
+ };
|
||||
|
||||
void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp);
|
||||
bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp);
|
||||
-bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp,
|
||||
- const type_info *rttip,
|
||||
- bool is_reference,
|
||||
- void **matched_object);
|
||||
+enum __cxa_type_match_result __attribute__((weak)) __cxa_type_match
|
||||
+ (_Unwind_Control_Block *ucbp, const type_info *rttip,
|
||||
+ bool is_reference, void **matched_object);
|
||||
|
||||
_Unwind_Ptr __attribute__((weak))
|
||||
__gnu_Unwind_Find_exidx (_Unwind_Ptr, int *);
|
||||
@@ -1107,6 +1112,7 @@
|
||||
_uw rtti;
|
||||
bool is_reference = (data[0] & uint32_highbit) != 0;
|
||||
void *matched;
|
||||
+ enum __cxa_type_match_result match_type;
|
||||
|
||||
/* Check for no-throw areas. */
|
||||
if (data[1] == (_uw) -2)
|
||||
@@ -1118,17 +1124,31 @@
|
||||
{
|
||||
/* Match a catch specification. */
|
||||
rtti = _Unwind_decode_target2 ((_uw) &data[1]);
|
||||
- if (!__cxa_type_match (ucbp, (type_info *) rtti,
|
||||
- is_reference,
|
||||
- &matched))
|
||||
- matched = (void *)0;
|
||||
+ match_type = __cxa_type_match (ucbp,
|
||||
+ (type_info *) rtti,
|
||||
+ is_reference,
|
||||
+ &matched);
|
||||
}
|
||||
+ else
|
||||
+ match_type = ctm_succeeded;
|
||||
|
||||
- if (matched)
|
||||
+ if (match_type)
|
||||
{
|
||||
ucbp->barrier_cache.sp =
|
||||
_Unwind_GetGR (context, R_SP);
|
||||
- ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
|
||||
+ // ctm_succeeded_with_ptr_to_base really
|
||||
+ // means _c_t_m indirected the pointer
|
||||
+ // object. We have to reconstruct the
|
||||
+ // additional pointer layer by using a temporary.
|
||||
+ if (match_type == ctm_succeeded_with_ptr_to_base)
|
||||
+ {
|
||||
+ ucbp->barrier_cache.bitpattern[2]
|
||||
+ = (_uw) matched;
|
||||
+ ucbp->barrier_cache.bitpattern[0]
|
||||
+ = (_uw) &ucbp->barrier_cache.bitpattern[2];
|
||||
+ }
|
||||
+ else
|
||||
+ ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
|
||||
ucbp->barrier_cache.bitpattern[1] = (_uw) data;
|
||||
return _URC_HANDLER_FOUND;
|
||||
}
|
||||
|
||||
=== modified file 'libstdc++-v3/libsupc++/eh_arm.cc'
|
||||
--- old/libstdc++-v3/libsupc++/eh_arm.cc 2011-01-03 20:52:22 +0000
|
||||
+++ new/libstdc++-v3/libsupc++/eh_arm.cc 2011-07-11 03:35:44 +0000
|
||||
@@ -30,10 +30,11 @@
|
||||
using namespace __cxxabiv1;
|
||||
|
||||
|
||||
-// Given the thrown type THROW_TYPE, pointer to a variable containing a
|
||||
-// pointer to the exception object THROWN_PTR_P and a type CATCH_TYPE to
|
||||
-// compare against, return whether or not there is a match and if so,
|
||||
-// update *THROWN_PTR_P.
|
||||
+// Given the thrown type THROW_TYPE, exception object UE_HEADER and a
|
||||
+// type CATCH_TYPE to compare against, return whether or not there is
|
||||
+// a match and if so, update *THROWN_PTR_P to point to either the
|
||||
+// type-matched object, or in the case of a pointer type, the object
|
||||
+// pointed to by the pointer.
|
||||
|
||||
extern "C" __cxa_type_match_result
|
||||
__cxa_type_match(_Unwind_Exception* ue_header,
|
||||
@@ -41,51 +42,51 @@
|
||||
bool is_reference __attribute__((__unused__)),
|
||||
void** thrown_ptr_p)
|
||||
{
|
||||
- bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class);
|
||||
- bool foreign_exception = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class);
|
||||
- bool dependent_exception =
|
||||
- __is_dependent_exception(ue_header->exception_class);
|
||||
+ bool forced_unwind
|
||||
+ = __is_gxx_forced_unwind_class(ue_header->exception_class);
|
||||
+ bool foreign_exception
|
||||
+ = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class);
|
||||
+ bool dependent_exception
|
||||
+ = __is_dependent_exception(ue_header->exception_class);
|
||||
__cxa_exception* xh = __get_exception_header_from_ue(ue_header);
|
||||
__cxa_dependent_exception *dx = __get_dependent_exception_from_ue(ue_header);
|
||||
const std::type_info* throw_type;
|
||||
+ void *thrown_ptr = 0;
|
||||
|
||||
if (forced_unwind)
|
||||
throw_type = &typeid(abi::__forced_unwind);
|
||||
else if (foreign_exception)
|
||||
throw_type = &typeid(abi::__foreign_exception);
|
||||
- else if (dependent_exception)
|
||||
- throw_type = __get_exception_header_from_obj
|
||||
- (dx->primaryException)->exceptionType;
|
||||
else
|
||||
- throw_type = xh->exceptionType;
|
||||
-
|
||||
- void* thrown_ptr = *thrown_ptr_p;
|
||||
+ {
|
||||
+ if (dependent_exception)
|
||||
+ xh = __get_exception_header_from_obj (dx->primaryException);
|
||||
+ throw_type = xh->exceptionType;
|
||||
+ // We used to require the caller set the target of thrown_ptr_p,
|
||||
+ // but that's incorrect -- the EHABI makes no such requirement
|
||||
+ // -- and not all callers will set it. Fortunately callers that
|
||||
+ // do initialize will always pass us the value we calculate
|
||||
+ // here, so there's no backwards compatibility problem.
|
||||
+ thrown_ptr = __get_object_from_ue (ue_header);
|
||||
+ }
|
||||
+
|
||||
+ __cxa_type_match_result result = ctm_succeeded;
|
||||
|
||||
// Pointer types need to adjust the actual pointer, not
|
||||
// the pointer to pointer that is the exception object.
|
||||
// This also has the effect of passing pointer types
|
||||
// "by value" through the __cxa_begin_catch return value.
|
||||
if (throw_type->__is_pointer_p())
|
||||
- thrown_ptr = *(void**) thrown_ptr;
|
||||
+ {
|
||||
+ thrown_ptr = *(void**) thrown_ptr;
|
||||
+ // We need to indicate the indirection to our caller.
|
||||
+ result = ctm_succeeded_with_ptr_to_base;
|
||||
+ }
|
||||
|
||||
if (catch_type->__do_catch(throw_type, &thrown_ptr, 1))
|
||||
{
|
||||
*thrown_ptr_p = thrown_ptr;
|
||||
-
|
||||
- if (typeid(*catch_type) == typeid (typeid(void*)))
|
||||
- {
|
||||
- const __pointer_type_info *catch_pointer_type =
|
||||
- static_cast<const __pointer_type_info *> (catch_type);
|
||||
- const __pointer_type_info *throw_pointer_type =
|
||||
- static_cast<const __pointer_type_info *> (throw_type);
|
||||
-
|
||||
- if (typeid (*catch_pointer_type->__pointee) != typeid (void)
|
||||
- && (*catch_pointer_type->__pointee !=
|
||||
- *throw_pointer_type->__pointee))
|
||||
- return ctm_succeeded_with_ptr_to_base;
|
||||
- }
|
||||
-
|
||||
- return ctm_succeeded;
|
||||
+ return result;
|
||||
}
|
||||
|
||||
return ctm_failed;
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,741 @@
|
||||
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
PR middle-end/49736
|
||||
* expr.c (all_zeros_p): Undo bogus part of last change.
|
||||
|
||||
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
gcc/cp/
|
||||
2011-07-13 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* typeck2.c (split_nonconstant_init_1): Pass the initializer directly,
|
||||
rather than a pointer to it. Return true if the whole of the value
|
||||
was initialized by the generated statements. Use
|
||||
complete_ctor_at_level_p instead of count_type_elements.
|
||||
|
||||
gcc/
|
||||
2011-07-13 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* tree.h (categorize_ctor_elements): Remove comment. Fix long line.
|
||||
(count_type_elements): Delete.
|
||||
(complete_ctor_at_level_p): Declare.
|
||||
* expr.c (flexible_array_member_p): New function, split out from...
|
||||
(count_type_elements): ...here. Make static. Replace allow_flexarr
|
||||
parameter with for_ctor_p. When for_ctor_p is true, return the
|
||||
number of elements that should appear in the top-level constructor,
|
||||
otherwise return an estimate of the number of scalars.
|
||||
(categorize_ctor_elements): Replace p_must_clear with p_complete.
|
||||
(categorize_ctor_elements_1): Likewise. Use complete_ctor_at_level_p.
|
||||
(complete_ctor_at_level_p): New function, borrowing union logic
|
||||
from old categorize_ctor_elements_1.
|
||||
(mostly_zeros_p): Return true if the constructor is not complete.
|
||||
(all_zeros_p): Update call to categorize_ctor_elements.
|
||||
* gimplify.c (gimplify_init_constructor): Update call to
|
||||
categorize_ctor_elements. Don't call count_type_elements.
|
||||
Unconditionally prevent clearing for variable-sized types,
|
||||
otherwise rely on categorize_ctor_elements to detect
|
||||
incomplete initializers.
|
||||
|
||||
gcc/testsuite/
|
||||
2011-07-13 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
* gcc.target/arm/pr48183.c: New test.
|
||||
|
||||
=== modified file 'gcc/cp/typeck2.c'
|
||||
--- old/gcc/cp/typeck2.c 2011-05-20 21:29:14 +0000
|
||||
+++ new/gcc/cp/typeck2.c 2011-07-13 13:17:31 +0000
|
||||
@@ -473,18 +473,20 @@
|
||||
|
||||
|
||||
/* The recursive part of split_nonconstant_init. DEST is an lvalue
|
||||
- expression to which INIT should be assigned. INIT is a CONSTRUCTOR. */
|
||||
+ expression to which INIT should be assigned. INIT is a CONSTRUCTOR.
|
||||
+ Return true if the whole of the value was initialized by the
|
||||
+ generated statements. */
|
||||
|
||||
-static void
|
||||
-split_nonconstant_init_1 (tree dest, tree *initp)
|
||||
+static bool
|
||||
+split_nonconstant_init_1 (tree dest, tree init)
|
||||
{
|
||||
unsigned HOST_WIDE_INT idx;
|
||||
- tree init = *initp;
|
||||
tree field_index, value;
|
||||
tree type = TREE_TYPE (dest);
|
||||
tree inner_type = NULL;
|
||||
bool array_type_p = false;
|
||||
- HOST_WIDE_INT num_type_elements, num_initialized_elements;
|
||||
+ bool complete_p = true;
|
||||
+ HOST_WIDE_INT num_split_elts = 0;
|
||||
|
||||
switch (TREE_CODE (type))
|
||||
{
|
||||
@@ -496,7 +498,6 @@
|
||||
case RECORD_TYPE:
|
||||
case UNION_TYPE:
|
||||
case QUAL_UNION_TYPE:
|
||||
- num_initialized_elements = 0;
|
||||
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx,
|
||||
field_index, value)
|
||||
{
|
||||
@@ -519,13 +520,14 @@
|
||||
sub = build3 (COMPONENT_REF, inner_type, dest, field_index,
|
||||
NULL_TREE);
|
||||
|
||||
- split_nonconstant_init_1 (sub, &value);
|
||||
+ if (!split_nonconstant_init_1 (sub, value))
|
||||
+ complete_p = false;
|
||||
+ num_split_elts++;
|
||||
}
|
||||
else if (!initializer_constant_valid_p (value, inner_type))
|
||||
{
|
||||
tree code;
|
||||
tree sub;
|
||||
- HOST_WIDE_INT inner_elements;
|
||||
|
||||
/* FIXME: Ordered removal is O(1) so the whole function is
|
||||
worst-case quadratic. This could be fixed using an aside
|
||||
@@ -549,21 +551,9 @@
|
||||
code = build_stmt (input_location, EXPR_STMT, code);
|
||||
add_stmt (code);
|
||||
|
||||
- inner_elements = count_type_elements (inner_type, true);
|
||||
- if (inner_elements < 0)
|
||||
- num_initialized_elements = -1;
|
||||
- else if (num_initialized_elements >= 0)
|
||||
- num_initialized_elements += inner_elements;
|
||||
- continue;
|
||||
+ num_split_elts++;
|
||||
}
|
||||
}
|
||||
-
|
||||
- num_type_elements = count_type_elements (type, true);
|
||||
- /* If all elements of the initializer are non-constant and
|
||||
- have been split out, we don't need the empty CONSTRUCTOR. */
|
||||
- if (num_type_elements > 0
|
||||
- && num_type_elements == num_initialized_elements)
|
||||
- *initp = NULL;
|
||||
break;
|
||||
|
||||
case VECTOR_TYPE:
|
||||
@@ -575,6 +565,7 @@
|
||||
code = build2 (MODIFY_EXPR, type, dest, cons);
|
||||
code = build_stmt (input_location, EXPR_STMT, code);
|
||||
add_stmt (code);
|
||||
+ num_split_elts += CONSTRUCTOR_NELTS (init);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -584,6 +575,8 @@
|
||||
|
||||
/* The rest of the initializer is now a constant. */
|
||||
TREE_CONSTANT (init) = 1;
|
||||
+ return complete_p && complete_ctor_at_level_p (TREE_TYPE (init),
|
||||
+ num_split_elts, inner_type);
|
||||
}
|
||||
|
||||
/* A subroutine of store_init_value. Splits non-constant static
|
||||
@@ -599,7 +592,8 @@
|
||||
if (TREE_CODE (init) == CONSTRUCTOR)
|
||||
{
|
||||
code = push_stmt_list ();
|
||||
- split_nonconstant_init_1 (dest, &init);
|
||||
+ if (split_nonconstant_init_1 (dest, init))
|
||||
+ init = NULL_TREE;
|
||||
code = pop_stmt_list (code);
|
||||
DECL_INITIAL (dest) = init;
|
||||
TREE_READONLY (dest) = 0;
|
||||
|
||||
=== modified file 'gcc/expr.c'
|
||||
--- old/gcc/expr.c 2011-06-02 12:12:00 +0000
|
||||
+++ new/gcc/expr.c 2011-07-14 11:52:32 +0000
|
||||
@@ -4866,16 +4866,136 @@
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
+/* Return true if field F of structure TYPE is a flexible array. */
|
||||
+
|
||||
+static bool
|
||||
+flexible_array_member_p (const_tree f, const_tree type)
|
||||
+{
|
||||
+ const_tree tf;
|
||||
+
|
||||
+ tf = TREE_TYPE (f);
|
||||
+ return (DECL_CHAIN (f) == NULL
|
||||
+ && TREE_CODE (tf) == ARRAY_TYPE
|
||||
+ && TYPE_DOMAIN (tf)
|
||||
+ && TYPE_MIN_VALUE (TYPE_DOMAIN (tf))
|
||||
+ && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf)))
|
||||
+ && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf))
|
||||
+ && int_size_in_bytes (type) >= 0);
|
||||
+}
|
||||
+
|
||||
+/* If FOR_CTOR_P, return the number of top-level elements that a constructor
|
||||
+ must have in order for it to completely initialize a value of type TYPE.
|
||||
+ Return -1 if the number isn't known.
|
||||
+
|
||||
+ If !FOR_CTOR_P, return an estimate of the number of scalars in TYPE. */
|
||||
+
|
||||
+static HOST_WIDE_INT
|
||||
+count_type_elements (const_tree type, bool for_ctor_p)
|
||||
+{
|
||||
+ switch (TREE_CODE (type))
|
||||
+ {
|
||||
+ case ARRAY_TYPE:
|
||||
+ {
|
||||
+ tree nelts;
|
||||
+
|
||||
+ nelts = array_type_nelts (type);
|
||||
+ if (nelts && host_integerp (nelts, 1))
|
||||
+ {
|
||||
+ unsigned HOST_WIDE_INT n;
|
||||
+
|
||||
+ n = tree_low_cst (nelts, 1) + 1;
|
||||
+ if (n == 0 || for_ctor_p)
|
||||
+ return n;
|
||||
+ else
|
||||
+ return n * count_type_elements (TREE_TYPE (type), false);
|
||||
+ }
|
||||
+ return for_ctor_p ? -1 : 1;
|
||||
+ }
|
||||
+
|
||||
+ case RECORD_TYPE:
|
||||
+ {
|
||||
+ unsigned HOST_WIDE_INT n;
|
||||
+ tree f;
|
||||
+
|
||||
+ n = 0;
|
||||
+ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
|
||||
+ if (TREE_CODE (f) == FIELD_DECL)
|
||||
+ {
|
||||
+ if (!for_ctor_p)
|
||||
+ n += count_type_elements (TREE_TYPE (f), false);
|
||||
+ else if (!flexible_array_member_p (f, type))
|
||||
+ /* Don't count flexible arrays, which are not supposed
|
||||
+ to be initialized. */
|
||||
+ n += 1;
|
||||
+ }
|
||||
+
|
||||
+ return n;
|
||||
+ }
|
||||
+
|
||||
+ case UNION_TYPE:
|
||||
+ case QUAL_UNION_TYPE:
|
||||
+ {
|
||||
+ tree f;
|
||||
+ HOST_WIDE_INT n, m;
|
||||
+
|
||||
+ gcc_assert (!for_ctor_p);
|
||||
+ /* Estimate the number of scalars in each field and pick the
|
||||
+ maximum. Other estimates would do instead; the idea is simply
|
||||
+ to make sure that the estimate is not sensitive to the ordering
|
||||
+ of the fields. */
|
||||
+ n = 1;
|
||||
+ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
|
||||
+ if (TREE_CODE (f) == FIELD_DECL)
|
||||
+ {
|
||||
+ m = count_type_elements (TREE_TYPE (f), false);
|
||||
+ /* If the field doesn't span the whole union, add an extra
|
||||
+ scalar for the rest. */
|
||||
+ if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)),
|
||||
+ TYPE_SIZE (type)) != 1)
|
||||
+ m++;
|
||||
+ if (n < m)
|
||||
+ n = m;
|
||||
+ }
|
||||
+ return n;
|
||||
+ }
|
||||
+
|
||||
+ case COMPLEX_TYPE:
|
||||
+ return 2;
|
||||
+
|
||||
+ case VECTOR_TYPE:
|
||||
+ return TYPE_VECTOR_SUBPARTS (type);
|
||||
+
|
||||
+ case INTEGER_TYPE:
|
||||
+ case REAL_TYPE:
|
||||
+ case FIXED_POINT_TYPE:
|
||||
+ case ENUMERAL_TYPE:
|
||||
+ case BOOLEAN_TYPE:
|
||||
+ case POINTER_TYPE:
|
||||
+ case OFFSET_TYPE:
|
||||
+ case REFERENCE_TYPE:
|
||||
+ return 1;
|
||||
+
|
||||
+ case ERROR_MARK:
|
||||
+ return 0;
|
||||
+
|
||||
+ case VOID_TYPE:
|
||||
+ case METHOD_TYPE:
|
||||
+ case FUNCTION_TYPE:
|
||||
+ case LANG_TYPE:
|
||||
+ default:
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Helper for categorize_ctor_elements. Identical interface. */
|
||||
|
||||
static bool
|
||||
categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
||||
- HOST_WIDE_INT *p_elt_count,
|
||||
- bool *p_must_clear)
|
||||
+ HOST_WIDE_INT *p_init_elts, bool *p_complete)
|
||||
{
|
||||
unsigned HOST_WIDE_INT idx;
|
||||
- HOST_WIDE_INT nz_elts, elt_count;
|
||||
- tree value, purpose;
|
||||
+ HOST_WIDE_INT nz_elts, init_elts, num_fields;
|
||||
+ tree value, purpose, elt_type;
|
||||
|
||||
/* Whether CTOR is a valid constant initializer, in accordance with what
|
||||
initializer_constant_valid_p does. If inferred from the constructor
|
||||
@@ -4884,7 +5004,9 @@
|
||||
bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
|
||||
|
||||
nz_elts = 0;
|
||||
- elt_count = 0;
|
||||
+ init_elts = 0;
|
||||
+ num_fields = 0;
|
||||
+ elt_type = NULL_TREE;
|
||||
|
||||
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value)
|
||||
{
|
||||
@@ -4899,6 +5021,8 @@
|
||||
mult = (tree_low_cst (hi_index, 1)
|
||||
- tree_low_cst (lo_index, 1) + 1);
|
||||
}
|
||||
+ num_fields += mult;
|
||||
+ elt_type = TREE_TYPE (value);
|
||||
|
||||
switch (TREE_CODE (value))
|
||||
{
|
||||
@@ -4906,11 +5030,11 @@
|
||||
{
|
||||
HOST_WIDE_INT nz = 0, ic = 0;
|
||||
|
||||
- bool const_elt_p
|
||||
- = categorize_ctor_elements_1 (value, &nz, &ic, p_must_clear);
|
||||
+ bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic,
|
||||
+ p_complete);
|
||||
|
||||
nz_elts += mult * nz;
|
||||
- elt_count += mult * ic;
|
||||
+ init_elts += mult * ic;
|
||||
|
||||
if (const_from_elts_p && const_p)
|
||||
const_p = const_elt_p;
|
||||
@@ -4922,12 +5046,12 @@
|
||||
case FIXED_CST:
|
||||
if (!initializer_zerop (value))
|
||||
nz_elts += mult;
|
||||
- elt_count += mult;
|
||||
+ init_elts += mult;
|
||||
break;
|
||||
|
||||
case STRING_CST:
|
||||
nz_elts += mult * TREE_STRING_LENGTH (value);
|
||||
- elt_count += mult * TREE_STRING_LENGTH (value);
|
||||
+ init_elts += mult * TREE_STRING_LENGTH (value);
|
||||
break;
|
||||
|
||||
case COMPLEX_CST:
|
||||
@@ -4935,7 +5059,7 @@
|
||||
nz_elts += mult;
|
||||
if (!initializer_zerop (TREE_IMAGPART (value)))
|
||||
nz_elts += mult;
|
||||
- elt_count += mult;
|
||||
+ init_elts += mult;
|
||||
break;
|
||||
|
||||
case VECTOR_CST:
|
||||
@@ -4945,65 +5069,31 @@
|
||||
{
|
||||
if (!initializer_zerop (TREE_VALUE (v)))
|
||||
nz_elts += mult;
|
||||
- elt_count += mult;
|
||||
+ init_elts += mult;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
{
|
||||
- HOST_WIDE_INT tc = count_type_elements (TREE_TYPE (value), true);
|
||||
- if (tc < 1)
|
||||
- tc = 1;
|
||||
+ HOST_WIDE_INT tc = count_type_elements (elt_type, false);
|
||||
nz_elts += mult * tc;
|
||||
- elt_count += mult * tc;
|
||||
+ init_elts += mult * tc;
|
||||
|
||||
if (const_from_elts_p && const_p)
|
||||
- const_p = initializer_constant_valid_p (value, TREE_TYPE (value))
|
||||
+ const_p = initializer_constant_valid_p (value, elt_type)
|
||||
!= NULL_TREE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- if (!*p_must_clear
|
||||
- && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE
|
||||
- || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE))
|
||||
- {
|
||||
- tree init_sub_type;
|
||||
- bool clear_this = true;
|
||||
-
|
||||
- if (!VEC_empty (constructor_elt, CONSTRUCTOR_ELTS (ctor)))
|
||||
- {
|
||||
- /* We don't expect more than one element of the union to be
|
||||
- initialized. Not sure what we should do otherwise... */
|
||||
- gcc_assert (VEC_length (constructor_elt, CONSTRUCTOR_ELTS (ctor))
|
||||
- == 1);
|
||||
-
|
||||
- init_sub_type = TREE_TYPE (VEC_index (constructor_elt,
|
||||
- CONSTRUCTOR_ELTS (ctor),
|
||||
- 0)->value);
|
||||
-
|
||||
- /* ??? We could look at each element of the union, and find the
|
||||
- largest element. Which would avoid comparing the size of the
|
||||
- initialized element against any tail padding in the union.
|
||||
- Doesn't seem worth the effort... */
|
||||
- if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)),
|
||||
- TYPE_SIZE (init_sub_type)) == 1)
|
||||
- {
|
||||
- /* And now we have to find out if the element itself is fully
|
||||
- constructed. E.g. for union { struct { int a, b; } s; } u
|
||||
- = { .s = { .a = 1 } }. */
|
||||
- if (elt_count == count_type_elements (init_sub_type, false))
|
||||
- clear_this = false;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- *p_must_clear = clear_this;
|
||||
- }
|
||||
+ if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
|
||||
+ num_fields, elt_type))
|
||||
+ *p_complete = false;
|
||||
|
||||
*p_nz_elts += nz_elts;
|
||||
- *p_elt_count += elt_count;
|
||||
+ *p_init_elts += init_elts;
|
||||
|
||||
return const_p;
|
||||
}
|
||||
@@ -5013,111 +5103,50 @@
|
||||
and place it in *P_NZ_ELTS;
|
||||
* how many scalar fields in total are in CTOR,
|
||||
and place it in *P_ELT_COUNT.
|
||||
- * if a type is a union, and the initializer from the constructor
|
||||
- is not the largest element in the union, then set *p_must_clear.
|
||||
+ * whether the constructor is complete -- in the sense that every
|
||||
+ meaningful byte is explicitly given a value --
|
||||
+ and place it in *P_COMPLETE.
|
||||
|
||||
Return whether or not CTOR is a valid static constant initializer, the same
|
||||
as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */
|
||||
|
||||
bool
|
||||
categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
|
||||
- HOST_WIDE_INT *p_elt_count,
|
||||
- bool *p_must_clear)
|
||||
+ HOST_WIDE_INT *p_init_elts, bool *p_complete)
|
||||
{
|
||||
*p_nz_elts = 0;
|
||||
- *p_elt_count = 0;
|
||||
- *p_must_clear = false;
|
||||
+ *p_init_elts = 0;
|
||||
+ *p_complete = true;
|
||||
|
||||
- return
|
||||
- categorize_ctor_elements_1 (ctor, p_nz_elts, p_elt_count, p_must_clear);
|
||||
+ return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete);
|
||||
}
|
||||
|
||||
-/* Count the number of scalars in TYPE. Return -1 on overflow or
|
||||
- variable-sized. If ALLOW_FLEXARR is true, don't count flexible
|
||||
- array member at the end of the structure. */
|
||||
+/* TYPE is initialized by a constructor with NUM_ELTS elements, the last
|
||||
+ of which had type LAST_TYPE. Each element was itself a complete
|
||||
+ initializer, in the sense that every meaningful byte was explicitly
|
||||
+ given a value. Return true if the same is true for the constructor
|
||||
+ as a whole. */
|
||||
|
||||
-HOST_WIDE_INT
|
||||
-count_type_elements (const_tree type, bool allow_flexarr)
|
||||
+bool
|
||||
+complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts,
|
||||
+ const_tree last_type)
|
||||
{
|
||||
- const HOST_WIDE_INT max = ~((HOST_WIDE_INT)1 << (HOST_BITS_PER_WIDE_INT-1));
|
||||
- switch (TREE_CODE (type))
|
||||
+ if (TREE_CODE (type) == UNION_TYPE
|
||||
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
|
||||
{
|
||||
- case ARRAY_TYPE:
|
||||
- {
|
||||
- tree telts = array_type_nelts (type);
|
||||
- if (telts && host_integerp (telts, 1))
|
||||
- {
|
||||
- HOST_WIDE_INT n = tree_low_cst (telts, 1) + 1;
|
||||
- HOST_WIDE_INT m = count_type_elements (TREE_TYPE (type), false);
|
||||
- if (n == 0)
|
||||
- return 0;
|
||||
- else if (max / n > m)
|
||||
- return n * m;
|
||||
- }
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- case RECORD_TYPE:
|
||||
- {
|
||||
- HOST_WIDE_INT n = 0, t;
|
||||
- tree f;
|
||||
-
|
||||
- for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
|
||||
- if (TREE_CODE (f) == FIELD_DECL)
|
||||
- {
|
||||
- t = count_type_elements (TREE_TYPE (f), false);
|
||||
- if (t < 0)
|
||||
- {
|
||||
- /* Check for structures with flexible array member. */
|
||||
- tree tf = TREE_TYPE (f);
|
||||
- if (allow_flexarr
|
||||
- && DECL_CHAIN (f) == NULL
|
||||
- && TREE_CODE (tf) == ARRAY_TYPE
|
||||
- && TYPE_DOMAIN (tf)
|
||||
- && TYPE_MIN_VALUE (TYPE_DOMAIN (tf))
|
||||
- && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf)))
|
||||
- && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf))
|
||||
- && int_size_in_bytes (type) >= 0)
|
||||
- break;
|
||||
-
|
||||
- return -1;
|
||||
- }
|
||||
- n += t;
|
||||
- }
|
||||
-
|
||||
- return n;
|
||||
- }
|
||||
-
|
||||
- case UNION_TYPE:
|
||||
- case QUAL_UNION_TYPE:
|
||||
- return -1;
|
||||
-
|
||||
- case COMPLEX_TYPE:
|
||||
- return 2;
|
||||
-
|
||||
- case VECTOR_TYPE:
|
||||
- return TYPE_VECTOR_SUBPARTS (type);
|
||||
-
|
||||
- case INTEGER_TYPE:
|
||||
- case REAL_TYPE:
|
||||
- case FIXED_POINT_TYPE:
|
||||
- case ENUMERAL_TYPE:
|
||||
- case BOOLEAN_TYPE:
|
||||
- case POINTER_TYPE:
|
||||
- case OFFSET_TYPE:
|
||||
- case REFERENCE_TYPE:
|
||||
- return 1;
|
||||
-
|
||||
- case ERROR_MARK:
|
||||
- return 0;
|
||||
-
|
||||
- case VOID_TYPE:
|
||||
- case METHOD_TYPE:
|
||||
- case FUNCTION_TYPE:
|
||||
- case LANG_TYPE:
|
||||
- default:
|
||||
- gcc_unreachable ();
|
||||
+ if (num_elts == 0)
|
||||
+ return false;
|
||||
+
|
||||
+ gcc_assert (num_elts == 1 && last_type);
|
||||
+
|
||||
+ /* ??? We could look at each element of the union, and find the
|
||||
+ largest element. Which would avoid comparing the size of the
|
||||
+ initialized element against any tail padding in the union.
|
||||
+ Doesn't seem worth the effort... */
|
||||
+ return simple_cst_equal (TYPE_SIZE (type), TYPE_SIZE (last_type)) == 1;
|
||||
}
|
||||
+
|
||||
+ return count_type_elements (type, true) == num_elts;
|
||||
}
|
||||
|
||||
/* Return 1 if EXP contains mostly (3/4) zeros. */
|
||||
@@ -5126,18 +5155,12 @@
|
||||
mostly_zeros_p (const_tree exp)
|
||||
{
|
||||
if (TREE_CODE (exp) == CONSTRUCTOR)
|
||||
-
|
||||
{
|
||||
- HOST_WIDE_INT nz_elts, count, elts;
|
||||
- bool must_clear;
|
||||
-
|
||||
- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear);
|
||||
- if (must_clear)
|
||||
- return 1;
|
||||
-
|
||||
- elts = count_type_elements (TREE_TYPE (exp), false);
|
||||
-
|
||||
- return nz_elts < elts / 4;
|
||||
+ HOST_WIDE_INT nz_elts, init_elts;
|
||||
+ bool complete_p;
|
||||
+
|
||||
+ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
|
||||
+ return !complete_p || nz_elts < init_elts / 4;
|
||||
}
|
||||
|
||||
return initializer_zerop (exp);
|
||||
@@ -5149,12 +5172,11 @@
|
||||
all_zeros_p (const_tree exp)
|
||||
{
|
||||
if (TREE_CODE (exp) == CONSTRUCTOR)
|
||||
-
|
||||
{
|
||||
- HOST_WIDE_INT nz_elts, count;
|
||||
- bool must_clear;
|
||||
+ HOST_WIDE_INT nz_elts, init_elts;
|
||||
+ bool complete_p;
|
||||
|
||||
- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear);
|
||||
+ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
|
||||
return nz_elts == 0;
|
||||
}
|
||||
|
||||
|
||||
=== modified file 'gcc/gimplify.c'
|
||||
--- old/gcc/gimplify.c 2011-05-26 10:27:57 +0000
|
||||
+++ new/gcc/gimplify.c 2011-07-13 13:17:31 +0000
|
||||
@@ -3693,9 +3693,8 @@
|
||||
case ARRAY_TYPE:
|
||||
{
|
||||
struct gimplify_init_ctor_preeval_data preeval_data;
|
||||
- HOST_WIDE_INT num_type_elements, num_ctor_elements;
|
||||
- HOST_WIDE_INT num_nonzero_elements;
|
||||
- bool cleared, valid_const_initializer;
|
||||
+ HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
|
||||
+ bool cleared, complete_p, valid_const_initializer;
|
||||
|
||||
/* Aggregate types must lower constructors to initialization of
|
||||
individual elements. The exception is that a CONSTRUCTOR node
|
||||
@@ -3712,7 +3711,7 @@
|
||||
can only do so if it known to be a valid constant initializer. */
|
||||
valid_const_initializer
|
||||
= categorize_ctor_elements (ctor, &num_nonzero_elements,
|
||||
- &num_ctor_elements, &cleared);
|
||||
+ &num_ctor_elements, &complete_p);
|
||||
|
||||
/* If a const aggregate variable is being initialized, then it
|
||||
should never be a lose to promote the variable to be static. */
|
||||
@@ -3750,26 +3749,29 @@
|
||||
parts in, then generate code for the non-constant parts. */
|
||||
/* TODO. There's code in cp/typeck.c to do this. */
|
||||
|
||||
- num_type_elements = count_type_elements (type, true);
|
||||
+ if (int_size_in_bytes (TREE_TYPE (ctor)) < 0)
|
||||
+ /* store_constructor will ignore the clearing of variable-sized
|
||||
+ objects. Initializers for such objects must explicitly set
|
||||
+ every field that needs to be set. */
|
||||
+ cleared = false;
|
||||
+ else if (!complete_p)
|
||||
+ /* If the constructor isn't complete, clear the whole object
|
||||
+ beforehand.
|
||||
|
||||
- /* If count_type_elements could not determine number of type elements
|
||||
- for a constant-sized object, assume clearing is needed.
|
||||
- Don't do this for variable-sized objects, as store_constructor
|
||||
- will ignore the clearing of variable-sized objects. */
|
||||
- if (num_type_elements < 0 && int_size_in_bytes (type) >= 0)
|
||||
+ ??? This ought not to be needed. For any element not present
|
||||
+ in the initializer, we should simply set them to zero. Except
|
||||
+ we'd need to *find* the elements that are not present, and that
|
||||
+ requires trickery to avoid quadratic compile-time behavior in
|
||||
+ large cases or excessive memory use in small cases. */
|
||||
cleared = true;
|
||||
- /* If there are "lots" of zeros, then block clear the object first. */
|
||||
- else if (num_type_elements - num_nonzero_elements
|
||||
+ else if (num_ctor_elements - num_nonzero_elements
|
||||
> CLEAR_RATIO (optimize_function_for_speed_p (cfun))
|
||||
- && num_nonzero_elements < num_type_elements/4)
|
||||
- cleared = true;
|
||||
- /* ??? This bit ought not be needed. For any element not present
|
||||
- in the initializer, we should simply set them to zero. Except
|
||||
- we'd need to *find* the elements that are not present, and that
|
||||
- requires trickery to avoid quadratic compile-time behavior in
|
||||
- large cases or excessive memory use in small cases. */
|
||||
- else if (num_ctor_elements < num_type_elements)
|
||||
- cleared = true;
|
||||
+ && num_nonzero_elements < num_ctor_elements / 4)
|
||||
+ /* If there are "lots" of zeros, it's more efficient to clear
|
||||
+ the memory and then set the nonzero elements. */
|
||||
+ cleared = true;
|
||||
+ else
|
||||
+ cleared = false;
|
||||
|
||||
/* If there are "lots" of initialized elements, and all of them
|
||||
are valid address constants, then the entire initializer can
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/pr48183.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/pr48183.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/pr48183.c 2011-07-13 13:17:31 +0000
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* testsuite/gcc.target/arm/pr48183.c */
|
||||
+
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O -g" } */
|
||||
+/* { dg-add-options arm_neon } */
|
||||
+
|
||||
+#include <arm_neon.h>
|
||||
+
|
||||
+void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n)
|
||||
+{
|
||||
+ unsigned i;
|
||||
+ int16x4x2_t input;
|
||||
+ int32x4x2_t mid;
|
||||
+ int32x4x2_t output;
|
||||
+
|
||||
+ for (i = 0; i < n/2; i += 8) {
|
||||
+ input = vld2_s16(src + i);
|
||||
+ mid.val[0] = vmovl_s16(input.val[0]);
|
||||
+ mid.val[1] = vmovl_s16(input.val[1]);
|
||||
+ output.val[0] = vshlq_n_s32(mid.val[0], 8);
|
||||
+ output.val[1] = vshlq_n_s32(mid.val[1], 8);
|
||||
+ vst2q_s32((int32_t *)dst + i, output);
|
||||
+ }
|
||||
+}
|
||||
|
||||
=== modified file 'gcc/tree.h'
|
||||
--- old/gcc/tree.h 2011-07-01 09:19:21 +0000
|
||||
+++ new/gcc/tree.h 2011-07-13 13:17:31 +0000
|
||||
@@ -4627,21 +4627,10 @@
|
||||
|
||||
extern VEC(tree,gc) *ctor_to_vec (tree);
|
||||
|
||||
-/* Examine CTOR to discover:
|
||||
- * how many scalar fields are set to nonzero values,
|
||||
- and place it in *P_NZ_ELTS;
|
||||
- * how many scalar fields in total are in CTOR,
|
||||
- and place it in *P_ELT_COUNT.
|
||||
- * if a type is a union, and the initializer from the constructor
|
||||
- is not the largest element in the union, then set *p_must_clear.
|
||||
-
|
||||
- Return whether or not CTOR is a valid static constant initializer, the same
|
||||
- as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */
|
||||
-
|
||||
-extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *,
|
||||
- bool *);
|
||||
-
|
||||
-extern HOST_WIDE_INT count_type_elements (const_tree, bool);
|
||||
+extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
|
||||
+ HOST_WIDE_INT *, bool *);
|
||||
+
|
||||
+extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree);
|
||||
|
||||
/* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. */
|
||||
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
2011-07-21 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK.
|
||||
|
||||
=== modified file 'gcc/regcprop.c'
|
||||
--- old/gcc/regcprop.c 2010-12-17 22:51:25 +0000
|
||||
+++ new/gcc/regcprop.c 2011-07-21 11:30:53 +0000
|
||||
@@ -418,10 +418,9 @@
|
||||
|
||||
offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0)
|
||||
+ (BYTES_BIG_ENDIAN ? byteoffset : 0));
|
||||
- return gen_rtx_raw_REG (new_mode,
|
||||
- regno + subreg_regno_offset (regno, orig_mode,
|
||||
- offset,
|
||||
- new_mode));
|
||||
+ regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
|
||||
+ if (HARD_REGNO_MODE_OK (regno, new_mode))
|
||||
+ return gen_rtx_raw_REG (new_mode, regno);
|
||||
}
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
@@ -18,4 +18,22 @@ file://linaro/gcc-4.6-linaro-r106751.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106753.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106754.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106755.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106759.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106761.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106762.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106763.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106764.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106766.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106768.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106769.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106770.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106771.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106772.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106773.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106775.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106776.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106777.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106778.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106781.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106782.patch \
|
||||
"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# this will prepend this layer to FILESPATH
|
||||
FILESEXTRAPATHS := "${THISDIR}/gcc-4.6"
|
||||
PRINC = "1"
|
||||
PRINC = "2"
|
||||
ARM_INSTRUCTION_SET = "arm"
|
||||
|
||||
Reference in New Issue
Block a user