gcc-4.6: Migrate recipes from OE-Core

Remove linaro patches. If one needs to use linaro
modified gcc they should use meta-linaro

Signed-off-by: Khem Raj <raj.khem@gmail.com>
This commit is contained in:
Khem Raj
2012-06-28 12:19:53 -07:00
committed by Koen Kooi
parent 680af24d1f
commit 6b278fbb02
163 changed files with 6935 additions and 46495 deletions

View File

@@ -0,0 +1,119 @@
require recipes-devtools/gcc/gcc-common.inc
PR = "r27"
# The third digit of PV tracks the latest minor release made from this gcc
# branch. For example, if PV is 4.6.0 and 4.6.1 is then released, PV should
# be changed to 4.6.1+svnr${SRCPV} once SRCREV has been bumped past that
# release on the branch, so that the version reflects the change.
PV = "4.6.3+svnr${SRCPV}"
# BINV should be incremented after updating to a revision that comes after
# a minor gcc release (e.g. 4.6.1 or 4.6.2). Its value is the last minor
# release plus one: if the current minor release is 4.6.1, BINV is 4.6.2,
# i.e. the next minor release, and so on.
BINV = "4.6.4"
SRCREV = "184847"
BRANCH = "gcc-4_6-branch"
FILESPATH = "${@base_set_filespath([ '${FILE_DIRNAME}/gcc-4.6' ], d)}"
DEPENDS =+ "mpfr gmp libmpc"
NATIVEDEPS = "mpfr-native gmp-native libmpc-native zlib-native"
LICENSE="GPL-3.0-with-GCC-exception & GPLv3"
LIC_FILES_CHKSUM = "file://COPYING;md5=59530bdf33659b29e73d4adb9f9f6552 \
file://COPYING3;md5=d32239bcb673463ab874e80d47fae504 \
file://COPYING3.LIB;md5=6a6a8e020838b23406c81b19c1d46df6 \
file://COPYING.LIB;md5=2d5025d4aa3495befef8f17206a5b0a1 \
file://COPYING.RUNTIME;md5=fe60d87048567d4fe8c8a0ed2448bcc8"
# Upstream svn branch (module selected by ${BRANCH}) followed by the local
# patch stack. Every line inside the quoted value must end with a backslash
# continuation; a line without one terminates the continuation and breaks
# the assignment.
SRC_URI = "svn://gcc.gnu.org/svn/gcc/branches;module=${BRANCH};proto=http \
file://gcc-4.3.1-ARCH_FLAGS_FOR_TARGET.patch \
file://100-uclibc-conf.patch \
file://gcc-uclibc-locale-ctype_touplow_t.patch \
file://cache-amnesia.patch \
file://gcc-flags-for-build.patch \
file://103-uclibc-conf-noupstream.patch \
file://200-uclibc-locale.patch \
file://203-uclibc-locale-no__x.patch \
file://204-uclibc-locale-wchar_fix.patch \
file://205-uclibc-locale-update.patch \
file://301-missing-execinfo_h.patch \
file://302-c99-snprintf.patch \
file://303-c99-complex-ugly-hack.patch \
file://304-index_macro.patch \
file://305-libmudflap-susv3-legacy.patch \
file://306-libstdc++-namespace.patch \
file://740-sh-pr24836.patch \
file://800-arm-bigendian.patch \
file://904-flatten-switch-stmt-00.patch \
file://arm-nolibfloat.patch \
file://arm-softfloat.patch \
file://zecke-xgcc-cpp.patch \
file://gcc-poison-system-directories.patch \
file://gcc-poison-dir-extend.patch \
file://gcc-4.3.3-SYSROOT_CFLAGS_FOR_TARGET.patch \
file://64bithack.patch \
file://optional_libstdc.patch \
file://disable_relax_pic_calls_flag.patch \
file://COLLECT_GCC_OPTIONS.patch \
file://use-defaults.h-and-t-oe-in-B.patch \
file://powerpc-e5500.patch \
file://fix-for-ice-50099.patch \
file://gcc-with-linker-hash-style.patch \
file://pr46934.patch \
file://pr32219.patch \
file://pr47551.patch \
file://gcc-arm-set-cost.patch \
file://GPLUSPLUS_INCLUDE_DIR_with_sysroot.patch \
file://fortran-cross-compile-hack.patch \
file://cpp-honour-sysroot.patch \
file://mips64-default-n64.patch \
file://gcc-argument-list-too-long.patch \
"
# One extra patch is appended to SRC_URI under the sh3 override.
SRC_URI_append_sh3 = " file://sh3-installfix-fixheaders.patch "
# The source tree S is placed in a work-shared directory under TMPDIR, keyed
# by PV and PR, instead of under WORKDIR (the WORKDIR form is left commented
# out below). The build directory B stays under WORKDIR and is keyed by
# HOST_SYS and TARGET_SYS.
#S = "${WORKDIR}/${BRANCH}"
S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/${BRANCH}"
B = "${WORKDIR}/${BRANCH}/build.${HOST_SYS}.${TARGET_SYS}"
# Language Overrides
# FORTRAN and JAVA are both set to the empty string here.
FORTRAN = ""
JAVA = ""
# Base configure switches: LTO and libssp enabled; bootstrap, libgomp and
# libmudflap disabled; system zlib; PPL and CLooG turned off; linker hash
# style taken from ${LINKER_HASH_STYLE}.
EXTRA_OECONF_BASE = " --enable-lto \
--enable-libssp \
--disable-bootstrap \
--disable-libgomp \
--disable-libmudflap \
--with-system-zlib \
--with-linker-hash-style=${LINKER_HASH_STYLE} \
--with-ppl=no \
--with-cloog=no \
--enable-cheaders=c_global "
# Reduced configure switches with the runtime libraries, LTO, plugins and
# decimal float all disabled.
# NOTE(review): presumably consumed by the initial cross-compiler stage
# defined in another recipe file - confirm.
EXTRA_OECONF_INITIAL = "--disable-libmudflap \
--disable-libgomp \
--disable-libssp \
--disable-libquadmath \
--with-system-zlib \
--disable-lto \
--disable-plugin \
--enable-decimal-float=no"
# Same set as the INITIAL list except that it does not pass
# --enable-decimal-float=no.
# NOTE(review): presumably consumed by the intermediate cross-compiler stage
# defined in another recipe file - confirm.
EXTRA_OECONF_INTERMEDIATE = "--disable-libmudflap \
--disable-libgomp \
--disable-libquadmath \
--with-system-zlib \
--disable-lto \
--disable-plugin \
--disable-libssp"
# Under the libc-uclibc override, decimal float is additionally disabled.
EXTRA_OECONF_append_libc-uclibc = " --disable-decimal-float "

View File

@@ -0,0 +1,39 @@
Upstream-Status: Pending
Index: gcc-4.6.0/contrib/regression/objs-gcc.sh
===================================================================
--- gcc-4.6.0.orig/contrib/regression/objs-gcc.sh
+++ gcc-4.6.0/contrib/regression/objs-gcc.sh
@@ -106,6 +106,10 @@ if [ $H_REAL_TARGET = $H_REAL_HOST -a $H
then
make all-gdb all-dejagnu all-ld || exit 1
make install-gdb install-dejagnu install-ld || exit 1
+elif [ $H_REAL_TARGET = $H_REAL_HOST -a $H_REAL_TARGET = i686-pc-linux-uclibc ]
+ then
+ make all-gdb all-dejagnu all-ld || exit 1
+ make install-gdb install-dejagnu install-ld || exit 1
elif [ $H_REAL_TARGET = $H_REAL_HOST ] ; then
make bootstrap || exit 1
make install || exit 1
Index: gcc-4.6.0/libjava/classpath/ltconfig
===================================================================
--- gcc-4.6.0.orig/libjava/classpath/ltconfig
+++ gcc-4.6.0/libjava/classpath/ltconfig
@@ -603,7 +603,7 @@ host_os=`echo $host | sed 's/^\([^-]*\)-
# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
case $host_os in
-linux-gnu*) ;;
+linux-gnu*|linux-uclibc*) ;;
linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
esac
@@ -1247,7 +1247,7 @@ linux-gnuoldld* | linux-gnuaout* | linux
;;
# This must be Linux ELF.
-linux-gnu*)
+linux*)
version_type=linux
need_lib_prefix=no
need_version=no

View File

@@ -0,0 +1,17 @@
Upstream-Status: Pending
Match SH sub-machine architectures (sh*-*-linux*) correctly in gcc/config.gcc.
Index: gcc-4.6.0/gcc/config.gcc
===================================================================
--- gcc-4.6.0.orig/gcc/config.gcc
+++ gcc-4.6.0/gcc/config.gcc
@@ -2316,7 +2316,7 @@ score-*-elf)
;;
sh-*-elf* | sh[12346l]*-*-elf* | \
sh-*-symbianelf* | sh[12346l]*-*-symbianelf* | \
- sh-*-linux* | sh[2346lbe]*-*-linux* | \
+ sh*-*-linux* | sh[2346lbe]*-*-linux* | \
sh-*-netbsdelf* | shl*-*-netbsdelf* | sh5-*-netbsd* | sh5l*-*-netbsd* | \
sh64-*-netbsd* | sh64l*-*-netbsd*)
tmake_file="${tmake_file} sh/t-sh sh/t-elf"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,235 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c++locale_internal.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/c++locale_internal.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c++locale_internal.h
@@ -60,4 +60,49 @@ extern "C" __typeof(wcsxfrm_l) __wcsxfrm
extern "C" __typeof(wctype_l) __wctype_l;
#endif
+# define __nl_langinfo_l nl_langinfo_l
+# define __strcoll_l strcoll_l
+# define __strftime_l strftime_l
+# define __strtod_l strtod_l
+# define __strtof_l strtof_l
+# define __strtold_l strtold_l
+# define __strxfrm_l strxfrm_l
+# define __newlocale newlocale
+# define __freelocale freelocale
+# define __duplocale duplocale
+# define __uselocale uselocale
+
+# ifdef _GLIBCXX_USE_WCHAR_T
+# define __iswctype_l iswctype_l
+# define __towlower_l towlower_l
+# define __towupper_l towupper_l
+# define __wcscoll_l wcscoll_l
+# define __wcsftime_l wcsftime_l
+# define __wcsxfrm_l wcsxfrm_l
+# define __wctype_l wctype_l
+# endif
+
+#else
+# define __nl_langinfo_l(N, L) nl_langinfo((N))
+# define __strcoll_l(S1, S2, L) strcoll((S1), (S2))
+# define __strtod_l(S, E, L) strtod((S), (E))
+# define __strtof_l(S, E, L) strtof((S), (E))
+# define __strtold_l(S, E, L) strtold((S), (E))
+# define __strxfrm_l(S1, S2, N, L) strxfrm((S1), (S2), (N))
+# warning should dummy __newlocale check for C|POSIX ?
+# define __newlocale(a, b, c) NULL
+# define __freelocale(a) ((void)0)
+# define __duplocale(a) __c_locale()
+//# define __uselocale ?
+//
+# ifdef _GLIBCXX_USE_WCHAR_T
+# define __iswctype_l(C, M, L) iswctype((C), (M))
+# define __towlower_l(C, L) towlower((C))
+# define __towupper_l(C, L) towupper((C))
+# define __wcscoll_l(S1, S2, L) wcscoll((S1), (S2))
+//# define __wcsftime_l(S, M, F, T, L) wcsftime((S), (M), (F), (T))
+# define __wcsxfrm_l(S1, S2, N, L) wcsxfrm((S1), (S2), (N))
+# define __wctype_l(S, L) wctype((S))
+# endif
+
#endif // GLIBC 2.3 and later
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/c_locale.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.cc
@@ -39,20 +39,6 @@
#include <langinfo.h>
#include <bits/c++locale_internal.h>
-#ifndef __UCLIBC_HAS_XLOCALE__
-#define __strtol_l(S, E, B, L) strtol((S), (E), (B))
-#define __strtoul_l(S, E, B, L) strtoul((S), (E), (B))
-#define __strtoll_l(S, E, B, L) strtoll((S), (E), (B))
-#define __strtoull_l(S, E, B, L) strtoull((S), (E), (B))
-#define __strtof_l(S, E, L) strtof((S), (E))
-#define __strtod_l(S, E, L) strtod((S), (E))
-#define __strtold_l(S, E, L) strtold((S), (E))
-#warning should dummy __newlocale check for C|POSIX ?
-#define __newlocale(a, b, c) NULL
-#define __freelocale(a) ((void)0)
-#define __duplocale(a) __c_locale()
-#endif
-
namespace std
{
template<>
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/collate_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/collate_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/collate_members.cc
@@ -36,13 +36,6 @@
#include <locale>
#include <bits/c++locale_internal.h>
-#ifndef __UCLIBC_HAS_XLOCALE__
-#define __strcoll_l(S1, S2, L) strcoll((S1), (S2))
-#define __strxfrm_l(S1, S2, N, L) strxfrm((S1), (S2), (N))
-#define __wcscoll_l(S1, S2, L) wcscoll((S1), (S2))
-#define __wcsxfrm_l(S1, S2, N, L) wcsxfrm((S1), (S2), (N))
-#endif
-
namespace std
{
// These are basically extensions to char_traits, and perhaps should
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/monetary_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/monetary_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/monetary_members.cc
@@ -43,10 +43,6 @@
#warning tailor for stub locale support
#endif
-#ifndef __UCLIBC_HAS_XLOCALE__
-#define __nl_langinfo_l(N, L) nl_langinfo((N))
-#endif
-
namespace std
{
// Construct and return valid pattern consisting of some combination of:
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/numeric_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/numeric_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/numeric_members.cc
@@ -41,9 +41,6 @@
#ifdef __UCLIBC_MJN3_ONLY__
#warning tailor for stub locale support
#endif
-#ifndef __UCLIBC_HAS_XLOCALE__
-#define __nl_langinfo_l(N, L) nl_langinfo((N))
-#endif
namespace std
{
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/time_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.cc
@@ -40,9 +40,6 @@
#ifdef __UCLIBC_MJN3_ONLY__
#warning tailor for stub locale support
#endif
-#ifndef __UCLIBC_HAS_XLOCALE__
-#define __nl_langinfo_l(N, L) nl_langinfo((N))
-#endif
namespace std
{
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/ctype_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/ctype_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/ctype_members.cc
@@ -38,13 +38,6 @@
#undef _LIBC
#include <bits/c++locale_internal.h>
-#ifndef __UCLIBC_HAS_XLOCALE__
-#define __wctype_l(S, L) wctype((S))
-#define __towupper_l(C, L) towupper((C))
-#define __towlower_l(C, L) towlower((C))
-#define __iswctype_l(C, M, L) iswctype((C), (M))
-#endif
-
namespace std
{
// NB: The other ctype<char> specializations are in src/locale.cc and
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/messages_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.cc
@@ -39,13 +39,10 @@
#ifdef __UCLIBC_MJN3_ONLY__
#warning fix gettext stuff
#endif
-#ifdef __UCLIBC_HAS_GETTEXT_AWARENESS__
-extern "C" char *__dcgettext(const char *domainname,
- const char *msgid, int category);
#undef gettext
-#define gettext(msgid) __dcgettext(NULL, msgid, LC_MESSAGES)
+#ifdef __UCLIBC_HAS_GETTEXT_AWARENESS__
+#define gettext(msgid) dcgettext(NULL, msgid, LC_MESSAGES)
#else
-#undef gettext
#define gettext(msgid) (msgid)
#endif
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/messages_members.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.h
@@ -36,15 +36,11 @@
#ifdef __UCLIBC_MJN3_ONLY__
#warning fix prototypes for *textdomain funcs
#endif
-#ifdef __UCLIBC_HAS_GETTEXT_AWARENESS__
-extern "C" char *__textdomain(const char *domainname);
-extern "C" char *__bindtextdomain(const char *domainname,
- const char *dirname);
-#else
-#undef __textdomain
-#undef __bindtextdomain
-#define __textdomain(D) ((void)0)
-#define __bindtextdomain(D,P) ((void)0)
+#ifndef __UCLIBC_HAS_GETTEXT_AWARENESS__
+#undef textdomain
+#undef bindtextdomain
+#define textdomain(D) ((void)0)
+#define bindtextdomain(D,P) ((void)0)
#endif
// Non-virtual member functions.
@@ -70,7 +66,7 @@ extern "C" char *__bindtextdomain(const
messages<_CharT>::open(const basic_string<char>& __s, const locale& __loc,
const char* __dir) const
{
- __bindtextdomain(__s.c_str(), __dir);
+ bindtextdomain(__s.c_str(), __dir);
return this->do_open(__s, __loc);
}
@@ -90,7 +86,7 @@ extern "C" char *__bindtextdomain(const
{
// No error checking is done, assume the catalog exists and can
// be used.
- __textdomain(__s.c_str());
+ textdomain(__s.c_str());
return 0;
}
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/c_locale.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.h
@@ -68,6 +68,7 @@ namespace __gnu_cxx
{
extern "C" __typeof(uselocale) __uselocale;
}
+#define __uselocale uselocale
#endif
namespace std

View File

@@ -0,0 +1,54 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/monetary_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/monetary_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/monetary_members.cc
@@ -401,7 +401,7 @@ namespace std
# ifdef __UCLIBC_HAS_XLOCALE__
_M_data->_M_decimal_point = __cloc->decimal_point_wc;
_M_data->_M_thousands_sep = __cloc->thousands_sep_wc;
-# else
+# elif defined __UCLIBC_HAS_LOCALE__
_M_data->_M_decimal_point = __global_locale->decimal_point_wc;
_M_data->_M_thousands_sep = __global_locale->thousands_sep_wc;
# endif
@@ -556,7 +556,7 @@ namespace std
# ifdef __UCLIBC_HAS_XLOCALE__
_M_data->_M_decimal_point = __cloc->decimal_point_wc;
_M_data->_M_thousands_sep = __cloc->thousands_sep_wc;
-# else
+# elif defined __UCLIBC_HAS_LOCALE__
_M_data->_M_decimal_point = __global_locale->decimal_point_wc;
_M_data->_M_thousands_sep = __global_locale->thousands_sep_wc;
# endif
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/numeric_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/numeric_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/numeric_members.cc
@@ -127,12 +127,25 @@ namespace std
{
// Named locale.
// NB: In the GNU model wchar_t is always 32 bit wide.
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning fix this... should be numeric
+#endif
+#ifdef __UCLIBC__
+# ifdef __UCLIBC_HAS_XLOCALE__
+ _M_data->_M_decimal_point = __cloc->decimal_point_wc;
+ _M_data->_M_thousands_sep = __cloc->thousands_sep_wc;
+# elif defined __UCLIBC_HAS_LOCALE__
+ _M_data->_M_decimal_point = __global_locale->decimal_point_wc;
+ _M_data->_M_thousands_sep = __global_locale->thousands_sep_wc;
+# endif
+#else
union { char *__s; wchar_t __w; } __u;
__u.__s = __nl_langinfo_l(_NL_NUMERIC_DECIMAL_POINT_WC, __cloc);
_M_data->_M_decimal_point = __u.__w;
__u.__s = __nl_langinfo_l(_NL_NUMERIC_THOUSANDS_SEP_WC, __cloc);
_M_data->_M_thousands_sep = __u.__w;
+#endif
if (_M_data->_M_thousands_sep == L'\0')
_M_data->_M_grouping = "";

View File

@@ -0,0 +1,521 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/c_locale.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.cc
@@ -39,23 +39,20 @@
#include <langinfo.h>
#include <bits/c++locale_internal.h>
-namespace std
-{
+_GLIBCXX_BEGIN_NAMESPACE(std)
+
template<>
void
__convert_to_v(const char* __s, float& __v, ios_base::iostate& __err,
const __c_locale& __cloc)
{
- if (!(__err & ios_base::failbit))
- {
- char* __sanity;
- errno = 0;
- float __f = __strtof_l(__s, &__sanity, __cloc);
- if (__sanity != __s && errno != ERANGE)
- __v = __f;
- else
- __err |= ios_base::failbit;
- }
+ char* __sanity;
+ errno = 0;
+ float __f = __strtof_l(__s, &__sanity, __cloc);
+ if (__sanity != __s && errno != ERANGE)
+ __v = __f;
+ else
+ __err |= ios_base::failbit;
}
template<>
@@ -63,16 +60,13 @@ namespace std
__convert_to_v(const char* __s, double& __v, ios_base::iostate& __err,
const __c_locale& __cloc)
{
- if (!(__err & ios_base::failbit))
- {
- char* __sanity;
- errno = 0;
- double __d = __strtod_l(__s, &__sanity, __cloc);
- if (__sanity != __s && errno != ERANGE)
- __v = __d;
- else
- __err |= ios_base::failbit;
- }
+ char* __sanity;
+ errno = 0;
+ double __d = __strtod_l(__s, &__sanity, __cloc);
+ if (__sanity != __s && errno != ERANGE)
+ __v = __d;
+ else
+ __err |= ios_base::failbit;
}
template<>
@@ -80,16 +74,13 @@ namespace std
__convert_to_v(const char* __s, long double& __v, ios_base::iostate& __err,
const __c_locale& __cloc)
{
- if (!(__err & ios_base::failbit))
- {
- char* __sanity;
- errno = 0;
- long double __ld = __strtold_l(__s, &__sanity, __cloc);
- if (__sanity != __s && errno != ERANGE)
- __v = __ld;
- else
- __err |= ios_base::failbit;
- }
+ char* __sanity;
+ errno = 0;
+ long double __ld = __strtold_l(__s, &__sanity, __cloc);
+ if (__sanity != __s && errno != ERANGE)
+ __v = __ld;
+ else
+ __err |= ios_base::failbit;
}
void
@@ -110,17 +101,18 @@ namespace std
void
locale::facet::_S_destroy_c_locale(__c_locale& __cloc)
{
- if (_S_get_c_locale() != __cloc)
+ if (__cloc && _S_get_c_locale() != __cloc)
__freelocale(__cloc);
}
__c_locale
locale::facet::_S_clone_c_locale(__c_locale& __cloc)
{ return __duplocale(__cloc); }
-} // namespace std
-namespace __gnu_cxx
-{
+_GLIBCXX_END_NAMESPACE
+
+_GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
+
const char* const category_names[6 + _GLIBCXX_NUM_CATEGORIES] =
{
"LC_CTYPE",
@@ -138,9 +130,11 @@ namespace __gnu_cxx
"LC_IDENTIFICATION"
#endif
};
-}
-namespace std
-{
+_GLIBCXX_END_NAMESPACE
+
+_GLIBCXX_BEGIN_NAMESPACE(std)
+
const char* const* const locale::_S_categories = __gnu_cxx::category_names;
-} // namespace std
+
+_GLIBCXX_END_NAMESPACE
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/ctype_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/ctype_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/ctype_members.cc
@@ -33,16 +33,20 @@
// Written by Benjamin Kosnik <bkoz@redhat.com>
+#include <features.h>
+#ifdef __UCLIBC_HAS_LOCALE__
#define _LIBC
#include <locale>
#undef _LIBC
+#else
+#include <locale>
+#endif
#include <bits/c++locale_internal.h>
-namespace std
-{
+_GLIBCXX_BEGIN_NAMESPACE(std)
+
// NB: The other ctype<char> specializations are in src/locale.cc and
// various /config/os/* files.
- template<>
ctype_byname<char>::ctype_byname(const char* __s, size_t __refs)
: ctype<char>(0, false, __refs)
{
@@ -57,6 +61,8 @@ namespace std
#endif
}
}
+ ctype_byname<char>::~ctype_byname()
+ { }
#ifdef _GLIBCXX_USE_WCHAR_T
ctype<wchar_t>::__wmask_type
@@ -138,17 +144,33 @@ namespace std
ctype<wchar_t>::
do_is(mask __m, wchar_t __c) const
{
- // Highest bitmask in ctype_base == 10, but extra in "C"
- // library for blank.
+ // The case of __m == ctype_base::space is particularly important,
+ // due to its use in many istream functions. Therefore we deal with
+ // it first, exploiting the knowledge that on GNU systems _M_bit[5]
+ // is the mask corresponding to ctype_base::space. NB: an encoding
+ // change would not affect correctness!
+
bool __ret = false;
- const size_t __bitmasksize = 11;
- for (size_t __bitcur = 0; __bitcur <= __bitmasksize; ++__bitcur)
- if (__m & _M_bit[__bitcur]
- && __iswctype_l(__c, _M_wmask[__bitcur], _M_c_locale_ctype))
- {
- __ret = true;
- break;
- }
+ if (__m == _M_bit[5])
+ __ret = __iswctype_l(__c, _M_wmask[5], _M_c_locale_ctype);
+ else
+ {
+ // Highest bitmask in ctype_base == 10, but extra in "C"
+ // library for blank.
+ const size_t __bitmasksize = 11;
+ for (size_t __bitcur = 0; __bitcur <= __bitmasksize; ++__bitcur)
+ if (__m & _M_bit[__bitcur])
+ {
+ if (__iswctype_l(__c, _M_wmask[__bitcur], _M_c_locale_ctype))
+ {
+ __ret = true;
+ break;
+ }
+ else if (__m == _M_bit[__bitcur])
+ break;
+ }
+ }
+
return __ret;
}
@@ -290,4 +312,5 @@ namespace std
#endif
}
#endif // _GLIBCXX_USE_WCHAR_T
-}
+
+_GLIBCXX_END_NAMESPACE
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/messages_members.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.h
@@ -53,12 +53,16 @@
template<typename _CharT>
messages<_CharT>::messages(__c_locale __cloc, const char* __s,
size_t __refs)
- : facet(__refs), _M_c_locale_messages(_S_clone_c_locale(__cloc)),
- _M_name_messages(__s)
+ : facet(__refs), _M_c_locale_messages(NULL),
+ _M_name_messages(NULL)
{
- char* __tmp = new char[std::strlen(__s) + 1];
- std::strcpy(__tmp, __s);
+ const size_t __len = std::strlen(__s) + 1;
+ char* __tmp = new char[__len];
+ std::memcpy(__tmp, __s, __len);
_M_name_messages = __tmp;
+
+ // Last to avoid leaking memory if new throws.
+ _M_c_locale_messages = _S_clone_c_locale(__cloc);
}
template<typename _CharT>
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/monetary_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/monetary_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/monetary_members.cc
@@ -33,9 +33,14 @@
// Written by Benjamin Kosnik <bkoz@redhat.com>
+#include <features.h>
+#ifdef __UCLIBC_HAS_LOCALE__
#define _LIBC
#include <locale>
#undef _LIBC
+#else
+#include <locale>
+#endif
#include <bits/c++locale_internal.h>
#ifdef __UCLIBC_MJN3_ONLY__
@@ -206,7 +211,7 @@ namespace std
}
break;
default:
- ;
+ __ret = pattern();
}
return __ret;
}
@@ -390,7 +395,10 @@ namespace std
__c_locale __old = __uselocale(__cloc);
#else
// Switch to named locale so that mbsrtowcs will work.
- char* __old = strdup(setlocale(LC_ALL, NULL));
+ char* __old = setlocale(LC_ALL, NULL);
+ const size_t __llen = strlen(__old) + 1;
+ char* __sav = new char[__llen];
+ memcpy(__sav, __old, __llen);
setlocale(LC_ALL, __name);
#endif
@@ -477,8 +484,8 @@ namespace std
#ifdef __UCLIBC_HAS_XLOCALE__
__uselocale(__old);
#else
- setlocale(LC_ALL, __old);
- free(__old);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
#endif
__throw_exception_again;
}
@@ -498,8 +505,8 @@ namespace std
#ifdef __UCLIBC_HAS_XLOCALE__
__uselocale(__old);
#else
- setlocale(LC_ALL, __old);
- free(__old);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
#endif
}
}
@@ -545,8 +552,11 @@ namespace std
__c_locale __old = __uselocale(__cloc);
#else
// Switch to named locale so that mbsrtowcs will work.
- char* __old = strdup(setlocale(LC_ALL, NULL));
- setlocale(LC_ALL, __name);
+ char* __old = setlocale(LC_ALL, NULL);
+ const size_t __llen = strlen(__old) + 1;
+ char* __sav = new char[__llen];
+ memcpy(__sav, __old, __llen);
+ setlocale(LC_ALL, __name);
#endif
#ifdef __UCLIBC_MJN3_ONLY__
@@ -633,8 +643,8 @@ namespace std
#ifdef __UCLIBC_HAS_XLOCALE__
__uselocale(__old);
#else
- setlocale(LC_ALL, __old);
- free(__old);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
#endif
__throw_exception_again;
}
@@ -653,8 +663,8 @@ namespace std
#ifdef __UCLIBC_HAS_XLOCALE__
__uselocale(__old);
#else
- setlocale(LC_ALL, __old);
- free(__old);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
#endif
}
}
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/numeric_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/numeric_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/numeric_members.cc
@@ -33,9 +33,14 @@
// Written by Benjamin Kosnik <bkoz@redhat.com>
+#include <features.h>
+#ifdef __UCLIBC_HAS_LOCALE__
#define _LIBC
#include <locale>
#undef _LIBC
+#else
+#include <locale>
+#endif
#include <bits/c++locale_internal.h>
#ifdef __UCLIBC_MJN3_ONLY__
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/time_members.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.h
@@ -50,12 +50,21 @@
__timepunct<_CharT>::__timepunct(__c_locale __cloc, const char* __s,
size_t __refs)
: facet(__refs), _M_data(NULL), _M_c_locale_timepunct(NULL),
- _M_name_timepunct(__s)
+ _M_name_timepunct(NULL)
{
- char* __tmp = new char[std::strlen(__s) + 1];
- std::strcpy(__tmp, __s);
+ const size_t __len = std::strlen(__s) + 1;
+ char* __tmp = new char[__len];
+ std::memcpy(__tmp, __s, __len);
_M_name_timepunct = __tmp;
- _M_initialize_timepunct(__cloc);
+
+ try
+ { _M_initialize_timepunct(__cloc); }
+ catch(...)
+ {
+ delete [] _M_name_timepunct;
+ __throw_exception_again;
+ }
+
}
template<typename _CharT>
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/c_locale.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c_locale.h
@@ -39,21 +39,23 @@
#pragma GCC system_header
#include <cstring> // get std::strlen
-#include <cstdio> // get std::snprintf or std::sprintf
+#include <cstdio> // get std::vsnprintf or std::vsprintf
#include <clocale>
#include <langinfo.h> // For codecvt
#ifdef __UCLIBC_MJN3_ONLY__
#warning fix this
#endif
-#ifdef __UCLIBC_HAS_LOCALE__
+#ifdef _GLIBCXX_USE_ICONV
#include <iconv.h> // For codecvt using iconv, iconv_t
#endif
-#ifdef __UCLIBC_HAS_GETTEXT_AWARENESS__
-#include <libintl.h> // For messages
+#ifdef HAVE_LIBINTL_H
+#include <libintl.h> // For messages
#endif
+#include <cstdarg>
#ifdef __UCLIBC_MJN3_ONLY__
#warning what is _GLIBCXX_C_LOCALE_GNU for
+// psm: used in os/gnu-linux/ctype_noninline.h
#endif
#define _GLIBCXX_C_LOCALE_GNU 1
@@ -78,23 +80,25 @@ namespace std
#else
typedef int* __c_locale;
#endif
-
- // Convert numeric value of type _Tv to string and return length of
- // string. If snprintf is available use it, otherwise fall back to
- // the unsafe sprintf which, in general, can be dangerous and should
+ // Convert numeric value of type double to string and return length of
+ // string. If vsnprintf is available use it, otherwise fall back to
+ // the unsafe vsprintf which, in general, can be dangerous and should
// be avoided.
- template<typename _Tv>
- int
- __convert_from_v(char* __out,
- const int __size __attribute__ ((__unused__)),
- const char* __fmt,
-#ifdef __UCLIBC_HAS_XCLOCALE__
- _Tv __v, const __c_locale& __cloc, int __prec)
+ inline int
+ __convert_from_v(const __c_locale&
+#ifndef __UCLIBC_HAS_XCLOCALE__
+ __cloc __attribute__ ((__unused__))
+#endif
+ ,
+ char* __out,
+ const int __size,
+ const char* __fmt, ...)
{
+ va_list __args;
+#ifdef __UCLIBC_HAS_XCLOCALE__
+
__c_locale __old = __gnu_cxx::__uselocale(__cloc);
#else
- _Tv __v, const __c_locale&, int __prec)
- {
# ifdef __UCLIBC_HAS_LOCALE__
char* __old = std::setlocale(LC_ALL, NULL);
char* __sav = new char[std::strlen(__old) + 1];
@@ -103,7 +107,9 @@ namespace std
# endif
#endif
- const int __ret = std::snprintf(__out, __size, __fmt, __prec, __v);
+ va_start(__args, __fmt);
+ const int __ret = std::vsnprintf(__out, __size, __fmt, __args);
+ va_end(__args);
#ifdef __UCLIBC_HAS_XCLOCALE__
__gnu_cxx::__uselocale(__old);
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/time_members.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.cc
@@ -53,11 +53,14 @@ namespace std
const size_t __len = __strftime_l(__s, __maxlen, __format, __tm,
_M_c_locale_timepunct);
#else
- char* __old = strdup(setlocale(LC_ALL, NULL));
+ char* __old = setlocale(LC_ALL, NULL);
+ const size_t __llen = strlen(__old) + 1;
+ char* __sav = new char[__llen];
+ memcpy(__sav, __old, __llen);
setlocale(LC_ALL, _M_name_timepunct);
const size_t __len = strftime(__s, __maxlen, __format, __tm);
- setlocale(LC_ALL, __old);
- free(__old);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
#endif
// Make sure __s is null terminated.
if (__len == 0)
@@ -207,11 +210,14 @@ namespace std
const size_t __len = __wcsftime_l(__s, __maxlen, __format, __tm,
_M_c_locale_timepunct);
#else
- char* __old = strdup(setlocale(LC_ALL, NULL));
+ char* __old = setlocale(LC_ALL, NULL);
+ const size_t __llen = strlen(__old) + 1;
+ char* __sav = new char[__llen];
+ memcpy(__sav, __old, __llen);
setlocale(LC_ALL, _M_name_timepunct);
const size_t __len = wcsftime(__s, __maxlen, __format, __tm);
- setlocale(LC_ALL, __old);
- free(__old);
+ setlocale(LC_ALL, __sav);
+ delete [] __sav;
#endif
// Make sure __s is null terminated.
if (__len == 0)
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c++locale_internal.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/c++locale_internal.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/c++locale_internal.h
@@ -31,6 +31,9 @@
#include <bits/c++config.h>
#include <clocale>
+#include <cstdlib>
+#include <cstring>
+#include <cstddef>
#ifdef __UCLIBC_MJN3_ONLY__
#warning clean this up

View File

@@ -0,0 +1,15 @@
Upstream-Status: Pending
Index: gcc-4.6.0/boehm-gc/include/gc.h
===================================================================
--- gcc-4.6.0.orig/boehm-gc/include/gc.h
+++ gcc-4.6.0/boehm-gc/include/gc.h
@@ -503,7 +503,7 @@ GC_API GC_PTR GC_malloc_atomic_ignore_of
#if defined(__linux__) || defined(__GLIBC__)
# include <features.h>
# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1 || __GLIBC__ > 2) \
- && !defined(__ia64__)
+ && !defined(__ia64__) && !defined(__UCLIBC__)
# ifndef GC_HAVE_BUILTIN_BACKTRACE
# define GC_HAVE_BUILTIN_BACKTRACE
# endif

View File

@@ -0,0 +1,15 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/include/c_std/cstdio
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/include/c_std/cstdio
+++ gcc-4.6.0/libstdc++-v3/include/c_std/cstdio
@@ -136,7 +136,7 @@ namespace std
using ::vsprintf;
} // namespace std
-#if _GLIBCXX_USE_C99
+#if _GLIBCXX_USE_C99 || defined(__UCLIBC__)
#undef snprintf
#undef vfscanf

View File

@@ -0,0 +1,16 @@
Upstream-Status: Inappropriate [embedded specific]
Index: gcc-4.6.0/libstdc++-v3/configure
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/configure
+++ gcc-4.6.0/libstdc++-v3/configure
@@ -18302,6 +18302,9 @@ $as_echo_n "checking for ISO C99 support
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <complex.h>
+#ifdef __UCLIBC__
+#error ugly hack to make sure configure test fails here for cross until uClibc supports the complex funcs
+#endif
int
main ()
{

View File

@@ -0,0 +1,30 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/include/ext/rope
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/include/ext/rope
+++ gcc-4.6.0/libstdc++-v3/include/ext/rope
@@ -54,6 +54,9 @@
#include <bits/gthr.h>
#include <tr1/functional>
+/* cope w/ index defined as macro, SuSv3 proposal */
+#undef index
+
# ifdef __GC
# define __GC_CONST const
# else
Index: gcc-4.6.0/libstdc++-v3/include/ext/ropeimpl.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/include/ext/ropeimpl.h
+++ gcc-4.6.0/libstdc++-v3/include/ext/ropeimpl.h
@@ -49,6 +49,9 @@
#include <ext/memory> // For uninitialized_copy_n
#include <ext/numeric> // For power
+/* cope w/ index defined as macro, SuSv3 proposal */
+#undef index
+
namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

View File

@@ -0,0 +1,51 @@
Upstream-Status: Inappropriate [embedded specific]
Index: gcc-4.6.0/libmudflap/mf-hooks2.c
===================================================================
--- gcc-4.6.0.orig/libmudflap/mf-hooks2.c
+++ gcc-4.6.0/libmudflap/mf-hooks2.c
@@ -421,7 +421,7 @@ WRAPPER2(void, bzero, void *s, size_t n)
{
TRACE ("%s\n", __PRETTY_FUNCTION__);
MF_VALIDATE_EXTENT(s, n, __MF_CHECK_WRITE, "bzero region");
- bzero (s, n);
+ memset (s, 0, n);
}
@@ -431,7 +431,7 @@ WRAPPER2(void, bcopy, const void *src, v
TRACE ("%s\n", __PRETTY_FUNCTION__);
MF_VALIDATE_EXTENT(src, n, __MF_CHECK_READ, "bcopy src");
MF_VALIDATE_EXTENT(dest, n, __MF_CHECK_WRITE, "bcopy dest");
- bcopy (src, dest, n);
+ memmove (dest, src, n);
}
@@ -441,7 +441,7 @@ WRAPPER2(int, bcmp, const void *s1, cons
TRACE ("%s\n", __PRETTY_FUNCTION__);
MF_VALIDATE_EXTENT(s1, n, __MF_CHECK_READ, "bcmp 1st arg");
MF_VALIDATE_EXTENT(s2, n, __MF_CHECK_READ, "bcmp 2nd arg");
- return bcmp (s1, s2, n);
+ return n == 0 ? 0 : memcmp (s1, s2, n);
}
@@ -450,7 +450,7 @@ WRAPPER2(char *, index, const char *s, i
size_t n = strlen (s);
TRACE ("%s\n", __PRETTY_FUNCTION__);
MF_VALIDATE_EXTENT(s, CLAMPADD(n, 1), __MF_CHECK_READ, "index region");
- return index (s, c);
+ return strchr (s, c);
}
@@ -459,7 +459,7 @@ WRAPPER2(char *, rindex, const char *s,
size_t n = strlen (s);
TRACE ("%s\n", __PRETTY_FUNCTION__);
MF_VALIDATE_EXTENT(s, CLAMPADD(n, 1), __MF_CHECK_READ, "rindex region");
- return rindex (s, c);
+ return strrchr (s, c);
}
/* XXX: stpcpy, memccpy */

View File

@@ -0,0 +1,40 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/messages_members.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/messages_members.h
@@ -32,7 +32,8 @@
//
// Written by Benjamin Kosnik <bkoz@redhat.com>
-
+namespace std
+{
#ifdef __UCLIBC_MJN3_ONLY__
#warning fix prototypes for *textdomain funcs
#endif
@@ -116,3 +117,4 @@
this->_S_create_c_locale(this->_M_c_locale_messages, __s);
}
}
+}
Index: gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/uclibc/time_members.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/uclibc/time_members.h
@@ -33,7 +33,8 @@
//
// Written by Benjamin Kosnik <bkoz@redhat.com>
-
+namespace std
+{
template<typename _CharT>
__timepunct<_CharT>::__timepunct(size_t __refs)
: facet(__refs), _M_data(NULL), _M_c_locale_timepunct(NULL),
@@ -75,3 +76,4 @@
delete _M_data;
_S_destroy_c_locale(_M_c_locale_timepunct);
}
+}

View File

@@ -0,0 +1,68 @@
Upstream-Status: Inappropriate [embedded specific]
GCC has internal multilib handling code but it assumes a very specific rigid directory
layout. The build system implementation of multilib layout is very generic and allows
complete customisation of the library directories.
This patch is a partial solution to allow any custom directories to be passed into gcc
and handled correctly. It forces gcc to use the base_libdir (which is the current
directory, "."). We need to do this for each multilib that is configured as we don't
know which compiler options may be being passed into the compiler. Since we have a compiler
per multilib at this point that isn't an issue.
The one problem is the target compiler is only going to work for the default multilib at
this point. Ideally we'd figure out which multilibs were being enabled with which paths
and be able to patch these entries with a complete set of correct paths, but we
don't have such code at this point. This is something the target gcc recipe should do
and override these platform defaults in its build config.
RP 15/8/11
Index: gcc-4_6-branch/gcc/config/i386/t-linux64
===================================================================
--- gcc-4_6-branch.orig/gcc/config/i386/t-linux64 2011-06-23 15:15:29.000000000 +0100
+++ gcc-4_6-branch/gcc/config/i386/t-linux64 2011-08-15 13:09:03.772415848 +0100
@@ -24,8 +24,8 @@
# MULTILIB_OSDIRNAMES according to what is found on the target.
MULTILIB_OPTIONS = m64/m32
-MULTILIB_DIRNAMES = 64 32
-MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)
+MULTILIB_DIRNAMES = . .
+MULTILIB_OSDIRNAMES = ../$(shell basename $(base_libdir)) ../$(shell basename $(base_libdir))
LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
Index: gcc-4_6-branch/gcc/config/mips/t-linux64
===================================================================
--- gcc-4_6-branch.orig/gcc/config/mips/t-linux64 2011-08-15 13:06:13.732415763 +0100
+++ gcc-4_6-branch/gcc/config/mips/t-linux64 2011-08-15 13:09:11.452419446 +0100
@@ -17,8 +17,8 @@
# <http://www.gnu.org/licenses/>.
MULTILIB_OPTIONS = mabi=n32/mabi=32/mabi=64
-MULTILIB_DIRNAMES = n32 32 64
-MULTILIB_OSDIRNAMES = ../lib32 ../lib ../lib64
+MULTILIB_DIRNAMES = . . .
+MULTILIB_OSDIRNAMES = ../$(shell basename $(base_libdir)) ../$(shell basename $(base_libdir)) ../$(shell basename $(base_libdir))
EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
Index: gcc-4_6-branch/gcc/config/rs6000/t-linux64
===================================================================
--- gcc-4_6-branch.orig/gcc/config/rs6000/t-linux64 2011-08-15 13:06:25.272415822 +0100
+++ gcc-4_6-branch/gcc/config/rs6000/t-linux64 2011-08-15 13:09:21.062415878 +0100
@@ -32,11 +32,11 @@
# MULTILIB_OSDIRNAMES according to what is found on the target.
MULTILIB_OPTIONS = m64/m32 msoft-float
-MULTILIB_DIRNAMES = 64 32 nof
+MULTILIB_DIRNAMES = . . .
MULTILIB_EXTRA_OPTS = fPIC mstrict-align
MULTILIB_EXCEPTIONS = m64/msoft-float
MULTILIB_EXCLUSIONS = m64/!m32/msoft-float
-MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib) nof
+MULTILIB_OSDIRNAMES = ../$(shell basename $(base_libdir)) ../$(shell basename $(base_libdir)) ../$(shell basename $(base_libdir))
MULTILIB_MATCHES = $(MULTILIB_MATCHES_FLOAT)
softfp_wrap_start := '\#ifndef __powerpc64__'

View File

@@ -0,0 +1,31 @@
Upstream-Status: Pending
http://sourceforge.net/mailarchive/forum.php?thread_id=8959304&forum_id=5348
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24836
Index: gcc-4.6.0/gcc/configure.ac
===================================================================
--- gcc-4.6.0.orig/gcc/configure.ac
+++ gcc-4.6.0/gcc/configure.ac
@@ -2926,7 +2926,7 @@ foo: .long 25
tls_first_minor=14
tls_as_opt="-m64 -Aesame --fatal-warnings"
;;
- sh-*-* | sh[34]-*-*)
+ sh-*-* | sh[34]*-*-*)
conftest_s='
.section ".tdata","awT",@progbits
foo: .long 25
Index: gcc-4.6.0/gcc/configure
===================================================================
--- gcc-4.6.0.orig/gcc/configure
+++ gcc-4.6.0/gcc/configure
@@ -22756,7 +22756,7 @@ foo: .long 25
tls_first_minor=14
tls_as_opt="-m64 -Aesame --fatal-warnings"
;;
- sh-*-* | sh[34]-*-*)
+ sh-*-* | sh[34]*-*-*)
conftest_s='
.section ".tdata","awT",@progbits
foo: .long 25

View File

@@ -0,0 +1,36 @@
Upstream-Status: Pending
By Lennert Buytenhek <buytenh@wantstofly.org>
Adds support for arm*b-linux* big-endian ARM targets
See http://gcc.gnu.org/PR16350
Index: gcc-4.6.0/gcc/config/arm/linux-elf.h
===================================================================
--- gcc-4.6.0.orig/gcc/config/arm/linux-elf.h
+++ gcc-4.6.0/gcc/config/arm/linux-elf.h
@@ -51,7 +51,7 @@
#undef MULTILIB_DEFAULTS
#define MULTILIB_DEFAULTS \
- { "marm", "mlittle-endian", "mhard-float", "mno-thumb-interwork" }
+ { "marm", TARGET_ENDIAN_OPTION, "mhard-float", "mno-thumb-interwork" }
/* Now we define the strings used to build the spec file. */
#undef LIB_SPEC
Index: gcc-4.6.0/gcc/config.gcc
===================================================================
--- gcc-4.6.0.orig/gcc/config.gcc
+++ gcc-4.6.0/gcc/config.gcc
@@ -822,6 +822,11 @@ arm*-*-linux*) # ARM GNU/Linux with EL
esac
tmake_file="${tmake_file} t-linux arm/t-arm"
case ${target} in
+ arm*b-*)
+ tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+ ;;
+ esac
+ case ${target} in
arm*-*-linux-*eabi)
tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h"
tm_file="$tm_file ../../libgcc/config/arm/bpabi-lib.h"

View File

@@ -0,0 +1,76 @@
Upstream-Status: Pending
Hi,
The attached patch makes sure that we create smaller object code for
simple switch statements. We just make sure to flatten the switch
statement into an if-else chain, basically.
This fixes a size-regression as compared to gcc-3.4, as can be seen
below.
2007-04-15 Bernhard Fischer <..>
* stmt.c (expand_case): Do not create a complex binary tree when
optimizing for size but rather use the simple ordered list.
(emit_case_nodes): do not emit jumps to the default_label when
optimizing for size.
Not regtested so far.
Comments?
Attached is the test switch.c mentioned below.
$ for i in 2.95 3.3 3.4 4.0 4.1 4.2.orig-HEAD 4.3.orig-HEAD 4.3-HEAD;do
gcc-$i -DCHAIN -Os -o switch-CHAIN-$i.o -c switch.c ;done
$ for i in 2.95 3.3 3.4 4.0 4.1 4.2.orig-HEAD 4.3.orig-HEAD 4.3-HEAD;do
gcc-$i -UCHAIN -Os -o switch-$i.o -c switch.c ;done
$ size switch-*.o
text data bss dec hex filename
169 0 0 169 a9 switch-2.95.o
115 0 0 115 73 switch-3.3.o
103 0 0 103 67 switch-3.4.o
124 0 0 124 7c switch-4.0.o
124 0 0 124 7c switch-4.1.o
124 0 0 124 7c switch-4.2.orig-HEAD.o
95 0 0 95 5f switch-4.3-HEAD.o
124 0 0 124 7c switch-4.3.orig-HEAD.o
166 0 0 166 a6 switch-CHAIN-2.95.o
111 0 0 111 6f switch-CHAIN-3.3.o
95 0 0 95 5f switch-CHAIN-3.4.o
95 0 0 95 5f switch-CHAIN-4.0.o
95 0 0 95 5f switch-CHAIN-4.1.o
95 0 0 95 5f switch-CHAIN-4.2.orig-HEAD.o
95 0 0 95 5f switch-CHAIN-4.3-HEAD.o
95 0 0 95 5f switch-CHAIN-4.3.orig-HEAD.o
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="gcc-4.3.gcc-flatten-switch-stmt.00.diff"
Index: gcc-4.6.0/gcc/stmt.c
===================================================================
--- gcc-4.6.0.orig/gcc/stmt.c
+++ gcc-4.6.0/gcc/stmt.c
@@ -2478,7 +2478,11 @@ expand_case (gimple stmt)
default code is emitted. */
use_cost_table = estimate_case_costs (case_list);
- balance_case_nodes (&case_list, NULL);
+ /* When optimizing for size, we want a straight list to avoid
+ jumps as much as possible. This basically creates an if-else
+ chain. */
+ if (!optimize_size)
+ balance_case_nodes (&case_list, NULL);
emit_case_nodes (index, case_list, default_label, index_type);
if (default_label)
emit_jump (default_label);
@@ -3046,6 +3050,7 @@ emit_case_nodes (rtx index, case_node_pt
{
if (!node_has_low_bound (node, index_type))
{
+ if (!optimize_size) /* don't jl to the .default_label. */
emit_cmp_and_jump_insns (index,
convert_modes
(mode, imode,

View File

@@ -0,0 +1,25 @@
Upstream-Status: Pending
#This patch adds --sysroot to COLLECT_GCC_OPTIONS, which is used to
#invoke collect2.
Index: gcc-4.6.0/gcc/gcc.c
===================================================================
--- gcc-4.6.0.orig/gcc/gcc.c
+++ gcc-4.6.0/gcc/gcc.c
@@ -3948,6 +3948,15 @@ set_collect_gcc_options (void)
sizeof ("COLLECT_GCC_OPTIONS=") - 1);
first_time = TRUE;
+#ifdef HAVE_LD_SYSROOT
+ if (target_system_root_changed && target_system_root)
+ {
+ obstack_grow (&collect_obstack, "'--sysroot=", sizeof("'--sysroot=")-1);
+ obstack_grow (&collect_obstack, target_system_root,strlen(target_system_root));
+ obstack_grow (&collect_obstack, "'", 1);
+ first_time = FALSE;
+ }
+#endif
for (i = 0; (int) i < n_switches; i++)
{
const char *const *args;

View File

@@ -0,0 +1,186 @@
source: http://patchwork.ozlabs.org/patch/129800/
Upstream-Status: Submitted
ChangeLog
* Makefile.in (gcc_gxx_include_dir_add_sysroot): New.
(PREPROCESSOR_DEFINES): Define GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT.
* cppdefault.c (cpp_include_defaults): replace hard coded "1" with
GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT for "add_sysroot" field.
* configure.ac (AC_SUBST): Add gcc_gxx_include_dir_add_sysroot to
control whether sysroot should be prepended to gxx include dir.
* configure: Regenerate.
Hi, this is a follow up for issue "http://codereview.appspot.com/4641076".
The rationale for the patch copied from previous thread:
=======================================
The setup:
Configuring a toolchain targeting x86-64 GNU Linux (Ubuntu Lucid), as a
cross-compiler. Using a sysroot to provide the Lucid headers+libraries,
with the sysroot path being within the GCC install tree. Want to use the
Lucid system libstdc++ and headers, which means that I'm not
building/installing libstdc++-v3.
So, configuring with:
--with-sysroot="$SYSROOT"
--disable-libstdc++-v3 \
--with-gxx-include-dir="$SYSROOT/usr/include/c++/4.4" \
(among other options).
Hoping to support two usage models with this configuration, w.r.t. use of
the sysroot:
(1) somebody installs the sysroot in the normal location relative to the
GCC install, and relocates the whole bundle (sysroot+GCC). This works
great AFAICT, GCC finds its includes (including the C++ includes) thanks
to the add_standard_paths iprefix handling.
(2) somebody installs the sysroot in a non-standard location, and uses
--sysroot to try to access it. This works fine for the C headers, but
doesn't work for the C++ headers.
For the C headers, add_standard_paths prepends the sysroot location to
the /usr/include path (since that's what's specified in cppdefault.c for
that path). It doesn't do the same for the C++ include path, though
(again, as specified in cppdefault.c).
add_standard_paths doesn't attempt to relocate built-in include paths that
start with the compiled-in sysroot location (e.g., the g++ include dir, in
this case). This isn't surprising really: normally you either prepend the
sysroot location or you don't (as specified by cppdefault.c); none of the
built-in paths normally *start* with the sysroot location and need to be
relocated. However, in this odd-ball case of trying to use the C++ headers
from the sysroot, one of the paths *does* need to be relocated in this way.
===========================
Index: gcc-4_6-branch/gcc/Makefile.in
===================================================================
--- gcc-4_6-branch.orig/gcc/Makefile.in 2012-03-04 09:33:36.000000000 -0800
+++ gcc-4_6-branch/gcc/Makefile.in 2012-03-04 09:41:06.858672113 -0800
@@ -587,6 +587,7 @@
build_tooldir = $(exec_prefix)/$(target_noncanonical)
# Directory in which the compiler finds target-independent g++ includes.
gcc_gxx_include_dir = @gcc_gxx_include_dir@
+gcc_gxx_include_dir_add_sysroot = @gcc_gxx_include_dir_add_sysroot@
# Directory to search for site-specific includes.
local_includedir = $(local_prefix)/include
includedir = $(prefix)/include
@@ -3964,6 +3965,7 @@
-DGCC_INCLUDE_DIR=\"$(libsubdir)/include\" \
-DFIXED_INCLUDE_DIR=\"$(libsubdir)/include-fixed\" \
-DGPLUSPLUS_INCLUDE_DIR=\"$(gcc_gxx_include_dir)\" \
+ -DGPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT=$(gcc_gxx_include_dir_add_sysroot) \
-DGPLUSPLUS_TOOL_INCLUDE_DIR=\"$(gcc_gxx_include_dir)/$(target_noncanonical)\" \
-DGPLUSPLUS_BACKWARD_INCLUDE_DIR=\"$(gcc_gxx_include_dir)/backward\" \
-DLOCAL_INCLUDE_DIR=\"$(local_includedir)\" \
Index: gcc-4_6-branch/gcc/configure.ac
===================================================================
--- gcc-4_6-branch.orig/gcc/configure.ac 2012-03-04 09:33:36.000000000 -0800
+++ gcc-4_6-branch/gcc/configure.ac 2012-03-04 09:41:06.862671939 -0800
@@ -144,6 +144,15 @@
fi
fi
+gcc_gxx_include_dir_add_sysroot=0
+if test "${with_sysroot+set}" = set; then :
+ gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'`
+ if test "${gcc_gxx_without_sysroot}"; then :
+ gcc_gxx_include_dir="${gcc_gxx_without_sysroot}"
+ gcc_gxx_include_dir_add_sysroot=1
+ fi
+fi
+
AC_ARG_WITH(cpp_install_dir,
[ --with-cpp-install-dir=DIR
install the user visible C preprocessor in DIR
@@ -4727,6 +4736,7 @@
AC_SUBST(float_h_file)
AC_SUBST(gcc_config_arguments)
AC_SUBST(gcc_gxx_include_dir)
+AC_SUBST(gcc_gxx_include_dir_add_sysroot)
AC_SUBST(host_exeext)
AC_SUBST(host_xm_file_list)
AC_SUBST(host_xm_include_list)
Index: gcc-4_6-branch/gcc/cppdefault.c
===================================================================
--- gcc-4_6-branch.orig/gcc/cppdefault.c 2012-03-03 01:03:17.000000000 -0800
+++ gcc-4_6-branch/gcc/cppdefault.c 2012-03-04 09:41:06.862671939 -0800
@@ -48,15 +48,18 @@
= {
#ifdef GPLUSPLUS_INCLUDE_DIR
/* Pick up GNU C++ generic include files. */
- { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1, 0, 0 },
+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1,
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 },
#endif
#ifdef GPLUSPLUS_TOOL_INCLUDE_DIR
/* Pick up GNU C++ target-dependent include files. */
- { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1, 0, 1 },
+ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1,
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 1 },
#endif
#ifdef GPLUSPLUS_BACKWARD_INCLUDE_DIR
/* Pick up GNU C++ backward and deprecated include files. */
- { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1, 0, 0 },
+ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1,
+ GPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT, 0 },
#endif
#ifdef GCC_INCLUDE_DIR
/* This is the dir for gcc's private headers. */
Index: gcc-4_6-branch/gcc/configure
===================================================================
--- gcc-4_6-branch.orig/gcc/configure 2012-03-04 09:33:36.000000000 -0800
+++ gcc-4_6-branch/gcc/configure 2012-03-04 09:41:12.462671816 -0800
@@ -636,6 +636,7 @@
host_xm_include_list
host_xm_file_list
host_exeext
+gcc_gxx_include_dir_add_sysroot
gcc_gxx_include_dir
gcc_config_arguments
float_h_file
@@ -3313,6 +3314,15 @@
fi
fi
+gcc_gxx_include_dir_add_sysroot=0
+if test "${with_sysroot+set}" = set; then :
+ gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'`
+ if test "${gcc_gxx_without_sysroot}"; then :
+ gcc_gxx_include_dir="${gcc_gxx_without_sysroot}"
+ gcc_gxx_include_dir_add_sysroot=1
+ fi
+fi
+
# Check whether --with-cpp_install_dir was given.
if test "${with_cpp_install_dir+set}" = set; then :
@@ -17504,7 +17514,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17507 "configure"
+#line 17517 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17610,7 +17620,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17613 "configure"
+#line 17623 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -26141,6 +26151,7 @@
+

View File

@@ -0,0 +1,26 @@
Upstream-Status: Pending
# Dimitry Andric <dimitry@andric.com>, 2004-05-01
#
# * Removed the extra -lfloat option from LIBGCC_SPEC, since it isn't needed
# anymore. (The required functions are now in libgcc.)
#
# Fixes errors like
# arm-softfloat-linux-gnu/3.4.0/../../../../arm-softfloat-linux-gnu/bin/ld: cannot find -lfloat
# collect2: ld returned 1 exit status
# make[2]: *** [arm-softfloat-linux-gnu/gcc-3.4.0-glibc-2.3.2/build-glibc/iconvdata/ISO8859-1.so] Error 1
# when building glibc-2.3.3 with gcc-3.4.0 for arm-softfloat
Index: gcc-4.6.0/gcc/config/arm/linux-elf.h
===================================================================
--- gcc-4.6.0.orig/gcc/config/arm/linux-elf.h
+++ gcc-4.6.0/gcc/config/arm/linux-elf.h
@@ -60,7 +60,7 @@
%{shared:-lc} \
%{!shared:%{profile:-lc_p}%{!profile:-lc}}"
-#define LIBGCC_SPEC "%{msoft-float:-lfloat} %{mfloat-abi=soft*:-lfloat} -lgcc"
+#define LIBGCC_SPEC "-lgcc"
#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"

View File

@@ -0,0 +1,18 @@
Upstream-Status: Pending
Index: gcc-4.6.0/gcc/config/arm/t-linux
===================================================================
--- gcc-4.6.0.orig/gcc/config/arm/t-linux
+++ gcc-4.6.0/gcc/config/arm/t-linux
@@ -23,7 +23,10 @@ TARGET_LIBGCC2_CFLAGS = -fomit-frame-poi
LIB1ASMSRC = arm/lib1funcs.asm
LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
- _arm_addsubdf3 _arm_addsubsf3
+ _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
+ _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
+ _fixsfsi _fixunssfsi _floatdidf _floatdisf _floatundisf _floatundidf
+# _arm_addsubdf3 _arm_addsubsf3
# MULTILIB_OPTIONS = mhard-float/msoft-float
# MULTILIB_DIRNAMES = hard-float soft-float

View File

@@ -0,0 +1,33 @@
Upstream-Status: Inappropriate [embedded specific]
---
gcc/configure | 2 +-
gcc/configure.ac | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
Index: gcc-4.6.0/gcc/configure
===================================================================
--- gcc-4.6.0.orig/gcc/configure
+++ gcc-4.6.0/gcc/configure
@@ -10898,7 +10898,7 @@ else
saved_CFLAGS="${CFLAGS}"
CC="${CC_FOR_BUILD}" CFLAGS="${CFLAGS_FOR_BUILD}" \
LDFLAGS="${LDFLAGS_FOR_BUILD}" \
- ${realsrcdir}/configure \
+ CONFIG_SITE= ${realsrcdir}/configure --cache-file=./other.cache \
--enable-languages=${enable_languages-all} \
--target=$target_alias --host=$build_alias --build=$build_alias
CFLAGS="${saved_CFLAGS}"
Index: gcc-4.6.0/gcc/configure.ac
===================================================================
--- gcc-4.6.0.orig/gcc/configure.ac
+++ gcc-4.6.0/gcc/configure.ac
@@ -1435,7 +1435,7 @@ else
saved_CFLAGS="${CFLAGS}"
CC="${CC_FOR_BUILD}" CFLAGS="${CFLAGS_FOR_BUILD}" \
LDFLAGS="${LDFLAGS_FOR_BUILD}" \
- ${realsrcdir}/configure \
+ CONFIG_SITE= ${realsrcdir}/configure --cache-file=./other.cache \
--enable-languages=${enable_languages-all} \
--target=$target_alias --host=$build_alias --build=$build_alias
CFLAGS="${saved_CFLAGS}"

View File

@@ -0,0 +1,40 @@
Currently, if the gcc toolchain is relocated and installed from sstate, and you then try to compile
preprocessed source (.i or .ii files), the compiler will try to access the builtin sysroot location
rather than the --sysroot option specified on the command line. If that directory is
unreadable (permission denied), gcc will error.
This happens when ccache is in use due to the fact it uses preprocessed source files.
The fix below adds %I to the cpp-output spec macro so the default substitutions for -iprefix,
-isystem, -isysroot happen and the correct sysroot is used.
[YOCTO #2074]
Upstream-Status: Pending
RP 2012/04/13
Index: gcc-4_6-branch/gcc/gcc.c
===================================================================
--- gcc-4_6-branch.orig/gcc/gcc.c 2012-04-13 12:24:37.939671140 +0000
+++ gcc-4_6-branch/gcc/gcc.c 2012-04-13 12:24:54.439670688 +0000
@@ -953,7 +953,7 @@
%W{o*:--output-pch=%*}}%V}}}}}}", 0, 0, 0},
{".i", "@cpp-output", 0, 0, 0},
{"@cpp-output",
- "%{!M:%{!MM:%{!E:cc1 -fpreprocessed %i %(cc1_options) %{!fsyntax-only:%(invoke_as)}}}}", 0, 0, 0},
+ "%{!M:%{!MM:%{!E:cc1 -fpreprocessed %i %I %(cc1_options) %{!fsyntax-only:%(invoke_as)}}}}", 0, 0, 0},
{".s", "@assembler", 0, 0, 0},
{"@assembler",
"%{!M:%{!MM:%{!E:%{!S:as %(asm_debug) %(asm_options) %i %A }}}}", 0, 0, 0},
Index: gcc-4_6-branch/gcc/cp/lang-specs.h
===================================================================
--- gcc-4_6-branch.orig/gcc/cp/lang-specs.h 2012-04-13 12:25:01.019670594 +0000
+++ gcc-4_6-branch/gcc/cp/lang-specs.h 2012-04-13 12:25:07.567670180 +0000
@@ -64,5 +64,5 @@
{".ii", "@c++-cpp-output", 0, 0, 0},
{"@c++-cpp-output",
"%{!M:%{!MM:%{!E:\
- cc1plus -fpreprocessed %i %(cc1_options) %2\
+ cc1plus -fpreprocessed %i %I %(cc1_options) %2\
%{!fsyntax-only:%(invoke_as)}}}}", 0, 0, 0},

View File

@@ -0,0 +1,48 @@
Upstream-Status: Inappropriate [configuration]
GCC: disable MASK_RELAX_PIC_CALLS bit
The new feature added after 4.3.3
"http://www.pubbs.net/200909/gcc/94048-patch-add-support-for-rmipsjalr.html"
will cause cc1plus to eat up all the system memory when building webkit-gtk.
The function mips_get_pic_call_symbol keeps on recursively calling itself.
Disable this feature to work around the bug.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Index: gcc-4.6.0/gcc/configure
===================================================================
--- gcc-4.6.0.orig/gcc/configure
+++ gcc-4.6.0/gcc/configure
@@ -24887,13 +24887,6 @@ $as_echo_n "checking assembler and linke
rm -f conftest.*
fi
fi
- if test $gcc_cv_as_ld_jalr_reloc = yes; then
- if test x$target_cpu_default = x; then
- target_cpu_default=MASK_RELAX_PIC_CALLS
- else
- target_cpu_default="($target_cpu_default)|MASK_RELAX_PIC_CALLS"
- fi
- fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_ld_jalr_reloc" >&5
$as_echo "$gcc_cv_as_ld_jalr_reloc" >&6; }
Index: gcc-4.6.0/gcc/configure.ac
===================================================================
--- gcc-4.6.0.orig/gcc/configure.ac
+++ gcc-4.6.0/gcc/configure.ac
@@ -3764,13 +3764,6 @@ x:
rm -f conftest.*
fi
fi
- if test $gcc_cv_as_ld_jalr_reloc = yes; then
- if test x$target_cpu_default = x; then
- target_cpu_default=MASK_RELAX_PIC_CALLS
- else
- target_cpu_default="($target_cpu_default)|MASK_RELAX_PIC_CALLS"
- fi
- fi
AC_MSG_RESULT($gcc_cv_as_ld_jalr_reloc)
AC_CACHE_CHECK([linker for .eh_frame personality relaxation],

View File

@@ -0,0 +1,49 @@
This patch addresses an issue with the compiler generating an ICE
during compilation of lttng-ust.
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50099
Upstream-Status: Pending
Signed-off-by: Khem Raj <khem.raj@gmail.com>
Signed-off-by: Saul Wold <sgw@linux.intel.com>
Index: gcc/config/arm/arm.md
===================================================================
--- gcc-4.6.0/gcc/config/arm/arm.md (revision 178135)
+++ gcc-4.6.0/gcc/config/arm/arm.md (working copy)
@@ -4217,6 +4217,7 @@ (define_split
"TARGET_32BIT"
[(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
{
+ rtx srcop = operands[1];
rtx lo_part = gen_lowpart (SImode, operands[0]);
enum machine_mode src_mode = GET_MODE (operands[1]);
@@ -4224,14 +4225,21 @@ (define_split
&& !reg_overlap_mentioned_p (operands[0], operands[1]))
emit_clobber (operands[0]);
+ if (TARGET_ARM && src_mode == QImode
+ && !arm_reg_or_extendqisi_mem_op (srcop, QImode))
+ {
+ rtx dest = gen_lowpart (QImode, lo_part);
+ emit_move_insn (dest, srcop);
+ srcop = dest;
+ }
if (!REG_P (lo_part) || src_mode != SImode
- || !rtx_equal_p (lo_part, operands[1]))
+ || !rtx_equal_p (lo_part, srcop))
{
if (src_mode == SImode)
- emit_move_insn (lo_part, operands[1]);
+ emit_move_insn (lo_part, srcop);
else
emit_insn (gen_rtx_SET (VOIDmode, lo_part,
- gen_rtx_SIGN_EXTEND (SImode, operands[1])));
+ gen_rtx_SIGN_EXTEND (SImode, srcop)));
operands[1] = lo_part;
}
operands[0] = gen_highpart (SImode, operands[0]);

View File

@@ -0,0 +1,32 @@
Upstream-Status: Inappropriate [embedded specific]
* Fortran would have searched for arm-angstrom-gnueabi-gfortran but would have
used gfortran. For gcc_4.2.2.bb we want to use the gfortran compiler from our cross
directory.
Index: gcc-4.5+svnr155514/libgfortran/configure
===================================================================
--- gcc-4.5+svnr155514.orig/libgfortran/configure 2009-12-29 22:02:01.000000000 -0800
+++ gcc-4.5+svnr155514/libgfortran/configure 2009-12-30 08:12:40.889091657 -0800
@@ -11655,7 +11655,7 @@ CC="$lt_save_CC"
# We need gfortran to compile parts of the library
#AC_PROG_FC(gfortran)
-FC="$GFORTRAN"
+#FC="$GFORTRAN"
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
Index: gcc-4.5+svnr155514/libgfortran/configure.ac
===================================================================
--- gcc-4.5+svnr155514.orig/libgfortran/configure.ac 2009-12-29 22:02:01.000000000 -0800
+++ gcc-4.5+svnr155514/libgfortran/configure.ac 2009-12-30 08:12:13.453094218 -0800
@@ -187,7 +187,7 @@ AC_SUBST(enable_static)
# We need gfortran to compile parts of the library
#AC_PROG_FC(gfortran)
-FC="$GFORTRAN"
+#FC="$GFORTRAN"
AC_PROG_FC(gfortran)
# extra LD Flags which are required for targets

View File

@@ -0,0 +1,33 @@
Upstream-Status: Inappropriate [embedded specific]
---
configure | 2 +-
configure.ac | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
Index: gcc-4.6.0/configure.ac
===================================================================
--- gcc-4.6.0.orig/configure.ac
+++ gcc-4.6.0/configure.ac
@@ -3073,7 +3073,7 @@ fi
# for target_alias and gcc doesn't manage it consistently.
target_configargs="--cache-file=./config.cache ${target_configargs}"
-FLAGS_FOR_TARGET=
+FLAGS_FOR_TARGET="$ARCH_FLAGS_FOR_TARGET"
case " $target_configdirs " in
*" newlib "*)
case " $target_configargs " in
Index: gcc-4.6.0/configure
===================================================================
--- gcc-4.6.0.orig/configure
+++ gcc-4.6.0/configure
@@ -7594,7 +7594,7 @@ fi
# for target_alias and gcc doesn't manage it consistently.
target_configargs="--cache-file=./config.cache ${target_configargs}"
-FLAGS_FOR_TARGET=
+FLAGS_FOR_TARGET="$ARCH_FLAGS_FOR_TARGET"
case " $target_configdirs " in
*" newlib "*)
case " $target_configargs " in

View File

@@ -0,0 +1,116 @@
Upstream-Status: Pending
Before committing, I noticed that PR/32161 was marked as a dup of PR/32009, but my previous patch did not fix it.
This alternative patch is better because it lets you just use CFLAGS_FOR_TARGET to set the compilation flags for libgcc. Since bootstrapped target libraries are never compiled with the native compiler, it makes little sense to use different flags for stage1 and later stages. And it also makes little sense to use a different variable than CFLAGS_FOR_TARGET.
Other changes I had to do include:
- moving the creation of default CFLAGS_FOR_TARGET from Makefile.am to configure.ac, because otherwise the BOOT_CFLAGS are substituted into CFLAGS_FOR_TARGET (which is "-O2 -g $(CFLAGS)") via $(CFLAGS). It is also cleaner this way though.
- passing the right CFLAGS to configure scripts as exported environment variables
I also stopped passing LIBCFLAGS to configure scripts since they are unused in the whole src tree. And I updated the documentation as H-P reminded me to do.
Bootstrapped/regtested i686-pc-linux-gnu, will commit to 4.4 shortly. Ok for 4.3?
Paolo
2008-02-19 Paolo Bonzini <bonzini@gnu.org>
PR bootstrap/32009
PR bootstrap/32161
* configure.ac (CFLAGS_FOR_TARGET, CXXFLAGS_FOR_TARGET): Compute here.
* configure: Regenerate.
* Makefile.def: Define stage_libcflags for all bootstrap stages.
* Makefile.tpl (BOOT_LIBCFLAGS, STAGE2_LIBCFLAGS, STAGE3_LIBCFLAGS,
STAGE4_LIBCFLAGS): New.
(CFLAGS_FOR_TARGET, CXXFLAGS_FOR_TARGET): Subst from autoconf, without
$(SYSROOT_CFLAGS_FOR_TARGET) and $(DEBUG_PREFIX_CFLAGS_FOR_TARGET).
(BASE_TARGET_EXPORTS): Append them here to C{,XX}FLAGS.
(EXTRA_TARGET_FLAGS): Append them here to {LIB,}C{,XX}FLAGS.
(configure-stage[+id+]-[+prefix+][+module+]): Pass stage_libcflags
for target modules. Don't export LIBCFLAGS.
(all-stage[+id+]-[+prefix+][+module+]): Pass stage_libcflags; pass
$(BASE_FLAGS_TO_PASS) where [+args+] was passed, and [+args+] after
the overridden CFLAGS_FOR_TARGET and CXXFLAGS_FOR_TARGET.
(invocations of `all'): Replace $(TARGET_FLAGS_TO_PASS) with
$(EXTRA_TARGET_FLAGS), $(FLAGS_TO_PASS) with $(EXTRA_HOST_FLAGS).
* Makefile.in: Regenerate.
config:
2008-02-19 Paolo Bonzini <bonzini@gnu.org>
PR bootstrap/32009
* mh-ppc-darwin (BOOT_CFLAGS): Reenable.
gcc:
2008-02-19 Paolo Bonzini <bonzini@gnu.org>
PR bootstrap/32009
* doc/install.texi: Correct references to CFLAGS, replacing them
with BOOT_CFLAGS. Document flags used during bootstrap for
target libraries.
---
Makefile.def | 25
Makefile.in | 1845 ++++++++++++++++++++++++++++++-------------------
Makefile.tpl | 91 +-
config/mh-ppc-darwin | 3
configure | 36
configure.ac | 32
gcc/Makefile.in | 2
gcc/configure | 6
gcc/configure.ac | 3
gcc/doc/install.texi | 56 -
libiberty/Makefile.in | 162 ++--
libiberty/configure | 46 -
libiberty/configure.ac | 43 -
13 files changed, 1454 insertions(+), 896 deletions(-)
Index: gcc-4.6.0/configure
===================================================================
--- gcc-4.6.0.orig/configure
+++ gcc-4.6.0/configure
@@ -6785,6 +6785,38 @@ if test "x$CXXFLAGS_FOR_TARGET" = x; the
fi
+# During gcc bootstrap, if we use some random cc for stage1 then CFLAGS
+# might be empty or "-g". We don't require a C++ compiler, so CXXFLAGS
+# might also be empty (or "-g", if a non-GCC C++ compiler is in the path).
+# We want to ensure that TARGET libraries (which we know are built with
+# gcc) are built with "-O2 -g", so include those options when setting
+# CFLAGS_FOR_TARGET and CXXFLAGS_FOR_TARGET.
+if test "x$CFLAGS_FOR_TARGET" = x; then
+ CFLAGS_FOR_TARGET=$CFLAGS
+ case " $CFLAGS " in
+ *" -O2 "*) ;;
+ *) CFLAGS_FOR_TARGET="-O2 $CFLAGS_FOR_TARGET" ;;
+ esac
+ case " $CFLAGS " in
+ *" -g "* | *" -g3 "*) ;;
+ *) CFLAGS_FOR_TARGET="-g $CFLAGS_FOR_TARGET" ;; # prepend to the accumulated value so an -O2 added above is kept
+ esac
+fi
+
+
+if test "x$CXXFLAGS_FOR_TARGET" = x; then
+ CXXFLAGS_FOR_TARGET=$CXXFLAGS
+ case " $CXXFLAGS " in
+ *" -O2 "*) ;;
+ *) CXXFLAGS_FOR_TARGET="-O2 $CXXFLAGS_FOR_TARGET" ;;
+ esac
+ case " $CXXFLAGS " in
+ *" -g "* | *" -g3 "*) ;;
+ *) CXXFLAGS_FOR_TARGET="-g $CXXFLAGS_FOR_TARGET" ;; # prepend to the accumulated value so an -O2 added above is kept
+ esac
+fi
+
+
# Handle --with-headers=XXX. If the value is not "yes", the contents of
# the named directory are copied to $(tooldir)/sys-include.
if test x"${with_headers}" != x && test x"${with_headers}" != xno ; then

View File

@@ -0,0 +1,33 @@
An "Argument list too long" error occurs when the path of the
build directory is longer than 200 characters. It is caused by:
headers=`echo $(PLUGIN_HEADERS) | tr ' ' '\012' | sort -u`
PLUGIN_HEADERS is too long before it is sorted, so "echo" cannot handle
it. Using the $(sort list) function of GNU make, which can handle the long
list, fixes the problem: it also removes duplicates, so the result is short enough.
The "tr ' ' '\012'" was only there to translate spaces to "\n" for sort(1);
$(sort list) does not need it.
Signed-off-by: Robert Yang <liezhi.yang@windriver.com>
Upstream-Status: Pending
---
gcc/Makefile.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -4553,7 +4553,7 @@ install-plugin: installdirs lang.install-plugin s-header-vars install-gengtype
# We keep the directory structure for files in config or c-family and .def
# files. All other files are flattened to a single directory.
$(mkinstalldirs) $(DESTDIR)$(plugin_includedir)
- headers=`echo $(PLUGIN_HEADERS) | tr ' ' '\012' | sort -u`; \
+ headers="$(sort $(PLUGIN_HEADERS))"; \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`; \
for file in $$headers; do \
if [ -f $$file ] ; then \
--
1.7.10.2

View File

@@ -0,0 +1,35 @@
Upstream-Status: Backport
2011-08-18 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-08-18 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/arm.c (arm_rtx_costs_1): Don't modify the costs of SET.
(arm_size_rtx_costs): Likewise.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-08-12 08:08:31 +0000
+++ new/gcc/config/arm/arm.c 2011-08-18 13:53:37 +0000
@@ -7464,6 +7464,9 @@
*total = COSTS_N_INSNS (4);
return true;
+ case SET:
+ return false;
+
default:
*total = COSTS_N_INSNS (4);
return false;
@@ -7811,6 +7814,9 @@
*total = COSTS_N_INSNS (1) + 1;
return true;
+ case SET:
+ return false;
+
default:
if (mode != VOIDmode)
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

View File

@@ -0,0 +1,189 @@
Upstream-Status: Pending
Index: gcc-4_6-branch/Makefile.def
===================================================================
--- gcc-4_6-branch.orig/Makefile.def 2012-03-03 01:08:03.000000000 -0800
+++ gcc-4_6-branch/Makefile.def 2012-03-04 09:19:02.430607447 -0800
@@ -242,6 +242,7 @@
flags_to_pass = { flag= BISON ; };
flags_to_pass = { flag= CC_FOR_BUILD ; };
flags_to_pass = { flag= CFLAGS_FOR_BUILD ; };
+flags_to_pass = { flag= CPPFLAGS_FOR_BUILD ; };
flags_to_pass = { flag= CXX_FOR_BUILD ; };
flags_to_pass = { flag= EXPECT ; };
flags_to_pass = { flag= FLEX ; };
Index: gcc-4_6-branch/gcc/Makefile.in
===================================================================
--- gcc-4_6-branch.orig/gcc/Makefile.in 2012-03-03 01:03:17.000000000 -0800
+++ gcc-4_6-branch/gcc/Makefile.in 2012-03-04 09:19:02.430607447 -0800
@@ -770,7 +770,7 @@
# Native linker and preprocessor flags. For x-fragment overrides.
BUILD_LDFLAGS=@BUILD_LDFLAGS@
-BUILD_CPPFLAGS=$(ALL_CPPFLAGS)
+BUILD_CPPFLAGS=$(INCLUDES) @BUILD_CPPFLAGS@ $(X_CPPFLAGS)
# Actual name to use when installing a native compiler.
GCC_INSTALL_NAME := $(shell echo gcc|sed '$(program_transform_name)')
Index: gcc-4_6-branch/gcc/configure.ac
===================================================================
--- gcc-4_6-branch.orig/gcc/configure.ac 2012-03-03 01:17:45.000000000 -0800
+++ gcc-4_6-branch/gcc/configure.ac 2012-03-04 09:19:02.430607447 -0800
@@ -1774,16 +1774,18 @@
# Also, we cannot run fixincludes.
# These are the normal (build=host) settings:
-CC_FOR_BUILD='$(CC)' AC_SUBST(CC_FOR_BUILD)
-BUILD_CFLAGS='$(ALL_CFLAGS)' AC_SUBST(BUILD_CFLAGS)
-BUILD_LDFLAGS='$(LDFLAGS)' AC_SUBST(BUILD_LDFLAGS)
-STMP_FIXINC=stmp-fixinc AC_SUBST(STMP_FIXINC)
+CC_FOR_BUILD='$(CC)' AC_SUBST(CC_FOR_BUILD)
+BUILD_CFLAGS='$(ALL_CFLAGS)' AC_SUBST(BUILD_CFLAGS)
+BUILD_LDFLAGS='$(LDFLAGS)' AC_SUBST(BUILD_LDFLAGS)
+BUILD_CPPFLAGS='$(ALL_CPPFLAGS)' AC_SUBST(BUILD_CPPFLAGS)
+STMP_FIXINC=stmp-fixinc AC_SUBST(STMP_FIXINC)
# And these apply if build != host, or we are generating coverage data
if test x$build != x$host || test "x$coverage_flags" != x
then
BUILD_CFLAGS='$(INTERNAL_CFLAGS) $(T_CFLAGS) $(CFLAGS_FOR_BUILD)'
BUILD_LDFLAGS='$(LDFLAGS_FOR_BUILD)'
+ BUILD_CPPFLAGS='$(CPPFLAGS_FOR_BUILD)'
fi
# Expand extra_headers to include complete path.
Index: gcc-4_6-branch/Makefile.in
===================================================================
--- gcc-4_6-branch.orig/Makefile.in 2012-03-03 01:08:03.000000000 -0800
+++ gcc-4_6-branch/Makefile.in 2012-03-04 09:19:02.446607448 -0800
@@ -338,6 +338,7 @@
AS_FOR_BUILD = @AS_FOR_BUILD@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
+CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
DLLTOOL_FOR_BUILD = @DLLTOOL_FOR_BUILD@
@@ -691,6 +692,7 @@
"BISON=$(BISON)" \
"CC_FOR_BUILD=$(CC_FOR_BUILD)" \
"CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
+ "CPPFLAGS_FOR_BUILD=$(CPPFLAGS_FOR_BUILD)" \
"CXX_FOR_BUILD=$(CXX_FOR_BUILD)" \
"EXPECT=$(EXPECT)" \
"FLEX=$(FLEX)" \
Index: gcc-4_6-branch/gcc/configure
===================================================================
--- gcc-4_6-branch.orig/gcc/configure 2012-03-03 01:17:45.000000000 -0800
+++ gcc-4_6-branch/gcc/configure 2012-03-04 09:19:15.638608087 -0800
@@ -703,6 +703,7 @@
LIBTOOL
collect2
STMP_FIXINC
+BUILD_CPPFLAGS
BUILD_LDFLAGS
BUILD_CFLAGS
CC_FOR_BUILD
@@ -4842,7 +4843,7 @@
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_cv_cc_gcc_supports_ada" >&5
$as_echo "$acx_cv_cc_gcc_supports_ada" >&6; }
-if test x$GNATBIND != xno && test x$GNATMAKE != xno && test x$acx_cv_cc_gcc_supports_ada != xno; then
+if test "x$GNATBIND" != xno && test "x$GNATMAKE" != xno && test x$acx_cv_cc_gcc_supports_ada != xno; then
have_gnat=yes
else
have_gnat=no
@@ -11372,6 +11373,7 @@
CC_FOR_BUILD='$(CC)'
BUILD_CFLAGS='$(ALL_CFLAGS)'
BUILD_LDFLAGS='$(LDFLAGS)'
+BUILD_CPPFLAGS='$(ALL_CPPFLAGS)'
STMP_FIXINC=stmp-fixinc
# And these apply if build != host, or we are generating coverage data
@@ -11379,6 +11381,7 @@
then
BUILD_CFLAGS='$(INTERNAL_CFLAGS) $(T_CFLAGS) $(CFLAGS_FOR_BUILD)'
BUILD_LDFLAGS='$(LDFLAGS_FOR_BUILD)'
+ BUILD_CPPFLAGS='$(CPPFLAGS_FOR_BUILD)'
fi
# Expand extra_headers to include complete path.
@@ -17495,7 +17498,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17498 "configure"
+#line 17501 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17601,7 +17604,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17604 "configure"
+#line 17607 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
Index: gcc-4_6-branch/Makefile.tpl
===================================================================
--- gcc-4_6-branch.orig/Makefile.tpl 2012-03-03 01:08:03.000000000 -0800
+++ gcc-4_6-branch/Makefile.tpl 2012-03-04 09:19:02.454607448 -0800
@@ -341,6 +341,7 @@
AS_FOR_BUILD = @AS_FOR_BUILD@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
+CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
DLLTOOL_FOR_BUILD = @DLLTOOL_FOR_BUILD@
Index: gcc-4_6-branch/configure.ac
===================================================================
--- gcc-4_6-branch.orig/configure.ac 2012-03-03 01:17:45.000000000 -0800
+++ gcc-4_6-branch/configure.ac 2012-03-04 09:19:02.454607448 -0800
@@ -3123,6 +3123,7 @@
# our build compiler if desired.
if test x"${build}" = x"${host}" ; then
CFLAGS_FOR_BUILD=${CFLAGS_FOR_BUILD-${CFLAGS}}
+ CPPFLAGS_FOR_BUILD=${CPPFLAGS_FOR_BUILD-${CPPFLAGS}}
CXXFLAGS_FOR_BUILD=${CXXFLAGS_FOR_BUILD-${CXXFLAGS}}
LDFLAGS_FOR_BUILD=${LDFLAGS_FOR_BUILD-${LDFLAGS}}
fi
@@ -3189,6 +3190,7 @@
AC_SUBST(AS_FOR_BUILD)
AC_SUBST(CC_FOR_BUILD)
AC_SUBST(CFLAGS_FOR_BUILD)
+AC_SUBST(CPPFLAGS_FOR_BUILD)
AC_SUBST(CXXFLAGS_FOR_BUILD)
AC_SUBST(CXX_FOR_BUILD)
AC_SUBST(DLLTOOL_FOR_BUILD)
Index: gcc-4_6-branch/configure
===================================================================
--- gcc-4_6-branch.orig/configure 2012-03-03 01:17:45.000000000 -0800
+++ gcc-4_6-branch/configure 2012-03-04 09:19:02.458607448 -0800
@@ -617,6 +617,7 @@
DLLTOOL_FOR_BUILD
CXX_FOR_BUILD
CXXFLAGS_FOR_BUILD
+CPPFLAGS_FOR_BUILD
CFLAGS_FOR_BUILD
CC_FOR_BUILD
AS_FOR_BUILD
@@ -7644,6 +7645,7 @@
# our build compiler if desired.
if test x"${build}" = x"${host}" ; then
CFLAGS_FOR_BUILD=${CFLAGS_FOR_BUILD-${CFLAGS}}
+ CPPFLAGS_FOR_BUILD=${CPPFLAGS_FOR_BUILD-${CPPFLAGS}}
CXXFLAGS_FOR_BUILD=${CXXFLAGS_FOR_BUILD-${CXXFLAGS}}
LDFLAGS_FOR_BUILD=${LDFLAGS_FOR_BUILD-${LDFLAGS}}
fi
@@ -7709,6 +7711,7 @@
+

View File

@@ -0,0 +1,27 @@
Upstream-Status: Pending
Add /sw/include and /opt/include based on the original
zecke-no-host-includes.patch patch. The original patch checked for
/usr/include, /sw/include and /opt/include and then triggered a failure and
aborted.
Instead, we add the two missing items to the current scan. If the user
wants this to be a failure, they can add "-Werror=poison-system-directories".
Signed-off-by: Mark Hatle <mark.hatle@windriver.com>
Index: gcc-4.6.0/gcc/incpath.c
===================================================================
--- gcc-4.6.0.orig/gcc/incpath.c
+++ gcc-4.6.0/gcc/incpath.c
@@ -363,7 +363,9 @@ merge_include_chains (const char *sysroo
{
if ((!strncmp (p->name, "/usr/include", 12))
|| (!strncmp (p->name, "/usr/local/include", 18))
- || (!strncmp (p->name, "/usr/X11R6/include", 18)))
+ || (!strncmp (p->name, "/usr/X11R6/include", 18))
+ || (!strncmp (p->name, "/sw/include", 11))
+ || (!strncmp (p->name, "/opt/include", 12)))
warning (OPT_Wpoison_system_directories,
"include location \"%s\" is unsafe for "
"cross-compilation",

View File

@@ -0,0 +1,223 @@
Upstream-Status: Inappropriate [distribution: codesourcery]
gcc/
2008-07-02 Joseph Myers <joseph@codesourcery.com>
* c-incpath.c: Include toplev.h.
(merge_include_chains): Use warning instead of cpp_error for
system directory poisoning diagnostic.
* Makefile.in (c-incpath.o): Depend on toplev.h.
* gcc.c (LINK_COMMAND_SPEC): Pass
--error-poison-system-directories if
-Werror=poison-system-directories.
2007-06-13 Joseph Myers <joseph@codesourcery.com>
* common.opt (--Wno-poison-system-directories): New.
* doc/invoke.texi (-Wno-poison-system-directories): Document.
* c-incpath.c: Include flags.h.
(merge_include_chains): Check flag_poison_system_directories.
* gcc.c (LINK_COMMAND_SPEC): Pass --no-poison-system-directories
to linker if -Wno-poison-system-directories.
* Makefile.in (c-incpath.o): Depend on $(FLAGS_H).
2007-03-20 Daniel Jacobowitz <dan@codesourcery.com>
Joseph Myers <joseph@codesourcery.com>
* configure.ac (--enable-poison-system-directories): New option.
* configure, config.in: Regenerate.
* c-incpath.c (merge_include_chains): If
ENABLE_POISON_SYSTEM_DIRECTORIES defined, warn for use of
/usr/include, /usr/local/include or /usr/X11R6/include.
Index: gcc-4_6-branch/gcc/common.opt
===================================================================
--- gcc-4_6-branch.orig/gcc/common.opt 2012-03-03 01:03:17.000000000 -0800
+++ gcc-4_6-branch/gcc/common.opt 2012-03-04 09:24:54.410624483 -0800
@@ -567,6 +567,10 @@
Common Var(warn_padded) Warning
Warn when padding is required to align structure members
+Wpoison-system-directories
+Common Var(flag_poison_system_directories) Init(1) Warning
+Warn for -I and -L options using system directories if cross compiling
+
Wshadow
Common Var(warn_shadow) Warning
Warn when one local variable shadows another
Index: gcc-4_6-branch/gcc/config.in
===================================================================
--- gcc-4_6-branch.orig/gcc/config.in 2012-03-03 01:03:17.000000000 -0800
+++ gcc-4_6-branch/gcc/config.in 2012-03-04 09:24:54.410624483 -0800
@@ -144,6 +144,12 @@
#endif
+/* Define to warn for use of native system header directories */
+#ifndef USED_FOR_TARGET
+#undef ENABLE_POISON_SYSTEM_DIRECTORIES
+#endif
+
+
/* Define if you want all operations on RTL (the basic data structure of the
optimizer and back end) to be checked for dynamic type safety at runtime.
This is quite expensive. */
Index: gcc-4_6-branch/gcc/configure.ac
===================================================================
--- gcc-4_6-branch.orig/gcc/configure.ac 2012-03-04 09:22:07.000000000 -0800
+++ gcc-4_6-branch/gcc/configure.ac 2012-03-04 09:24:54.410624483 -0800
@@ -4692,6 +4692,16 @@
fi)
AC_SUBST(slibdir)
+AC_ARG_ENABLE([poison-system-directories],
+ AS_HELP_STRING([--enable-poison-system-directories],
+ [warn for use of native system header directories]),,
+ [enable_poison_system_directories=no])
+if test "x${enable_poison_system_directories}" = "xyes"; then
+ AC_DEFINE([ENABLE_POISON_SYSTEM_DIRECTORIES],
+ [1],
+ [Define to warn for use of native system header directories])
+fi
+
# Substitute configuration variables
AC_SUBST(subdirs)
AC_SUBST(srcdir)
Index: gcc-4_6-branch/gcc/doc/invoke.texi
===================================================================
--- gcc-4_6-branch.orig/gcc/doc/invoke.texi 2012-03-03 00:46:39.000000000 -0800
+++ gcc-4_6-branch/gcc/doc/invoke.texi 2012-03-04 09:24:54.414624482 -0800
@@ -257,6 +257,7 @@
-Woverlength-strings -Wpacked -Wpacked-bitfield-compat -Wpadded @gol
-Wparentheses -Wpedantic-ms-format -Wno-pedantic-ms-format @gol
-Wpointer-arith -Wno-pointer-to-int-cast @gol
+-Wno-poison-system-directories @gol
-Wredundant-decls @gol
-Wreturn-type -Wsequence-point -Wshadow @gol
-Wsign-compare -Wsign-conversion -Wstack-protector @gol
@@ -3782,6 +3783,14 @@
for most targets, it is made up of code and thus requires the stack
to be made executable in order for the program to work properly.
+@item -Wno-poison-system-directories
+@opindex Wno-poison-system-directories
+Do not warn for @option{-I} or @option{-L} options using system
+directories such as @file{/usr/include} when cross compiling. This
+option is intended for use in chroot environments when such
+directories contain the correct headers and libraries for the target
+system rather than the host.
+
@item -Wfloat-equal
@opindex Wfloat-equal
@opindex Wno-float-equal
Index: gcc-4_6-branch/gcc/gcc.c
===================================================================
--- gcc-4_6-branch.orig/gcc/gcc.c 2012-03-03 01:03:17.000000000 -0800
+++ gcc-4_6-branch/gcc/gcc.c 2012-03-04 09:24:54.418624482 -0800
@@ -659,6 +659,8 @@
%{flto} %{flto=*} %l " LINK_PIE_SPEC \
"%X %{o*} %{e*} %{N} %{n} %{r}\
%{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!nostartfiles:%S}}\
+ %{Wno-poison-system-directories:--no-poison-system-directories}\
+ %{Werror=poison-system-directories:--error-poison-system-directories}\
%{static:} %{L*} %(mfwrap) %(link_libgcc) %o\
%{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\
%(mflib) " STACK_SPLIT_SPEC "\
Index: gcc-4_6-branch/gcc/incpath.c
===================================================================
--- gcc-4_6-branch.orig/gcc/incpath.c 2012-03-03 01:03:17.000000000 -0800
+++ gcc-4_6-branch/gcc/incpath.c 2012-03-04 09:24:54.418624482 -0800
@@ -353,6 +353,24 @@
}
fprintf (stderr, _("End of search list.\n"));
}
+
+#ifdef ENABLE_POISON_SYSTEM_DIRECTORIES
+ if (flag_poison_system_directories)
+ {
+ struct cpp_dir *p;
+
+ for (p = heads[QUOTE]; p; p = p->next)
+ {
+ if ((!strncmp (p->name, "/usr/include", 12))
+ || (!strncmp (p->name, "/usr/local/include", 18))
+ || (!strncmp (p->name, "/usr/X11R6/include", 18)))
+ warning (OPT_Wpoison_system_directories,
+ "include location \"%s\" is unsafe for "
+ "cross-compilation",
+ p->name);
+ }
+ }
+#endif
}
/* Use given -I paths for #include "..." but not #include <...>, and
Index: gcc-4_6-branch/gcc/Makefile.in
===================================================================
--- gcc-4_6-branch.orig/gcc/Makefile.in 2012-03-04 09:22:05.000000000 -0800
+++ gcc-4_6-branch/gcc/Makefile.in 2012-03-04 09:24:54.418624482 -0800
@@ -2179,7 +2179,7 @@
incpath.o: incpath.c incpath.h $(CONFIG_H) $(SYSTEM_H) $(CPPLIB_H) \
intl.h prefix.h coretypes.h $(TM_H) cppdefault.h $(TARGET_H) \
- $(MACHMODE_H)
+ $(MACHMODE_H) $(FLAGS_H) toplev.h
prefix.o: prefix.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) prefix.h \
Makefile $(BASEVER)
Index: gcc-4_6-branch/gcc/configure
===================================================================
--- gcc-4_6-branch.orig/gcc/configure 2012-03-04 09:22:07.000000000 -0800
+++ gcc-4_6-branch/gcc/configure 2012-03-04 09:25:31.502626277 -0800
@@ -912,6 +912,7 @@
enable_maintainer_mode
enable_version_specific_runtime_libs
with_slibdir
+enable_poison_system_directories
enable_plugin
enable_libquadmath_support
'
@@ -1623,6 +1624,8 @@
--enable-version-specific-runtime-libs
specify that runtime libraries should be
installed in a compiler-specific directory
+ --enable-poison-system-directories
+ warn for use of native system header directories
--enable-plugin enable plugin support
--disable-libquadmath-support
disable libquadmath support for Fortran
@@ -17498,7 +17501,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17501 "configure"
+#line 17504 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17604,7 +17607,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17607 "configure"
+#line 17610 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -26125,6 +26128,19 @@
+# Check whether --enable-poison-system-directories was given.
+if test "${enable_poison_system_directories+set}" = set; then :
+ enableval=$enable_poison_system_directories;
+else
+ enable_poison_system_directories=no
+fi
+
+if test "x${enable_poison_system_directories}" = "xyes"; then
+
+$as_echo "#define ENABLE_POISON_SYSTEM_DIRECTORIES 1" >>confdefs.h
+
+fi
+
# Substitute configuration variables

View File

@@ -0,0 +1,72 @@
Upstream-Status: Pending
Index: gcc-4.6.0/libstdc++-v3/config/locale/generic/c_locale.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/generic/c_locale.h
+++ gcc-4.6.0/libstdc++-v3/config/locale/generic/c_locale.h
@@ -41,13 +41,22 @@
#include <clocale>
+#ifdef __UCLIBC__
+#include <features.h>
+#include <ctype.h>
+#endif
+
#define _GLIBCXX_NUM_CATEGORIES 0
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
+#ifdef __UCLIBC__
+ typedef __ctype_touplow_t* __c_locale;
+#else
typedef int* __c_locale;
+#endif
// Convert numeric value of type double and long double to string and
// return length of string. If vsnprintf is available use it, otherwise
Index: gcc-4.6.0/libstdc++-v3/config/os/gnu-linux/ctype_base.h
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/os/gnu-linux/ctype_base.h
+++ gcc-4.6.0/libstdc++-v3/config/os/gnu-linux/ctype_base.h
@@ -34,6 +34,11 @@
// Information as gleaned from /usr/include/ctype.h
+#ifdef __UCLIBC__
+#include <features.h>
+#include <ctype.h>
+#endif
+
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -42,7 +47,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
struct ctype_base
{
// Non-standard typedefs.
+#ifdef __UCLIBC__
+ typedef const __ctype_touplow_t* __to_type;
+#else
typedef const int* __to_type;
+#endif
// NB: Offsets into ctype<char>::_M_table force a particular size
// on the mask type. Because of this, we don't use an enum.
Index: gcc-4.6.0/libstdc++-v3/config/locale/generic/c_locale.cc
===================================================================
--- gcc-4.6.0.orig/libstdc++-v3/config/locale/generic/c_locale.cc
+++ gcc-4.6.0/libstdc++-v3/config/locale/generic/c_locale.cc
@@ -264,5 +264,10 @@ _GLIBCXX_END_NAMESPACE_VERSION
#ifdef _GLIBCXX_LONG_DOUBLE_COMPAT
#define _GLIBCXX_LDBL_COMPAT(dbl, ldbl) \
extern "C" void ldbl (void) __attribute__ ((alias (#dbl)))
+#ifdef __UCLIBC__
+// On uClibc __c_locale is __ctype_touplow_t*, where __ctype_touplow_t is short; on glibc it is int*. Hence the different mangled names.
+_GLIBCXX_LDBL_COMPAT(_ZSt14__convert_to_vIdEvPKcRT_RSt12_Ios_IostateRKPs, _ZSt14__convert_to_vIeEvPKcRT_RSt12_Ios_IostateRKPs);
+#else
_GLIBCXX_LDBL_COMPAT(_ZSt14__convert_to_vIdEvPKcRT_RSt12_Ios_IostateRKPi, _ZSt14__convert_to_vIeEvPKcRT_RSt12_Ios_IostateRKPi);
+#endif
#endif // _GLIBCXX_LONG_DOUBLE_COMPAT

View File

@@ -0,0 +1,196 @@
Upstream-Status: Backport
Signed-off-by: Khem Raj <raj.khem@gmail.com>
commit 3cb9bbfa927aa187048534f9069202c017a78e38
Author: ppluzhnikov <ppluzhnikov@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Wed May 11 18:28:14 2011 +0000
2011-05-11 Satoru Takabayashi <satorux@google.com>
Paul Pluzhnikov <ppluzhnikov@google.com>
* gcc/doc/install.texi (Configuration): Document
--with-linker-hash-style.
* gcc/gcc.c (init_spec): Handle LINKER_HASH_STYLE.
* gcc/config.in: Add LINKER_HASH_STYLE.
* gcc/configure.ac: Add --with-linker-hash-style.
* gcc/configure: Regenerate.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@173668 138bc75d-0d04-0410-961f-82ee72b054a4
Index: gcc-4_6-branch/gcc/config.in
===================================================================
--- gcc-4_6-branch.orig/gcc/config.in 2012-03-04 09:30:04.000000000 -0800
+++ gcc-4_6-branch/gcc/config.in 2012-03-04 09:32:30.878646575 -0800
@@ -1583,6 +1583,12 @@
#endif
+/* The linker hash style */
+#ifndef USED_FOR_TARGET
+#undef LINKER_HASH_STYLE
+#endif
+
+
/* Define to the name of the LTO plugin DSO that must be passed to the
linker's -plugin=LIB option. */
#ifndef USED_FOR_TARGET
Index: gcc-4_6-branch/gcc/configure
===================================================================
--- gcc-4_6-branch.orig/gcc/configure 2012-03-04 09:30:05.000000000 -0800
+++ gcc-4_6-branch/gcc/configure 2012-03-04 09:32:39.918647011 -0800
@@ -915,6 +915,7 @@
enable_poison_system_directories
enable_plugin
enable_libquadmath_support
+with_linker_hash_style
'
ac_precious_vars='build_alias
host_alias
@@ -1667,6 +1668,8 @@
with the compiler
--with-system-zlib use installed libz
--with-slibdir=DIR shared libraries in DIR [LIBDIR]
+ --with-linker-hash-style={sysv,gnu,both}
+ specify the linker hash style
Some influential environment variables:
CC C compiler command
@@ -17501,7 +17504,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17504 "configure"
+#line 17507 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17607,7 +17610,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 17610 "configure"
+#line 17613 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -26432,6 +26435,36 @@
fi
+# Specify what hash style to use by default.
+
+# Check whether --with-linker-hash-style was given.
+if test "${with_linker_hash_style+set}" = set; then :
+ withval=$with_linker_hash_style; case x"$withval" in
+ xsysv)
+ LINKER_HASH_STYLE=sysv
+ ;;
+ xgnu)
+ LINKER_HASH_STYLE=gnu
+ ;;
+ xboth)
+ LINKER_HASH_STYLE=both
+ ;;
+ *)
+ as_fn_error "$withval is an invalid option to --with-linker-hash-style" "$LINENO" 5
+ ;;
+ esac
+else
+ LINKER_HASH_STYLE=''
+fi
+
+if test x"${LINKER_HASH_STYLE}" != x; then
+
+cat >>confdefs.h <<_ACEOF
+#define LINKER_HASH_STYLE "$LINKER_HASH_STYLE"
+_ACEOF
+
+fi
+
# Configure the subdirectories
# AC_CONFIG_SUBDIRS($subdirs)
Index: gcc-4_6-branch/gcc/configure.ac
===================================================================
--- gcc-4_6-branch.orig/gcc/configure.ac 2012-03-04 09:30:05.000000000 -0800
+++ gcc-4_6-branch/gcc/configure.ac 2012-03-04 09:32:30.890646574 -0800
@@ -4905,6 +4905,30 @@
fi
+# Specify what hash style to use by default.
+AC_ARG_WITH([linker-hash-style],
+[AC_HELP_STRING([--with-linker-hash-style={sysv,gnu,both}],
+ [specify the linker hash style])],
+[case x"$withval" in
+ xsysv)
+ LINKER_HASH_STYLE=sysv
+ ;;
+ xgnu)
+ LINKER_HASH_STYLE=gnu
+ ;;
+ xboth)
+ LINKER_HASH_STYLE=both
+ ;;
+ *)
+ AC_MSG_ERROR([$withval is an invalid option to --with-linker-hash-style])
+ ;;
+ esac],
+[LINKER_HASH_STYLE=''])
+if test x"${LINKER_HASH_STYLE}" != x; then
+ AC_DEFINE_UNQUOTED(LINKER_HASH_STYLE, "$LINKER_HASH_STYLE",
+ [The linker hash style])
+fi
+
# Configure the subdirectories
# AC_CONFIG_SUBDIRS($subdirs)
Index: gcc-4_6-branch/gcc/doc/install.texi
===================================================================
--- gcc-4_6-branch.orig/gcc/doc/install.texi 2012-03-03 00:46:39.000000000 -0800
+++ gcc-4_6-branch/gcc/doc/install.texi 2012-03-04 09:32:30.894646574 -0800
@@ -1665,6 +1665,11 @@
support @option{--build-id} option, a warning is issued and the
@option{--enable-linker-build-id} option is ignored. The default is off.
+@item --with-linker-hash-style=@var{choice}
+Tells GCC to pass @option{--hash-style=@var{choice}} option to the
+linker for all final links. @var{choice} can be one of
+@samp{sysv}, @samp{gnu}, and @samp{both} where @samp{sysv} is the default.
+
@item --enable-gnu-unique-object
@itemx --disable-gnu-unique-object
Tells GCC to use the gnu_unique_object relocation for C++ template
Index: gcc-4_6-branch/gcc/gcc.c
===================================================================
--- gcc-4_6-branch.orig/gcc/gcc.c 2012-03-04 09:30:04.000000000 -0800
+++ gcc-4_6-branch/gcc/gcc.c 2012-03-04 09:32:30.894646574 -0800
@@ -1427,7 +1427,8 @@
}
#endif
-#if defined LINK_EH_SPEC || defined LINK_BUILDID_SPEC
+#if defined LINK_EH_SPEC || defined LINK_BUILDID_SPEC || \
+ defined LINKER_HASH_STYLE
# ifdef LINK_BUILDID_SPEC
/* Prepend LINK_BUILDID_SPEC to whatever link_spec we had before. */
obstack_grow (&obstack, LINK_BUILDID_SPEC, sizeof(LINK_BUILDID_SPEC) - 1);
@@ -1436,6 +1437,16 @@
/* Prepend LINK_EH_SPEC to whatever link_spec we had before. */
obstack_grow (&obstack, LINK_EH_SPEC, sizeof(LINK_EH_SPEC) - 1);
# endif
+# ifdef LINKER_HASH_STYLE
+ /* Prepend --hash-style=LINKER_HASH_STYLE to whatever link_spec we had
+ before. */
+ {
+ static const char hash_style[] = "--hash-style=";
+ obstack_grow (&obstack, hash_style, sizeof(hash_style) - 1);
+ obstack_grow (&obstack, LINKER_HASH_STYLE, sizeof(LINKER_HASH_STYLE) - 1);
+ obstack_1grow (&obstack, ' ');
+ }
+# endif
obstack_grow0 (&obstack, link_spec, strlen (link_spec));
link_spec = XOBFINISH (&obstack, const char *);
#endif

View File

@@ -1,45 +0,0 @@
Index: gcc-4_6-branch/gcc/config/arm/arm.c
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 17:14:09.901129286 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 17:18:23.061141606 -0800
@@ -17525,6 +17525,13 @@
}
return;
+ case 'v':
+ {
+ gcc_assert (GET_CODE (x) == CONST_DOUBLE);
+ fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
+ return;
+ }
+
/* Register specifier for vld1.16/vst1.16. Translate the S register
number into a D register number and element index. */
case 'z':
@@ -24925,4 +24932,26 @@
return 4;
}
+int
+vfp3_const_double_for_fract_bits (rtx operand)
+{
+ REAL_VALUE_TYPE r0;
+
+ if (GET_CODE (operand) != CONST_DOUBLE)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
+ if (exact_real_inverse (DFmode, &r0))
+ {
+ if (exact_real_truncate (DFmode, &r0))
+ {
+ HOST_WIDE_INT value = real_to_integer (&r0);
+ value = value & 0xffffffff;
+ if ((value != 0) && ( (value & (value - 1)) == 0))
+ return int_log2 (value);
+ }
+ }
+ return 0;
+}
+
#include "gt-arm.h"

View File

@@ -1,51 +0,0 @@
2011-02-21 Andrew Stubbs <ams@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Mark Shinwell <shinwell@codesourcery.com>
Forward-ported from Linaro GCC 4.5 (bzr99324).
gcc/
* config/arm/arm.h (arm_class_likely_spilled_p): Check against
LO_REGS only for Thumb-1.
(MODE_BASE_REG_CLASS): Restrict base registers to those which can
be used in short instructions when optimising for size on Thumb-2.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-01-29 03:20:57 +0000
+++ new/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000
@@ -22304,14 +22304,16 @@
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
- We need to define this for LO_REGS on thumb. Otherwise we can end up
- using r0-r4 for function arguments, r7 for the stack frame and don't
- have enough left over to do doubleword arithmetic. */
-
+ We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
+ using r0-r4 for function arguments, r7 for the stack frame and don't have
+ enough left over to do doubleword arithmetic. For Thumb-2 all the
+ potentially problematic instructions accept high registers so this is not
+ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
+ that require many low registers. */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
- if ((TARGET_THUMB && rclass == LO_REGS)
+ if ((TARGET_THUMB1 && rclass == LO_REGS)
|| rclass == CC_REG)
return true;
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-01-29 03:20:57 +0000
+++ new/gcc/config/arm/arm.h 2011-02-21 14:04:51 +0000
@@ -1185,7 +1185,7 @@
when addressing quantities in QI or HI mode; if we don't know the
mode, then we must be conservative. */
#define MODE_BASE_REG_CLASS(MODE) \
- (TARGET_32BIT ? CORE_REGS : \
+ (TARGET_ARM || (TARGET_THUMB2 && !optimize_size) ? CORE_REGS : \
(((MODE) == SImode) ? BASE_REGS : LO_REGS))
/* For Thumb we can not support SP+reg addressing, so we return LO_REGS

View File

@@ -1,653 +0,0 @@
2011-03-27 Ira Rosen <ira.rosen@linaro.org>
gcc/
* doc/invoke.texi (max-stores-to-sink): Document.
* params.h (MAX_STORES_TO_SINK): Define.
* opts.c (finish_options): Set MAX_STORES_TO_SINK to 0
if either vectorization or if-conversion is disabled.
* tree-data-ref.c (dr_equal_offsets_p1): Moved and renamed from
tree-vect-data-refs.c vect_equal_offsets.
(dr_equal_offsets_p): New function.
(find_data_references_in_bb): Remove static.
* tree-data-ref.h (find_data_references_in_bb): Declare.
(dr_equal_offsets_p): Likewise.
* tree-vect-data-refs.c (vect_equal_offsets): Move to tree-data-ref.c.
(vect_drs_dependent_in_basic_block): Update calls to
vect_equal_offsets.
(vect_check_interleaving): Likewise.
* tree-ssa-phiopt.c: Include cfgloop.h and tree-data-ref.h.
(cond_if_else_store_replacement): Rename to...
(cond_if_else_store_replacement_1): ... this. Change arguments and
documentation.
(cond_if_else_store_replacement): New function.
* Makefile.in (tree-ssa-phiopt.o): Adjust dependencies.
* params.def (PARAM_MAX_STORES_TO_SINK): Define.
gcc/testsuite/
* gcc.dg/vect/vect-cselim-1.c: New test.
* gcc.dg/vect/vect-cselim-2.c: New test.
=== modified file 'gcc/Makefile.in'
--- old/gcc/Makefile.in 2011-03-26 09:20:34 +0000
+++ new/gcc/Makefile.in 2011-04-18 11:31:29 +0000
@@ -2422,7 +2422,8 @@
tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \
- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h
+ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \
+ $(TREE_DATA_REF_H)
tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-03-29 14:24:42 +0000
+++ new/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000
@@ -8909,6 +8909,11 @@
The maximum number of namespaces to consult for suggestions when C++
name lookup fails for an identifier. The default is 1000.
+@item max-stores-to-sink
+The maximum number of conditional store pairs that can be sunk. Set to 0
+if either vectorization (@option{-ftree-vectorize}) or if-conversion
+(@option{-ftree-loop-if-convert}) is disabled. The default is 2.
+
@end table
@end table
=== modified file 'gcc/opts.c'
--- old/gcc/opts.c 2011-02-17 22:51:57 +0000
+++ new/gcc/opts.c 2011-03-27 09:38:18 +0000
@@ -823,6 +823,12 @@
opts->x_flag_split_stack = 0;
}
}
+
+ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
+ is disabled. */
+ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert)
+ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0,
+ opts->x_param_values, opts_set->x_param_values);
}
#define LEFT_COLUMN 27
=== modified file 'gcc/params.def'
--- old/gcc/params.def 2011-03-26 09:20:34 +0000
+++ new/gcc/params.def 2011-04-18 11:31:29 +0000
@@ -883,6 +883,13 @@
"name lookup fails",
1000, 0, 0)
+/* Maximum number of conditional store pairs that can be sunk. */
+DEFPARAM (PARAM_MAX_STORES_TO_SINK,
+ "max-stores-to-sink",
+ "Maximum number of conditional store pairs that can be sunk",
+ 2, 0, 0)
+
+
/*
Local variables:
mode:c
=== modified file 'gcc/params.h'
--- old/gcc/params.h 2011-01-13 13:41:03 +0000
+++ new/gcc/params.h 2011-03-27 09:38:18 +0000
@@ -206,4 +206,6 @@
PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO)
#define MIN_NONDEBUG_INSN_UID \
PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
+#define MAX_STORES_TO_SINK \
+ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
#endif /* ! GCC_PARAMS_H */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+ short a;
+ short b;
+} data;
+
+data in1[N], in2[N], out[N];
+short result[N*2] = {7,-7,9,-6,11,-5,13,-4,15,-3,17,-2,19,-1,21,0,23,1,25,2,27,3,29,4,31,5,33,6,35,7,37,8,39,9,41,10,43,11,45,12,47,13,49,14,51,15,53,16,55,17,57,18,59,19,61,20,63,21,65,22,67,23,69,24,71,25,73,26,75,27,77,28,79,29,81,30,83,31,85,32,87,33,89,34,91,35,93,36,95,37,97,38,99,39,101,40,103,41,105,42};
+short out1[N], out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int i;
+ short c, d;
+
+ /* Vectorizable with conditional store sinking. */
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i].b;
+ d = in2[i].b;
+
+ if (c >= d)
+ {
+ out[i].b = c;
+ out[i].a = d + 5;
+ }
+ else
+ {
+ out[i].b = d - 12;
+ out[i].a = c + d;
+ }
+ }
+
+ /* Not vectorizable. */
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i].b;
+ d = in2[i].b;
+
+ if (c >= d)
+ {
+ out1[i] = c;
+ }
+ else
+ {
+ out2[i] = c + d;
+ }
+ }
+}
+
+int
+main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in1[i].a = i;
+ in1[i].b = i + 2;
+ in2[i].a = 5;
+ in2[i].b = i + 5;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-2.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 2011-03-27 09:38:18 +0000
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+int a[N], b[N], in1[N], in2[N];
+int result[2*N] = {5,-7,7,-6,9,-5,11,-4,13,-3,15,-2,17,-1,19,0,21,1,23,2,25,3,27,4,29,5,31,6,33,7,35,8,37,9,39,10,41,11,43,12,45,13,47,14,49,15,51,16,53,17,55,18,57,19,59,20,61,21,63,22,65,23,67,24,69,25,71,26,73,27,75,28,77,29,79,30,81,31,83,32,85,33,87,34,89,35,91,36,93,37,95,38,97,39,99,40,101,41,103,42};
+
+__attribute__ ((noinline)) void
+foo (int *pa, int *pb)
+{
+ int i;
+ int c, d;
+
+ /* Store sinking should not work here since the pointers may alias. */
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i];
+ d = in2[i];
+
+ if (c >= d)
+ {
+ *pa = c;
+ *pb = d + 5;
+ }
+ else
+ {
+ *pb = d - 12;
+ *pa = c + d;
+ }
+
+ pa++;
+ pb++;
+ }
+}
+
+int
+main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in1[i] = i;
+ in2[i] = i + 5;
+ __asm__ volatile ("");
+ }
+
+ foo (a, b);
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != result[2*i] || b[i] != result[2*i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/tree-data-ref.c'
--- old/gcc/tree-data-ref.c 2011-02-05 01:39:20 +0000
+++ new/gcc/tree-data-ref.c 2011-03-27 09:38:18 +0000
@@ -991,6 +991,48 @@
return dr;
}
+/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
+ expressions. */
+static bool
+dr_equal_offsets_p1 (tree offset1, tree offset2)
+{
+ bool res;
+
+ STRIP_NOPS (offset1);
+ STRIP_NOPS (offset2);
+
+ if (offset1 == offset2)
+ return true;
+
+ if (TREE_CODE (offset1) != TREE_CODE (offset2)
+ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
+ return false;
+
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
+ TREE_OPERAND (offset2, 0));
+
+ if (!res || !BINARY_CLASS_P (offset1))
+ return res;
+
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
+ TREE_OPERAND (offset2, 1));
+
+ return res;
+}
+
+/* Check if DRA and DRB have equal offsets. */
+bool
+dr_equal_offsets_p (struct data_reference *dra,
+ struct data_reference *drb)
+{
+ tree offset1, offset2;
+
+ offset1 = DR_OFFSET (dra);
+ offset2 = DR_OFFSET (drb);
+
+ return dr_equal_offsets_p1 (offset1, offset2);
+}
+
/* Returns true if FNA == FNB. */
static bool
@@ -4294,7 +4336,7 @@
DATAREFS. Returns chrec_dont_know when failing to analyze a
difficult case, returns NULL_TREE otherwise. */
-static tree
+tree
find_data_references_in_bb (struct loop *loop, basic_block bb,
VEC (data_reference_p, heap) **datarefs)
{
=== modified file 'gcc/tree-data-ref.h'
--- old/gcc/tree-data-ref.h 2011-01-25 21:24:23 +0000
+++ new/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
@@ -426,10 +426,14 @@
extern void compute_all_dependences (VEC (data_reference_p, heap) *,
VEC (ddr_p, heap) **, VEC (loop_p, heap) *,
bool);
+extern tree find_data_references_in_bb (struct loop *, basic_block,
+ VEC (data_reference_p, heap) **);
extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
extern bool dr_may_alias_p (const struct data_reference *,
const struct data_reference *);
+extern bool dr_equal_offsets_p (struct data_reference *,
+ struct data_reference *);
/* Return true when the base objects of data references A and B are
=== modified file 'gcc/tree-ssa-phiopt.c'
--- old/gcc/tree-ssa-phiopt.c 2010-11-03 15:18:50 +0000
+++ new/gcc/tree-ssa-phiopt.c 2011-03-27 09:38:18 +0000
@@ -34,6 +34,8 @@
#include "langhooks.h"
#include "pointer-set.h"
#include "domwalk.h"
+#include "cfgloop.h"
+#include "tree-data-ref.h"
static unsigned int tree_ssa_phiopt (void);
static unsigned int tree_ssa_phiopt_worker (bool);
@@ -1292,35 +1294,18 @@
return true;
}
-/* Do the main work of conditional store replacement. We already know
- that the recognized pattern looks like so:
-
- split:
- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
- THEN_BB:
- X = Y;
- goto JOIN_BB;
- ELSE_BB:
- X = Z;
- fallthrough (edge E0)
- JOIN_BB:
- some more
-
- We check that THEN_BB and ELSE_BB contain only one store
- that the stores have a "simple" RHS. */
+/* Do the main work of conditional store replacement. */
static bool
-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
- basic_block join_bb)
+cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
+ basic_block join_bb, gimple then_assign,
+ gimple else_assign)
{
- gimple then_assign = last_and_only_stmt (then_bb);
- gimple else_assign = last_and_only_stmt (else_bb);
tree lhs_base, lhs, then_rhs, else_rhs;
source_location then_locus, else_locus;
gimple_stmt_iterator gsi;
gimple newphi, new_stmt;
- /* Check if then_bb and else_bb contain only one store each. */
if (then_assign == NULL
|| !gimple_assign_single_p (then_assign)
|| else_assign == NULL
@@ -1385,6 +1370,190 @@
return true;
}
+/* Conditional store replacement. We already know
+ that the recognized pattern looks like so:
+
+ split:
+ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
+ THEN_BB:
+ ...
+ X = Y;
+ ...
+ goto JOIN_BB;
+ ELSE_BB:
+ ...
+ X = Z;
+ ...
+ fallthrough (edge E0)
+ JOIN_BB:
+ some more
+
+ We check that it is safe to sink the store to JOIN_BB by verifying that
+ there are no read-after-write or write-after-write dependencies in
+ THEN_BB and ELSE_BB. */
+
+static bool
+cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
+ basic_block join_bb)
+{
+ gimple then_assign = last_and_only_stmt (then_bb);
+ gimple else_assign = last_and_only_stmt (else_bb);
+ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs;
+ VEC (ddr_p, heap) *then_ddrs, *else_ddrs;
+ gimple then_store, else_store;
+ bool found, ok = false, res;
+ struct data_dependence_relation *ddr;
+ data_reference_p then_dr, else_dr;
+ int i, j;
+ tree then_lhs, else_lhs;
+ VEC (gimple, heap) *then_stores, *else_stores;
+ basic_block blocks[3];
+
+ if (MAX_STORES_TO_SINK == 0)
+ return false;
+
+ /* Handle the case with single statement in THEN_BB and ELSE_BB. */
+ if (then_assign && else_assign)
+ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
+ then_assign, else_assign);
+
+ /* Find data references. */
+ then_datarefs = VEC_alloc (data_reference_p, heap, 1);
+ else_datarefs = VEC_alloc (data_reference_p, heap, 1);
+ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
+ == chrec_dont_know)
+ || !VEC_length (data_reference_p, then_datarefs)
+ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
+ == chrec_dont_know)
+ || !VEC_length (data_reference_p, else_datarefs))
+ {
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ return false;
+ }
+
+ /* Find pairs of stores with equal LHS. */
+ then_stores = VEC_alloc (gimple, heap, 1);
+ else_stores = VEC_alloc (gimple, heap, 1);
+ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr)
+ {
+ if (DR_IS_READ (then_dr))
+ continue;
+
+ then_store = DR_STMT (then_dr);
+ then_lhs = gimple_assign_lhs (then_store);
+ found = false;
+
+ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr)
+ {
+ if (DR_IS_READ (else_dr))
+ continue;
+
+ else_store = DR_STMT (else_dr);
+ else_lhs = gimple_assign_lhs (else_store);
+
+ if (operand_equal_p (then_lhs, else_lhs, 0))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ continue;
+
+ VEC_safe_push (gimple, heap, then_stores, then_store);
+ VEC_safe_push (gimple, heap, else_stores, else_store);
+ }
+
+ /* No pairs of stores found, or too many pairs to sink. */
+ if (!VEC_length (gimple, then_stores)
+ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK)
+ {
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+ return false;
+ }
+
+ /* Compute and check data dependencies in both basic blocks. */
+ then_ddrs = VEC_alloc (ddr_p, heap, 1);
+ else_ddrs = VEC_alloc (ddr_p, heap, 1);
+ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false);
+ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false);
+ blocks[0] = then_bb;
+ blocks[1] = else_bb;
+ blocks[2] = join_bb;
+ renumber_gimple_stmt_uids_in_blocks (blocks, 3);
+
+ /* Check that there are no read-after-write or write-after-write dependencies
+ in THEN_BB. */
+ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr)
+ {
+ struct data_reference *dra = DDR_A (ddr);
+ struct data_reference *drb = DDR_B (ddr);
+
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
+ {
+ free_dependence_relations (then_ddrs);
+ free_dependence_relations (else_ddrs);
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+ return false;
+ }
+ }
+
+ /* Check that there are no read-after-write or write-after-write dependencies
+ in ELSE_BB. */
+ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr)
+ {
+ struct data_reference *dra = DDR_A (ddr);
+ struct data_reference *drb = DDR_B (ddr);
+
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
+ {
+ free_dependence_relations (then_ddrs);
+ free_dependence_relations (else_ddrs);
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+ return false;
+ }
+ }
+
+ /* Sink stores with same LHS. */
+ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store)
+ {
+ else_store = VEC_index (gimple, else_stores, i);
+ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
+ then_store, else_store);
+ ok = ok || res;
+ }
+
+ free_dependence_relations (then_ddrs);
+ free_dependence_relations (else_ddrs);
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+
+ return ok;
+}
+
/* Always do these optimizations if we have SSA
trees to work on. */
static bool
=== modified file 'gcc/tree-vect-data-refs.c'
--- old/gcc/tree-vect-data-refs.c 2011-02-25 11:18:14 +0000
+++ new/gcc/tree-vect-data-refs.c 2011-03-27 09:38:18 +0000
@@ -289,39 +289,6 @@
}
}
-
-/* Function vect_equal_offsets.
-
- Check if OFFSET1 and OFFSET2 are identical expressions. */
-
-static bool
-vect_equal_offsets (tree offset1, tree offset2)
-{
- bool res;
-
- STRIP_NOPS (offset1);
- STRIP_NOPS (offset2);
-
- if (offset1 == offset2)
- return true;
-
- if (TREE_CODE (offset1) != TREE_CODE (offset2)
- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
- return false;
-
- res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
- TREE_OPERAND (offset2, 0));
-
- if (!res || !BINARY_CLASS_P (offset1))
- return res;
-
- res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
- TREE_OPERAND (offset2, 1));
-
- return res;
-}
-
-
/* Check dependence between DRA and DRB for basic block vectorization.
If the accesses share same bases and offsets, we can compare their initial
constant offsets to decide whether they differ or not. In case of a read-
@@ -352,7 +319,7 @@
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
+ || !dr_equal_offsets_p (dra, drb))
return true;
/* Check the types. */
@@ -402,7 +369,7 @@
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
+ || !dr_equal_offsets_p (dra, drb)
|| !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
|| DR_IS_READ (dra) != DR_IS_READ (drb))
return false;

View File

@@ -1,126 +0,0 @@
2011-04-21 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2008-12-03 Daniel Jacobowitz <dan@codesourcery.com>
gcc/testsuite/
* gcc.dg/vect/vect-shift-3.c, gcc.dg/vect/vect-shift-4.c: New.
* lib/target-supports.exp (check_effective_target_vect_shift_char): New
function.
=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-3.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 2011-04-21 13:51:06 +0000
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned short dst[N] __attribute__((aligned(N)));
+unsigned short src[N] __attribute__((aligned(N)));
+
+__attribute__ ((noinline))
+void array_shift(void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ dst[i] = src[i] >> 3;
+}
+
+int main()
+{
+ volatile int i;
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ src[i] = i << 3;
+
+ array_shift ();
+
+ for (i = 0; i < N; i++)
+ if (dst[i] != i)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 2011-04-21 13:51:06 +0000
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_shift_char } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned char dst[N] __attribute__((aligned(N)));
+unsigned char src[N] __attribute__((aligned(N)));
+
+__attribute__ ((noinline))
+void array_shift(void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ dst[i] = src[i] >> 3;
+}
+
+int main()
+{
+ volatile int i;
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ src[i] = i << 3;
+
+ array_shift ();
+
+ for (i = 0; i < N; i++)
+ if (dst[i] != i)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-02-19 15:31:15 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-04-21 13:51:06 +0000
@@ -2308,6 +2308,26 @@
}
+# Return 1 if the target supports hardware vector shift operation for char.
+
+proc check_effective_target_vect_shift_char { } {
+ global et_vect_shift_char_saved
+
+ if [info exists et_vect_shift_char_saved] {
+ verbose "check_effective_target_vect_shift_char: using cached result" 2
+ } else {
+ set et_vect_shift_char_saved 0
+ if { ([istarget powerpc*-*-*]
+ && ![istarget powerpc-*-linux*paired*])
+ || [check_effective_target_arm32] } {
+ set et_vect_shift_char_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2
+ return $et_vect_shift_char_saved
+}
+
# Return 1 if the target supports hardware vectors of long, 0 otherwise.
#
# This can change for different subtargets so do not cache the result.

View File

@@ -1,177 +0,0 @@
2011-04-27 Ira Rosen <ira.rosen@linaro.org>
Backport from FSF:
2011-04-03 Richard Guenther <rguenther@suse.de>
Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-if-conv.c (memrefs_read_or_written_unconditionally): Strip all
non-variable offsets and compare the remaining bases of the two
accesses instead of looking for exact same data-ref.
gcc/testsuite/
* gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: New test.
* gcc.dg/vect/vect.exp: Run if-cvt-stores-vect* tests with
-ftree-loop-if-convert-stores.
=== added file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c'
--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+ short a;
+ short b;
+} data;
+
+data in1[N], in2[N], out[N];
+short result[N*2] = {10,-7,11,-6,12,-5,13,-4,14,-3,15,-2,16,-1,17,0,18,1,19,2,20,3,21,4,22,5,23,6,24,7,25,8,26,9,27,10,28,11,29,12,30,13,31,14,32,15,33,16,34,17,35,18,36,19,37,20,38,21,39,22,40,23,41,24,42,25,43,26,44,27,45,28,46,29,47,30,48,31,49,32,50,33,51,34,52,35,53,36,54,37,55,38,56,39,57,40,58,41,59,42};
+short out1[N], out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int i;
+ short c, d;
+
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i].b;
+ d = in2[i].b;
+
+ if (c >= d)
+ {
+ out[i].b = in1[i].a;
+ out[i].a = d + 5;
+ }
+ else
+ {
+ out[i].b = d - 12;
+ out[i].a = in2[i].a + d;
+ }
+ }
+}
+
+int
+main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in1[i].a = i;
+ in1[i].b = i + 2;
+ in2[i].a = 5;
+ in2[i].b = i + 5;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp'
--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2010-11-22 21:49:19 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000
@@ -210,6 +210,12 @@
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
+# -ftree-loop-if-convert-stores
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
# With -O3.
# Don't allow IPA cloning, because it throws our counts out of whack.
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
=== modified file 'gcc/tree-if-conv.c'
--- old/gcc/tree-if-conv.c 2011-02-23 16:49:52 +0000
+++ new/gcc/tree-if-conv.c 2011-04-24 07:45:49 +0000
@@ -464,8 +464,8 @@
/* Returns true when the memory references of STMT are read or written
unconditionally. In other words, this function returns true when
for every data reference A in STMT there exist other accesses to
- the same data reference with predicates that add up (OR-up) to the
- true predicate: this ensures that the data reference A is touched
+ a data reference with the same base with predicates that add up (OR-up) to
+ the true predicate: this ensures that the data reference A is touched
(read or written) on every iteration of the if-converted loop. */
static bool
@@ -489,21 +489,38 @@
continue;
for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
- if (DR_STMT (b) != stmt
- && same_data_refs (a, b))
- {
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
- if (DR_RW_UNCONDITIONALLY (b) == 1
- || is_true_predicate (cb)
- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
- ca, cb)))
- {
- DR_RW_UNCONDITIONALLY (a) = 1;
- DR_RW_UNCONDITIONALLY (b) = 1;
- found = true;
- break;
- }
+ {
+ tree ref_base_a = DR_REF (a);
+ tree ref_base_b = DR_REF (b);
+
+ if (DR_STMT (b) == stmt)
+ continue;
+
+ while (TREE_CODE (ref_base_a) == COMPONENT_REF
+ || TREE_CODE (ref_base_a) == IMAGPART_EXPR
+ || TREE_CODE (ref_base_a) == REALPART_EXPR)
+ ref_base_a = TREE_OPERAND (ref_base_a, 0);
+
+ while (TREE_CODE (ref_base_b) == COMPONENT_REF
+ || TREE_CODE (ref_base_b) == IMAGPART_EXPR
+ || TREE_CODE (ref_base_b) == REALPART_EXPR)
+ ref_base_b = TREE_OPERAND (ref_base_b, 0);
+
+ if (!operand_equal_p (ref_base_a, ref_base_b, 0))
+ {
+ tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+
+ if (DR_RW_UNCONDITIONALLY (b) == 1
+ || is_true_predicate (cb)
+ || is_true_predicate (ca
+ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
+ {
+ DR_RW_UNCONDITIONALLY (a) = 1;
+ DR_RW_UNCONDITIONALLY (b) = 1;
+ found = true;
+ break;
+ }
+ }
}
if (!found)

View File

@@ -1,140 +0,0 @@
2011-05-02 Ira Rosen <ira.rosen@linaro.org>
Backport from FSF:
2011-03-27 Ira Rosen <ira.rosen@linaro.org>
gcc/
* config/arm/arm.c (arm_autovectorize_vector_sizes): New function.
(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
gcc/testsuite/
* gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
accesses to preserve the meaning of the test for doubleword vectors.
* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
* gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000
+++ new/gcc/config/arm/arm.c 2011-04-28 11:46:58 +0000
@@ -250,6 +250,7 @@
bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static unsigned int arm_autovectorize_vector_sizes (void);
/* Table of machine attributes. */
@@ -395,6 +396,9 @@
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+ arm_autovectorize_vector_sizes
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
@@ -23511,6 +23515,12 @@
}
}
+static unsigned int
+arm_autovectorize_vector_sizes (void)
+{
+ return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+}
+
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
@@ -8,7 +8,7 @@
void with_restrict(int * __restrict p)
{
int i;
- int *q = p - 2;
+ int *q = p - 1;
for (i = 0; i < 1000; ++i) {
p[i] = q[i];
@@ -19,7 +19,7 @@
void without_restrict(int * p)
{
int i;
- int *q = p - 2;
+ int *q = p - 1;
for (i = 0; i < 1000; ++i) {
p[i] = q[i];
@@ -38,8 +38,8 @@
a[i] = b[i] = i;
}
- with_restrict(a + 2);
- without_restrict(b + 2);
+ with_restrict(a + 1);
+ without_restrict(b + 1);
for (i = 0; i < 1002; ++i) {
if (a[i] != b[i])
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2010-11-22 12:16:52 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
@@ -4,9 +4,9 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 8
+#define N 12
-unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
int
main1 ()
@@ -101,7 +101,7 @@
}
/* SLP with unrolling by 8. */
- for (i = 0; i < N/2; i++)
+ for (i = 0; i < N/4; i++)
{
out[i*9] = in[i*9];
out[i*9 + 1] = in[i*9 + 1];
@@ -115,7 +115,7 @@
}
/* check results: */
- for (i = 0; i < N/2; i++)
+ for (i = 0; i < N/4; i++)
{
if (out[i*9] != in[i*9]
|| out[i*9 + 1] != in[i*9 + 1]
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2010-11-22 12:16:52 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
@@ -17,7 +17,7 @@
float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
- float E[4] = {0,1,2,480};
+ float E[4] = {0,480,960,1440};
float s;
int i, j;
@@ -55,7 +55,7 @@
s = 0;
for (j=0; j<N; j+=4)
s += C[j];
- B[i+3] = B[i] + s;
+ B[i+1] = B[i] + s;
}
/* check results: */

View File

@@ -1,255 +0,0 @@
2011-04-26 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-04-15 Maxim Kuvyrkov <maxim@codesourcery.com>
gcc/
* combine.c (subst, combine_simplify_rtx): Add new argument, use it
to track processing of conditionals. Update all callers.
(try_combine, simplify_if_then_else): Update.
2011-04-25 Maxim Kuvyrkov <maxim@codesourcery.com>
Eric Botcazou <ebotcazou@adacore.com>
gcc/
* combine.c (combine_simplify_rtx): Avoid mis-simplifying conditionals
for STORE_FLAG_VALUE==-1 case.
=== modified file 'gcc/combine.c'
Index: gcc-4_6-branch/gcc/combine.c
===================================================================
--- gcc-4_6-branch.orig/gcc/combine.c 2012-03-05 00:16:20.000000000 -0800
+++ gcc-4_6-branch/gcc/combine.c 2012-03-05 16:05:01.212928507 -0800
@@ -391,8 +391,8 @@
static void undo_all (void);
static void undo_commit (void);
static rtx *find_split_point (rtx *, rtx, bool);
-static rtx subst (rtx, rtx, rtx, int, int);
-static rtx combine_simplify_rtx (rtx, enum machine_mode, int);
+static rtx subst (rtx, rtx, rtx, int, int, int);
+static rtx combine_simplify_rtx (rtx, enum machine_mode, int, int);
static rtx simplify_if_then_else (rtx);
static rtx simplify_set (rtx);
static rtx simplify_logical (rtx);
@@ -3119,12 +3119,12 @@
if (i1)
{
subst_low_luid = DF_INSN_LUID (i1);
- i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0);
+ i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}
else
{
subst_low_luid = DF_INSN_LUID (i2);
- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0);
+ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}
}
@@ -3136,7 +3136,7 @@
self-referential RTL when we will be substituting I1SRC for I1DEST
later. Likewise if I0 feeds into I2, either directly or indirectly
through I1, and I0DEST is in I0SRC. */
- newpat = subst (PATTERN (i3), i2dest, i2src, 0,
+ newpat = subst (PATTERN (i3), i2dest, i2src, 0, 0,
(i1_feeds_i2_n && i1dest_in_i1src)
|| ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
&& i0dest_in_i0src));
@@ -3180,7 +3180,7 @@
copy of I1SRC each time we substitute it, in order to avoid creating
self-referential RTL when we will be substituting I0SRC for I0DEST
later. */
- newpat = subst (newpat, i1dest, i1src, 0,
+ newpat = subst (newpat, i1dest, i1src, 0, 0,
i0_feeds_i1_n && i0dest_in_i0src);
substed_i1 = 1;
@@ -3214,7 +3214,7 @@
n_occurrences = 0;
subst_low_luid = DF_INSN_LUID (i0);
- newpat = subst (newpat, i0dest, i0src, 0, 0);
+ newpat = subst (newpat, i0dest, i0src, 0, 0, 0);
substed_i0 = 1;
}
@@ -3276,7 +3276,7 @@
{
rtx t = i1pat;
if (i0_feeds_i1_n)
- t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0);
+ t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0);
XVECEXP (newpat, 0, --total_sets) = t;
}
@@ -3284,10 +3284,10 @@
{
rtx t = i2pat;
if (i1_feeds_i2_n)
- t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+ t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0,
i0_feeds_i1_n && i0dest_in_i0src);
if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
- t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0);
+ t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0);
XVECEXP (newpat, 0, --total_sets) = t;
}
@@ -4959,11 +4959,13 @@
IN_DEST is nonzero if we are processing the SET_DEST of a SET.
+ IN_COND is nonzero if we are on the top level of a condition.
+
UNIQUE_COPY is nonzero if each substitution must be unique. We do this
by copying if `n_occurrences' is nonzero. */
static rtx
-subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy)
+subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy)
{
enum rtx_code code = GET_CODE (x);
enum machine_mode op0_mode = VOIDmode;
@@ -5024,7 +5026,7 @@
&& GET_CODE (XVECEXP (x, 0, 0)) == SET
&& GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS)
{
- new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, unique_copy);
+ new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, 0, unique_copy);
/* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER
@@ -5041,7 +5043,7 @@
&& GET_CODE (dest) != CC0
&& GET_CODE (dest) != PC)
{
- new_rtx = subst (dest, from, to, 0, unique_copy);
+ new_rtx = subst (dest, from, to, 0, 0, unique_copy);
/* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER
@@ -5087,8 +5089,8 @@
}
else
{
- new_rtx = subst (XVECEXP (x, i, j), from, to, 0,
- unique_copy);
+ new_rtx = subst (XVECEXP (x, i, j), from, to, 0, 0,
+ unique_copy);
/* If this substitution failed, this whole thing
fails. */
@@ -5165,7 +5167,9 @@
&& (code == SUBREG || code == STRICT_LOW_PART
|| code == ZERO_EXTRACT))
|| code == SET)
- && i == 0), unique_copy);
+ && i == 0),
+ code == IF_THEN_ELSE && i == 0,
+ unique_copy);
/* If we found that we will have to reject this combination,
indicate that by returning the CLOBBER ourselves, rather than
@@ -5222,7 +5226,7 @@
/* If X is sufficiently simple, don't bother trying to do anything
with it. */
if (code != CONST_INT && code != REG && code != CLOBBER)
- x = combine_simplify_rtx (x, op0_mode, in_dest);
+ x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond);
if (GET_CODE (x) == code)
break;
@@ -5242,10 +5246,12 @@
expression.
OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero
- if we are inside a SET_DEST. */
+ if we are inside a SET_DEST. IN_COND is nonzero if we are on the top level
+ of a condition. */
static rtx
-combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
+combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest,
+ int in_cond)
{
enum rtx_code code = GET_CODE (x);
enum machine_mode mode = GET_MODE (x);
@@ -5300,8 +5306,8 @@
false arms to store-flag values. Be careful to use copy_rtx
here since true_rtx or false_rtx might share RTL with x as a
result of the if_then_else_cond call above. */
- true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0);
- false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0);
+ true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0, 0);
+ false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0, 0);
/* If true_rtx and false_rtx are not general_operands, an if_then_else
is unlikely to be simpler. */
@@ -5645,7 +5651,7 @@
{
/* Try to simplify the expression further. */
rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1));
- temp = combine_simplify_rtx (tor, VOIDmode, in_dest);
+ temp = combine_simplify_rtx (tor, VOIDmode, in_dest, 0);
/* If we could, great. If not, do not go ahead with the IOR
replacement, since PLUS appears in many special purpose
@@ -5738,7 +5744,16 @@
ZERO_EXTRACT is indeed appropriate, it will be placed back by
the call to make_compound_operation in the SET case. */
- if (STORE_FLAG_VALUE == 1
+ if (in_cond)
+ /* Don't apply the optimizations below if the caller would
+ prefer a comparison rather than a value.
+ E.g., for the condition in an IF_THEN_ELSE most targets need
+ an explicit comparison. */
+ {
+ ;
+ }
+
+ else if (STORE_FLAG_VALUE == 1
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx
&& mode == GET_MODE (op0)
@@ -5784,7 +5799,10 @@
/* If STORE_FLAG_VALUE is -1, we have cases similar to
those above. */
- if (STORE_FLAG_VALUE == -1
+ if (in_cond)
+ ;
+
+ else if (STORE_FLAG_VALUE == -1
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx
&& (num_sign_bit_copies (op0, mode)
@@ -5982,11 +6000,11 @@
if (reg_mentioned_p (from, true_rtx))
true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code,
from, true_val),
- pc_rtx, pc_rtx, 0, 0);
+ pc_rtx, pc_rtx, 0, 0, 0);
if (reg_mentioned_p (from, false_rtx))
false_rtx = subst (known_cond (copy_rtx (false_rtx), false_code,
from, false_val),
- pc_rtx, pc_rtx, 0, 0);
+ pc_rtx, pc_rtx, 0, 0, 0);
SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx);
SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx);
@@ -6203,11 +6221,11 @@
{
temp = subst (simplify_gen_relational (true_code, m, VOIDmode,
cond_op0, cond_op1),
- pc_rtx, pc_rtx, 0, 0);
+ pc_rtx, pc_rtx, 0, 0, 0);
temp = simplify_gen_binary (MULT, m, temp,
simplify_gen_binary (MULT, m, c1,
const_true_rtx));
- temp = subst (temp, pc_rtx, pc_rtx, 0, 0);
+ temp = subst (temp, pc_rtx, pc_rtx, 0, 0, 0);
temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp);
if (extend_op != UNKNOWN)

View File

@@ -1,21 +0,0 @@
2011-05-06 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
From Sergey Grechanik <mouseentity@ispras.ru>, approved for mainline
* config/arm/arm.c (coproc_secondary_reload_class): Return NO_REGS
for constant vectors.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-05-03 15:18:07 +0000
+++ new/gcc/config/arm/arm.c 2011-05-06 11:33:02 +0000
@@ -9193,7 +9193,7 @@
/* The neon move patterns handle all legitimate vector and struct
addresses. */
if (TARGET_NEON
- && MEM_P (x)
+ && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
|| VALID_NEON_STRUCT_MODE (mode)))

View File

@@ -1,24 +0,0 @@
2011-05-12 Michael Hope <michael.hope@linaro.org>
gcc/
Backport from mainline:
2011-05-05 Michael Hope <michael.hope@linaro.org>
PR pch/45979
* config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for
__ARM_EABI__ hosts.
=== modified file 'gcc/config/host-linux.c'
--- old/gcc/config/host-linux.c 2010-11-29 14:09:41 +0000
+++ new/gcc/config/host-linux.c 2011-05-06 20:19:30 +0000
@@ -84,6 +84,8 @@
# define TRY_EMPTY_VM_SPACE 0x60000000
#elif defined(__mc68000__)
# define TRY_EMPTY_VM_SPACE 0x40000000
+#elif defined(__ARM_EABI__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
#else
# define TRY_EMPTY_VM_SPACE 0
#endif

View File

@@ -1,640 +0,0 @@
2011-05-13 Revital Eres <revital.eres@linaro.org>
gcc/
* loop-doloop.c (doloop_condition_get): Support new form of
doloop pattern and use prev_nondebug_insn instead of PREV_INSN.
* config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*".
(doloop_end): New.
* config/arm/arm.md (*addsi3_compare0): Remove "*".
* params.def (sms-min-sc): New param flag.
* doc/invoke.texi (sms-min-sc): Document it.
* ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge
enters the branch create an anti edge in the opposite direction
to prevent the creation of reg-moves.
* modulo-sched.c: Adjust comment to reflect the fact we are
scheduling closing branch.
(PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine.
(stage_count): New field in struct partial_schedule.
(calculate_stage_count): New function.
(normalize_sched_times): Rename to reset_sched_times and handle
incrementing the sched time of the nodes by a constant value
passed as parameter.
(duplicate_insns_of_cycles): Skip closing branch.
(sms_schedule_by_order): Schedule closing branch.
(ps_insn_find_column): Handle closing branch.
(sms_schedule): Call reset_sched_times and adjust the code to
support scheduling of the closing branch. Use sms-min-sc.
Support new form of doloop pattern.
(ps_insert_empty_row): Update calls to normalize_sched_times
and rotate_partial_schedule functions.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-05-06 11:28:27 +0000
+++ new/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
@@ -791,7 +791,7 @@
""
)
-(define_insn "*addsi3_compare0"
+(define_insn "addsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(plus:SI (match_operand:SI 1 "s_register_operand" "r, r")
=== modified file 'gcc/config/arm/thumb2.md'
--- old/gcc/config/arm/thumb2.md 2011-01-03 20:52:22 +0000
+++ new/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000
@@ -836,7 +836,7 @@
"operands[4] = GEN_INT (- INTVAL (operands[2]));"
)
-(define_insn "*thumb2_addsi3_compare0"
+(define_insn "thumb2_addsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r")
@@ -1118,3 +1118,54 @@
"
operands[2] = GEN_INT (32 - INTVAL (operands[2]));
")
+
+;; Define the subtract-one-and-jump insns so loop.c
+;; knows what to generate.
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ "TARGET_32BIT"
+ "
+ {
+ /* Currently SMS relies on the do-loop pattern to recognize loops
+ where (1) the control part consists of all insns defining and/or
+ using a certain 'count' register and (2) the loop count can be
+ adjusted by modifying this register prior to the loop.
+ ??? The possible introduction of a new block to initialize the
+ new IV can potentially affect branch optimizations. */
+ if (optimize > 0 && flag_modulo_sched)
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+ rtx cc_reg;
+ rtx insn;
+ rtx cmp;
+
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ s0 = operands [0];
+ if (TARGET_THUMB2)
+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+ else
+ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+
+ cmp = XVECEXP (PATTERN (insn), 0, 0);
+ cc_reg = SET_DEST (cmp);
+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+ DONE;
+ }else
+ FAIL;
+}")
+
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2010-11-30 11:41:24 +0000
+++ new/gcc/ddg.c 2011-05-11 07:15:47 +0000
@@ -197,6 +197,11 @@
}
}
+ /* If a true dep edge enters the branch create an anti edge in the
+ opposite direction to prevent the creation of reg-moves. */
+ if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn))
+ create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1);
+
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e);
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000
+++ new/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000
@@ -8730,6 +8730,10 @@
The maximum number of best instructions in the ready list that are considered
for renaming in the selective scheduler. The default value is 2.
+@item sms-min-sc
+The minimum value of stage count that swing modulo scheduler will
+generate. The default value is 2.
+
@item max-last-value-rtl
The maximum size measured as number of RTLs that can be recorded in an expression
in combiner for a pseudo register as last known value of that register. The default
=== modified file 'gcc/loop-doloop.c'
--- old/gcc/loop-doloop.c 2010-11-30 11:41:24 +0000
+++ new/gcc/loop-doloop.c 2011-05-11 07:15:47 +0000
@@ -78,6 +78,8 @@
rtx inc_src;
rtx condition;
rtx pattern;
+ rtx cc_reg = NULL_RTX;
+ rtx reg_orig = NULL_RTX;
/* The canonical doloop pattern we expect has one of the following
forms:
@@ -96,7 +98,16 @@
2) (set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(label_ref (label))
- (pc))). */
+ (pc))).
+
+ Some targets (ARM) do the comparison before the branch, as in the
+ following form:
+
+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc))) */
pattern = PATTERN (doloop_pat);
@@ -104,19 +115,47 @@
{
rtx cond;
rtx prev_insn = prev_nondebug_insn (doloop_pat);
+ rtx cmp_arg1, cmp_arg2;
+ rtx cmp_orig;
- /* We expect the decrement to immediately precede the branch. */
+ /* In case the pattern is not PARALLEL we expect two forms
+ of doloop which are cases 2) and 3) above: in case 2) the
+ decrement immediately precedes the branch, while in case 3)
+ the compare and decrement instructions immediately precede
+ the branch. */
if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
return 0;
cmp = pattern;
- inc = PATTERN (PREV_INSN (doloop_pat));
+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
+ {
+ /* The third case: the compare and decrement instructions
+ immediately precede the branch. */
+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
+ if (GET_CODE (cmp_orig) != SET)
+ return 0;
+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
+ return 0;
+ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
+ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
+ if (cmp_arg2 != const0_rtx
+ || GET_CODE (cmp_arg1) != PLUS)
+ return 0;
+ reg_orig = XEXP (cmp_arg1, 0);
+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
+ || !REG_P (reg_orig))
+ return 0;
+ cc_reg = SET_DEST (cmp_orig);
+
+ inc = XVECEXP (PATTERN (prev_insn), 0, 1);
+ }
+ else
+ inc = PATTERN (prev_insn);
/* We expect the condition to be of the form (reg != 0) */
cond = XEXP (SET_SRC (cmp), 0);
if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
return 0;
-
}
else
{
@@ -162,11 +201,15 @@
return 0;
if ((XEXP (condition, 0) == reg)
+ /* For the third case: */
+ || ((cc_reg != NULL_RTX)
+ && (XEXP (condition, 0) == cc_reg)
+ && (reg_orig == reg))
|| (GET_CODE (XEXP (condition, 0)) == PLUS
- && XEXP (XEXP (condition, 0), 0) == reg))
+ && XEXP (XEXP (condition, 0), 0) == reg))
{
if (GET_CODE (pattern) != PARALLEL)
- /* The second form we expect:
+ /* For the second form we expect:
(set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
@@ -181,7 +224,24 @@
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
- So we return that form instead.
+ For the third form we expect:
+
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
+
+ which is equivalent to the following:
+
+ (parallel [(set (cc) (compare (reg, 1)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc)))])
+
+ So we return the second form instead for the two cases.
+
*/
condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-02-14 17:59:10 +0000
+++ new/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000
@@ -84,14 +84,13 @@
II cycles (i.e. use register copies to prevent a def from overwriting
itself before reaching the use).
- SMS works with countable loops (1) whose control part can be easily
- decoupled from the rest of the loop and (2) whose loop count can
- be easily adjusted. This is because we peel a constant number of
- iterations into a prologue and epilogue for which we want to avoid
- emitting the control part, and a kernel which is to iterate that
- constant number of iterations less than the original loop. So the
- control part should be a set of insns clearly identified and having
- its own iv, not otherwise used in the loop (at-least for now), which
+ SMS works with countable loops whose loop count can be easily
+ adjusted. This is because we peel a constant number of iterations
+ into a prologue and epilogue for which we want to avoid emitting
+ the control part, and a kernel which is to iterate that constant
+ number of iterations less than the original loop. So the control
+ part should be a set of insns clearly identified and having its
+ own iv, not otherwise used in the loop (at least for now), which
initializes a register before the loop to the number of iterations.
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
@@ -116,8 +115,10 @@
/* The number of different iterations the nodes in ps span, assuming
the stage boundaries are placed efficiently. */
-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
- + 1 + (ps)->ii - 1) / (ps)->ii)
+#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
+ + 1 + ii - 1) / ii)
+/* The stage count of ps. */
+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
/* A single instruction in the partial schedule. */
struct ps_insn
@@ -155,6 +156,8 @@
int max_cycle;
ddg_ptr g; /* The DDG of the insns in the partial schedule. */
+
+ int stage_count; /* The stage count of the partial schedule. */
};
/* We use this to record all the register replacements we do in
@@ -195,7 +198,7 @@
rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
int, int, int, rtx);
-
+static int calculate_stage_count (partial_schedule_ptr ps);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
@@ -310,10 +313,10 @@
either a single (parallel) branch-on-count or a (non-parallel)
branch immediately preceded by a single (decrement) insn. */
first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
- : PREV_INSN (tail));
+ : prev_nondebug_insn (tail));
for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
- if (reg_mentioned_p (reg, insn))
+ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn))
{
if (dump_file)
{
@@ -569,13 +572,12 @@
}
}
-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
- of SCHED_ROW and SCHED_STAGE. */
+/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
+ SCHED_ROW and SCHED_STAGE. */
static void
-normalize_sched_times (partial_schedule_ptr ps)
+reset_sched_times (partial_schedule_ptr ps, int amount)
{
int row;
- int amount = PS_MIN_CYCLE (ps);
int ii = ps->ii;
ps_insn_ptr crr_insn;
@@ -584,19 +586,43 @@
{
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
+ int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
+ int sc_until_cycle_zero, stage;
- if (dump_file)
- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
- (u), ps->min_cycle);
+ if (dump_file)
+ {
+ /* Print the scheduling times after the rotation. */
+ fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
+ "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
+ INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
+ normalized_time);
+ if (JUMP_P (crr_insn->node->insn))
+ fprintf (dump_file, " (branch)");
+ fprintf (dump_file, "\n");
+ }
+
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = normalized_time % ii;
- SCHED_STAGE (u) = normalized_time / ii;
+ SCHED_ROW (u) = SMODULO (normalized_time, ii);
+
+ /* The calculation of stage count is done by adding the number
+ of stages before cycle zero and after cycle zero. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
}
}
-
+
/* Set SCHED_COLUMN of each node according to its position in PS. */
static void
set_columns_for_ps (partial_schedule_ptr ps)
@@ -646,9 +672,12 @@
/* Do not duplicate any insn which refers to count_reg as it
belongs to the control part.
+ The closing branch is scheduled as well and thus should
+ be ignored.
TODO: This should be done by analyzing the control part of
the loop. */
- if (reg_mentioned_p (count_reg, u_node->insn))
+ if (reg_mentioned_p (count_reg, u_node->insn)
+ || JUMP_P (ps_ij->node->insn))
continue;
if (for_prolog)
@@ -1009,9 +1038,11 @@
continue;
}
- /* Don't handle BBs with calls or barriers, or !single_set insns,
- or auto-increment insns (to avoid creating invalid reg-moves
- for the auto-increment insns).
+ /* Don't handle BBs with calls or barriers or auto-increment insns
+ (to avoid creating invalid reg-moves for the auto-increment insns),
+ or !single_set with the exception of instructions that include
+ count_reg---these instructions are part of the control part
+ that do-loop recognizes.
??? Should handle auto-increment insns.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
@@ -1021,7 +1052,8 @@
if (CALL_P (insn)
|| BARRIER_P (insn)
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
+ && !reg_mentioned_p (count_reg, insn))
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
@@ -1049,7 +1081,11 @@
continue;
}
- if (! (g = create_ddg (bb, 0)))
+ /* Always schedule the closing branch with the rest of the
+ instructions. The branch is rotated to be in row ii-1 at the
+ end of the scheduling procedure to make sure it's the last
+ instruction in the iteration. */
+ if (! (g = create_ddg (bb, 1)))
{
if (dump_file)
fprintf (dump_file, "SMS create_ddg failed\n");
@@ -1157,14 +1193,17 @@
ps = sms_schedule_by_order (g, mii, maxii, node_order);
- if (ps){
- stage_count = PS_STAGE_COUNT (ps);
- gcc_assert(stage_count >= 1);
- }
+ if (ps)
+ {
+ stage_count = calculate_stage_count (ps);
+ gcc_assert(stage_count >= 1);
+ PS_STAGE_COUNT(ps) = stage_count;
+ }
- /* Stage count of 1 means that there is no interleaving between
- iterations, let the scheduling passes do the job. */
- if (stage_count <= 1
+ /* The default value of PARAM_SMS_MIN_SC is 2, as a stage count of
+ 1 means that there is no interleaving between iterations; thus
+ we let the scheduling passes do the job in this case. */
+ if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
|| (count_init && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
@@ -1182,32 +1221,24 @@
else
{
struct undo_replace_buff_elem *reg_move_replaces;
-
- if (dump_file)
- {
+ int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ /* Set the stage boundaries. The closing_branch was scheduled
+ and should appear in the last (ii-1) row. */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ set_columns_for_ps (ps);
+
+ canon_loop (loop);
+
+ if (dump_file)
+ {
fprintf (dump_file,
"SMS succeeded %d %d (with ii, sc)\n", ps->ii,
stage_count);
print_partial_schedule (ps, dump_file);
- fprintf (dump_file,
- "SMS Branch (%d) will later be scheduled at cycle %d.\n",
- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
}
-
- /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
- the closing_branch was scheduled and should appear in the last (ii-1)
- row. Otherwise, we are free to schedule the branch, and we let nodes
- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
- row; this should reduce stage_count to minimum.
- TODO: Revisit the issue of scheduling the insns of the
- control part relative to the branch when the control part
- has more than one insn. */
- normalize_sched_times (ps);
- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
- set_columns_for_ps (ps);
-
- canon_loop (loop);
-
+
/* case the BCT count is not known , Do loop-versioning */
if (count_reg && ! count_init)
{
@@ -1760,12 +1791,6 @@
continue;
}
- if (JUMP_P (insn)) /* Closing branch handled later. */
- {
- RESET_BIT (tobe_scheduled, u);
- continue;
- }
-
if (TEST_BIT (sched_nodes, u))
continue;
@@ -1893,8 +1918,8 @@
if (dump_file)
fprintf (dump_file, "split_row=%d\n", split_row);
- normalize_sched_times (ps);
- rotate_partial_schedule (ps, ps->min_cycle);
+ reset_sched_times (ps, PS_MIN_CYCLE (ps));
+ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
for (row = 0; row < split_row; row++)
@@ -2571,6 +2596,7 @@
ps_insn_ptr next_ps_i;
ps_insn_ptr first_must_follow = NULL;
ps_insn_ptr last_must_precede = NULL;
+ ps_insn_ptr last_in_row = NULL;
int row;
if (! ps_i)
@@ -2597,8 +2623,37 @@
else
last_must_precede = next_ps_i;
}
+ /* The closing branch must be the last in the row. */
+ if (must_precede
+ && TEST_BIT (must_precede, next_ps_i->node->cuid)
+ && JUMP_P (next_ps_i->node->insn))
+ return false;
+
+ last_in_row = next_ps_i;
}
+ /* The closing branch is scheduled as well. Make sure there is no
+ dependent instruction after it as the branch should be the last
+ instruction in the row. */
+ if (JUMP_P (ps_i->node->insn))
+ {
+ if (first_must_follow)
+ return false;
+ if (last_in_row)
+ {
+ /* Make the branch the last in the row. New instructions
+ will be inserted at the beginning of the row or after the
+ last must_precede instruction thus the branch is guaranteed
+ to remain the last instruction in the row. */
+ last_in_row->next_in_row = ps_i;
+ ps_i->prev_in_row = last_in_row;
+ ps_i->next_in_row = NULL;
+ }
+ else
+ ps->rows[row] = ps_i;
+ return true;
+ }
+
/* Now insert the node after INSERT_AFTER_PSI. */
if (! last_must_precede)
@@ -2820,6 +2875,24 @@
return ps_i;
}
+/* Calculate the stage count of the partial schedule PS. The calculation
+ takes into account the rotation to bring the closing branch to row
+ ii-1. */
+int
+calculate_stage_count (partial_schedule_ptr ps)
+{
+ int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
+ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
+ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
+ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
+
+ /* The calculation of stage count is done by adding the number of stages
+ before cycle zero and after cycle zero. */
+ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
+
+ return stage_count;
+}
+
/* Rotate the rows of PS such that insns scheduled at time
START_CYCLE will appear in row 0. Updates max/min_cycles. */
void
=== modified file 'gcc/params.def'
--- old/gcc/params.def 2011-04-18 11:31:29 +0000
+++ new/gcc/params.def 2011-05-11 07:15:47 +0000
@@ -344,6 +344,11 @@
"sms-max-ii-factor",
"A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop",
100, 0, 0)
+/* The minimum value of stage count that swing modulo scheduler will generate. */
+DEFPARAM(PARAM_SMS_MIN_SC,
+ "sms-min-sc",
+ "The minimum value of stage count that swing modulo scheduler will generate.",
+ 2, 1, 1)
DEFPARAM(PARAM_SMS_DFA_HISTORY,
"sms-dfa-history",
"The number of cycles the swing modulo scheduler considers when checking conflicts using DFA",

View File

@@ -1,30 +0,0 @@
2011-05-13 Revital Eres <revital.eres@linaro.org>
gcc/
* ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs.
* modulo-sched.c (sms_schedule): Avoid unfreed memory when SMS fails.
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-05-11 07:15:47 +0000
+++ new/gcc/ddg.c 2011-05-13 16:03:40 +0000
@@ -1016,6 +1016,7 @@
for (i = 0; i < all_sccs->num_sccs; i++)
free_scc (all_sccs->sccs[i]);
+ free (all_sccs->sccs);
free (all_sccs);
}
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000
+++ new/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000
@@ -1216,7 +1216,6 @@
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
fprintf (dump_file, ")\n");
}
- continue;
}
else
{

View File

@@ -1,134 +0,0 @@
2011-06-02 Chung-Lin Tang <cltang@codesourcery.com>
Backport from mainline:
2011-03-21 Chung-Lin Tang <cltang@codesourcery.com>
gcc/
* simplify-rtx.c (simplify_binary_operation_1): Handle
(xor (and A B) C) case when B and C are both constants.
gcc/testsuite/
* gcc.target/arm/xor-and.c: New.
2011-03-18 Chung-Lin Tang <cltang@codesourcery.com>
gcc/
* combine.c (try_combine): Do simplification only call of
subst() on i2 even when i1 is present. Update comments.
gcc/testsuite/
* gcc.target/arm/unsigned-extend-1.c: New.
=== modified file 'gcc/combine.c'
--- old/gcc/combine.c 2011-05-06 11:28:27 +0000
+++ new/gcc/combine.c 2011-05-27 14:31:18 +0000
@@ -3089,7 +3089,7 @@
/* It is possible that the source of I2 or I1 may be performing
an unneeded operation, such as a ZERO_EXTEND of something
that is known to have the high part zero. Handle that case
- by letting subst look at the innermost one of them.
+ by letting subst look at the inner insns.
Another way to do this would be to have a function that tries
to simplify a single insn instead of merging two or more
@@ -3114,11 +3114,9 @@
subst_low_luid = DF_INSN_LUID (i1);
i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}
- else
- {
- subst_low_luid = DF_INSN_LUID (i2);
- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
- }
+
+ subst_low_luid = DF_INSN_LUID (i2);
+ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}
n_occurrences = 0; /* `subst' counts here */
=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-03-26 09:24:06 +0000
+++ new/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000
@@ -2484,6 +2484,46 @@
XEXP (op0, 1), mode),
op1);
+ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P),
+ we can transform like this:
+ (A&B)^C == ~(A&B)&C | ~C&(A&B)
+ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law
+ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order
+ Attempt a few simplifications when B and C are both constants. */
+ if (GET_CODE (op0) == AND
+ && CONST_INT_P (op1)
+ && CONST_INT_P (XEXP (op0, 1)))
+ {
+ rtx a = XEXP (op0, 0);
+ rtx b = XEXP (op0, 1);
+ rtx c = op1;
+ HOST_WIDE_INT bval = INTVAL (b);
+ HOST_WIDE_INT cval = INTVAL (c);
+
+ rtx na_c
+ = simplify_binary_operation (AND, mode,
+ simplify_gen_unary (NOT, mode, a, mode),
+ c);
+ if ((~cval & bval) == 0)
+ {
+ /* Try to simplify ~A&C | ~B&C. */
+ if (na_c != NULL_RTX)
+ return simplify_gen_binary (IOR, mode, na_c,
+ GEN_INT (~bval & cval));
+ }
+ else
+ {
+ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. */
+ if (na_c == const0_rtx)
+ {
+ rtx a_nc_b = simplify_gen_binary (AND, mode, a,
+ GEN_INT (~cval & bval));
+ return simplify_gen_binary (IOR, mode, a_nc_b,
+ GEN_INT (~bval & cval));
+ }
+ }
+ }
+
/* (xor (comparison foo bar) (const_int 1)) can become the reversed
comparison if STORE_FLAG_VALUE is 1. */
if (STORE_FLAG_VALUE == 1
=== added file 'gcc/testsuite/gcc.target/arm/unsigned-extend-1.c'
--- old/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 2011-05-27 14:31:18 +0000
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6" } */
+
+unsigned char foo (unsigned char c)
+{
+ return (c >= '0') && (c <= '9');
+}
+
+/* { dg-final { scan-assembler-not "uxtb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/xor-and.c'
--- old/gcc/testsuite/gcc.target/arm/xor-and.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/xor-and.c 2011-05-27 14:31:18 +0000
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv6" } */
+
+unsigned short foo (unsigned short x)
+{
+ x ^= 0x4002;
+ x >>= 1;
+ x |= 0x8000;
+ return x;
+}
+
+/* { dg-final { scan-assembler "orr" } } */
+/* { dg-final { scan-assembler-not "mvn" } } */
+/* { dg-final { scan-assembler-not "uxth" } } */

View File

@@ -1,329 +0,0 @@
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.md (*maddhidi4tb, *maddhidi4tt): New define_insns.
(*maddhisi4tb, *maddhisi4tt): New define_insns.
gcc/testsuite/
* gcc.target/arm/smlatb-1.c: New file.
* gcc.target/arm/smlatt-1.c: New file.
* gcc.target/arm/smlaltb-1.c: New file.
* gcc.target/arm/smlaltt-1.c: New file.
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-06-07 Bernd Schmidt <bernds@codesourcery.com>
Andrew Stubbs <ams@codesourcery.com>
gcc/
* simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening
multiplies.
* doc/md.texi (Canonicalization of Instructions): Document widening
multiply canonicalization.
gcc/testsuite/
* gcc.target/arm/mla-2.c: New test.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
+++ new/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000
@@ -1809,6 +1809,36 @@
(set_attr "predicable" "yes")]
)
+;; Note: there is no maddhisi4ibt because this one is canonical form
+(define_insn "*maddhisi4tb"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 2 "s_register_operand" "r")))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlatb%?\\t%0, %1, %2, %3"
+ [(set_attr "insn" "smlaxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*maddhisi4tt"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlatt%?\\t%0, %1, %2, %3"
+ [(set_attr "insn" "smlaxy")
+ (set_attr "predicable" "yes")]
+)
+
(define_insn "*maddhidi4"
[(set (match_operand:DI 0 "s_register_operand" "=r")
(plus:DI
@@ -1822,6 +1852,39 @@
[(set_attr "insn" "smlalxy")
(set_attr "predicable" "yes")])
+;; Note: there is no maddhidi4ibt because this one is canonical form
+(define_insn "*maddhidi4tb"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16)))
+ (sign_extend:DI
+ (match_operand:HI 2 "s_register_operand" "r")))
+ (match_operand:DI 3 "s_register_operand" "0")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlaltb%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smlalxy")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*maddhidi4tt"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16)))
+ (sign_extend:DI
+ (ashiftrt:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (const_int 16))))
+ (match_operand:DI 3 "s_register_operand" "0")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlaltt%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smlalxy")
+ (set_attr "predicable" "yes")])
+
(define_expand "mulsf3"
[(set (match_operand:SF 0 "s_register_operand" "")
(mult:SF (match_operand:SF 1 "s_register_operand" "")
=== modified file 'gcc/doc/md.texi'
--- old/gcc/doc/md.texi 2011-05-05 15:43:06 +0000
+++ new/gcc/doc/md.texi 2011-06-07 11:18:20 +0000
@@ -5929,6 +5929,23 @@
will be written using @code{zero_extract} rather than the equivalent
@code{and} or @code{sign_extract} operations.
+@cindex @code{mult}, canonicalization of
+@item
+@code{(sign_extend:@var{m1} (mult:@var{m2} (sign_extend:@var{m2} @var{x})
+(sign_extend:@var{m2} @var{y})))} is converted to @code{(mult:@var{m1}
+(sign_extend:@var{m1} @var{x}) (sign_extend:@var{m1} @var{y}))}, and likewise
+for @code{zero_extend}.
+
+@item
+@code{(sign_extend:@var{m1} (mult:@var{m2} (ashiftrt:@var{m2}
+@var{x} @var{s}) (sign_extend:@var{m2} @var{y})))} is converted
+to @code{(mult:@var{m1} (sign_extend:@var{m1} (ashiftrt:@var{m2}
+@var{x} @var{s})) (sign_extend:@var{m1} @var{y}))}, and likewise for
+patterns using @code{zero_extend} and @code{lshiftrt}. If the second
+operand of @code{mult} is also a shift, then that is extended also.
+This transformation is only applied when it can be proven that the
+original operation had sufficient precision to prevent overflow.
+
@end itemize
Further canonicalization rules are defined in the function
=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000
+++ new/gcc/simplify-rtx.c 2011-06-02 12:32:16 +0000
@@ -1000,6 +1000,48 @@
&& GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF)
return XEXP (op, 0);
+ /* Extending a widening multiplication should be canonicalized to
+ a wider widening multiplication. */
+ if (GET_CODE (op) == MULT)
+ {
+ rtx lhs = XEXP (op, 0);
+ rtx rhs = XEXP (op, 1);
+ enum rtx_code lcode = GET_CODE (lhs);
+ enum rtx_code rcode = GET_CODE (rhs);
+
+ /* Widening multiplies usually extend both operands, but sometimes
+ they use a shift to extract a portion of a register. */
+ if ((lcode == SIGN_EXTEND
+ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+ && (rcode == SIGN_EXTEND
+ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+ {
+ enum machine_mode lmode = GET_MODE (lhs);
+ enum machine_mode rmode = GET_MODE (rhs);
+ int bits;
+
+ if (lcode == ASHIFTRT)
+ /* Number of bits not shifted off the end. */
+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+ else /* lcode == SIGN_EXTEND */
+ /* Size of inner mode. */
+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+ if (rcode == ASHIFTRT)
+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+ else /* rcode == SIGN_EXTEND */
+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+ /* We can only widen multiplies if the result is mathematically
+ equivalent. I.e. if overflow was impossible. */
+ if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+ return simplify_gen_binary
+ (MULT, mode,
+ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode),
+ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode));
+ }
+ }
+
/* Check for a sign extension of a subreg of a promoted
variable, where the promotion is sign-extended, and the
target mode is the same as the variable's promotion. */
@@ -1071,6 +1113,48 @@
&& GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
return rtl_hooks.gen_lowpart_no_emit (mode, op);
+ /* Extending a widening multiplication should be canonicalized to
+ a wider widening multiplication. */
+ if (GET_CODE (op) == MULT)
+ {
+ rtx lhs = XEXP (op, 0);
+ rtx rhs = XEXP (op, 1);
+ enum rtx_code lcode = GET_CODE (lhs);
+ enum rtx_code rcode = GET_CODE (rhs);
+
+ /* Widening multiplies usually extend both operands, but sometimes
+ they use a shift to extract a portion of a register. */
+ if ((lcode == ZERO_EXTEND
+ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+ && (rcode == ZERO_EXTEND
+ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+ {
+ enum machine_mode lmode = GET_MODE (lhs);
+ enum machine_mode rmode = GET_MODE (rhs);
+ int bits;
+
+ if (lcode == LSHIFTRT)
+ /* Number of bits not shifted off the end. */
+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+ else /* lcode == ZERO_EXTEND */
+ /* Size of inner mode. */
+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+ if (rcode == LSHIFTRT)
+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+ else /* rcode == ZERO_EXTEND */
+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+ /* We can only widen multiplies if the result is mathematically
+ equivalent. I.e. if overflow was impossible. */
+ if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+ return simplify_gen_binary
+ (MULT, mode,
+ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode),
+ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode));
+ }
+ }
+
/* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>). */
if (GET_CODE (op) == ZERO_EXTEND)
return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0),
=== added file 'gcc/testsuite/gcc.target/arm/mla-2.c'
--- old/gcc/testsuite/gcc.target/arm/mla-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/mla-2.c 2011-06-02 12:32:16 +0000
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long foolong (long long x, short *a, short *b)
+{
+ return x + *a * *b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlaltb-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlaltb-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlaltb-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long int
+foo (long long x, int in)
+{
+ short a = in & 0xffff;
+ short b = (in & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlaltb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlaltt-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlaltt-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlaltt-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long int
+foo (long long x, int in1, int in2)
+{
+ short a = (in1 & 0xffff0000) >> 16;
+ short b = (in2 & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlaltt" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlatb-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlatb-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlatb-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int x, int in)
+{
+ short a = in & 0xffff;
+ short b = (in & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlatb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlatt-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlatt-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlatt-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int x, int in1, int in2)
+{
+ short a = (in1 & 0xffff0000) >> 16;
+ short b = (in2 & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlatt" } } */

View File

@@ -1,120 +0,0 @@
2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline:
gcc/
2011-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/neon.md (orndi3_neon): Actually split it.
2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
gcc/
2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/neon.md ("orn<mode>3_neon"): Canonicalize not.
("orndi3_neon"): Likewise.
("bic<mode>3_neon"): Likewise.
gcc/testsuite
2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* gcc.target/arm/neon-vorn-vbic.c: New test.
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000
+++ new/gcc/config/arm/neon.md 2011-06-04 00:04:47 +0000
@@ -783,30 +783,57 @@
(define_insn "orn<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
- (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
+ (match_operand:VDQ 1 "s_register_operand" "w")))]
"TARGET_NEON"
"vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "orndi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
- (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0")
- (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))]
+;; TODO: investigate whether we should disable
+;; this and bicdi3_neon for the A8 in line with the other
+;; changes above.
+(define_insn_and_split "orndi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
+ (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
"TARGET_NEON"
"@
vorn\t%P0, %P1, %P2
#
+ #
#"
- [(set_attr "neon_type" "neon_int_1,*,*")
- (set_attr "length" "*,8,8")]
+ "reload_completed &&
+ (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
+ "
+ {
+ if (TARGET_THUMB2)
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[5] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }
+ else
+ {
+ emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
+ emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
+ DONE;
+ }
+ }"
+ [(set_attr "neon_type" "neon_int_1,*,*,*")
+ (set_attr "length" "*,16,8,8")
+ (set_attr "arch" "any,a,t2,t2")]
)
(define_insn "bic<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
- (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
+ (match_operand:VDQ 1 "s_register_operand" "w")))]
"TARGET_NEON"
"vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "neon_type" "neon_int_1")]
=== added file 'gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c'
--- old/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 2011-06-03 23:50:02 +0000
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-add-options arm_neon } */
+
+void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
+{
+ int i;
+ for (i = 0; i < 9; i++)
+ c[i] = b[i] | (~a[i]);
+}
+void bic (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
+{
+ int i;
+ for (i = 0; i < 9; i++)
+ c[i] = b[i] & (~a[i]);
+}
+
+/* { dg-final { scan-assembler "vorn\\t" } } */
+/* { dg-final { scan-assembler "vbic\\t" } } */

View File

@@ -1,545 +0,0 @@
2011-06-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2011-06-03 Julian Brown <julian@codesourcery.com>
* config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100)
(strongarm1110): Use strongarm tuning.
* config/arm/arm-protos.h (tune_params): Add max_insns_skipped
field.
* config/arm/arm.c (arm_strongarm_tune): New.
(arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
(arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune)
(arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field
setting, using previous defaults or 1 for Cortex-A5.
(arm_option_override): Set max_insns_skipped from current tuning.
2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2011-06-02 Julian Brown <julian@codesourcery.com>
* config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning.
* config/arm/arm.c (arm_cortex_a5_branch_cost): New.
(arm_cortex_a5_tune): New.
2011-06-02 Julian Brown <julian@codesourcery.com>
* config/arm/arm-protos.h (tune_params): Add branch_cost hook.
* config/arm/arm.c (arm_default_branch_cost): New.
(arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
(arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune)
(arm_fa726_tune): Set branch_cost field using
arm_default_branch_cost.
* config/arm/arm.h (BRANCH_COST): Use branch_cost hook from
current_tune structure.
* dojump.c (tm_p.h): Include file.
2011-06-02 Julian Brown <julian@codesourcery.com>
* config/arm/arm-cores.def (arm1156t2-s, arm1156t2f-s): Use v6t2
tuning.
(cortex-a5, cortex-a8, cortex-a15, cortex-r4, cortex-r4f, cortex-m4)
(cortex-m3, cortex-m1, cortex-m0): Use cortex tuning.
* config/arm/arm-protos.h (tune_params): Add prefer_constant_pool
field.
* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
(arm_xscale_tune, arm_9e_tune, arm_cortex_a9_tune)
(arm_fa726te_tune): Add prefer_constant_pool setting.
(arm_v6t2_tune, arm_cortex_tune): New.
* config/arm/arm.h (TARGET_USE_MOVT): Make dependent on
prefer_constant_pool setting.
2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline
2011-06-01 Paul Brook <paul@codesourcery.com>
* config/arm/arm-cores.def: Add cortex-r5. Add DIV flags to
Cortex-A15.
* config/arm/arm-tune.md: Regenerate.
* config/arm/arm.c (FL_DIV): Rename...
(FL_THUMB_DIV): ... to this.
(FL_ARM_DIV): Define.
(FL_FOR_ARCH7R, FL_FOR_ARCH7M): Use FL_THUMB_DIV.
(arm_arch_hwdiv): Remove.
(arm_arch_thumb_hwdiv, arm_arch_arm_hwdiv): New variables.
(arm_issue_rate): Add cortexr5.
* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set
__ARM_ARCH_EXT_IDIV__.
(TARGET_IDIV): Define.
(arm_arch_hwdiv): Remove.
(arm_arch_arm_hwdiv, arm_arch_thumb_hwdiv): New prototypes.
* config/arm/arm.md (tune_cortexr4): Add cortexr5.
(divsi3, udivsi3): New patterns.
* config/arm/thumb2.md (divsi3, udivsi3): Remove.
* doc/invoke.texi: Document ARM -mcpu=cortex-r5
=== modified file 'gcc/config/arm/arm-cores.def'
--- old/gcc/config/arm/arm-cores.def 2011-01-03 20:52:22 +0000
+++ new/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000
@@ -70,10 +70,10 @@
/* V4 Architecture Processors */
ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul)
ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul)
ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul)
@@ -122,15 +122,16 @@
ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e)
ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e)
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e)
-ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e)
-ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
+ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2)
+ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
+ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
+ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e)
-ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
-ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
-ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e)
-ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
-ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e)
-ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e)
+ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex)
+ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
+ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex)
+ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
+ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex)
+ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex)
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-05-03 15:17:25 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000
@@ -219,9 +219,14 @@
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
int constant_limit;
+ /* Maximum number of instructions to conditionalise in
+ arm_final_prescan_insn. */
+ int max_insns_skipped;
int num_prefetch_slots;
int l1_cache_size;
int l1_cache_line_size;
+ bool prefer_constant_pool;
+ int (*branch_cost) (bool, bool);
};
extern const struct tune_params *current_tune;
=== modified file 'gcc/config/arm/arm-tune.md'
--- old/gcc/config/arm/arm-tune.md 2010-12-20 17:48:51 +0000
+++ new/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-05-11 14:49:48 +0000
+++ new/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000
@@ -255,6 +255,8 @@
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
+static int arm_default_branch_cost (bool, bool);
+static int arm_cortex_a5_branch_cost (bool, bool);
/* Table of machine attributes. */
@@ -672,12 +674,13 @@
#define FL_THUMB2 (1 << 16) /* Thumb-2. */
#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
profile. */
-#define FL_DIV (1 << 18) /* Hardware divide. */
+#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
#define FL_NEON (1 << 20) /* Neon instructions. */
#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
architecture. */
#define FL_ARCH7 (1 << 22) /* Architecture 7. */
+#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
@@ -704,8 +707,8 @@
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
-#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
-#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
+#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
+#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
@@ -791,7 +794,8 @@
int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction. */
-int arm_arch_hwdiv;
+int arm_arch_arm_hwdiv;
+int arm_arch_thumb_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
@@ -864,48 +868,117 @@
{
arm_slowmul_rtx_costs,
NULL,
- 3,
- ARM_PREFETCH_NOT_BENEFICIAL
+ 3, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost
};
const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL,
- 1,
- ARM_PREFETCH_NOT_BENEFICIAL
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost
+};
+
+/* StrongARM has early execution of branches, so a sequence that is worth
+ skipping is shorter. Set max_insns_skipped to a lower value. */
+
+const struct tune_params arm_strongarm_tune =
+{
+ arm_fastmul_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ 3, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost
};
const struct tune_params arm_xscale_tune =
{
arm_xscale_rtx_costs,
xscale_sched_adjust_cost,
- 2,
- ARM_PREFETCH_NOT_BENEFICIAL
+ 2, /* Constant limit. */
+ 3, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost
};
const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL,
- 1,
- ARM_PREFETCH_NOT_BENEFICIAL
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost
+};
+
+const struct tune_params arm_v6t2_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost
+};
+
+/* Generic Cortex tuning. Use more specific tunings if appropriate. */
+const struct tune_params arm_cortex_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost
+};
+
+/* Branches can be dual-issued on Cortex-A5, so conditional execution is
+ less appealing. Set max_insns_skipped to a low value. */
+
+const struct tune_params arm_cortex_a5_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ 1, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_cortex_a5_branch_cost
};
const struct tune_params arm_cortex_a9_tune =
{
arm_9e_rtx_costs,
cortex_a9_sched_adjust_cost,
- 1,
- ARM_PREFETCH_BENEFICIAL(4,32,32)
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_BENEFICIAL(4,32,32),
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost
};
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
fa726te_sched_adjust_cost,
- 1,
- ARM_PREFETCH_NOT_BENEFICIAL
+ 1, /* Constant limit. */
+ 5, /* Max cond insns. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ true, /* Prefer constant pool. */
+ arm_default_branch_cost
};
@@ -1711,7 +1784,8 @@
arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
- arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
+ arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
+ arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
/* If we are not using the default (ARM mode) section anchor offset
@@ -1991,12 +2065,7 @@
max_insns_skipped = 6;
}
else
- {
- /* StrongARM has early execution of branches, so a sequence
- that is worth skipping is shorter. */
- if (arm_tune_strongarm)
- max_insns_skipped = 3;
- }
+ max_insns_skipped = current_tune->max_insns_skipped;
/* Hot/Cold partitioning is not currently supported, since we can't
handle literal pool placement in that case. */
@@ -8211,6 +8280,21 @@
return cost;
}
+static int
+arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
+{
+ if (TARGET_32BIT)
+ return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
+ else
+ return (optimize > 0) ? 2 : 0;
+}
+
+static int
+arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
+{
+ return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
+}
+
static int fp_consts_inited = 0;
/* Only zero is valid for VFP. Other values are also valid for FPA. */
@@ -23123,6 +23207,7 @@
{
case cortexr4:
case cortexr4f:
+ case cortexr5:
case cortexa5:
case cortexa8:
case cortexa9:
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000
+++ new/gcc/config/arm/arm.h 2011-06-14 14:53:07 +0000
@@ -101,6 +101,8 @@
builtin_define ("__ARM_PCS"); \
builtin_define ("__ARM_EABI__"); \
} \
+ if (TARGET_IDIV) \
+ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \
} while (0)
/* The various ARM cores. */
@@ -282,7 +284,8 @@
(TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em))
/* Should MOVW/MOVT be used in preference to a constant pool. */
-#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size)
+#define TARGET_USE_MOVT \
+ (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
@@ -303,6 +306,10 @@
/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */
#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7)
+/* Nonzero if integer division instructions supported. */
+#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
+ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
+
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
then TARGET_AAPCS_BASED must be true -- but the converse does not
hold. TARGET_BPABI implies the use of the BPABI runtime library,
@@ -487,8 +494,11 @@
/* Nonzero if chip supports Thumb 2. */
extern int arm_arch_thumb2;
-/* Nonzero if chip supports integer division instruction. */
-extern int arm_arch_hwdiv;
+/* Nonzero if chip supports integer division instruction in ARM mode. */
+extern int arm_arch_arm_hwdiv;
+
+/* Nonzero if chip supports integer division instruction in Thumb mode. */
+extern int arm_arch_thumb_hwdiv;
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
@@ -2018,8 +2028,8 @@
/* Try to generate sequences that don't involve branches, we can then use
conditional instructions */
#define BRANCH_COST(speed_p, predictable_p) \
- (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \
- : (optimize > 0 ? 2 : 0))
+ (current_tune->branch_cost (speed_p, predictable_p))
+
/* Position Independent Code. */
/* We decide which register to use based on the compilation options and
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000
+++ new/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000
@@ -490,7 +490,7 @@
(define_attr "tune_cortexr4" "yes,no"
(const (if_then_else
- (eq_attr "tune" "cortexr4,cortexr4f")
+ (eq_attr "tune" "cortexr4,cortexr4f,cortexr5")
(const_string "yes")
(const_string "no"))))
@@ -3738,6 +3738,28 @@
(set_attr "predicable" "yes")]
)
+
+;; Division instructions
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (div:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_IDIV"
+ "sdiv%?\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "sdiv")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_IDIV"
+ "udiv%?\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "udiv")]
+)
+
;; Unary arithmetic insns
=== modified file 'gcc/config/arm/thumb2.md'
--- old/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000
+++ new/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000
@@ -779,26 +779,6 @@
(set_attr "length" "2")]
)
-(define_insn "divsi3"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
- (div:SI (match_operand:SI 1 "s_register_operand" "r")
- (match_operand:SI 2 "s_register_operand" "r")))]
- "TARGET_THUMB2 && arm_arch_hwdiv"
- "sdiv%?\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "insn" "sdiv")]
-)
-
-(define_insn "udivsi3"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
- (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
- (match_operand:SI 2 "s_register_operand" "r")))]
- "TARGET_THUMB2 && arm_arch_hwdiv"
- "udiv%?\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "insn" "udiv")]
-)
-
(define_insn "*thumb2_subsi_short"
[(set (match_operand:SI 0 "low_register_operand" "=l")
(minus:SI (match_operand:SI 1 "low_register_operand" "l")
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000
+++ new/gcc/doc/invoke.texi 2011-06-14 14:37:30 +0000
@@ -10208,7 +10208,8 @@
@samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp},
@samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s},
@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15},
-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3},
+@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5},
+@samp{cortex-m4}, @samp{cortex-m3},
@samp{cortex-m1},
@samp{cortex-m0},
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
=== modified file 'gcc/dojump.c'
--- old/gcc/dojump.c 2010-05-19 19:09:57 +0000
+++ new/gcc/dojump.c 2011-06-14 14:53:07 +0000
@@ -36,6 +36,7 @@
#include "ggc.h"
#include "basic-block.h"
#include "output.h"
+#include "tm_p.h"
static bool prefer_and_bit_test (enum machine_mode, int);
static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int);

View File

@@ -1,24 +0,0 @@
2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
gcc/
2011-06-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/49385
* config/arm/thumb2.md (*thumb2_movhi_insn): Make sure at least
one of the operands is a register.
Index: gcc-4_6-branch/gcc/config/arm/thumb2.md
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/thumb2.md 2011-09-16 20:22:40.000000000 -0700
+++ gcc-4_6-branch/gcc/config/arm/thumb2.md 2011-09-16 20:28:47.648690433 -0700
@@ -207,7 +207,9 @@
(define_insn "*thumb2_movhi_insn"
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
(match_operand:HI 1 "general_operand" "rI,n,r,m"))]
- "TARGET_THUMB2"
+ "TARGET_THUMB2
+ && (register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
"@
mov%?\\t%0, %1\\t%@ movhi
movw%?\\t%0, %L1\\t%@ movhi

View File

@@ -1,25 +0,0 @@
2011-06-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
LP 744754
2011-04-17 Chung-Lin Tang <cltang@codesourcery.com>
* config/arm/arm.c (neon_struct_mem_operand):
Support POST_INC/PRE_DEC memory operands.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000
+++ new/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000
@@ -9357,6 +9357,11 @@
if (GET_CODE (ind) == REG)
return arm_address_register_rtx_p (ind, 0);
+ /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
+ if (GET_CODE (ind) == POST_INC
+ || GET_CODE (ind) == PRE_DEC)
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
return FALSE;
}

View File

@@ -1,25 +0,0 @@
2011-07-03 Ira Rosen <ira.rosen@linaro.org>
Backport from FSF:
2011-06-12 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-data-refs.c (vect_peeling_hash_get_most_frequent):
Take number of iterations to peel into account for equally frequent
misalignment values.
=== modified file 'gcc/tree-vect-data-refs.c'
--- old/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000
+++ new/gcc/tree-vect-data-refs.c 2011-06-29 11:20:24 +0000
@@ -1256,7 +1256,9 @@
vect_peel_info elem = (vect_peel_info) *slot;
vect_peel_extended_info max = (vect_peel_extended_info) data;
- if (elem->count > max->peel_info.count)
+ if (elem->count > max->peel_info.count
+ || (elem->count == max->peel_info.count
+ && max->peel_info.npeel > elem->npeel))
{
max->peel_info.npeel = elem->npeel;
max->peel_info.count = elem->count;

View File

@@ -1,182 +0,0 @@
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* builtins.c (get_object_alignment): Fix comment.
* fold-const.c (get_pointer_modulus_and_residue): Remove
allow_func_align. Use get_object_alignment.
(fold_binary_loc): Update caller.
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-06-29 Richard Sandiford <richard.sandiford@linaro.org>
PR tree-optimization/49545
* builtins.c (get_object_alignment_1): Update function comment.
Do not use DECL_ALIGN for functions, but test
TARGET_PTRMEMFUNC_VBIT_LOCATION instead.
* fold-const.c (get_pointer_modulus_and_residue): Don't check
for functions here.
* tree-ssa-ccp.c (get_value_from_alignment): Likewise.
gcc/testsuite/
Backport from mainline:
2011-06-29 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.dg/torture/pr49169.c: Restrict to ARM and MIPS targets.
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-07-27 Richard Guenther <rguenther@suse.de>
PR tree-optimization/49169
* fold-const.c (get_pointer_modulus_and_residue): Don't rely on
the alignment of function decls.
gcc/testsuite/
Backport from mainline:
2011-07-27 Michael Hope <michael.hope@linaro.org>
Richard Sandiford <richard.sandiford@linaro.org>
PR tree-optimization/49169
* gcc.dg/torture/pr49169.c: New test.
=== modified file 'gcc/builtins.c'
--- old/gcc/builtins.c 2011-03-03 21:56:58 +0000
+++ new/gcc/builtins.c 2011-07-04 09:52:27 +0000
@@ -264,7 +264,14 @@
}
/* Return the alignment in bits of EXP, an object.
- Don't return more than MAX_ALIGN no matter what. */
+ Don't return more than MAX_ALIGN no matter what.
+
+ Note that the address (and thus the alignment) computed here is based
+ on the address to which a symbol resolves, whereas DECL_ALIGN is based
+ on the address at which an object is actually located. These two
+ addresses are not always the same. For example, on ARM targets,
+ the address &foo of a Thumb function foo() has the lowest bit set,
+ whereas foo() itself starts on an even address. */
unsigned int
get_object_alignment (tree exp, unsigned int max_align)
@@ -286,7 +293,21 @@
exp = DECL_INITIAL (exp);
if (DECL_P (exp)
&& TREE_CODE (exp) != LABEL_DECL)
- align = DECL_ALIGN (exp);
+ {
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ {
+ /* Function addresses can encode extra information besides their
+ alignment. However, if TARGET_PTRMEMFUNC_VBIT_LOCATION
+ allows the low bit to be used as a virtual bit, we know
+ that the address itself must be 2-byte aligned. */
+ if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn)
+ align = 2 * BITS_PER_UNIT;
+ else
+ align = BITS_PER_UNIT;
+ }
+ else
+ align = DECL_ALIGN (exp);
+ }
else if (CONSTANT_CLASS_P (exp))
{
align = TYPE_ALIGN (TREE_TYPE (exp));
=== modified file 'gcc/fold-const.c'
--- old/gcc/fold-const.c 2011-05-23 20:37:18 +0000
+++ new/gcc/fold-const.c 2011-07-04 09:52:27 +0000
@@ -9232,15 +9232,10 @@
0 <= N < M as is common. In general, the precise value of P is unknown.
M is chosen as large as possible such that constant N can be determined.
- Returns M and sets *RESIDUE to N.
-
- If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into
- account. This is not always possible due to PR 35705.
- */
+ Returns M and sets *RESIDUE to N. */
static unsigned HOST_WIDE_INT
-get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue,
- bool allow_func_align)
+get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue)
{
enum tree_code code;
@@ -9270,9 +9265,8 @@
}
}
- if (DECL_P (expr)
- && (allow_func_align || TREE_CODE (expr) != FUNCTION_DECL))
- return DECL_ALIGN_UNIT (expr);
+ if (DECL_P (expr))
+ return get_object_alignment (expr, ~0U) / BITS_PER_UNIT;
}
else if (code == POINTER_PLUS_EXPR)
{
@@ -9282,8 +9276,7 @@
op0 = TREE_OPERAND (expr, 0);
STRIP_NOPS (op0);
- modulus = get_pointer_modulus_and_residue (op0, residue,
- allow_func_align);
+ modulus = get_pointer_modulus_and_residue (op0, residue);
op1 = TREE_OPERAND (expr, 1);
STRIP_NOPS (op1);
@@ -11163,8 +11156,7 @@
unsigned HOST_WIDE_INT modulus, residue;
unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1);
- modulus = get_pointer_modulus_and_residue (arg0, &residue,
- integer_onep (arg1));
+ modulus = get_pointer_modulus_and_residue (arg0, &residue);
/* This works because modulus is a power of 2. If this weren't the
case, we'd have to replace it by its greatest power-of-2
=== added file 'gcc/testsuite/gcc.dg/torture/pr49169.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49169.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49169.c 2011-06-29 09:46:06 +0000
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { arm*-*-* || mips*-*-* } } } */
+
+#include <stdlib.h>
+#include <stdint.h>
+
+int
+main (void)
+{
+ void *p = main;
+ if ((intptr_t) p & 1)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-assembler "abort" } } */
=== modified file 'gcc/tree-ssa-ccp.c'
--- old/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000
+++ new/gcc/tree-ssa-ccp.c 2011-06-29 09:46:06 +0000
@@ -522,10 +522,6 @@
val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr),
TREE_OPERAND (base, 0), TREE_OPERAND (base, 1));
else if (base
- /* ??? While function decls have DECL_ALIGN their addresses
- may encode extra information in the lower bits on some
- targets (PR47239). Simply punt for function decls for now. */
- && TREE_CODE (base) != FUNCTION_DECL
&& ((align = get_object_alignment (base, BIGGEST_ALIGNMENT))
> BITS_PER_UNIT))
{

View File

@@ -1,138 +0,0 @@
2011-07-11 Revital Eres <revital.eres@linaro.org>
Backport from mainline -r175090.
gcc/
* ddg.c (add_intra_loop_mem_dep): New function.
(build_intra_loop_deps): Call it.
gcc/testsuite
* gcc.dg/sms-9.c: New file.
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-05-13 16:03:40 +0000
+++ new/gcc/ddg.c 2011-07-04 11:00:06 +0000
@@ -390,6 +390,33 @@
&PATTERN (insn2));
}
+/* Given two nodes, analyze their RTL insns and add intra-loop mem deps
+ to ddg G. */
+static void
+add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
+{
+
+ if ((from->cuid == to->cuid)
+ || !insns_may_alias_p (from->insn, to->insn))
+ /* Do not create edge if memory references have disjoint alias sets
+ or 'to' and 'from' are the same instruction. */
+ return;
+
+ if (mem_write_insn_p (from->insn))
+ {
+ if (mem_read_insn_p (to->insn))
+ create_ddg_dep_no_link (g, from, to,
+ DEBUG_INSN_P (to->insn)
+ ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
+ else
+ create_ddg_dep_no_link (g, from, to,
+ DEBUG_INSN_P (to->insn)
+ ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
+ }
+ else if (!mem_read_insn_p (to->insn))
+ create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
+}
+
/* Given two nodes, analyze their RTL insns and add inter-loop mem deps
to ddg G. */
static void
@@ -477,10 +504,22 @@
if (DEBUG_INSN_P (j_node->insn))
continue;
if (mem_access_insn_p (j_node->insn))
- /* Don't bother calculating inter-loop dep if an intra-loop dep
- already exists. */
+ {
+ /* Don't bother calculating inter-loop dep if an intra-loop dep
+ already exists. */
if (! TEST_BIT (dest_node->successors, j))
add_inter_loop_mem_dep (g, dest_node, j_node);
+ /* If -fmodulo-sched-allow-regmoves
+ is set certain anti-dep edges are not created.
+ It might be that these anti-dep edges are on the
+ path from one memory instruction to another such that
+ removing these edges could cause a violation of the
+ memory dependencies. Thus we add intra edges between
+ every two memory instructions in this case. */
+ if (flag_modulo_sched_allow_regmoves
+ && !TEST_BIT (dest_node->predecessors, j))
+ add_intra_loop_mem_dep (g, j_node, dest_node);
+ }
}
}
}
=== added file 'gcc/testsuite/gcc.dg/sms-9.c'
--- old/gcc/testsuite/gcc.dg/sms-9.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/sms-9.c 2011-07-04 11:00:06 +0000
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 -fmodulo-sched-allow-regmoves" } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+struct df_ref_info
+{
+ unsigned int *begin;
+ unsigned int *count;
+};
+
+extern void *memset (void *s, int c, __SIZE_TYPE__ n);
+
+
+__attribute__ ((noinline))
+ int
+ df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info,
+ int num, unsigned int start)
+{
+ unsigned int m = num;
+ unsigned int offset = 77;
+ unsigned int r;
+
+ for (r = start; r < m; r++)
+ {
+ ref_info->begin[r] = offset;
+ offset += ref_info->count[r];
+ ref_info->count[r] = 0;
+ }
+
+ return offset;
+}
+
+int
+main ()
+{
+ struct df_ref_info temp;
+ int num = 100;
+ unsigned int start = 5;
+ int i, offset;
+
+ temp.begin = malloc (100 * sizeof (unsigned int));
+ temp.count = malloc (100 * sizeof (unsigned int));
+
+ memset (temp.begin, 0, sizeof (unsigned int) * num);
+ memset (temp.count, 0, sizeof (unsigned int) * num);
+
+ for (i = 0; i < num; i++)
+ temp.count[i] = i + 1;
+
+ offset = df_reorganize_refs_by_reg_by_insn (&temp, num, start);
+
+ if (offset != 5112)
+ abort ();
+
+ free (temp.begin);
+ free (temp.count);
+ return 0;
+}

View File

@@ -1,211 +0,0 @@
2011-07-11 Revital Eres <revital.eres@linaro.org>
Backport from mainline -r175091
gcc/
* modulo-sched.c (struct ps_insn): Remove row_rest_count
field.
(struct partial_schedule): Add rows_length field.
(verify_partial_schedule): Check rows_length.
(ps_insert_empty_row): Handle rows_length.
(create_partial_schedule): Likewise.
(free_partial_schedule): Likewise.
(reset_partial_schedule): Likewise.
(create_ps_insn): Remove rest_count argument.
(remove_node_from_ps): Update rows_length.
(add_node_to_ps): Update rows_length and call create_ps_insn without
passing row_rest_count.
(rotate_partial_schedule): Update rows_length.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000
+++ new/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000
@@ -134,8 +134,6 @@
ps_insn_ptr next_in_row,
prev_in_row;
- /* The number of nodes in the same row that come after this node. */
- int row_rest_count;
};
/* Holds the partial schedule as an array of II rows. Each entry of the
@@ -149,6 +147,12 @@
/* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
ps_insn_ptr *rows;
+ /* rows_length[i] holds the number of instructions in the row.
+ It is used only (as an optimization) to back off quickly from
+ trying to schedule a node in a full row; that is, to avoid running
+ through futile DFA state transitions. */
+ int *rows_length;
+
/* The earliest absolute cycle of an insn in the partial schedule. */
int min_cycle;
@@ -1907,6 +1911,7 @@
int ii = ps->ii;
int new_ii = ii + 1;
int row;
+ int *rows_length_new;
verify_partial_schedule (ps, sched_nodes);
@@ -1921,9 +1926,11 @@
rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
+ rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
for (row = 0; row < split_row; row++)
{
rows_new[row] = ps->rows[row];
+ rows_length_new[row] = ps->rows_length[row];
ps->rows[row] = NULL;
for (crr_insn = rows_new[row];
crr_insn; crr_insn = crr_insn->next_in_row)
@@ -1944,6 +1951,7 @@
for (row = split_row; row < ii; row++)
{
rows_new[row + 1] = ps->rows[row];
+ rows_length_new[row + 1] = ps->rows_length[row];
ps->rows[row] = NULL;
for (crr_insn = rows_new[row + 1];
crr_insn; crr_insn = crr_insn->next_in_row)
@@ -1965,6 +1973,8 @@
+ (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
free (ps->rows);
ps->rows = rows_new;
+ free (ps->rows_length);
+ ps->rows_length = rows_length_new;
ps->ii = new_ii;
gcc_assert (ps->min_cycle >= 0);
@@ -2040,16 +2050,23 @@
ps_insn_ptr crr_insn;
for (row = 0; row < ps->ii; row++)
- for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
- {
- ddg_node_ptr u = crr_insn->node;
-
- gcc_assert (TEST_BIT (sched_nodes, u->cuid));
- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
- popcount (sched_nodes) == number of insns in ps. */
- gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
- gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- }
+ {
+ int length = 0;
+
+ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
+ {
+ ddg_node_ptr u = crr_insn->node;
+
+ length++;
+ gcc_assert (TEST_BIT (sched_nodes, u->cuid));
+ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
+ popcount (sched_nodes) == number of insns in ps. */
+ gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
+ gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
+ }
+
+ gcc_assert (ps->rows_length[row] == length);
+ }
}
@@ -2455,6 +2472,7 @@
{
partial_schedule_ptr ps = XNEW (struct partial_schedule);
ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
+ ps->rows_length = (int *) xcalloc (ii, sizeof (int));
ps->ii = ii;
ps->history = history;
ps->min_cycle = INT_MAX;
@@ -2493,6 +2511,7 @@
return;
free_ps_insns (ps);
free (ps->rows);
+ free (ps->rows_length);
free (ps);
}
@@ -2510,6 +2529,8 @@
ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
* sizeof (ps_insn_ptr));
memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
+ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
+ memset (ps->rows_length, 0, new_ii * sizeof (int));
ps->ii = new_ii;
ps->min_cycle = INT_MAX;
ps->max_cycle = INT_MIN;
@@ -2538,14 +2559,13 @@
/* Creates an object of PS_INSN and initializes it to the given parameters. */
static ps_insn_ptr
-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
+create_ps_insn (ddg_node_ptr node, int cycle)
{
ps_insn_ptr ps_i = XNEW (struct ps_insn);
ps_i->node = node;
ps_i->next_in_row = NULL;
ps_i->prev_in_row = NULL;
- ps_i->row_rest_count = rest_count;
ps_i->cycle = cycle;
return ps_i;
@@ -2578,6 +2598,8 @@
if (ps_i->next_in_row)
ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
}
+
+ ps->rows_length[row] -= 1;
free (ps_i);
return true;
}
@@ -2734,17 +2756,12 @@
sbitmap must_precede, sbitmap must_follow)
{
ps_insn_ptr ps_i;
- int rest_count = 1;
int row = SMODULO (cycle, ps->ii);
- if (ps->rows[row]
- && ps->rows[row]->row_rest_count >= issue_rate)
+ if (ps->rows_length[row] >= issue_rate)
return NULL;
- if (ps->rows[row])
- rest_count += ps->rows[row]->row_rest_count;
-
- ps_i = create_ps_insn (node, rest_count, cycle);
+ ps_i = create_ps_insn (node, cycle);
/* Finds and inserts PS_I according to MUST_FOLLOW and
MUST_PRECEDE. */
@@ -2754,6 +2771,7 @@
return NULL;
}
+ ps->rows_length[row] += 1;
return ps_i;
}
@@ -2909,11 +2927,16 @@
for (i = 0; i < backward_rotates; i++)
{
ps_insn_ptr first_row = ps->rows[0];
+ int first_row_length = ps->rows_length[0];
for (row = 0; row < last_row; row++)
- ps->rows[row] = ps->rows[row+1];
+ {
+ ps->rows[row] = ps->rows[row + 1];
+ ps->rows_length[row] = ps->rows_length[row + 1];
+ }
ps->rows[last_row] = first_row;
+ ps->rows_length[last_row] = first_row_length;
}
ps->max_cycle -= start_cycle;

View File

@@ -1,350 +0,0 @@
2011-07-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
2011-06-22 Dmitry Plotnikov <dplotnikov@ispras.ru>
Dmitry Melnik <dm@ispras.ru>
* config/arm/arm.c (neon_immediate_valid_for_shift): New function.
(neon_output_shift_immediate): Ditto.
* config/arm/arm-protos.h (neon_immediate_valid_for_shift): New
prototype.
(neon_output_shift_immediate): Ditto.
* config/arm/neon.md (vashl<mode>3): Modified constraint.
(vashr<mode>3_imm): New insn pattern.
(vlshr<mode>3_imm): Ditto.
(vashr<mode>3): Modified constraint.
(vlshr<mode>3): Ditto.
* config/arm/predicates.md (imm_for_neon_lshift_operand): New
predicate.
(imm_for_neon_rshift_operand): Ditto.
(imm_lshift_or_reg_neon): Ditto.
(imm_rshift_or_reg_neon): Ditto.
* optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000
@@ -64,8 +64,12 @@
extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
int *);
+extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *,
+ int *, bool);
extern char *neon_output_logic_immediate (const char *, rtx *,
enum machine_mode, int, int);
+extern char *neon_output_shift_immediate (const char *, char, rtx *,
+ enum machine_mode, int, bool);
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
rtx (*) (rtx, rtx, rtx));
extern rtx neon_make_constant (rtx);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000
+++ new/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000
@@ -8863,6 +8863,66 @@
return 1;
}
+/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
+ the immediate is valid, write a constant suitable for using as an operand
+ to VSHR/VSHL to *MODCONST and the corresponding element width to
+ *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
+ because they have different valid immediate ranges. */
+
+int
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+ rtx *modconst, int *elementwidth,
+ bool isleftshift)
+{
+ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+ unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
+ unsigned HOST_WIDE_INT last_elt = 0;
+ unsigned HOST_WIDE_INT maxshift;
+
+ /* Check that all elements are the same integer constant. */
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (op, i);
+ unsigned HOST_WIDE_INT elpart;
+
+ if (GET_CODE (el) == CONST_INT)
+ elpart = INTVAL (el);
+ else if (GET_CODE (el) == CONST_DOUBLE)
+ return 0;
+ else
+ gcc_unreachable ();
+
+ if (i != 0 && elpart != last_elt)
+ return 0;
+
+ last_elt = elpart;
+ }
+
+ /* Shift less than element size. */
+ maxshift = innersize * 8;
+
+ if (isleftshift)
+ {
+ /* Left shift immediate value can be from 0 to <size>-1. */
+ if (last_elt >= maxshift)
+ return 0;
+ }
+ else
+ {
+ /* Right shift immediate value can be from 1 to <size>. */
+ if (last_elt == 0 || last_elt > maxshift)
+ return 0;
+ }
+
+ if (elementwidth)
+ *elementwidth = innersize * 8;
+
+ if (modconst)
+ *modconst = CONST_VECTOR_ELT (op, 0);
+
+ return 1;
+}
+
/* Return a string suitable for output of Neon immediate logic operation
MNEM. */
@@ -8885,6 +8945,28 @@
return templ;
}
+/* Return a string suitable for output of Neon immediate shift operation
+ (VSHR or VSHL) MNEM. */
+
+char *
+neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
+ enum machine_mode mode, int quad,
+ bool isleftshift)
+{
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
+ gcc_assert (is_valid != 0);
+
+ if (quad)
+ sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
+ else
+ sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
+
+ return templ;
+}
+
/* Output a sequence of pairwise operations to implement a reduction.
NOTE: We do "too much work" here, because pairwise operations work on two
registers-worth of operands in one go. Unfortunately we can't exploit those
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-07-01 09:19:21 +0000
+++ new/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000
@@ -956,15 +956,57 @@
; SImode elements.
(define_insn "vashl<mode>3"
- [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
- (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
- (match_operand:VDQIW 2 "s_register_operand" "w")))]
- "TARGET_NEON"
- "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set (attr "neon_type")
- (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
- (const_string "neon_vshl_ddd")
- (const_string "neon_shift_3")))]
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
+ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
+ (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
+ "TARGET_NEON"
+ {
+ switch (which_alternative)
+ {
+ case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
+ <MODE>mode,
+ VALID_NEON_QREG_MODE (<MODE>mode),
+ true);
+ default: gcc_unreachable ();
+ }
+ }
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+(define_insn "vashr<mode>3_imm"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
+ "TARGET_NEON"
+ {
+ return neon_output_shift_immediate ("vshr", 's', &operands[2],
+ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
+ false);
+ }
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+(define_insn "vlshr<mode>3_imm"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
+ "TARGET_NEON"
+ {
+ return neon_output_shift_immediate ("vshr", 'u', &operands[2],
+ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
+ false);
+ }
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
)
; Used for implementing logical shift-right, which is a left-shift by a negative
@@ -1004,28 +1046,34 @@
(define_expand "vashr<mode>3"
[(set (match_operand:VDQIW 0 "s_register_operand" "")
(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
- (match_operand:VDQIW 2 "s_register_operand" "")))]
+ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
"TARGET_NEON"
{
rtx neg = gen_reg_rtx (<MODE>mode);
-
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
-
+ if (REG_P (operands[2]))
+ {
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
DONE;
})
(define_expand "vlshr<mode>3"
[(set (match_operand:VDQIW 0 "s_register_operand" "")
(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
- (match_operand:VDQIW 2 "s_register_operand" "")))]
+ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
"TARGET_NEON"
{
rtx neg = gen_reg_rtx (<MODE>mode);
-
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
-
+ if (REG_P (operands[2]))
+ {
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
DONE;
})
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000
+++ new/gcc/config/arm/predicates.md 2011-07-04 14:03:49 +0000
@@ -585,6 +585,26 @@
return neon_immediate_valid_for_move (op, mode, NULL, NULL);
})
+(define_predicate "imm_for_neon_lshift_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true);
+})
+
+(define_predicate "imm_for_neon_rshift_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false);
+})
+
+(define_predicate "imm_lshift_or_reg_neon"
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "imm_for_neon_lshift_operand")))
+
+(define_predicate "imm_rshift_or_reg_neon"
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "imm_for_neon_rshift_operand")))
+
(define_predicate "imm_for_neon_logic_operand"
(match_code "const_vector")
{
=== modified file 'gcc/optabs.c'
--- old/gcc/optabs.c 2011-03-04 10:27:10 +0000
+++ new/gcc/optabs.c 2011-07-04 14:03:49 +0000
@@ -6171,6 +6171,9 @@
init_optab (usashl_optab, US_ASHIFT);
init_optab (ashr_optab, ASHIFTRT);
init_optab (lshr_optab, LSHIFTRT);
+ init_optabv (vashl_optab, ASHIFT);
+ init_optabv (vashr_optab, ASHIFTRT);
+ init_optabv (vlshr_optab, LSHIFTRT);
init_optab (rotl_optab, ROTATE);
init_optab (rotr_optab, ROTATERT);
init_optab (smin_optab, SMIN);
=== added file 'gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c'
--- old/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 2011-07-04 14:03:49 +0000
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */
+
+/* Verify that VSHR immediate is used. */
+void f1(int n, unsigned int x[], unsigned int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] >> 3;
+}
=== added file 'gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c'
--- old/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 2011-07-04 14:03:49 +0000
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */
+
+/* Verify that VSHL immediate is used. */
+void f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
=== added file 'gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c'
--- old/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 2011-07-04 14:03:49 +0000
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */
+
+/* Verify that VSHR immediate is used. */
+void f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] >> 3;
+}

View File

@@ -1,119 +0,0 @@
2011-07-13 Richard Sandiford <richard.sandiford@linaro.org>
Backport from mainline:
gcc/
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
* reload1.c (choose_reload_regs): Use mode sizes to check whether
an old reload register completely defines the required value.
gcc/testsuite/
2011-07-07 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/arm/neon-modes-3.c: New test.
=== modified file 'gcc/reload1.c'
--- old/gcc/reload1.c 2011-07-01 09:19:21 +0000
+++ new/gcc/reload1.c 2011-07-11 10:06:50 +0000
@@ -6451,6 +6451,8 @@
if (regno >= 0
&& reg_last_reload_reg[regno] != 0
+ && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno]))
+ >= GET_MODE_SIZE (mode) + byte)
#ifdef CANNOT_CHANGE_MODE_CLASS
/* Verify that the register it's in can be used in
mode MODE. */
@@ -6462,24 +6464,12 @@
{
enum reg_class rclass = rld[r].rclass, last_class;
rtx last_reg = reg_last_reload_reg[regno];
- enum machine_mode need_mode;
i = REGNO (last_reg);
i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode);
last_class = REGNO_REG_CLASS (i);
- if (byte == 0)
- need_mode = mode;
- else
- need_mode
- = smallest_mode_for_size
- (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT,
- GET_MODE_CLASS (mode) == MODE_PARTIAL_INT
- ? MODE_INT : GET_MODE_CLASS (mode));
-
- if ((GET_MODE_SIZE (GET_MODE (last_reg))
- >= GET_MODE_SIZE (need_mode))
- && reg_reloaded_contents[i] == regno
+ if (reg_reloaded_contents[i] == regno
&& TEST_HARD_REG_BIT (reg_reloaded_valid, i)
&& HARD_REGNO_MODE_OK (i, rld[r].mode)
&& (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i)
=== added file 'gcc/testsuite/gcc.target/arm/neon-modes-3.c'
--- old/gcc/testsuite/gcc.target/arm/neon-modes-3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-modes-3.c 2011-07-11 10:06:50 +0000
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n)
+{
+ float32x4x4_t a5, a6, a7, a8, a9;
+ int i;
+
+ a5 = *src;
+ a6 = *src;
+ a7 = *src;
+ a8 = *src;
+ a9 = *src;
+ while (n--)
+ {
+ for (i = 0; i < 8; i++)
+ {
+ float32x4x4_t a0, a1, a2, a3, a4;
+
+ a0 = *src;
+ a1 = *src;
+ a2 = *src;
+ a3 = *src;
+ a4 = *src;
+ *src = a0;
+ *dest = a0.val[0];
+ *dest = a0.val[3];
+ *src = a1;
+ *dest = a1.val[0];
+ *dest = a1.val[3];
+ *src = a2;
+ *dest = a2.val[0];
+ *dest = a2.val[3];
+ *src = a3;
+ *dest = a3.val[0];
+ *dest = a3.val[3];
+ *src = a4;
+ *dest = a4.val[0];
+ *dest = a4.val[3];
+ }
+ *src = a5;
+ *dest = a5.val[0];
+ *dest = a5.val[3];
+ *src = a6;
+ *dest = a6.val[0];
+ *dest = a6.val[3];
+ *src = a7;
+ *dest = a7.val[0];
+ *dest = a7.val[3];
+ *src = a8;
+ *dest = a8.val[0];
+ *dest = a8.val[3];
+ *src = a9;
+ *dest = a9.val[0];
+ *dest = a9.val[3];
+ }
+}

View File

@@ -1,67 +0,0 @@
2011-07-15 Michael Hope <michael.hope@linaro.org>
gcc/
Backport from mainline:
2011-04-05 Eric Botcazou <ebotcazou@adacore.com>
* ifcvt.c (cond_exec_process_insns): Disallow converting a block
that contains the prologue.
gcc/testsuite/
Backport from mainline:
2011-04-01 Bernd Schmidt <bernds@codesourcery.com>
* gcc.c-torture/compile/20110401-1.c: New test.
=== modified file 'gcc/ifcvt.c'
--- old/gcc/ifcvt.c 2010-12-14 00:23:40 +0000
+++ new/gcc/ifcvt.c 2011-07-11 04:02:28 +0000
@@ -1,5 +1,6 @@
/* If-conversion support.
- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010,
+ 2011
Free Software Foundation, Inc.
This file is part of GCC.
@@ -304,6 +305,10 @@
for (insn = start; ; insn = NEXT_INSN (insn))
{
+ /* dwarf2out can't cope with conditional prologues. */
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+ return FALSE;
+
if (NOTE_P (insn) || DEBUG_INSN_P (insn))
goto insn_done;
=== added file 'gcc/testsuite/gcc.c-torture/compile/20110401-1.c'
--- old/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 2011-07-11 04:02:28 +0000
@@ -0,0 +1,22 @@
+void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len)
+{
+ int k;
+ unsigned char temp[4];
+ if (len < 128) {
+ if (ans != ((void *) 0))
+ ans[0] = (unsigned char) len;
+ *ans_len = 1;
+ } else {
+ k = 0;
+ while (len) {
+ temp[k++] = len & 0xFF;
+ len = len >> 8;
+ }
+ *ans_len = k + 1;
+ if (ans != ((void *) 0)) {
+ ans[0] = ((unsigned char) k & 0x7F) + 128;
+ while (k--)
+ ans[*ans_len - 1 - k] = temp[k];
+ }
+ }
+}

View File

@@ -1,46 +0,0 @@
2011-07-15 Michael Hope <michael.hope@linaro.org>
gcc/
Backport from mainline:
2011-03-22 Eric Botcazou <ebotcazou@adacore.com>
* combine.c (simplify_set): Try harder to find the best CC mode when
simplifying a nested COMPARE on the RHS.
=== modified file 'gcc/combine.c'
--- old/gcc/combine.c 2011-05-27 14:31:18 +0000
+++ new/gcc/combine.c 2011-07-11 03:52:31 +0000
@@ -6287,10 +6287,18 @@
enum rtx_code new_code;
rtx op0, op1, tmp;
int other_changed = 0;
+ rtx inner_compare = NULL_RTX;
enum machine_mode compare_mode = GET_MODE (dest);
if (GET_CODE (src) == COMPARE)
- op0 = XEXP (src, 0), op1 = XEXP (src, 1);
+ {
+ op0 = XEXP (src, 0), op1 = XEXP (src, 1);
+ if (GET_CODE (op0) == COMPARE && op1 == const0_rtx)
+ {
+ inner_compare = op0;
+ op0 = XEXP (inner_compare, 0), op1 = XEXP (inner_compare, 1);
+ }
+ }
else
op0 = src, op1 = CONST0_RTX (GET_MODE (src));
@@ -6332,6 +6340,12 @@
need to use a different CC mode here. */
if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
compare_mode = GET_MODE (op0);
+ else if (inner_compare
+ && GET_MODE_CLASS (GET_MODE (inner_compare)) == MODE_CC
+ && new_code == old_code
+ && op0 == XEXP (inner_compare, 0)
+ && op1 == XEXP (inner_compare, 1))
+ compare_mode = GET_MODE (inner_compare);
else
compare_mode = SELECT_CC_MODE (new_code, op0, op1);

View File

@@ -1,192 +0,0 @@
2011-07-15 Michael Hope <michael.hope@linaro.org>
gcc/
Backport from mainline:
2011-06-29 Nathan Sidwell <nathan@codesourcery.com>
* config/arm/unwind-arm.c (enum __cxa_type_match_result): New.
(__cxa_type_match): Correct declaration.
(__gnu_unwind_pr_common): Reconstruct
additional indirection when __cxa_type_match returns
succeeded_with_ptr_to_base.
libstdc++-v3/
Backport from mainline:
2011-06-29 Nathan Sidwell <nathan@codesourcery.com>
* libsupc++/eh_arm.cc (__cxa_type_match): Construct address of
thrown object here. Return ctm_succeeded_with_ptr_to_base for all
pointer cases.
=== modified file 'gcc/config/arm/unwind-arm.c'
--- old/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000
+++ new/gcc/config/arm/unwind-arm.c 2011-07-11 03:35:44 +0000
@@ -32,13 +32,18 @@
typedef unsigned char bool;
typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */
+enum __cxa_type_match_result
+ {
+ ctm_failed = 0,
+ ctm_succeeded = 1,
+ ctm_succeeded_with_ptr_to_base = 2
+ };
void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp);
bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp);
-bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp,
- const type_info *rttip,
- bool is_reference,
- void **matched_object);
+enum __cxa_type_match_result __attribute__((weak)) __cxa_type_match
+ (_Unwind_Control_Block *ucbp, const type_info *rttip,
+ bool is_reference, void **matched_object);
_Unwind_Ptr __attribute__((weak))
__gnu_Unwind_Find_exidx (_Unwind_Ptr, int *);
@@ -1107,6 +1112,7 @@
_uw rtti;
bool is_reference = (data[0] & uint32_highbit) != 0;
void *matched;
+ enum __cxa_type_match_result match_type;
/* Check for no-throw areas. */
if (data[1] == (_uw) -2)
@@ -1118,17 +1124,31 @@
{
/* Match a catch specification. */
rtti = _Unwind_decode_target2 ((_uw) &data[1]);
- if (!__cxa_type_match (ucbp, (type_info *) rtti,
- is_reference,
- &matched))
- matched = (void *)0;
+ match_type = __cxa_type_match (ucbp,
+ (type_info *) rtti,
+ is_reference,
+ &matched);
}
+ else
+ match_type = ctm_succeeded;
- if (matched)
+ if (match_type)
{
ucbp->barrier_cache.sp =
_Unwind_GetGR (context, R_SP);
- ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
+ // ctm_succeeded_with_ptr_to_base really
+ // means _c_t_m indirected the pointer
+ // object. We have to reconstruct the
+ // additional pointer layer by using a temporary.
+ if (match_type == ctm_succeeded_with_ptr_to_base)
+ {
+ ucbp->barrier_cache.bitpattern[2]
+ = (_uw) matched;
+ ucbp->barrier_cache.bitpattern[0]
+ = (_uw) &ucbp->barrier_cache.bitpattern[2];
+ }
+ else
+ ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
ucbp->barrier_cache.bitpattern[1] = (_uw) data;
return _URC_HANDLER_FOUND;
}
=== modified file 'libstdc++-v3/libsupc++/eh_arm.cc'
--- old/libstdc++-v3/libsupc++/eh_arm.cc 2011-01-03 20:52:22 +0000
+++ new/libstdc++-v3/libsupc++/eh_arm.cc 2011-07-11 03:35:44 +0000
@@ -30,10 +30,11 @@
using namespace __cxxabiv1;
-// Given the thrown type THROW_TYPE, pointer to a variable containing a
-// pointer to the exception object THROWN_PTR_P and a type CATCH_TYPE to
-// compare against, return whether or not there is a match and if so,
-// update *THROWN_PTR_P.
+// Given the thrown type THROW_TYPE, exception object UE_HEADER and a
+// type CATCH_TYPE to compare against, return whether or not there is
+// a match and if so, update *THROWN_PTR_P to point to either the
+// type-matched object, or in the case of a pointer type, the object
+// pointed to by the pointer.
extern "C" __cxa_type_match_result
__cxa_type_match(_Unwind_Exception* ue_header,
@@ -41,51 +42,51 @@
bool is_reference __attribute__((__unused__)),
void** thrown_ptr_p)
{
- bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class);
- bool foreign_exception = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class);
- bool dependent_exception =
- __is_dependent_exception(ue_header->exception_class);
+ bool forced_unwind
+ = __is_gxx_forced_unwind_class(ue_header->exception_class);
+ bool foreign_exception
+ = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class);
+ bool dependent_exception
+ = __is_dependent_exception(ue_header->exception_class);
__cxa_exception* xh = __get_exception_header_from_ue(ue_header);
__cxa_dependent_exception *dx = __get_dependent_exception_from_ue(ue_header);
const std::type_info* throw_type;
+ void *thrown_ptr = 0;
if (forced_unwind)
throw_type = &typeid(abi::__forced_unwind);
else if (foreign_exception)
throw_type = &typeid(abi::__foreign_exception);
- else if (dependent_exception)
- throw_type = __get_exception_header_from_obj
- (dx->primaryException)->exceptionType;
else
- throw_type = xh->exceptionType;
-
- void* thrown_ptr = *thrown_ptr_p;
+ {
+ if (dependent_exception)
+ xh = __get_exception_header_from_obj (dx->primaryException);
+ throw_type = xh->exceptionType;
+ // We used to require the caller set the target of thrown_ptr_p,
+ // but that's incorrect -- the EHABI makes no such requirement
+ // -- and not all callers will set it. Fortunately callers that
+ // do initialize will always pass us the value we calculate
+ // here, so there's no backwards compatibility problem.
+ thrown_ptr = __get_object_from_ue (ue_header);
+ }
+
+ __cxa_type_match_result result = ctm_succeeded;
// Pointer types need to adjust the actual pointer, not
// the pointer to pointer that is the exception object.
// This also has the effect of passing pointer types
// "by value" through the __cxa_begin_catch return value.
if (throw_type->__is_pointer_p())
- thrown_ptr = *(void**) thrown_ptr;
+ {
+ thrown_ptr = *(void**) thrown_ptr;
+ // We need to indicate the indirection to our caller.
+ result = ctm_succeeded_with_ptr_to_base;
+ }
if (catch_type->__do_catch(throw_type, &thrown_ptr, 1))
{
*thrown_ptr_p = thrown_ptr;
-
- if (typeid(*catch_type) == typeid (typeid(void*)))
- {
- const __pointer_type_info *catch_pointer_type =
- static_cast<const __pointer_type_info *> (catch_type);
- const __pointer_type_info *throw_pointer_type =
- static_cast<const __pointer_type_info *> (throw_type);
-
- if (typeid (*catch_pointer_type->__pointee) != typeid (void)
- && (*catch_pointer_type->__pointee !=
- *throw_pointer_type->__pointee))
- return ctm_succeeded_with_ptr_to_base;
- }
-
- return ctm_succeeded;
+ return result;
}
return ctm_failed;

File diff suppressed because one or more lines are too long

View File

@@ -1,741 +0,0 @@
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
PR middle-end/49736
* expr.c (all_zeros_p): Undo bogus part of last change.
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
Backport from mainline:
gcc/cp/
2011-07-13 Richard Sandiford <richard.sandiford@linaro.org>
* typeck2.c (split_nonconstant_init_1): Pass the initializer directly,
rather than a pointer to it. Return true if the whole of the value
was initialized by the generated statements. Use
complete_ctor_at_level_p instead of count_type_elements.
gcc/
2011-07-13 Richard Sandiford <richard.sandiford@linaro.org>
* tree.h (categorize_ctor_elements): Remove comment. Fix long line.
(count_type_elements): Delete.
(complete_ctor_at_level_p): Declare.
* expr.c (flexible_array_member_p): New function, split out from...
(count_type_elements): ...here. Make static. Replace allow_flexarr
parameter with for_ctor_p. When for_ctor_p is true, return the
number of elements that should appear in the top-level constructor,
otherwise return an estimate of the number of scalars.
(categorize_ctor_elements): Replace p_must_clear with p_complete.
(categorize_ctor_elements_1): Likewise. Use complete_ctor_at_level_p.
(complete_ctor_at_level_p): New function, borrowing union logic
from old categorize_ctor_elements_1.
(mostly_zeros_p): Return true if the constructor is not complete.
(all_zeros_p): Update call to categorize_ctor_elements.
* gimplify.c (gimplify_init_constructor): Update call to
categorize_ctor_elements. Don't call count_type_elements.
Unconditionally prevent clearing for variable-sized types,
otherwise rely on categorize_ctor_elements to detect
incomplete initializers.
gcc/testsuite/
2011-07-13 Chung-Lin Tang <cltang@codesourcery.com>
* gcc.target/arm/pr48183.c: New test.
=== modified file 'gcc/cp/typeck2.c'
--- old/gcc/cp/typeck2.c 2011-05-20 21:29:14 +0000
+++ new/gcc/cp/typeck2.c 2011-07-13 13:17:31 +0000
@@ -473,18 +473,20 @@
/* The recursive part of split_nonconstant_init. DEST is an lvalue
- expression to which INIT should be assigned. INIT is a CONSTRUCTOR. */
+ expression to which INIT should be assigned. INIT is a CONSTRUCTOR.
+ Return true if the whole of the value was initialized by the
+ generated statements. */
-static void
-split_nonconstant_init_1 (tree dest, tree *initp)
+static bool
+split_nonconstant_init_1 (tree dest, tree init)
{
unsigned HOST_WIDE_INT idx;
- tree init = *initp;
tree field_index, value;
tree type = TREE_TYPE (dest);
tree inner_type = NULL;
bool array_type_p = false;
- HOST_WIDE_INT num_type_elements, num_initialized_elements;
+ bool complete_p = true;
+ HOST_WIDE_INT num_split_elts = 0;
switch (TREE_CODE (type))
{
@@ -496,7 +498,6 @@
case RECORD_TYPE:
case UNION_TYPE:
case QUAL_UNION_TYPE:
- num_initialized_elements = 0;
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx,
field_index, value)
{
@@ -519,13 +520,14 @@
sub = build3 (COMPONENT_REF, inner_type, dest, field_index,
NULL_TREE);
- split_nonconstant_init_1 (sub, &value);
+ if (!split_nonconstant_init_1 (sub, value))
+ complete_p = false;
+ num_split_elts++;
}
else if (!initializer_constant_valid_p (value, inner_type))
{
tree code;
tree sub;
- HOST_WIDE_INT inner_elements;
/* FIXME: Ordered removal is O(1) so the whole function is
worst-case quadratic. This could be fixed using an aside
@@ -549,21 +551,9 @@
code = build_stmt (input_location, EXPR_STMT, code);
add_stmt (code);
- inner_elements = count_type_elements (inner_type, true);
- if (inner_elements < 0)
- num_initialized_elements = -1;
- else if (num_initialized_elements >= 0)
- num_initialized_elements += inner_elements;
- continue;
+ num_split_elts++;
}
}
-
- num_type_elements = count_type_elements (type, true);
- /* If all elements of the initializer are non-constant and
- have been split out, we don't need the empty CONSTRUCTOR. */
- if (num_type_elements > 0
- && num_type_elements == num_initialized_elements)
- *initp = NULL;
break;
case VECTOR_TYPE:
@@ -575,6 +565,7 @@
code = build2 (MODIFY_EXPR, type, dest, cons);
code = build_stmt (input_location, EXPR_STMT, code);
add_stmt (code);
+ num_split_elts += CONSTRUCTOR_NELTS (init);
}
break;
@@ -584,6 +575,8 @@
/* The rest of the initializer is now a constant. */
TREE_CONSTANT (init) = 1;
+ return complete_p && complete_ctor_at_level_p (TREE_TYPE (init),
+ num_split_elts, inner_type);
}
/* A subroutine of store_init_value. Splits non-constant static
@@ -599,7 +592,8 @@
if (TREE_CODE (init) == CONSTRUCTOR)
{
code = push_stmt_list ();
- split_nonconstant_init_1 (dest, &init);
+ if (split_nonconstant_init_1 (dest, init))
+ init = NULL_TREE;
code = pop_stmt_list (code);
DECL_INITIAL (dest) = init;
TREE_READONLY (dest) = 0;
=== modified file 'gcc/expr.c'
--- old/gcc/expr.c 2011-06-02 12:12:00 +0000
+++ new/gcc/expr.c 2011-07-14 11:52:32 +0000
@@ -4866,16 +4866,136 @@
return NULL_RTX;
}
+/* Return true if field F of structure TYPE is a flexible array. */
+
+static bool
+flexible_array_member_p (const_tree f, const_tree type)
+{
+ const_tree tf;
+
+ tf = TREE_TYPE (f);
+ return (DECL_CHAIN (f) == NULL
+ && TREE_CODE (tf) == ARRAY_TYPE
+ && TYPE_DOMAIN (tf)
+ && TYPE_MIN_VALUE (TYPE_DOMAIN (tf))
+ && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf)))
+ && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf))
+ && int_size_in_bytes (type) >= 0);
+}
+
+/* If FOR_CTOR_P, return the number of top-level elements that a constructor
+ must have in order for it to completely initialize a value of type TYPE.
+ Return -1 if the number isn't known.
+
+ If !FOR_CTOR_P, return an estimate of the number of scalars in TYPE. */
+
+static HOST_WIDE_INT
+count_type_elements (const_tree type, bool for_ctor_p)
+{
+ switch (TREE_CODE (type))
+ {
+ case ARRAY_TYPE:
+ {
+ tree nelts;
+
+ nelts = array_type_nelts (type);
+ if (nelts && host_integerp (nelts, 1))
+ {
+ unsigned HOST_WIDE_INT n;
+
+ n = tree_low_cst (nelts, 1) + 1;
+ if (n == 0 || for_ctor_p)
+ return n;
+ else
+ return n * count_type_elements (TREE_TYPE (type), false);
+ }
+ return for_ctor_p ? -1 : 1;
+ }
+
+ case RECORD_TYPE:
+ {
+ unsigned HOST_WIDE_INT n;
+ tree f;
+
+ n = 0;
+ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
+ {
+ if (!for_ctor_p)
+ n += count_type_elements (TREE_TYPE (f), false);
+ else if (!flexible_array_member_p (f, type))
+ /* Don't count flexible arrays, which are not supposed
+ to be initialized. */
+ n += 1;
+ }
+
+ return n;
+ }
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ tree f;
+ HOST_WIDE_INT n, m;
+
+ gcc_assert (!for_ctor_p);
+ /* Estimate the number of scalars in each field and pick the
+ maximum. Other estimates would do instead; the idea is simply
+ to make sure that the estimate is not sensitive to the ordering
+ of the fields. */
+ n = 1;
+ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
+ {
+ m = count_type_elements (TREE_TYPE (f), false);
+ /* If the field doesn't span the whole union, add an extra
+ scalar for the rest. */
+ if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)),
+ TYPE_SIZE (type)) != 1)
+ m++;
+ if (n < m)
+ n = m;
+ }
+ return n;
+ }
+
+ case COMPLEX_TYPE:
+ return 2;
+
+ case VECTOR_TYPE:
+ return TYPE_VECTOR_SUBPARTS (type);
+
+ case INTEGER_TYPE:
+ case REAL_TYPE:
+ case FIXED_POINT_TYPE:
+ case ENUMERAL_TYPE:
+ case BOOLEAN_TYPE:
+ case POINTER_TYPE:
+ case OFFSET_TYPE:
+ case REFERENCE_TYPE:
+ return 1;
+
+ case ERROR_MARK:
+ return 0;
+
+ case VOID_TYPE:
+ case METHOD_TYPE:
+ case FUNCTION_TYPE:
+ case LANG_TYPE:
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Helper for categorize_ctor_elements. Identical interface. */
static bool
categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
- HOST_WIDE_INT *p_elt_count,
- bool *p_must_clear)
+ HOST_WIDE_INT *p_init_elts, bool *p_complete)
{
unsigned HOST_WIDE_INT idx;
- HOST_WIDE_INT nz_elts, elt_count;
- tree value, purpose;
+ HOST_WIDE_INT nz_elts, init_elts, num_fields;
+ tree value, purpose, elt_type;
/* Whether CTOR is a valid constant initializer, in accordance with what
initializer_constant_valid_p does. If inferred from the constructor
@@ -4884,7 +5004,9 @@
bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
nz_elts = 0;
- elt_count = 0;
+ init_elts = 0;
+ num_fields = 0;
+ elt_type = NULL_TREE;
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value)
{
@@ -4899,6 +5021,8 @@
mult = (tree_low_cst (hi_index, 1)
- tree_low_cst (lo_index, 1) + 1);
}
+ num_fields += mult;
+ elt_type = TREE_TYPE (value);
switch (TREE_CODE (value))
{
@@ -4906,11 +5030,11 @@
{
HOST_WIDE_INT nz = 0, ic = 0;
- bool const_elt_p
- = categorize_ctor_elements_1 (value, &nz, &ic, p_must_clear);
+ bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic,
+ p_complete);
nz_elts += mult * nz;
- elt_count += mult * ic;
+ init_elts += mult * ic;
if (const_from_elts_p && const_p)
const_p = const_elt_p;
@@ -4922,12 +5046,12 @@
case FIXED_CST:
if (!initializer_zerop (value))
nz_elts += mult;
- elt_count += mult;
+ init_elts += mult;
break;
case STRING_CST:
nz_elts += mult * TREE_STRING_LENGTH (value);
- elt_count += mult * TREE_STRING_LENGTH (value);
+ init_elts += mult * TREE_STRING_LENGTH (value);
break;
case COMPLEX_CST:
@@ -4935,7 +5059,7 @@
nz_elts += mult;
if (!initializer_zerop (TREE_IMAGPART (value)))
nz_elts += mult;
- elt_count += mult;
+ init_elts += mult;
break;
case VECTOR_CST:
@@ -4945,65 +5069,31 @@
{
if (!initializer_zerop (TREE_VALUE (v)))
nz_elts += mult;
- elt_count += mult;
+ init_elts += mult;
}
}
break;
default:
{
- HOST_WIDE_INT tc = count_type_elements (TREE_TYPE (value), true);
- if (tc < 1)
- tc = 1;
+ HOST_WIDE_INT tc = count_type_elements (elt_type, false);
nz_elts += mult * tc;
- elt_count += mult * tc;
+ init_elts += mult * tc;
if (const_from_elts_p && const_p)
- const_p = initializer_constant_valid_p (value, TREE_TYPE (value))
+ const_p = initializer_constant_valid_p (value, elt_type)
!= NULL_TREE;
}
break;
}
}
- if (!*p_must_clear
- && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE
- || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE))
- {
- tree init_sub_type;
- bool clear_this = true;
-
- if (!VEC_empty (constructor_elt, CONSTRUCTOR_ELTS (ctor)))
- {
- /* We don't expect more than one element of the union to be
- initialized. Not sure what we should do otherwise... */
- gcc_assert (VEC_length (constructor_elt, CONSTRUCTOR_ELTS (ctor))
- == 1);
-
- init_sub_type = TREE_TYPE (VEC_index (constructor_elt,
- CONSTRUCTOR_ELTS (ctor),
- 0)->value);
-
- /* ??? We could look at each element of the union, and find the
- largest element. Which would avoid comparing the size of the
- initialized element against any tail padding in the union.
- Doesn't seem worth the effort... */
- if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)),
- TYPE_SIZE (init_sub_type)) == 1)
- {
- /* And now we have to find out if the element itself is fully
- constructed. E.g. for union { struct { int a, b; } s; } u
- = { .s = { .a = 1 } }. */
- if (elt_count == count_type_elements (init_sub_type, false))
- clear_this = false;
- }
- }
-
- *p_must_clear = clear_this;
- }
+ if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
+ num_fields, elt_type))
+ *p_complete = false;
*p_nz_elts += nz_elts;
- *p_elt_count += elt_count;
+ *p_init_elts += init_elts;
return const_p;
}
@@ -5013,111 +5103,50 @@
and place it in *P_NZ_ELTS;
* how many scalar fields in total are in CTOR,
and place it in *P_ELT_COUNT.
- * if a type is a union, and the initializer from the constructor
- is not the largest element in the union, then set *p_must_clear.
+ * whether the constructor is complete -- in the sense that every
+ meaningful byte is explicitly given a value --
+ and place it in *P_COMPLETE.
Return whether or not CTOR is a valid static constant initializer, the same
as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */
bool
categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
- HOST_WIDE_INT *p_elt_count,
- bool *p_must_clear)
+ HOST_WIDE_INT *p_init_elts, bool *p_complete)
{
*p_nz_elts = 0;
- *p_elt_count = 0;
- *p_must_clear = false;
+ *p_init_elts = 0;
+ *p_complete = true;
- return
- categorize_ctor_elements_1 (ctor, p_nz_elts, p_elt_count, p_must_clear);
+ return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete);
}
-/* Count the number of scalars in TYPE. Return -1 on overflow or
- variable-sized. If ALLOW_FLEXARR is true, don't count flexible
- array member at the end of the structure. */
+/* TYPE is initialized by a constructor with NUM_ELTS elements, the last
+ of which had type LAST_TYPE. Each element was itself a complete
+ initializer, in the sense that every meaningful byte was explicitly
+ given a value. Return true if the same is true for the constructor
+ as a whole. */
-HOST_WIDE_INT
-count_type_elements (const_tree type, bool allow_flexarr)
+bool
+complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts,
+ const_tree last_type)
{
- const HOST_WIDE_INT max = ~((HOST_WIDE_INT)1 << (HOST_BITS_PER_WIDE_INT-1));
- switch (TREE_CODE (type))
+ if (TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
{
- case ARRAY_TYPE:
- {
- tree telts = array_type_nelts (type);
- if (telts && host_integerp (telts, 1))
- {
- HOST_WIDE_INT n = tree_low_cst (telts, 1) + 1;
- HOST_WIDE_INT m = count_type_elements (TREE_TYPE (type), false);
- if (n == 0)
- return 0;
- else if (max / n > m)
- return n * m;
- }
- return -1;
- }
-
- case RECORD_TYPE:
- {
- HOST_WIDE_INT n = 0, t;
- tree f;
-
- for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
- if (TREE_CODE (f) == FIELD_DECL)
- {
- t = count_type_elements (TREE_TYPE (f), false);
- if (t < 0)
- {
- /* Check for structures with flexible array member. */
- tree tf = TREE_TYPE (f);
- if (allow_flexarr
- && DECL_CHAIN (f) == NULL
- && TREE_CODE (tf) == ARRAY_TYPE
- && TYPE_DOMAIN (tf)
- && TYPE_MIN_VALUE (TYPE_DOMAIN (tf))
- && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf)))
- && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf))
- && int_size_in_bytes (type) >= 0)
- break;
-
- return -1;
- }
- n += t;
- }
-
- return n;
- }
-
- case UNION_TYPE:
- case QUAL_UNION_TYPE:
- return -1;
-
- case COMPLEX_TYPE:
- return 2;
-
- case VECTOR_TYPE:
- return TYPE_VECTOR_SUBPARTS (type);
-
- case INTEGER_TYPE:
- case REAL_TYPE:
- case FIXED_POINT_TYPE:
- case ENUMERAL_TYPE:
- case BOOLEAN_TYPE:
- case POINTER_TYPE:
- case OFFSET_TYPE:
- case REFERENCE_TYPE:
- return 1;
-
- case ERROR_MARK:
- return 0;
-
- case VOID_TYPE:
- case METHOD_TYPE:
- case FUNCTION_TYPE:
- case LANG_TYPE:
- default:
- gcc_unreachable ();
+ if (num_elts == 0)
+ return false;
+
+ gcc_assert (num_elts == 1 && last_type);
+
+ /* ??? We could look at each element of the union, and find the
+ largest element. Which would avoid comparing the size of the
+ initialized element against any tail padding in the union.
+ Doesn't seem worth the effort... */
+ return simple_cst_equal (TYPE_SIZE (type), TYPE_SIZE (last_type)) == 1;
}
+
+ return count_type_elements (type, true) == num_elts;
}
/* Return 1 if EXP contains mostly (3/4) zeros. */
@@ -5126,18 +5155,12 @@
mostly_zeros_p (const_tree exp)
{
if (TREE_CODE (exp) == CONSTRUCTOR)
-
{
- HOST_WIDE_INT nz_elts, count, elts;
- bool must_clear;
-
- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear);
- if (must_clear)
- return 1;
-
- elts = count_type_elements (TREE_TYPE (exp), false);
-
- return nz_elts < elts / 4;
+ HOST_WIDE_INT nz_elts, init_elts;
+ bool complete_p;
+
+ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
+ return !complete_p || nz_elts < init_elts / 4;
}
return initializer_zerop (exp);
@@ -5149,12 +5172,11 @@
all_zeros_p (const_tree exp)
{
if (TREE_CODE (exp) == CONSTRUCTOR)
-
{
- HOST_WIDE_INT nz_elts, count;
- bool must_clear;
+ HOST_WIDE_INT nz_elts, init_elts;
+ bool complete_p;
- categorize_ctor_elements (exp, &nz_elts, &count, &must_clear);
+ categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
return nz_elts == 0;
}
=== modified file 'gcc/gimplify.c'
--- old/gcc/gimplify.c 2011-05-26 10:27:57 +0000
+++ new/gcc/gimplify.c 2011-07-13 13:17:31 +0000
@@ -3693,9 +3693,8 @@
case ARRAY_TYPE:
{
struct gimplify_init_ctor_preeval_data preeval_data;
- HOST_WIDE_INT num_type_elements, num_ctor_elements;
- HOST_WIDE_INT num_nonzero_elements;
- bool cleared, valid_const_initializer;
+ HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
+ bool cleared, complete_p, valid_const_initializer;
/* Aggregate types must lower constructors to initialization of
individual elements. The exception is that a CONSTRUCTOR node
@@ -3712,7 +3711,7 @@
can only do so if it known to be a valid constant initializer. */
valid_const_initializer
= categorize_ctor_elements (ctor, &num_nonzero_elements,
- &num_ctor_elements, &cleared);
+ &num_ctor_elements, &complete_p);
/* If a const aggregate variable is being initialized, then it
should never be a lose to promote the variable to be static. */
@@ -3750,26 +3749,29 @@
parts in, then generate code for the non-constant parts. */
/* TODO. There's code in cp/typeck.c to do this. */
- num_type_elements = count_type_elements (type, true);
+ if (int_size_in_bytes (TREE_TYPE (ctor)) < 0)
+ /* store_constructor will ignore the clearing of variable-sized
+ objects. Initializers for such objects must explicitly set
+ every field that needs to be set. */
+ cleared = false;
+ else if (!complete_p)
+ /* If the constructor isn't complete, clear the whole object
+ beforehand.
- /* If count_type_elements could not determine number of type elements
- for a constant-sized object, assume clearing is needed.
- Don't do this for variable-sized objects, as store_constructor
- will ignore the clearing of variable-sized objects. */
- if (num_type_elements < 0 && int_size_in_bytes (type) >= 0)
+ ??? This ought not to be needed. For any element not present
+ in the initializer, we should simply set them to zero. Except
+ we'd need to *find* the elements that are not present, and that
+ requires trickery to avoid quadratic compile-time behavior in
+ large cases or excessive memory use in small cases. */
cleared = true;
- /* If there are "lots" of zeros, then block clear the object first. */
- else if (num_type_elements - num_nonzero_elements
+ else if (num_ctor_elements - num_nonzero_elements
> CLEAR_RATIO (optimize_function_for_speed_p (cfun))
- && num_nonzero_elements < num_type_elements/4)
- cleared = true;
- /* ??? This bit ought not be needed. For any element not present
- in the initializer, we should simply set them to zero. Except
- we'd need to *find* the elements that are not present, and that
- requires trickery to avoid quadratic compile-time behavior in
- large cases or excessive memory use in small cases. */
- else if (num_ctor_elements < num_type_elements)
- cleared = true;
+ && num_nonzero_elements < num_ctor_elements / 4)
+ /* If there are "lots" of zeros, it's more efficient to clear
+ the memory and then set the nonzero elements. */
+ cleared = true;
+ else
+ cleared = false;
/* If there are "lots" of initialized elements, and all of them
are valid address constants, then the entire initializer can
=== added file 'gcc/testsuite/gcc.target/arm/pr48183.c'
--- old/gcc/testsuite/gcc.target/arm/pr48183.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/pr48183.c 2011-07-13 13:17:31 +0000
@@ -0,0 +1,25 @@
+/* testsuite/gcc.target/arm/pr48183.c */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O -g" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n)
+{
+ unsigned i;
+ int16x4x2_t input;
+ int32x4x2_t mid;
+ int32x4x2_t output;
+
+ for (i = 0; i < n/2; i += 8) {
+ input = vld2_s16(src + i);
+ mid.val[0] = vmovl_s16(input.val[0]);
+ mid.val[1] = vmovl_s16(input.val[1]);
+ output.val[0] = vshlq_n_s32(mid.val[0], 8);
+ output.val[1] = vshlq_n_s32(mid.val[1], 8);
+ vst2q_s32((int32_t *)dst + i, output);
+ }
+}
=== modified file 'gcc/tree.h'
--- old/gcc/tree.h 2011-07-01 09:19:21 +0000
+++ new/gcc/tree.h 2011-07-13 13:17:31 +0000
@@ -4627,21 +4627,10 @@
extern VEC(tree,gc) *ctor_to_vec (tree);
-/* Examine CTOR to discover:
- * how many scalar fields are set to nonzero values,
- and place it in *P_NZ_ELTS;
- * how many scalar fields in total are in CTOR,
- and place it in *P_ELT_COUNT.
- * if a type is a union, and the initializer from the constructor
- is not the largest element in the union, then set *p_must_clear.
-
- Return whether or not CTOR is a valid static constant initializer, the same
- as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */
-
-extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *,
- bool *);
-
-extern HOST_WIDE_INT count_type_elements (const_tree, bool);
+extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
+ HOST_WIDE_INT *, bool *);
+
+extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree);
/* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. */

View File

@@ -1,27 +0,0 @@
2011-07-21 Richard Sandiford <rdsandiford@googlemail.com>
gcc/
Backport from mainline:
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
* regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK.
=== modified file 'gcc/regcprop.c'
--- old/gcc/regcprop.c 2010-12-17 22:51:25 +0000
+++ new/gcc/regcprop.c 2011-07-21 11:30:53 +0000
@@ -418,10 +418,9 @@
offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0)
+ (BYTES_BIG_ENDIAN ? byteoffset : 0));
- return gen_rtx_raw_REG (new_mode,
- regno + subreg_regno_offset (regno, orig_mode,
- offset,
- new_mode));
+ regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
+ if (HARD_REGNO_MODE_OK (regno, new_mode))
+ return gen_rtx_raw_REG (new_mode, regno);
}
return NULL_RTX;
}

View File

@@ -1,62 +0,0 @@
2011-07-31 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r176970:
* modulo-sched.c: Change comment.
(reset_sched_times): Fix print message.
(print_partial_schedule): Add print info.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000
+++ new/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000
@@ -84,13 +84,14 @@
II cycles (i.e. use register copies to prevent a def from overwriting
itself before reaching the use).
- SMS works with countable loops whose loop count can be easily
- adjusted. This is because we peel a constant number of iterations
- into a prologue and epilogue for which we want to avoid emitting
- the control part, and a kernel which is to iterate that constant
- number of iterations less than the original loop. So the control
- part should be a set of insns clearly identified and having its
- own iv, not otherwise used in the loop (at-least for now), which
+ SMS works with countable loops (1) whose control part can be easily
+ decoupled from the rest of the loop and (2) whose loop count can
+ be easily adjusted. This is because we peel a constant number of
+ iterations into a prologue and epilogue for which we want to avoid
+ emitting the control part, and a kernel which is to iterate that
+ constant number of iterations less than the original loop. So the
+ control part should be a set of insns clearly identified and having
+ its own iv, not otherwise used in the loop (at-least for now), which
initializes a register before the loop to the number of iterations.
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
@@ -598,8 +599,8 @@
/* Print the scheduling times after the rotation. */
fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
"crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
- INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
- normalized_time);
+ INSN_UID (crr_insn->node->insn), normalized_time,
+ new_min_cycle);
if (JUMP_P (crr_insn->node->insn))
fprintf (dump_file, " (branch)");
fprintf (dump_file, "\n");
@@ -2550,8 +2551,13 @@
fprintf (dump, "\n[ROW %d ]: ", i);
while (ps_i)
{
- fprintf (dump, "%d, ",
- INSN_UID (ps_i->node->insn));
+ if (JUMP_P (ps_i->node->insn))
+ fprintf (dump, "%d (branch), ",
+ INSN_UID (ps_i->node->insn));
+ else
+ fprintf (dump, "%d, ",
+ INSN_UID (ps_i->node->insn));
+
ps_i = ps_i->next_in_row;
}
}

View File

@@ -1,458 +0,0 @@
2011-08-09 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r177235.
* modulo-sched.c (calculate_stage_count,
calculate_must_precede_follow, get_sched_window,
try_scheduling_node_in_cycle, remove_node_from_ps):
Add declaration.
(update_node_sched_params, set_must_precede_follow, optimize_sc):
New functions.
(reset_sched_times): Call update_node_sched_params.
(sms_schedule): Call optimize_sc.
(get_sched_window): Change function arguments.
(sms_schedule_by_order): Update call to get_sched_window.
Call set_must_precede_follow.
(calculate_stage_count): Add function argument.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-07-31 10:58:46 +0000
+++ new/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000
@@ -203,7 +203,16 @@
rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
int, int, int, rtx);
-static int calculate_stage_count (partial_schedule_ptr ps);
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+ int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
+ sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
+ int, int, sbitmap, int *, sbitmap,
+ sbitmap);
+static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
@@ -577,6 +586,36 @@
}
}
+/* Update the sched_params (time, row and stage) for node U using the II,
+ the CYCLE of U and MIN_CYCLE.
+ We're not simply taking the following
+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
+ because the stages may not be aligned on cycle 0. */
+static void
+update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
+{
+ int sc_until_cycle_zero;
+ int stage;
+
+ SCHED_TIME (u) = cycle;
+ SCHED_ROW (u) = SMODULO (cycle, ii);
+
+ /* The calculation of stage count is done adding the number
+ of stages before cycle zero and after cycle zero. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
+}
+
/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
SCHED_ROW and SCHED_STAGE. */
static void
@@ -592,7 +631,6 @@
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
- int sc_until_cycle_zero, stage;
if (dump_file)
{
@@ -608,23 +646,9 @@
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = SMODULO (normalized_time, ii);
-
- /* The calculation of stage count is done adding the number
- of stages before cycle zero and after cycle zero. */
- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
-
- if (SCHED_TIME (u) < 0)
- {
- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
- SCHED_STAGE (u) = sc_until_cycle_zero - stage;
- }
- else
- {
- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
- }
+
+ crr_insn->cycle = normalized_time;
+ update_node_sched_params (u, ii, normalized_time, new_min_cycle);
}
}
@@ -661,6 +685,206 @@
PREV_INSN (last));
}
+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
+ respectively only if cycle C falls on the border of the scheduling
+ window boundaries marked by START and END cycles. STEP is the
+ direction of the window. */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+ sbitmap *tmp_precede, sbitmap must_precede, int c,
+ int start, int end, int step)
+{
+ *tmp_precede = NULL;
+ *tmp_follow = NULL;
+
+ if (c == start)
+ {
+ if (step == 1)
+ *tmp_precede = must_precede;
+ else /* step == -1. */
+ *tmp_follow = must_follow;
+ }
+ if (c == end - step)
+ {
+ if (step == 1)
+ *tmp_follow = must_follow;
+ else /* step == -1. */
+ *tmp_precede = must_precede;
+ }
+
+}
+
+/* Return True if the branch can be moved to row ii-1 while
+ normalizing the partial schedule PS to start from cycle zero and thus
+ optimize the SC. Otherwise return False. */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+ int amount = PS_MIN_CYCLE (ps);
+ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+ int start, end, step;
+ int ii = ps->ii;
+ bool ok = false;
+ int stage_count, stage_count_curr;
+
+ /* Compare the SC after normalization and SC after bringing the branch
+ to row ii-1. If they are equal just bail out. */
+ stage_count = calculate_stage_count (ps, amount);
+ stage_count_curr =
+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1));
+
+ if (stage_count == stage_count_curr)
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS SC already optimized.\n");
+
+ ok = false;
+ goto clear;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS Trying to optimize branch location\n");
+ fprintf (dump_file, "SMS partial schedule before trial:\n");
+ print_partial_schedule (ps, dump_file);
+ }
+
+ /* First, normalize the partial scheduling. */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+ ii, stage_count);
+ print_partial_schedule (ps, dump_file);
+ }
+
+ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
+ {
+ ok = true;
+ goto clear;
+ }
+
+ sbitmap_ones (sched_nodes);
+
+ /* Calculate the new placement of the branch. It should be in row
+ ii-1 and fall into its scheduling window. */
+ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+ &step, &end) == 0)
+ {
+ bool success;
+ ps_insn_ptr next_ps_i;
+ int branch_cycle = SCHED_TIME (g->closing_branch);
+ int row = SMODULO (branch_cycle, ps->ii);
+ int num_splits = 0;
+ sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+ int c;
+
+ if (dump_file)
+ fprintf (dump_file, "\nTrying to schedule node %d "
+ "INSN = %d in (%d .. %d) step %d\n",
+ g->closing_branch->cuid,
+ (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+ if (step == 1)
+ {
+ c = start + ii - SMODULO (start, ii) - 1;
+ gcc_assert (c >= start);
+ if (c >= end)
+ {
+ ok = false;
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ goto clear;
+ }
+ }
+ else
+ {
+ c = start - SMODULO (start, ii) - 1;
+ gcc_assert (c <= start);
+
+ if (c <= end)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ ok = false;
+ goto clear;
+ }
+ }
+
+ must_precede = sbitmap_alloc (g->num_nodes);
+ must_follow = sbitmap_alloc (g->num_nodes);
+
+ /* Try to schedule the branch in its new cycle. */
+ calculate_must_precede_follow (g->closing_branch, start, end,
+ step, ii, sched_nodes,
+ must_precede, must_follow);
+
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, c, start, end, step);
+
+ /* Find the element in the partial schedule related to the closing
+ branch so we can remove it from its current cycle. */
+ for (next_ps_i = ps->rows[row];
+ next_ps_i; next_ps_i = next_ps_i->next_in_row)
+ if (next_ps_i->node->cuid == g->closing_branch->cuid)
+ break;
+
+ gcc_assert (next_ps_i);
+ gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid, c,
+ sched_nodes, &num_splits,
+ tmp_precede, tmp_follow);
+ gcc_assert (num_splits == 0);
+ if (!success)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d, "
+ "bringing it back to cycle %d\n", c, branch_cycle);
+
+ /* The branch could not be placed in row ii - 1.
+ Put it back in its original place in the partial
+ schedule. */
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, branch_cycle, start, end,
+ step);
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid,
+ branch_cycle, sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ gcc_assert (success && (num_splits == 0));
+ ok = false;
+ }
+ else
+ {
+ /* The branch is placed in row ii - 1. */
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS success in moving branch to cycle %d\n", c);
+
+ update_node_sched_params (g->closing_branch, ii, c,
+ PS_MIN_CYCLE (ps));
+ ok = true;
+ }
+
+ free (must_precede);
+ free (must_follow);
+ }
+
+clear:
+ free (sched_nodes);
+ return ok;
+}
+
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int to_stage, int for_prolog, rtx count_reg)
@@ -1116,6 +1340,7 @@
int mii, rec_mii;
unsigned stage_count = 0;
HOST_WIDEST_INT loop_count = 0;
+ bool opt_sc_p = false;
if (! (g = g_arr[loop->num]))
continue;
@@ -1197,14 +1422,32 @@
set_node_sched_params (g);
ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
- if (ps)
- {
- stage_count = calculate_stage_count (ps);
- gcc_assert(stage_count >= 1);
- PS_STAGE_COUNT(ps) = stage_count;
- }
-
+
+ if (ps)
+ {
+ /* Try to achieve optimized SC by normalizing the partial
+ schedule (having the cycles start from cycle zero).
+ The branch location must be placed in row ii-1 in the
+ final scheduling. If failed, shift all instructions to
+ position the branch in row ii-1. */
+ opt_sc_p = optimize_sc (ps, g);
+ if (opt_sc_p)
+ stage_count = calculate_stage_count (ps, 0);
+ else
+ {
+ /* Bring the branch to cycle ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ if (dump_file)
+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
+
+ stage_count = calculate_stage_count (ps, amount);
+ }
+
+ gcc_assert (stage_count >= 1);
+ PS_STAGE_COUNT (ps) = stage_count;
+ }
+
/* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1 means that there is no interleaving between iterations thus
we let the scheduling passes do the job in this case. */
@@ -1225,12 +1468,16 @@
else
{
struct undo_replace_buff_elem *reg_move_replaces;
- int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ if (!opt_sc_p)
+ {
+ /* Rotate the partial schedule to have the branch in row ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ }
- /* Set the stage boundaries. The closing_branch was scheduled
- and should appear in the last (ii-1) row. */
- reset_sched_times (ps, amount);
- rotate_partial_schedule (ps, amount);
set_columns_for_ps (ps);
canon_loop (loop);
@@ -1382,13 +1629,11 @@
scheduling window is empty and zero otherwise. */
static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
int start, step, end;
ddg_edge_ptr e;
- int u = nodes_order [i];
- ddg_node_ptr u_node = &ps->g->nodes[u];
sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
@@ -1800,7 +2045,7 @@
/* Try to get non-empty scheduling window. */
success = 0;
- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+ if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
&step, &end) == 0)
{
if (dump_file)
@@ -1817,24 +2062,11 @@
for (c = start; c != end; c += step)
{
- sbitmap tmp_precede = NULL;
- sbitmap tmp_follow = NULL;
-
- if (c == start)
- {
- if (step == 1)
- tmp_precede = must_precede;
- else /* step == -1. */
- tmp_follow = must_follow;
- }
- if (c == end - step)
- {
- if (step == 1)
- tmp_follow = must_follow;
- else /* step == -1. */
- tmp_precede = must_precede;
- }
-
+ sbitmap tmp_precede, tmp_follow;
+
+ set_must_precede_follow (&tmp_follow, must_follow,
+ &tmp_precede, must_precede,
+ c, start, end, step);
success =
try_scheduling_node_in_cycle (ps, u_node, u, c,
sched_nodes,
@@ -2899,12 +3131,10 @@
}
/* Calculate the stage count of the partial schedule PS. The calculation
- takes into account the rotation to bring the closing branch to row
- ii-1. */
+ takes into account the rotation amount passed in ROTATION_AMOUNT. */
int
-calculate_stage_count (partial_schedule_ptr ps)
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
{
- int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);

View File

@@ -1,39 +0,0 @@
2011-08-09 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r176972:
* ddg.c (create_ddg_dep_from_intra_loop_link): Remove
the creation of anti-dep edge from a branch.
(add_cross_iteration_register_deps):
Create anti-dep edge from a branch.
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-07-04 11:00:06 +0000
+++ new/gcc/ddg.c 2011-07-31 11:29:10 +0000
@@ -197,11 +197,6 @@
}
}
- /* If a true dep edge enters the branch create an anti edge in the
- opposite direction to prevent the creation of reg-moves. */
- if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn))
- create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1);
-
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e);
@@ -306,8 +301,11 @@
gcc_assert (first_def_node);
+ /* Always create the edge if the use node is a branch in
+ order to prevent the creation of reg-moves. */
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
- || !flag_modulo_sched_allow_regmoves)
+ || !flag_modulo_sched_allow_regmoves
+ || JUMP_P (use_node->insn))
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);

View File

@@ -1,94 +0,0 @@
2011-08-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
Backport from mainline:
2011-07-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/vfp.md ("*movdf_vfp"): Handle the VFP constraints
before the core constraints. Adjust attributes.
("*thumb2_movdf_vfp"): Likewise.
=== modified file 'gcc/config/arm/vfp.md'
--- old/gcc/config/arm/vfp.md 2011-01-20 22:03:29 +0000
+++ new/gcc/config/arm/vfp.md 2011-07-27 12:59:19 +0000
@@ -401,8 +401,8 @@
;; DFmode moves
(define_insn "*movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -418,9 +418,9 @@
gcc_assert (TARGET_VFP_DOUBLE);
return \"fconstd%?\\t%P0, #%G1\";
case 3: case 4:
+ return output_move_vfp (operands);
+ case 5: case 6:
return output_move_double (operands);
- case 5: case 6:
- return output_move_vfp (operands);
case 7:
if (TARGET_VFP_SINGLE)
return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
@@ -435,7 +435,7 @@
"
[(set_attr "type"
"r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
(eq_attr "alternative" "7")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -449,8 +449,8 @@
)
(define_insn "*thumb2_movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
"*
{
@@ -463,10 +463,10 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"fconstd%?\\t%P0, #%G1\";
- case 3: case 4: case 8:
+ case 3: case 4:
+ return output_move_vfp (operands);
+ case 5: case 6: case 8:
return output_move_double (operands);
- case 5: case 6:
- return output_move_vfp (operands);
case 7:
if (TARGET_VFP_SINGLE)
return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
@@ -478,8 +478,8 @@
}
"
[(set_attr "type"
- "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
+ (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
(eq_attr "alternative" "7")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -487,8 +487,8 @@
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
+ (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
)

View File

@@ -1,30 +0,0 @@
2011-08-15 Michael Hope <michael.hope@linaro.org>
Backport from mainline r177357
gcc/testsuite/
2011-08-04 Ian Bolton <ian.bolton@arm.com>
* gcc.target/arm/vfp-1.c: no large negative offsets on Thumb2.
=== modified file 'gcc/testsuite/gcc.target/arm/vfp-1.c'
--- old/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-01-01 08:52:03 +0000
+++ new/gcc/testsuite/gcc.target/arm/vfp-1.c 2011-08-09 23:22:51 +0000
@@ -127,13 +127,13 @@
void test_ldst (float f[], double d[]) {
/* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */
- /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */
+ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
/* { dg-final { scan-assembler "add.+ r0, #1024" } } */
- /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */
+ /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */
f[256] = f[255] + f[-255];
/* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */
- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */
+ /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
/* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */
d[32] = d[127] + d[-127];
}

View File

@@ -1,33 +0,0 @@
2011-08-15 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* config/rs6000/rs6000.c (paired_expand_vector_init): Don't create
CONST_VECTORs with symbolic elements.
(rs6000_expand_vector_init): Likewise.
=== modified file 'gcc/config/rs6000/rs6000.c'
--- old/gcc/config/rs6000/rs6000.c 2011-07-27 18:17:15 +0000
+++ new/gcc/config/rs6000/rs6000.c 2011-08-16 08:59:36 +0000
@@ -5134,7 +5134,9 @@
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
++n_var;
}
if (n_var == 0)
@@ -5286,7 +5288,9 @@
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
++n_var, one_var = i;
else if (x != CONST0_RTX (inner_mode))
all_const_zero = false;

View File

@@ -1,61 +0,0 @@
2011-08-18 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-08-12 Richard Sandiford <rdsandiford@googlemail.com>
* config/arm/arm.c (get_label_padding): New function.
(create_fix_barrier, arm_reorg): Use it.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/arm.c 2011-08-12 08:08:31 +0000
@@ -11769,6 +11769,19 @@
return 0;
}
+/* Return the maximum amount of padding that will be inserted before
+ label LABEL. */
+
+static HOST_WIDE_INT
+get_label_padding (rtx label)
+{
+ HOST_WIDE_INT align, min_insn_size;
+
+ align = 1 << label_to_alignment (label);
+ min_insn_size = TARGET_THUMB ? 2 : 4;
+ return align > min_insn_size ? align - min_insn_size : 0;
+}
+
/* Move a minipool fix MP from its current location to before MAX_MP.
If MAX_MP is NULL, then MP doesn't need moving, but the addressing
constraints may need updating. */
@@ -12315,8 +12328,12 @@
within range. */
gcc_assert (GET_CODE (from) != BARRIER);
- /* Count the length of this insn. */
- count += get_attr_length (from);
+ /* Count the length of this insn. This must stay in sync with the
+ code that pushes minipool fixes. */
+ if (LABEL_P (from))
+ count += get_label_padding (from);
+ else
+ count += get_attr_length (from);
/* If there is a jump table, add its length. */
tmp = is_jump_table (from);
@@ -12736,6 +12753,11 @@
insn = table;
}
}
+ else if (LABEL_P (insn))
+ /* Add the worst-case padding due to alignment. We don't add
+ the _current_ padding because the minipool insertions
+ themselves might change it. */
+ address += get_label_padding (insn);
}
fix = minipool_fix_head;

View File

@@ -1,23 +0,0 @@
2011-08-26 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-08-26 Richard Sandiford <richard.sandiford@linaro.org>
* df-problems.c (df_note_bb_compute): Pass uses rather than defs
to df_set_dead_notes_for_mw.
=== modified file 'gcc/df-problems.c'
--- old/gcc/df-problems.c 2011-07-07 19:10:01 +0000
+++ new/gcc/df-problems.c 2011-08-26 14:32:47 +0000
@@ -3375,7 +3375,7 @@
while (*mws_rec)
{
struct df_mw_hardreg *mws = *mws_rec;
- if ((DF_MWS_REG_DEF_P (mws))
+ if (DF_MWS_REG_USE_P (mws)
&& !df_ignore_stack_reg (mws->start_regno))
{
bool really_add_notes = debug_insn != 0;

View File

@@ -1,23 +0,0 @@
2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/
2011-08-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/48328
* config/arm/arm.h (CASE_VECTOR_SHORTEN_MODE): Fix distance
for tbh instructions.
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-08-24 17:35:16 +0000
+++ new/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000
@@ -1961,7 +1961,7 @@
: min >= -4096 && max < 4096 \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \
: SImode) \
- : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \
+ : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \
: (max >= 0x200) ? HImode \
: QImode))

View File

@@ -1,75 +0,0 @@
2011-09-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/arm/cortex-a9.md ("cortex_a9_mult_long"): New.
("cortex_a9_multiply_long"): New and use above. Handle all
long multiply cases.
("cortex_a9_multiply"): Handle smmul and smmulr.
("cortex_a9_mac"): Handle smmla.
=== modified file 'gcc/config/arm/cortex-a9.md'
--- old/gcc/config/arm/cortex-a9.md 2011-01-18 15:28:08 +0000
+++ new/gcc/config/arm/cortex-a9.md 2011-08-26 08:52:15 +0000
@@ -68,7 +68,8 @@
"cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")
(define_reservation "cortex_a9_mac"
"cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")
-
+(define_reservation "cortex_a9_mult_long"
+ "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb")
;; Issue at the same time along the load store pipeline and
;; the VFP / Neon pipeline is not possible.
@@ -139,29 +140,35 @@
(eq_attr "insn" "smlaxy"))
"cortex_a9_mac16")
-
(define_insn_reservation "cortex_a9_multiply" 4
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "mul"))
+ (eq_attr "insn" "mul,smmul,smmulr"))
"cortex_a9_mult")
(define_insn_reservation "cortex_a9_mac" 4
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "mla"))
+ (eq_attr "insn" "mla,smmla"))
"cortex_a9_mac")
+(define_insn_reservation "cortex_a9_multiply_long" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
+ "cortex_a9_mult_long")
+
;; An instruction with a result in E2 can be forwarded
;; to E2 or E1 or M1 or the load store unit in the next cycle.
(define_bypass 1 "cortex_a9_dp"
"cortex_a9_dp_shift, cortex_a9_multiply,
cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
+ cortex_a9_multiply_long")
(define_bypass 2 "cortex_a9_dp_shift"
"cortex_a9_dp_shift, cortex_a9_multiply,
cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
+ cortex_a9_multiply_long")
;; An instruction in the load store pipeline can provide
;; read access to a DP instruction in the P0 default pipeline
@@ -212,7 +219,7 @@
(define_bypass 1
"cortex_a9_fps"
- "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
+ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long")
;; Scheduling on the FP_ADD pipeline.
(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")

View File

@@ -1,948 +0,0 @@
2011-09-12 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF mainline:
2011-08-30 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2,
b3 and b4 unsigned.
2011-08-30 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_gen_constant): Set can_negate correctly
when code is SET.
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (struct four_ints): New type.
(count_insns_for_constant): Delete function.
(find_best_start): Delete function.
(optimal_immediate_sequence): New function.
(optimal_immediate_sequence_1): New function.
(arm_gen_constant): Move constant splitting code to
optimal_immediate_sequence.
Rewrite constant negation/inversion code.
gcc/testsuite/
* gcc.target/arm/thumb2-replicated-constant1.c: New file.
* gcc.target/arm/thumb2-replicated-constant2.c: New file.
* gcc.target/arm/thumb2-replicated-constant3.c: New file.
* gcc.target/arm/thumb2-replicated-constant4.c: New file.
2011-08-26 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
* config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
Remove prototype. Remove static function type.
* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
Add arch attribute.
* config/arm/constraints.md (Pj, PJ): New constraints.
2011-04-20 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_gen_constant): Move movw support ....
(const_ok_for_op): ... to here.
2011-04-20 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_gen_constant): Remove redundant can_invert.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000
@@ -46,6 +46,7 @@
extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT);
+extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000
+++ new/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000
@@ -63,6 +63,11 @@
void (*arm_lang_output_object_attributes_hook)(void);
+struct four_ints
+{
+ int i[4];
+};
+
/* Forward function declarations. */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
@@ -81,7 +86,6 @@
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
@@ -129,7 +133,13 @@
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
-static int count_insns_for_constant (HOST_WIDE_INT, int);
+static int optimal_immediate_sequence (enum rtx_code code,
+ unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence);
+static int optimal_immediate_sequence_1 (enum rtx_code code,
+ unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence,
+ int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
@@ -2525,7 +2535,7 @@
}
/* Return true if I is a valid constant for the operation CODE. */
-static int
+int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
if (const_ok_for_arm (i))
@@ -2533,7 +2543,21 @@
switch (code)
{
+ case SET:
+ /* See if we can use movw. */
+ if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
+ return 1;
+ else
+ return 0;
+
case PLUS:
+ /* See if we can use addw or subw. */
+ if (TARGET_THUMB2
+ && ((i & 0xfffff000) == 0
+ || ((-i) & 0xfffff000) == 0))
+ return 1;
+ /* else fall through. */
+
case COMPARE:
case EQ:
case NE:
@@ -2649,68 +2673,41 @@
1);
}
-/* Return the number of instructions required to synthesize the given
- constant, if we start emitting them from bit-position I. */
-static int
-count_insns_for_constant (HOST_WIDE_INT remainder, int i)
-{
- HOST_WIDE_INT temp1;
- int step_size = TARGET_ARM ? 2 : 1;
- int num_insns = 0;
-
- gcc_assert (TARGET_ARM || i == 0);
-
- do
- {
- int end;
-
- if (i <= 0)
- i += 32;
- if (remainder & (((1 << step_size) - 1) << (i - step_size)))
- {
- end = i - 8;
- if (end < 0)
- end += 32;
- temp1 = remainder & ((0x0ff << end)
- | ((i < end) ? (0xff >> (32 - end)) : 0));
- remainder &= ~temp1;
- num_insns++;
- i -= 8 - step_size;
- }
- i -= step_size;
- } while (remainder);
- return num_insns;
-}
-
-static int
-find_best_start (unsigned HOST_WIDE_INT remainder)
+/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
+ ARM/THUMB2 immediates, and add up to VAL.
+ The function return value gives the number of insns required. */
+static int
+optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence)
{
int best_consecutive_zeros = 0;
int i;
int best_start = 0;
+ int insns1, insns2;
+ struct four_ints tmp_sequence;
/* If we aren't targetting ARM, the best place to start is always at
- the bottom. */
- if (! TARGET_ARM)
- return 0;
-
- for (i = 0; i < 32; i += 2)
+ the bottom, otherwise look more closely. */
+ if (TARGET_ARM)
{
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
+ for (i = 0; i < 32; i += 2)
{
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
- {
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
- }
- i -= 2;
+ int consecutive_zeros = 0;
+
+ if (!(val & (3 << i)))
+ {
+ while ((i < 32) && !(val & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
+ }
}
}
@@ -2737,13 +2734,161 @@
the constant starting from `best_start', and also starting from
zero (i.e. with bit 31 first to be output). If `best_start' doesn't
yield a shorter sequence, we may as well use zero. */
+ insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
-
- return best_start;
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
+ {
+ insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
+ if (insns2 <= insns1)
+ {
+ *return_sequence = tmp_sequence;
+ insns1 = insns2;
+ }
+ }
+
+ return insns1;
+}
+
+/* As for optimal_immediate_sequence, but starting at bit-position I. */
+static int
+optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
+ struct four_ints *return_sequence, int i)
+{
+ int remainder = val & 0xffffffff;
+ int insns = 0;
+
+ /* Try and find a way of doing the job in either two or three
+ instructions.
+
+ In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
+ location. We start at position I. This may be the MSB, or
+ optimal_immediate_sequence may have positioned it at the largest block
+ of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
+ wrapping around to the top of the word when we drop off the bottom.
+ In the worst case this code should produce no more than four insns.
+
+ In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
+ constants, shifted to any arbitrary location. We should always start
+ at the MSB. */
+ do
+ {
+ int end;
+ unsigned int b1, b2, b3, b4;
+ unsigned HOST_WIDE_INT result;
+ int loc;
+
+ gcc_assert (insns < 4);
+
+ if (i <= 0)
+ i += 32;
+
+ /* First, find the next normal 12/8-bit shifted/rotated immediate. */
+ if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
+ {
+ loc = i;
+ if (i <= 12 && TARGET_THUMB2 && code == PLUS)
+ /* We can use addw/subw for the last 12 bits. */
+ result = remainder;
+ else
+ {
+ /* Use an 8-bit shifted/rotated immediate. */
+ end = i - 8;
+ if (end < 0)
+ end += 32;
+ result = remainder & ((0x0ff << end)
+ | ((i < end) ? (0xff >> (32 - end))
+ : 0));
+ i -= 8;
+ }
+ }
+ else
+ {
+ /* Arm allows rotates by a multiple of two. Thumb-2 allows
+ arbitrary shifts. */
+ i -= TARGET_ARM ? 2 : 1;
+ continue;
+ }
+
+ /* Next, see if we can do a better job with a thumb2 replicated
+ constant.
+
+ We do it this way around to catch the cases like 0x01F001E0 where
+ two 8-bit immediates would work, but a replicated constant would
+ make it worse.
+
+ TODO: 16-bit constants that don't clear all the bits, but still win.
+ TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
+ if (TARGET_THUMB2)
+ {
+ b1 = (remainder & 0xff000000) >> 24;
+ b2 = (remainder & 0x00ff0000) >> 16;
+ b3 = (remainder & 0x0000ff00) >> 8;
+ b4 = remainder & 0xff;
+
+ if (loc > 24)
+ {
+ /* The 8-bit immediate already found clears b1 (and maybe b2),
+ but must leave b3 and b4 alone. */
+
+ /* First try to find a 32-bit replicated constant that clears
+ almost everything. We can assume that we can't do it in one,
+ or else we wouldn't be here. */
+ unsigned int tmp = b1 & b2 & b3 & b4;
+ unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
+ + (tmp << 24);
+ unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
+ + (tmp == b3) + (tmp == b4);
+ if (tmp
+ && (matching_bytes >= 3
+ || (matching_bytes == 2
+ && const_ok_for_op (remainder & ~tmp2, code))))
+ {
+ /* At least 3 of the bytes match, and the fourth has at
+ least as many bits set, or two of the bytes match
+ and it will only require one more insn to finish. */
+ result = tmp2;
+ i = tmp != b1 ? 32
+ : tmp != b2 ? 24
+ : tmp != b3 ? 16
+ : 8;
+ }
+
+ /* Second, try to find a 16-bit replicated constant that can
+ leave three of the bytes clear. If b2 or b4 is already
+ zero, then we can. If the 8-bit from above would not
+ clear b2 anyway, then we still win. */
+ else if (b1 == b3 && (!b2 || !b4
+ || (remainder & 0x00ff0000 & ~result)))
+ {
+ result = remainder & 0xff00ff00;
+ i = 24;
+ }
+ }
+ else if (loc > 16)
+ {
+ /* The 8-bit immediate already found clears b2 (and maybe b3)
+ and we don't get here unless b1 is already clear, but it will
+ leave b4 unchanged. */
+
+ /* If we can clear b2 and b4 at once, then we win, since the
+ 8-bits couldn't possibly reach that far. */
+ if (b2 == b4)
+ {
+ result = remainder & 0x00ff00ff;
+ i = 16;
+ }
+ }
+ }
+
+ return_sequence->i[insns++] = result;
+ remainder &= ~result;
+
+ if (code == SET || code == MINUS)
+ code = PLUS;
+ }
+ while (remainder);
+
+ return insns;
}
/* Emit an instruction with the indicated PATTERN. If COND is
@@ -2760,7 +2905,6 @@
/* As above, but extra parameter GENERATE which, if clear, suppresses
RTL generation. */
-/* ??? This needs more work for thumb2. */
static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
@@ -2772,15 +2916,15 @@
int final_invert = 0;
int can_negate_initial = 0;
int i;
- int num_bits_set = 0;
int set_sign_bit_copies = 0;
int clear_sign_bit_copies = 0;
int clear_zero_bit_copies = 0;
int set_zero_bit_copies = 0;
- int insns = 0;
+ int insns = 0, neg_insns, inv_insns;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
- int step_size = TARGET_ARM ? 2 : 1;
+ struct four_ints *immediates;
+ struct four_ints pos_immediates, neg_immediates, inv_immediates;
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
@@ -2789,7 +2933,6 @@
{
case SET:
can_invert = 1;
- can_negate = 1;
break;
case PLUS:
@@ -2817,9 +2960,6 @@
gen_rtx_SET (VOIDmode, target, source));
return 1;
}
-
- if (TARGET_THUMB2)
- can_invert = 1;
break;
case AND:
@@ -2861,6 +3001,7 @@
gen_rtx_NOT (mode, source)));
return 1;
}
+ final_invert = 1;
break;
case MINUS:
@@ -2883,7 +3024,6 @@
source)));
return 1;
}
- can_negate = 1;
break;
@@ -2892,9 +3032,7 @@
}
/* If we can do it in one insn get out quickly. */
- if (const_ok_for_arm (val)
- || (can_negate_initial && const_ok_for_arm (-val))
- || (can_invert && const_ok_for_arm (~val)))
+ if (const_ok_for_op (val, code))
{
if (generate)
emit_constant_insn (cond,
@@ -2947,15 +3085,6 @@
switch (code)
{
case SET:
- /* See if we can use movw. */
- if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
- {
- if (generate)
- emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
- GEN_INT (val)));
- return 1;
- }
-
/* See if we can do this by sign_extending a constant that is known
to be negative. This is a good, way of doing it, since the shift
may well merge into a subsequent insn. */
@@ -3306,121 +3435,97 @@
break;
}
- for (i = 0; i < 32; i++)
- if (remainder & (1 << i))
- num_bits_set++;
-
- if ((code == AND)
- || (code != IOR && can_invert && num_bits_set > 16))
- remainder ^= 0xffffffff;
- else if (code == PLUS && num_bits_set > 16)
- remainder = (-remainder) & 0xffffffff;
-
- /* For XOR, if more than half the bits are set and there's a sequence
- of more than 8 consecutive ones in the pattern then we can XOR by the
- inverted constant and then invert the final result; this may save an
- instruction and might also lead to the final mvn being merged with
- some other operation. */
- else if (code == XOR && num_bits_set > 16
- && (count_insns_for_constant (remainder ^ 0xffffffff,
- find_best_start
- (remainder ^ 0xffffffff))
- < count_insns_for_constant (remainder,
- find_best_start (remainder))))
- {
- remainder ^= 0xffffffff;
- final_invert = 1;
+ /* Calculate what the instruction sequences would be if we generated it
+ normally, negated, or inverted. */
+ if (code == AND)
+ /* AND cannot be split into multiple insns, so invert and use BIC. */
+ insns = 99;
+ else
+ insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
+
+ if (can_negate)
+ neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
+ &neg_immediates);
+ else
+ neg_insns = 99;
+
+ if (can_invert || final_invert)
+ inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
+ &inv_immediates);
+ else
+ inv_insns = 99;
+
+ immediates = &pos_immediates;
+
+ /* Is the negated immediate sequence more efficient? */
+ if (neg_insns < insns && neg_insns <= inv_insns)
+ {
+ insns = neg_insns;
+ immediates = &neg_immediates;
+ }
+ else
+ can_negate = 0;
+
+ /* Is the inverted immediate sequence more efficient?
+ We must allow for an extra NOT instruction for XOR operations, although
+ there is some chance that the final 'mvn' will get optimized later. */
+ if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
+ {
+ insns = inv_insns;
+ immediates = &inv_immediates;
}
else
{
can_invert = 0;
- can_negate = 0;
+ final_invert = 0;
}
- /* Now try and find a way of doing the job in either two or three
- instructions.
- We start by looking for the largest block of zeros that are aligned on
- a 2-bit boundary, we then fill up the temps, wrapping around to the
- top of the word when we drop off the bottom.
- In the worst case this code should produce no more than four insns.
- Thumb-2 constants are shifted, not rotated, so the MSB is always the
- best place to start. */
-
- /* ??? Use thumb2 replicated constants when the high and low halfwords are
- the same. */
- {
- /* Now start emitting the insns. */
- i = find_best_start (remainder);
- do
- {
- int end;
-
- if (i <= 0)
- i += 32;
- if (remainder & (3 << (i - 2)))
- {
- end = i - 8;
- if (end < 0)
- end += 32;
- temp1 = remainder & ((0x0ff << end)
- | ((i < end) ? (0xff >> (32 - end)) : 0));
- remainder &= ~temp1;
-
- if (generate)
- {
- rtx new_src, temp1_rtx;
-
- if (code == SET || code == MINUS)
- {
- new_src = (subtargets ? gen_reg_rtx (mode) : target);
- if (can_invert && code != MINUS)
- temp1 = ~temp1;
- }
- else
- {
- if ((final_invert || remainder) && subtargets)
- new_src = gen_reg_rtx (mode);
- else
- new_src = target;
- if (can_invert)
- temp1 = ~temp1;
- else if (can_negate)
- temp1 = -temp1;
- }
-
- temp1 = trunc_int_for_mode (temp1, mode);
- temp1_rtx = GEN_INT (temp1);
-
- if (code == SET)
- ;
- else if (code == MINUS)
- temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
- else
- temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
-
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, new_src,
- temp1_rtx));
- source = new_src;
- }
-
- if (code == SET)
- {
- can_invert = 0;
- code = PLUS;
- }
- else if (code == MINUS)
+ /* Now output the chosen sequence as instructions. */
+ if (generate)
+ {
+ for (i = 0; i < insns; i++)
+ {
+ rtx new_src, temp1_rtx;
+
+ temp1 = immediates->i[i];
+
+ if (code == SET || code == MINUS)
+ new_src = (subtargets ? gen_reg_rtx (mode) : target);
+ else if ((final_invert || i < (insns - 1)) && subtargets)
+ new_src = gen_reg_rtx (mode);
+ else
+ new_src = target;
+
+ if (can_invert)
+ temp1 = ~temp1;
+ else if (can_negate)
+ temp1 = -temp1;
+
+ temp1 = trunc_int_for_mode (temp1, mode);
+ temp1_rtx = GEN_INT (temp1);
+
+ if (code == SET)
+ ;
+ else if (code == MINUS)
+ temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
+ else
+ temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
+
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ temp1_rtx));
+ source = new_src;
+
+ if (code == SET)
+ {
+ can_negate = can_invert;
+ can_invert = 0;
code = PLUS;
-
- insns++;
- i -= 8 - step_size;
- }
- /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
- shifts. */
- i -= step_size;
- }
- while (remainder);
- }
+ }
+ else if (code == MINUS)
+ code = PLUS;
+ }
+ }
if (final_invert)
{
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000
+++ new/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000
@@ -701,21 +701,24 @@
;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
;; put the duplicated register first, and not try the commutative version.
(define_insn_and_split "*arm_addsi3"
- [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r")
- (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")
- (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
+ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
"TARGET_32BIT"
"@
add%?\\t%0, %1, %2
add%?\\t%0, %1, %2
add%?\\t%0, %2, %1
- sub%?\\t%0, %1, #%n2
- sub%?\\t%0, %1, #%n2
+ addw%?\\t%0, %1, %2
+ addw%?\\t%0, %1, %2
+ sub%?\\t%0, %1, #%n2
+ sub%?\\t%0, %1, #%n2
+ subw%?\\t%0, %1, #%n2
+ subw%?\\t%0, %1, #%n2
#"
"TARGET_32BIT
&& GET_CODE (operands[2]) == CONST_INT
- && !(const_ok_for_arm (INTVAL (operands[2]))
- || const_ok_for_arm (-INTVAL (operands[2])))
+ && !const_ok_for_op (INTVAL (operands[2]), PLUS)
&& (reload_completed || !arm_eliminable_register (operands[1]))"
[(clobber (const_int 0))]
"
@@ -724,8 +727,9 @@
operands[1], 0);
DONE;
"
- [(set_attr "length" "4,4,4,4,4,16")
- (set_attr "predicable" "yes")]
+ [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+ (set_attr "predicable" "yes")
+ (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
)
(define_insn_and_split "*thumb1_addsi3"
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-01-03 20:52:22 +0000
+++ new/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000
@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -74,6 +74,18 @@
(and (match_code "const_int")
(match_test "(ival & 0xffff0000) == 0")))))
+(define_constraint "Pj"
+ "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
+ (and (match_code "const_int")
+ (and (match_test "TARGET_THUMB2")
+ (match_test "(ival & 0xfffff000) == 0"))))
+
+(define_constraint "PJ"
+ "@internal A constant that satisfies the Pj constraint if negated."
+ (and (match_code "const_int")
+ (and (match_test "TARGET_THUMB2")
+ (match_test "((-ival) & 0xfffff000) == 0"))))
+
(define_register_constraint "k" "STACK_REG"
"@internal The stack register.")
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,27 @@
+/* Ensure simple replicated constant immediates work. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a + 0xfefefefe;
+}
+
+/* { dg-final { scan-assembler "add.*#-16843010" } } */
+
+int
+foo2 (int a)
+{
+ return a - 0xab00ab00;
+}
+
+/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
+
+int
+foo3 (int a)
+{
+ return a & 0x00cd00cd;
+}
+
+/* { dg-final { scan-assembler "and.*#13435085" } } */
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,75 @@
+/* Ensure split constants can use replicated patterns. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a + 0xfe00fe01;
+}
+
+/* { dg-final { scan-assembler "add.*#-33489408" } } */
+/* { dg-final { scan-assembler "add.*#1" } } */
+
+int
+foo2 (int a)
+{
+ return a + 0xdd01dd00;
+}
+
+/* { dg-final { scan-assembler "add.*#-587145984" } } */
+/* { dg-final { scan-assembler "add.*#65536" } } */
+
+int
+foo3 (int a)
+{
+ return a + 0x00443344;
+}
+
+/* { dg-final { scan-assembler "add.*#4456516" } } */
+/* { dg-final { scan-assembler "add.*#13056" } } */
+
+int
+foo4 (int a)
+{
+ return a + 0x77330033;
+}
+
+/* { dg-final { scan-assembler "add.*#1996488704" } } */
+/* { dg-final { scan-assembler "add.*#3342387" } } */
+
+int
+foo5 (int a)
+{
+ return a + 0x11221122;
+}
+
+/* { dg-final { scan-assembler "add.*#285217024" } } */
+/* { dg-final { scan-assembler "add.*#2228258" } } */
+
+int
+foo6 (int a)
+{
+ return a + 0x66666677;
+}
+
+/* { dg-final { scan-assembler "add.*#1717986918" } } */
+/* { dg-final { scan-assembler "add.*#17" } } */
+
+int
+foo7 (int a)
+{
+ return a + 0x99888888;
+}
+
+/* { dg-final { scan-assembler "add.*#-2004318072" } } */
+/* { dg-final { scan-assembler "add.*#285212672" } } */
+
+int
+foo8 (int a)
+{
+ return a + 0xdddddfff;
+}
+
+/* { dg-final { scan-assembler "add.*#-572662307" } } */
+/* { dg-final { scan-assembler "addw.*#546" } } */
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,28 @@
+/* Ensure negated/inverted replicated constant immediates work. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a | 0xffffff00;
+}
+
+/* { dg-final { scan-assembler "orn.*#255" } } */
+
+int
+foo2 (int a)
+{
+ return a & 0xffeeffee;
+}
+
+/* { dg-final { scan-assembler "bic.*#1114129" } } */
+
+int
+foo3 (int a)
+{
+ return a & 0xaaaaaa00;
+}
+
+/* { dg-final { scan-assembler "and.*#-1431655766" } } */
+/* { dg-final { scan-assembler "bic.*#170" } } */
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 2011-08-25 13:31:00 +0000
@@ -0,0 +1,22 @@
+/* Ensure replicated constants don't make things worse. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ /* It might be tempting to use 0x01000100, but it wouldn't help. */
+ return a + 0x01f001e0;
+}
+
+/* { dg-final { scan-assembler "add.*#32505856" } } */
+/* { dg-final { scan-assembler "add.*#480" } } */
+
+int
+foo2 (int a)
+{
+ return a + 0x0f100e10;
+}
+
+/* { dg-final { scan-assembler "add.*#252706816" } } */
+/* { dg-final { scan-assembler "add.*#3600" } } */

View File

@@ -1,201 +0,0 @@
2011-09-12 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
PR target/49030
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
reusing the old code from get_arm_condition_code. Return ARM_NV
for invalid comparison codes.
(get_arm_condition_code): Redefine in terms of
maybe_get_arm_condition_code.
* config/arm/predicates.md (arm_comparison_operator): Use
maybe_get_arm_condition_code.
gcc/testsuite/
PR target/49030
* gcc.dg/torture/pr49030.c: New test.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000
@@ -179,6 +179,7 @@
#endif
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
#ifdef RTX_CODE
+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
extern void thumb1_final_prescan_insn (rtx);
extern void thumb2_final_prescan_insn (rtx);
extern const char *thumb_load_double_from_address (rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000
+++ new/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000
@@ -17494,10 +17494,10 @@
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
- `arm_condition_codes'. COMPARISON should be an rtx like
- `(eq (...) (...))'. */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
+ `arm_condition_codes', or ARM_NV if the comparison is invalid.
+ COMPARISON should be an rtx like `(eq (...) (...))'. */
+enum arm_cond_code
+maybe_get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
@@ -17521,11 +17521,11 @@
case CC_DLTUmode: code = ARM_CC;
dominance:
- gcc_assert (comp_code == EQ || comp_code == NE);
-
if (comp_code == EQ)
return ARM_INVERSE_CONDITION_CODE (code);
- return code;
+ if (comp_code == NE)
+ return code;
+ return ARM_NV;
case CC_NOOVmode:
switch (comp_code)
@@ -17534,7 +17534,7 @@
case EQ: return ARM_EQ;
case GE: return ARM_PL;
case LT: return ARM_MI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Zmode:
@@ -17542,7 +17542,7 @@
{
case NE: return ARM_NE;
case EQ: return ARM_EQ;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Nmode:
@@ -17550,7 +17550,7 @@
{
case NE: return ARM_MI;
case EQ: return ARM_PL;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCFPEmode:
@@ -17575,7 +17575,7 @@
/* UNEQ and LTGT do not have a representation. */
case UNEQ: /* Fall through. */
case LTGT: /* Fall through. */
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_SWPmode:
@@ -17591,7 +17591,7 @@
case GTU: return ARM_CC;
case LEU: return ARM_CS;
case LTU: return ARM_HI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Cmode:
@@ -17599,7 +17599,7 @@
{
case LTU: return ARM_CS;
case GEU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_CZmode:
@@ -17611,7 +17611,7 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_NCVmode:
@@ -17621,7 +17621,7 @@
case LT: return ARM_LT;
case GEU: return ARM_CS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCmode:
@@ -17637,13 +17637,22 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
default: gcc_unreachable ();
}
}
+/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
+{
+ enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
+ gcc_assert (code != ARM_NV);
+ return code;
+}
+
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-08-13 08:40:36 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000
@@ -242,10 +242,9 @@
;; True for integer comparisons and, if FP is active, for comparisons
;; other than LTGT or UNEQ.
(define_special_predicate "arm_comparison_operator"
- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
- && (TARGET_FPA || TARGET_VFP)")
- (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
+ (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
+ unordered,ordered,unlt,unle,unge,ungt")
+ (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
(define_special_predicate "lt_ge_comparison_operator"
(match_code "lt,ge"))
=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000
@@ -0,0 +1,19 @@
+void
+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
+ unsigned long dst_skip)
+{
+ long long y;
+ while (nsamples--)
+ {
+ y = (long long) (*src * 8388608.0f) << 8;
+ if (y > 2147483647) {
+ *(int *) dst = 2147483647;
+ } else if (y < -2147483647 - 1) {
+ *(int *) dst = -2147483647 - 1;
+ } else {
+ *(int *) dst = (int) y;
+ }
+ dst += dst_skip;
+ src++;
+ }
+}

View File

@@ -1,38 +0,0 @@
2011-09-01 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/predicates.md (shift_amount_operand): Ensure shift
amount is positive.
gcc/testsuite/
* gcc.dg/pr50193-1.c: New file.
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-05 09:40:19 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000
@@ -132,7 +132,8 @@
(define_predicate "shift_amount_operand"
(ior (and (match_test "TARGET_ARM")
(match_operand 0 "s_register_operand"))
- (match_operand 0 "const_int_operand")))
+ (and (match_operand 0 "const_int_operand")
+ (match_test "INTVAL (op) > 0"))))
(define_predicate "arm_add_operand"
(ior (match_operand 0 "arm_rhs_operand")
=== added file 'gcc/testsuite/gcc.dg/pr50193-1.c'
--- old/gcc/testsuite/gcc.dg/pr50193-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/pr50193-1.c 2011-09-01 12:22:14 +0000
@@ -0,0 +1,10 @@
+/* PR 50193: ARM: ICE on a | (b << negative-constant) */
+/* Ensure that the compiler doesn't ICE. */
+
+/* { dg-options "-O2" } */
+
+int
+foo(int a, int b)
+{
+ return a | (b << -3); /* { dg-warning "left shift count is negative" } */
+}

View File

@@ -1,47 +0,0 @@
2011-09-12 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF mainline:
2011-09-08 Andrew Stubbs <ams@codesourcery.com>
PR tree-optimization/50318
gcc/
* tree-ssa-math-opts.c (convert_plusminus_to_widen): Correct
typo in use of mult_rhs1 and mult_rhs2.
gcc/testsuite/
* gcc.target/arm/pr50318-1.c: New file.
=== added file 'gcc/testsuite/gcc.target/arm/pr50318-1.c'
--- old/gcc/testsuite/gcc.target/arm/pr50318-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/pr50318-1.c 2011-09-08 20:11:43 +0000
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_dsp } */
+
+long long test (unsigned int sec, unsigned long long nsecs)
+{
+ return (long long)(long)sec * 1000000000L + (long long)(unsigned
+ long)nsecs;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
=== modified file 'gcc/tree-ssa-math-opts.c'
--- old/gcc/tree-ssa-math-opts.c 2011-08-09 10:26:48 +0000
+++ new/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000
@@ -1699,9 +1699,9 @@
/* Handle constants. */
if (TREE_CODE (mult_rhs1) == INTEGER_CST)
- rhs1 = fold_convert (type1, mult_rhs1);
+ mult_rhs1 = fold_convert (type1, mult_rhs1);
if (TREE_CODE (mult_rhs2) == INTEGER_CST)
- rhs2 = fold_convert (type2, mult_rhs2);
+ mult_rhs2 = fold_convert (type2, mult_rhs2);
gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
add_rhs);

View File

@@ -1,92 +0,0 @@
2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
gcc/testsuite/
* gcc.target/arm/pr50099.c: Fix testcase from previous commit.
2011-09-12 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
LP:838994
gcc/
Backport from mainline.
2011-09-06 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/50099
* config/arm/iterators.md (qhs_zextenddi_cstr): New.
(qhs_zextenddi_op): New.
* config/arm/arm.md ("zero_extend<mode>di2"): Use them.
* config/arm/predicates.md ("arm_extendqisi_mem_op"):
Distinguish between ARM and Thumb2 states.
gcc/testsuite/
* gcc.target/arm/pr50099.c: New test.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000
+++ new/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000
@@ -4136,8 +4136,8 @@
(define_insn "zero_extend<mode>di2"
[(set (match_operand:DI 0 "s_register_operand" "=r")
- (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
- "<qhs_extenddi_cstr>")))]
+ (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
+ "<qhs_zextenddi_cstr>")))]
"TARGET_32BIT <qhs_zextenddi_cond>"
"#"
[(set_attr "length" "8")
=== modified file 'gcc/config/arm/iterators.md'
--- old/gcc/config/arm/iterators.md 2011-05-03 15:14:56 +0000
+++ new/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000
@@ -379,10 +379,14 @@
(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
(QI "&& arm_arch6")])
+(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand")
+ (HI "nonimmediate_operand")
+ (QI "nonimmediate_operand")])
(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
(HI "nonimmediate_operand")
- (QI "nonimmediate_operand")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+ (QI "arm_reg_or_extendqisi_mem_op")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
;;----------------------------------------------------------------------------
;; Code attributes
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-12 11:24:34 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000
@@ -289,8 +289,11 @@
(define_special_predicate "arm_extendqisi_mem_op"
(and (match_operand 0 "memory_operand")
- (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0),
- SIGN_EXTEND, 0)")))
+ (match_test "TARGET_ARM ? arm_legitimate_address_outer_p (mode,
+ XEXP (op, 0),
+ SIGN_EXTEND,
+ 0)
+ : memory_address_p (QImode, XEXP (op, 0))")))
(define_special_predicate "arm_reg_or_extendqisi_mem_op"
(ior (match_operand 0 "arm_extendqisi_mem_op")
=== added file 'gcc/testsuite/gcc.target/arm/pr50099.c'
--- old/gcc/testsuite/gcc.target/arm/pr50099.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/pr50099.c 2011-09-09 16:42:45 +0000
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo (signed char * arg)
+{
+ long long temp_1;
+
+ temp_1 = arg[256];
+ return temp_1;
+}

View File

@@ -1,767 +0,0 @@
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from FSF mainline:
2011-04-06 Wei Guozhi <carrot@google.com>
PR target/47855
gcc/
* config/arm/arm.md (arm_cmpsi_insn): Compute attr "length".
(arm_cond_branch): Likewise.
(arm_cond_branch_reversed): Likewise.
(arm_jump): Likewise.
(push_multi): Likewise.
* config/arm/constraints.md (Py): New constraint.
2011-04-08 Wei Guozhi <carrot@google.com>
PR target/47855
* config/arm/arm-protos.h (arm_attr_length_push_multi): New prototype.
* config/arm/arm.c (arm_attr_length_push_multi): New function.
* config/arm/arm.md (*push_multi): Change the length computation to
call a C function.
2011-08-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from FSF mainline:
2011-08-18 Jiangning Liu <jiangning.liu@arm.com>
gcc/
* config/arm/arm.md (*ior_scc_scc): Enable for Thumb2 as well.
(*ior_scc_scc_cmp): Likewise.
(*and_scc_scc): Likewise.
(*and_scc_scc_cmp): Likewise.
(*and_scc_scc_nodom): Likewise.
(*cmp_ite0, *cmp_ite1, *cmp_and, *cmp_ior): Handle Thumb2.
gcc/testsuite
* gcc.target/arm/thumb2-cond-cmp-1.c: New. Make sure conditional
compare can be generated.
* gcc.target/arm/thumb2-cond-cmp-2.c: Likewise.
* gcc.target/arm/thumb2-cond-cmp-3.c: Likewise.
* gcc.target/arm/thumb2-cond-cmp-4.c: Likewise.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-09-12 11:03:11 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000
@@ -156,6 +156,7 @@
extern const char *arm_output_memory_barrier (rtx *);
extern const char *arm_output_sync_insn (rtx, rtx *);
extern unsigned int arm_sync_loop_insns (rtx , rtx *);
+extern int arm_attr_length_push_multi(rtx, rtx);
#if defined TREE_CODE
extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-12 11:03:11 +0000
+++ new/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000
@@ -24391,4 +24391,30 @@
return NO_REGS;
}
+/* Compute the attribute "length" of insn "*push_multi".
+ So this function MUST be kept in sync with that insn pattern. */
+int
+arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
+{
+ int i, regno, hi_reg;
+ int num_saves = XVECLEN (parallel_op, 0);
+
+ /* ARM mode. */
+ if (TARGET_ARM)
+ return 4;
+
+ /* Thumb2 mode. */
+ regno = REGNO (first_op);
+ hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
+ for (i = 1; i < num_saves && !hi_reg; i++)
+ {
+ regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
+ hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
+ }
+
+ if (!hi_reg)
+ return 2;
+ return 4;
+}
+
#include "gt-arm.h"
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-09-12 12:32:29 +0000
+++ new/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
@@ -48,6 +48,15 @@
(DOM_CC_X_OR_Y 2)
]
)
+;; conditional compare combination
+(define_constants
+ [(CMP_CMP 0)
+ (CMN_CMP 1)
+ (CMP_CMN 2)
+ (CMN_CMN 3)
+ (NUM_OF_COND_CMP 4)
+ ]
+)
;; UNSPEC Usage:
;; Note: sin and cos are no-longer used.
@@ -7198,13 +7207,17 @@
(define_insn "*arm_cmpsi_insn"
[(set (reg:CC CC_REGNUM)
- (compare:CC (match_operand:SI 0 "s_register_operand" "r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L")))]
+ (compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r")
+ (match_operand:SI 1 "arm_add_operand" "Py,r,rI,L")))]
"TARGET_32BIT"
"@
cmp%?\\t%0, %1
+ cmp%?\\t%0, %1
+ cmp%?\\t%0, %1
cmn%?\\t%0, #%n1"
- [(set_attr "conds" "set")]
+ [(set_attr "conds" "set")
+ (set_attr "arch" "t2,t2,any,any")
+ (set_attr "length" "2,2,4,4")]
)
(define_insn "*cmpsi_shiftsi"
@@ -7375,7 +7388,14 @@
return \"b%d1\\t%l0\";
"
[(set_attr "conds" "use")
- (set_attr "type" "branch")]
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else
+ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+ (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+ (le (minus (match_dup 0) (pc)) (const_int 256))))
+ (const_int 2)
+ (const_int 4)))]
)
(define_insn "*arm_cond_branch_reversed"
@@ -7394,7 +7414,14 @@
return \"b%D1\\t%l0\";
"
[(set_attr "conds" "use")
- (set_attr "type" "branch")]
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else
+ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+ (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+ (le (minus (match_dup 0) (pc)) (const_int 256))))
+ (const_int 2)
+ (const_int 4)))]
)
@@ -7846,7 +7873,14 @@
return \"b%?\\t%l0\";
}
"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set (attr "length")
+ (if_then_else
+ (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+ (and (ge (minus (match_dup 0) (pc)) (const_int -2044))
+ (le (minus (match_dup 0) (pc)) (const_int 2048))))
+ (const_int 2)
+ (const_int 4)))]
)
(define_insn "*thumb_jump"
@@ -8931,40 +8965,85 @@
(set_attr "length" "8,12")]
)
-;; ??? Is it worth using these conditional patterns in Thumb-2 mode?
(define_insn "*cmp_ite0"
[(set (match_operand 6 "dominant_cc_register" "")
(compare
(if_then_else:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
(const_int 0))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
{
- static const char * const opcodes[4][2] =
- {
- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
- };
+ static const char * const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%d5\\t%0, %1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmp%d5\\t%0, %1\",
+ \"cmn%d4\\t%2, #%n3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmn%d4\\t%2, #%n3\"}
+ };
+ static const char * const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%2, %3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmp\\t%2, %3\",
+ \"cmn\\t%0, #%n1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmn\\t%0, #%n1\"}
+ };
+ static const char * const ite[2] =
+ {
+ \"it\\t%d5\",
+ \"it\\t%d4\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
int swap =
comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
- return opcodes[which_alternative][swap];
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
}"
[(set_attr "conds" "set")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn "*cmp_ite1"
@@ -8972,35 +9051,81 @@
(compare
(if_then_else:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
(const_int 1))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
{
- static const char * const opcodes[4][2] =
- {
- {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
- {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
- };
+ static const char * const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%0, %1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmp\\t%0, %1\",
+ \"cmn\\t%2, #%n3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmn\\t%2, #%n3\"}
+ };
+ static const char * const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%d4\\t%2, %3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmp%d4\\t%2, %3\",
+ \"cmn%D5\\t%0, #%n1\"},
+ {\"cmn%d4\\t%2, #%n3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmn%d4\\t%2, #%n3\",
+ \"cmn%D5\\t%0, #%n1\"}
+ };
+ static const char * const ite[2] =
+ {
+ \"it\\t%d4\",
+ \"it\\t%D5\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
int swap =
comparison_dominates_p (GET_CODE (operands[5]),
reverse_condition (GET_CODE (operands[4])));
- return opcodes[which_alternative][swap];
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
}"
[(set_attr "conds" "set")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn "*cmp_and"
@@ -9008,34 +9133,80 @@
(compare
(and:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
{
- static const char *const opcodes[4][2] =
- {
- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
- };
+ static const char *const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%d5\\t%0, %1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmp%d4\\t%2, %3\"},
+ {\"cmp%d5\\t%0, %1\",
+ \"cmn%d4\\t%2, #%n3\"},
+ {\"cmn%d5\\t%0, #%n1\",
+ \"cmn%d4\\t%2, #%n3\"}
+ };
+ static const char *const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%2, %3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmp\\t%2, %3\",
+ \"cmn\\t%0, #%n1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmp\\t%0, %1\"},
+ {\"cmn\\t%2, #%n3\",
+ \"cmn\\t%0, #%n1\"}
+ };
+ static const char *const ite[2] =
+ {
+ \"it\\t%d5\",
+ \"it\\t%d4\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
int swap =
comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
- return opcodes[which_alternative][swap];
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
}"
[(set_attr "conds" "set")
(set_attr "predicable" "no")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn "*cmp_ior"
@@ -9043,34 +9214,80 @@
(compare
(ior:SI
(match_operator 4 "arm_comparison_operator"
- [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
- (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ [(match_operand:SI 0 "s_register_operand"
+ "l,l,l,r,r,r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand"
+ "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
(match_operator:SI 5 "arm_comparison_operator"
- [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
- (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+ [(match_operand:SI 2 "s_register_operand"
+ "l,r,r,l,l,r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand"
+ "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
(const_int 0)))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"*
-{
- static const char *const opcodes[4][2] =
{
- {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\",
- \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
- {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
- {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\",
- \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
- };
- int swap =
- comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
+ static const char *const cmp1[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp\\t%0, %1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmp\\t%2, %3\"},
+ {\"cmp\\t%0, %1\",
+ \"cmn\\t%2, #%n3\"},
+ {\"cmn\\t%0, #%n1\",
+ \"cmn\\t%2, #%n3\"}
+ };
+ static const char *const cmp2[NUM_OF_COND_CMP][2] =
+ {
+ {\"cmp%D4\\t%2, %3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmp%D4\\t%2, %3\",
+ \"cmn%D5\\t%0, #%n1\"},
+ {\"cmn%D4\\t%2, #%n3\",
+ \"cmp%D5\\t%0, %1\"},
+ {\"cmn%D4\\t%2, #%n3\",
+ \"cmn%D5\\t%0, #%n1\"}
+ };
+ static const char *const ite[2] =
+ {
+ \"it\\t%D4\",
+ \"it\\t%D5\"
+ };
+ static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+ CMP_CMP, CMN_CMP, CMP_CMP,
+ CMN_CMP, CMP_CMN, CMN_CMN};
+ int swap =
+ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
- return opcodes[which_alternative][swap];
-}
-"
+ output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+ if (TARGET_THUMB2) {
+ output_asm_insn (ite[swap], operands);
+ }
+ output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+ return \"\";
+ }
+ "
[(set_attr "conds" "set")
- (set_attr "length" "8")]
+ (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+ (set_attr_alternative "length"
+ [(const_int 6)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (const_int 8)
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 8)
+ (const_int 10))])]
)
(define_insn_and_split "*ior_scc_scc"
@@ -9082,11 +9299,11 @@
[(match_operand:SI 4 "s_register_operand" "r")
(match_operand:SI 5 "arm_add_operand" "rIL")])))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_ARM
+ "TARGET_32BIT
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y)
!= CCmode)"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 7)
(compare
(ior:SI
@@ -9115,9 +9332,9 @@
(set (match_operand:SI 7 "s_register_operand" "=r")
(ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0)
(compare
(ior:SI
@@ -9138,11 +9355,11 @@
[(match_operand:SI 4 "s_register_operand" "r")
(match_operand:SI 5 "arm_add_operand" "rIL")])))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_ARM
+ "TARGET_32BIT
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
!= CCmode)"
"#"
- "TARGET_ARM && reload_completed
+ "TARGET_32BIT && reload_completed
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
!= CCmode)"
[(set (match_dup 7)
@@ -9173,9 +9390,9 @@
(set (match_operand:SI 7 "s_register_operand" "=r")
(and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
- "TARGET_ARM"
+ "TARGET_32BIT"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0)
(compare
(and:SI
@@ -9200,11 +9417,11 @@
[(match_operand:SI 4 "s_register_operand" "r,r,r")
(match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")])))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_ARM
+ "TARGET_32BIT
&& (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
== CCmode)"
"#"
- "TARGET_ARM && reload_completed"
+ "TARGET_32BIT && reload_completed"
[(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
(clobber (reg:CC CC_REGNUM))])
@@ -10314,6 +10531,8 @@
;; Push multiple registers to the stack. Registers are in parallel (use ...)
;; expressions. For simplicity, the first register is also in the unspec
;; part.
+;; To avoid using a GNU extension, the length attribute is computed
+;; by the C function arm_attr_length_push_multi.
(define_insn "*push_multi"
[(match_parallel 2 "multi_register_push"
[(set (match_operand:BLK 0 "memory_operand" "=m")
@@ -10353,7 +10572,9 @@
return \"\";
}"
- [(set_attr "type" "store4")]
+ [(set_attr "type" "store4")
+ (set (attr "length")
+ (symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))]
)
(define_insn "stack_tie"
=== modified file 'gcc/config/arm/constraints.md'
--- old/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000
+++ new/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000
@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -201,6 +201,11 @@
(and (match_code "const_int")
(match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+(define_constraint "Py"
+ "@internal In Thumb-2 state a constant in the range 0 to 255"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,13 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpne" } } */
+
+int f(int i, int j)
+{
+ if ( (i == '+') || (j == '-') ) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,13 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpeq" } } */
+
+int f(int i, int j)
+{
+ if ( (i == '+') && (j == '-') ) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,12 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpgt" } } */
+
+int f(int i, int j)
+{
+ if ( (i >= '+') ? (j > '-') : 0)
+ return 1;
+ else
+ return 0;
+}
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c'
--- old/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c 2011-09-12 14:14:00 +0000
@@ -0,0 +1,12 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpgt" } } */
+
+int f(int i, int j)
+{
+ if ( (i >= '+') ? (j <= '-') : 1)
+ return 1;
+ else
+ return 0;
+}

View File

@@ -1,203 +0,0 @@
2011-09-15 Richard Sandiford <richard.sandiford@linaro.org>
Revert:
gcc/
PR target/49030
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
reusing the old code from get_arm_condition_code. Return ARM_NV
for invalid comparison codes.
(get_arm_condition_code): Redefine in terms of
maybe_get_arm_condition_code.
* config/arm/predicates.md (arm_comparison_operator): Use
maybe_get_arm_condition_code.
gcc/testsuite/
PR target/49030
* gcc.dg/torture/pr49030.c: New test.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000
@@ -180,7 +180,6 @@
#endif
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
#ifdef RTX_CODE
-extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
extern void thumb1_final_prescan_insn (rtx);
extern void thumb2_final_prescan_insn (rtx);
extern const char *thumb_load_double_from_address (rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000
@@ -17494,10 +17494,10 @@
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
- `arm_condition_codes', or ARM_NV if the comparison is invalid.
- COMPARISON should be an rtx like `(eq (...) (...))'. */
-enum arm_cond_code
-maybe_get_arm_condition_code (rtx comparison)
+ `arm_condition_codes'. COMPARISON should be an rtx like
+ `(eq (...) (...))'. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
@@ -17521,11 +17521,11 @@
case CC_DLTUmode: code = ARM_CC;
dominance:
+ gcc_assert (comp_code == EQ || comp_code == NE);
+
if (comp_code == EQ)
return ARM_INVERSE_CONDITION_CODE (code);
- if (comp_code == NE)
- return code;
- return ARM_NV;
+ return code;
case CC_NOOVmode:
switch (comp_code)
@@ -17534,7 +17534,7 @@
case EQ: return ARM_EQ;
case GE: return ARM_PL;
case LT: return ARM_MI;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_Zmode:
@@ -17542,7 +17542,7 @@
{
case NE: return ARM_NE;
case EQ: return ARM_EQ;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_Nmode:
@@ -17550,7 +17550,7 @@
{
case NE: return ARM_MI;
case EQ: return ARM_PL;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CCFPEmode:
@@ -17575,7 +17575,7 @@
/* UNEQ and LTGT do not have a representation. */
case UNEQ: /* Fall through. */
case LTGT: /* Fall through. */
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_SWPmode:
@@ -17591,7 +17591,7 @@
case GTU: return ARM_CC;
case LEU: return ARM_CS;
case LTU: return ARM_HI;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_Cmode:
@@ -17599,7 +17599,7 @@
{
case LTU: return ARM_CS;
case GEU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_CZmode:
@@ -17611,7 +17611,7 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CC_NCVmode:
@@ -17621,7 +17621,7 @@
case LT: return ARM_LT;
case GEU: return ARM_CS;
case LTU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
case CCmode:
@@ -17637,22 +17637,13 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: return ARM_NV;
+ default: gcc_unreachable ();
}
default: gcc_unreachable ();
}
}
-/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
-{
- enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
- gcc_assert (code != ARM_NV);
- return code;
-}
-
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-12 12:32:29 +0000
+++ new/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000
@@ -243,9 +243,10 @@
;; True for integer comparisons and, if FP is active, for comparisons
;; other than LTGT or UNEQ.
(define_special_predicate "arm_comparison_operator"
- (and (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
- unordered,ordered,unlt,unle,unge,ungt")
- (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
+ (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
+ (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
+ && (TARGET_FPA || TARGET_VFP)")
+ (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
(define_special_predicate "lt_ge_comparison_operator"
(match_code "lt,ge"))
=== removed file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-09-05 09:40:19 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
@@ -1,19 +0,0 @@
-void
-sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
- unsigned long dst_skip)
-{
- long long y;
- while (nsamples--)
- {
- y = (long long) (*src * 8388608.0f) << 8;
- if (y > 2147483647) {
- *(int *) dst = 2147483647;
- } else if (y < -2147483647 - 1) {
- *(int *) dst = -2147483647 - 1;
- } else {
- *(int *) dst = (int) y;
- }
- dst += dst_skip;
- src++;
- }
-}

View File

@@ -1,80 +0,0 @@
2011-09-22 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from trunk -r178804:
modulo-sched.c (remove_node_from_ps): Return void
instead of bool.
(optimize_sc): Adjust call to remove_node_from_ps.
(sms_schedule): Add print info.
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000
+++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
@@ -211,7 +211,7 @@
static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
int, int, sbitmap, int *, sbitmap,
sbitmap);
-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
@@ -834,8 +834,7 @@
if (next_ps_i->node->cuid == g->closing_branch->cuid)
break;
- gcc_assert (next_ps_i);
- gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ remove_node_from_ps (ps, next_ps_i);
success =
try_scheduling_node_in_cycle (ps, g->closing_branch,
g->closing_branch->cuid, c,
@@ -1485,8 +1484,8 @@
if (dump_file)
{
fprintf (dump_file,
- "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
- stage_count);
+ "%s:%d SMS succeeded %d %d (with ii, sc)\n",
+ insn_file (tail), insn_line (tail), ps->ii, stage_count);
print_partial_schedule (ps, dump_file);
}
@@ -2810,22 +2809,18 @@
}
-/* Removes the given PS_INSN from the partial schedule. Returns false if the
- node is not found in the partial schedule, else returns true. */
-static bool
+/* Removes the given PS_INSN from the partial schedule. */
+static void
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
{
int row;
- if (!ps || !ps_i)
- return false;
-
+ gcc_assert (ps && ps_i);
+
row = SMODULO (ps_i->cycle, ps->ii);
if (! ps_i->prev_in_row)
{
- if (ps_i != ps->rows[row])
- return false;
-
+ gcc_assert (ps_i == ps->rows[row]);
ps->rows[row] = ps_i->next_in_row;
if (ps->rows[row])
ps->rows[row]->prev_in_row = NULL;
@@ -2839,7 +2834,7 @@
ps->rows_length[row] -= 1;
free (ps_i);
- return true;
+ return;
}
/* Unlike what literature describes for modulo scheduling (which focuses

View File

@@ -1,528 +0,0 @@
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
Replace check_effective_target_arm_neon with
check_effective_target_arm_neon_ok.
Backport from mainline:
2011-09-06 Ira Rosen <ira.rosen@linaro.org>
gcc/
* config/arm/arm.c (arm_preferred_simd_mode): Check
TARGET_NEON_VECTORIZE_DOUBLE instead of
TARGET_NEON_VECTORIZE_QUAD.
(arm_autovectorize_vector_sizes): Likewise.
* config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
mask of mvectorize-with-neon-double. Add RejectNegative.
(mvectorize-with-neon-double): New.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
New procedure.
(add_options_for_quad_vectors): Replace with ...
(add_options_for_double_vectors): ... this.
* gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
support multiple vector sizes since the vectorizer attempts to
vectorize with both vector sizes.
* gcc.dg/vect/no-vfa-vect-79.c,
gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
* gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
* gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
gcc.dg/vect/vect-40.c: Likewise.
* gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
redundant.
* gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
Likewise.
* gcc.dg/vect/vect-peel-4.c: Make ia global.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
@@ -22974,7 +22974,7 @@
return false;
}
-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+/* Use the option -mvectorize-with-neon-double to override the use of quadword
registers when autovectorizing for Neon, at least until multiple vector
widths are supported properly by the middle-end. */
@@ -22985,15 +22985,15 @@
switch (mode)
{
case SFmode:
- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
case SImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
case HImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
case QImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
case DImode:
- if (TARGET_NEON_VECTORIZE_QUAD)
+ if (!TARGET_NEON_VECTORIZE_DOUBLE)
return V2DImode;
break;
@@ -24226,7 +24226,7 @@
static unsigned int
arm_autovectorize_vector_sizes (void)
{
- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000
+++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
@@ -158,9 +158,13 @@
Assume big endian bytes, little endian words
mvectorize-with-neon-quad
-Target Report Mask(NEON_VECTORIZE_QUAD)
+Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE)
Use Neon quad-word (rather than double-word) registers for vectorization
+mvectorize-with-neon-double
+Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE)
+Use Neon double-word (rather than quad-word) registers for vectorization
+
mword-relocations
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
Only generate absolute relocations on word sized values.
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000
@@ -45,6 +45,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000
@@ -53,6 +53,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000
@@ -53,6 +53,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000
@@ -58,5 +58,6 @@
If/when the aliasing problems are resolved, unalignment may
prevent vectorization on some targets. */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000
@@ -46,5 +46,6 @@
If/when the aliasing problems are resolved, unalignment may
prevent vectorization on some targets. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000
@@ -64,6 +64,7 @@
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000
@@ -22,5 +22,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000
@@ -20,5 +20,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000
@@ -22,5 +22,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000
@@ -37,5 +37,6 @@
return 0;
}
-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000
@@ -49,5 +49,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000
@@ -49,5 +49,6 @@
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_float } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include <signal.h>
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
/* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
#include <stdarg.h>
#include "tree-vect.h"
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000
@@ -6,12 +6,12 @@
#define N 128
int ib[N+7];
+int ia[N+1];
__attribute__ ((noinline))
int main1 ()
{
int i;
- int ia[N+1];
/* Don't peel keeping one load and the store aligned. */
for (i = 0; i <= N; i++)
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000
@@ -58,7 +58,8 @@
}
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90'
--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000
+++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000
@@ -19,6 +19,7 @@
end
! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } }
-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } }
! { dg-final { cleanup-tree-dump "vect" } }
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
@@ -3265,6 +3265,24 @@
}]
}
+# Return 1 if the target supports multiple vector sizes (result is cached in the global et_vect_multiple_sizes_saved)
+
+proc check_effective_target_vect_multiple_sizes { } {
+ global et_vect_multiple_sizes_saved
+
+ if [info exists et_vect_multiple_sizes_saved] {
+ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
+ } else {
+ set et_vect_multiple_sizes_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+ set et_vect_multiple_sizes_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2
+ return $et_vect_multiple_sizes_saved
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
@@ -3648,11 +3666,11 @@
return $flags
}
-# Add to FLAGS the flags needed to enable 128-bit vectors.
+# Add to FLAGS the flags needed to enable 64-bit vectors.
-proc add_options_for_quad_vectors { flags } {
+proc add_options_for_double_vectors { flags } {
if [is-effective-target arm_neon_ok] {
- return "$flags -mvectorize-with-neon-quad"
+ return "$flags -mvectorize-with-neon-double"
}
return $flags

View File

@@ -1,387 +0,0 @@
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
(neon_move_hi_quad_<mode>): Likewise.
(move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-27 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi)
(neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di)
(neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si)
(neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands
that produce subreg moves. Define using VQX iterators.
2011-09-28 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-14 Richard Sandiford <richard.sandiford@linaro.org>
* simplify-rtx.c (simplify_subreg): Check that the inner mode is
a scalar integer before applying integer-only optimisations to
inner arithmetic.
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
@@ -1235,66 +1235,14 @@
(const_string "neon_int_1") (const_string "neon_int_5")))]
)
-; FIXME: We wouldn't need the following insns if we could write subregs of
-; vector registers. Make an attempt at removing unnecessary moves, though
-; we're really at the mercy of the register allocator.
-
-(define_insn "neon_move_lo_quad_<mode>"
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
- (vec_concat:ANY128
- (match_operand:<V_HALF> 1 "s_register_operand" "w")
- (vec_select:<V_HALF>
- (match_dup 0)
- (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%e0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_move_hi_quad_<mode>"
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
- (vec_concat:ANY128
- (vec_select:<V_HALF>
- (match_dup 0)
- (match_operand:ANY128 2 "vect_par_constant_low" ""))
- (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
-
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%f0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
(define_expand "move_hi_quad_<mode>"
[(match_operand:ANY128 0 "s_register_operand" "")
(match_operand:<V_HALF> 1 "s_register_operand" "")]
"TARGET_NEON"
{
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
- rtx t1;
- int i;
-
- for (i=0; i < (<V_mode_nunits>/2); i++)
- RTVEC_ELT (v, i) = GEN_INT (i);
-
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
-
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
+ GET_MODE_SIZE (<V_HALF>mode)),
+ operands[1]);
DONE;
})
@@ -1303,16 +1251,9 @@
(match_operand:<V_HALF> 1 "s_register_operand" "")]
"TARGET_NEON"
{
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
- rtx t1;
- int i;
-
- for (i=0; i < (<V_mode_nunits>/2); i++)
- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
-
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
-
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
+ <MODE>mode, 0),
+ operands[1]);
DONE;
})
@@ -2950,183 +2891,27 @@
(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vget_highv16qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
- (parallel [(const_int 8) (const_int 9)
- (const_int 10) (const_int 11)
- (const_int 12) (const_int 13)
- (const_int 14) (const_int 15)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv8hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4si"
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4sf"
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv2di"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
- (parallel [(const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv16qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)
- (const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv8hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4si"
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4sf"
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv2di"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
- (parallel [(const_int 0)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
+(define_expand "neon_vget_high<mode>"
+ [(match_operand:<V_HALF> 0 "s_register_operand")
+ (match_operand:VQX 1 "s_register_operand")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0],
+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+ GET_MODE_SIZE (<V_HALF>mode)));
+ DONE;
+})
+
+(define_expand "neon_vget_low<mode>"
+ [(match_operand:<V_HALF> 0 "s_register_operand")
+ (match_operand:VQX 1 "s_register_operand")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0],
+ simplify_gen_subreg (<V_HALF>mode, operands[1],
+ <MODE>mode, 0));
+ DONE;
+})
(define_insn "neon_vcvt<mode>"
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000
+++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000
@@ -5567,6 +5567,7 @@
/* Optimize SUBREG truncations of zero and sign extended values. */
if ((GET_CODE (op) == ZERO_EXTEND
|| GET_CODE (op) == SIGN_EXTEND)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode))
{
unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
@@ -5605,6 +5606,7 @@
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
&& SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
/* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
to avoid the possibility that an outer LSHIFTRT shifts by more
than the sign extension's sign_bit_copies and introduces zeros
@@ -5624,6 +5626,7 @@
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
&& SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
&& CONST_INT_P (XEXP (op, 1))
&& GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5638,6 +5641,7 @@
the outer subreg is effectively a truncation to the original mode. */
if (GET_CODE (op) == ASHIFT
&& SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
&& CONST_INT_P (XEXP (op, 1))
&& (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5651,7 +5655,7 @@
/* Recognize a word extraction from a multi-word subreg. */
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
- && SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
&& GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD
&& GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode))
&& CONST_INT_P (XEXP (op, 1))
@@ -5673,6 +5677,7 @@
if ((GET_CODE (op) == LSHIFTRT
|| GET_CODE (op) == ASHIFTRT)
+ && SCALAR_INT_MODE_P (innermode)
&& MEM_P (XEXP (op, 0))
&& CONST_INT_P (XEXP (op, 1))
&& GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))

View File

@@ -1,290 +0,0 @@
2011-10-01 Revital Eres <revital.eres@linaro.org>
gcc/
Backport from mainline -r179380 and -r179381
* ddg.c (autoinc_var_is_used_p): New function.
(create_ddg_dep_from_intra_loop_link,
add_cross_iteration_register_deps): Call it.
* ddg.h (autoinc_var_is_used_p): Declare.
* modulo-sched.c (sms_schedule): Handle instructions with REG_INC.
(generate_reg_moves): Call autoinc_var_is_used_p. Skip
instructions that do not set a register and verify no regmoves
are created for !single_set instructions.
gcc/testsuite/
* gcc.dg/sms-10.c: New file
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000
+++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000
@@ -145,6 +145,27 @@
return rtx_mem_access_p (PATTERN (insn));
}
+/* Return true if DEF_INSN has a REG_INC note (i.e. it auto-increments or
+ auto-decrements an address register) whose register is referenced in
+ the pattern of USE_INSN; otherwise return false. The result is used
+ to decide whether to remove the edge between def_insn and use_insn
+ when -fmodulo-sched-allow-regmoves is set. This function doesn't
+ need to consider the specific address register; no reg_moves will be
+ allowed for any life range defined by def_insn and used by use_insn,
+ if use_insn uses an address register auto-inc'ed by def_insn. */
+bool
+autoinc_var_is_used_p (rtx def_insn, rtx use_insn)
+{
+ rtx note;
+
+ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1))
+ if (REG_NOTE_KIND (note) == REG_INC
+ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn)))
+ return true;
+
+ return false;
+}
+
/* Computes the dependence parameters (latency, distance etc.), creates
a ddg_edge and adds it to the given DDG. */
static void
@@ -173,10 +194,15 @@
compensate for that by generating reg-moves based on the life-range
analysis. The anti-deps that will be deleted are the ones which
have true-deps edges in the opposite direction (in other words
- the kernel has only one def of the relevant register). TODO:
- support the removal of all anti-deps edges, i.e. including those
+ the kernel has only one def of the relevant register).
+ If the address that is being auto-inc or auto-dec in DEST_NODE
+ is used in SRC_NODE then do not remove the edge to make sure
+ reg-moves will not be created for this address.
+ TODO: support the removal of all anti-deps edges, i.e. including those
whose register has multiple defs in the loop. */
- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
+ if (flag_modulo_sched_allow_regmoves
+ && (t == ANTI_DEP && dt == REG_DEP)
+ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn))
{
rtx set;
@@ -302,10 +328,14 @@
gcc_assert (first_def_node);
/* Always create the edge if the use node is a branch in
- order to prevent the creation of reg-moves. */
+ order to prevent the creation of reg-moves.
+ If the address that is being auto-inc or auto-dec in LAST_DEF
+ is used in USE_INSN then do not remove the edge to make sure
+ reg-moves will not be created for that address. */
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
|| !flag_modulo_sched_allow_regmoves
- || JUMP_P (use_node->insn))
+ || JUMP_P (use_node->insn)
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn))
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
=== modified file 'gcc/ddg.h'
--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000
+++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000
@@ -186,4 +186,6 @@
int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
+bool autoinc_var_is_used_p (rtx, rtx);
+
#endif /* GCC_DDG_H */
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
+++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000
@@ -477,7 +477,12 @@
sbitmap *uses_of_defs;
rtx last_reg_move;
rtx prev_reg, old_reg;
-
+ rtx set = single_set (u->insn);
+
+ /* Skip instructions that do not set a register. */
+ if ((set && !REG_P (SET_DEST (set))))
+ continue;
+
/* Compute the number of reg_moves needed for u, by looking at life
ranges started at u (excluding self-loops). */
for (e = u->out; e; e = e->next_out)
@@ -494,6 +499,20 @@
&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
nreg_moves4e--;
+ if (nreg_moves4e >= 1)
+ {
+ /* !single_set instructions are not supported yet and
+ thus we do not expect to encounter them in the loop
+ except from the doloop part. For the latter case
+ we assume no regmoves are generated as the doloop
+ instructions are tied to the branch with an edge. */
+ gcc_assert (set);
+ /* If the instruction contains auto-inc register then
+ validate that the reg-move is being generated for the
+ target register rather than the inc'ed register. */
+ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
+ }
+
nreg_moves = MAX (nreg_moves, nreg_moves4e);
}
@@ -1266,12 +1285,10 @@
continue;
}
- /* Don't handle BBs with calls or barriers or auto-increment insns
- (to avoid creating invalid reg-moves for the auto-increment insns),
+ /* Don't handle BBs with calls or barriers
or !single_set with the exception of instructions that include
count_reg---these instructions are part of the control part
that do-loop recognizes.
- ??? Should handle auto-increment insns.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
{
@@ -1282,7 +1299,6 @@
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
&& !reg_mentioned_p (count_reg, insn))
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
break;
@@ -1296,8 +1312,6 @@
fprintf (dump_file, "SMS loop-with-call\n");
else if (BARRIER_P (insn))
fprintf (dump_file, "SMS loop-with-barrier\n");
- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
- fprintf (dump_file, "SMS reg inc\n");
else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
fprintf (dump_file, "SMS loop-with-not-single-set\n");
=== added file 'gcc/testsuite/gcc.dg/sms-10.c'
--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000
@@ -0,0 +1,118 @@
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
+
+
+typedef __SIZE_TYPE__ size_t;
+extern void *malloc (size_t);
+extern void free (void *);
+extern void abort (void);
+
+struct regstat_n_sets_and_refs_t
+{
+ int sets;
+ int refs;
+};
+
+struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs;
+
+struct df_reg_info
+{
+ unsigned int n_refs;
+};
+
+struct df_d
+{
+ struct df_reg_info **def_regs;
+ struct df_reg_info **use_regs;
+};
+struct df_d *df;
+
+static inline int
+REG_N_SETS (int regno)
+{
+ return regstat_n_sets_and_refs[regno].sets;
+}
+
+__attribute__ ((noinline))
+ int max_reg_num (void)
+{
+ return 100;
+}
+
+__attribute__ ((noinline))
+ void regstat_init_n_sets_and_refs (void)
+{
+ unsigned int i;
+ unsigned int max_regno = max_reg_num ();
+
+ for (i = 0; i < max_regno; i++)
+ {
+ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs));
+ (regstat_n_sets_and_refs[i].refs =
+ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i));
+ }
+}
+
+int a_sets[100] =
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
+ 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+};
+
+int a_refs[100] =
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
+ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
+ 78, 80, 82,
+ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116,
+ 118, 120,
+ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150,
+ 152, 154, 156,
+ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
+ 188, 190, 192,
+ 194, 196, 198
+};
+
+int
+main ()
+{
+ struct df_reg_info *b[100], *c[100];
+ struct df_d df1;
+ size_t s = sizeof (struct df_reg_info);
+ struct regstat_n_sets_and_refs_t a[100];
+
+ df = &df1;
+ regstat_n_sets_and_refs = a;
+ int i;
+
+ for (i = 0; i < 100; i++)
+ {
+ b[i] = (struct df_reg_info *) malloc (s);
+ b[i]->n_refs = i;
+ c[i] = (struct df_reg_info *) malloc (s);
+ c[i]->n_refs = i;
+ }
+
+ df1.def_regs = b;
+ df1.use_regs = c;
+ regstat_init_n_sets_and_refs ();
+
+ for (i = 0; i < 100; i++)
+ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i]))
+ abort ();
+
+ for (i = 0; i < 100; i++)
+ {
+ free (b[i]);
+ free (c[i]);
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "sms" } } */

View File

@@ -1,105 +0,0 @@
2011-10-03 Michael Hope <michael.hope@linaro.org>
Backport from mainline:
2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru>
gcc/
* config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New
define_insn patterns for combine.
gcc/testsuite/
* gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test.
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
+++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000
@@ -5428,3 +5428,32 @@
emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
DONE;
})
+
+(define_insn "neon_vabd<mode>_2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabd<mode>_3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")]
+ UNSPEC_VSUB)))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c'
--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+float32x2_t f_sub_abs_to_vabd_32()
+{
+ float32x2_t val1 = vdup_n_f32 (10);
+ float32x2_t val2 = vdup_n_f32 (30);
+ float32x2_t sres = vsub_f32(val1, val2);
+ float32x2_t res = vabs_f32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
+
+#include <arm_neon.h>
+int8x8_t sub_abs_to_vabd_8()
+{
+ int8x8_t val1 = vdup_n_s8 (10);
+ int8x8_t val2 = vdup_n_s8 (30);
+ int8x8_t sres = vsub_s8(val1, val2);
+ int8x8_t res = vabs_s8 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
+
+int16x4_t sub_abs_to_vabd_16()
+{
+ int16x4_t val1 = vdup_n_s16 (10);
+ int16x4_t val2 = vdup_n_s16 (30);
+ int16x4_t sres = vsub_s16(val1, val2);
+ int16x4_t res = vabs_s16 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
+
+int32x2_t sub_abs_to_vabd_32()
+{
+ int32x2_t val1 = vdup_n_s32 (10);
+ int32x2_t val2 = vdup_n_s32 (30);
+ int32x2_t sres = vsub_s32(val1, val2);
+ int32x2_t res = vabs_s32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s32" } }*/

View File

@@ -1,436 +0,0 @@
2011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
* config/arm/predicates.md (expandable_comparison_operator): New
predicate, extracted from...
(arm_comparison_operator): ...here.
* config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4)
(cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc)
(movdfcc): Use expandable_comparison_operator.
gcc/testsuite/
Backport from mainline:
2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/arm/cmp-1.c: New test.
* gcc.target/arm/cmp-2.c: Likewise.
2011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
Backport from mainline:
2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
PR target/49030
* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
* config/arm/arm.c (maybe_get_arm_condition_code): New function,
reusing the old code from get_arm_condition_code. Return ARM_NV
for invalid comparison codes.
(get_arm_condition_code): Redefine in terms of
maybe_get_arm_condition_code.
* config/arm/predicates.md (arm_comparison_operator): Use
maybe_get_arm_condition_code.
gcc/testsuite/
Backport from mainline:
2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
PR target/49030
* gcc.dg/torture/pr49030.c: New test.
=== modified file 'gcc/config/arm/arm-protos.h'
--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
@@ -180,6 +180,7 @@
#endif
extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
#ifdef RTX_CODE
+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
extern void thumb1_final_prescan_insn (rtx);
extern void thumb2_final_prescan_insn (rtx);
extern const char *thumb_load_double_from_address (rtx *);
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
+++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
@@ -17494,10 +17494,10 @@
decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
- `arm_condition_codes'. COMPARISON should be an rtx like
- `(eq (...) (...))'. */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
+ `arm_condition_codes', or ARM_NV if the comparison is invalid.
+ COMPARISON should be an rtx like `(eq (...) (...))'. */
+enum arm_cond_code
+maybe_get_arm_condition_code (rtx comparison)
{
enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
enum arm_cond_code code;
@@ -17521,11 +17521,11 @@
case CC_DLTUmode: code = ARM_CC;
dominance:
- gcc_assert (comp_code == EQ || comp_code == NE);
-
if (comp_code == EQ)
return ARM_INVERSE_CONDITION_CODE (code);
- return code;
+ if (comp_code == NE)
+ return code;
+ return ARM_NV;
case CC_NOOVmode:
switch (comp_code)
@@ -17534,7 +17534,7 @@
case EQ: return ARM_EQ;
case GE: return ARM_PL;
case LT: return ARM_MI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Zmode:
@@ -17542,7 +17542,7 @@
{
case NE: return ARM_NE;
case EQ: return ARM_EQ;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Nmode:
@@ -17550,7 +17550,7 @@
{
case NE: return ARM_MI;
case EQ: return ARM_PL;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCFPEmode:
@@ -17575,7 +17575,7 @@
/* UNEQ and LTGT do not have a representation. */
case UNEQ: /* Fall through. */
case LTGT: /* Fall through. */
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_SWPmode:
@@ -17591,7 +17591,7 @@
case GTU: return ARM_CC;
case LEU: return ARM_CS;
case LTU: return ARM_HI;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_Cmode:
@@ -17599,7 +17599,7 @@
{
case LTU: return ARM_CS;
case GEU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_CZmode:
@@ -17611,7 +17611,7 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CC_NCVmode:
@@ -17621,7 +17621,7 @@
case LT: return ARM_LT;
case GEU: return ARM_CS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
case CCmode:
@@ -17637,13 +17637,22 @@
case GTU: return ARM_HI;
case LEU: return ARM_LS;
case LTU: return ARM_CC;
- default: gcc_unreachable ();
+ default: return ARM_NV;
}
default: gcc_unreachable ();
}
}
+/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
+{
+ enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
+ gcc_assert (code != ARM_NV);
+ return code;
+}
+
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
void
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
+++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
@@ -6543,7 +6543,7 @@
(define_expand "cbranchsi4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:SI 1 "s_register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -6594,7 +6594,7 @@
(define_expand "cbranchsf4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:SF 1 "s_register_operand" "")
(match_operand:SF 2 "arm_float_compare_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -6606,7 +6606,7 @@
(define_expand "cbranchdf4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:DF 1 "s_register_operand" "")
(match_operand:DF 2 "arm_float_compare_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -6618,7 +6618,7 @@
(define_expand "cbranchdi4"
[(set (pc) (if_then_else
- (match_operator 0 "arm_comparison_operator"
+ (match_operator 0 "expandable_comparison_operator"
[(match_operand:DI 1 "cmpdi_operand" "")
(match_operand:DI 2 "cmpdi_operand" "")])
(label_ref (match_operand 3 "" ""))
@@ -7473,7 +7473,7 @@
(define_expand "cstoresi4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:SI 2 "s_register_operand" "")
(match_operand:SI 3 "reg_or_int_operand" "")]))]
"TARGET_32BIT || TARGET_THUMB1"
@@ -7609,7 +7609,7 @@
(define_expand "cstoresf4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:SF 2 "s_register_operand" "")
(match_operand:SF 3 "arm_float_compare_operand" "")]))]
"TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -7619,7 +7619,7 @@
(define_expand "cstoredf4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:DF 2 "s_register_operand" "")
(match_operand:DF 3 "arm_float_compare_operand" "")]))]
"TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
@@ -7629,7 +7629,7 @@
(define_expand "cstoredi4"
[(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "arm_comparison_operator"
+ (match_operator:SI 1 "expandable_comparison_operator"
[(match_operand:DI 2 "cmpdi_operand" "")
(match_operand:DI 3 "cmpdi_operand" "")]))]
"TARGET_32BIT"
@@ -7749,7 +7749,7 @@
(define_expand "movsicc"
[(set (match_operand:SI 0 "s_register_operand" "")
- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
+ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "")
(match_operand:SI 2 "arm_not_operand" "")
(match_operand:SI 3 "arm_not_operand" "")))]
"TARGET_32BIT"
@@ -7769,7 +7769,7 @@
(define_expand "movsfcc"
[(set (match_operand:SF 0 "s_register_operand" "")
- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
+ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
(match_operand:SF 2 "s_register_operand" "")
(match_operand:SF 3 "nonmemory_operand" "")))]
"TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -7795,7 +7795,7 @@
(define_expand "movdfcc"
[(set (match_operand:DF 0 "s_register_operand" "")
- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
+ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
(match_operand:DF 2 "s_register_operand" "")
(match_operand:DF 3 "arm_float_add_operand" "")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
@@ -242,11 +242,15 @@
;; True for integer comparisons and, if FP is active, for comparisons
;; other than LTGT or UNEQ.
+(define_special_predicate "expandable_comparison_operator"
+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
+ unordered,ordered,unlt,unle,unge,ungt"))
+
+;; Likewise, but only accept comparisons that are directly supported
+;; by ARM condition codes.
(define_special_predicate "arm_comparison_operator"
- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
- && (TARGET_FPA || TARGET_VFP)")
- (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
+ (and (match_operand 0 "expandable_comparison_operator")
+ (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
(define_special_predicate "lt_ge_comparison_operator"
(match_code "lt,ge"))
=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000
@@ -0,0 +1,19 @@
+void
+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
+ unsigned long dst_skip)
+{
+ long long y;
+ while (nsamples--)
+ {
+ y = (long long) (*src * 8388608.0f) << 8;
+ if (y > 2147483647) {
+ *(int *) dst = 2147483647;
+ } else if (y < -2147483647 - 1) {
+ *(int *) dst = -2147483647 - 1;
+ } else {
+ *(int *) dst = (int) y;
+ }
+ dst += dst_skip;
+ src++;
+ }
+}
=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c'
--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
+/* { dg-final { scan-assembler-not "__aeabi" } } */
+int x, y;
+
+#define TEST_EXPR(NAME, ARGS, EXPR) \
+ int NAME##1 ARGS { return (EXPR); } \
+ int NAME##2 ARGS { return !(EXPR); } \
+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
+ void NAME##4 ARGS { if (EXPR) x++; } \
+ void NAME##5 ARGS { if (!(EXPR)) x++; }
+
+#define TEST(NAME, TYPE, OPERATOR) \
+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \
+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \
+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \
+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \
+ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \
+ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1)
+
+#define TEST_OP(NAME, OPERATOR) \
+ TEST (sc_##NAME, signed char, OPERATOR) \
+ TEST (uc_##NAME, unsigned char, OPERATOR) \
+ TEST (ss_##NAME, short, OPERATOR) \
+ TEST (us_##NAME, unsigned short, OPERATOR) \
+ TEST (si_##NAME, int, OPERATOR) \
+ TEST (ui_##NAME, unsigned int, OPERATOR) \
+ TEST (sll_##NAME, long long, OPERATOR) \
+ TEST (ull_##NAME, unsigned long long, OPERATOR)
+
+TEST_OP (eq, ==)
+TEST_OP (ne, !=)
+TEST_OP (lt, <)
+TEST_OP (gt, >)
+TEST_OP (le, <=)
+TEST_OP (ge, >=)
=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c'
--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_vfp_ok } */
+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
+/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
+/* { dg-final { scan-assembler-not "__aeabi" } } */
+int x, y;
+
+#define EQ(X, Y) ((X) == (Y))
+#define NE(X, Y) ((X) != (Y))
+#define LT(X, Y) ((X) < (Y))
+#define GT(X, Y) ((X) > (Y))
+#define LE(X, Y) ((X) <= (Y))
+#define GE(X, Y) ((X) >= (Y))
+
+#define TEST_EXPR(NAME, ARGS, EXPR) \
+ int NAME##1 ARGS { return (EXPR); } \
+ int NAME##2 ARGS { return !(EXPR); } \
+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
+ void NAME##4 ARGS { if (EXPR) x++; } \
+ void NAME##5 ARGS { if (!(EXPR)) x++; }
+
+#define TEST(NAME, TYPE, OPERATOR) \
+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \
+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \
+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \
+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \
+ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \
+ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1))
+
+#define TEST_OP(NAME, OPERATOR) \
+ TEST (f_##NAME, float, OPERATOR) \
+ TEST (d_##NAME, double, OPERATOR) \
+ TEST (ld_##NAME, long double, OPERATOR)
+
+TEST_OP (eq, EQ)
+TEST_OP (ne, NE)
+TEST_OP (lt, LT)
+TEST_OP (gt, GT)
+TEST_OP (le, LE)
+TEST_OP (ge, GE)
+TEST_OP (blt, __builtin_isless)
+TEST_OP (bgt, __builtin_isgreater)
+TEST_OP (ble, __builtin_islessequal)
+TEST_OP (bge, __builtin_isgreaterequal)
+/* This one should be expanded into separate ordered and equality
+ comparisons. */
+TEST_OP (blg, __builtin_islessgreater)
+TEST_OP (bun, __builtin_isunordered)

View File

@@ -1,378 +0,0 @@
2011-10-06 Ira Rosen <ira.rosen@linaro.org>
Backport from mainline:
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-data-ref.c (dr_analyze_innermost): Add new argument.
Allow not simple iv if analyzing basic block.
(create_data_ref): Update call to dr_analyze_innermost.
(stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise.
* tree-loop-distribution.c (generate_memset_zero): Likewise.
* tree-predcom.c (find_looparound_phi): Likewise.
* tree-data-ref.h (dr_analyze_innermost): Add new argument.
gcc/testsuite/
* gcc.dg/vect/bb-slp-24.c: New.
2011-09-15 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow
read-after-read dependencies in basic block SLP.
gcc/testsuite/
* gcc.dg/vect/bb-slp-25.c: New.
2011-04-21 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use
operand_equal_p to compare DR_BASE_ADDRESSes.
(vect_check_interleaving): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-119.c: New test.
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
+ int stride, int dummy)
+{
+ int i;
+ h /= 8;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] += A*src[0];
+ dst[1] += A*src[1];
+ dst[2] += A*src[2];
+ dst[3] += A*src[3];
+ dst[4] += A*src[4];
+ dst[5] += A*src[5];
+ dst[6] += A*src[6];
+ dst[7] += A*src[7];
+ dst += stride;
+ src += stride;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N; i++)
+ {
+ if (dst[i] != A * i)
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
+{
+ int i;
+ h /= 16;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] += A*src[0] + src[stride];
+ dst[1] += A*src[1] + src[1+stride];
+ dst[2] += A*src[2] + src[2+stride];
+ dst[3] += A*src[3] + src[3+stride];
+ dst[4] += A*src[4] + src[4+stride];
+ dst[5] += A*src[5] + src[5+stride];
+ dst[6] += A*src[6] + src[6+stride];
+ dst[7] += A*src[7] + src[7+stride];
+ dst += 8;
+ src += 8;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N/2; i++)
+ {
+ if (dst[i] != A * i + i + 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+
+#define OUTER 32
+#define INNER 40
+
+static unsigned int
+bar (const unsigned int x[INNER][2], unsigned int sum)
+{
+ int i;
+
+ for (i = 0; i < INNER; i++)
+ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1];
+ return sum;
+}
+
+unsigned int foo (const unsigned int x[OUTER][INNER][2])
+{
+ int i;
+ unsigned int sum;
+
+ sum = 0.0f;
+ for (i = 0; i < OUTER; i++)
+ sum = bar (x[i], sum);
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/tree-data-ref.c'
--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000
+++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000
@@ -721,11 +721,11 @@
}
/* Analyzes the behavior of the memory reference DR in the innermost loop or
- basic block that contains it. Returns true if analysis succeed or false
+ basic block that contains it. Returns true if analysis succeed or false
otherwise. */
bool
-dr_analyze_innermost (struct data_reference *dr)
+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
{
gimple stmt = DR_STMT (dr);
struct loop *loop = loop_containing_stmt (stmt);
@@ -768,14 +768,25 @@
}
else
base = build_fold_addr_expr (base);
+
if (in_loop)
{
if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
false))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return false;
+ if (nest)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of base is not"
+ " affine.\n");
+ return false;
+ }
+ else
+ {
+ base_iv.base = base;
+ base_iv.step = ssize_int (0);
+ base_iv.no_overflow = true;
+ }
}
}
else
@@ -800,10 +811,18 @@
else if (!simple_iv (loop, loop_containing_stmt (stmt),
poffset, &offset_iv, false))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not"
- " affine.\n");
- return false;
+ if (nest)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of offset is not"
+ " affine.\n");
+ return false;
+ }
+ else
+ {
+ offset_iv.base = poffset;
+ offset_iv.step = ssize_int (0);
+ }
}
}
@@ -967,7 +986,7 @@
DR_REF (dr) = memref;
DR_IS_READ (dr) = is_read;
- dr_analyze_innermost (dr);
+ dr_analyze_innermost (dr, nest);
dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
@@ -5185,7 +5204,7 @@
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
- res = dr_analyze_innermost (dr)
+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
&& stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
free_data_ref (dr);
@@ -5225,7 +5244,7 @@
DR_STMT (dr) = stmt;
DR_REF (dr) = *ref->pos;
- dr_analyze_innermost (dr);
+ dr_analyze_innermost (dr, loop_containing_stmt (stmt));
base_address = DR_BASE_ADDRESS (dr);
if (!base_address)
=== modified file 'gcc/tree-data-ref.h'
--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
+++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000
@@ -386,7 +386,7 @@
DEF_VEC_ALLOC_O (data_ref_loc, heap);
bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **);
-bool dr_analyze_innermost (struct data_reference *);
+bool dr_analyze_innermost (struct data_reference *, struct loop *);
extern bool compute_data_dependences_for_loop (struct loop *, bool,
VEC (loop_p, heap) **,
VEC (data_reference_p, heap) **,
=== modified file 'gcc/tree-loop-distribution.c'
--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000
+++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000
@@ -267,7 +267,7 @@
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
- res = dr_analyze_innermost (dr);
+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
=== modified file 'gcc/tree-predcom.c'
--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000
+++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000
@@ -1114,7 +1114,7 @@
memset (&init_dr, 0, sizeof (struct data_reference));
DR_REF (&init_dr) = init_ref;
DR_STMT (&init_dr) = phi;
- if (!dr_analyze_innermost (&init_dr))
+ if (!dr_analyze_innermost (&init_dr, loop))
return NULL;
if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
=== modified file 'gcc/tree-vect-data-refs.c'
--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000
+++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000
@@ -353,11 +353,7 @@
/* Check that the data-refs have same bases and offsets. If not, we can't
determine if they are dependent. */
- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
|| !dr_equal_offsets_p (dra, drb))
return true;
@@ -403,11 +399,7 @@
/* Check that the data-refs have same first location (except init) and they
are both either store or load (not load and store). */
- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
|| !dr_equal_offsets_p (dra, drb)
|| !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
|| DR_IS_READ (dra) != DR_IS_READ (drb))
@@ -615,6 +607,11 @@
if (vect_check_interleaving (dra, drb))
return false;
+ /* Read-read is OK (we need this check here, after checking for
+ interleaving). */
+ if (DR_IS_READ (dra) && DR_IS_READ (drb))
+ return false;
+
if (vect_print_dump_info (REPORT_DR_DETAILS))
{
fprintf (vect_dump, "can't determine dependence between ");

View File

@@ -1,240 +0,0 @@
2011-10-06 Ira Rosen <ira.rosen@linaro.org>
gcc/testsuite/
* gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block
vectorizable.
Backport from mainline:
2011-09-25 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
of vect_analyze_bb here.
(vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect64): New.
* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
of multiple vector sizes.
* gcc.dg/vect/bb-slp-26.c: New.
=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
@@ -49,6 +49,7 @@
}
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+char src[N], dst[N];
+
+void foo (char * __restrict__ dst, char * __restrict__ src, int h,
+ int stride, int dummy)
+{
+ int i;
+ h /= 16;
+ for (i = 0; i < h; i++)
+ {
+ dst[0] += A*src[0];
+ dst[1] += A*src[1];
+ dst[2] += A*src[2];
+ dst[3] += A*src[3];
+ dst[4] += A*src[4];
+ dst[5] += A*src[5];
+ dst[6] += A*src[6];
+ dst[7] += A*src[7];
+ dst += 8;
+ src += 8;
+ if (dummy == 32)
+ abort ();
+ }
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i/8;
+ }
+
+ foo (dst, src, N, 8, 0);
+
+ for (i = 0; i < N/2; i++)
+ {
+ if (dst[i] != A * src[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
@@ -3283,6 +3283,24 @@
return $et_vect_multiple_sizes_saved
}
+# Return 1 if the target supports vectors of 64 bits.
+
+proc check_effective_target_vect64 { } {
+ global et_vect64
+
+ if [info exists et_vect64_saved] {
+ verbose "check_effective_target_vect64: using cached result" 2
+ } else {
+ set et_vect64_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+ set et_vect64_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+ return $et_vect64_saved
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
=== modified file 'gcc/tree-vect-slp.c'
--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000
+++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
@@ -1664,42 +1664,18 @@
/* Check if the basic block can be vectorized. */
-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
{
bb_vec_info bb_vinfo;
VEC (ddr_p, heap) *ddrs;
VEC (slp_instance, heap) *slp_instances;
slp_instance instance;
- int i, insns = 0;
- gimple_stmt_iterator gsi;
+ int i;
int min_vf = 2;
int max_vf = MAX_VECTORIZATION_FACTOR;
bool data_dependence_in_bb = false;
- current_vector_size = 0;
-
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!is_gimple_debug (stmt)
- && !gimple_nop_p (stmt)
- && gimple_code (stmt) != GIMPLE_LABEL)
- insns++;
- }
-
- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
- {
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
- fprintf (vect_dump, "not vectorized: too many instructions in basic "
- "block.\n");
-
- return NULL;
- }
-
bb_vinfo = new_bb_vec_info (bb);
if (!bb_vinfo)
return NULL;
@@ -1819,6 +1795,61 @@
}
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+ bb_vec_info bb_vinfo;
+ int insns = 0;
+ gimple_stmt_iterator gsi;
+ unsigned int vector_sizes;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!is_gimple_debug (stmt)
+ && !gimple_nop_p (stmt)
+ && gimple_code (stmt) != GIMPLE_LABEL)
+ insns++;
+ }
+
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
+ "block.\n");
+
+ return NULL;
+ }
+
+ /* Autodetect first vector size we try. */
+ current_vector_size = 0;
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+ while (1)
+ {
+ bb_vinfo = vect_slp_analyze_bb_1 (bb);
+ if (bb_vinfo)
+ return bb_vinfo;
+
+ destroy_bb_vec_info (bb_vinfo);
+
+ vector_sizes &= ~current_vector_size;
+ if (vector_sizes == 0
+ || current_vector_size == 0)
+ return NULL;
+
+ /* Try the next biggest vector size. */
+ current_vector_size = 1 << floor_log2 (vector_sizes);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "***** Re-trying analysis with "
+ "vector size %d\n", current_vector_size);
+ }
+}
+
+
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor).
However, the actual number of vector stmts for every SLP node depends on

View File

@@ -1,124 +0,0 @@
2011-10-13 Andrew Stubbs <ams@codesourcery.com>
Backport from mainline:
2011-10-07 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/predicates.md (shift_amount_operand): Remove constant
range check.
(shift_operator): Check range of constants for all shift operators.
gcc/testsuite/
* gcc.dg/pr50193-1.c: New file.
* gcc.target/arm/shiftable.c: New file.
=== modified file 'gcc/config/arm/predicates.md'
--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
+++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000
@@ -129,11 +129,12 @@
(ior (match_operand 0 "arm_rhs_operand")
(match_operand 0 "memory_operand")))
+;; This doesn't have to do much because the constant is already checked
+;; in the shift_operator predicate.
(define_predicate "shift_amount_operand"
(ior (and (match_test "TARGET_ARM")
(match_operand 0 "s_register_operand"))
- (and (match_operand 0 "const_int_operand")
- (match_test "INTVAL (op) > 0"))))
+ (match_operand 0 "const_int_operand")))
(define_predicate "arm_add_operand"
(ior (match_operand 0 "arm_rhs_operand")
@@ -219,13 +220,20 @@
(match_test "mode == GET_MODE (op)")))
;; True for shift operators.
+;; Notes:
+;; * mult is only permitted with a constant shift amount
+;; * patterns that permit register shift amounts only in ARM mode use
+;; shift_amount_operand, patterns that always allow registers do not,
+;; so we don't have to worry about that sort of thing here.
(define_special_predicate "shift_operator"
(and (ior (ior (and (match_code "mult")
(match_test "power_of_two_operand (XEXP (op, 1), mode)"))
(and (match_code "rotate")
(match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
- (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
+ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
(match_test "mode == GET_MODE (op)")))
;; True for MULT, to identify which variant of shift_operator is in use.
=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c'
--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm32 } */
+
+/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some
+ of these as a left shift, others as a multiply. Check that we match the
+ right one. */
+
+int
+plus (int a, int b)
+{
+ return (a * 64) + b;
+}
+
+/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */
+
+int
+minus (int a, int b)
+{
+ return a - (b * 64);
+}
+
+/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */
+
+int
+ior (int a, int b)
+{
+ return (a * 64) | b;
+}
+
+/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */
+
+int
+xor (int a, int b)
+{
+ return (a * 64) ^ b;
+}
+
+/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */
+
+int
+and (int a, int b)
+{
+ return (a * 64) & b;
+}
+
+/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */
+
+int
+rsb (int a, int b)
+{
+ return (a * 64) - b;
+}
+
+/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */
+
+int
+mvn (int a, int b)
+{
+ return ~(a * 64);
+}
+
+/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */

View File

@@ -1,362 +0,0 @@
2011-10-16 Ira Rosen <ira.rosen@linaro.org>
Backport from mainline:
2011-09-27 Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
vectorization.
(vectorizable_type_promotion): Likewise.
(vect_analyze_stmt): Call vectorizable_type_demotion and
vectorizable_type_promotion for basic blocks.
(supportable_widening_operation): Don't assume loop vectorization.
* tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
basic blocks. Update vectorization factor for basic block
vectorization.
(vect_analyze_slp_instance): Allow multiple types for basic block
vectorization. Recheck unrolling factor after construction of SLP
instance.
gcc/testsuite/
* gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit
vectors.
* gcc.dg/vect/bb-slp-27.c: New.
* gcc.dg/vect/bb-slp-28.c: New.
2011-10-04 Ira Rosen <ira.rosen@linaro.org>
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
Make et_vect_multiple_sizes_saved global.
(check_effective_target_vect64): Make et_vect64_saved global.
=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000
@@ -48,8 +48,6 @@
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 16
+
+short src[N], dst[N];
+
+void foo (int a)
+{
+ dst[0] += a*src[0];
+ dst[1] += a*src[1];
+ dst[2] += a*src[2];
+ dst[3] += a*src[3];
+ dst[4] += a*src[4];
+ dst[5] += a*src[5];
+ dst[6] += a*src[6];
+ dst[7] += a*src[7];
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 0;
+ src[i] = i;
+ }
+
+ foo (A);
+
+ for (i = 0; i < 8; i++)
+ {
+ if (dst[i] != A * i)
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c'
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000
@@ -0,0 +1,71 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 300
+#define N 16
+
+char src[N];
+short dst[N];
+short src1[N], dst1[N];
+
+void foo (int a)
+{
+ dst[0] = (short) (a * (int) src[0]);
+ dst[1] = (short) (a * (int) src[1]);
+ dst[2] = (short) (a * (int) src[2]);
+ dst[3] = (short) (a * (int) src[3]);
+ dst[4] = (short) (a * (int) src[4]);
+ dst[5] = (short) (a * (int) src[5]);
+ dst[6] = (short) (a * (int) src[6]);
+ dst[7] = (short) (a * (int) src[7]);
+ dst[8] = (short) (a * (int) src[8]);
+ dst[9] = (short) (a * (int) src[9]);
+ dst[10] = (short) (a * (int) src[10]);
+ dst[11] = (short) (a * (int) src[11]);
+ dst[12] = (short) (a * (int) src[12]);
+ dst[13] = (short) (a * (int) src[13]);
+ dst[14] = (short) (a * (int) src[14]);
+ dst[15] = (short) (a * (int) src[15]);
+
+ dst1[0] += src1[0];
+ dst1[1] += src1[1];
+ dst1[2] += src1[2];
+ dst1[3] += src1[3];
+ dst1[4] += src1[4];
+ dst1[5] += src1[5];
+ dst1[6] += src1[6];
+ dst1[7] += src1[7];
+}
+
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ dst[i] = 2;
+ dst1[i] = 0;
+ src[i] = i;
+ src1[i] = i+2;
+ }
+
+ foo (A);
+
+ for (i = 0; i < N; i++)
+ {
+ if (dst[i] != A * i
+ || (i < N/2 && dst1[i] != i + 2))
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000
@@ -3268,7 +3268,7 @@
# Return 1 if the target supports multiple vector sizes
proc check_effective_target_vect_multiple_sizes { } {
- global et_vect_multiple_sizes
+ global et_vect_multiple_sizes_saved
if [info exists et_vect_multiple_sizes_saved] {
verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
@@ -3286,7 +3286,7 @@
# Return 1 if the target supports vectors of 64 bits.
proc check_effective_target_vect64 { } {
- global et_vect64
+ global et_vect64_saved
if [info exists et_vect64_saved] {
verbose "check_effective_target_vect64: using cached result" 2
=== modified file 'gcc/tree-vect-slp.c'
--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
+++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
@@ -386,20 +386,15 @@
return false;
}
- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (ncopies != 1)
- {
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP with multiple types ");
-
- /* FORNOW: multiple types are unsupported in BB SLP. */
- if (bb_vinfo)
- return false;
- }
-
/* In case of multiple types we need to detect the smallest type. */
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ {
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (bb_vinfo)
+ vectorization_factor = *max_nunits;
+ }
+
+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
if (is_gimple_call (stmt))
rhs_code = CALL_EXPR;
@@ -1183,7 +1178,6 @@
if (loop_vinfo)
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
else
- /* No multitypes in BB SLP. */
vectorization_factor = nunits;
/* Calculate the unrolling factor. */
@@ -1246,16 +1240,23 @@
&max_nunits, &load_permutation, &loads,
vectorization_factor))
{
+ /* Calculate the unrolling factor based on the smallest type. */
+ if (max_nunits > nunits)
+ unrolling_factor = least_common_multiple (max_nunits, group_size)
+ / group_size;
+
+ if (unrolling_factor != 1 && !loop_vinfo)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
+ " block SLP");
+ return false;
+ }
+
/* Create a new SLP instance. */
new_instance = XNEW (struct _slp_instance);
SLP_INSTANCE_TREE (new_instance) = node;
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
- /* Calculate the unrolling factor based on the smallest type in the
- loop. */
- if (max_nunits > nunits)
- unrolling_factor = least_common_multiple (max_nunits, group_size)
- / group_size;
-
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
=== modified file 'gcc/tree-vect-stmts.c'
--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000
+++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
@@ -3081,11 +3081,9 @@
VEC (tree, heap) *vec_oprnds0 = NULL;
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
tree last_oprnd, intermediate_type;
-
- /* FORNOW: not supported by basic block SLP vectorization. */
- gcc_assert (loop_vinfo);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
@@ -3113,7 +3111,7 @@
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
&& CONVERT_EXPR_CODE_P (code))))
return false;
- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
&def_stmt, &def, &dt[0], &vectype_in))
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -3360,11 +3358,9 @@
int multi_step_cvt = 0;
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
-
- /* FORNOW: not supported by basic block SLP vectorization. */
- gcc_assert (loop_vinfo);
-
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
@@ -3393,7 +3389,7 @@
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
&& CONVERT_EXPR_CODE_P (code))))
return false;
- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
&def_stmt, &def, &dt[0], &vectype_in))
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -5153,7 +5149,9 @@
else
{
if (bb_vinfo)
- ok = (vectorizable_shift (stmt, NULL, NULL, node)
+ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
+ || vectorizable_type_demotion (stmt, NULL, NULL, node)
+ || vectorizable_shift (stmt, NULL, NULL, node)
|| vectorizable_operation (stmt, NULL, NULL, node)
|| vectorizable_assignment (stmt, NULL, NULL, node)
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
@@ -5780,7 +5778,7 @@
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+ struct loop *vect_loop = NULL;
bool ordered_p;
enum machine_mode vec_mode;
enum insn_code icode1, icode2;
@@ -5789,6 +5787,9 @@
tree wide_vectype = vectype_out;
enum tree_code c1, c2;
+ if (loop_info)
+ vect_loop = LOOP_VINFO_LOOP (loop_info);
+
/* The result of a vectorized widening operation usually requires two vectors
(because the widened results do not fit int one vector). The generated
vector results would normally be expected to be generated in the same
@@ -5809,7 +5810,8 @@
iterations in parallel). We therefore don't allow to change the order
of the computation in the inner-loop during outer-loop vectorization. */
- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+ if (vect_loop
+ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
&& !nested_in_vect_loop_p (vect_loop, stmt))
ordered_p = false;
else

View File

@@ -1,622 +0,0 @@
2011-10-17 Michael Hope <michael.hope@linaro.org>
Backport from mainline r178852:
2011-09-14 Julian Brown <julian@codesourcery.com>
gcc/
* config/arm/arm.c (arm_override_options): Add unaligned_access
support.
(arm_file_start): Emit attribute for unaligned access as appropriate.
* config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
(UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
(insv, extzv): Add unaligned-access support.
(extv): Change to expander. Likewise.
(extzv_t1, extv_regsi): Add helpers.
(unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
(unaligned_storesi, unaligned_storehi): New.
(*extv_reg): New (previous extv implementation).
* config/arm/arm.opt (munaligned_access): Add option.
* config/arm/constraints.md (Uw): New constraint.
* expmed.c (store_bit_field_1): Adjust bitfield numbering according
to size of access, not size of unit, when BITS_BIG_ENDIAN !=
BYTES_BIG_ENDIAN. Don't use bitfield accesses for
volatile accesses when -fstrict-volatile-bitfields is in effect.
(extract_bit_field_1): Likewise.
Backport from mainline r172697:
2011-04-19 Wei Guozhi <carrot@google.com>
PR target/47855
gcc/
* config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype.
* config/arm/arm.c (thumb1_legitimate_address_p): Remove the static
linkage.
* config/arm/constraints.md (Uu): New constraint.
* config/arm/arm.md (*arm_movqi_insn): Compute attr "length".
=== modified file 'gcc/config/arm/arm-protos.h'
Index: gcc-4_6-branch/gcc/config/arm/arm-protos.h
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm-protos.h 2012-03-05 16:07:15.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm-protos.h 2012-03-05 16:07:50.392936694 -0800
@@ -59,6 +59,7 @@
int);
extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
int);
+extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
extern int arm_const_double_rtx (rtx);
extern int neg_const_double_rtx_ok_for_fpa (rtx);
extern int vfp3_const_double_rtx (rtx);
Index: gcc-4_6-branch/gcc/config/arm/arm.c
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.c 2012-03-05 16:07:15.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.c 2012-03-05 16:07:50.400936694 -0800
@@ -2065,6 +2065,28 @@
fix_cm3_ldrd = 0;
}
+ /* Enable -munaligned-access by default for
+ - all ARMv6 architecture-based processors
+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+ Disable -munaligned-access by default for
+ - all pre-ARMv6 architecture-based processors
+ - ARMv6-M architecture-based processors. */
+
+ if (unaligned_access == 2)
+ {
+ if (arm_arch6 && (arm_arch_notm || arm_arch7))
+ unaligned_access = 1;
+ else
+ unaligned_access = 0;
+ }
+ else if (unaligned_access == 1
+ && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
+ {
+ warning (0, "target CPU does not support unaligned accesses");
+ unaligned_access = 0;
+ }
+
if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
@@ -6123,7 +6145,7 @@
addresses based on the frame pointer or arg pointer until the
reload pass starts. This is so that eliminating such addresses
into stack based ones won't produce impossible code. */
-static int
+int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
/* ??? Not clear if this is right. Experiment. */
@@ -22251,6 +22273,10 @@
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+ /* Tag_CPU_unaligned_access. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
+ unaligned_access);
+
/* Tag_ABI_FP_16bit_format. */
if (arm_fp16_format)
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
Index: gcc-4_6-branch/gcc/config/arm/arm.md
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.md 2012-03-05 16:07:15.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.md 2012-03-05 16:09:26.284941314 -0800
@@ -114,6 +114,10 @@
; another symbolic address.
(UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
(UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form.
+ (UNSPEC_UNALIGNED_LOAD 30) ; Used to represent ldr/ldrh instructions that access
+ ; unaligned locations, on architectures which support
+ ; that.
+ (UNSPEC_UNALIGNED_STORE 31) ; Same for str/strh.
]
)
@@ -2461,10 +2465,10 @@
;;; this insv pattern, so this pattern needs to be reevalutated.
(define_expand "insv"
- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
- (match_operand:SI 1 "general_operand" "")
- (match_operand:SI 2 "general_operand" ""))
- (match_operand:SI 3 "reg_or_int_operand" ""))]
+ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+ (match_operand 1 "general_operand" "")
+ (match_operand 2 "general_operand" ""))
+ (match_operand 3 "reg_or_int_operand" ""))]
"TARGET_ARM || arm_arch_thumb2"
"
{
@@ -2475,35 +2479,70 @@
if (arm_arch_thumb2)
{
- bool use_bfi = TRUE;
-
- if (GET_CODE (operands[3]) == CONST_INT)
+ if (unaligned_access && MEM_P (operands[0])
+ && s_register_operand (operands[3], GET_MODE (operands[3]))
+ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
{
- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)
+ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width
+ - start_bit;
- if (val == 0)
+ if (width == 32)
{
- emit_insn (gen_insv_zero (operands[0], operands[1],
- operands[2]));
- DONE;
+ base_addr = adjust_address (operands[0], SImode,
+ start_bit / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
}
+ else
+ {
+ rtx tmp = gen_reg_rtx (HImode);
- /* See if the set can be done with a single orr instruction. */
- if (val == mask && const_ok_for_arm (val << start_bit))
- use_bfi = FALSE;
+ base_addr = adjust_address (operands[0], HImode,
+ start_bit / BITS_PER_UNIT);
+ emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
+ emit_insn (gen_unaligned_storehi (base_addr, tmp));
+ }
+ DONE;
}
-
- if (use_bfi)
+ else if (s_register_operand (operands[0], GET_MODE (operands[0])))
{
- if (GET_CODE (operands[3]) != REG)
- operands[3] = force_reg (SImode, operands[3]);
+ bool use_bfi = TRUE;
- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+
+ if (val == 0)
+ {
+ emit_insn (gen_insv_zero (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ /* See if the set can be done with a single orr instruction. */
+ if (val == mask && const_ok_for_arm (val << start_bit))
+ use_bfi = FALSE;
+ }
+
+ if (use_bfi)
+ {
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = force_reg (SImode, operands[3]);
+
+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
}
+ else
+ FAIL;
}
+ if (!s_register_operand (operands[0], GET_MODE (operands[0])))
+ FAIL;
+
target = copy_rtx (operands[0]);
/* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
subreg as the final target. */
@@ -3695,12 +3734,10 @@
;; to reduce register pressure later on.
(define_expand "extzv"
- [(set (match_dup 4)
- (ashift:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "const_int_operand" "")))
- (set (match_operand:SI 0 "register_operand" "")
- (lshiftrt:SI (match_dup 4)
- (match_operand:SI 3 "const_int_operand" "")))]
+ [(set (match_operand 0 "s_register_operand" "")
+ (zero_extract (match_operand 1 "nonimmediate_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
"TARGET_THUMB1 || arm_arch_thumb2"
"
{
@@ -3709,10 +3746,57 @@
if (arm_arch_thumb2)
{
- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
+ HOST_WIDE_INT width = INTVAL (operands[2]);
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+ if (unaligned_access && MEM_P (operands[1])
+ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
+ {
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)
+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width
+ - bitpos;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[1], SImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+ }
+ else
+ {
+ rtx dest = operands[0];
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* We may get a paradoxical subreg here. Strip it off. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == SImode
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
+ dest = SUBREG_REG (dest);
+
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+ FAIL;
+
+ base_addr = adjust_address (operands[1], HImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
+ }
+ DONE;
+ }
+ else if (s_register_operand (operands[1], GET_MODE (operands[1])))
+ {
+ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
}
+
+ if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+ FAIL;
operands[3] = GEN_INT (rshift);
@@ -3722,12 +3806,154 @@
DONE;
}
- operands[2] = GEN_INT (lshift);
- operands[4] = gen_reg_rtx (SImode);
+ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift),
+ operands[3], gen_reg_rtx (SImode)));
+ DONE;
}"
)
-(define_insn "extv"
+;; Helper for extzv, for the Thumb-1 register-shifts case: operand 1 is shifted
+;; left by operand 2 into operand 4, which is then shifted right into operand 0.
+(define_expand "extzv_t1"
+ [(set (match_operand:SI 4 "s_register_operand" "")  ; Intermediate (left-shifted) value.
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 0 "s_register_operand" "")  ; Final result.
+ (lshiftrt:SI (match_dup 4)  ; Logical right shift of the intermediate value.
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_THUMB1"
+ "")
+
+(define_expand "extv"  ; Expander for sign_extract of a constant-width, constant-position field.
+ [(set (match_operand 0 "s_register_operand" "")
+ (sign_extract (match_operand 1 "nonimmediate_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "arm_arch_thumb2"
+{
+ HOST_WIDE_INT width = INTVAL (operands[2]);  /* Field width in bits.  */
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);  /* Field position in bits.  */
+
+ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
+ && (bitpos % BITS_PER_UNIT) == 0)  /* Byte-aligned 16/32-bit field in memory: use an unaligned load.  */
+ {
+ rtx base_addr;
+
+ if (BYTES_BIG_ENDIAN)  /* Recount bitpos from the opposite end of the mode of operand 0.  */
+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[1], SImode,
+ bitpos / BITS_PER_UNIT);  /* Byte offset of the field within the MEM.  */
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+ }
+ else  /* width == 16  */
+ {
+ rtx dest = operands[0];
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* We may get a paradoxical subreg here. Strip it off. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == SImode
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
+ dest = SUBREG_REG (dest);
+
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)  /* Only a 16-bit wide destination is handled.  */
+ FAIL;
+
+ base_addr = adjust_address (operands[1], HImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadhis (tmp, base_addr));  /* Sign-extending halfword load into an SImode temporary.  */
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
+ }
+
+ DONE;
+ }
+ else if (!s_register_operand (operands[1], GET_MODE (operands[1])))  /* Every remaining case needs a register source.  */
+ FAIL;
+ else if (GET_MODE (operands[0]) == SImode
+ && GET_MODE (operands[1]) == SImode)  /* SImode register to SImode register: defer to extv_regsi.  */
+ {
+ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+
+ FAIL;  /* Any other mode combination is not expanded here.  */
+})
+
+; Helper to expand register forms of extv with the proper modes: the pattern is
+; an SImode sign_extract of register operand 1 into register operand 0.
+(define_expand "extv_regsi"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand 2 "const_int_operand" "")  ; Width operand, as passed by extv.
+ (match_operand 3 "const_int_operand" "")))]  ; Position operand, as passed by extv.
+ ""  ; No extra enabling condition on this expander.
+{
+})
+
+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
+
+(define_insn "unaligned_loadsi"  ; SImode load whose source is wrapped in UNSPEC_UNALIGNED_LOAD.
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD))]
+ "unaligned_access && TARGET_32BIT"  ; Only enabled when unaligned_access is nonzero.
+ "ldr%?\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")  ; Alternative 0 (l/Uw) is t2-only; alternative 1 (r/m) is for any arch.
+ (set_attr "length" "2,4")  ; Per-alternative lengths, in the same order.
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load1")])
+
+(define_insn "unaligned_loadhis"  ; HImode load via UNSPEC_UNALIGNED_LOAD, sign-extended to SImode.
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (sign_extend:SI
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD)))]
+ "unaligned_access && TARGET_32BIT"  ; Only enabled when unaligned_access is nonzero.
+ "ldr%(sh%)\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")  ; Alternative 0 (l/Uw) is t2-only; alternative 1 (r/m) is for any arch.
+ (set_attr "length" "2,4")  ; Per-alternative lengths, in the same order.
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_loadhiu"  ; HImode load via UNSPEC_UNALIGNED_LOAD, zero-extended to SImode.
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (zero_extend:SI
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD)))]
+ "unaligned_access && TARGET_32BIT"  ; Only enabled when unaligned_access is nonzero.
+ "ldr%(h%)\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")  ; Alternative 0 (l/Uw) is t2-only; alternative 1 (r/m) is for any arch.
+ (set_attr "length" "2,4")  ; Per-alternative lengths, in the same order.
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_storesi"  ; SImode store whose source is wrapped in UNSPEC_UNALIGNED_STORE.
+ [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"  ; Only enabled when unaligned_access is nonzero.
+ "str%?\t%1, %0\t@ unaligned"
+ [(set_attr "arch" "t2,any")  ; Alternative 0 (Uw/l) is t2-only; alternative 1 (m/r) is for any arch.
+ (set_attr "length" "2,4")  ; Per-alternative lengths, in the same order.
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store1")])
+
+(define_insn "unaligned_storehi"  ; HImode store whose source is wrapped in UNSPEC_UNALIGNED_STORE.
+ [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
+ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"  ; Only enabled when unaligned_access is nonzero.
+ "str%(h%)\t%1, %0\t@ unaligned"
+ [(set_attr "arch" "t2,any")  ; Alternative 0 (Uw/l) is t2-only; alternative 1 (m/r) is for any arch.
+ (set_attr "length" "2,4")  ; Per-alternative lengths, in the same order.
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store1")])
+
+(define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "const_int_operand" "M")
@@ -6069,8 +6295,8 @@
(define_insn "*arm_movqi_insn"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
- (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m")
+ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))]
"TARGET_32BIT
&& ( register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode))"
@@ -6078,10 +6304,14 @@
mov%?\\t%0, %1
mvn%?\\t%0, #%B1
ldr%(b%)\\t%0, %1
+ str%(b%)\\t%1, %0
+ ldr%(b%)\\t%0, %1
str%(b%)\\t%1, %0"
- [(set_attr "type" "*,*,load1,store1")
- (set_attr "insn" "mov,mvn,*,*")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "*,*,load1,store1,load1,store1")
+ (set_attr "insn" "mov,mvn,*,*,*,*")
+ (set_attr "predicable" "yes")
+ (set_attr "arch" "any,any,t2,t2,any,any")
+ (set_attr "length" "4,4,2,2,4,4")]
)
(define_insn "*thumb1_movqi_insn"
Index: gcc-4_6-branch/gcc/config/arm/arm.opt
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/arm.opt 2012-03-05 16:07:14.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/arm.opt 2012-03-05 16:07:50.404936697 -0800
@@ -173,3 +173,7 @@
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
that may trigger Cortex-M3 errata.
+
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned word and halfword accesses to packed data.
Index: gcc-4_6-branch/gcc/config/arm/constraints.md
===================================================================
--- gcc-4_6-branch.orig/gcc/config/arm/constraints.md 2012-03-05 16:07:14.000000000 -0800
+++ gcc-4_6-branch/gcc/config/arm/constraints.md 2012-03-05 16:07:50.404936697 -0800
@@ -36,6 +36,7 @@
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
;; in ARM state: Uq
+;; in Thumb state: Uu, Uw
(define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
@@ -344,6 +345,27 @@
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+(define_memory_constraint "Uu"  ; Used by the t2-only alternatives of *arm_movqi_insn.
+ "@internal
+ In Thumb state an address that is valid in 16bit encoding."
+ (and (match_code "mem")  ; The operand itself must be a MEM.
+ (match_test "TARGET_THUMB
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+ 0)")))  ; NOTE(review): the trailing 0 is presumably the non-strict flag; confirm against arm.c.
+
+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
+; are actually LDM/STM instructions, so cannot be used to access unaligned
+; data.
+(define_memory_constraint "Uw"  ; Used by the t2-only alternatives of the unaligned_* insns.
+ "@internal
+ In Thumb state an address that is valid in 16bit encoding, and that can be
+ used for unaligned accesses."
+ (and (match_code "mem")  ; The operand itself must be a MEM.
+ (match_test "TARGET_THUMB
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+ 0)
+ && GET_CODE (XEXP (op, 0)) != POST_INC")))  ; Same test as Uu, minus post-increment addresses.
+
;; We used to have constraint letters for S and R in ARM state, but
;; all uses of these now appear to have been removed.
Index: gcc-4_6-branch/gcc/expmed.c
===================================================================
--- gcc-4_6-branch.orig/gcc/expmed.c 2012-01-04 15:37:51.000000000 -0800
+++ gcc-4_6-branch/gcc/expmed.c 2012-03-05 16:07:50.404936697 -0800
@@ -657,6 +657,10 @@
&& GET_MODE (value) != BLKmode
&& bitsize > 0
&& GET_MODE_BITSIZE (op_mode) >= bitsize
+ /* Do not use insv for volatile bitfields when
+ -fstrict-volatile-bitfields is in effect. */
+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
+ && flag_strict_volatile_bitfields > 0)
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
&& (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
&& insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
@@ -700,19 +704,21 @@
copy_back = true;
}
- /* On big-endian machines, we count bits from the most significant.
- If the bit field insn does not, we must invert. */
-
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
- xbitpos = unit - bitsize - xbitpos;
-
/* We have been counting XBITPOS within UNIT.
Count instead within the size of the register. */
- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
unit = GET_MODE_BITSIZE (op_mode);
+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
+ "backwards" from the size of the unit we are inserting into.
+ Otherwise, we count bits from the most significant on a
+ BYTES/BITS_BIG_ENDIAN machine. */
+
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
/* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
value1 = value;
if (GET_MODE (value) != op_mode)
@@ -1528,6 +1534,10 @@
if (ext_mode != MAX_MACHINE_MODE
&& bitsize > 0
&& GET_MODE_BITSIZE (ext_mode) >= bitsize
+ /* Do not use extv/extzv for volatile bitfields when
+ -fstrict-volatile-bitfields is in effect. */
+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
+ && flag_strict_volatile_bitfields > 0)
/* If op0 is a register, we need it in EXT_MODE to make it
acceptable to the format of ext(z)v. */
&& !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
@@ -1552,17 +1562,20 @@
/* Get ref to first byte containing part of the field. */
xop0 = adjust_address (xop0, byte_mode, xoffset);
- /* On big-endian machines, we count bits from the most significant.
- If the bit field insn does not, we must invert. */
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
- xbitpos = unit - bitsize - xbitpos;
-
/* Now convert from counting within UNIT to counting in EXT_MODE. */
- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
unit = GET_MODE_BITSIZE (ext_mode);
+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
+ "backwards" from the size of the unit we are extracting from.
+ Otherwise, we count bits from the most significant on a
+ BYTES/BITS_BIG_ENDIAN machine. */
+
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
if (xtarget == 0)
xtarget = xspec_target = gen_reg_rtx (tmode);

Some files were not shown because too many files have changed in this diff Show More