mirror of
https://github.com/openembedded/meta-openembedded.git
synced 2026-05-06 16:58:24 +00:00
frr: fix mgmtd crash on ARM32
Backport fix[1] for MGMT crash on first start on ARM32 platforms[2]. [1] https://github.com/FRRouting/frr/pull/21651 [2] https://github.com/FRRouting/frr/issues/20087 Signed-off-by: Yi Zhao <yi.zhao@windriver.com> Signed-off-by: Khem Raj <khem.raj@oss.qualcomm.com>
This commit is contained in:
@@ -0,0 +1,352 @@
|
||||
From 5959a3d0cbc73b0c41134bf0d9944a6bd40ba510 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Hopps <chopps@labn.net>
|
||||
Date: Sat, 18 Apr 2026 03:01:46 +0000
|
||||
Subject: [PATCH] lib: fix mgmt_msg recv to deal with mis-alignment
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
We need our messages to start on 64 bit boundaries as the message buffer
|
||||
is accessed directly as structured data. In particular on ARM32 arch
|
||||
using the data this way lead to unaligned access and SIGBUS.
|
||||
|
||||
The minor optimization of reading multiple messages into a single stream
|
||||
buffer complicated this. Instead we KISS and switch to one message per
|
||||
stream buffer.
|
||||
|
||||
Fixes #20087.
|
||||
|
||||
Signed-off-by: Christian Hopps <chopps@labn.net>
|
||||
Co-developed-by: Samir MOUHOUNE <samir.mouhoune@nav-timing.safrangroup.com>
|
||||
Co-developed-by: Alexis Lothoré <alexis.lothore@bootlin.com>
|
||||
|
||||
See also PR #20985
|
||||
|
||||
This issue was identified and another solution was provided by Samir
|
||||
MOUHOUNE with the following commit message comments:
|
||||
|
||||
On ARM32 systems, mgmtd crashes at startup on an alignment fault:
|
||||
|
||||
```
|
||||
frrinit.sh[158]: Starting watchfrr with command: ' /usr/sbin/watchfrr -d mgmtd zebra staticd'
|
||||
watchfrr[168]: [T83RR-8SM5G] watchfrr 10.5.2 starting: vty@0
|
||||
watchfrr[168]: [ZCJ3S-SPH5S] mgmtd state -> down : initial connection attempt failed
|
||||
watchfrr[168]: [ZCJ3S-SPH5S] zebra state -> down : initial connection attempt failed
|
||||
watchfrr[168]: [ZCJ3S-SPH5S] staticd state -> down : initial connection attempt failed
|
||||
watchfrr[168]: [YFT0P-5Q5YX] Forked background command [pid 169]: /usr/sbin/watchfrr.sh restart all
|
||||
frrinit.sh[180]: 2026/02/27 09:14:13 ZEBRA: [KGY44-D47GD][EC 4043309111] Disabling MPLS support (no kernel support)
|
||||
watchfrr[168]: [QDG3Y-BY5TN] zebra state -> up : connect succeeded
|
||||
kernel: Alignment trap: not handling instruction edc30b02 at [<004c3c1c>]
|
||||
kernel: 8<--- cut here ---
|
||||
kernel: Unhandled fault: alignment exception (0x801) at 0x008879f6
|
||||
kernel: [008879f6] *pgd=9baf6831
|
||||
watchfrr[168]: [YFT0P-5Q5YX] Forked background command [pid 189]: /usr/sbin/watchfrr.sh restart mgmtd
|
||||
frrinit.sh[189]: Cannot stop mgmtd: pid 179 not running
|
||||
watchfrr.sh[196]: Cannot stop mgmtd: pid 179 not running
|
||||
frrinit.sh[202]: [202|zebra] sending configuration
|
||||
frrinit.sh[202]: [202|zebra] done
|
||||
frrinit.sh[216]: [216|watchfrr] sending configuration
|
||||
frrinit.sh[218]: [218|staticd] sending configuration
|
||||
watchfrr[168]: [VTVCM-Y2NW3] Configuration Read in Took: 00:00:00
|
||||
frrinit.sh[199]: Waiting for children to finish applying config...
|
||||
frrinit.sh[216]: [216|watchfrr] done
|
||||
```
|
||||
|
||||
When checking crashlogs in /var/tmp/frr, mgmt gives the following:
|
||||
|
||||
```
|
||||
MGMTD: Received signal 7 at 1772183653 (si_addr 0x8879f6); aborting...
|
||||
MGMTD: /lib/libfrr.so.0(zlog_backtrace_sigsafe+0x5c) [0xb6e89c90]
|
||||
MGMTD: /lib/libfrr.so.0(zlog_signal+0xe0) [0xb6e89e80]
|
||||
MGMTD: /lib/libfrr.so.0(+0xd4374) [0xb6ed3374]
|
||||
MGMTD: /lib/libc.so.6(__default_rt_sa_restorer+0) [0xb6ab4d90]
|
||||
MGMTD: /usr/sbin/mgmtd(mgmt_fe_adapter_send_notify+0x6b8) [0x4c3c20]
|
||||
MGMTD: /lib/libfrr.so.0(mgmt_msg_procbufs+0x124) [0xb6e976b8]
|
||||
MGMTD: /lib/libfrr.so.0(+0x98798) [0xb6e97798]
|
||||
MGMTD: /lib/libfrr.so.0(event_call+0xa8) [0xb6ee739c]
|
||||
MGMTD: /lib/libfrr.so.0(frr_run+0xd4) [0xb6e80fc8]
|
||||
MGMTD: /usr/sbin/mgmtd(main+0x188) [0x4bd7ec]
|
||||
MGMTD: /lib/libc.so.6(+0x236b0) [0xb6a9f6b0]
|
||||
MGMTD: /lib/libc.so.6(__libc_start_main+0x98) [0xb6a9f790]
|
||||
MGMTD: in thread msg_conn_proc_msgs scheduled from lib/mgmt_msg.c:543 msg_conn_sched_proc_msgs()
|
||||
```
|
||||
|
||||
The issue is that messages are queued for sending/receive back-to-back
|
||||
with no padding. This means that when mgmt creates a pointer back to the
|
||||
data waiting in queue and tries to access fields inside the dereferenced
|
||||
message, those accesses are not performed with the alignment constraints
|
||||
required by some architectures. For example, ARM ABI AAPCS32 ([1])
|
||||
states that structures alignment should be the same as the "most
|
||||
aligned" member; so a struct mgmt_msg_header, which contains some
|
||||
uint64_t fields (which are 8-bytes alignes), should be 8-bytes aligned
|
||||
as well.
|
||||
|
||||
On x86, this goes unnoticed because the CPU handles unaligned access
|
||||
transparently. On ARM 32-bit with NEON/VFP, the compiler generates
|
||||
64-bit store instructions that trap on unaligned addresses. The kernel
|
||||
cannot emulate these instructions and kills the process with SIGBUS.
|
||||
|
||||
[1] https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#data-types-and-alignment
|
||||
|
||||
Upstream-Status: Backport [https://github.com/FRRouting/frr/commit/5959a3d0cbc73b0c41134bf0d9944a6bd40ba510]
|
||||
|
||||
Signed-off-by: Christian Hopps <chopps@labn.net>
|
||||
(cherry picked from commit ae7d79f8ff25d5750e5796567ff6317030900d40)
|
||||
Signed-off-by: Yi Zhao <yi.zhao@windriver.com>
|
||||
---
|
||||
lib/mgmt_be_client.h | 3 +-
|
||||
lib/mgmt_fe_client.h | 3 +-
|
||||
lib/mgmt_msg.c | 157 ++++++++++++++++++-------------------------
|
||||
3 files changed, 68 insertions(+), 95 deletions(-)
|
||||
|
||||
diff --git a/lib/mgmt_be_client.h b/lib/mgmt_be_client.h
|
||||
index f5627e3c4e..2f412a6fbd 100644
|
||||
--- a/lib/mgmt_be_client.h
|
||||
+++ b/lib/mgmt_be_client.h
|
||||
@@ -21,7 +21,8 @@ extern "C" {
|
||||
|
||||
#define MGMTD_BE_MAX_NUM_MSG_PROC 500
|
||||
#define MGMTD_BE_MAX_NUM_MSG_WRITE 1000
|
||||
-#define MGMTD_BE_MAX_MSG_LEN (64 * 1024)
|
||||
+/* Messages can be any size, this is just the preallocated buffer size */
|
||||
+#define MGMTD_BE_MAX_MSG_LEN (4 * 1024)
|
||||
|
||||
#define MGMTD_BE_CONTAINER_NODE_VAL "<<container>>"
|
||||
|
||||
diff --git a/lib/mgmt_fe_client.h b/lib/mgmt_fe_client.h
|
||||
index 8ff08b566a..3005c5dd01 100644
|
||||
--- a/lib/mgmt_fe_client.h
|
||||
+++ b/lib/mgmt_fe_client.h
|
||||
@@ -29,7 +29,8 @@ extern "C" {
|
||||
|
||||
#define MGMTD_FE_MAX_NUM_MSG_PROC 500
|
||||
#define MGMTD_FE_MAX_NUM_MSG_WRITE 100
|
||||
-#define MGMTD_FE_MAX_MSG_LEN (64 * 1024)
|
||||
+/* Messages can be any size, this is just the preallocated buffer size */
|
||||
+#define MGMTD_FE_MAX_MSG_LEN (4 * 1024)
|
||||
|
||||
/***************************************************************
|
||||
* Data-structures
|
||||
diff --git a/lib/mgmt_msg.c b/lib/mgmt_msg.c
|
||||
index f299b52873..fb56f58ab5 100644
|
||||
--- a/lib/mgmt_msg.c
|
||||
+++ b/lib/mgmt_msg.c
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "network.h"
|
||||
#include "sockopt.h"
|
||||
#include "stream.h"
|
||||
+#include "zlog.h"
|
||||
#include "frrevent.h"
|
||||
#include "mgmt_msg.h"
|
||||
#include "mgmt_msg_native.h"
|
||||
@@ -39,8 +40,8 @@ static bool trace;
|
||||
DEFINE_MTYPE(LIB, MSG_CONN, "msg connection state");
|
||||
|
||||
/**
|
||||
- * Read data from a socket into streams containing 1 or more full msgs headed by
|
||||
- * mgmt_msg_hdr which contain API messages (currently protobuf).
|
||||
+ * Read data from a socket into a stream containing 1 full msg headed by
|
||||
+ * mgmt_msg_hdr.
|
||||
*
|
||||
* Args:
|
||||
* ms: mgmt_msg_state for this process.
|
||||
@@ -57,96 +58,80 @@ enum mgmt_msg_rsched mgmt_msg_read(struct mgmt_msg_state *ms, int fd,
|
||||
bool debug)
|
||||
{
|
||||
const char *dbgtag = debug ? ms->idtag : NULL;
|
||||
- size_t avail = STREAM_WRITEABLE(ms->ins);
|
||||
struct mgmt_msg_hdr *mhdr = NULL;
|
||||
- size_t total = 0;
|
||||
- size_t mcount = 0;
|
||||
- ssize_t n, left;
|
||||
+ struct stream *news;
|
||||
+ size_t nread;
|
||||
+ ssize_t n;
|
||||
|
||||
assert(ms && fd != -1);
|
||||
- MGMT_MSG_TRACE(dbgtag, "enter with %zu bytes available to read on fd %d", avail, fd);
|
||||
+ MGMT_MSG_TRACE(dbgtag, "enter to read from fd %d", fd);
|
||||
+
|
||||
+ assert(stream_get_getp(ms->ins) == 0);
|
||||
+ nread = stream_get_endp(ms->ins);
|
||||
|
||||
/*
|
||||
- * Read as much as we can into the stream.
|
||||
+ * Get header, validate, and resize the stream, if needed, to fit incoming message.
|
||||
*/
|
||||
- while (avail > sizeof(struct mgmt_msg_hdr)) {
|
||||
- n = stream_read_try(ms->ins, fd, avail);
|
||||
-
|
||||
- /* -2 is normal nothing read, and to retry */
|
||||
- if (n == -2) {
|
||||
- MGMT_MSG_TRACE(dbgtag, "nothing more to read on fd %d", fd);
|
||||
- break;
|
||||
- }
|
||||
- if (n <= 0) {
|
||||
- if (n == 0)
|
||||
- MGMT_MSG_ERR(ms, "got EOF/disconnect on fd %d", fd);
|
||||
- else
|
||||
- MGMT_MSG_ERR(ms, "got error while reading on fd %d: '%s'", fd,
|
||||
- safe_strerror(errno));
|
||||
- return MSR_DISCONNECT;
|
||||
+ if (nread < sizeof(struct mgmt_msg_hdr)) {
|
||||
+ while (nread < sizeof(struct mgmt_msg_hdr)) {
|
||||
+ n = stream_read_try(ms->ins, fd, sizeof(struct mgmt_msg_hdr) - nread);
|
||||
+ if (n <= 0)
|
||||
+ goto not_done;
|
||||
+ nread += n;
|
||||
+ ms->nrxb += n;
|
||||
}
|
||||
- MGMT_MSG_TRACE(dbgtag, "read %zd bytes on fd %d", n, fd);
|
||||
- ms->nrxb += n;
|
||||
- avail -= n;
|
||||
- }
|
||||
|
||||
- /*
|
||||
- * Check if we have read a complete messages or not.
|
||||
- */
|
||||
- assert(stream_get_getp(ms->ins) == 0);
|
||||
- left = stream_get_endp(ms->ins);
|
||||
- while (left > (ssize_t)sizeof(struct mgmt_msg_hdr)) {
|
||||
- mhdr = (struct mgmt_msg_hdr *)(STREAM_DATA(ms->ins) + total);
|
||||
+ /* Validate the header is sane */
|
||||
+ mhdr = (struct mgmt_msg_hdr *)STREAM_DATA(ms->ins);
|
||||
if (!MGMT_MSG_IS_MARKER(mhdr->marker)) {
|
||||
MGMT_MSG_DBG(dbgtag, "recv corrupt buffer on fd %d, disconnect", fd);
|
||||
return MSR_DISCONNECT;
|
||||
+ } else if (mhdr->len <= sizeof(struct mgmt_msg_hdr)) {
|
||||
+ MGMT_MSG_DBG(dbgtag, "recv invalid message length %u on fd %d, disconnect",
|
||||
+ mhdr->len, fd);
|
||||
+ return MSR_DISCONNECT;
|
||||
}
|
||||
- if ((ssize_t)mhdr->len > left)
|
||||
- break;
|
||||
-
|
||||
- MGMT_MSG_TRACE(dbgtag, "read full message on fd %d len %u", fd, mhdr->len);
|
||||
- total += mhdr->len;
|
||||
- left -= mhdr->len;
|
||||
- mcount++;
|
||||
- }
|
||||
|
||||
- if (!mcount) {
|
||||
- /* Didn't manage to read a full message */
|
||||
- if (mhdr && avail == 0) {
|
||||
- struct stream *news;
|
||||
- /*
|
||||
- * Message was longer than what was left and we have no
|
||||
- * available space to read more in. B/c mcount == 0 the
|
||||
- * message starts at the beginning of the stream so
|
||||
- * therefor the stream is too small to fit the message..
|
||||
- * Resize the stream to fit.
|
||||
- */
|
||||
+ /* See if message will fit in the stream, realloc if not */
|
||||
+ if (mhdr->len > ms->ins->size) {
|
||||
+ MGMT_MSG_DBG(dbgtag,
|
||||
+ "message length %u is greater than available %zu on fd %d",
|
||||
+ mhdr->len, ms->ins->size, fd);
|
||||
news = stream_new(mhdr->len);
|
||||
- stream_put(news, mhdr, left);
|
||||
- stream_set_endp(news, left);
|
||||
+ stream_put(news, mhdr, sizeof(struct mgmt_msg_hdr));
|
||||
stream_free(ms->ins);
|
||||
ms->ins = news;
|
||||
}
|
||||
- return MSR_SCHED_STREAM;
|
||||
}
|
||||
|
||||
- /*
|
||||
- * We have read at least one message into the stream, queue it up.
|
||||
- */
|
||||
- mhdr = (struct mgmt_msg_hdr *)(STREAM_DATA(ms->ins) + total);
|
||||
- stream_set_endp(ms->ins, total);
|
||||
- stream_fifo_push(&ms->inq, ms->ins);
|
||||
- if (left < (ssize_t)sizeof(struct mgmt_msg_hdr))
|
||||
- ms->ins = stream_new(ms->max_msg_sz);
|
||||
- else
|
||||
- /* handle case where message is greater than max */
|
||||
- ms->ins = stream_new(MAX(ms->max_msg_sz, mhdr->len));
|
||||
- if (left) {
|
||||
- stream_put(ms->ins, mhdr, left);
|
||||
- stream_set_endp(ms->ins, left);
|
||||
+ /* Read the rest of the message. */
|
||||
+ mhdr = (struct mgmt_msg_hdr *)STREAM_DATA(ms->ins);
|
||||
+ while (nread < mhdr->len) {
|
||||
+ n = stream_read_try(ms->ins, fd, mhdr->len - nread);
|
||||
+ if (n <= 0)
|
||||
+ goto not_done;
|
||||
+ nread += n;
|
||||
+ ms->nrxb += n;
|
||||
+ MGMT_MSG_TRACE(dbgtag, "read %zd from fd %d (%zu of %u)", n, fd, nread, mhdr->len);
|
||||
}
|
||||
|
||||
+ /* We've got a full message, push it onto the FIFO and setup for the next message. */
|
||||
+ MGMT_MSG_TRACE(dbgtag, "read full msg %zu/%u from fd %d", nread, mhdr->len, fd);
|
||||
+ stream_fifo_push(&ms->inq, ms->ins);
|
||||
+ ms->ins = stream_new(ms->max_msg_sz);
|
||||
return MSR_SCHED_BOTH;
|
||||
+
|
||||
+not_done:
|
||||
+ if (n == -2) {
|
||||
+ MGMT_MSG_TRACE(dbgtag, "nothing more to read on fd %d", fd);
|
||||
+ return MSR_SCHED_STREAM;
|
||||
+ }
|
||||
+ if (n == 0)
|
||||
+ MGMT_MSG_ERR(ms, "got EOF/disconnect on fd %d", fd);
|
||||
+ else
|
||||
+ MGMT_MSG_ERR(ms, "got error while reading on fd %d: '%s'", fd,
|
||||
+ safe_strerror(errno));
|
||||
+ return MSR_DISCONNECT;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -171,7 +156,6 @@ bool mgmt_msg_procbufs(struct mgmt_msg_state *ms,
|
||||
const char *dbgtag = debug ? ms->idtag : NULL;
|
||||
struct mgmt_msg_hdr *mhdr;
|
||||
struct stream *work;
|
||||
- uint8_t *data;
|
||||
size_t left, nproc;
|
||||
|
||||
MGMT_MSG_TRACE(dbgtag, "Have %zu streams to process", ms->inq.count);
|
||||
@@ -182,30 +166,17 @@ bool mgmt_msg_procbufs(struct mgmt_msg_state *ms,
|
||||
if (!work)
|
||||
break;
|
||||
|
||||
- data = STREAM_DATA(work);
|
||||
left = stream_get_endp(work);
|
||||
MGMT_MSG_TRACE(dbgtag, "Processing stream of len %zu", left);
|
||||
-
|
||||
- for (; left > sizeof(struct mgmt_msg_hdr);
|
||||
- left -= mhdr->len, data += mhdr->len) {
|
||||
- mhdr = (struct mgmt_msg_hdr *)data;
|
||||
-
|
||||
- assert(MGMT_MSG_IS_MARKER(mhdr->marker));
|
||||
- assert(left >= mhdr->len);
|
||||
-
|
||||
- /*
|
||||
- * Q: if the handler disconnects should stop/flush?
|
||||
- */
|
||||
- handle_msg(MGMT_MSG_MARKER_VERSION(mhdr->marker), (uint8_t *)(mhdr + 1),
|
||||
- mhdr->len - sizeof(struct mgmt_msg_hdr), user);
|
||||
- ms->nrxm++;
|
||||
- nproc++;
|
||||
- }
|
||||
-
|
||||
- if (work != ms->ins)
|
||||
- stream_free(work); /* Free it up */
|
||||
- else
|
||||
- stream_reset(work); /* Reset stream for next read */
|
||||
+ /*
|
||||
+ * Q: if the handler disconnects should we stop/flush?
|
||||
+ */
|
||||
+ mhdr = (struct mgmt_msg_hdr *)STREAM_DATA(work);
|
||||
+ handle_msg(MGMT_MSG_MARKER_VERSION(mhdr->marker), (uint8_t *)(mhdr + 1),
|
||||
+ mhdr->len - sizeof(struct mgmt_msg_hdr), user);
|
||||
+ ms->nrxm++;
|
||||
+ nproc++;
|
||||
+ stream_free(work); /* Free it up */
|
||||
}
|
||||
|
||||
/* return true if should reschedule b/c more to process. */
|
||||
--
|
||||
2.43.0
|
||||
|
||||
@@ -12,6 +12,7 @@ LIC_FILES_CHKSUM = "file://doc/licenses/GPL-2.0;md5=b234ee4d69f5fce4486a80fdaf4a
|
||||
|
||||
SRC_URI = "git://github.com/FRRouting/frr.git;protocol=https;branch=stable/10.6;tag=frr-${PV} \
|
||||
file://frr.pam \
|
||||
file://0001-lib-fix-mgmt_msg-recv-to-deal-with-mis-alignment.patch \
|
||||
"
|
||||
SRCREV = "71da51baee6fb2a02b24262defc46591c86e8a81"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user