mirror of
https://github.com/openembedded/meta-openembedded.git
synced 2026-05-07 05:10:20 +00:00
python3-cbor2: patch CVE-2026-26209
Backport the patch[1] which fixes this vulnerability as mentioned in the comment[3]. Details: https://nvd.nist.gov/vuln/detail/CVE-2026-26209 [1]e61a5f365b[2]fb4ee1612a(pre patch) [3] https://github.com/agronholm/cbor2/pull/275 Dropped changes to the changelog from the original commit. Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com> Signed-off-by: Anuj Mittal <anuj.mittal@oss.qualcomm.com>
This commit is contained in:
committed by
Anuj Mittal
parent
b13ae5a8eb
commit
4810cd8c5b
@@ -0,0 +1,469 @@
|
||||
From fb4ee1612a8a1ac0dbd8cf2f2f6f931a4e06d824 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Eriksen <andreer@vespa.ai>
|
||||
Date: Mon, 29 Dec 2025 14:01:52 +0100
|
||||
Subject: [PATCH] Added a read-ahead buffer to the C decoder (#268)
|
||||
|
||||
CVE: CVE-2026-26209
|
||||
Upstream-Status: Backport [https://github.com/agronholm/cbor2/commit/fb4ee1612a8a1ac0dbd8cf2f2f6f931a4e06d824]
|
||||
Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
|
||||
---
|
||||
source/decoder.c | 225 +++++++++++++++++++++++++++++++++---------
|
||||
source/decoder.h | 9 ++
|
||||
tests/test_decoder.py | 85 +++++++++++++++-
|
||||
3 files changed, 274 insertions(+), 45 deletions(-)
|
||||
|
||||
diff --git a/source/decoder.c b/source/decoder.c
|
||||
index 4f7ee5d..9cd1596 100644
|
||||
--- a/source/decoder.c
|
||||
+++ b/source/decoder.c
|
||||
@@ -42,6 +42,7 @@ enum DecodeOption {
|
||||
typedef uint8_t DecodeOptions;
|
||||
|
||||
static int _CBORDecoder_set_fp(CBORDecoderObject *, PyObject *, void *);
|
||||
+static int _CBORDecoder_set_fp_with_read_size(CBORDecoderObject *, PyObject *, Py_ssize_t);
|
||||
static int _CBORDecoder_set_tag_hook(CBORDecoderObject *, PyObject *, void *);
|
||||
static int _CBORDecoder_set_object_hook(CBORDecoderObject *, PyObject *, void *);
|
||||
static int _CBORDecoder_set_str_errors(CBORDecoderObject *, PyObject *, void *);
|
||||
@@ -101,6 +102,13 @@ CBORDecoder_clear(CBORDecoderObject *self)
|
||||
Py_CLEAR(self->shareables);
|
||||
Py_CLEAR(self->stringref_namespace);
|
||||
Py_CLEAR(self->str_errors);
|
||||
+ if (self->readahead) {
|
||||
+ PyMem_Free(self->readahead);
|
||||
+ self->readahead = NULL;
|
||||
+ self->readahead_size = 0;
|
||||
+ }
|
||||
+ self->read_pos = 0;
|
||||
+ self->read_len = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -143,6 +151,10 @@ CBORDecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
self->immutable = false;
|
||||
self->shared_index = -1;
|
||||
self->decode_depth = 0;
|
||||
+ self->readahead = NULL;
|
||||
+ self->readahead_size = 0;
|
||||
+ self->read_pos = 0;
|
||||
+ self->read_len = 0;
|
||||
}
|
||||
return (PyObject *) self;
|
||||
error:
|
||||
@@ -152,21 +164,27 @@ error:
|
||||
|
||||
|
||||
// CBORDecoder.__init__(self, fp=None, tag_hook=None, object_hook=None,
|
||||
-// str_errors='strict')
|
||||
+// str_errors='strict', read_size=4096)
|
||||
int
|
||||
CBORDecoder_init(CBORDecoderObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
static char *keywords[] = {
|
||||
- "fp", "tag_hook", "object_hook", "str_errors", NULL
|
||||
+ "fp", "tag_hook", "object_hook", "str_errors", "read_size", NULL
|
||||
};
|
||||
PyObject *fp = NULL, *tag_hook = NULL, *object_hook = NULL,
|
||||
*str_errors = NULL;
|
||||
+ Py_ssize_t read_size = CBOR2_DEFAULT_READ_SIZE;
|
||||
|
||||
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOO", keywords,
|
||||
- &fp, &tag_hook, &object_hook, &str_errors))
|
||||
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOn", keywords,
|
||||
+ &fp, &tag_hook, &object_hook, &str_errors, &read_size))
|
||||
return -1;
|
||||
|
||||
- if (_CBORDecoder_set_fp(self, fp, NULL) == -1)
|
||||
+ if (read_size < 1) {
|
||||
+ PyErr_SetString(PyExc_ValueError, "read_size must be at least 1");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (_CBORDecoder_set_fp_with_read_size(self, fp, read_size) == -1)
|
||||
return -1;
|
||||
if (tag_hook && _CBORDecoder_set_tag_hook(self, tag_hook, NULL) == -1)
|
||||
return -1;
|
||||
@@ -197,11 +215,12 @@ _CBORDecoder_get_fp(CBORDecoderObject *self, void *closure)
|
||||
}
|
||||
|
||||
|
||||
-// CBORDecoder._set_fp(self, value)
|
||||
+// Internal: set fp with configurable read size
|
||||
static int
|
||||
-_CBORDecoder_set_fp(CBORDecoderObject *self, PyObject *value, void *closure)
|
||||
+_CBORDecoder_set_fp_with_read_size(CBORDecoderObject *self, PyObject *value, Py_ssize_t read_size)
|
||||
{
|
||||
PyObject *tmp, *read;
|
||||
+ char *new_buffer = NULL;
|
||||
|
||||
if (!value) {
|
||||
PyErr_SetString(PyExc_AttributeError, "cannot delete fp attribute");
|
||||
@@ -214,13 +233,43 @@ _CBORDecoder_set_fp(CBORDecoderObject *self, PyObject *value, void *closure)
|
||||
return -1;
|
||||
}
|
||||
|
||||
+ if (self->readahead == NULL || self->readahead_size != read_size) {
|
||||
+ new_buffer = (char *)PyMem_Malloc(read_size);
|
||||
+ if (!new_buffer) {
|
||||
+ Py_DECREF(read);
|
||||
+ PyErr_NoMemory();
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
// See notes in encoder.c / _CBOREncoder_set_fp
|
||||
tmp = self->read;
|
||||
self->read = read;
|
||||
Py_DECREF(tmp);
|
||||
+
|
||||
+ self->read_pos = 0;
|
||||
+ self->read_len = 0;
|
||||
+
|
||||
+ // Replace buffer (size changed or was NULL)
|
||||
+ if (new_buffer) {
|
||||
+ PyMem_Free(self->readahead);
|
||||
+ self->readahead = new_buffer;
|
||||
+ self->readahead_size = read_size;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
+// CBORDecoder._set_fp(self, value) - property setter uses default read size
|
||||
+static int
|
||||
+_CBORDecoder_set_fp(CBORDecoderObject *self, PyObject *value, void *closure)
|
||||
+{
|
||||
+ // Use existing readahead_size if already allocated, otherwise use default
|
||||
+ Py_ssize_t read_size = (self->readahead_size > 0) ?
|
||||
+ self->readahead_size : CBOR2_DEFAULT_READ_SIZE;
|
||||
+ return _CBORDecoder_set_fp_with_read_size(self, value, read_size);
|
||||
+}
|
||||
+
|
||||
|
||||
// CBORDecoder._get_tag_hook(self)
|
||||
static PyObject *
|
||||
@@ -376,45 +425,93 @@ raise_from(PyObject *new_exc_type, const char *message) {
|
||||
}
|
||||
}
|
||||
|
||||
-static PyObject *
|
||||
-fp_read_object(CBORDecoderObject *self, const Py_ssize_t size)
|
||||
+// Read directly into caller's buffer (bypassing readahead buffer)
|
||||
+static Py_ssize_t
|
||||
+fp_read_bytes(CBORDecoderObject *self, char *buf, Py_ssize_t size)
|
||||
{
|
||||
- PyObject *ret = NULL;
|
||||
- PyObject *obj, *size_obj;
|
||||
- size_obj = PyLong_FromSsize_t(size);
|
||||
- if (size_obj) {
|
||||
- obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL);
|
||||
- Py_DECREF(size_obj);
|
||||
- if (obj) {
|
||||
- assert(PyBytes_CheckExact(obj));
|
||||
- if (PyBytes_GET_SIZE(obj) == (Py_ssize_t) size) {
|
||||
- ret = obj;
|
||||
+ PyObject *size_obj = PyLong_FromSsize_t(size);
|
||||
+ if (!size_obj)
|
||||
+ return -1;
|
||||
+
|
||||
+ PyObject *obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL);
|
||||
+ Py_DECREF(size_obj);
|
||||
+ if (!obj)
|
||||
+ return -1;
|
||||
+
|
||||
+ assert(PyBytes_CheckExact(obj));
|
||||
+ Py_ssize_t bytes_read = PyBytes_GET_SIZE(obj);
|
||||
+ if (bytes_read > 0)
|
||||
+ memcpy(buf, PyBytes_AS_STRING(obj), bytes_read);
|
||||
+
|
||||
+ Py_DECREF(obj);
|
||||
+ return bytes_read;
|
||||
+}
|
||||
+
|
||||
+// Read into caller's buffer using the readahead buffer
|
||||
+static int
|
||||
+fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size)
|
||||
+{
|
||||
+ Py_ssize_t available, to_copy, remaining, total_copied;
|
||||
+
|
||||
+ remaining = size;
|
||||
+ total_copied = 0;
|
||||
+
|
||||
+ while (remaining > 0) {
|
||||
+ available = self->read_len - self->read_pos;
|
||||
+
|
||||
+ if (available > 0) {
|
||||
+ // Copy from buffer
|
||||
+ to_copy = (available < remaining) ? available : remaining;
|
||||
+ memcpy(buf + total_copied, self->readahead + self->read_pos, to_copy);
|
||||
+ self->read_pos += to_copy;
|
||||
+ total_copied += to_copy;
|
||||
+ remaining -= to_copy;
|
||||
+ } else {
|
||||
+ Py_ssize_t bytes_read;
|
||||
+
|
||||
+ if (remaining >= self->readahead_size) {
|
||||
+ // Large remaining: read directly into destination, bypass buffer
|
||||
+ bytes_read = fp_read_bytes(self, buf + total_copied, remaining);
|
||||
+ if (bytes_read > 0) {
|
||||
+ total_copied += bytes_read;
|
||||
+ remaining -= bytes_read;
|
||||
+ }
|
||||
} else {
|
||||
- PyErr_Format(
|
||||
- _CBOR2_CBORDecodeEOF,
|
||||
- "premature end of stream (expected to read %zd bytes, "
|
||||
- "got %zd instead)", size, PyBytes_GET_SIZE(obj));
|
||||
- Py_DECREF(obj);
|
||||
+ // Small remaining: refill buffer
|
||||
+ self->read_pos = 0;
|
||||
+ self->read_len = 0;
|
||||
+ bytes_read = fp_read_bytes(self, self->readahead, self->readahead_size);
|
||||
+ if (bytes_read > 0)
|
||||
+ self->read_len = bytes_read;
|
||||
+ }
|
||||
+
|
||||
+ if (bytes_read <= 0) {
|
||||
+ if (bytes_read == 0)
|
||||
+ PyErr_Format(
|
||||
+ _CBOR2_CBORDecodeEOF,
|
||||
+ "premature end of stream (expected to read %zd bytes, "
|
||||
+ "got %zd instead)", size, total_copied);
|
||||
+ return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
- return ret;
|
||||
-}
|
||||
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
-static int
|
||||
-fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size)
|
||||
+// Read and return as PyBytes object
|
||||
+static PyObject *
|
||||
+fp_read_object(CBORDecoderObject *self, const Py_ssize_t size)
|
||||
{
|
||||
- int ret = -1;
|
||||
- PyObject *obj = fp_read_object(self, size);
|
||||
- if (obj) {
|
||||
- char *data = PyBytes_AS_STRING(obj);
|
||||
- if (data) {
|
||||
- memcpy(buf, data, size);
|
||||
- ret = 0;
|
||||
- }
|
||||
- Py_DECREF(obj);
|
||||
+ PyObject *ret = PyBytes_FromStringAndSize(NULL, size);
|
||||
+ if (!ret)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (fp_read(self, PyBytes_AS_STRING(ret), size) == -1) {
|
||||
+ Py_DECREF(ret);
|
||||
+ return NULL;
|
||||
}
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2091,23 +2188,55 @@ static PyObject *
|
||||
CBORDecoder_decode_from_bytes(CBORDecoderObject *self, PyObject *data)
|
||||
{
|
||||
PyObject *save_read, *buf, *ret = NULL;
|
||||
+ bool is_nested = (self->decode_depth > 0);
|
||||
+ Py_ssize_t save_read_pos = 0, save_read_len = 0;
|
||||
+ char *save_buffer = NULL;
|
||||
|
||||
if (!_CBOR2_BytesIO && _CBOR2_init_BytesIO() == -1)
|
||||
return NULL;
|
||||
|
||||
+ buf = PyObject_CallFunctionObjArgs(_CBOR2_BytesIO, data, NULL);
|
||||
+ if (!buf)
|
||||
+ return NULL;
|
||||
+
|
||||
self->decode_depth++;
|
||||
save_read = self->read;
|
||||
- buf = PyObject_CallFunctionObjArgs(_CBOR2_BytesIO, data, NULL);
|
||||
- if (buf) {
|
||||
- self->read = PyObject_GetAttr(buf, _CBOR2_str_read);
|
||||
- if (self->read) {
|
||||
- ret = decode(self, DECODE_NORMAL);
|
||||
- Py_DECREF(self->read);
|
||||
+ Py_INCREF(save_read); // Keep alive while we use a different read method
|
||||
+ save_read_pos = self->read_pos;
|
||||
+ save_read_len = self->read_len;
|
||||
+
|
||||
+ // Save buffer pointer if nested
|
||||
+ if (is_nested) {
|
||||
+ save_buffer = self->readahead;
|
||||
+ self->readahead = NULL; // Prevent setter from freeing saved buffer
|
||||
+ }
|
||||
+
|
||||
+ // Set up BytesIO decoder - setter handles buffer allocation
|
||||
+ if (_CBORDecoder_set_fp_with_read_size(self, buf, self->readahead_size) == -1) {
|
||||
+ if (is_nested) {
|
||||
+ PyMem_Free(self->readahead);
|
||||
+ self->readahead = save_buffer;
|
||||
}
|
||||
+ Py_DECREF(save_read);
|
||||
Py_DECREF(buf);
|
||||
+ self->decode_depth--;
|
||||
+ return NULL;
|
||||
}
|
||||
- self->read = save_read;
|
||||
+
|
||||
+ ret = decode(self, DECODE_NORMAL);
|
||||
+
|
||||
+ Py_XDECREF(self->read); // Decrement BytesIO read method
|
||||
+ self->read = save_read; // Restore saved read (already has correct refcount)
|
||||
+ Py_DECREF(buf);
|
||||
self->decode_depth--;
|
||||
+
|
||||
+ if (is_nested) {
|
||||
+ PyMem_Free(self->readahead);
|
||||
+ self->readahead = save_buffer;
|
||||
+ }
|
||||
+ self->read_pos = save_read_pos;
|
||||
+ self->read_len = save_read_len;
|
||||
+
|
||||
assert(self->decode_depth >= 0);
|
||||
if (self->decode_depth == 0) {
|
||||
clear_shareable_state(self);
|
||||
@@ -2257,6 +2386,14 @@ PyDoc_STRVAR(CBORDecoder__doc__,
|
||||
" dictionary. This callback is invoked for each deserialized\n"
|
||||
" :class:`dict` object. The return value is substituted for the dict\n"
|
||||
" in the deserialized output.\n"
|
||||
+":param read_size:\n"
|
||||
+" the size of the read buffer (default 4096). The decoder reads from\n"
|
||||
+" the stream in chunks of this size for performance. This means the\n"
|
||||
+" stream position may advance beyond the bytes actually decoded. For\n"
|
||||
+" large values (bytestrings, text strings), reads may be larger than\n"
|
||||
+" ``read_size``. Code that needs to read from the stream after\n"
|
||||
+" decoding should use :meth:`decode_from_bytes` instead, or set\n"
|
||||
+" ``read_size=1`` to disable buffering (at a performance cost).\n"
|
||||
"\n"
|
||||
".. _CBOR: https://cbor.io/\n"
|
||||
);
|
||||
diff --git a/source/decoder.h b/source/decoder.h
|
||||
index a2f1bcb..a2f4bf1 100644
|
||||
--- a/source/decoder.h
|
||||
+++ b/source/decoder.h
|
||||
@@ -3,6 +3,9 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
+// Default readahead buffer size for streaming reads
|
||||
+#define CBOR2_DEFAULT_READ_SIZE 4096
|
||||
+
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
PyObject *read; // cached read() method of fp
|
||||
@@ -14,6 +17,12 @@ typedef struct {
|
||||
bool immutable;
|
||||
Py_ssize_t shared_index;
|
||||
Py_ssize_t decode_depth;
|
||||
+
|
||||
+ // Readahead buffer for streaming
|
||||
+ char *readahead; // allocated buffer
|
||||
+ Py_ssize_t readahead_size; // size of allocated buffer
|
||||
+ Py_ssize_t read_pos; // current position in buffer
|
||||
+ Py_ssize_t read_len; // valid bytes in buffer
|
||||
} CBORDecoderObject;
|
||||
|
||||
extern PyTypeObject CBORDecoderType;
|
||||
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
|
||||
index 3b4455a..9bf5a10 100644
|
||||
--- a/tests/test_decoder.py
|
||||
+++ b/tests/test_decoder.py
|
||||
@@ -1043,4 +1043,87 @@ class TestDecoderReuse:
|
||||
|
||||
result = impl.loads(data)
|
||||
assert result == ["hello", "hello"]
|
||||
- assert result[0] is result[1] # Same object reference
|
||||
\ No newline at end of file
|
||||
+ assert result[0] is result[1] # Same object reference
|
||||
+
|
||||
+
|
||||
+def test_decode_from_bytes_in_hook_preserves_buffer(impl):
|
||||
+ """Test that calling decode_from_bytes from a hook preserves stream buffer state.
|
||||
+ This is a documented use case from docs/customizing.rst where hooks decode
|
||||
+ embedded CBOR data. Before the fix, the stream's readahead buffer would be
|
||||
+ corrupted, causing subsequent reads to fail or return wrong data.
|
||||
+ """
|
||||
+
|
||||
+ def tag_hook(decoder, tag):
|
||||
+ if tag.tag == 999:
|
||||
+ # Decode embedded CBOR (documented pattern)
|
||||
+ return decoder.decode_from_bytes(tag.value)
|
||||
+ return tag
|
||||
+
|
||||
+ # Test data: array with [tag(999, embedded_cbor), "after_hook", "final"]
|
||||
+ # embedded_cbor encodes: [1, 2, 3]
|
||||
+ data = unhexlify(
|
||||
+ "83" # array(3)
|
||||
+ "d903e7" # tag(999)
|
||||
+ "44" # bytes(4)
|
||||
+ "83010203" # embedded: array [1, 2, 3]
|
||||
+ "6a" # text(10)
|
||||
+ "61667465725f686f6f6b" # "after_hook"
|
||||
+ "65" # text(5)
|
||||
+ "66696e616c" # "final"
|
||||
+ )
|
||||
+
|
||||
+ # Decode from stream (not bytes) to use readahead buffer
|
||||
+ stream = BytesIO(data)
|
||||
+ decoder = impl.CBORDecoder(stream, tag_hook=tag_hook)
|
||||
+ result = decoder.decode()
|
||||
+
|
||||
+ # Verify all values decoded correctly
|
||||
+ assert result == [[1, 2, 3], "after_hook", "final"]
|
||||
+
|
||||
+ # First element should be the decoded embedded CBOR
|
||||
+ assert result[0] == [1, 2, 3]
|
||||
+ # Second element should be "after_hook" (not corrupted)
|
||||
+ assert result[1] == "after_hook"
|
||||
+ # Third element should be "final"
|
||||
+ assert result[2] == "final"
|
||||
+
|
||||
+
|
||||
+def test_decode_from_bytes_deeply_nested_in_hook(impl):
|
||||
+ """Test deeply nested decode_from_bytes calls preserve buffer state.
|
||||
+ This tests tag(999, tag(888, tag(777, [1,2,3]))) where each tag value
|
||||
+ is embedded CBOR that triggers the hook recursively.
|
||||
+ Before the fix, even a single level would corrupt the buffer. With multiple
|
||||
+ levels, the buffer would be completely corrupted, mixing data from different
|
||||
+ BytesIO objects and the original stream.
|
||||
+ """
|
||||
+
|
||||
+ def tag_hook(decoder, tag):
|
||||
+ if tag.tag in [999, 888, 777]:
|
||||
+ # Recursively decode embedded CBOR
|
||||
+ return decoder.decode_from_bytes(tag.value)
|
||||
+ return tag
|
||||
+
|
||||
+ # Test data: [tag(999, tag(888, tag(777, [1,2,3]))), "after", "final"]
|
||||
+ # Each tag contains embedded CBOR
|
||||
+ data = unhexlify(
|
||||
+ "83" # array(3)
|
||||
+ "d903e7" # tag(999)
|
||||
+ "4c" # bytes(12)
|
||||
+ "d9037848d903094483010203" # embedded: tag(888, tag(777, [1,2,3]))
|
||||
+ "65" # text(5)
|
||||
+ "6166746572" # "after"
|
||||
+ "65" # text(5)
|
||||
+ "66696e616c" # "final"
|
||||
+ )
|
||||
+
|
||||
+ # Decode from stream to use readahead buffer
|
||||
+ stream = BytesIO(data)
|
||||
+ decoder = impl.CBORDecoder(stream, tag_hook=tag_hook)
|
||||
+ result = decoder.decode()
|
||||
+
|
||||
+ # With the fix: all three levels of nesting work correctly
|
||||
+ # Without the fix: buffer corruption at each level, test fails
|
||||
+ assert result == [[1, 2, 3], "after", "final"]
|
||||
+ assert result[0] == [1, 2, 3]
|
||||
+ assert result[1] == "after"
|
||||
+ assert result[2] == "final"
|
||||
--
|
||||
2.50.1
|
||||
|
||||
Reference in New Issue
Block a user