meta-openembedded/meta-python/recipes-devtools/python/python3-gevent/CVE-2023-41419.patch

From f80ee15e27b67b6fdd101d5f91cf584d19b2b26e Mon Sep 17 00:00:00 2001
From: Jason Madden <jamadden@gmail.com>
Date: Fri, 6 Oct 2023 12:41:59 +0000
Subject: [PATCH] gevent.pywsgi: Much improved handling of chunk trailers.
 Validation is much stricter to the specification.

Fixes #1989

CVE: CVE-2023-41419

Upstream-Status: Backport [https://github.com/gevent/gevent/commit/2f53c851eaf926767fbac62385615efd4886221c]

Signed-off-by: Narpat Mali <narpat.mali@windriver.com>
---
 docs/changes/1989.bugfix         |  26 ++++
 src/gevent/pywsgi.py             | 229 ++++++++++++++++++++++++-------
 src/gevent/subprocess.py         |   7 +-
 src/gevent/testing/testcase.py   |   2 +-
 src/gevent/tests/test__pywsgi.py | 193 ++++++++++++++++++++++++--
 5 files changed, 390 insertions(+), 67 deletions(-)
 create mode 100644 docs/changes/1989.bugfix

diff --git a/docs/changes/1989.bugfix b/docs/changes/1989.bugfix
new file mode 100644
index 0000000..7ce4a93
--- /dev/null
+++ b/docs/changes/1989.bugfix
@@ -0,0 +1,26 @@
+Make ``gevent.pywsgi`` comply more closely with the HTTP specification
+for chunked transfer encoding. In particular, we are much stricter
+about trailers, and trailers that are invalid (too long or featuring
+disallowed characters) forcibly close the connection to the client
+*after* the results have been sent.
+
+Trailers otherwise continue to be ignored and are not available to the
+WSGI application.
+
+Previously, carefully crafted invalid trailers in chunked requests on
+keep-alive connections might appear as two requests to
+``gevent.pywsgi``. Because this was handled exactly as a normal
+keep-alive connection with two requests, the WSGI application should
+handle it normally. However, if you were counting on some upstream
+server to filter incoming requests based on paths or header fields,
+and the upstream server simply passed trailers through without
+validating them, then this embedded second request would bypass those
+checks. (If the upstream server validated that the trailers meet the
+HTTP specification, this could not occur, because characters that are
+required in an HTTP request, like a space, are not allowed in
+trailers.) CVE-2023-41419 was reserved for this.
+
+Our thanks to the original reporters, Keran Mu
+(mkr22@mails.tsinghua.edu.cn) and Jianjun Chen
+(jianjun@tsinghua.edu.cn), from Tsinghua University and Zhongguancun
+Laboratory.
diff --git a/src/gevent/pywsgi.py b/src/gevent/pywsgi.py
index 0ebe095..078398a 100644
--- a/src/gevent/pywsgi.py
+++ b/src/gevent/pywsgi.py
@@ -1,13 +1,28 @@
 # Copyright (c) 2005-2009, eventlet contributors
 # Copyright (c) 2009-2018, gevent contributors
 """
-A pure-Python, gevent-friendly WSGI server.
+A pure-Python, gevent-friendly WSGI server implementing HTTP/1.1.

 The server is provided in :class:`WSGIServer`, but most of the actual
 WSGI work is handled by :class:`WSGIHandler` --- a new instance is
 created for each request. The server can be customized to use
 different subclasses of :class:`WSGIHandler`.

+.. important::
+   This server is intended primarily for development and testing, and
+   secondarily for other "safe" scenarios where it will not be exposed to
+   potentially malicious input. The code has not been security audited,
+   and is not intended for direct exposure to the public Internet. For production
+   usage on the Internet, either choose a production-strength server such as
+   gunicorn, or put a reverse proxy between gevent and the Internet.
+.. versionchanged:: NEXT
+   Complies more closely with the HTTP specification for chunked transfer encoding.
+   In particular, we are much stricter about trailers, and trailers that
+   are invalid (too long or featuring disallowed characters) forcibly close
+   the connection to the client *after* the results have been sent.
+   Trailers otherwise continue to be ignored and are not available to the
+   WSGI application.
+
 """
 from __future__ import absolute_import

@@ -22,10 +37,7 @@ import time
 import traceback
 from datetime import datetime

-try:
-    from urllib import unquote
-except ImportError:
-    from urllib.parse import unquote # python 2 pylint:disable=import-error,no-name-in-module
+from urllib.parse import unquote

 from gevent import socket
 import gevent
@@ -53,29 +65,52 @@ __all__ = [

 MAX_REQUEST_LINE = 8192
 # Weekday and month names for HTTP date/time formatting; always English!
-_WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
-_MONTHNAME = [None,  # Dummy so we can use 1-based month numbers
+_WEEKDAYNAME = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
+_MONTHNAME = (None,  # Dummy so we can use 1-based month numbers
               "Jan", "Feb", "Mar", "Apr", "May", "Jun",
-              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

 # The contents of the "HEX" grammar rule for HTTP, upper and lowercase A-F plus digits,
 # in byte form for comparing to the network.
 _HEX = string.hexdigits.encode('ascii')

+# The characters allowed in "token" rules.
+
+# token          = 1*tchar
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+#                ; any VCHAR, except delimiters
+# ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
+_ALLOWED_TOKEN_CHARS = frozenset(
+    # Remember we have to be careful because bytestrings
+    # inexplicably iterate as integers, which are not equal to bytes.
+
+    # explicit chars then DIGIT
+    (c.encode('ascii') for c in "!#$%&'*+-.^_`|~0123456789")
+    # Then we add ALPHA
+) | {c.encode('ascii') for c in string.ascii_letters}
+assert b'A' in _ALLOWED_TOKEN_CHARS
+
+
 # Errors
 _ERRORS = {}
 _INTERNAL_ERROR_STATUS = '500 Internal Server Error'
 _INTERNAL_ERROR_BODY = b'Internal Server Error'
-_INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'),
-                           ('Connection', 'close'),
-                           ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))]
+_INTERNAL_ERROR_HEADERS = (
+    ('Content-Type', 'text/plain'),
+    ('Connection', 'close'),
+    ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))
+)
 _ERRORS[500] = (_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS, _INTERNAL_ERROR_BODY)

 _BAD_REQUEST_STATUS = '400 Bad Request'
 _BAD_REQUEST_BODY = ''
-_BAD_REQUEST_HEADERS = [('Content-Type', 'text/plain'),
-                        ('Connection', 'close'),
-                        ('Content-Length', str(len(_BAD_REQUEST_BODY)))]
+_BAD_REQUEST_HEADERS = (
+    ('Content-Type', 'text/plain'),
+    ('Connection', 'close'),
+    ('Content-Length', str(len(_BAD_REQUEST_BODY)))
+)
 _ERRORS[400] = (_BAD_REQUEST_STATUS, _BAD_REQUEST_HEADERS, _BAD_REQUEST_BODY)

 _REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n"
@@ -204,23 +239,32 @@ class Input(object):
         # Read and return the next integer chunk length. If no
         # chunk length can be read, raises _InvalidClientInput.

-        # Here's the production for a chunk:
-        # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html)
-        #   chunk          = chunk-size [ chunk-extension ] CRLF
-        #                    chunk-data CRLF
-        #   chunk-size     = 1*HEX
-        #   chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
-        #   chunk-ext-name = token
-        #   chunk-ext-val  = token | quoted-string
-
-        # To cope with malicious or broken clients that fail to send valid
-        # chunk lines, the strategy is to read character by character until we either reach
-        # a ; or newline. If at any time we read a non-HEX digit, we bail. If we hit a
-        # ;, indicating an chunk-extension, we'll read up to the next
-        # MAX_REQUEST_LINE characters
-        # looking for the CRLF, and if we don't find it, we bail. If we read more than 16 hex characters,
-        # (the number needed to represent a 64-bit chunk size), we bail (this protects us from
-        # a client that sends an infinite stream of `F`, for example).
+        # Here's the production for a chunk (actually the whole body):
+        # (https://www.rfc-editor.org/rfc/rfc7230#section-4.1)
+
+        # chunked-body   = *chunk
+        #                  last-chunk
+        #                  trailer-part
+        #                  CRLF
+        #
+        # chunk          = chunk-size [ chunk-ext ] CRLF
+        #                  chunk-data CRLF
+        # chunk-size     = 1*HEXDIG
+        # last-chunk     = 1*("0") [ chunk-ext ] CRLF
+        # trailer-part   = *( header-field CRLF )
+        # chunk-data     = 1*OCTET ; a sequence of chunk-size octets
+
+        # To cope with malicious or broken clients that fail to send
+        # valid chunk lines, the strategy is to read character by
+        # character until we either reach a ; or newline. If at any
+        # time we read a non-HEX digit, we bail. If we hit a ;,
+        # indicating an chunk-extension, we'll read up to the next
+        # MAX_REQUEST_LINE characters ("A server ought to limit the
+        # total length of chunk extensions received") looking for the
+        # CRLF, and if we don't find it, we bail. If we read more than
+        # 16 hex characters, (the number needed to represent a 64-bit
+        # chunk size), we bail (this protects us from a client that
+        # sends an infinite stream of `F`, for example).

         buf = BytesIO()
         while 1:
@@ -228,16 +272,20 @@ class Input(object):
             if not char:
                 self._chunked_input_error = True
                 raise _InvalidClientInput("EOF before chunk end reached")
-            if char == b'\r':
-                break
-            if char == b';':
+
+            if char in (
+                b'\r', # Beginning EOL
+                b';', # Beginning extension
+            ):
                 break

-            if char not in _HEX:
+            if char not in _HEX: # Invalid data.
                 self._chunked_input_error = True
                 raise _InvalidClientInput("Non-hex data", char)
+
             buf.write(char)
-            if buf.tell() > 16:
+
+            if buf.tell() > 16: # Too many hex bytes
                 self._chunked_input_error = True
                 raise _InvalidClientInput("Chunk-size too large.")

@@ -257,11 +305,72 @@ class Input(object):
         if char == b'\r':
             # We either got here from the main loop or from the
             # end of an extension
+            self.__read_chunk_size_crlf(rfile, newline_only=True)
+            result = int(buf.getvalue(), 16)
+            if result == 0:
+                # The only time a chunk size of zero is allowed is the final
+                # chunk. It is either followed by another \r\n, or some trailers
+                # which are then followed by \r\n.
+                while self.__read_chunk_trailer(rfile):
+                    pass
+            return result
+
+    # Trailers have the following production (they are a header-field followed by CRLF)
+    # See above for the definition of "token".
+    #
+    # header-field   = field-name ":" OWS field-value OWS
+    # field-name     = token
+    # field-value    = *( field-content / obs-fold )
+    # field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+    # field-vchar    = VCHAR / obs-text
+    # obs-fold       = CRLF 1*( SP / HTAB )
+    #                ; obsolete line folding
+    #                ; see Section 3.2.4
+
+
+    def __read_chunk_trailer(self, rfile, ):
+        # With rfile positioned just after a \r\n, read a trailer line.
+        # Return a true value if a non-empty trailer was read, and
+        # return false if an empty trailer was read (meaning the trailers are
+        # done).
+        # If a single line exceeds the MAX_REQUEST_LINE, raise an exception.
+        # If the field-name portion contains invalid characters, raise an exception.
+
+        i = 0
+        empty = True
+        seen_field_name = False
+        while i < MAX_REQUEST_LINE:
+            char = rfile.read(1)
+            if char == b'\r':
+                # Either read the next \n or raise an error.
+                self.__read_chunk_size_crlf(rfile, newline_only=True)
+                break
+            # Not a \r, so we are NOT an empty chunk.
+            empty = False
+            if char == b':' and i > 0:
+                # We're ending the field-name part; stop validating characters.
+                # Unless : was the first character...
+                seen_field_name = True
+            if not seen_field_name and char not in _ALLOWED_TOKEN_CHARS:
+                raise _InvalidClientInput('Invalid token character: %r' % (char,))
+            i += 1
+        else:
+            # We read too much
+            self._chunked_input_error = True
+            raise _InvalidClientInput("Too large chunk trailer")
+        return not empty
+
+    def __read_chunk_size_crlf(self, rfile, newline_only=False):
+        # Also for safety, correctly verify that we get \r\n when expected.
+        if not newline_only:
             char = rfile.read(1)
-            if char != b'\n':
+            if char != b'\r':
                 self._chunked_input_error = True
-                raise _InvalidClientInput("Line didn't end in CRLF")
-            return int(buf.getvalue(), 16)
+                raise _InvalidClientInput("Line didn't end in CRLF: %r" % (char,))
+        char = rfile.read(1)
+        if char != b'\n':
+            self._chunked_input_error = True
+            raise _InvalidClientInput("Line didn't end in LF: %r" % (char,))

     def _chunked_read(self, length=None, use_readline=False):
         # pylint:disable=too-many-branches
@@ -294,7 +403,7 @@ class Input(object):

                 self.position += datalen
                 if self.chunk_length == self.position:
-                    rfile.readline()
+                    self.__read_chunk_size_crlf(rfile)

                 if length is not None:
                     length -= datalen
@@ -307,9 +416,9 @@ class Input(object):
                 # determine the next size to read
                 self.chunk_length = self.__read_chunk_length(rfile)
                 self.position = 0
-                if self.chunk_length == 0:
-                    # Last chunk. Terminates with a CRLF.
-                    rfile.readline()
+                # If chunk_length was 0, we already read any trailers and
+                # validated that we have ended with \r\n\r\n.
+
         return b''.join(response)

     def read(self, length=None):
@@ -532,7 +641,8 @@ class WSGIHandler(object):
         elif len(words) == 2:
             self.command, self.path = words
             if self.command != "GET":
-                raise _InvalidClientRequest('Expected GET method: %r' % (raw_requestline,))
+                raise _InvalidClientRequest('Expected GET method; Got command=%r; path=%r; raw=%r' % (
+                    self.command, self.path, raw_requestline,))
             self.request_version = "HTTP/0.9"
             # QQQ I'm pretty sure we can drop support for HTTP/0.9
         else:
@@ -1000,14 +1110,28 @@ class WSGIHandler(object):
             finally:
                 try:
                     self.wsgi_input._discard()
-                except (socket.error, IOError):
-                    # Don't let exceptions during discarding
+                except _InvalidClientInput:
+                    # This one is deliberately raised to the outer
+                    # scope, because, with the incoming stream in some bad state,
+                    # we can't be sure we can synchronize and properly parse the next
+                    # request.
+                    raise
+                except socket.error
+                    # Don't let socket exceptions during discarding
                     # input override any exception that may have been
                     # raised by the application, such as our own _InvalidClientInput.
                     # In the general case, these aren't even worth logging (see the comment
                     # just below)
                     pass
-        except _InvalidClientInput:
+        except _InvalidClientInput as ex:
+            # DO log this one because:
+            # - Some of the data may have been read and acted on by the
+            #   application;
+            # - The response may or may not have been sent;
+            # - It's likely that the client is bad, or malicious, and
+            #   users might wish to take steps to block the client.
+            self._handle_client_error(ex)
+            self.close_connection = True
             self._send_error_response_if_possible(400)
         except socket.error as ex:
             if ex.args[0] in self.ignored_socket_errors:
@@ -1054,17 +1178,22 @@ class WSGIHandler(object):
     def _handle_client_error(self, ex):
         # Called for invalid client input
         # Returns the appropriate error response.
-        if not isinstance(ex, ValueError):
+        if not isinstance(ex, (ValueError, _InvalidClientInput)):
             # XXX: Why not self._log_error to send it through the loop's
             # handle_error method?
+            # _InvalidClientRequest is a ValueError; _InvalidClientInput is an IOError.
             traceback.print_exc()
         if isinstance(ex, _InvalidClientRequest):
             # No formatting needed, that's already been handled. In fact, because the
             # formatted message contains user input, it might have a % in it, and attempting
             # to format that with no arguments would be an error.
-            self.log_error(ex.formatted_message)
+            # However, the error messages do not include the requesting IP
+            # necessarily, so we do add that.
+            self.log_error('(from %s) %s', self.client_address, ex.formatted_message)
         else:
-            self.log_error('Invalid request: %s', str(ex) or ex.__class__.__name__)
+            self.log_error('Invalid request (from %s): %s',
+                           self.client_address,
+                           str(ex) or ex.__class__.__name__)
         return ('400', _BAD_REQUEST_RESPONSE)

     def _headers(self):
diff --git a/src/gevent/subprocess.py b/src/gevent/subprocess.py
index 38c9bd3..8a8ccad 100644
--- a/src/gevent/subprocess.py
+++ b/src/gevent/subprocess.py
@@ -352,10 +352,11 @@ def check_output(*popenargs, **kwargs):

     To capture standard error in the result, use ``stderr=STDOUT``::

-        >>> print(check_output(["/bin/sh", "-c",
+        >>> output = check_output(["/bin/sh", "-c",
         ...               "ls -l non_existent_file ; exit 0"],
-        ...              stderr=STDOUT).decode('ascii').strip())
-        ls: non_existent_file: No such file or directory
+        ...              stderr=STDOUT).decode('ascii').strip()
+        >>> print(output.rsplit(':', 1)[1].strip())
+        No such file or directory

     There is an additional optional argument, "input", allowing you to
     pass a string to the subprocess's stdin.  If you use this argument
diff --git a/src/gevent/testing/testcase.py b/src/gevent/testing/testcase.py
index cd5db80..aa86dcf 100644
--- a/src/gevent/testing/testcase.py
+++ b/src/gevent/testing/testcase.py
@@ -225,7 +225,7 @@ class TestCaseMetaClass(type):
                 classDict.pop(key)
                 # XXX: When did we stop doing this?
                 #value = wrap_switch_count_check(value)
-                value = _wrap_timeout(timeout, value)
+                #value = _wrap_timeout(timeout, value)
                 error_fatal = getattr(value, 'error_fatal', error_fatal)
                 if error_fatal:
                     value = errorhandler.wrap_error_fatal(value)
diff --git a/src/gevent/tests/test__pywsgi.py b/src/gevent/tests/test__pywsgi.py
index d2125a8..d46030b 100644
--- a/src/gevent/tests/test__pywsgi.py
+++ b/src/gevent/tests/test__pywsgi.py
@@ -25,21 +25,11 @@ from gevent import monkey
 monkey.patch_all()

 from contextlib import contextmanager
-try:
-    from urllib.parse import parse_qs
-except ImportError:
-    # Python 2
-    from urlparse import parse_qs
+from urllib.parse import parse_qs
 import os
 import sys
-try:
-    # On Python 2, we want the C-optimized version if
-    # available; it has different corner-case behaviour than
-    # the Python implementation, and it used by socket.makefile
-    # by default.
-    from cStringIO import StringIO
-except ImportError:
-    from io import BytesIO as StringIO
+from io import BytesIO as StringIO
+
 import weakref
 import unittest
 from wsgiref.validate import validator
@@ -156,6 +146,10 @@ class Response(object):
     @classmethod
     def read(cls, fd, code=200, reason='default', version='1.1',
              body=None, chunks=None, content_length=None):
+        """
+        Read an HTTP response, optionally perform assertions,
+        and return the Response object.
+        """
         # pylint:disable=too-many-branches
         _status_line, headers = read_headers(fd)
         self = cls(_status_line, headers)
@@ -716,7 +710,14 @@ class TestNegativeReadline(TestCase):

 class TestChunkedPost(TestCase):

+    calls = 0
+
+    def setUp(self):
+        super().setUp()
+        self.calls = 0
+
     def application(self, env, start_response):
+        self.calls += 1
         self.assertTrue(env.get('wsgi.input_terminated'))
         start_response('200 OK', [('Content-Type', 'text/plain')])
         if env['PATH_INFO'] == '/a':
@@ -730,6 +731,8 @@ class TestChunkedPost(TestCase):
         if env['PATH_INFO'] == '/c':
             return list(iter(lambda: env['wsgi.input'].read(1), b''))

+        return [b'We should not get here', env['PATH_INFO'].encode('ascii')]
+
     def test_014_chunked_post(self):
         data = (b'POST /a HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n'
                 b'Transfer-Encoding: chunked\r\n\r\n'
@@ -797,6 +800,170 @@ class TestChunkedPost(TestCase):
             fd.write(data)
             read_http(fd, code=400)

+    def test_trailers_keepalive_ignored(self):
+        # Trailers after a chunk are ignored.
+        data = (
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: keep-alive\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n hai\r\n'
+            b'0\r\n' # last-chunk
+            # Normally the final CRLF would go here, but if you put in a
+            # trailer, it doesn't.
+            b'trailer1: value1\r\n'
+            b'trailer2: value2\r\n'
+            b'\r\n' # Really terminate the chunk.
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: close\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n bye\r\n'
+            b'0\r\n' # last-chunk
+        )
+        with self.makefile() as fd:
+            fd.write(data)
+            read_http(fd, body='oh hai')
+            read_http(fd, body='oh bye')
+
+        self.assertEqual(self.calls, 2)
+
+    def test_trailers_too_long(self):
+        # Trailers after a chunk are ignored.
+        data = (
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: keep-alive\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n hai\r\n'
+            b'0\r\n' # last-chunk
+            # Normally the final CRLF would go here, but if you put in a
+            # trailer, it doesn't.
+            b'trailer2: value2' # not lack of \r\n
+        )
+        data += b't' * pywsgi.MAX_REQUEST_LINE
+        # No termination, because we detect the trailer as being too
+        # long and abort the connection.
+        with self.makefile() as fd:
+            fd.write(data)
+            read_http(fd, body='oh hai')
+            with self.assertRaises(ConnectionClosed):
+                read_http(fd, body='oh bye')
+
+    def test_trailers_request_smuggling_missing_last_chunk_keep_alive(self):
+        # When something that looks like a request line comes in the trailer
+        # as the first line, immediately after an invalid last chunk.
+        # We detect this and abort the connection, because the
+        # whitespace in the GET line isn't a legal part of a trailer.
+        # If we didn't abort the connection, then, because we specified
+        # keep-alive, the server would be hanging around waiting for more input.
+        data = (
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: keep-alive\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n hai\r\n'
+            b'0' # last-chunk, but missing the \r\n
+            # Normally the final CRLF would go here, but if you put in a
+            # trailer, it doesn't.
+            # b'\r\n'
+            b'GET /path2?a=:123 HTTP/1.1\r\n'
+            b'Host: a.com\r\n'
+            b'Connection: close\r\n'
+            b'\r\n'
+        )
+        with self.makefile() as fd:
+            fd.write(data)
+            read_http(fd, body='oh hai')
+            with self.assertRaises(ConnectionClosed):
+                read_http(fd)
+
+        self.assertEqual(self.calls, 1)
+
+    def test_trailers_request_smuggling_missing_last_chunk_close(self):
+        # Same as the above, except the trailers are actually valid
+        # and since we ask to close the connection we don't get stuck
+        # waiting for more input.
+        data = (
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: close\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n hai\r\n'
+            b'0\r\n' # last-chunk
+            # Normally the final CRLF would go here, but if you put in a
+            # trailer, it doesn't.
+            # b'\r\n'
+            b'GETpath2a:123 HTTP/1.1\r\n'
+            b'Host: a.com\r\n'
+            b'Connection: close\r\n'
+            b'\r\n'
+        )
+        with self.makefile() as fd:
+            fd.write(data)
+            read_http(fd, body='oh hai')
+            with self.assertRaises(ConnectionClosed):
+                read_http(fd)
+
+    def test_trailers_request_smuggling_header_first(self):
+        # When something that looks like a header comes in the first line.
+        data = (
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: keep-alive\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n hai\r\n'
+            b'0\r\n' # last-chunk, but only one CRLF
+            b'Header: value\r\n'
+            b'GET /path2?a=:123 HTTP/1.1\r\n'
+            b'Host: a.com\r\n'
+            b'Connection: close\r\n'
+            b'\r\n'
+        )
+        with self.makefile() as fd:
+            fd.write(data)
+            read_http(fd, body='oh hai')
+            with self.assertRaises(ConnectionClosed):
+                read_http(fd, code=400)
+
+        self.assertEqual(self.calls, 1)
+
+    def test_trailers_request_smuggling_request_terminates_then_header(self):
+        data = (
+            b'POST /a HTTP/1.1\r\n'
+            b'Host: localhost\r\n'
+            b'Connection: keep-alive\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'2\r\noh\r\n'
+            b'4\r\n hai\r\n'
+            b'0\r\n' # last-chunk
+            b'\r\n'
+            b'Header: value'
+            b'GET /path2?a=:123 HTTP/1.1\r\n'
+            b'Host: a.com\r\n'
+            b'Connection: close\r\n'
+            b'\r\n'
+        )
+        with self.makefile() as fd:
+            fd.write(data)
+            read_http(fd, body='oh hai')
+            read_http(fd, code=400)
+
+        self.assertEqual(self.calls, 1)
+

 class TestUseWrite(TestCase):

--
2.40.0