File SQUID-2023_1.patch of Package squid.40992
commit 417da4006cf5c97d44e74431b816fc58fec9e270
Author: Eduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
Date:   Mon Mar 18 17:48:21 2019 +0000
    Fix incremental parsing of chunked quoted extensions (#310)
    
    Before this change, incremental parsing of quoted chunked extensions
    was broken for two reasons:
    
    * Http::One::Parser::skipLineTerminator() unexpectedly threw after
      partially received quoted chunk extension value.
    
    * When Http::One::Tokenizer was unable to parse a quoted extension,
      it incorrectly restored the input buffer to the beginning of the
      extension value (instead of the extension itself), thus making
      further incremental parsing iterations impossible.
    
    IMO, the reason for this problem was that Http::One::Tokenizer::qdText()
    could not distinguish two cases (returning false in both):
    
    * the end of the quoted string not yet reached
    
    * an input error, e.g., wrong/unexpected character
    
    A possible approach could be to improve Http::One::Tokenizer, making it
    aware about "needs more data" state.  However, to be acceptable,
    these improvements should be done in the base Parser::Tokenizer
    class instead. These changes seem to be non-trivial and could be
    done separately and later.
    
    Another approach, used here, is to simplify the complex and error-prone
    chunked extensions parsing algorithm, fixing incremental parsing bugs
    and still parse incrementally in almost all cases. The performance
    regression could be expected only in relatively rare cases of partially
    received or malformed extensions.
    
    Also:
    * fixed parsing of partial use-original-body extension values
    * do not treat an invalid use-original-body as an unknown extension
    * optimization: parse use-original-body extension only in ICAP context
      (i.e., where it is expected)
    * improvement: added a new API to TeChunkedParser to specify known
      chunked extensions list
From 6cfa10d94ca15a764a1d975597d8024582ef19be Mon Sep 17 00:00:00 2001
From: Amos Jeffries <yadij@users.noreply.github.com>
Date: Fri, 13 Oct 2023 08:44:16 +0000
Subject: [PATCH] RFC 9112: Improve HTTP chunked encoding compliance (#1498)
---
 src/http/one/Parser.cc          |  8 +-------
 src/http/one/Parser.h           |  4 +---
 src/http/one/TeChunkedParser.cc | 23 ++++++++++++++++++-----
 src/parser/Tokenizer.cc         | 12 ++++++++++++
 src/parser/Tokenizer.h          |  7 +++++++
 5 files changed, 39 insertions(+), 15 deletions(-)
Index: squid-4.17/src/adaptation/icap/ModXact.cc
===================================================================
--- squid-4.17.orig/src/adaptation/icap/ModXact.cc
+++ squid-4.17/src/adaptation/icap/ModXact.cc
@@ -25,12 +25,13 @@
 #include "comm.h"
 #include "comm/Connection.h"
 #include "err_detail_type.h"
-#include "http/one/TeChunkedParser.h"
 #include "HttpHeaderTools.h"
 #include "HttpMsg.h"
 #include "HttpReply.h"
 #include "HttpRequest.h"
 #include "MasterXaction.h"
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
 #include "SquidTime.h"
 
 // flow and terminology:
@@ -44,6 +45,8 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation:
 
 static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
 
+const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body");
+
 Adaptation::Icap::ModXact::State::State()
 {
     memset(this, 0, sizeof(*this));
@@ -1108,6 +1111,7 @@ void Adaptation::Icap::ModXact::decideOn
         state.parsing = State::psBody;
         replyHttpBodySize = 0;
         bodyParser = new Http1::TeChunkedParser;
+        bodyParser->parseExtensionValuesWith(&extensionParser);
         makeAdaptedBodyPipe("adapted response from the ICAP server");
         Must(state.sending == State::sendingAdapted);
     } else {
@@ -1142,15 +1146,10 @@ void Adaptation::Icap::ModXact::parseBod
     }
 
     if (parsed) {
-        if (state.readyForUob && bodyParser->useOriginBody >= 0) {
-            prepPartialBodyEchoing(
-                static_cast<uint64_t>(bodyParser->useOriginBody));
-            stopParsing();
-            return;
-        }
-
-        stopParsing();
-        stopSending(true); // the parser succeeds only if all parsed data fits
+        if (state.readyForUob && extensionParser.sawUseOriginalBody())
+            prepPartialBodyEchoing(extensionParser.useOriginalBody());
+        else
+            stopSending(true); // the parser succeeds only if all parsed data fits
         return;
     }
 
@@ -2014,3 +2013,14 @@ void Adaptation::Icap::ModXactLauncher::
     }
 }
 
+void
+Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName)
+{
+    if (extName == UseOriginalBodyName) {
+        useOriginalBody_ = tok.udec64("use-original-body");
+        assert(useOriginalBody_ >= 0);
+    } else {
+        Ignore(tok, extName);
+    }
+}
+
Index: squid-4.17/src/adaptation/icap/ModXact.h
===================================================================
--- squid-4.17.orig/src/adaptation/icap/ModXact.h
+++ squid-4.17/src/adaptation/icap/ModXact.h
@@ -15,6 +15,7 @@
 #include "adaptation/icap/Xaction.h"
 #include "BodyPipe.h"
 #include "http/one/forward.h"
+#include "http/one/TeChunkedParser.h"
 
 /*
  * ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
@@ -105,6 +106,23 @@ private:
     enum State { stDisabled, stWriting, stIeof, stDone } theState;
 };
 
+/// handles ICAP-specific chunk extensions supported by Squid
+class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser
+{
+public:
+    /* Http1::ChunkExtensionValueParser API */
+    virtual void parse(Tokenizer &tok, const SBuf &extName) override;
+
+    bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; }
+    uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast<uint64_t>(useOriginalBody_); }
+
+private:
+    static const SBuf UseOriginalBodyName;
+
+    /// the value of the parsed use-original-body chunk extension (or -1)
+    int64_t useOriginalBody_ = -1;
+};
+
 class ModXact: public Xaction, public BodyProducer, public BodyConsumer
 {
     CBDATA_CLASS(ModXact);
@@ -270,6 +288,8 @@ private:
 
     int adaptHistoryId; ///< adaptation history slot reservation
 
+    ChunkExtensionValueParser extensionParser;
+
     class State
     {
 
Index: squid-4.17/src/http/one/Parser.cc
===================================================================
--- squid-4.17.orig/src/http/one/Parser.cc
+++ squid-4.17/src/http/one/Parser.cc
@@ -7,10 +7,11 @@
  */
 
 #include "squid.h"
+#include "base/CharacterSet.h"
 #include "Debug.h"
 #include "http/one/Parser.h"
-#include "http/one/Tokenizer.h"
 #include "mime_header.h"
+#include "parser/Tokenizer.h"
 #include "SquidConfig.h"
 
 /// RFC 7230 section 2.6 - 7 magic octets
@@ -61,20 +62,13 @@ Http::One::Parser::DelimiterCharacters()
            RelaxedDelimiterCharacters() : CharacterSet::SP;
 }
 
-bool
-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
+void
+Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
 {
-    if (tok.skip(Http1::CrLf()))
-        return true;
-
     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
-        return true;
+        return;
 
-    if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
-        return false; // need more data
-
-    throw TexcHere("garbage instead of CRLF line terminator");
-    return false; // unreachable, but make naive compilers happy
+    tok.skipRequired("line-terminating CRLF", Http1::CrLf());
 }
 
 /// all characters except the LF line terminator
@@ -102,7 +96,7 @@ LineCharacters()
 void
 Http::One::Parser::cleanMimePrefix()
 {
-    Http1::Tokenizer tok(mimeHeaderBlock_);
+    Tokenizer tok(mimeHeaderBlock_);
     while (tok.skipOne(RelaxedDelimiterCharacters())) {
         (void)tok.skipAll(LineCharacters()); // optional line content
         // LF terminator is required.
@@ -137,7 +131,7 @@ Http::One::Parser::cleanMimePrefix()
 void
 Http::One::Parser::unfoldMime()
 {
-    Http1::Tokenizer tok(mimeHeaderBlock_);
+    Tokenizer tok(mimeHeaderBlock_);
     const auto szLimit = mimeHeaderBlock_.length();
     mimeHeaderBlock_.clear();
     // prevent the mime sender being able to make append() realloc/grow multiple times.
@@ -228,7 +222,7 @@ Http::One::Parser::getHostHeaderField()
     debugs(25, 5, "looking for " << name);
 
     // while we can find more LF in the SBuf
-    Http1::Tokenizer tok(mimeHeaderBlock_);
+    Tokenizer tok(mimeHeaderBlock_);
     SBuf p;
 
     while (tok.prefix(p, LineCharacters())) {
@@ -250,7 +244,7 @@ Http::One::Parser::getHostHeaderField()
         p.consume(namelen + 1);
 
         // TODO: optimize SBuf::trim to take CharacterSet directly
-        Http1::Tokenizer t(p);
+        Tokenizer t(p);
         t.skipAll(CharacterSet::WSP);
         p = t.remaining();
 
@@ -278,10 +272,15 @@ Http::One::ErrorLevel()
 }
 
 // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
-bool
-Http::One::ParseBws(Tokenizer &tok)
+void
+Http::One::ParseBws(Parser::Tokenizer &tok)
 {
-    if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
+    const auto count = tok.skipAll(Parser::WhitespaceCharacters());
+
+    if (tok.atEnd())
+        throw InsufficientInput(); // even if count is positive
+
+    if (count) {
         // Generating BWS is a MUST-level violation so warn about it as needed.
         debugs(33, ErrorLevel(), "found " << count << " BWS octets");
         // RFC 7230 says we MUST parse BWS, so we fall through even if
@@ -289,6 +288,6 @@ Http::One::ParseBws(Tokenizer &tok)
     }
     // else we successfully "parsed" an empty BWS sequence
 
-    return true;
+    // success: no more BWS characters expected
 }
 
Index: squid-4.17/src/http/one/Parser.h
===================================================================
--- squid-4.17.orig/src/http/one/Parser.h
+++ squid-4.17/src/http/one/Parser.h
@@ -12,6 +12,7 @@
 #include "anyp/ProtocolVersion.h"
 #include "http/one/forward.h"
 #include "http/StatusCode.h"
+#include "parser/forward.h"
 #include "sbuf/SBuf.h"
 
 namespace Http {
@@ -40,6 +41,7 @@ class Parser : public RefCountable
 {
 public:
     typedef SBuf::size_type size_type;
+    typedef ::Parser::Tokenizer Tokenizer;
 
     Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE), hackExpectsMime_(false) {}
     virtual ~Parser() {}
@@ -118,11 +120,9 @@ protected:
      * detect and skip the CRLF or (if tolerant) LF line terminator
      * consume from the tokenizer.
      *
-     * throws if non-terminator is detected.
-     * \retval true only if line terminator found.
-     * \retval false incomplete or missing line terminator, need more data.
+     * \throws exception on bad or InsufficientInput
      */
-    bool skipLineTerminator(Http1::Tokenizer &tok) const;
+    void skipLineTerminator(Tokenizer &) const;
 
     /**
      * Scan to find the mime headers block for current message.
@@ -159,8 +159,8 @@ private:
 };
 
 /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
-/// \returns true (always; unlike all the skip*() functions)
-bool ParseBws(Tokenizer &tok);
+/// \throws InsufficientInput when the end of BWS cannot be confirmed
+void ParseBws(Parser::Tokenizer &);
 
 /// the right debugs() level for logging HTTP violation messages
 int ErrorLevel();
Index: squid-4.17/src/http/one/RequestParser.cc
===================================================================
--- squid-4.17.orig/src/http/one/RequestParser.cc
+++ squid-4.17/src/http/one/RequestParser.cc
@@ -9,8 +9,8 @@
 #include "squid.h"
 #include "Debug.h"
 #include "http/one/RequestParser.h"
-#include "http/one/Tokenizer.h"
 #include "http/ProtocolVersion.h"
+#include "parser/Tokenizer.h"
 #include "profiler/Profiler.h"
 #include "SquidConfig.h"
 
@@ -64,7 +64,7 @@ Http::One::RequestParser::skipGarbageLin
  *  RFC 7230 section 2.6, 3.1 and 3.5
  */
 bool
-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseMethodField(Tokenizer &tok)
 {
     // method field is a sequence of TCHAR.
     // Limit to 32 characters to prevent overly long sequences of non-HTTP
@@ -145,7 +145,7 @@ Http::One::RequestParser::RequestTargetC
 }
 
 bool
-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseUriField(Tokenizer &tok)
 {
     /* Arbitrary 64KB URI upper length limit.
      *
@@ -178,7 +178,7 @@ Http::One::RequestParser::parseUriField(
 }
 
 bool
-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
 {
     static const SBuf http1p0("HTTP/1.0");
     static const SBuf http1p1("HTTP/1.1");
@@ -253,7 +253,7 @@ Http::One::RequestParser::skipDelimiter(
 
 /// Parse CRs at the end of request-line, just before the terminating LF.
 bool
-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
+Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
 {
     if (Config.onoff.relaxed_header_parser) {
         (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
@@ -289,12 +289,12 @@ Http::One::RequestParser::parseRequestFi
     // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
     // Now, the request line has to end at the first LF.
     static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
-    ::Parser::Tokenizer lineTok(buf_);
+    Tokenizer lineTok(buf_);
     if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
         if (buf_.length() >= Config.maxRequestHeaderSize) {
             /* who should we blame for our failure to parse this line? */
 
-            Http1::Tokenizer methodTok(buf_);
+            Tokenizer methodTok(buf_);
             if (!parseMethodField(methodTok))
                 return -1; // blame a bad method (or its delimiter)
 
@@ -308,7 +308,7 @@ Http::One::RequestParser::parseRequestFi
         return 0;
     }
 
-    Http1::Tokenizer tok(line);
+    Tokenizer tok(line);
 
     if (!parseMethodField(tok))
         return -1;
Index: squid-4.17/src/http/one/RequestParser.h
===================================================================
--- squid-4.17.orig/src/http/one/RequestParser.h
+++ squid-4.17/src/http/one/RequestParser.h
@@ -54,11 +54,11 @@ private:
     bool doParse(const SBuf &aBuf);
 
     /* all these return false and set parseStatusCode on parsing failures */
-    bool parseMethodField(Http1::Tokenizer &);
-    bool parseUriField(Http1::Tokenizer &);
-    bool parseHttpVersionField(Http1::Tokenizer &);
+    bool parseMethodField(Tokenizer &);
+    bool parseUriField(Tokenizer &);
+    bool parseHttpVersionField(Tokenizer &);
     bool skipDelimiter(const size_t count, const char *where);
-    bool skipTrailingCrs(Http1::Tokenizer &tok);
+    bool skipTrailingCrs(Tokenizer &tok);
 
     bool http0() const {return !msgProtocol_.major;}
     static const CharacterSet &RequestTargetCharacters();
Index: squid-4.17/src/http/one/ResponseParser.cc
===================================================================
--- squid-4.17.orig/src/http/one/ResponseParser.cc
+++ squid-4.17/src/http/one/ResponseParser.cc
@@ -9,8 +9,8 @@
 #include "squid.h"
 #include "Debug.h"
 #include "http/one/ResponseParser.h"
-#include "http/one/Tokenizer.h"
 #include "http/ProtocolVersion.h"
+#include "parser/Tokenizer.h"
 #include "profiler/Profiler.h"
 #include "SquidConfig.h"
 
@@ -47,7 +47,7 @@ Http::One::ResponseParser::firstLineSize
 // NP: we found the protocol version and consumed it already.
 // just need the status code and reason phrase
 int
-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
+Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim)
 {
     if (!completedStatus_) {
         debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
@@ -87,14 +87,13 @@ Http::One::ResponseParser::parseResponse
     static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
     (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
     try {
-        if (skipLineTerminator(tok)) {
-            debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-            buf_ = tok.remaining(); // resume checkpoint
-            return 1;
-        }
+        skipLineTerminator(tok);
+        buf_ = tok.remaining(); // resume checkpoint
+        debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
+        return 1;
+    } catch (const InsufficientInput &) {
         reasonPhrase_.clear();
         return 0; // need more to be sure we have it all
-
     } catch (const std::exception &ex) {
         debugs(74, 6, "invalid status-line: " << ex.what());
     }
@@ -119,7 +118,7 @@ Http::One::ResponseParser::parseResponse
 int
 Http::One::ResponseParser::parseResponseFirstLine()
 {
-    Http1::Tokenizer tok(buf_);
+    Tokenizer tok(buf_);
 
     const CharacterSet &WspDelim = DelimiterCharacters();
 
Index: squid-4.17/src/http/one/ResponseParser.h
===================================================================
--- squid-4.17.orig/src/http/one/ResponseParser.h
+++ squid-4.17/src/http/one/ResponseParser.h
@@ -43,7 +43,7 @@ public:
 
 private:
     int parseResponseFirstLine();
-    int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
+    int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &);
 
     /// magic prefix for identifying ICY response messages
     static const SBuf IcyMagic;
Index: squid-4.17/src/http/one/TeChunkedParser.cc
===================================================================
--- squid-4.17.orig/src/http/one/TeChunkedParser.cc
+++ squid-4.17/src/http/one/TeChunkedParser.cc
@@ -13,10 +13,13 @@
 #include "http/one/Tokenizer.h"
 #include "http/ProtocolVersion.h"
 #include "MemBuf.h"
+#include "parser/Tokenizer.h"
 #include "Parsing.h"
+#include "sbuf/Stream.h"
 #include "SquidConfig.h"
 
-Http::One::TeChunkedParser::TeChunkedParser()
+Http::One::TeChunkedParser::TeChunkedParser():
+    customExtensionValueParser(nullptr)
 {
     // chunked encoding only exists in HTTP/1.1
     Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
@@ -31,7 +34,11 @@ Http::One::TeChunkedParser::clear()
     buf_.clear();
     theChunkSize = theLeftBodySize = 0;
     theOut = NULL;
-    useOriginBody = -1;
+    // XXX: We do not reset customExtensionValueParser here. Based on the
+    // clear() API description, we must, but it makes little sense and could
+    // break method callers if they appear because some of them may forget to
+    // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
+    // parent class and this unnecessary method with it.
 }
 
 bool
@@ -49,14 +56,14 @@ Http::One::TeChunkedParser::parse(const
     if (parsingStage_ == Http1::HTTP_PARSE_NONE)
         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
 
-    Http1::Tokenizer tok(buf_);
+    Tokenizer tok(buf_);
 
     // loop for as many chunks as we can
     // use do-while instead of while so that we can incrementally
     // restart in the middle of a chunk/frame
     do {
 
-        if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
+        if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
             return false;
 
         if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
@@ -80,10 +87,15 @@ Http::One::TeChunkedParser::needsMoreSpa
 
 /// RFC 7230 section 4.1 chunk-size
 bool
-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
 {
     Must(theChunkSize <= 0); // Should(), really
 
+    static const SBuf bannedHexPrefixLower("0x");
+    static const SBuf bannedHexPrefixUpper("0X");
+    if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
+        throw TextException("chunk starts with 0x", Here());
+
     int64_t size = -1;
     if (tok.int64(size, 16, false) && !tok.atEnd()) {
         if (size < 0)
@@ -104,66 +116,83 @@ Http::One::TeChunkedParser::parseChunkSi
     return false; // should not be reachable
 }
 
-/**
- * Parses chunk metadata suffix, looking for interesting extensions and/or
- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
- *
- *   chunk-ext = *( BWS  ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
- *   chunk-ext-name = token
- *   chunk-ext-val  = token / quoted-string
- *
- * ICAP 'use-original-body=N' extension is supported.
- */
-bool
-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
-{
-    SBuf ext;
-    SBuf value;
-    while (
-        ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
-        tok.skip(';') &&
-        ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
-        tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
-
-        // whole value part is optional. if no '=' expect next chunk-ext
-        if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
-
-            if (!skipKnown) {
-                if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
-                    debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
-                    buf_ = tok.remaining(); // parse checkpoint
-                    continue;
-                }
-            }
-
-            debugs(94, 5, "skipping unknown chunk extension " << ext);
-
-            // unknown might have a value token or quoted-string
-            if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
-                buf_ = tok.remaining(); // parse checkpoint
-                continue;
-            }
-
-            // otherwise need more data OR corrupt syntax
-            break;
-        }
-
-        if (!tok.atEnd())
-            buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
-    }
-
-    if (skipLineTerminator(tok)) {
-        buf_ = tok.remaining(); // checkpoint
-        // non-0 chunk means data, 0-size means optional Trailer follows
+/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
+///   chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
+///   last-chunk = 1*"0" [ chunk-ext ] CRLF
+bool
+Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
+{
+    // Code becomes much simpler when incremental parsing functions throw on
+    // bad or insufficient input, like in the code below. TODO: Expand up.
+    try {
+        parseChunkExtensions(tok); // a possibly empty chunk-ext list
+        tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
+        buf_ = tok.remaining();
         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
         return true;
+    } catch (const InsufficientInput &) {
+        tok.reset(buf_); // backtrack to the last commit point
+        return false;
     }
+    // other exceptions bubble up to kill message parsing
+}
+
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1:
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+void
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
+{
+    do {
+        auto tok = callerTok;
+
+        ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+
+        if (!tok.skip(';'))
+            return; // reached the end of extensions (if any)
 
-    return false;
+        parseOneChunkExtension(tok);
+        buf_ = tok.remaining(); // got one extension
+        callerTok = tok;
+    } while (true);
+}
+
+void
+Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
+{
+    const auto ignoredValue = tokenOrQuotedString(tok);
+    debugs(94, 5, extName << " with value " << ignoredValue);
+}
+
+/// Parses a single chunk-ext list element:
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+void
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
+{
+    auto tok = callerTok;
+
+    ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
+
+    const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+    callerTok = tok; // in case we determine that this is a valueless chunk-ext
+
+    ParseBws(tok);
+
+    if (!tok.skip('='))
+        return; // parsed a valueless chunk-ext
+
+    ParseBws(tok);
+
+    // optimization: the only currently supported extension needs last-chunk
+    if (!theChunkSize && customExtensionValueParser)
+        customExtensionValueParser->parse(tok, extName);
+    else
+        ChunkExtensionValueParser::Ignore(tok, extName);
+
+    callerTok = tok;
 }
 
 bool
-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
 {
     if (theLeftBodySize > 0) {
         buf_ = tok.remaining(); // sync buffers before buf_ use
@@ -188,17 +217,20 @@ Http::One::TeChunkedParser::parseChunkBo
 }
 
 bool
-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
 {
     Must(theLeftBodySize == 0); // Should(), really
 
-    if (skipLineTerminator(tok)) {
+    try {
+        tok.skipRequired("chunk CRLF", Http1::CrLf());
         buf_ = tok.remaining(); // parse checkpoint
         theChunkSize = 0; // done with the current chunk
         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
         return true;
     }
-
-    return false;
+    catch (const InsufficientInput &) {
+        return false;
+    }
+    // other exceptions bubble up to kill message parsing
 }
 
Index: squid-4.17/src/http/one/TeChunkedParser.h
===================================================================
--- squid-4.17.orig/src/http/one/TeChunkedParser.h
+++ squid-4.17/src/http/one/TeChunkedParser.h
@@ -18,6 +18,26 @@ namespace Http
 namespace One
 {
 
+using ::Parser::InsufficientInput;
+
+// TODO: Move this class into http/one/ChunkExtensionValueParser.*
+/// A customizable parser of a single chunk extension value (chunk-ext-val).
+/// From RFC 7230 section 4.1.1 and its Errata #4667:
+/// chunk-ext = *( BWS  ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+/// chunk-ext-name = token
+/// chunk-ext-val  = token / quoted-string
+class ChunkExtensionValueParser
+{
+public:
+    typedef ::Parser::Tokenizer Tokenizer;
+
+    /// extracts and ignores the value of a named extension
+    static void Ignore(Tokenizer &tok, const SBuf &extName);
+
+    /// extracts and then interprets (or ignores) the extension value
+    virtual void parse(Tokenizer &tok, const SBuf &extName) = 0;
+};
+
 /**
  * An incremental parser for chunked transfer coding
  * defined in RFC 7230 section 4.1.
@@ -25,7 +45,7 @@ namespace One
  *
  * The parser shovels content bytes from the raw
  * input buffer into the content output buffer, both caller-supplied.
- * Ignores chunk extensions except for ICAP's ieof.
+ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith().
  * Trailers are available via mimeHeader() if wanted.
  */
 class TeChunkedParser : public Http1::Parser
@@ -37,6 +57,10 @@ public:
     /// set the buffer to be used to store decoded chunk data
     void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
 
+    /// Instead of ignoring all chunk extension values, give the supplied
+    /// parser a chance to handle them. Only applied to last-chunk (for now).
+    void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; }
+
     bool needsMoreSpace() const;
 
     /* Http1::Parser API */
@@ -45,17 +69,20 @@ public:
     virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
 
 private:
-    bool parseChunkSize(Http1::Tokenizer &tok);
-    bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown);
-    bool parseChunkBody(Http1::Tokenizer &tok);
-    bool parseChunkEnd(Http1::Tokenizer &tok);
+    bool parseChunkSize(Tokenizer &tok);
+    bool parseChunkMetadataSuffix(Tokenizer &);
+    void parseChunkExtensions(Tokenizer &);
+    void parseOneChunkExtension(Tokenizer &);
+    bool parseChunkBody(Tokenizer &tok);
+    bool parseChunkEnd(Tokenizer &tok);
 
     MemBuf *theOut;
     uint64_t theChunkSize;
     uint64_t theLeftBodySize;
 
-public:
-    int64_t useOriginBody;
+    /// An optional plugin for parsing and interpreting custom chunk-ext-val.
+    /// This "visitor" object is owned by our creator.
+    ChunkExtensionValueParser *customExtensionValueParser;
 };
 
 } // namespace One
Index: squid-4.17/src/http/one/Tokenizer.cc
===================================================================
--- squid-4.17.orig/src/http/one/Tokenizer.cc
+++ squid-4.17/src/http/one/Tokenizer.cc
@@ -8,35 +8,18 @@
 
 #include "squid.h"
 #include "Debug.h"
+#include "http/one/Parser.h"
 #include "http/one/Tokenizer.h"
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
 
-bool
-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
+/// Extracts quoted-string after the caller removes the initial '"'.
+/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
+/// \throws InsufficientInput when input can be a token _prefix_
+/// \returns extracted quoted string (without quotes and with chars unescaped)
+static SBuf
+parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
 {
-    checkpoint();
-
-    if (!skip('"'))
-        return false;
-
-    return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
-{
-    checkpoint();
-
-    if (!skip('"'))
-        return prefix(returnedToken, CharacterSet::TCHAR);
-
-    return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
-{
-    // the initial DQUOTE has been skipped by the caller
-
     /*
      * RFC 1945 - defines qdtext:
      *   inclusive of LWS (which includes CR and LF)
@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &retur
     // best we can do is a conditional reference since http1p0 value may change per-client
     const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
 
-    for (;;) {
-        SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
-        returnedToken.append(consume(prefixLen));
+    SBuf parsedToken;
+
+    while (!tok.atEnd()) {
+        SBuf qdText;
+        if (tok.prefix(qdText, tokenChars))
+            parsedToken.append(qdText);
+
+        if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+            if (tok.atEnd())
+                break;
 
-        // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
-        if (!http1p0 && skip('\\')) {
             /* RFC 7230 section 3.2.6
              *
              * The backslash octet ("\") can be used as a single-octet quoting
@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &retur
              */
             static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
             SBuf escaped;
-            if (!prefix(escaped, qPairChars, 1)) {
-                returnedToken.clear();
-                restoreLastCheckpoint();
-                return false;
-            }
-            returnedToken.append(escaped);
+            if (!tok.prefix(escaped, qPairChars, 1))
+                throw TexcHere("invalid escaped character in quoted-pair");
+
+            parsedToken.append(escaped);
             continue;
+        }
 
-        } else if (skip('"')) {
-            break; // done
+        if (tok.skip('"'))
+            return parsedToken; // may be empty
 
-        } else if (atEnd()) {
-            // need more data
-            returnedToken.clear();
-            restoreLastCheckpoint();
-            return false;
-        }
+        if (tok.atEnd())
+            break;
 
-        // else, we have an error
-        debugs(24, 8, "invalid bytes for set " << tokenChars.name);
-        returnedToken.clear();
-        restoreLastCheckpoint();
-        return false;
+        throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
     }
 
-    // found the whole string
-    return true;
+    throw Http::One::InsufficientInput();
+}
+
+SBuf
+Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
+{
+    if (tok.skip('"'))
+        return parseQuotedStringSuffix(tok, http1p0);
+
+    if (tok.atEnd())
+        throw InsufficientInput();
+
+    SBuf parsedToken;
+    if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
+        throw TexcHere("invalid input while expecting an HTTP token");
+
+    if (tok.atEnd())
+        throw InsufficientInput();
+
+    // got the complete token
+    return parsedToken;
 }
 
Index: squid-4.17/src/http/one/Tokenizer.h
===================================================================
--- squid-4.17.orig/src/http/one/Tokenizer.h
+++ squid-4.17/src/http/one/Tokenizer.h
@@ -9,68 +9,47 @@
 #ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
 #define SQUID_SRC_HTTP_ONE_TOKENIZER_H
 
-#include "parser/Tokenizer.h"
+#include "parser/forward.h"
+#include "sbuf/forward.h"
 
 namespace Http {
 namespace One {
 
 /**
- * Lexical processor extended to tokenize HTTP/1.x syntax.
+ * Extracts either an HTTP/1 token or quoted-string while dealing with
+ * possibly incomplete input typical for incremental text parsers.
+ * Unescapes escaped characters in HTTP/1.1 quoted strings.
  *
- * \see ::Parser::Tokenizer for more detail
+ * \param http1p0 whether to prohibit \-escaped characters in quoted strings
+ * \throws InsufficientInput as appropriate, including on unterminated tokens
+ * \returns extracted token or quoted string (without quotes)
+ *
+ * Governed by:
+ *  - RFC 1945 section 2.1
+ *  "
+ *    A string of text is parsed as a single word if it is quoted using
+ *    double-quote marks.
+ *
+ *        quoted-string  = ( <"> *(qdtext) <"> )
+ *
+ *        qdtext         = <any CHAR except <"> and CTLs,
+ *                         but including LWS>
+ *
+ *    Single-character quoting using the backslash ("\") character is not
+ *    permitted in HTTP/1.0.
+ *  "
+ *
+ *  - RFC 7230 section 3.2.6
+ *  "
+ *    A string of text is parsed as a single value if it is quoted using
+ *    double-quote marks.
+ *
+ *    quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+ *    qdtext         = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+ *    obs-text       = %x80-FF
+ *  "
  */
-class Tokenizer : public ::Parser::Tokenizer
-{
-public:
-    Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
-
-    /**
-     * Attempt to parse a quoted-string lexical construct.
-     *
-     * Governed by:
-     *  - RFC 1945 section 2.1
-     *  "
-     *    A string of text is parsed as a single word if it is quoted using
-     *    double-quote marks.
-     *
-     *        quoted-string  = ( <"> *(qdtext) <"> )
-     *
-     *        qdtext         = <any CHAR except <"> and CTLs,
-     *                         but including LWS>
-     *
-     *    Single-character quoting using the backslash ("\") character is not
-     *    permitted in HTTP/1.0.
-     *  "
-     *
-     *  - RFC 7230 section 3.2.6
-     *  "
-     *    A string of text is parsed as a single value if it is quoted using
-     *    double-quote marks.
-     *
-     *    quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
-     *    qdtext         = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
-     *    obs-text       = %x80-FF
-     *  "
-     *
-     * \param escaped HTTP/1.0 does not permit \-escaped characters
-     */
-    bool quotedString(SBuf &value, const bool http1p0 = false);
-
-    /**
-     * Attempt to parse a (token / quoted-string ) lexical construct.
-     */
-    bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
-
-private:
-    /// parse the internal component of a quote-string, and terminal DQUOTE
-    bool qdText(SBuf &value, const bool http1p0);
-
-    void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
-    void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
-
-    SBuf savedCheckpoint_;
-    SBuf::size_type savedStats_;
-};
+SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false);
 
 } // namespace One
 } // namespace Http
Index: squid-4.17/src/http/one/forward.h
===================================================================
--- squid-4.17.orig/src/http/one/forward.h
+++ squid-4.17/src/http/one/forward.h
@@ -10,6 +10,7 @@
 #define SQUID_SRC_HTTP_ONE_FORWARD_H
 
 #include "base/RefCount.h"
+#include "parser/forward.h"
 #include "sbuf/forward.h"
 
 namespace Http {
@@ -31,6 +32,8 @@ typedef RefCount<Http::One::ResponsePars
 /// CRLF textual representation
 const SBuf &CrLf();
 
+using ::Parser::InsufficientInput;
+
 } // namespace One
 } // namespace Http
 
Index: squid-4.17/src/parser/BinaryTokenizer.h
===================================================================
--- squid-4.17.orig/src/parser/BinaryTokenizer.h
+++ squid-4.17/src/parser/BinaryTokenizer.h
@@ -9,6 +9,7 @@
 #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
 #define SQUID_SRC_PARSER_BINARYTOKENIZER_H
 
+#include "parser/forward.h"
 #include "sbuf/SBuf.h"
 
 namespace Parser
@@ -44,7 +45,7 @@ public:
 class BinaryTokenizer
 {
 public:
-    class InsufficientInput {}; // thrown when a method runs out of data
+    typedef ::Parser::InsufficientInput InsufficientInput;
     typedef uint64_t size_type; // enough for the largest supported offset
 
     BinaryTokenizer();
Index: squid-4.17/src/parser/Makefile.am
===================================================================
--- squid-4.17.orig/src/parser/Makefile.am
+++ squid-4.17/src/parser/Makefile.am
@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
 libparser_la_SOURCES = \
 	BinaryTokenizer.h \
 	BinaryTokenizer.cc \
+	forward.h \
 	Tokenizer.h \
 	Tokenizer.cc
 
Index: squid-4.17/src/parser/Tokenizer.cc
===================================================================
--- squid-4.17.orig/src/parser/Tokenizer.cc
+++ squid-4.17/src/parser/Tokenizer.cc
@@ -10,7 +10,9 @@
 
 #include "squid.h"
 #include "Debug.h"
+#include "parser/forward.h"
 #include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
 
 #include <cerrno>
 #if HAVE_CTYPE_H
@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returned
     return true;
 }
 
+SBuf
+Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
+{
+    if (atEnd())
+        throw InsufficientInput();
+
+    SBuf result;
+
+    if (!prefix(result, tokenChars, limit))
+        throw TexcHere(ToSBuf("cannot parse ", description));
+
+    if (atEnd())
+        throw InsufficientInput();
+
+    return result;
+}
+
 bool
 Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
 {
@@ -128,6 +147,18 @@ Parser::Tokenizer::skipAll(const Charact
     return success(prefixLen);
 }
 
+void
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
+{
+    if (skip(tokenToSkip) || tokenToSkip.isEmpty())
+        return;
+
+    if (tokenToSkip.startsWith(buf_))
+        throw InsufficientInput();
+
+    throw TextException(ToSBuf("cannot skip ", description), Here());
+}
+
 bool
 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 {
@@ -283,3 +314,24 @@ Parser::Tokenizer::int64(int64_t & resul
     return success(s - range.rawContent());
 }
 
+int64_t
+Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
+{
+    if (atEnd())
+        throw InsufficientInput();
+
+    int64_t result = 0;
+
+    // Since we only support unsigned decimals, a parsing failure with a
+    // non-empty input always implies invalid/malformed input (or a buggy
+    // limit=0 caller). TODO: Support signed and non-decimal integers by
+    // refactoring int64() to detect insufficient input.
+    if (!int64(result, 10, false, limit))
+        throw TexcHere(ToSBuf("cannot parse ", description));
+
+    if (atEnd())
+        throw InsufficientInput(); // more digits may be coming
+
+    return result;
+}
+
Index: squid-4.17/src/parser/Tokenizer.h
===================================================================
--- squid-4.17.orig/src/parser/Tokenizer.h
+++ squid-4.17/src/parser/Tokenizer.h
@@ -115,6 +115,13 @@ public:
      */
     SBuf::size_type skipAll(const CharacterSet &discardables);
 
+    /** skips a given character sequence (string);
+     * does nothing if the sequence is empty
+     *
+     * \throws exception on mismatching prefix or InsufficientInput
+     */
+    void skipRequired(const char *description, const SBuf &tokenToSkip);
+
     /** Removes a single trailing character from the set.
      *
      * \return whether a character was removed
@@ -143,6 +150,19 @@ public:
      */
     bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
 
+    /*
+     * The methods below mimic their counterparts documented above, but they
+     * throw on errors, including InsufficientInput. The field description
+     * parameter is used for error reporting and debugging.
+     */
+
+    /// prefix() wrapper but throws InsufficientInput if input contains
+    /// nothing but the prefix (i.e. if the prefix is not "terminated")
+    SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
+
+    /// int64() wrapper but limited to unsigned decimal integers (for now)
+    int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
+
 protected:
     SBuf consume(const SBuf::size_type n);
     SBuf::size_type success(const SBuf::size_type n);
Index: squid-4.17/src/parser/forward.h
===================================================================
--- /dev/null
+++ squid-4.17/src/parser/forward.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_PARSER_FORWARD_H
+#define SQUID_PARSER_FORWARD_H
+
+namespace Parser {
+class Tokenizer;
+class BinaryTokenizer;
+
+// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
+/// thrown by modern "incremental" parsers when they need more data
+class InsufficientInput {};
+} // namespace Parser
+
+#endif /* SQUID_PARSER_FORWARD_H */
+