File xerces-c-CVE-2018-1311.patch of Package xerces-c.32287

From e0024267504188e42ace4dd9031d936786914835 Mon Sep 17 00:00:00 2001
From: Karen Arutyunov <karen@codesynthesis.com>
Date: Wed, 13 Dec 2023 09:59:07 +0200
Subject: [PATCH] XERCESC-2188 - Use-after-free on external DTD scan
 (CVE-2018-1311)

These are the instructions for observing the bug (before this commit):

$ git clone https://github.com/apache/xerces-c.git
$ cd xerces-c
$ mkdir build
$ cd build
$ cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug ..
$ make -j8
$ cp ../samples/data/personal.xml .

$ cat <<EOF >personal.dtd
<?xml encoding="ISO-8859-1"?>
<!ENTITY % nonExistentEntity SYSTEM "non-existent.ent">
%nonExistentEntity;
EOF

$ gdb samples/StdInParse
(gdb) b IGXMLScanner.cpp:1544
(gdb) run <personal.xml
1544	            fReaderMgr.pushReader(reader, declDTD);
(gdb) p declDTD
$1 = (xercesc_4_0::DTDEntityDecl *) 0x49ac68
(gdb) n
1547	            dtdScanner.scanExtSubsetDecl(false, true);
(gdb) n
1548	        }
(gdb) s
...
(gdb) s                     # The Janitor is about to delete the above declDTD.
90	        delete fData;
(gdb) p fData
$1 = (xercesc_4_0::DTDEntityDecl *) 0x49ac68
(gdb) b ReaderMgr.cpp:1024
(gdb) n
...
(gdb) n                     # Now we about to dereference the deleted declDTD.
1024	    if (curEntity && !curEntity->isExternal())
(gdb) p curEntity
$2 = (const xercesc_4_0::XMLEntityDecl *) 0x49ac68
---
 src/xercesc/internal/DGXMLScanner.cpp |   6 +-
 src/xercesc/internal/IGXMLScanner.cpp |   6 +-
 src/xercesc/internal/ReaderMgr.cpp    | 207 ++++++++++++++++++--------
 src/xercesc/internal/ReaderMgr.hpp    |  92 ++++++++++--
 4 files changed, 229 insertions(+), 82 deletions(-)

diff --git a/src/xercesc/internal/DGXMLScanner.cpp b/src/xercesc/internal/DGXMLScanner.cpp
index 43342235a..895dfeb06 100644
--- a/src/xercesc/internal/DGXMLScanner.cpp
+++ b/src/xercesc/internal/DGXMLScanner.cpp
@@ -1052,13 +1052,12 @@ void DGXMLScanner::scanDocTypeDecl()
             DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
             declDTD->setSystemId(sysId);
             declDTD->setIsExternal(true);
-            Janitor<DTDEntityDecl> janDecl(declDTD);
 
             // Mark this one as a throw at end
             reader->setThrowAtEnd(true);
 
             // And push it onto the stack, with its pseudo name
-            fReaderMgr.pushReader(reader, declDTD);
+            fReaderMgr.pushReaderAdoptEntity(reader, declDTD);
 
             // Tell it its not in an include section
             dtdScanner.scanExtSubsetDecl(false, true);
@@ -2131,13 +2130,12 @@ Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src,
     DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
     declDTD->setSystemId(src.getSystemId());
     declDTD->setIsExternal(true);
-    Janitor<DTDEntityDecl> janDecl(declDTD);
 
     // Mark this one as a throw at end
     newReader->setThrowAtEnd(true);
 
     // And push it onto the stack, with its pseudo name
-    fReaderMgr.pushReader(newReader, declDTD);
+    fReaderMgr.pushReaderAdoptEntity(newReader, declDTD);
 
     //  If we have a doc type handler and advanced callbacks are enabled,
     //  call the doctype event.
diff --git a/src/xercesc/internal/IGXMLScanner.cpp b/src/xercesc/internal/IGXMLScanner.cpp
index 912ec0c62..d694b23e6 100644
--- a/src/xercesc/internal/IGXMLScanner.cpp
+++ b/src/xercesc/internal/IGXMLScanner.cpp
@@ -1535,13 +1535,12 @@ void IGXMLScanner::scanDocTypeDecl()
             DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
             declDTD->setSystemId(sysId);
             declDTD->setIsExternal(true);
-            Janitor<DTDEntityDecl> janDecl(declDTD);
 
             // Mark this one as a throw at end
             reader->setThrowAtEnd(true);
 
             // And push it onto the stack, with its pseudo name
-            fReaderMgr.pushReader(reader, declDTD);
+            fReaderMgr.pushReaderAdoptEntity(reader, declDTD);
 
             // Tell it its not in an include section
             dtdScanner.scanExtSubsetDecl(false, true);
@@ -3098,13 +3097,12 @@ Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src,
     DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
     declDTD->setSystemId(src.getSystemId());
     declDTD->setIsExternal(true);
-    Janitor<DTDEntityDecl> janDecl(declDTD);
 
     // Mark this one as a throw at end
     newReader->setThrowAtEnd(true);
 
     // And push it onto the stack, with its pseudo name
-    fReaderMgr.pushReader(newReader, declDTD);
+    fReaderMgr.pushReaderAdoptEntity(newReader, declDTD);
 
     //  If we have a doc type handler and advanced callbacks are enabled,
     //  call the doctype event.
diff --git a/src/xercesc/internal/ReaderMgr.cpp b/src/xercesc/internal/ReaderMgr.cpp
index d14483e3c..9f363a071 100644
--- a/src/xercesc/internal/ReaderMgr.cpp
+++ b/src/xercesc/internal/ReaderMgr.cpp
@@ -45,12 +45,61 @@
 
 XERCES_CPP_NAMESPACE_BEGIN
 
+// ---------------------------------------------------------------------------
+//  ReaderMgr::ReaderData: Constructors and Destructor
+// ---------------------------------------------------------------------------
+ReaderMgr::ReaderData::ReaderData( XMLReader* const       reader
+                                   , XMLEntityDecl* const entity
+                                   , const bool           adoptEntity) :
+
+    fReader(reader)
+    , fEntity(entity)
+    , fEntityAdopted(adoptEntity)
+{
+}
+
+ReaderMgr::ReaderData::~ReaderData()
+{
+    delete fReader;
+
+    if (fEntityAdopted)
+        delete fEntity;
+}
+
+// ---------------------------------------------------------------------------
+//  ReaderMgr::ReaderData: Getter methods
+// ---------------------------------------------------------------------------
+XMLReader* ReaderMgr::ReaderData::getReader() const
+{
+  return fReader;
+}
+
+XMLEntityDecl* ReaderMgr::ReaderData::getEntity() const
+{
+  return fEntity;
+}
+
+bool ReaderMgr::ReaderData::getEntityAdopted() const
+{
+  return fEntityAdopted;
+}
+
+//
+//  This method gives up the entity object ownership but leaves the fEntity
+//  pointer intact.
+//
+XMLEntityDecl* ReaderMgr::ReaderData::releaseEntity()
+{
+  fEntityAdopted = false;
+  return fEntity;
+}
+
 // ---------------------------------------------------------------------------
 //  ReaderMgr: Constructors and Destructor
 // ---------------------------------------------------------------------------
 ReaderMgr::ReaderMgr(MemoryManager* const manager) :
 
-    fCurEntity(0)
+    fCurReaderData(0)
     , fCurReader(0)
     , fEntityHandler(0)
     , fEntityStack(0)
@@ -66,12 +115,11 @@ ReaderMgr::ReaderMgr(MemoryManager* const manager) :
 ReaderMgr::~ReaderMgr()
 {
     //
-    //  Clean up the reader and entity stacks. Note that we don't own the
-    //  entities, so we don't delete the current entity (and the entity stack
-    //  does not own its elements either, so deleting it will not delete the
-    //  entities it still references!)
+    //  Clean up the reader stack and orphan entities container. Note that
+    //  all adopted entities (potentially contained in fCurReaderData,
+    //  fReaderStack, and fEntityStack) are deleted here.
     //
-    delete fCurReader;
+    delete fCurReaderData;
     delete fReaderStack;
     delete fEntityStack;
 }
@@ -357,9 +405,9 @@ void ReaderMgr::cleanStackBackTo(const XMLSize_t readerNum)
         if (fReaderStack->empty())
             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::RdrMgr_ReaderIdNotFound, fMemoryManager);
 
-        delete fCurReader;
-        fCurReader = fReaderStack->pop();
-        fCurEntity = fEntityStack->pop();
+        delete fCurReaderData;
+        fCurReaderData = fReaderStack->pop();
+        fCurReader = fCurReaderData->getReader ();
     }
 }
 
@@ -795,20 +843,20 @@ const XMLCh* ReaderMgr::getCurrentEncodingStr() const
 
 const XMLEntityDecl* ReaderMgr::getCurrentEntity() const
 {
-    return fCurEntity;
+    return fCurReaderData? fCurReaderData->getEntity() : 0;
 }
 
 
 XMLEntityDecl* ReaderMgr::getCurrentEntity()
 {
-    return fCurEntity;
+    return fCurReaderData? fCurReaderData->getEntity() : 0;
 }
 
 
 XMLSize_t ReaderMgr::getReaderDepth() const
 {
     // If the stack doesn't exist, its obviously zero
-    if (!fEntityStack)
+    if (!fReaderStack)
         return 0;
 
     //
@@ -816,7 +864,7 @@ XMLSize_t ReaderMgr::getReaderDepth() const
     //  reader. So if there is no current reader and none on the stack,
     //  its zero, else its some non-zero value.
     //
-    XMLSize_t retVal = fEntityStack->size();
+    XMLSize_t retVal = fReaderStack->size();
     if (fCurReader)
         retVal++;
     return retVal;
@@ -852,7 +900,7 @@ void ReaderMgr::getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const
 bool ReaderMgr::isScanningPERefOutOfLiteral() const
 {
     // If the current reader is not for an entity, then definitely not
-    if (!fCurEntity)
+    if (!fCurReaderData || !fCurReaderData->getEntity())
         return false;
 
     //
@@ -867,13 +915,19 @@ bool ReaderMgr::isScanningPERefOutOfLiteral() const
     return false;
 }
 
-
 bool ReaderMgr::pushReader(         XMLReader* const        reader
                             ,       XMLEntityDecl* const    entity)
+{
+    return pushReaderAdoptEntity(reader, entity, false);
+}
+
+bool ReaderMgr::pushReaderAdoptEntity(     XMLReader* const        reader
+                                       ,   XMLEntityDecl* const    entity
+                                       ,   const bool              adoptEntity)
 {
     //
     //  First, if an entity was passed, we have to confirm that this entity
-    //  is not already on the entity stack. If so, then this is a recursive
+    //  is not already on the reader stack. If so, then this is a recursive
     //  entity expansion, so we issue an error and refuse to put the reader
     //  on the stack.
     //
@@ -881,19 +935,30 @@ bool ReaderMgr::pushReader(         XMLReader* const        reader
     //  nothing to do. If there is no entity stack yet, then of coures it
     //  cannot already be there.
     //
-    if (entity && fEntityStack)
+    if (entity && fReaderStack)
     {
-        const XMLSize_t count = fEntityStack->size();
+        // @@ Strangely, we don't check the entity at the top of the stack
+        //    (fCurReaderData). Is it a bug?
+        //
+        const XMLSize_t count = fReaderStack->size();
         const XMLCh* const theName = entity->getName();
         for (XMLSize_t index = 0; index < count; index++)
         {
-            const XMLEntityDecl* curDecl = fEntityStack->elementAt(index);
+            const XMLEntityDecl* curDecl =
+              fReaderStack->elementAt(index)->getEntity();
+
             if (curDecl)
             {
                 if (XMLString::equals(theName, curDecl->getName()))
                 {
-                    // Oops, already there so delete reader and return
+                    // Oops, already there so delete reader and entity and
+                    // return.
+                    //
                     delete reader;
+
+                    if (adoptEntity)
+                        delete entity;
+
                     return false;
                 }
             }
@@ -905,52 +970,37 @@ bool ReaderMgr::pushReader(         XMLReader* const        reader
     //  tell it it does own its elements.
     //
     if (!fReaderStack)
-        fReaderStack = new (fMemoryManager) RefStackOf<XMLReader>(16, true, fMemoryManager);
-
-    // And the entity stack, which does not own its elements
-    if (!fEntityStack)
-        fEntityStack = new (fMemoryManager) RefStackOf<XMLEntityDecl>(16, false, fMemoryManager);
+        fReaderStack = new (fMemoryManager) RefStackOf<ReaderData>(16, true, fMemoryManager);
 
     //
-    //  Push the current reader and entity onto their respective stacks.
-    //  Note that the the current entity can be null if the current reader
-    //  is not for an entity.
+    //  Push the current reader and entity onto the stack. Note that
+    //  the current entity can be null if the current reader is not for
+    //  an entity.
     //
-    if (fCurReader)
-    {
-        fReaderStack->push(fCurReader);
-        fEntityStack->push(fCurEntity);
-    }
+    if (fCurReaderData)
+        fReaderStack->push(fCurReaderData);
 
     //
     //  Make the passed reader and entity the current top of stack. The
     //  passed entity can (and often is) null.
     //
+    fCurReaderData = new (fMemoryManager) ReaderData(reader, entity, adoptEntity);
     fCurReader = reader;
-    fCurEntity = entity;
 
     return true;
 }
 
-
 void ReaderMgr::reset()
 {
     // Reset all of the flags
     fThrowEOE = false;
 
     // Delete the current reader and flush the reader stack
-    delete fCurReader;
+    delete fCurReaderData;
+    fCurReaderData = 0;
     fCurReader = 0;
     if (fReaderStack)
         fReaderStack->removeAllElements();
-
-    //
-    //  And do the same for the entity stack, but don't delete the current
-    //  entity (if any) since we don't own them.
-    //
-    fCurEntity = 0;
-    if (fEntityStack)
-        fEntityStack->removeAllElements();
 }
 
 
@@ -1014,7 +1064,9 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const
     //  search the stack; else, keep the reader that we've got since its
     //  either an external entity reader or the main file reader.
     //
-    const XMLEntityDecl* curEntity = fCurEntity;
+    const XMLEntityDecl* curEntity =
+        fCurReaderData? fCurReaderData->getEntity() : 0;
+
     if (curEntity && !curEntity->isExternal())
     {
         XMLSize_t index = fReaderStack->size();
@@ -1024,7 +1076,7 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const
             {
                 // Move down to the previous element and get a pointer to it
                 index--;
-                curEntity = fEntityStack->elementAt(index);
+                curEntity = fReaderStack->elementAt(index)->getEntity();
 
                 //
                 //  If its null or its an external entity, then this reader
@@ -1032,12 +1084,12 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const
                 //
                 if (!curEntity)
                 {
-                    theReader = fReaderStack->elementAt(index);
+                    theReader = fReaderStack->elementAt(index)->getReader ();
                     break;
                 }
                  else if (curEntity->isExternal())
                 {
-                    theReader = fReaderStack->elementAt(index);
+                    theReader = fReaderStack->elementAt(index)->getReader ();
                     break;
                 }
 
@@ -1048,6 +1100,11 @@ ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const
         }
     }
 
+    // @@ It feels like we may end up with theReader being from the top of
+    //    the stack (fCurReader) and itsEntity being from the bottom of the
+    //    stack (if there are no null or external entities on the stack).
+    //    Is it a bug?
+    //
     itsEntity = curEntity;
     return theReader;
 }
@@ -1059,31 +1116,59 @@ bool ReaderMgr::popReader()
     //  We didn't get any more, so try to pop off a reader. If the reader
     //  stack is empty, then we are at the end, so return false.
     //
+    //  @@ It feels like we never pop the reader pushed to the stack first
+    //     (think of fReaderStack empty but fCurReader not null). Is it a
+    //     bug?
+    //
     if (fReaderStack->empty())
         return false;
 
     //
-    //  Remember the current entity, before we pop off a new one. We might
+    //  Remember the current reader, before we pop off a new one. We might
     //  need this to throw the end of entity exception at the end.
     //
-    XMLEntityDecl* prevEntity = fCurEntity;
+    ReaderData* prevReaderData = fCurReaderData;
     const bool prevReaderThrowAtEnd = fCurReader->getThrowAtEnd();
     const XMLSize_t readerNum = fCurReader->getReaderNum();
 
     //
-    //  Delete the current reader and pop a new reader and entity off
-    //  the stacks.
+    //  Pop a new reader and entity off the stack.
     //
-    delete fCurReader;
-    fCurReader = fReaderStack->pop();
-    fCurEntity = fEntityStack->pop();
+    fCurReaderData = fReaderStack->pop();
+    fCurReader = fCurReaderData->getReader();
 
     //
     //  If there was a previous entity, and either the fThrowEOE flag is set
-    //  or reader was marked as such, then throw an end of entity.
+    //  or reader was marked as such, then throw an end of entity. Otherwise,
+    //  delete the previous reader data.
     //
-    if (prevEntity && (fThrowEOE || prevReaderThrowAtEnd))
-        throw EndOfEntityException(prevEntity, readerNum);
+    if (prevReaderData->getEntity() && (fThrowEOE || prevReaderThrowAtEnd))
+    {
+        //
+        // If the entity is adopted, then move it to fEntityStack so that
+        // its life-time is prolonged to the life-time of this reader
+        // manager. Also delete the previous reader data before throwing
+        // EndOfEntityException.
+        //
+        XMLEntityDecl* entity;
+
+        if (prevReaderData->getEntityAdopted())
+        {
+            if (!fEntityStack)
+                fEntityStack = new (fMemoryManager) RefStackOf<XMLEntityDecl>(16, true, fMemoryManager);
+
+            entity = prevReaderData->releaseEntity();
+            fEntityStack->push(entity);
+        }
+        else
+            entity = prevReaderData->getEntity();
+
+        delete prevReaderData;
+
+        throw EndOfEntityException(entity, readerNum);
+    }
+    else
+        delete prevReaderData;
 
     while (true)
     {
@@ -1113,9 +1198,9 @@ bool ReaderMgr::popReader()
             return false;
 
         // Else pop again and try it one more time
-        delete fCurReader;
-        fCurReader = fReaderStack->pop();
-        fCurEntity = fEntityStack->pop();
+        delete fCurReaderData;
+        fCurReaderData = fReaderStack->pop();
+        fCurReader = fCurReaderData->getReader();
     }
     return true;
 }
diff --git a/src/xercesc/internal/ReaderMgr.hpp b/src/xercesc/internal/ReaderMgr.hpp
index f63b2194e..0855ed77a 100644
--- a/src/xercesc/internal/ReaderMgr.hpp
+++ b/src/xercesc/internal/ReaderMgr.hpp
@@ -160,6 +160,12 @@ public :
                 XMLReader* const        reader
         ,       XMLEntityDecl* const    entity
     );
+    bool pushReaderAdoptEntity
+    (
+                XMLReader* const        reader
+        ,       XMLEntityDecl* const    entity
+        ,       const bool              adoptEntity = true
+    );
     void reset();
 
 
@@ -208,16 +214,72 @@ private :
     ReaderMgr(const ReaderMgr&);
     ReaderMgr& operator=(const ReaderMgr&);
 
+    // -----------------------------------------------------------------------
+    //  Private data types
+    // -----------------------------------------------------------------------
+    class ReaderData : public XMemory
+    {
+    public  :
+        // ---------------------------------------------------------------------
+        //  Constructors and Destructor
+        // ---------------------------------------------------------------------
+        ReaderData
+        (    XMLReader* const     reader
+           , XMLEntityDecl* const entity
+           , const bool           adoptEntity
+        );
+
+        ~ReaderData();
+
+        // ----------------------------------------------------------------------
+        //  Getter methods
+        // ----------------------------------------------------------------------
+        XMLReader* getReader() const;
+        XMLEntityDecl* getEntity() const;
+        bool getEntityAdopted() const;
+
+        XMLEntityDecl* releaseEntity();
+
+    private :
+        // ---------------------------------------------------------------------
+        //  Unimplemented constructors and operators
+        // ---------------------------------------------------------------------
+        ReaderData();
+        ReaderData(const ReaderData&);
+        ReaderData& operator=(const ReaderData&);
+
+        // ---------------------------------------------------------------------
+        //  Private data members
+        //
+        //  fReader
+        //      This is the pointer to the reader object that must be destroyed
+        //      when this object is destroyed.
+        //
+        //  fEntity
+        //  fEntityAdopted
+        //      This is the pointer to the entity object that, if adopted, must
+        //      be destroyed when this object is destroyed.
+        //
+        //      Note that we need to keep up with which of the pushed readers
+        //      are pushed entity values that are being spooled. This is done
+        //      to avoid the problem of recursive definitions.
+        // ---------------------------------------------------------------------
+        XMLReader*     fReader;
+        XMLEntityDecl* fEntity;
+        bool           fEntityAdopted;
+    };
+
     // -----------------------------------------------------------------------
     //  Private data members
     //
-    //  fCurEntity
-    //      This is the current top of stack entity. We pull it off the stack
-    //      and store it here for efficiency.
+    //  fCurReaderData
+    //      This is the current top of the reader data stack. We pull it off
+    //      the stack and store it here for efficiency.
     //
     //  fCurReader
-    //      This is the current top of stack reader. We pull it off the
-    //      stack and store it here for efficiency.
+    //      This is the reader of the current top of the reader data stack.
+    //      It contains the same value as fCurReaderData->fReader or NULL,
+    //      if fCurReaderData is NULL. We store it here for efficiency.
     //
     //  fEntityHandler
     //      This is the installed entity handler. Its installed via the
@@ -225,10 +287,14 @@ private :
     //      process of creating external entity readers.
     //
     //  fEntityStack
-    //      We need to keep up with which of the pushed readers are pushed
-    //      entity values that are being spooled. This is done to avoid the
-    //      problem of recursive definitions. This stack consists of refs to
-    //      EntityDecl objects for the pushed entities.
+    //      This is a storage of orphaned XMLEntityDecl objects. The
+    //      popReader() function adds a reader manager-adopted entity object
+    //      to this storage before passing its pointer to the constructor
+    //      of the being thrown EndOfEntityException exception. This makes
+    //      sure that the life-time of an entity exposed to the exception
+    //      handlers is the same as the life-time of reader manager (and so
+    //      normally the life-time of the scanner which embeds the reader
+    //      manager).
     //
     //  fNextReaderNum
     //      This is the reader serial number value. Each new reader that is
@@ -236,8 +302,8 @@ private :
     //      us catch things like partial markup errors and such.
     //
     //  fReaderStack
-    //      This is the stack of reader references. We own all the readers
-    //      and destroy them when they are used up.
+    //      This is the stack of reader data references. We own all the
+    //      entries and destroy them when they are used up.
     //
     //  fThrowEOE
     //      This flag controls whether we throw an exception when we hit an
@@ -252,12 +318,12 @@ private :
     //  fStandardUriConformant
     //      This flag controls whether we force conformant URI
     // -----------------------------------------------------------------------
-    XMLEntityDecl*              fCurEntity;
+    ReaderData*                 fCurReaderData;
     XMLReader*                  fCurReader;
     XMLEntityHandler*           fEntityHandler;
     RefStackOf<XMLEntityDecl>*  fEntityStack;
     unsigned int                fNextReaderNum;
-    RefStackOf<XMLReader>*      fReaderStack;
+    RefStackOf<ReaderData>*     fReaderStack;
     bool                        fThrowEOE;
     XMLReader::XMLVersion       fXMLVersion;
     bool                        fStandardUriConformant;
openSUSE Build Service is sponsored by