File r1941-Fix-CVE-2017-8054-and-other-issues-keeping-binary-compat.patch of Package podofo.23798

------------------------------------------------------------------------
r1941 | mc-zyx | 2018-09-30 16:56:19 +0200 (dom 30 de sep de 2018) | 10 líneas

Patch by Amin Massad: Fix bugs in PdfPagesTree::GetPageNode() / PdfPagesTree::GetPageNodeFromArray()

The patch includes:
1) A real fix of CVE-2017-8054 (not really fixed upto r1937!) for handling
   of cyclic trees, see testCyclicTree()
2) A fix for handling of subtrees with „/Kids []“ and „/Count 0“ which is
   completely valid according to the PDF spec, see testEmptyKidsTree()
3) A changed behavior for trees with nested kids array which are not valid
   according to the PDF spec and now yield an NULL ptr, see testNestedArrayTree()

Modified by Antonio Larrosa <alarrosa@suse.com> so the patch
doesn't break binary compatibility

Index: src/doc/PdfPagesTree.cpp
===================================================================
--- src/doc/PdfPagesTree.cpp	(revisión: 1940)
+++ src/doc/PdfPagesTree.cpp	(revisión: 1941)
@@ -335,7 +335,6 @@
     const PdfArray & rKidsArray = pObj->GetArray(); 
     PdfArray::const_iterator it = rKidsArray.begin();
 
-    const size_t numDirectKids = rKidsArray.size();
     const size_t numKids = GetChildCount(pParent);
 
     // use <= since nPageNum is 0-based
@@ -347,62 +346,25 @@
         return NULL;
     }
 
-    //printf("Fetching: %i %i %i\n", numDirectKids, numKids, nPageNum );
-    if( numDirectKids == numKids && static_cast<size_t>(nPageNum) < numDirectKids )
+    //printf("Fetching: %i %i\n", numKids, nPageNum );
+
+    // We have to traverse the tree
+    //
+    // BEWARE: There is no valid shortcut for tree traversal.
+    // Even if eKidsArray.size()==numKids, this does not imply that
+    // eKidsArray can be accessed with the index of the page directly.
+    // The tree could have an arbitrary complex structure because
+    // internal nodes with no leaves (page objects) are not forbidden
+    // by the PDF spec.
+    while( it != rKidsArray.end() ) 
     {
-        // This node has only page nodes as kids,
-        // so we can access the array directly
-        rLstParents.push_back( pParent );
-        return GetPageNodeFromArray( nPageNum, rKidsArray, rLstParents );
-    } 
-    else
-    {
-        // We have to traverse the tree
-        while( it != rKidsArray.end() ) 
+        if(!(*it).IsReference() ) 
         {
-            if( (*it).IsArray() ) 
-            { // Fixes PDFs broken by having trees with arrays nested once
-                
-                rLstParents.push_back( pParent );
+            PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n", 
+                                  nPageNum, (*it).GetDataTypeString()); 
+            return NULL;
+        }
 
-                // the following code is to find the reference to log this with
-                const PdfReference & rIterArrayRef = (*it).Reference();
-                PdfReference refToLog;
-                bool isDirectObject // don't worry about 0-num. indirect ones
-                    = ( !(rIterArrayRef.ObjectNumber() ) );
-                if ( isDirectObject ) 
-		{
-                    if ( !(pObj->Reference().ObjectNumber() ) ) // rKidsArray's
-		    {
-                        refToLog = pParent->Reference();
-                    }
-		    else
-                    {
-                        refToLog = pObj->Reference();
-                    }
-                }
-                else
-                {
-                    refToLog = rIterArrayRef;
-                }
-                PdfError::LogMessage( eLogSeverity_Error,
-                                    "Entry in Kids array is itself an array"
-                    "%s reference: %s\n", isDirectObject ? " (direct object)"
-                    ", in object with" : ",", refToLog.ToString().c_str() );
-
-                    const PdfArray & rIterArray = (*it).GetArray();
-
-                    // is the array large enough to potentially have the page?
-                    if( static_cast<size_t>(nPageNum) < rIterArray.GetSize() )
-                    {
-                        PdfObject* pPageNode = GetPageNodeFromArray( nPageNum,
-                                                    rIterArray, rLstParents );
-                        if ( pPageNode ) // and if not, search further
-                            return pPageNode;
-                    }
-            }
-            else if( (*it).IsReference() ) 
-            {
                 PdfObject* pChild = GetRoot()->GetOwner()->GetObject( (*it).GetReference() );
                 if (!pChild) 
                 {
@@ -416,13 +378,28 @@
                     int childCount = GetChildCount( pChild );
                     if( childCount < nPageNum + 1 ) // Pages are 0 based, but count is not
                     {
-                        // skip this page node
-                        // and go to the next one
+                        // skip this page tree node
+                        // and go to the next child in rKidsArray
                         nPageNum -= childCount;
                     }
                     else
                     {
+                        // page is in the subtree of pChild
+                        // => call GetPageNode() recursively
+                        
                         rLstParents.push_back( pParent );
+
+                        if ( std::find( rLstParents.begin(), rLstParents.end(), pChild )
+                             != rLstParents.end() ) // cycle in parent list detected, fend
+                        { // off security vulnerability similar to CVE-2017-8054 (infinite recursion)
+                            std::ostringstream oss;
+                            oss << "Cycle in page tree: child in /Kids array of object "
+                                << ( *(rLstParents.rbegin()) )->Reference().ToString()
+                                << " back-references to object " << pChild->Reference()
+                                .ToString() << " one of whose descendants the former is.";
+                            PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() );
+                        }
+
                         return this->GetPageNode( nPageNum, pChild, rLstParents );
                     }
                 }
@@ -430,6 +407,7 @@
                 {
                     if( 0 == nPageNum )
                     {
+                        // page found
                         rLstParents.push_back( pParent );
                         return pChild;
                     } 
@@ -448,24 +426,18 @@
                         "Invalid datatype referenced in kids array: %s\n"
                         "Reference to invalid object: %i %i R\n", nPageNum,
                         pChild->GetDataTypeString(), nLogObjNum, nLogGenNum);
+                    return NULL;
                 }
-            }
-            else
-            {
-                PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n", 
-                                      nPageNum, (*it).GetDataTypeString()); 
-                return NULL;
-            }
             
             ++it;
         }
-    }
 
     return NULL;
 }
 
 PdfObject* PdfPagesTree::GetPageNodeFromArray( int nPageNum, const PdfArray & rKidsArray, PdfObjectList & rLstParents )
 {
+    PdfError::LogMessage( eLogSeverity_Warning, "PdfPagesTree::GetPageNodeFromArray is deprecated and will be removed soon");
     if( static_cast<size_t>(nPageNum) >= rKidsArray.GetSize() )
     {
         PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i from array of size %i\n", 
Index: test/unit/PagesTreeTest.cpp
===================================================================
--- test/unit/PagesTreeTest.cpp	(revisión: 1940)
+++ test/unit/PagesTreeTest.cpp	(revisión: 1941)
@@ -22,6 +22,8 @@
 
 #include <podofo.h>
 
+#include <sstream>
+
 #define PODOFO_TEST_PAGE_KEY "PoDoFoTestPageNumber"
 #define PODOFO_TEST_NUM_PAGES 100
 
@@ -70,6 +72,58 @@
     CPPUNIT_ASSERT_THROW( writer.GetPage( 1 ), PdfError );
 }
 
+void PagesTreeTest::testCyclicTree()
+{
+    for (int pass=0; pass < 2; pass++)
+    {
+        PdfMemDocument doc;
+        CreateCyclicTree( doc, pass==1);
+        //doc.Write(pass==0?"tree_valid.pdf":"tree_cyclic.pdf");
+        for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++)
+        {
+            if (pass==0)
+            {    
+                // pass 0:
+                // valid tree without cycles should yield all pages
+                PdfPage* pPage = doc.GetPage( pagenum );
+                CPPUNIT_ASSERT_EQUAL( pPage != NULL, true );
+                CPPUNIT_ASSERT_EQUAL( IsPageNumber( pPage, pagenum ), true );
+            }
+            else
+            {
+                // pass 1:
+                // cyclic tree must throw exception to prevent infinite recursion
+                CPPUNIT_ASSERT_THROW( doc.GetPage( pagenum ), PdfError );
+            }
+        }
+    }
+}
+
+void PagesTreeTest::testEmptyKidsTree()
+{
+    PdfMemDocument doc;
+    CreateEmptyKidsTree(doc);
+    //doc.Write("tree_zerokids.pdf");
+    for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++)
+    {
+        PdfPage* pPage = doc.GetPage( pagenum );
+        CPPUNIT_ASSERT_EQUAL( pPage != NULL, true );
+        CPPUNIT_ASSERT_EQUAL( IsPageNumber( pPage, pagenum ), true );
+    }
+}
+
+void PagesTreeTest::testNestedArrayTree()
+{
+    PdfMemDocument doc;
+    CreateNestedArrayTree(doc);
+    //doc.Write("tree_nested_array.pdf");
+    for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++)
+    {
+        PdfPage* pPage = doc.GetPage( pagenum );
+        CPPUNIT_ASSERT_EQUAL( pPage == NULL, true );
+    }
+}
+
 void PagesTreeTest::testCreateDelete()
 {
     PdfMemDocument  writer;
@@ -354,7 +408,153 @@
     pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast<pdf_int64>(PODOFO_TEST_NUM_PAGES) );
 }
 
+std::vector<PdfPage*> PagesTreeTest::CreateSamplePages( PdfMemDocument & rDoc,
+                                                        int nPageCount)
+{
+    PdfFont* pFont;
 
+    // create font
+    pFont = rDoc.CreateFont( "Arial" );
+    if( !pFont )
+    {
+        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
+    }
+    pFont->SetFontSize( 16.0 );
+
+    std::vector<PdfPage*> pPage(nPageCount);
+    for (int i = 0; i < nPageCount; ++i)
+    {
+        pPage[i] = new PdfPage( PdfPage::CreateStandardPageSize( ePdfPageSize_A4 ),
+                                &(rDoc.GetObjects()) );
+        pPage[i]->GetObject()->GetDictionary().AddKey( PODOFO_TEST_PAGE_KEY, 
+                                                       static_cast<pdf_int64>(i) );
+
+        PdfPainter painter;
+        painter.SetPage( pPage[i] );
+        painter.SetFont( pFont );
+        std::ostringstream os;
+        os << "Page " << i+1;
+        painter.DrawText( 200, 200, os.str()  );
+        painter.FinishPage();
+    }
+
+    return pPage;
+}
+
+std::vector<PdfObject*> PagesTreeTest::CreateNodes( PdfMemDocument & rDoc,
+                                                    int nNodeCount)
+{
+    std::vector<PdfObject*> pNode(nNodeCount);
+
+    for (int i = 0; i < nNodeCount; ++i)
+    {
+        pNode[i]=rDoc.GetObjects().CreateObject("Pages");
+        // init required keys
+        pNode[i]->GetDictionary().AddKey( "Kids", PdfArray());
+        pNode[i]->GetDictionary().AddKey( "Count", PdfVariant(static_cast<pdf_int64>(0L)));
+    }
+
+    return pNode;
+}
+
+void PagesTreeTest::CreateCyclicTree( PoDoFo::PdfMemDocument & rDoc,
+                                      bool bCreateCycle )
+{
+    const int COUNT = 3;
+
+    std::vector<PdfPage*> pPage=CreateSamplePages( rDoc, COUNT );
+    std::vector<PdfObject*> pNode=CreateNodes( rDoc, 2 );
+    
+    // manually insert pages into pagetree
+    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
+
+    // tree layout (for !bCreateCycle):
+    //
+    //    root
+    //    +-- node0
+    //        +-- node1
+    //        |   +-- page0
+    //        |   +-- page1
+    //        \-- page2
+
+    // root node
+    AppendChildNode(pRoot, pNode[0]);
+
+    // tree node 0
+    AppendChildNode(pNode[0], pNode[1]);
+    AppendChildNode(pNode[0], pPage[2]->GetObject());
+
+    // tree node 1
+    AppendChildNode(pNode[1], pPage[0]->GetObject());
+    AppendChildNode(pNode[1], pPage[1]->GetObject());
+
+    if (bCreateCycle)
+    {
+        // invalid tree: Cycle!!!
+        // was not detected in PdfPagesTree::GetPageNode() rev. 1937
+        pNode[0]->GetIndirectKey("Kids")->GetArray()[0]=pRoot->Reference();
+    }
+}
+
+void PagesTreeTest::CreateEmptyKidsTree( PoDoFo::PdfMemDocument & rDoc )
+{
+    const int COUNT = 3;
+
+    std::vector<PdfPage*> pPage=CreateSamplePages( rDoc, COUNT );
+    std::vector<PdfObject*> pNode=CreateNodes( rDoc, 3 );
+    
+    // manually insert pages into pagetree
+    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
+    
+    // tree layout:
+    //
+    //    root
+    //    +-- node0
+    //    |   +-- page0
+    //    |   +-- page1
+    //    |   +-- page2
+    //    +-- node1
+    //    \-- node2
+
+    // root node
+    AppendChildNode(pRoot, pNode[0]);
+    AppendChildNode(pRoot, pNode[1]);
+    AppendChildNode(pRoot, pNode[2]);
+    
+    // tree node 0
+    AppendChildNode(pNode[0], pPage[0]->GetObject());
+    AppendChildNode(pNode[0], pPage[1]->GetObject());
+    AppendChildNode(pNode[0], pPage[2]->GetObject());
+
+    // tree node 1 and node 2 are left empty: this is completely valid
+    // according to the PDF spec, i.e. the required keys may have the
+    // values "/Kids [ ]" and "/Count 0"
+}
+
+void PagesTreeTest::CreateNestedArrayTree( PoDoFo::PdfMemDocument & rDoc )
+{
+    const int COUNT = 3;
+
+    std::vector<PdfPage*> pPage=CreateSamplePages( rDoc, COUNT );
+    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
+
+    // create kids array
+    PdfArray kids;
+    for (int i=0; i < COUNT; i++)
+    {
+        kids.push_back( pPage[i]->GetObject()->Reference() );
+        pPage[i]->GetObject()->GetDictionary().AddKey( PdfName("Parent"), pRoot->Reference());
+    }
+
+    // create nested kids array
+    PdfArray nested;
+    nested.push_back(kids);
+
+    // manually insert pages into pagetree
+    pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast<pdf_int64>(COUNT) );
+    pRoot->GetDictionary().AddKey( PdfName("Kids"), nested);
+}
+
 bool PagesTreeTest::IsPageNumber( PoDoFo::PdfPage* pPage, int nNumber )
 {
     pdf_int64 lPageNumber = pPage->GetObject()->GetDictionary().GetKeyAsLong( PODOFO_TEST_PAGE_KEY, -1 );
@@ -367,3 +567,33 @@
     else
         return true;
 }
+
+void PagesTreeTest::AppendChildNode(PdfObject* pParent, PdfObject* pChild)
+{
+    // 1. Add the reference of the new child to the kids array of pParent
+    PdfArray kids;
+    PdfObject* oldKids=pParent->GetIndirectKey("Kids");
+    if (oldKids && oldKids->IsArray()) kids=oldKids->GetArray();
+    kids.push_back(pChild->Reference());
+    pParent->GetDictionary().AddKey( PdfName("Kids"), kids);
+
+    // 2. If the child is a page (leaf node), increase count of every parent
+    //    (which also includes pParent)
+    if( pChild->GetDictionary().GetKeyAsName( PdfName( "Type" ) )
+        == PdfName( "Page" ) )
+    {
+        PdfObject* node=pParent;
+        while (node)
+        {
+            pdf_int64 count=0;
+            if (node->GetIndirectKey("Count")) count=node->GetIndirectKey("Count")->GetNumber();
+            count++;
+            node->GetDictionary().AddKey( PdfName("Count"), count);
+            
+            node=node->GetIndirectKey("Parent");
+        }
+    }
+    
+    // 3. Add Parent key to the child
+    pChild->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference());
+}
Index: test/unit/PagesTreeTest.h
===================================================================
--- test/unit/PagesTreeTest.h	(revisión: 1940)
+++ test/unit/PagesTreeTest.h	(revisión: 1941)
@@ -21,11 +21,14 @@
 #ifndef _PAGES_TREE_TEST_H_
 #define _PAGES_TREE_TEST_H_
 
+#include <vector>
+
 #include <cppunit/extensions/HelperMacros.h>
 
 namespace PoDoFo {
 class PdfMemDocument;
 class PdfPage;
+class PdfObject;
 };
 
 /** This test tests the class PdfPagesTree
@@ -35,6 +38,9 @@
   CPPUNIT_TEST_SUITE( PagesTreeTest );
   CPPUNIT_TEST( testEmptyTree );
   CPPUNIT_TEST( testEmptyDoc );
+  CPPUNIT_TEST( testCyclicTree );
+  CPPUNIT_TEST( testEmptyKidsTree );
+  CPPUNIT_TEST( testNestedArrayTree );
   CPPUNIT_TEST( testCreateDelete );
   CPPUNIT_TEST( testGetPagesCustom );
   CPPUNIT_TEST( testGetPagesPoDoFo );
@@ -52,6 +58,9 @@
 
   void testEmptyTree();
   void testEmptyDoc();
+  void testCyclicTree();
+  void testEmptyKidsTree();
+  void testNestedArrayTree();
   void testCreateDelete();
   void testGetPagesCustom();
   void testGetPagesPoDoFo();
@@ -98,7 +107,58 @@
    */
   void CreateTestTreeCustom( PoDoFo::PdfMemDocument & rDoc );
 
+  /**
+   * Create a pages tree with cycles to test prevention of endless
+   * recursion as mentioned in different CVE reports.
+   *
+   * \param bCreateCycle if true a cyclic tree is created, otherwise a
+   *                     valid tree without cycles
+   */
+  void CreateCyclicTree( PoDoFo::PdfMemDocument & rDoc,
+                         bool bCreateCycle );
+
+  /**
+   * Create a pages tree with nodes containing empty kids.
+   *
+   * This is completely valid according to the PDF spec, i.e. the
+   * required keys may have the values "/Kids [ ]" and "/Count 0"
+   * Such a tree must still be parsable by a conforming reader:
+   *
+   * <BLOCKQUOTE>The tree contains nodes of two types—intermediate
+   * nodes, called page tree nodes, and leaf nodes, called page
+   * objects—whose form is described in the subsequent subclauses.
+   * Conforming products shall be prepared to handle any form
+   * of tree structure built of such nodes.</BLOCKQUOTE>
+   */
+  void CreateEmptyKidsTree( PoDoFo::PdfMemDocument & rDoc );
+  
+  /**
+  * Ceate a pages tree with a nested kids array.
+  *
+  * Such a tree is not valid to the PDF spec, which requires they key
+  * "Kids" to be an array of indirect references. And the children shall
+  * only be page objects or other page tree nodes.
+  */
+  void CreateNestedArrayTree( PoDoFo::PdfMemDocument & rDoc );
+
+ /**
+  * Create page object nodes (leaf nodes),
+  * where every page object has an additional
+  * key PoDoFoTestPageNumber with the original 
+  * page number of the page.
+  */  
+  std::vector<PoDoFo::PdfPage*> CreateSamplePages( PoDoFo::PdfMemDocument & rDoc,
+                                                   int nPageCount);
+
+  /**
+  * Create page tree nodes (internal nodes)
+  */
+  std::vector<PoDoFo::PdfObject*> CreateNodes( PoDoFo::PdfMemDocument & rDoc,
+                                               int nNodeCount);
+
   bool IsPageNumber( PoDoFo::PdfPage* pPage, int nNumber );
+
+  void AppendChildNode(PoDoFo::PdfObject* pParent, PoDoFo::PdfObject* pChild);
 };
 
 #endif // _PAGES_TREE_TEST_H_

------------------------------------------------------------------------
openSUSE Build Service is sponsored by