File improve-skip-duplicates.patch of Package apache-pdfbox.8984
From 9c4c7bb2a1158d9c75903633616adf959e952395 Mon Sep 17 00:00:00 2001
From: Tilman Hausherr <tilman@apache.org>
Date: Fri, 28 Sep 2018 18:28:59 +0000
Subject: [PATCH] PDFBOX-4071: improve skip duplicates
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/branches/1.8@1842278 13f79535-47bb-0310-9956-ffa450edef68
---
.../java/org/apache/pdfbox/pdmodel/PDPageNode.java | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageNode.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageNode.java
index a769ebfb3..2132ebd8a 100644
--- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageNode.java
+++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageNode.java
@@ -33,6 +33,7 @@
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Set;
/**
* This represents a page node in a pdf document.
@@ -171,7 +172,7 @@ public COSBase getCOSObject()
public List getKids()
{
List actuals = new ArrayList();
- COSArray kids = getAllKids(actuals, page, false);
+ COSArray kids = getAllKids(actuals, page, false, new HashSet<COSBase>());
return new COSArrayList( actuals, kids );
}
@@ -182,7 +183,7 @@ public List getKids()
*/
public void getAllKids(List result)
{
- getAllKids(result, page, true);
+ getAllKids(result, page, true, new HashSet<COSBase>());
}
/**
@@ -191,8 +192,9 @@ public void getAllKids(List result)
* @param result All direct and optionally indirect descendents of this node are added to this list.
* @param page Page dictionary of a page node.
* @param recurse if true indirect descendents are processed recursively
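+ * @param seen set of objects that have already been added; the same set is passed to every recursive call so duplicates are tracked across the whole traversal rather than per node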
*/
- private static COSArray getAllKids(List result, COSDictionary page, boolean recurse)
+ private static COSArray getAllKids(List result, COSDictionary page, boolean recurse, Set<COSBase> seen)
{
if(page == null)
return null;
@@ -202,7 +204,6 @@ private static COSArray getAllKids(List result, COSDictionary page, boolean recu
log.error("No Kids found in getAllKids(). Probably a malformed pdf.");
return null;
}
- HashSet<COSBase> seen = new HashSet<COSBase>();
for( int i=0; i<kids.size(); i++ )
{
// ignore duplicates (from malformed PDFs)
@@ -220,7 +221,7 @@ private static COSArray getAllKids(List result, COSDictionary page, boolean recu
{
if (recurse)
{
- getAllKids(result, kid, recurse);
+ getAllKids(result, kid, recurse, seen);
}
else
{