File poppler-0.12.3-text-selection.patch of Package poppler

--- poppler-0.12.3/poppler/TextOutputDev.cc	2009-10-05 00:40:39.000000000 +0200
+++ poppler-0.12.3/poppler/TextOutputDev.cc	2010-01-22 17:03:42.000000000 +0100
@@ -1615,6 +1615,144 @@ GBool TextBlock::isBelow(TextBlock *blk)
   return below;
 }
 
+GBool TextBlock::isBeforeByRule1(TextBlock *blk1) {
+  GBool before = gFalse;
+  GBool overlap = gFalse;
+
+  switch (this->page->primaryRot) {
+  case 0:
+  case 2:
+    overlap = ((this->xMin <= blk1->xMin) &&
+	       (blk1->xMin <= this->xMax)) ||
+      ((blk1->xMin <= this->xMin) &&
+       (this->xMin <= blk1->xMax));
+    break;
+  case 1:
+  case 3:
+    overlap = ((this->yMin <= blk1->yMin) &&
+	       (blk1->yMin <= this->yMax)) ||
+      ((blk1->yMin <= this->yMin) &&
+       (this->yMin <= blk1->yMax));
+    break;
+  }
+  switch (this->page->primaryRot) {
+  case 0:
+    before = overlap && this->yMin < blk1->yMin;
+    break;
+  case 1:
+    before = overlap && this->xMax > blk1->xMax;
+    break;
+  case 2:
+    before = overlap && this->yMax > blk1->yMax;
+    break;
+  case 3:
+    before = overlap && this->xMin < blk1->xMin;
+    break;
+  }
+  return before;
+}
+
+GBool TextBlock::isBeforeByRule2(TextBlock *blk1) {
+  double cmp = 0;
+  int rotLR = rot;
+
+  if (!page->primaryLR) {
+    rotLR = (rotLR + 2) % 4;
+  }
+
+  switch (rotLR) {
+  case 0:
+    cmp = xMax - blk1->xMin;
+    break;
+  case 1:
+    cmp = yMin - blk1->yMax;
+    break;
+  case 2:
+    cmp = blk1->xMax - xMin;
+    break;
+  case 3:
+    cmp = blk1->yMin - yMax;
+    break;
+  }
+  return cmp <= 0;
+}
+
+// Sort into reading order by performing a topological sort using the rules
+// given in "High Performance Document Layout Analysis", T.M. Breuel, 2003.
+// See http://pubs.iupr.org/#2003-breuel-sdiut
+// Topological sort is done by depth first search, see
+// http://en.wikipedia.org/wiki/Topological_sorting
+int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1,
+			       TextBlock **sorted, int sortPos,
+			       GBool* visited) {
+  int pos2;
+  TextBlock *blk1, *blk2, *blk3;
+  GBool before;
+
+  if (visited[pos1]) {
+    return sortPos;
+  }
+
+  blk1 = this;
+
+#if 0 // for debugging
+  printf("visited: %d %.2f..%.2f %.2f..%.2f\n",
+	 sortPos, blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax);
+#endif
+  visited[pos1] = gTrue;
+  pos2 = -1;
+  for (blk2 = blkList; blk2; blk2 = blk2->next) {
+    pos2++;
+    if (visited[pos2]) {
+      // skip visited nodes
+      continue;
+    }
+    before = gFalse;
+    if (blk2->isBeforeByRule1(blk1)) {
+      // Rule (1) blk1 and blk2 overlap, and blk2 is above blk1.
+      before = gTrue;
+#if 0 // for debugging
+      printf("rule1: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n",
+	     blk2->xMin, blk2->xMax, blk2->yMin, blk2->yMax,
+	     blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax);
+#endif
+    } else if (blk2->isBeforeByRule2(blk1)) {
+      // Rule (2) blk2 left of blk1, and no intervening blk3
+      //          such that blk1 is before blk3 by rule 1,
+      //          and blk3 is before blk2 by rule 1.
+      before = gTrue;
+      for (blk3 = blkList; blk3; blk3 = blk3->next) {
+	if (blk3 == blk2 || blk3 == blk1) {
+	  continue;
+	}
+	if (blk1->isBeforeByRule1(blk3) &&
+	    blk3->isBeforeByRule1(blk2)) {
+	  before = gFalse;
+	  break;
+	}
+      }
+#if 0 // for debugging
+      if (before) {
+	printf("rule2: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n",
+	       blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax,
+	       blk2->xMin, blk2->xMax, blk2->yMin, blk2->yMax);
+      }
+#endif
+    }
+    if (before) {
+      // blk2 is before blk1, so it needs to be visited
+      // before we can add blk1 to the sorted list.
+      sortPos = blk2->visitDepthFirst(blkList, pos2, sorted, sortPos, visited);
+    }
+  }
+#if 0 // for debugging
+  printf("sorted: %d %.2f..%.2f %.2f..%.2f\n",
+	 sortPos, blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax);
+#endif
+  sorted[sortPos++] = blk1;
+  return sortPos;
+}
+
 //------------------------------------------------------------------------
 // TextFlow
 //------------------------------------------------------------------------
@@ -2183,8 +2321,7 @@ void TextPage::coalesce(GBool physLayout
   TextPool *pool;
   TextWord *word0, *word1, *word2;
   TextLine *line;
-  TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1;
-  TextBlock **blkArray;
+  TextBlock *blkList, *blk, *lastBlk, *blk0, *blk1;
   TextFlow *flow, *lastFlow;
   TextUnderline *underline;
   TextLink *link;
@@ -2194,7 +2331,6 @@ void TextPage::coalesce(GBool physLayout
   GBool found;
   int count[4];
   int lrCount;
-  int firstBlkIdx, nBlocksLeft;
   int col1, col2;
   int i, j, n;
 
@@ -2687,7 +2823,7 @@ void TextPage::coalesce(GBool physLayout
     }
   }
 
-#if 0 // for debugging 
+#if 0 // for debugging
   printf("*** rotation ***\n");
   for (rot = 0; rot < 4; ++rot) {
     printf("  %d: %6d\n", rot, count[rot]);
@@ -2841,9 +2977,6 @@ void TextPage::coalesce(GBool physLayout
 
   //----- reading order sort
 
-  // sort blocks into yx order (in preparation for reading order sort)
-  qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot);
-
   // compute space on left and right sides of each block
   for (i = 0; i < nBlocks; ++i) {
     blk0 = blocks[i];
@@ -2856,7 +2989,25 @@ void TextPage::coalesce(GBool physLayout
   }
 
 #if 0 // for debugging
-  printf("*** blocks, after yx sort ***\n");
+  printf("PAGE\n");
+#endif
+
+  int sortPos = 0;
+  GBool *visited = (GBool *)gmallocn(nBlocks, sizeof(GBool));
+  for (i = 0; i < nBlocks; i++) {
+    visited[i] = gFalse;
+  }
+  i = -1;
+  for (blk1 = blkList; blk1; blk1 = blk1->next) {
+    i++;
+    sortPos = blk1->visitDepthFirst(blkList, i, blocks, sortPos, visited);
+  }
+  if (visited) {
+    gfree(visited);
+  }
+
+#if 0 // for debugging
+  printf("*** blocks, after ro sort ***\n");
   for (i = 0; i < nBlocks; ++i) {
     blk = blocks[i];
     printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f space=%.2f..%.2f\n",
@@ -2876,44 +3027,34 @@ void TextPage::coalesce(GBool physLayout
     }
   }
   printf("\n");
+  fflush(stdout);
 #endif
 
   // build the flows
   //~ this needs to be adjusted for writing mode (vertical text)
   //~ this also needs to account for right-to-left column ordering
-  blkArray = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
-  memcpy(blkArray, blocks, nBlocks * sizeof(TextBlock *));
+  flow = NULL;
   while (flows) {
     flow = flows;
     flows = flows->next;
     delete flow;
   }
   flows = lastFlow = NULL;
-  firstBlkIdx = 0;
-  nBlocksLeft = nBlocks;
-  while (nBlocksLeft > 0) {
-
-    // find the upper-left-most block
-    for (; !blkArray[firstBlkIdx]; ++firstBlkIdx) ;
-    i = firstBlkIdx;
-    blk = blkArray[i];
-    for (j = firstBlkIdx + 1; j < nBlocks; ++j) {
-      blk1 = blkArray[j];
-      if (blk1) {
-	if (blk && blk->secondaryDelta(blk1) > 0) {
-	  break;
-	}
-	if (blk1->primaryCmp(blk) < 0) {
-	  i = j;
-	  blk = blk1;
-	}
+  // assume blocks are already in reading order,
+  // and construct flows accordingly.
+  for (i = 0; i < nBlocks; i++) {
+    blk = blocks[i];
+    blk->next = NULL;
+    if (flow) {
+      blk1 = blocks[i - 1];
+      blkSpace = maxBlockSpacing * blk1->lines->words->fontSize;
+      if (blk1->secondaryDelta(blk) <= blkSpace &&
+	  blk->isBelow(blk1) &&
+	  flow->blockFits(blk, blk1)) {
+	flow->addBlock(blk);
+	continue;
       }
     }
-    blkArray[i] = NULL;
-    --nBlocksLeft;
-    blk->next = NULL;
-
-    // create a new flow, starting with the upper-left-most block
     flow = new TextFlow(this, blk);
     if (lastFlow) {
       lastFlow->next = flow;
@@ -2921,56 +3062,7 @@ void TextPage::coalesce(GBool physLayout
       flows = flow;
     }
     lastFlow = flow;
-    fontSize = blk->lines->words->fontSize;
-
-    // push the upper-left-most block on the stack
-    blk->stackNext = NULL;
-    blkStack = blk;
-
-    // find the other blocks in this flow
-    while (blkStack) {
-
-      // find the upper-left-most block under (but within
-      // maxBlockSpacing of) the top block on the stack
-      blkSpace = maxBlockSpacing * blkStack->lines->words->fontSize;
-      blk = NULL;
-      i = -1;
-      for (j = firstBlkIdx; j < nBlocks; ++j) {
-	blk1 = blkArray[j];
-	if (blk1) {
-	  if (blkStack->secondaryDelta(blk1) > blkSpace) {
-	    break;
-	  }
-	  if (blk && blk->secondaryDelta(blk1) > 0) {
-	    break;
-	  }
-	  if (blk1->isBelow(blkStack) &&
-	      (!blk || blk1->primaryCmp(blk) < 0)) {
-	    i = j;
-	    blk = blk1;
-	  }
-	}
-      }
-
-      // if a suitable block was found, add it to the flow and push it
-      // onto the stack
-      if (blk && flow->blockFits(blk, blkStack)) {
-	blkArray[i] = NULL;
-	--nBlocksLeft;
-	blk->next = NULL;
-	flow->addBlock(blk);
-	fontSize = blk->lines->words->fontSize;
-	blk->stackNext = blkStack;
-	blkStack = blk;
-
-      // otherwise (if there is no block under the top block or the
-      // block is not suitable), pop the stack
-      } else {
-	blkStack = blkStack->stackNext;
-      }
-    }
   }
-  gfree(blkArray);
 
 #if 0 // for debugging
   printf("*** flows ***\n");
@@ -3521,10 +3613,9 @@ void TextSelectionDumper::visitLine (Tex
 
 GooString *TextSelectionDumper::getText (void)
 {
-  GBool oneRot = gTrue;
   GooString *s;
   TextLineFrag *frag;
-  int i, col;
+  int i;
   GBool multiLine;
   UnicodeMap *uMap;
   char space[8], eol[16];
@@ -3541,45 +3632,11 @@ GooString *TextSelectionDumper::getText 
   eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
 
   if (nFrags > 0) {
-    for (i = 0; i < nFrags; ++i) {
-      frags[i].computeCoords(oneRot);
-    }
-    page->assignColumns(frags, nFrags, oneRot);
-
-    // if all lines in the region have the same rotation, use it;
-    // otherwise, use the page's primary rotation
-    if (oneRot) {
-      qsort(frags, nFrags, sizeof(TextLineFrag),
-	    &TextLineFrag::cmpYXLineRot);
-    } else {
-      qsort(frags, nFrags, sizeof(TextLineFrag),
-	    &TextLineFrag::cmpYXPrimaryRot);
-    }
-
-    col = 0;
     multiLine = gFalse;
     for (i = 0; i < nFrags; ++i) {
       frag = &frags[i];
 
-      // insert a return
-      if (frag->col < col ||
-	  (i > 0 && fabs(frag->base - frags[i-1].base) >
-	              maxIntraLineDelta * frags[i-1].line->words->fontSize)) {
-	  s->append(eol, eolLen);
-	col = 0;
-	multiLine = gTrue;
-      }
-
-      // column alignment
-      for (; col < frag->col; ++col) {
-	s->append(space, spaceLen);
-      }
-
-      // get the fragment text
-      col += page->dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
-    }
-
-    if (multiLine) {
+      page->dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
       s->append(eol, eolLen);
     }
   }
@@ -3859,75 +3916,112 @@ void TextLine::visitSelection(TextSelect
 void TextBlock::visitSelection(TextSelectionVisitor *visitor,
 			       PDFRectangle *selection,
 			       SelectionStyle style) {
-  TextLine *p, *begin, *end;
   PDFRectangle child_selection;
-  double start_x, start_y, stop_x, stop_y;
-
-  begin = NULL;
-  end = NULL;
-  start_x = selection->x1;
-  start_y = selection->y1;
-  stop_x = selection->x2;
-  stop_y = selection->y2;
-  
-  for (p = lines; p != NULL; p = p->next) {
-    if (selection->x1 < p->xMax && selection->y1 < p->yMax && 
-	selection->x2 < p->xMax && selection->y2 < p->yMax && begin == NULL) {
-      begin = p;
-      if (selection->x1 < selection->x2) {
-	start_x = selection->x1;
-	start_y = selection->y1;
-	stop_x = selection->x2;
-	stop_y = selection->y2;
+  double x[2], y[2], d, best_d[2];
+  TextLine *p, *best_line[2];
+  int i, count = 0, best_count[2], start, stop;
+  GBool all[2];
+
+  x[0] = selection->x1;
+  y[0] = selection->y1;
+  x[1] = selection->x2;
+  y[1] = selection->y2;
+
+  for (i = 0; i < 2; i++) {
+    // the first/last lines are often not nearest
+    // the corners, so we have to force them to be
+    // selected when the selection runs outside this
+    // block.
+    if (page->primaryLR) {
+      all[i] = x[i] >= this->xMax && y[i] >= this->yMax;
+      if (x[i] <= this->xMin && y[i] <= this->yMin) {
+	best_line[i] = this->lines;
+	best_count[i] = 1;
       } else {
-	start_x = selection->x2;
-	start_y = selection->y2;
-	stop_x = selection->x1;
-	stop_y = selection->y1;
-      }
-    } else if (selection->x1 < p->xMax && selection->y1 < p->yMax && begin == NULL) {
-      begin = p;
-      start_x = selection->x1;
-      start_y = selection->y1;
-      stop_x = selection->x2;
-      stop_y = selection->y2;
-    } else if (selection->x2 < p->xMax && selection->y2 < p->yMax && begin == NULL) {
-      begin = p;
-      start_x = selection->x2;
-      start_y = selection->y2;
-      stop_x = selection->x1;
-      stop_y = selection->y1;
+	best_line[i] = NULL;
+	best_count[i] = 0;
+      }
+    } else {
+      all[i] = x[i] <= this->xMin && y[i] >= this->yMax;
+      if (x[i] >= this->xMax && y[i] <= this->yMin) {
+	best_line[i] = this->lines;
+	best_count[i] = 1;
+      } else {
+	best_line[i] = NULL;
+	best_count[i] = 0;
+      }
     }
-
-    if (((selection->x1 > p->xMin && selection->y1 > p->yMin) ||
-	 (selection->x2 > p->xMin && selection->y2 > p->yMin))
-	&& (begin != NULL))
-      end = p->next;
+    best_d[i] = 0;
   }
 
-  /* Skip empty selection. */
-  if (end == begin)
+  // find the nearest line to the selection points
+  // using the manhattan distance.
+  for (p = this->lines; p; p = p->next) {
+    count++;
+    for (i = 0; i < 2; i++) {
+      d = fmax(p->xMin - x[i], 0.0) +
+	fmax(x[i] - p->xMax, 0.0) +
+	fmax(p->yMin - y[i], 0.0) +
+	fmax(y[i] - p->yMax, 0.0);
+      if (!best_line[i] || all[i] ||
+	  d < best_d[i]) {
+	best_line[i] = p;
+	best_count[i] = count;
+	best_d[i] = d;
+      }
+    }
+  }
+  // assert: best is always set.
+  if (!best_line[0] || !best_line[1]) {
     return;
+  }
+
+  // Now decide which point was first.
+  if (best_count[0] < best_count[1] ||
+      (best_count[0] == best_count[1] &&
+       y[0] < y[1])) {
+    start = 0;
+    stop = 1;
+  } else {
+    start = 1;
+    stop = 0;
+  }
 
-  visitor->visitBlock (this, begin, end, selection);
+  visitor->visitBlock(this, best_line[start], best_line[stop], selection);
 
-  for (p = begin; p != end; p = p->next) {
-    if (p == begin && style != selectionStyleLine) {
-      child_selection.x1 = start_x;
-      child_selection.y1 = start_y;
+  for (p = best_line[start]; p; p = p->next) {
+    if (page->primaryLR) {
+      child_selection.x1 = p->xMin;
+      child_selection.x2 = p->xMax;
     } else {
-      child_selection.x1 = 0;
-      child_selection.y1 = 0;
+      child_selection.x1 = p->xMax;
+      child_selection.x2 = p->xMin;
     }
-    if (p->next == end && style != selectionStyleLine) {
-      child_selection.x2 = stop_x;
-      child_selection.y2 = stop_y;
+    child_selection.y1 = p->yMin;
+    child_selection.y2 = p->yMax;
+    if (style == selectionStyleLine) {
+      if (p == best_line[start]) {
+	child_selection.x1 = 0;
+	child_selection.y1 = 0;
+      }
+      if (p == best_line[stop]) {
+	child_selection.x2 = page->pageWidth;
+	child_selection.y2 = page->pageHeight;
+      }
     } else {
-      child_selection.x2 = page->pageWidth;
-      child_selection.y2 = page->pageHeight;
+      if (p == best_line[start]) {
+	child_selection.x1 = fmax(p->xMin, fmin(p->xMax, x[start]));
+	child_selection.y1 = fmax(p->yMin, fmin(p->yMax, y[start]));
+      }
+      if (p == best_line[stop]) {
+	child_selection.x2 = fmax(p->xMin, fmin(p->xMax, x[stop]));
+	child_selection.y2 = fmax(p->yMin, fmin(p->yMax, y[stop]));
+      }
     }
-
     p->visitSelection(visitor, &child_selection, style);
+    if (p == best_line[stop]) {
+      return;
+    }
   }
 }
 
@@ -3935,73 +4029,122 @@ void TextPage::visitSelection(TextSelect
 			      PDFRectangle *selection,
 			      SelectionStyle style)
 {
-  int i, begin, end;
   PDFRectangle child_selection;
-  double start_x, start_y, stop_x, stop_y;
-  TextBlock *b;
+  double x[2], y[2], d, best_d[2];
+  double xMin, yMin, xMax, yMax;
+  TextFlow *flow, *best_flow[2];
+  TextBlock *blk, *best_block[2];
+  int i, count = 0, best_count[2], start, stop;
 
-  begin = nBlocks;
-  end = 0;
-  start_x = selection->x1;
-  start_y = selection->y1;
-  stop_x = selection->x2;
-  stop_y = selection->y2;
+  if (!flows)
+    return;
 
-  for (i = 0; i < nBlocks; i++) {
-    b = blocks[i];
+  x[0] = selection->x1;
+  y[0] = selection->y1;
+  x[1] = selection->x2;
+  y[1] = selection->y2;
+
+  xMin = pageWidth;
+  yMin = pageHeight;
+  xMax = 0.0;
+  yMax = 0.0;
+
+  for (i = 0; i < 2; i++) {
+    best_block[i] = NULL;
+    best_flow[i] = NULL;
+    best_count[i] = 0;
+    best_d[i] = 0;
+  }
 
-    if (selection->x1 < b->xMax && selection->y1 < b->yMax &&
-	selection->x2 < b->xMax && selection->y2 < b->yMax && i < begin) {
-      begin = i;
-      if (selection->y1 < selection->y2) {
-	start_x = selection->x1;
-	start_y = selection->y1;
-	stop_x = selection->x2;
-	stop_y = selection->y2;
-      } else {
-	start_x = selection->x2;
-	start_y = selection->y2;
-	stop_x = selection->x1;
-	stop_y = selection->y1;
-      }
-    } else if (selection->x1 < b->xMax && selection->y1 < b->yMax && i < begin) {
-      begin = i;
-      start_x = selection->x1;
-      start_y = selection->y1;
-      stop_x = selection->x2;
-      stop_y = selection->y2;
-    } else if (selection->x2 < b->xMax && selection->y2 < b->yMax && i < begin) {
-      begin = i;
-      start_x = selection->x2;
-      start_y = selection->y2;
-      stop_x = selection->x1;
-      stop_y = selection->y1;
+  // find the nearest blocks to the selection points
+  // using the manhattan distance.
+  for (flow = flows; flow; flow = flow->next) {
+    for (blk = flow->blocks; blk; blk = blk->next) {
+      count++;
+      // the first/last blocks in reading order are
+      // often not the closest to the page corners;
+      // track the corners, force those blocks to
+      // be selected if the selection runs across
+      // multiple pages.
+      xMin = fmin(xMin, blk->xMin);
+      yMin = fmin(yMin, blk->yMin);
+      xMax = fmax(xMax, blk->xMax);
+      yMax = fmax(yMax, blk->yMax);
+      for (i = 0; i < 2; i++) {
+	d = fmax(blk->xMin - x[i], 0.0) +
+	  fmax(x[i] - blk->xMax, 0.0) +
+	  fmax(blk->yMin - y[i], 0.0) +
+	  fmax(y[i] - blk->yMax, 0.0);
+	if (!best_block[i] ||
+	    d < best_d[i] ||
+	    (!blk->next && !flow->next &&
+	     x[i] > xMax && y[i] > yMax)) {
+	  best_block[i] = blk;
+	  best_flow[i] = flow;
+	  best_count[i] = count;
+	  best_d[i] = d;
+	}
+      }
     }
+  }
+  for (i = 0; i < 2; i++) {
+    if (primaryLR) {
+      if (x[i] < xMin && y[i] < yMin) {
+	best_block[i] = flows->blocks;
+	best_flow[i] = flows;
+	best_count[i] = 1;
+      }
+    } else {
+      if (x[i] > xMax && y[i] < yMin) {
+	best_block[i] = flows->blocks;
+	best_flow[i] = flows;
+	best_count[i] = 1;
+      }
+    }
+  }
+  // assert: best is always set.
+  if (!best_block[0] || !best_block[1]) {
+    return;
+  }
 
-    if ((selection->x1 > b->xMin && selection->y1 > b->yMin) ||
-	(selection->x2 > b->xMin && selection->y2 > b->yMin))
-      end = i + 1;
+  // Now decide which point was first.
+  if (best_count[0] < best_count[1] ||
+      (best_count[0] == best_count[1] && y[0] < y[1])) {
+    start = 0;
+    stop = 1;
+  } else {
+    start = 1;
+    stop = 0;
   }
 
-  for (i = begin; i < end; i++) {
-    if (blocks[i]->xMin < start_x && start_x < blocks[i]->xMax &&
-	blocks[i]->yMin < start_y && start_y < blocks[i]->yMax) {
-      child_selection.x1 = start_x;
-      child_selection.y1 = start_y;
+  for (flow = best_flow[start]; flow; flow = flow->next) {
+    if (flow == best_flow[start]) {
+      blk = best_block[start];
     } else {
-      child_selection.x1 = 0;
-      child_selection.y1 = 0;
+      blk = flow->blocks;
     }
-    if (blocks[i]->xMin < stop_x && stop_x < blocks[i]->xMax &&
-	blocks[i]->yMin < stop_y && stop_y < blocks[i]->yMax) {
-      child_selection.x2 = stop_x;
-      child_selection.y2 = stop_y;
-    } else {
-      child_selection.x2 = pageWidth;
-      child_selection.y2 = pageHeight;
+    for (; blk; blk = blk->next) {
+      if (primaryLR) {
+	child_selection.x1 = blk->xMin;
+	child_selection.x2 = blk->xMax;
+      } else {
+	child_selection.x1 = blk->xMax;
+	child_selection.x2 = blk->xMin;
+      }
+      child_selection.y1 = blk->yMin;
+      child_selection.y2 = blk->yMax;
+      if (blk == best_block[start]) {
+	child_selection.x1 = fmax(blk->xMin, fmin(blk->xMax, x[start]));
+	child_selection.y1 = fmax(blk->yMin, fmin(blk->yMax, y[start]));
+      }
+      if (blk == best_block[stop]) {
+	child_selection.x2 = fmax(blk->xMin, fmin(blk->xMax, x[stop]));
+	child_selection.y2 = fmax(blk->yMin, fmin(blk->yMax, y[stop]));
+	blk->visitSelection(visitor, &child_selection, style);
+	return;
+      }
+      blk->visitSelection(visitor, &child_selection, style);
     }
-
-    blocks[i]->visitSelection(visitor, &child_selection, style);
   }
 }
 
@@ -4286,24 +4429,13 @@ void TextPage::dump(void *outputStream, 
 	  dumpFragment(line->text, n, uMap, s);
 	  (*outputFunc)(outputStream, s->getCString(), s->getLength());
 	  delete s;
-	  if (!line->hyphenated) {
-	    if (line->next) {
-	      (*outputFunc)(outputStream, space, spaceLen);
-	    } else if (blk->next) {
-	      //~ this is a bit of a kludge - we should really do a more
-	      //~ intelligent determination of paragraphs
-	      if (blk->next->lines->words->fontSize ==
-		  blk->lines->words->fontSize) {
-		(*outputFunc)(outputStream, space, spaceLen);
-	      } else {
-		(*outputFunc)(outputStream, eol, eolLen);
-	      }
-	    }
+	  // output a newline when a hyphen is not suppressed
+	  if (n == line->len) {
+	    (*outputFunc)(outputStream, eol, eolLen);
 	  }
 	}
       }
       (*outputFunc)(outputStream, eol, eolLen);
-      (*outputFunc)(outputStream, eol, eolLen);
     }
   }
 
--- poppler-0.12.3/poppler/TextOutputDev.h	2009-09-09 23:22:31.000000000 +0200
+++ poppler-0.12.3/poppler/TextOutputDev.h	2010-01-22 17:03:34.000000000 +0100
@@ -361,6 +361,14 @@ public:
 
 private:
 
+  GBool isBeforeByRule1(TextBlock *blk1);
+  GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1);
+  GBool isBeforeByRule2(TextBlock *blk1);
+
+  int visitDepthFirst(TextBlock *blkList, int pos1,
+		      TextBlock **sorted, int sortPos,
+		      GBool* visited);
+
   TextPage *page;		// the parent page
   int rot;			// text rotation
   double xMin, xMax;		// bounding box x coordinates
openSUSE Build Service is sponsored by