File poppler-0.12.3-text-selection.patch of Package poppler
--- poppler-0.12.3/poppler/TextOutputDev.cc 2009-10-05 00:40:39.000000000 +0200
+++ poppler-0.12.3/poppler/TextOutputDev.cc 2010-01-22 17:03:42.000000000 +0100
@@ -1615,6 +1615,144 @@ GBool TextBlock::isBelow(TextBlock *blk)
return below;
}
+GBool TextBlock::isBeforeByRule1(TextBlock *blk1) {
+ GBool before = gFalse;
+ GBool overlap = gFalse;
+
+ switch (this->page->primaryRot) {
+ case 0:
+ case 2:
+ overlap = ((this->xMin <= blk1->xMin) &&
+ (blk1->xMin <= this->xMax)) ||
+ ((blk1->xMin <= this->xMin) &&
+ (this->xMin <= blk1->xMax));
+ break;
+ case 1:
+ case 3:
+ overlap = ((this->yMin <= blk1->yMin) &&
+ (blk1->yMin <= this->yMax)) ||
+ ((blk1->yMin <= this->yMin) &&
+ (this->yMin <= blk1->yMax));
+ break;
+ }
+ switch (this->page->primaryRot) {
+ case 0:
+ before = overlap && this->yMin < blk1->yMin;
+ break;
+ case 1:
+ before = overlap && this->xMax > blk1->xMax;
+ break;
+ case 2:
+ before = overlap && this->yMax > blk1->yMax;
+ break;
+ case 3:
+ before = overlap && this->xMin < blk1->xMin;
+ break;
+ }
+ return before;
+}
+
+GBool TextBlock::isBeforeByRule2(TextBlock *blk1) {
+ double cmp = 0;
+ int rotLR = rot;
+
+ if (!page->primaryLR) {
+ rotLR = (rotLR + 2) % 4;
+ }
+
+ switch (rotLR) {
+ case 0:
+ cmp = xMax - blk1->xMin;
+ break;
+ case 1:
+ cmp = yMin - blk1->yMax;
+ break;
+ case 2:
+ cmp = blk1->xMax - xMin;
+ break;
+ case 3:
+ cmp = blk1->yMin - yMax;
+ break;
+ }
+ return cmp <= 0;
+}
+
+// Sort into reading order by performing a topological sort using the rules
+// given in "High Performance Document Layout Analysis", T.M. Breuel, 2003.
+// See http://pubs.iupr.org/#2003-breuel-sdiut
+// Topological sort is done by depth first search, see
+// http://en.wikipedia.org/wiki/Topological_sorting
+int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1,
+ TextBlock **sorted, int sortPos,
+ GBool* visited) {
+ int pos2;
+ TextBlock *blk1, *blk2, *blk3;
+ GBool before;
+
+ if (visited[pos1]) {
+ return sortPos;
+ }
+
+ blk1 = this;
+
+#if 0 // for debugging
+ printf("visited: %d %.2f..%.2f %.2f..%.2f\n",
+ sortPos, blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax);
+#endif
+ visited[pos1] = gTrue;
+ pos2 = -1;
+ for (blk2 = blkList; blk2; blk2 = blk2->next) {
+ pos2++;
+ if (visited[pos2]) {
+ // skip visited nodes
+ continue;
+ }
+ before = gFalse;
+ if (blk2->isBeforeByRule1(blk1)) {
+ // Rule (1) blk1 and blk2 overlap, and blk2 is above blk1.
+ before = gTrue;
+#if 0 // for debugging
+ printf("rule1: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n",
+ blk2->xMin, blk2->xMax, blk2->yMin, blk2->yMax,
+ blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax);
+#endif
+ } else if (blk2->isBeforeByRule2(blk1)) {
+ // Rule (2) blk2 left of blk1, and no intervening blk3
+ // such that blk1 is before blk3 by rule 1,
+ // and blk3 is before blk2 by rule 1.
+ before = gTrue;
+ for (blk3 = blkList; blk3; blk3 = blk3->next) {
+ if (blk3 == blk2 || blk3 == blk1) {
+ continue;
+ }
+ if (blk1->isBeforeByRule1(blk3) &&
+ blk3->isBeforeByRule1(blk2)) {
+ before = gFalse;
+ break;
+ }
+ }
+#if 0 // for debugging
+ if (before) {
+ printf("rule2: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n",
+ blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax,
+ blk2->xMin, blk2->xMax, blk2->yMin, blk2->yMax);
+ }
+#endif
+ }
+ if (before) {
+ // blk2 is before blk1, so it needs to be visited
+ // before we can add blk1 to the sorted list.
+ sortPos = blk2->visitDepthFirst(blkList, pos2, sorted, sortPos, visited);
+ }
+ }
+#if 0 // for debugging
+ printf("sorted: %d %.2f..%.2f %.2f..%.2f\n",
+ sortPos, blk1->xMin, blk1->xMax, blk1->yMin, blk1->yMax);
+#endif
+ sorted[sortPos++] = blk1;
+ return sortPos;
+}
+
//------------------------------------------------------------------------
// TextFlow
//------------------------------------------------------------------------
@@ -2183,8 +2321,7 @@ void TextPage::coalesce(GBool physLayout
TextPool *pool;
TextWord *word0, *word1, *word2;
TextLine *line;
- TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1;
- TextBlock **blkArray;
+ TextBlock *blkList, *blk, *lastBlk, *blk0, *blk1;
TextFlow *flow, *lastFlow;
TextUnderline *underline;
TextLink *link;
@@ -2194,7 +2331,6 @@ void TextPage::coalesce(GBool physLayout
GBool found;
int count[4];
int lrCount;
- int firstBlkIdx, nBlocksLeft;
int col1, col2;
int i, j, n;
@@ -2687,7 +2823,7 @@ void TextPage::coalesce(GBool physLayout
}
}
-#if 0 // for debugging
+#if 0 // for debugging
printf("*** rotation ***\n");
for (rot = 0; rot < 4; ++rot) {
printf(" %d: %6d\n", rot, count[rot]);
@@ -2841,9 +2977,6 @@ void TextPage::coalesce(GBool physLayout
//----- reading order sort
- // sort blocks into yx order (in preparation for reading order sort)
- qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot);
-
// compute space on left and right sides of each block
for (i = 0; i < nBlocks; ++i) {
blk0 = blocks[i];
@@ -2856,7 +2989,25 @@ void TextPage::coalesce(GBool physLayout
}
#if 0 // for debugging
- printf("*** blocks, after yx sort ***\n");
+ printf("PAGE\n");
+#endif
+
+ int sortPos = 0;
+ GBool *visited = (GBool *)gmallocn(nBlocks, sizeof(GBool));
+ for (i = 0; i < nBlocks; i++) {
+ visited[i] = gFalse;
+ }
+ i = -1;
+ for (blk1 = blkList; blk1; blk1 = blk1->next) {
+ i++;
+ sortPos = blk1->visitDepthFirst(blkList, i, blocks, sortPos, visited);
+ }
+ if (visited) {
+ gfree(visited);
+ }
+
+#if 0 // for debugging
+ printf("*** blocks, after ro sort ***\n");
for (i = 0; i < nBlocks; ++i) {
blk = blocks[i];
printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f space=%.2f..%.2f\n",
@@ -2876,44 +3027,34 @@ void TextPage::coalesce(GBool physLayout
}
}
printf("\n");
+ fflush(stdout);
#endif
// build the flows
//~ this needs to be adjusted for writing mode (vertical text)
//~ this also needs to account for right-to-left column ordering
- blkArray = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
- memcpy(blkArray, blocks, nBlocks * sizeof(TextBlock *));
+ flow = NULL;
while (flows) {
flow = flows;
flows = flows->next;
delete flow;
}
flows = lastFlow = NULL;
- firstBlkIdx = 0;
- nBlocksLeft = nBlocks;
- while (nBlocksLeft > 0) {
-
- // find the upper-left-most block
- for (; !blkArray[firstBlkIdx]; ++firstBlkIdx) ;
- i = firstBlkIdx;
- blk = blkArray[i];
- for (j = firstBlkIdx + 1; j < nBlocks; ++j) {
- blk1 = blkArray[j];
- if (blk1) {
- if (blk && blk->secondaryDelta(blk1) > 0) {
- break;
- }
- if (blk1->primaryCmp(blk) < 0) {
- i = j;
- blk = blk1;
- }
+ // assume blocks are already in reading order,
+ // and construct flows accordingly.
+ for (i = 0; i < nBlocks; i++) {
+ blk = blocks[i];
+ blk->next = NULL;
+ if (flow) {
+ blk1 = blocks[i - 1];
+ blkSpace = maxBlockSpacing * blk1->lines->words->fontSize;
+ if (blk1->secondaryDelta(blk) <= blkSpace &&
+ blk->isBelow(blk1) &&
+ flow->blockFits(blk, blk1)) {
+ flow->addBlock(blk);
+ continue;
}
}
- blkArray[i] = NULL;
- --nBlocksLeft;
- blk->next = NULL;
-
- // create a new flow, starting with the upper-left-most block
flow = new TextFlow(this, blk);
if (lastFlow) {
lastFlow->next = flow;
@@ -2921,56 +3062,7 @@ void TextPage::coalesce(GBool physLayout
flows = flow;
}
lastFlow = flow;
- fontSize = blk->lines->words->fontSize;
-
- // push the upper-left-most block on the stack
- blk->stackNext = NULL;
- blkStack = blk;
-
- // find the other blocks in this flow
- while (blkStack) {
-
- // find the upper-left-most block under (but within
- // maxBlockSpacing of) the top block on the stack
- blkSpace = maxBlockSpacing * blkStack->lines->words->fontSize;
- blk = NULL;
- i = -1;
- for (j = firstBlkIdx; j < nBlocks; ++j) {
- blk1 = blkArray[j];
- if (blk1) {
- if (blkStack->secondaryDelta(blk1) > blkSpace) {
- break;
- }
- if (blk && blk->secondaryDelta(blk1) > 0) {
- break;
- }
- if (blk1->isBelow(blkStack) &&
- (!blk || blk1->primaryCmp(blk) < 0)) {
- i = j;
- blk = blk1;
- }
- }
- }
-
- // if a suitable block was found, add it to the flow and push it
- // onto the stack
- if (blk && flow->blockFits(blk, blkStack)) {
- blkArray[i] = NULL;
- --nBlocksLeft;
- blk->next = NULL;
- flow->addBlock(blk);
- fontSize = blk->lines->words->fontSize;
- blk->stackNext = blkStack;
- blkStack = blk;
-
- // otherwise (if there is no block under the top block or the
- // block is not suitable), pop the stack
- } else {
- blkStack = blkStack->stackNext;
- }
- }
}
- gfree(blkArray);
#if 0 // for debugging
printf("*** flows ***\n");
@@ -3521,10 +3613,9 @@ void TextSelectionDumper::visitLine (Tex
GooString *TextSelectionDumper::getText (void)
{
- GBool oneRot = gTrue;
GooString *s;
TextLineFrag *frag;
- int i, col;
+ int i;
GBool multiLine;
UnicodeMap *uMap;
char space[8], eol[16];
@@ -3541,45 +3632,11 @@ GooString *TextSelectionDumper::getText
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
if (nFrags > 0) {
- for (i = 0; i < nFrags; ++i) {
- frags[i].computeCoords(oneRot);
- }
- page->assignColumns(frags, nFrags, oneRot);
-
- // if all lines in the region have the same rotation, use it;
- // otherwise, use the page's primary rotation
- if (oneRot) {
- qsort(frags, nFrags, sizeof(TextLineFrag),
- &TextLineFrag::cmpYXLineRot);
- } else {
- qsort(frags, nFrags, sizeof(TextLineFrag),
- &TextLineFrag::cmpYXPrimaryRot);
- }
-
- col = 0;
multiLine = gFalse;
for (i = 0; i < nFrags; ++i) {
frag = &frags[i];
- // insert a return
- if (frag->col < col ||
- (i > 0 && fabs(frag->base - frags[i-1].base) >
- maxIntraLineDelta * frags[i-1].line->words->fontSize)) {
- s->append(eol, eolLen);
- col = 0;
- multiLine = gTrue;
- }
-
- // column alignment
- for (; col < frag->col; ++col) {
- s->append(space, spaceLen);
- }
-
- // get the fragment text
- col += page->dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
- }
-
- if (multiLine) {
+ page->dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
s->append(eol, eolLen);
}
}
@@ -3859,75 +3916,112 @@ void TextLine::visitSelection(TextSelect
void TextBlock::visitSelection(TextSelectionVisitor *visitor,
PDFRectangle *selection,
SelectionStyle style) {
- TextLine *p, *begin, *end;
PDFRectangle child_selection;
- double start_x, start_y, stop_x, stop_y;
-
- begin = NULL;
- end = NULL;
- start_x = selection->x1;
- start_y = selection->y1;
- stop_x = selection->x2;
- stop_y = selection->y2;
-
- for (p = lines; p != NULL; p = p->next) {
- if (selection->x1 < p->xMax && selection->y1 < p->yMax &&
- selection->x2 < p->xMax && selection->y2 < p->yMax && begin == NULL) {
- begin = p;
- if (selection->x1 < selection->x2) {
- start_x = selection->x1;
- start_y = selection->y1;
- stop_x = selection->x2;
- stop_y = selection->y2;
+ double x[2], y[2], d, best_d[2];
+ TextLine *p, *best_line[2];
+ int i, count = 0, best_count[2], start, stop;
+ GBool all[2];
+
+ x[0] = selection->x1;
+ y[0] = selection->y1;
+ x[1] = selection->x2;
+ y[1] = selection->y2;
+
+ for (i = 0; i < 2; i++) {
+ // the first/last lines are often not nearest
+ // the corners, so we have to force them to be
+ // selected when the selection runs outside this
+ // block.
+ if (page->primaryLR) {
+ all[i] = x[i] >= this->xMax && y[i] >= this->yMax;
+ if (x[i] <= this->xMin && y[i] <= this->yMin) {
+ best_line[i] = this->lines;
+ best_count[i] = 1;
} else {
- start_x = selection->x2;
- start_y = selection->y2;
- stop_x = selection->x1;
- stop_y = selection->y1;
- }
- } else if (selection->x1 < p->xMax && selection->y1 < p->yMax && begin == NULL) {
- begin = p;
- start_x = selection->x1;
- start_y = selection->y1;
- stop_x = selection->x2;
- stop_y = selection->y2;
- } else if (selection->x2 < p->xMax && selection->y2 < p->yMax && begin == NULL) {
- begin = p;
- start_x = selection->x2;
- start_y = selection->y2;
- stop_x = selection->x1;
- stop_y = selection->y1;
+ best_line[i] = NULL;
+ best_count[i] = 0;
+ }
+ } else {
+ all[i] = x[i] <= this->xMin && y[i] >= this->yMax;
+ if (x[i] >= this->xMax && y[i] <= this->yMin) {
+ best_line[i] = this->lines;
+ best_count[i] = 1;
+ } else {
+ best_line[i] = NULL;
+ best_count[i] = 0;
+ }
}
-
- if (((selection->x1 > p->xMin && selection->y1 > p->yMin) ||
- (selection->x2 > p->xMin && selection->y2 > p->yMin))
- && (begin != NULL))
- end = p->next;
+ best_d[i] = 0;
}
- /* Skip empty selection. */
- if (end == begin)
+ // find the nearest line to the selection points
+ // using the manhattan distance.
+ for (p = this->lines; p; p = p->next) {
+ count++;
+ for (i = 0; i < 2; i++) {
+ d = fmax(p->xMin - x[i], 0.0) +
+ fmax(x[i] - p->xMax, 0.0) +
+ fmax(p->yMin - y[i], 0.0) +
+ fmax(y[i] - p->yMax, 0.0);
+ if (!best_line[i] || all[i] ||
+ d < best_d[i]) {
+ best_line[i] = p;
+ best_count[i] = count;
+ best_d[i] = d;
+ }
+ }
+ }
+ // assert: best is always set.
+ if (!best_line[0] || !best_line[1]) {
return;
+ }
+
+ // Now decide which point was first.
+ if (best_count[0] < best_count[1] ||
+ (best_count[0] == best_count[1] &&
+ y[0] < y[1])) {
+ start = 0;
+ stop = 1;
+ } else {
+ start = 1;
+ stop = 0;
+ }
- visitor->visitBlock (this, begin, end, selection);
+ visitor->visitBlock(this, best_line[start], best_line[stop], selection);
- for (p = begin; p != end; p = p->next) {
- if (p == begin && style != selectionStyleLine) {
- child_selection.x1 = start_x;
- child_selection.y1 = start_y;
+ for (p = best_line[start]; p; p = p->next) {
+ if (page->primaryLR) {
+ child_selection.x1 = p->xMin;
+ child_selection.x2 = p->xMax;
} else {
- child_selection.x1 = 0;
- child_selection.y1 = 0;
+ child_selection.x1 = p->xMax;
+ child_selection.x2 = p->xMin;
}
- if (p->next == end && style != selectionStyleLine) {
- child_selection.x2 = stop_x;
- child_selection.y2 = stop_y;
+ child_selection.y1 = p->yMin;
+ child_selection.y2 = p->yMax;
+ if (style == selectionStyleLine) {
+ if (p == best_line[start]) {
+ child_selection.x1 = 0;
+ child_selection.y1 = 0;
+ }
+ if (p == best_line[stop]) {
+ child_selection.x2 = page->pageWidth;
+ child_selection.y2 = page->pageHeight;
+ }
} else {
- child_selection.x2 = page->pageWidth;
- child_selection.y2 = page->pageHeight;
+ if (p == best_line[start]) {
+ child_selection.x1 = fmax(p->xMin, fmin(p->xMax, x[start]));
+ child_selection.y1 = fmax(p->yMin, fmin(p->yMax, y[start]));
+ }
+ if (p == best_line[stop]) {
+ child_selection.x2 = fmax(p->xMin, fmin(p->xMax, x[stop]));
+ child_selection.y2 = fmax(p->yMin, fmin(p->yMax, y[stop]));
+ }
}
-
p->visitSelection(visitor, &child_selection, style);
+ if (p == best_line[stop]) {
+ return;
+ }
}
}
@@ -3935,73 +4029,122 @@ void TextPage::visitSelection(TextSelect
PDFRectangle *selection,
SelectionStyle style)
{
- int i, begin, end;
PDFRectangle child_selection;
- double start_x, start_y, stop_x, stop_y;
- TextBlock *b;
+ double x[2], y[2], d, best_d[2];
+ double xMin, yMin, xMax, yMax;
+ TextFlow *flow, *best_flow[2];
+ TextBlock *blk, *best_block[2];
+ int i, count = 0, best_count[2], start, stop;
- begin = nBlocks;
- end = 0;
- start_x = selection->x1;
- start_y = selection->y1;
- stop_x = selection->x2;
- stop_y = selection->y2;
+ if (!flows)
+ return;
- for (i = 0; i < nBlocks; i++) {
- b = blocks[i];
+ x[0] = selection->x1;
+ y[0] = selection->y1;
+ x[1] = selection->x2;
+ y[1] = selection->y2;
+
+ xMin = pageWidth;
+ yMin = pageHeight;
+ xMax = 0.0;
+ yMax = 0.0;
+
+ for (i = 0; i < 2; i++) {
+ best_block[i] = NULL;
+ best_flow[i] = NULL;
+ best_count[i] = 0;
+ best_d[i] = 0;
+ }
- if (selection->x1 < b->xMax && selection->y1 < b->yMax &&
- selection->x2 < b->xMax && selection->y2 < b->yMax && i < begin) {
- begin = i;
- if (selection->y1 < selection->y2) {
- start_x = selection->x1;
- start_y = selection->y1;
- stop_x = selection->x2;
- stop_y = selection->y2;
- } else {
- start_x = selection->x2;
- start_y = selection->y2;
- stop_x = selection->x1;
- stop_y = selection->y1;
- }
- } else if (selection->x1 < b->xMax && selection->y1 < b->yMax && i < begin) {
- begin = i;
- start_x = selection->x1;
- start_y = selection->y1;
- stop_x = selection->x2;
- stop_y = selection->y2;
- } else if (selection->x2 < b->xMax && selection->y2 < b->yMax && i < begin) {
- begin = i;
- start_x = selection->x2;
- start_y = selection->y2;
- stop_x = selection->x1;
- stop_y = selection->y1;
+ // find the nearest blocks to the selection points
+ // using the manhattan distance.
+ for (flow = flows; flow; flow = flow->next) {
+ for (blk = flow->blocks; blk; blk = blk->next) {
+ count++;
+ // the first/last blocks in reading order are
+ // often not the closest to the page corners;
+ // track the corners, force those blocks to
+ // be selected if the selection runs across
+ // multiple pages.
+ xMin = fmin(xMin, blk->xMin);
+ yMin = fmin(yMin, blk->yMin);
+ xMax = fmax(xMax, blk->xMax);
+ yMax = fmax(yMax, blk->yMax);
+ for (i = 0; i < 2; i++) {
+ d = fmax(blk->xMin - x[i], 0.0) +
+ fmax(x[i] - blk->xMax, 0.0) +
+ fmax(blk->yMin - y[i], 0.0) +
+ fmax(y[i] - blk->yMax, 0.0);
+ if (!best_block[i] ||
+ d < best_d[i] ||
+ (!blk->next && !flow->next &&
+ x[i] > xMax && y[i] > yMax)) {
+ best_block[i] = blk;
+ best_flow[i] = flow;
+ best_count[i] = count;
+ best_d[i] = d;
+ }
+ }
}
+ }
+ for (i = 0; i < 2; i++) {
+ if (primaryLR) {
+ if (x[i] < xMin && y[i] < yMin) {
+ best_block[i] = flows->blocks;
+ best_flow[i] = flows;
+ best_count[i] = 1;
+ }
+ } else {
+ if (x[i] > xMax && y[i] < yMin) {
+ best_block[i] = flows->blocks;
+ best_flow[i] = flows;
+ best_count[i] = 1;
+ }
+ }
+ }
+ // assert: best is always set.
+ if (!best_block[0] || !best_block[1]) {
+ return;
+ }
- if ((selection->x1 > b->xMin && selection->y1 > b->yMin) ||
- (selection->x2 > b->xMin && selection->y2 > b->yMin))
- end = i + 1;
+ // Now decide which point was first.
+ if (best_count[0] < best_count[1] ||
+ (best_count[0] == best_count[1] && y[0] < y[1])) {
+ start = 0;
+ stop = 1;
+ } else {
+ start = 1;
+ stop = 0;
}
- for (i = begin; i < end; i++) {
- if (blocks[i]->xMin < start_x && start_x < blocks[i]->xMax &&
- blocks[i]->yMin < start_y && start_y < blocks[i]->yMax) {
- child_selection.x1 = start_x;
- child_selection.y1 = start_y;
+ for (flow = best_flow[start]; flow; flow = flow->next) {
+ if (flow == best_flow[start]) {
+ blk = best_block[start];
} else {
- child_selection.x1 = 0;
- child_selection.y1 = 0;
+ blk = flow->blocks;
}
- if (blocks[i]->xMin < stop_x && stop_x < blocks[i]->xMax &&
- blocks[i]->yMin < stop_y && stop_y < blocks[i]->yMax) {
- child_selection.x2 = stop_x;
- child_selection.y2 = stop_y;
- } else {
- child_selection.x2 = pageWidth;
- child_selection.y2 = pageHeight;
+ for (; blk; blk = blk->next) {
+ if (primaryLR) {
+ child_selection.x1 = blk->xMin;
+ child_selection.x2 = blk->xMax;
+ } else {
+ child_selection.x1 = blk->xMax;
+ child_selection.x2 = blk->xMin;
+ }
+ child_selection.y1 = blk->yMin;
+ child_selection.y2 = blk->yMax;
+ if (blk == best_block[start]) {
+ child_selection.x1 = fmax(blk->xMin, fmin(blk->xMax, x[start]));
+ child_selection.y1 = fmax(blk->yMin, fmin(blk->yMax, y[start]));
+ }
+ if (blk == best_block[stop]) {
+ child_selection.x2 = fmax(blk->xMin, fmin(blk->xMax, x[stop]));
+ child_selection.y2 = fmax(blk->yMin, fmin(blk->yMax, y[stop]));
+ blk->visitSelection(visitor, &child_selection, style);
+ return;
+ }
+ blk->visitSelection(visitor, &child_selection, style);
}
-
- blocks[i]->visitSelection(visitor, &child_selection, style);
}
}
@@ -4286,24 +4429,13 @@ void TextPage::dump(void *outputStream,
dumpFragment(line->text, n, uMap, s);
(*outputFunc)(outputStream, s->getCString(), s->getLength());
delete s;
- if (!line->hyphenated) {
- if (line->next) {
- (*outputFunc)(outputStream, space, spaceLen);
- } else if (blk->next) {
- //~ this is a bit of a kludge - we should really do a more
- //~ intelligent determination of paragraphs
- if (blk->next->lines->words->fontSize ==
- blk->lines->words->fontSize) {
- (*outputFunc)(outputStream, space, spaceLen);
- } else {
- (*outputFunc)(outputStream, eol, eolLen);
- }
- }
+ // output a newline when a hyphen is not suppressed
+ if (n == line->len) {
+ (*outputFunc)(outputStream, eol, eolLen);
}
}
}
(*outputFunc)(outputStream, eol, eolLen);
- (*outputFunc)(outputStream, eol, eolLen);
}
}
--- poppler-0.12.3/poppler/TextOutputDev.h 2009-09-09 23:22:31.000000000 +0200
+++ poppler-0.12.3/poppler/TextOutputDev.h 2010-01-22 17:03:34.000000000 +0100
@@ -361,6 +361,14 @@ public:
private:
+ GBool isBeforeByRule1(TextBlock *blk1);
+ GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1);
+ GBool isBeforeByRule2(TextBlock *blk1);
+
+ int visitDepthFirst(TextBlock *blkList, int pos1,
+ TextBlock **sorted, int sortPos,
+ GBool* visited);
+
TextPage *page; // the parent page
int rot; // text rotation
double xMin, xMax; // bounding box x coordinates