File old.patch of Package python-Whoosh
Index: whoosh-reloaded-2.7.5/src/whoosh/highlight.py
===================================================================
--- whoosh-reloaded-2.7.5.orig/src/whoosh/highlight.py
+++ whoosh-reloaded-2.7.5/src/whoosh/highlight.py
@@ -58,13 +58,13 @@ from whoosh.analysis import Token
from whoosh.compat import htmlescape
# The default value for the maximum chars to examine when fragmenting
-DEFAULT_CHARLIMIT = 2**15
+DEFAULT_CHARLIMIT = 2 ** 15
# Fragment object
-
-def mkfrag(text, tokens, startchar=None, endchar=None, charsbefore=0, charsafter=0):
+def mkfrag(text, tokens, startchar=None, endchar=None,
+ charsbefore=0, charsafter=0):
"""Returns a :class:`Fragment` object based on the :class:`analysis.Token`
objects in ``tokens``.
"""
@@ -131,11 +131,8 @@ class Fragment(object):
self.matched_terms.add(t.text)
def __repr__(self):
- return "<Fragment %d:%d has %d matches>" % (
- self.startchar,
- self.endchar,
- len(self.matches),
- )
+ return "<Fragment %d:%d has %d matches>" % (self.startchar, self.endchar,
+ len(self.matches))
def __len__(self):
return self.endchar - self.startchar
@@ -160,7 +157,6 @@ class Fragment(object):
# Tokenizing
-
def set_matched_filter(tokens, termset):
"""
Mark tokens to be highlighted as matched.
@@ -211,12 +207,8 @@ def set_matched_filter_phrases(tokens, t
if phrase.words[0] == text[i]: # If first word matched
if slop == 1:
# Simple substring match
- if (
- text[i + 1 : i + n_phrase_words] == phrase.words[1:]
- ): # If rest of phrase matches
- any(
- map(matches.add, range(i, i + n_phrase_words))
- ) # Collect matching indices
+ if text[i + 1:i + n_phrase_words] == phrase.words[1:]: # If rest of phrase matches
+ any(map(matches.add, range(i, i + n_phrase_words))) # Collect matching indices
# Advance past match area.
# Possible overlapping matches are ignored for efficiency, since they have low probability.
i += n_phrase_words
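The simple substring branch above, as a standalone sketch over plain word lists (a hypothetical helper, not the patched function itself):

    def find_exact_phrase(text_words, phrase_words):
        # Collect the indices of every word that takes part in an exact match.
        matches = set()
        n = len(phrase_words)
        i = 0
        while i <= len(text_words) - n:
            if text_words[i:i + n] == phrase_words:
                matches.update(range(i, i + n))
                i += n  # advance past the match; overlaps are ignored
            else:
                i += 1
        return matches

    # find_exact_phrase("one two three four".split(), "two three".split()) -> {1, 2}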
@@ -234,20 +226,14 @@ def set_matched_filter_phrases(tokens, t
For example, phrase is: 'one two three'~2
Target substring is: 'one two two six three', which is a valid match.
[0] [1] [2] [3] [4]
-
+
Looking for the first match will find [0], then [1], then fail since [4] is more than ~2 words away.
Looking for the last match will find [0], then, given a choice between [1] and [2], will pick [2],
making [4] visible from there.
"""
- text_sub = text[
- current_word_index + 1 : current_word_index + 1 + slop
- ][
- ::-1
- ] # Substring to scan (reversed)
+ text_sub = text[current_word_index + 1:current_word_index + 1 + slop][::-1] # Substring to scan (reversed)
len_sub = len(text_sub)
- next_word_index = (
- len_sub - text_sub.index(word) - 1
- ) # Map index back to unreversed list
+ next_word_index = len_sub - text_sub.index(word) - 1 # Map index back to unreversed list
last_slop_match = current_word_index + next_word_index + 1
slop_matches.append(last_slop_match)
current_word_index = last_slop_match
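How the reversed scan picks the last in-slop occurrence, as a minimal standalone sketch (hypothetical helper name; returns None when no occurrence exists):

    def next_word_within_slop(text, current_word_index, word, slop):
        # Scan the next `slop` words in reverse so that, among several
        # occurrences, the LAST one wins, keeping later phrase words reachable.
        text_sub = text[current_word_index + 1:current_word_index + 1 + slop][::-1]
        if word not in text_sub:
            return None
        len_sub = len(text_sub)
        return current_word_index + (len_sub - text_sub.index(word) - 1) + 1

    # words = "one two two six three".split()
    # next_word_within_slop(words, 0, "two", 2)   -> 2  (the later "two")
    # next_word_within_slop(words, 2, "three", 2) -> 4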
@@ -275,7 +261,6 @@ def set_matched_filter_phrases(tokens, t
# Fragmenters
-
class Fragmenter(object):
def must_retokenize(self):
"""Returns True if this fragmenter requires retokenized text.
@@ -366,7 +351,8 @@ class SentenceFragmenter(Fragmenter):
sa = StandardAnalyzer(stoplist=None)
"""
- def __init__(self, maxchars=200, sentencechars=".!?", charlimit=DEFAULT_CHARLIMIT):
+ def __init__(self, maxchars=200, sentencechars=".!?",
+ charlimit=DEFAULT_CHARLIMIT):
"""
:param maxchars: The maximum number of characters allowed in a
fragment.
@@ -522,9 +508,8 @@ class PinpointFragmenter(Fragmenter):
positions of the matched terms.
"""
- def __init__(
- self, maxchars=200, surround=20, autotrim=False, charlimit=DEFAULT_CHARLIMIT
- ):
+ def __init__(self, maxchars=200, surround=20, autotrim=False,
+ charlimit=DEFAULT_CHARLIMIT):
"""
:param maxchars: The maximum number of characters allowed in a
fragment.
@@ -592,13 +577,13 @@ class PinpointFragmenter(Fragmenter):
if ec - right <= surround and ec - left <= maxchars:
j += 1
right = ec
- currentlen += ec - next.startchar
+ currentlen += (ec - next.startchar)
else:
break
left = max(0, left - surround)
right = min(len(text), right + surround)
- fragment = Fragment(text, tokens[i : j + 1], left, right)
+ fragment = Fragment(text, tokens[i:j + 1], left, right)
if autotrim:
self._autotrim(fragment)
yield fragment
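The window arithmetic above in isolation, with made-up numbers:

    # Grow the fragment window by `surround` context chars on each side,
    # clamped to the bounds of the text.
    text_len, surround = 1000, 20
    left, right = 10, 990                      # span covered by the matched tokens
    left = max(0, left - surround)             # -> 0, not -10
    right = min(text_len, right + surround)    # -> 1000, not 1010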
@@ -606,7 +591,6 @@ class PinpointFragmenter(Fragmenter):
# Fragment scorers
-
class FragmentScorer(object):
pass
@@ -625,7 +609,6 @@ class BasicFragmentScorer(FragmentScorer
# Fragment sorters
-
def SCORE(fragment):
"Sorts higher scored passages first."
return 1
@@ -648,7 +631,6 @@ def SHORTER(fragment):
# Formatters
-
def get_text(original, token, replace):
"""Convenience function for getting the text to use for a match when
formatting.
@@ -661,7 +643,7 @@ def get_text(original, token, replace):
if replace:
return token.text
else:
- return original[token.startchar : token.endchar]
+ return original[token.startchar:token.endchar]
class Formatter(object):
@@ -718,19 +700,16 @@ class Formatter(object):
# Because the formatter is sequential, it will only pick the first
# token for a given position to highlight. This makes sure it picks
# the longest overlapping token.
- for t in sorted(
- fragment.matches,
- key=lambda token: (token.startchar, -(token.endchar - token.startchar)),
- ):
+ for t in sorted(fragment.matches, key=lambda token: (token.startchar, -(token.endchar - token.startchar))):
if t.startchar is None:
continue
if t.startchar < index:
continue
if t.startchar > index:
- output.append(self._text(text[index : t.startchar]))
+ output.append(self._text(text[index:t.startchar]))
output.append(self.format_token(text, t, replace))
index = t.endchar
- output.append(self._text(text[index : fragment.endchar]))
+ output.append(self._text(text[index:fragment.endchar]))
out_string = "".join(output)
return out_string
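The sort key in this hunk orders tokens by start position and, among tokens sharing a startchar, puts the longest first, so the sequential loop keeps the longest overlap; a quick standalone check:

    # (startchar, endchar) pairs standing in for tokens
    spans = [(5, 8), (5, 12), (0, 3)]
    spans.sort(key=lambda s: (s[0], -(s[1] - s[0])))
    assert spans == [(0, 3), (5, 12), (5, 8)]  # longest of the two (5, ...) spans wins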
@@ -740,7 +719,8 @@ class Formatter(object):
:class:`Fragment` objects.
"""
- formatted = [self.format_fragment(f, replace=replace) for f in fragments]
+ formatted = [self.format_fragment(f, replace=replace)
+ for f in fragments]
return self.between.join(formatted)
def __call__(self, text, fragments):
@@ -749,14 +729,16 @@ class Formatter(object):
class NullFormatter(Formatter):
- """Formatter that does not modify the string."""
+ """Formatter that does not modify the string.
+ """
def format_token(self, text, token, replace=False):
return get_text(text, token, replace)
class UppercaseFormatter(Formatter):
- """Returns a string in which the matched terms are in UPPERCASE."""
+ """Returns a string in which the matched terms are in UPPERCASE.
+ """
def __init__(self, between="..."):
"""
@@ -791,17 +773,11 @@ class HtmlFormatter(Formatter):
between searches to clear the mapping.
"""
- template = "<%(tag)s class=%(q)s%(cls)s%(tn)s%(q)s>%(t)s</%(tag)s>"
+ template = '<%(tag)s class=%(q)s%(cls)s%(tn)s%(q)s>%(t)s</%(tag)s>'
- def __init__(
- self,
- tagname="strong",
- between="...",
- classname="match",
- termclass="term",
- maxclasses=5,
- attrquote='"',
- ):
+ def __init__(self, tagname="strong", between="...",
+ classname="match", termclass="term", maxclasses=5,
+ attrquote='"'):
"""
:param tagname: the tag to wrap around matching terms.
:param between: the text to add between fragments.
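Filling the template by hand shows the markup format_token emits; the tag and class values below follow the defaults in __init__, and the joined "match term" class string is an assumption about how htmlclass combines classname and termclass:

    template = '<%(tag)s class=%(q)s%(cls)s%(tn)s%(q)s>%(t)s</%(tag)s>'
    print(template % {"tag": "strong", "q": '"', "cls": "match term",
                      "t": "whoosh", "tn": 0})
    # -> <strong class="match term0">whoosh</strong>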
@@ -837,16 +813,13 @@ class HtmlFormatter(Formatter):
termnum = len(seen) % self.maxclasses
seen[ttext] = termnum
- return self.template % {
- "tag": self.tagname,
- "q": self.attrquote,
- "cls": self.htmlclass,
- "t": ttext,
- "tn": termnum,
- }
+ return self.template % {"tag": self.tagname, "q": self.attrquote,
+ "cls": self.htmlclass, "t": ttext,
+ "tn": termnum}
def clean(self):
- """Clears the dictionary mapping terms to HTML classnames."""
+ """Clears the dictionary mapping terms to HTML classnames.
+ """
self.seen = {}
@@ -864,9 +837,8 @@ class GenshiFormatter(Formatter):
self.qname = qname
self.between = between
- from genshi.core import START, END, TEXT # type: ignore @UnresolvedImport
- from genshi.core import Attrs, Stream # type: ignore @UnresolvedImport
-
+ from genshi.core import START, END, TEXT # @UnresolvedImport
+ from genshi.core import Attrs, Stream # @UnresolvedImport
self.START, self.END, self.TEXT = START, END, TEXT
self.Attrs, self.Stream = Attrs, Stream
@@ -879,13 +851,9 @@ class GenshiFormatter(Formatter):
def format_token(self, text, token, replace=False):
qn = self.qname
txt = get_text(text, token, replace)
- return self.Stream(
- [
- (self.START, (qn, self.Attrs()), (None, -1, -1)),
- (self.TEXT, txt, (None, -1, -1)),
- (self.END, qn, (None, -1, -1)),
- ]
- )
+ return self.Stream([(self.START, (qn, self.Attrs()), (None, -1, -1)),
+ (self.TEXT, txt, (None, -1, -1)),
+ (self.END, qn, (None, -1, -1))])
def format_fragment(self, fragment, replace=False):
output = []
@@ -894,7 +862,7 @@ class GenshiFormatter(Formatter):
for t in fragment.matches:
if t.startchar > index:
- self._add_text(text[index : t.startchar], output)
+ self._add_text(text[index:t.startchar], output)
output.append((text, t, replace))
index = t.endchar
if index < len(text):
@@ -914,7 +882,6 @@ class GenshiFormatter(Formatter):
# Highlighting
-
def top_fragments(fragments, count, scorer, order, minscore=1):
scored_fragments = ((scorer(f), f) for f in fragments)
scored_fragments = nlargest(count, scored_fragments)
@@ -923,18 +890,8 @@ def top_fragments(fragments, count, scor
return best_fragments
-def highlight(
- text,
- terms,
- analyzer,
- fragmenter,
- formatter,
- top=3,
- scorer=None,
- minscore=1,
- order=FIRST,
- mode="query",
-):
+def highlight(text, terms, analyzer, fragmenter, formatter, top=3,
+ scorer=None, minscore=1, order=FIRST, mode="query"):
if scorer is None:
scorer = BasicFragmentScorer()
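A minimal usage sketch of highlight() with the module's own fragmenter and formatter classes (text and terms are illustrative):

    from whoosh.analysis import StandardAnalyzer
    from whoosh.highlight import ContextFragmenter, UppercaseFormatter, highlight

    excerpt = highlight("The quick brown fox jumps over the lazy dog",
                        terms=frozenset(["fox", "dog"]),
                        analyzer=StandardAnalyzer(),
                        fragmenter=ContextFragmenter(),
                        formatter=UppercaseFormatter())
    # matched terms come back uppercased, e.g. "...brown FOX jumps over the lazy DOG"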
@@ -957,14 +914,8 @@ def highlight(
class Highlighter(object):
- def __init__(
- self,
- fragmenter=None,
- scorer=None,
- formatter=None,
- always_retokenize=False,
- order=FIRST,
- ):
+ def __init__(self, fragmenter=None, scorer=None, formatter=None,
+ always_retokenize=False, order=FIRST):
self.fragmenter = fragmenter or ContextFragmenter()
self.scorer = scorer or BasicFragmentScorer()
self.formatter = formatter or HtmlFormatter(tagname="b")
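These defaults back Hit.highlights(); a typical end-to-end call, assuming a pre-built index under "indexdir" with a "content" field:

    from whoosh import index
    from whoosh.qparser import QueryParser

    ix = index.open_dir("indexdir")              # assumed existing index
    with ix.searcher() as searcher:
        query = QueryParser("content", ix.schema).parse("fox")
        for hit in searcher.search(query, terms=True):
            print(hit.highlights("content"))     # <b> tags from the default HtmlFormatter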
@@ -1030,7 +981,7 @@ class Highlighter(object):
token = t.copy()
elif t.startchar <= token.endchar:
if t.endchar > token.endchar:
- token.text += t.text[token.endchar - t.endchar :]
+ token.text += t.text[token.endchar - t.endchar:]
token.endchar = t.endchar
else:
yield token
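A worked instance of the merge slice above, joining a token covering chars 0-6 of "highlight" with an overlapping one covering chars 4-9:

    a_text, a_end = "highli", 6        # merged token accumulated so far
    t_text, t_end = "light", 9         # overlapping token extending past it
    a_text += t_text[a_end - t_end:]   # 6 - 9 == -3, so only the tail "ght" is appended
    assert a_text == "highlight"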
@@ -1041,9 +992,7 @@ class Highlighter(object):
if token is not None:
yield token
- def highlight_hit(
- self, hitobj, fieldname, text=None, top=3, minscore=1, strict_phrase=False
- ):
+ def highlight_hit(self, hitobj, fieldname, text=None, top=3, minscore=1, strict_phrase=False):
results = hitobj.results
schema = results.searcher.schema
field = schema[fieldname]
@@ -1057,7 +1006,8 @@ class Highlighter(object):
# Get the terms searched for/matched in this field
if results.has_matched_terms():
- bterms = (term for term in results.matched_terms() if term[0] == fieldname)
+ bterms = (term for term in results.matched_terms()
+ if term[0] == fieldname)
else:
bterms = results.query_terms(expand=True, fieldname=fieldname)
# Convert bytes to unicode
@@ -1069,11 +1019,8 @@ class Highlighter(object):
if fieldname not in results._char_cache:
self._load_chars(results, fieldname, words, to_bytes)
- hitterms = (
- from_bytes(term[1])
- for term in hitobj.matched_terms()
- if term[0] == fieldname
- )
+ hitterms = (from_bytes(term[1]) for term in hitobj.matched_terms()
+ if term[0] == fieldname)
# Grab the word->[(startchar, endchar)] map for this docnum
cmap = results._char_cache[fieldname][hitobj.docnum]
@@ -1085,21 +1032,17 @@ class Highlighter(object):
for pos, startchar, endchar in chars:
if charlimit and endchar > charlimit:
break
- tokens.append(
- Token(text=word, pos=pos, startchar=startchar, endchar=endchar)
- )
+ tokens.append(Token(text=word, pos=pos,
+ startchar=startchar, endchar=endchar))
tokens.sort(key=lambda t: t.startchar)
- tokens = [
- max(group, key=lambda t: t.endchar - t.startchar)
- for key, group in groupby(tokens, lambda t: t.startchar)
- ]
+ tokens = [max(group, key=lambda t: t.endchar - t.startchar)
+ for key, group in groupby(tokens, lambda t: t.startchar)]
fragments = self.fragmenter.fragment_matches(text, tokens)
else:
# Retokenize the text
analyzer = results.searcher.schema[fieldname].analyzer
- tokens = analyzer(
- text, positions=True, chars=True, mode="index", removestops=False
- )
+ tokens = analyzer(text, positions=True, chars=True, mode="index",
+ removestops=False)
# Set Token.matched attribute for tokens that match a query term
if strict_phrase:
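The groupby step in this hunk keeps one token per start position, preferring the longest; the same idea in isolation (input presorted by startchar, as groupby requires):

    from itertools import groupby

    spans = [(0, 3), (0, 5), (8, 10)]   # (startchar, endchar) pairs
    longest = [max(group, key=lambda s: s[1] - s[0])
               for _, group in groupby(spans, key=lambda s: s[0])]
    assert longest == [(0, 5), (8, 10)]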
@@ -1110,8 +1053,8 @@ class Highlighter(object):
tokens = self._merge_matched_tokens(tokens)
fragments = self.fragmenter.fragment_tokens(text, tokens)
- fragments = top_fragments(
- fragments, top, self.scorer, self.order, minscore=minscore
- )
+ fragments = top_fragments(fragments, top, self.scorer, self.order,
+ minscore=minscore)
output = self.formatter.format(fragments)
return output
+