File htdig-simpleUTF8.patch of Package htdig

Index: htnet/HtHTTP.cc
===================================================================
--- htnet/HtHTTP.cc.orig
+++ htnet/HtHTTP.cc
@@ -643,6 +643,8 @@
     String	line = 0;
     int		inHeader = 1;
 
+	_needUTF8Convert = 0;	// cleared here; set to 1 only where Content-Type names text/html and UTF-8
+
     if (_response._modification_time)
     {
 	delete _response._modification_time;
@@ -731,8 +733,27 @@
             token = strtok(token, "\n\t");
 
             if (token && *token)
+            {
                 _response._content_type = token;
-
+                // Media type and charset names are case-insensitive, so a
+                // header such as "text/html; charset=utf-8" must match too.
+                int isHtml = 0;
+                int isUtf8 = 0;
+                for (char *scan = token; *scan; scan++)
+                {
+                    if (! mystrncasecmp(scan, "text/html", 9))
+                        isHtml = 1;
+                    if (! mystrncasecmp(scan, "utf-8", 5))
+                        isUtf8 = 1;
+                }
+
+                if (isHtml && isUtf8)
+                {
+                    if ( debug > 4 )
+                        cout << "needUTF8Convert flagged" << endl;
+                    _needUTF8Convert = 1;
+                }
+            }
          }
          else if( ! mystrncasecmp((char*)line, "content-length:", 15))
          {
@@ -970,6 +979,67 @@
 
     }
 
+    // Simple in-place down-conversion of a UTF-8 body to single bytes:
+    // ASCII is copied, U+0080..U+00FF become one byte each, and every
+    // other (or malformed) sequence is replaced by a single '?'.
+    if ( _needUTF8Convert )
+    {
+        if ( debug > 4 )
+            cout << "Converting UTF-8 characters" << endl;
+
+        // Iterate over the recorded length instead of scanning for a NUL,
+        // so a truncated multi-byte sequence can never advance the read
+        // pointer beyond the end of the buffer.
+        char *dstPtr = _response._contents.get();
+        const unsigned char *srcPtr = (const unsigned char *) dstPtr;
+        const unsigned char *endPtr = srcPtr + _response._contents.length();
+
+        while ( srcPtr < endPtr )
+        {
+            const unsigned char lead = *srcPtr++;
+
+            if ( ( lead & 0x80 ) == 0 )
+            {
+                *dstPtr++ = (char) lead;        // plain ASCII
+                continue;
+            }
+
+            // Only lead bytes 0xC2/0xC3 encode U+0080..U+00FF; the second
+            // byte must be a real continuation byte (10xxxxxx).
+            if ( ( lead == 0xC2 || lead == 0xC3 )
+                 && srcPtr < endPtr && ( *srcPtr & 0xC0 ) == 0x80 )
+            {
+                *dstPtr++ = (char) ( ( ( lead & 0x03 ) << 6 ) | ( *srcPtr & 0x3F ) );
+                srcPtr++;
+                continue;
+            }
+
+            // Continuation bytes announced by the lead byte; 0 for a stray
+            // continuation byte or an invalid lead byte.
+            int trailing = 0;
+            if ( ( lead & 0xE0 ) == 0xC0 )
+                trailing = 1;
+            else if ( ( lead & 0xF0 ) == 0xE0 )
+                trailing = 2;
+            else if ( ( lead & 0xF8 ) == 0xF0 )
+                trailing = 3;
+
+            // Not representable or malformed: emit one '?' and skip only
+            // the continuation bytes that are actually present.
+            *dstPtr++ = '?';
+            while ( trailing-- > 0 && srcPtr < endPtr
+                    && ( *srcPtr & 0xC0 ) == 0x80 )
+                srcPtr++;
+        }
+
+        // The text can only shrink.  Terminate it and drop the freed tail
+        // so that length() below reports the converted size.
+        // NOTE(review): assumes htlib String::chop(int) removes that many
+        // trailing characters -- confirm against htString.h.
+        *dstPtr = 0;
+        _response._contents.chop( (int) ( endPtr - (const unsigned char *) dstPtr ) );
+    }
+
     // Set document length
     _response._document_length = _response._contents.length();
 
Index: htnet/HtHTTP.h
===================================================================
--- htnet/HtHTTP.h.orig
+++ htnet/HtHTTP.h
@@ -316,6 +316,7 @@
    int      	_bytes_read;        // Bytes read
    URL		_url;               // URL to retrieve
    URL		_referer;	    // Referring URL
+   int      _needUTF8Convert;   // Nonzero when a text/html UTF-8 body still has to be converted
 
    String      _accept_language;    // accept-language directive
    
openSUSE Build Service is sponsored by