File saxon-add-fixes-from-com-isl-saxon-aelfred.patch of Package saxon6

--- XmlParser.java.old	2012-12-04 12:58:09.544956168 +0100
+++ XmlParser.java	2012-12-04 15:28:20.798929872 +0100
@@ -62,10 +62,12 @@
 // The modification over the original source are flagged by
 // <struct/> tags.
 
-// <struct>
-// package org.brownell.xml.aelfred2;
-import org.brownell.xml.aelfred2.*;
-// </struct>
+// Removed all <struct/> modifications and adapted the code so it can replace
+// the GPL com/icl/saxon/aelfred/XmlParser.java, which is not compatible with
+// the MPL license of the rest of the code.
+// mvyskocil@suse.com
+
+package com.icl.saxon.aelfred;
 
 import java.io.BufferedInputStream;
 import java.io.CharConversionException;
@@ -81,27 +83,20 @@
 import java.util.Stack;
 
 import org.xml.sax.SAXException;
-// <struct>
-import org.xml.sax.SAXNotRecognizedException;
-// </struct>
-
 // $Id: XmlParser.java,v 1.20 2000/05/29 12:10:24 mojo Exp $
 
 /**
  * Parse XML documents and return parse events through call-backs.
- * Use the <code>StructSaxDriver</code> class as your entry point, as all
+ * Use the <code>SAXDriver</code> class as your entry point, as all
  * internal parser interfaces are subject to change.
  *
  * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
  *	(version 1.2a with bugfixes)
  * @author Updated by David Brownell &lt;david-b@pacbell.net&gt;
  * @version $Date: 2000/05/29 12:10:24 $
- * @see StructSaxDriver
+ * @see SAXDriver
  */
-// <struct>
-// final class XmlParser
-final class StructXmlParser
-// </struct>
+final class XmlParser
 {
     // parse from buffer, avoiding slow per-character readCh()
     private final static boolean USE_CHEATS = true;
@@ -121,10 +116,7 @@
      * @see #parse
      */
     // package private
-// <struct>
-//   XmlParser ()
-   StructXmlParser ()
-// </struct>
+   XmlParser ()
     {
 	cleanupVariables ();
     }
@@ -136,10 +128,7 @@
      * @see #parse
      */
     // package private
-// <struct>
-// final class XmlParser
-//    void setHandler (SaxDriver handler)
-    void setHandler (StructSaxDriver handler)
+    void setHandler (SAXDriver handler)
 // </struct>
     {
 	this.handler = handler;
@@ -194,7 +183,7 @@
 	handler.startDocument ();
 
 	pushURL ("[document]", basePublicId, baseURI,
-		baseReader, baseInputStream, encoding);
+		baseReader, baseInputStream, encoding, false);
 
 	try {
 	    parseDocument ();
@@ -461,6 +450,8 @@
     private final static int LIT_DISABLE_EREF = 64;
 	// don't expand general entities, but make sure we _could_
     private final static int LIT_ENTITY_CHECK = 128;
+	// literal is a public ID value
+    private final static int LIT_PUBID = 256;
 
 
     //
@@ -480,7 +471,7 @@
      * Report an error.
      * @param message The error message.
      * @param textFound The text that caused the error (or null).
-     * @see StructSaxDriver#error
+     * @see SAXDriver#error
      * @see #line
      */
     private void error (String message, String textFound, String textExpected)
@@ -543,47 +534,13 @@
     {
 	char c;
 
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the comments
-		//
-
-		handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
-		handler.startElement ("str:document");
-	}
-// </struct>
-	
+        try {
 	parseProlog ();
-
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the document structure
-		//
-
-		handler.startElement ("str:body");
-		
-	}
-// </struct>
-
 	require ('<');
 	parseElement ();
-
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the document structure
-		//
-
-		handler.endElement ("str:body");
-		handler.startElement ("str:epilog");
-		
-	}
-// </struct>
+        } catch (EOFException eofe) {
+            error("premature end of file");
+        }
 
 	try {
 	    parseMisc ();   //skip all white, PIs, and comments
@@ -593,19 +550,6 @@
 	    return;
 	}
 
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the comments
-		//
-
-		handler.endElement ("str:epilog");
-		handler.endElement ("str:document");
-	}
-// </struct>
-
-
     }
 
 
@@ -626,23 +570,7 @@
 	parseUntil ("--");
 	require ('>');
 	expandPE = saved;
-// <struct>
-	if (! translateComments ) {
-// </struct>
-		handler.comment (dataBuffer, 0, dataBufferPos);
-// <struct>
-	} else {
-		//
-		// Struct: updated to show the comments
-		//
-		System.err.println("Comment : " );
-		handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
-		handler.startElement ("str:comment");
-	    handler.charData (dataBuffer, 0, dataBufferPos);
-	    handler.endElement ("str:comment");
-		
-	}
-// </struct>
+	handler.comment (dataBuffer, 0, dataBufferPos);
 	dataBufferPos = 0;
     }
 
@@ -711,45 +639,12 @@
     private void parseProlog ()
     throws Exception
     {
-
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the prolog
-		//
-
-		handler.startElement ("str:prolog");
-		if (version != null)
-			handler.attribute ("str:version", version, false);
-		if (encodingName != null)
-			handler.attribute ("str:encoding", encodingName, false);
-		if (standalone != null)
-			handler.attribute ("str:standalone", standalone, false);
-		handler.startElement ("str:X-M-L-Decl");
-		handler.endElement ("str:X-M-L-Decl");
-		
-	}
-// </struct>
-
 	parseMisc ();
 
 	if (tryRead ("<!DOCTYPE")) {
 	    parseDoctypedecl ();
 	    parseMisc ();
 	}
-	
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the prolog
-		//
-
-		handler.endElement ("str:prolog");
-	}
-// </struct>
-
     }
 
 
@@ -775,12 +670,14 @@
     throws SAXException, IOException
     {
 	boolean	white;
+        String encodingName = null;
+        String standalone;
 	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
 
 	// Read the version.
 	require ("version");
 	parseEq ();
-	version = readLiteral (flags);
+	String version = readLiteral (flags);
 	if (!version.equals ("1.0")) {
 	    error ("unsupported XML version", version, "1.0");
 	}
@@ -905,10 +802,8 @@
 		encoding = ENCODING_UTF_8;
 		return;
 	    } else if (encoding != ENCODING_EXTERNAL) {
-		// fatal error
-		error ("unsupported ASCII-derived encoding",
-		       encodingName,
-		       "UTF-8, US-ASCII, or ISO-8859-1");
+		// used to start with a new reader ...
+		throw new EncodingException(encodingName);
 	    }
 	    // else fallthrough ...
 	    // it's ASCII-ish and something other than a builtin
@@ -962,7 +857,7 @@
 
 	reader = new InputStreamReader (is, encodingName);
 	sourceType = INPUT_READER;
-	is = null;
+	//is = null;
     }
 
 
@@ -1014,24 +909,6 @@
 	// report (a) declaration of name, (b) lexical info (ids)
 	handler.doctypeDecl (doctypeName, ids [0], ids [1]);
 
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the doctype
-		//
-
-		if (doctypeName != null)
-			handler.attribute ("str:name", doctypeName, false);
-		if (ids [0] != null)
-			handler.attribute ("str:publicId", ids [0], false);
-		if (ids [1] != null)
-			handler.attribute ("str:systemId", ids [1], false);
-		handler.startElement ("str:doctype");
-		
-	}
-// </struct>
-
 	// Internal subset is parsed first, if present
 	skipWhitespace ();
 	if (tryRead ('[')) {
@@ -1054,7 +931,7 @@
 
 	// Read the external subset, if any
 	if (ids [1] != null) {
-	    pushURL ("[external subset]", ids [0], ids [1], null, null, null);
+	    pushURL ("[external subset]", ids [0], ids [1], null, null, null, false);
 
 	    // Loop until we end up back at '>'
 	    while (true) {
@@ -1078,20 +955,6 @@
 	// done dtd
 	handler.endDoctype ();
 	expandPE = false;
-
-// <struct>
-	if (showDocStructure) {
-	
-		//
-		// Struct: updated to show the doctype
-		//
-
-		handler.endElement ("str:doctype");
-		
-	}
-// </struct>
-
-
     }
 
 
@@ -1191,9 +1054,10 @@
 		    }
 		}
 		// I guess not...
-		handler.attribute (aname,
-				   getAttributeExpandedValue (gi, aname),
-				   false);
+                String foo = getAttributeExpandedValue (gi, aname);
+                if ( foo != null) {
+                    handler.attribute (aname, foo, false);
+                }
 	    }
 	}
 
@@ -1224,7 +1088,7 @@
      * [41] Attribute ::= Name Eq AttValue
      * </pre>
      * @param name The name of the attribute's element.
-     * @see StructSaxDriver#attribute
+     * @see SAXDriver#attribute
      */
     private void parseAttribute (String name)
     throws Exception
@@ -1321,16 +1185,7 @@
 	char c;
 
 	while (true) {
-
-	    switch (currentElementContent) {
-	    case CONTENT_ANY:
-	    case CONTENT_MIXED:
-		parseCharData ();
-		break;
-	    case CONTENT_ELEMENTS:
-		parseWhitespace ();
-		break;
-	    }
+	    parseCharData ();
 
 	    // Handle delimiters
 	    c = readCh ();
@@ -1425,10 +1280,10 @@
     throws Exception
     {
 	if (tryRead ("EMPTY")) {
-	    setElement (name, CONTENT_EMPTY, null);
+	    setElement (name, CONTENT_EMPTY, null, null);
 	    return;
 	} else if (tryRead ("ANY")) {
-	    setElement (name, CONTENT_ANY, null);
+	    setElement (name, CONTENT_ANY, null, null);
 	    return;
 	} else {
 	    require ('(');
@@ -1437,10 +1292,10 @@
 	    if (tryRead ("#PCDATA")) {
 		dataBufferAppend ("#PCDATA");
 		parseMixed ();
-		setElement (name, CONTENT_MIXED, dataBufferToString ());
+		setElement (name, CONTENT_MIXED, dataBufferToString (), null);
 	    } else {
 		parseElements ();
-		setElement (name, CONTENT_ELEMENTS, dataBufferToString ());
+		setElement (name, CONTENT_ELEMENTS, dataBufferToString (), null);
 	    }
 	}
     }
@@ -1624,7 +1479,7 @@
     {
 	String name;
 	int type;
-	String enum = null;
+	String enum2 = null;
 
 	// Read the attribute name.
 	name = readNmtoken (true);
@@ -1636,12 +1491,12 @@
 	// Get the string of enumerated values
 	// if necessary.
 	if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
-	    enum = dataBufferToString ();
+	    enum2 = dataBufferToString ();
 	}
 
 	// Read the default value.
 	requireWhitespace ();
-	parseDefault (elementName, name, type, enum);
+	parseDefault (elementName, name, type, enum2);
     }
 
 
@@ -1739,12 +1594,14 @@
 	String elementName,
 	String name,
 	int type,
-	String enum
+	String enum2
     ) throws Exception
     {
 	int	valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
 	String	value = null;
-	int	flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK;
+	int	flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK | LIT_DISABLE_PE;
+	                                                               // ^^^^^^^^^^^^^^
+	                                                               // added MHK 20 Mar 2002
 
 	// Note: char refs not checked here, and input not normalized,
 	// since it's done correctly later when we actually expand any
@@ -1769,7 +1626,7 @@
 	    }
 	} else
 	    value = readLiteral (flags);
-	setAttribute (elementName, name, type, enum, value, valueType);
+	setAttribute (elementName, name, type, enum2, value, valueType);
     }
 
 
@@ -1946,51 +1803,35 @@
 
 	name = readNmtoken (true);
 	require (';');
-// <struct>
-	if (! translateExternalParsedEntities) {
-// </struct>
-		switch (getEntityType (name)) {
-		case ENTITY_UNDECLARED:
-		    error ("reference to undeclared entity", name, null);
-		    break;
-		case ENTITY_INTERNAL:
-			System.err.println("Internal");
-		    pushString (name, getEntityValue (name));
-		    break;
-		case ENTITY_TEXT:
-			System.err.println("Text");
-		    if (externalAllowed) {
-			pushURL (name, getEntityPublicId (name),
-				 getEntitySystemId (name),
-				 null, null, null);
-		    } else {
-			error ("reference to external entity in attribute value.",
-				name, null);
-		    }
-		    break;
-		case ENTITY_NDATA:
-			System.err.println("NDATA");
-		    if (externalAllowed) {
-			error ("unparsed entity reference in content", name, null);
-		    } else {
-			error ("reference to external entity in attribute value.",
-				name, null);
-		    }
-		    break;
-		}
-// <struct>
-	} else {
-		//
-		// Struct: updated to show the entity call
-		//
-		System.err.println("Entity reference : " + name);
-		handler.attribute ("xmlns:str", "http://4xt.org/ns/xmlstructure", false);
-		handler.attribute ("str:name", name, false);
-		handler.startElement ("str:entity");
-	    handler.endElement ("str:entity");
-
-	}
-  // </struct>
+        switch (getEntityType (name)) {
+        case ENTITY_UNDECLARED:
+            error ("reference to undeclared entity", name, null);
+            break;
+        case ENTITY_INTERNAL:
+            pushString (name, getEntityValue (name));
+            System.err.println("Internal");
+            break;
+        case ENTITY_TEXT:
+            System.err.println("Text");
+            if (externalAllowed) {
+                pushURL (name, getEntityPublicId (name),
+                            getEntitySystemId (name),
+                            null, null, null, true);
+            } else {
+                error ("reference to external entity in attribute value.",
+                        name, null);
+            }
+            break;
+        case ENTITY_NDATA:
+                System.err.println("NDATA");
+            if (externalAllowed) {
+                error ("unparsed entity reference in content", name, null);
+            } else {
+                error ("reference to external entity in attribute value.",
+                        name, null);
+            }
+            break;
+        }
   }
 
 
@@ -2027,7 +1868,7 @@
 		pushString (null, " ");
 	    pushURL (name, getEntityPublicId (name),
 		     getEntitySystemId (name),
-		     null, null, null);
+		     null, null, null, true);
 	    if (!inLiteral)
 		pushString (null, " ");
 	    break;
@@ -2096,24 +1937,6 @@
 		notationName = readNmtoken (true);
 		setExternalDataEntity (name, ids [0], ids [1], notationName);
 	    } else {
-				
-// <struct>
-			if (showDocStructure) {
-			
-				//
-				// Struct: updated to show the external entities definitions
-				//
-		
-				handler.attribute ("str:name", name, false);
-				handler.attribute ("str:type", ids [0], false);
-				handler.attribute ("str:systemId", ids [1], false);
-				handler.startElement ("str:externalEntityDefinition");
-				handler.endElement ("str:externalEntityDefinition");
-				
-			}
-// </struct>
-		
-		
 		setExternalTextEntity (name, ids [0], ids [1]);
 	    }
 	}
@@ -2222,6 +2045,7 @@
 
 	// OK, the cheat didn't work; start over
 	// and do it by the book.
+	int closeSquareBracketCount = 0;
 	while (true) {
 	    c = readCh ();
 	    switch (c) {
@@ -2229,8 +2053,19 @@
 	    case '&':
 		unread (c);
 		return;
-	    // XXX "]]>" precluded ...
+	    case ']':
+	        closeSquareBracketCount++;
+	        dataBufferAppend(c);
+	        break;
+        case '>':
+            if (closeSquareBracketCount>=2) {
+                // we've hit ']]>'
+                error ("']]>' is not allowed here");
+                break;
+            }
+            // fall-through
 	    default:
+	        closeSquareBracketCount=0;
 		dataBufferAppend (c);
 		break;
 	    }
@@ -2486,6 +2321,9 @@
 		    // Can't escape this normalization for attributes
 		case '\n':
 		case '\r':
+		    if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
+			c = ' ';
+		    break;
 		case '\t':
 		    if ((flags & LIT_ATTRIBUTE) != 0)
 			c = ' ';
@@ -2497,7 +2335,6 @@
 		    if (c == '#') {
 			if ((flags & LIT_DISABLE_CREF) != 0) {
 			    dataBufferAppend ('&');
-			    dataBufferAppend ('#');
 			    continue;
 			}
 			parseCharRef ();
@@ -2578,7 +2415,7 @@
 
 	if (tryRead ("PUBLIC")) {
 	    requireWhitespace ();
-	    ids [0] = readLiteral (LIT_NORMALIZE | flags);
+	    ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags);
 	    if (inNotation) {
 		skipWhitespace ();
 		c = readCh ();
@@ -2987,30 +2824,32 @@
      *  [2] attribute hash table
      */
     private Object []
-    setElement (String name, int contentType, String contentModel)
+    setElement (String name, int contentType, String contentModel, Hashtable attributes) throws Exception
     {
-	Object element [] = (Object []) elementInfo.get (name);
+	Object element[] = (Object []) elementInfo.get (name);
 
 	// first <!ELEMENT ...> or <!ATTLIST ...> for this type
 	if (element == null) {
 	    element = new Object [3];
 	    element [0] = new Integer (contentType);
 	    element [1] = contentModel;
-	    element [2] = new Hashtable (DEFAULT_ATTR_COUNT);
+	    element [2] = attributes;
 	    elementInfo.put (name, element);
 	    return element;
 	}
 
 	// multiple <!ELEMENT ...> declarations
+	if (contentType != CONTENT_UNDECLARED) {
 	if (((Integer) element [0]).intValue () != CONTENT_UNDECLARED) {
-	    // warn ("multiple declarations for element type", name, null);
-	    return element;
-	}
-
 	// <!ELEMENT ...> after associated <!ATTLIST ...>
 	element [0] = new Integer (contentType);
 	element [1] = contentModel;
+        }
 
+        }
+        else if (attributes != null) {
+            element[2] = attributes;
+        }
 	return element;
     }
 
@@ -3023,9 +2862,9 @@
     {
 	Object element[] = (Object[]) elementInfo.get (name);
 
-	if (element == null)
-	    element = setElement (name, CONTENT_UNDECLARED, null);
-	return (Hashtable) element [2];
+	if (element == null) return null;
+	
+        return (Hashtable) element [2];
     }
 
 
@@ -3211,6 +3050,9 @@
 
 	// Create a new hashtable if necessary.
 	attlist = getElementAttributes (elName);
+	if (attlist == null) {
+	    attlist = new Hashtable ();
+	}
 
 	// ignore multiple attribute declarations!
 	if (attlist.get (name) != null) {
@@ -3224,6 +3066,9 @@
 	    attribute [3] = enumeration;
 	    attribute [4] = null;
 	    attlist.put (name, attribute);
+
+	    // save; but don't overwrite any existing <!ELEMENT ...>
+	    setElement (elName, CONTENT_UNDECLARED, null, attlist);
 	}
     }
 
@@ -3321,8 +3166,18 @@
 	if (entity == null) {
 	    return null;
 	} else {
-	    return (String) entity [2];
-	}
+    	    try {
+        	    String relativeURI = (String)entity [2];
+        	    URL baseURI = (URL)entity [5];
+        	    if (baseURI==null) return relativeURI;
+        	    URL absoluteURI = new URL( baseURI, relativeURI );
+        	    return absoluteURI.toString();
+        	} catch (IOException err) {
+        	    // ignore the exception, a user entity resolver may be able
+        	    // to do something; if not, the error will be caught later
+        	    return (String)entity [2];
+        	}
+    	}
     }
 
 
@@ -3402,12 +3257,14 @@
 	Object entity[];
 
 	if (entityInfo.get (eName) == null) {
-	    entity = new Object [5];
+	    entity = new Object [6];
 	    entity [0] = new Integer (eClass);
 	    entity [1] = pubid;
 	    entity [2] = sysid;
 	    entity [3] = value;
 	    entity [4] = nName;
+	    entity [5] = (externalEntity == null ? null : externalEntity.getURL());
+	                    // added MHK: provides base URI for resolution
 
 	    entityInfo.put (eName, entity);
 	}
@@ -3554,7 +3411,6 @@
 	while (readBufferPos >= readBufferLength) {
 	    switch (sourceType) {
 	    case INPUT_READER:
-	    case INPUT_EXTERNAL:
 	    case INPUT_STREAM:
 		readDataChunk ();
 		while (readBufferLength < 1) {
@@ -3578,9 +3434,9 @@
 	    line++;
 	    column = 0;
 	} else {
-	    if (c == '<')
+	    if (c == '<') {
 		/* favorite return to parseContent () .. NOP */ ;
-	    else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
+            }else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
 		error ("illegal XML character U+"
 			+ Integer.toHexString (c));
 
@@ -3589,7 +3445,7 @@
 	    // are also spots in the internal subset where PE refs are fatal
 	    // errors, hence yet another flag.
 	    else if (c == '%' && expandPE) {
-		if (peIsError)
+		if (peIsError && entityStack.size()==1)
 		    error ("PE reference within decl in internal subset.");
 		parsePEReference ();
 		return readCh ();
@@ -3669,7 +3525,7 @@
      * request an encoding explicitly, and it should also look at the
      * headers with an HTTP connection.
      * @param url The java.net.URL object for the entity.
-     * @see StructSaxDriver#resolveEntity
+     * @see SAXDriver#resolveEntity
      * @see #pushString
      * @see #sourceType
      * @see #pushInput
@@ -3683,7 +3539,8 @@
 	String		systemId,
 	Reader		reader,
 	InputStream	stream,
-	String		encoding
+	String		encoding,
+	boolean     isAbsolute
     ) throws SAXException, IOException
     {
 	URL	url;
@@ -3700,28 +3557,43 @@
 	readBufferOverflow = -1;
 	is = null;
 	line = 1;
+        column = 0;
 
 	currentByteCount = 0;
 
+        if (!isAbsolute) {
 	// Make any system ID (URI/URL) absolute.  There's one case
 	// where it may be null:  parser was invoked without providing
 	// one, e.g. since the XML data came from a memory buffer.
 
+        try {
 	if (systemId != null && externalEntity != null) {
 	    systemId = new URL (externalEntity.getURL (), systemId).toString ();
 	} else if (baseURI != null) {
 	    systemId = new URL (new URL (baseURI), systemId).toString ();
 	    // throws IOException if couldn't create new URL
 	}
+        } catch(java.io.IOException ioe) {
+            popInput();
+            error("Invalid URL " + systemId + "\n" + ioe.getMessage());
+        }
+        }
 
 	// See if the application wants to
 	// redirect the system ID and/or
 	// supply its own character stream.
 	if (reader == null && stream == null && systemId != null) {
-	    Object input = handler.resolveEntity (publicId, systemId);
+	    Object input = null;
+            try {
+                input = handler.resolveEntity (publicId, systemId);
+            } catch (java.io.IOException ioe){
+                popInput();
+                error("Failure resolving entity " + systemId + "\n" + ioe.getMessage());
+            }
 	    if (input != null) {
 		if (input instanceof String) {
 		    systemId = (String) input;
+                    isAbsolute = true;
 		} else if (input instanceof InputStream) {
 		    stream = (InputStream) input;
 		} else if (input instanceof Reader) {
@@ -3748,20 +3620,23 @@
 	
 	// Else we handle the conversion, and need to ensure
 	// it's done right.
+	sourceType = INPUT_STREAM;
 	if (stream != null) {
-	    sourceType = INPUT_STREAM;
 	    is = stream;
 	    url = null;
 	} else {
 	    // We have to open our own stream to the URL.
 
-	    // Set the new status
-	    sourceType = INPUT_EXTERNAL;
 	    url = new URL (systemId);
 
+            try {
 	    externalEntity = url.openConnection ();
 	    externalEntity.connect ();
 	    is = externalEntity.getInputStream ();
+            } catch (java.io.IOException ioe){
+                popInput();
+                error("Cannot read from " + systemId + "\n" + ioe.getMessage());
+            }
 	}
 
 	// If we get to here, there must be
@@ -3781,7 +3656,7 @@
 		// application/xml;charset=something;otherAttr=...
 		// ... with many variants on 'something'
 		encoding = externalEntity.getContentType ();
-		temp = encoding.indexOf ("charset");
+		temp = (encoding != null) ? encoding.indexOf ("charset") : -1;
 
 		// RFC 2376 sez MIME text defaults to ASCII, but since the
 		// JDK will create a MIME type out of thin air, we always
@@ -3790,7 +3665,7 @@
 		    encoding = null;	// autodetect
 		else {
 		    temp = encoding.indexOf ('=', temp + 7);
-		    encoding = encoding.substring (temp);
+		    encoding = encoding.substring (temp+1);
 		    if ((temp = encoding.indexOf (';')) > 0)
 			encoding = encoding.substring (0, temp);
 
@@ -3817,9 +3692,39 @@
 	    detectEncoding ();
 	    ignoreEncoding = false;
 	}
+	is.mark(100);
 
 	// Read any XML or text declaration.
-	tryEncodingDecl (ignoreEncoding);
+	try {
+	    tryEncodingDecl (ignoreEncoding);
+	} catch (EncodingException x) {
+	    encoding = x.getMessage ();
+
+	    // if we don't handle the declared encoding,
+	    // try letting a JVM InputStreamReader do it
+	    try {
+		if (sourceType != INPUT_STREAM)
+		    throw x;
+
+		is.reset ();
+		readBufferPos = 0;
+		readBufferLength = 0;
+		readBufferOverflow = -1;
+		line = 1;
+		currentByteCount = column = 0;
+
+		sourceType = INPUT_READER;
+		this.reader = new InputStreamReader (is, encoding);
+		is = null;
+
+		tryEncodingDecl (true);
+
+	    } catch (IOException e) {
+		error ("unsupported text encoding",
+		       encoding,
+		       null);
+	    }
+        }
     }
 
 
@@ -3916,6 +3821,14 @@
 	    // ff fe 00 00 UCS_4_4321 (with BOM)
 	}
 
+	// SECOND: three byte signature:
+	// look for UTF-8 byte order mark EF BB BF, allowed by XML 1.0 2nd edition
+
+	else if (tryEncoding (signature, (byte)0xef, (byte)0xbb, (byte)0xbf)) {
+	    encoding = ENCODING_UTF_8;
+	    is.read(); is.read(); is.read();
+	}
+
 	//
 	// SECOND:  two byte encodings
 	// note ... with 1/14/2000 errata the XML spec identifies some
@@ -4002,6 +3915,20 @@
 	return ((sig [0] == b1) && (sig [1] == b2));
     }
 
+    /**
+     * Check for a three-byte signature.
+     * <p>Looks for a UTF-8 byte-order mark.
+     * <p>Utility routine for detectEncoding ().
+     * @param sig The first four bytes read.
+     * @param b1 The first byte of the signature
+     * @param b2 The second byte of the signature
+     * @param b3 The third byte of the signature
+     * @see #detectEncoding
+     */
+    private static boolean tryEncoding (byte sig[], byte b1, byte b2, byte b3)
+    {
+	return ((sig [0] == b1) && (sig [1] == b2) && (sig [2] == b3));
+    }
 
     /**
      * This method pushes a string back onto input.
@@ -4131,25 +4058,24 @@
     private void popInput ()
     throws SAXException, IOException
     {
+        String uri;
 	Object input[];
 
+	if (externalEntity != null)
+	    uri = externalEntity.getURL ().toString ();
+	else
+	    uri = baseURI;
 
 	switch (sourceType) {
 
-	case INPUT_EXTERNAL:
-	    if (externalEntity != null) {
-		handler.endExternalEntity (
-			externalEntity.getURL ().toString ());
-	    }
-	    break;
 	case INPUT_STREAM:
-	    if (baseURI != null) {
+	    if (is != null) {
 		handler.endExternalEntity (baseURI);
 	    }
 	    is.close ();
 	    break;
 	case INPUT_READER:
-	    if (baseURI != null) {
+	    if (reader != null && uri != null) {
 		handler.endExternalEntity (baseURI);
 	    }
 	    reader.close ();
@@ -4166,6 +4092,9 @@
 	    s = (String) entityStack.pop ();
 	}
 
+	input = (Object[]) inputStack.pop ();
+        entityStack.pop ();
+
 	sourceType = ((Integer) input [0]).intValue ();
 	externalEntity = (URLConnection) input [1];
 	readBuffer = (char[]) input [2];
@@ -4807,40 +4736,6 @@
 	inCDATA = false;
 
 	symbolTable = new Object [SYMBOL_TABLE_LENGTH][];
-	
-// <struct>
-	showDocStructure = false;
-	translateExternalParsedEntities = false;
-	translateComments = false;
-	
-	try {
-		showDocStructure = ((String)handler.getProperty("http://4xt.org/inclusions/showDocStructure")).equals("yes");
-	} catch (SAXNotRecognizedException e) {
-		showDocStructure = false;
-//	      System.err.println(e.toString());
-    }
-
-	if (showDocStructure) {
-		translateExternalParsedEntities=true;
-		translateComments=true;
-	} else {
-		try {
-			translateExternalParsedEntities = ((String)handler.getProperty("http://4xt.org/inclusions/translateExternalParsedEntities")).equals("yes");
-		} catch (SAXNotRecognizedException e) {
-			translateExternalParsedEntities = false;
-	    }
-		try {
-			translateComments = ((String)handler.getProperty("http://4xt.org/inclusions/translateComments")).equals("yes");
-		} catch (SAXNotRecognizedException e) {
-			translateComments = false;
-	    }
-	}
-	
-	System.err.println("showDocStructure : "+showDocStructure);
-	System.err.println("translateExternalParsedEntities : "+translateExternalParsedEntities);
-	System.err.println("translateComments : "+translateComments);
-// </struct>
-	
     }
 
 
@@ -4868,10 +4763,16 @@
 	symbolTable = null;
     }
 
+    /* used to restart reading with some InputStreamReader */
+    static class EncodingException extends IOException
+    {
+	EncodingException (String encoding) { super (encoding); }
+    }
+
     //
     // The current XML handler interface.
     //
-    private StructSaxDriver	handler;
+    private SAXDriver	handler;
 
     //
     // I/O information.
@@ -4979,19 +4880,4 @@
     // Utility flag: are we in CDATA?  If so, whitespace isn't ignorable.
     // 
     private boolean	inCDATA;
-// <struct>
-	//
-	// Flags to define if we should show the document structure and if 
-	// we should expend the external parsed entities
-	//
-	private boolean showDocStructure;
-	private boolean translateExternalParsedEntities;
-	private boolean translateComments;
-	
-	String	version=null;
-	String	encodingName = null;
-	String	standalone = null;
-// </struct>
-
-
 }
openSUSE Build Service is sponsored by