[Yanel-commits] rev 23178 - public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization

josias at wyona.com josias at wyona.com
Wed Mar 7 16:32:26 CET 2007


Author: josias
Date: 2007-03-07 16:32:25 +0100 (Wed, 07 Mar 2007)
New Revision: 23178

Modified:
   public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java
Log:
Replace characters like < with the corresponding XML entity. This should fix some problems with invalid markup in the final result page when the source document contains entities like &lt;, which are always resolved by the XML reader.

Modified: public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java
===================================================================
--- public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java	2007-03-07 08:54:54 UTC (rev 23177)
+++ public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java	2007-03-07 15:32:25 UTC (rev 23178)
@@ -72,7 +72,7 @@
         element.append("<" + eName);
         for(int i = 0; i < attrs.getLength(); i++) {
             String aName = attrs.getQName(i);
-            String aValue = attrs.getValue(i);
+            String aValue = replaceEntities(attrs.getValue(i));
             element.append(" " + aName + "=\"" + aValue + "\"");
         }
         // NOTE: the element will not be closed yet because we don't know if the
@@ -130,7 +130,7 @@
 
     public void characters(char[] buf, int offset, int len) throws SAXException {
         handlePendingElement();
-        String s = new String(buf, offset, len);
+        String s = replaceEntities(new String(buf, offset, len));
         print(s);
     }
 
@@ -173,7 +173,7 @@
     
     protected void print(String s) throws SAXException {
         try {
-            this.os.write(replaceAmpersand(s).getBytes("UTF-8"));
+            this.os.write(s.getBytes("UTF-8"));
         } catch (IOException e) {
             log.error(e.getMessage(), e);
             throw new SAXException(e);
@@ -181,24 +181,21 @@
     }
     
     /**
-     * Replaces all occurences of '&' but not '&amp;' with '&amp;'.
-     * TODO: fix this in the reader.
-     * @param inputString with or without '&'
-     * @return replaced ampersands as string
+     * Replaces some characters by their corresponding xml entities.
+     * @param str
+     * @return
      */
-    private String replaceAmpersand(String inputString) {
-        String [] tokens = inputString.split("&amp;");
-        String replacedAmpersand = null;
-        if(inputString.indexOf("&amp;") == -1) {
-            replacedAmpersand = inputString.replaceAll("&", "&amp;");
-        } else {
-            replacedAmpersand = "";
-            for(int i = 0; i < tokens.length; i++) {
-                replacedAmpersand += tokens[i].replaceAll("&", "&amp;") + "&amp;";
-            }
-        }
-        log.debug("[" + inputString + "] replaced with [" + replacedAmpersand + "]");
-        return replacedAmpersand;
+    private String replaceEntities(String str) {
+        // there may be some &amp; and some & mixed in the input, so first transform all
+        // &amp; to & and then transform all & back to &amp;
+        // this way we don't get double escaped &amp;amp;
+        str = str.replaceAll("&amp;", "&");
+        str = str.replaceAll("&", "&amp;");
+        str = str.replaceAll("<", "&lt;");
+        str = str.replaceAll(">", "&gt;");
+        str = str.replaceAll("'", "&apos;");
+        str = str.replaceAll("\"", "&quot;");
+        return str;
     }
 
     public void setWriter(Writer writer) {




More information about the Yanel-commits mailing list