[Yanel-commits] rev 23178 -
public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization
josias at wyona.com
josias at wyona.com
Wed Mar 7 16:32:26 CET 2007
Author: josias
Date: 2007-03-07 16:32:25 +0100 (Wed, 07 Mar 2007)
New Revision: 23178
Modified:
public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java
Log:
replace characters like < by the corresponding xml entity. this should fix some problems with invalid markup in the final result page when the source document contains entities like &lt; which are always resolved by the xml reader.
Modified: public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java
===================================================================
--- public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java 2007-03-07 08:54:54 UTC (rev 23177)
+++ public/yanel/trunk/src/core/java/org/wyona/yanel/core/serialization/HTMLSerializer.java 2007-03-07 15:32:25 UTC (rev 23178)
@@ -72,7 +72,7 @@
element.append("<" + eName);
for(int i = 0; i < attrs.getLength(); i++) {
String aName = attrs.getQName(i);
- String aValue = attrs.getValue(i);
+ String aValue = replaceEntities(attrs.getValue(i));
element.append(" " + aName + "=\"" + aValue + "\"");
}
// NOTE: the element will not be closed yet because we don't know if the
@@ -130,7 +130,7 @@
public void characters(char[] buf, int offset, int len) throws SAXException {
handlePendingElement();
- String s = new String(buf, offset, len);
+ String s = replaceEntities(new String(buf, offset, len));
print(s);
}
@@ -173,7 +173,7 @@
protected void print(String s) throws SAXException {
try {
- this.os.write(replaceAmpersand(s).getBytes("UTF-8"));
+ this.os.write(s.getBytes("UTF-8"));
} catch (IOException e) {
log.error(e.getMessage(), e);
throw new SAXException(e);
@@ -181,24 +181,21 @@
}
/**
- * Replaces all occurences of '&' but not '&amp;' with '&amp;'.
- * TODO: fix this in the reader.
- * @param inputString with or without '&'
- * @return replaced ampersands as string
+ * Replaces some characters by their corresponding xml entities.
+ * @param str
+ * @return
*/
- private String replaceAmpersand(String inputString) {
- String [] tokens = inputString.split("&amp;");
- String replacedAmpersand = null;
- if(inputString.indexOf("&amp;") == -1) {
- replacedAmpersand = inputString.replaceAll("&", "&amp;");
- } else {
- replacedAmpersand = "";
- for(int i = 0; i < tokens.length; i++) {
- replacedAmpersand += tokens[i].replaceAll("&", "&amp;") + "&amp;";
- }
- }
- log.debug("[" + inputString + "] replaced with [" + replacedAmpersand + "]");
- return replacedAmpersand;
+ private String replaceEntities(String str) {
+ // there may be some & and some &amp; mixed in the input, so first transform all
+ // &amp; to & and then transform all & back to &amp;
+ // this way we don't get double escaped &amp;amp;
+ str = str.replaceAll("&amp;", "&");
+ str = str.replaceAll("&", "&amp;");
+ str = str.replaceAll("<", "&lt;");
+ str = str.replaceAll(">", "&gt;");
+ str = str.replaceAll("'", "&apos;");
+ str = str.replaceAll("\"", "&quot;");
+ return str;
}
public void setWriter(Writer writer) {
More information about the Yanel-commits
mailing list