Download
Getting Started
Members
Projects
Community
Marketplace
Events
Planet Eclipse
Newsletter
Videos
Participate
Report a Bug
Forums
Mailing Lists
Wiki
IRC
How to Contribute
Working Groups
Automotive
Internet of Things
LocationTech
Long-Term Support
PolarSys
Science
OpenMDM
More
Community
Marketplace
Events
Planet Eclipse
Newsletter
Videos
Participate
Report a Bug
Forums
Mailing Lists
Wiki
IRC
How to Contribute
Working Groups
Automotive
Internet of Things
LocationTech
Long-Term Support
PolarSys
Science
OpenMDM
Toggle navigation
Bugzilla – Attachment 143859 Details for
Bug 282096
[xpath2] fn:string-length, fn:substring and fn:translate need to handle surrogate pairs
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Terms of Use
|
Copyright Agent
Some Eclipse Foundation services are deprecated, or will be soon. Please ensure you've read
this important communication.
[patch]
Patch for this bug
bug-282096-patch.txt (text/plain), 42.98 KB, created by
Jesper Moller
on 2009-08-09 19:34:44 EDT
(
hide
)
Description:
Patch for this bug
Filename:
MIME Type:
Creator:
Jesper Moller
Created:
2009-08-09 19:34:44 EDT
Size:
42.98 KB
patch
obsolete
>### Eclipse Workspace Patch 1.0 >#P org.eclipse.wst.xml.xpath2.processor.tests >Index: src/org/eclipse/wst/xml/xpath2/processor/test/AbstractPsychoPathTest.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/test/AbstractPsychoPathTest.java,v >retrieving revision 1.27 >diff -u -r1.27 AbstractPsychoPathTest.java >--- src/org/eclipse/wst/xml/xpath2/processor/test/AbstractPsychoPathTest.java 5 Aug 2009 01:58:42 -0000 1.27 >+++ src/org/eclipse/wst/xml/xpath2/processor/test/AbstractPsychoPathTest.java 9 Aug 2009 23:27:55 -0000 >@@ -12,6 +12,7 @@ > * Jesper S Moller - bug 283214 - fix XML result serialization > * Jesper S Moller - bug 283404 - fixed locale > * Jesper S Moller - bug 281159 - fix document URIs and also filter XML namespace >+ * Jesper Steen Moeller - bug 282096 - make test harness handle all string encoding translate function surrogate aware > *******************************************************************************/ > package org.eclipse.wst.xml.xpath2.processor.test; > >@@ -46,6 +47,7 @@ > import org.eclipse.wst.xml.xpath2.processor.internal.types.ElementType; > import org.eclipse.wst.xml.xpath2.processor.internal.types.NodeType; > import org.eclipse.wst.xml.xpath2.processor.internal.types.QName; >+import org.eclipse.wst.xml.xpath2.processor.testsuite.functions.EscapeHTMLURIFuncTest; > import org.osgi.framework.Bundle; > import org.w3c.dom.Document; > import org.w3c.dom.Node; >@@ -185,6 +187,14 @@ > } > > protected String getExpectedResult(String xqFile) { >+ return getExpectedResult(xqFile, true); >+ } >+ >+ protected String getExpectedResultNoEscape(String xqFile) { >+ return getExpectedResult(xqFile, false); >+ } >+ >+ protected String getExpectedResult(String xqFile, boolean unescape) { > String resultFile = xqFile; > // > if (resultFile.length() < 10) { // <9 enough? 
like XPST0001 >@@ -226,6 +236,7 @@ > // e.printStackTrace(); > content = "XPST0003"; > } >+ if (unescape && content.contains("&")) return resolveCharacterReferences(content); > return content; > } > >@@ -237,7 +248,15 @@ > return unwrapResult(getExpectedResult(resultFile), elemName); > } > >+ public String extractXPathExpressionNoEscape(String xqFile, String inputFile) { >+ return extractXPathExpression(xqFile, inputFile, false); >+ } >+ > public String extractXPathExpression(String xqFile, String inputFile) { >+ return extractXPathExpression(xqFile, inputFile, true); >+ } >+ >+ public String extractXPathExpression(String xqFile, String inputFile, boolean unescape) { > // get the xpath2 expr from xq file, first > char[] cbuf = new char[2048];// > String content = null; >@@ -277,7 +296,11 @@ > // TODO Auto-generated catch block > e.printStackTrace(); > } >- return xpath2Expr; >+ if (unescape && xpath2Expr.contains("&")) { >+ return resolveCharacterReferences(xpath2Expr); >+ } else { >+ return xpath2Expr; >+ } > } > > protected String extractXPathExpression(String xqFile, String inputFile, >@@ -398,15 +421,21 @@ > return expectedResult; > } > >- protected String resolveCharacterReferences(String xpath) >- throws IOException, DOMLoaderException { >- String docText = "<doc>" + xpath + "</doc>"; >- InputStream is = new ByteArrayInputStream(docText.getBytes("UTF-8")); >- DOMLoader domloader = new XercesLoader(); >- domloader.set_validating(false); >- Document temp = domloader.load(is); >- return temp.getDocumentElement().getFirstChild().getTextContent(); >- } >+ protected String resolveCharacterReferences(String xpath) { >+ String docText = "<doc>" + xpath + "</doc>"; >+ InputStream is; >+ try { >+ is = new ByteArrayInputStream(docText.getBytes("UTF-8")); >+ DOMLoader domloader = new XercesLoader(); >+ domloader.set_validating(false); >+ Document temp = domloader.load(is); >+ return temp.getDocumentElement().getFirstChild().getTextContent(); >+ } catch 
(UnsupportedEncodingException e) { >+ throw new RuntimeException(e); >+ } catch (DOMLoaderException e) { >+ throw new RuntimeException(e); >+ } >+ } > > > } >Index: src/org/eclipse/wst/xml/xpath2/processor/testsuite/functions/NormalizeUnicodeFuncTest.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/testsuite/functions/NormalizeUnicodeFuncTest.java,v >retrieving revision 1.4 >diff -u -r1.4 NormalizeUnicodeFuncTest.java >--- src/org/eclipse/wst/xml/xpath2/processor/testsuite/functions/NormalizeUnicodeFuncTest.java 30 Jul 2009 23:50:21 -0000 1.4 >+++ src/org/eclipse/wst/xml/xpath2/processor/testsuite/functions/NormalizeUnicodeFuncTest.java 9 Aug 2009 23:27:55 -0000 >@@ -1,4 +1,16 @@ >- >+/******************************************************************************* >+ * Copyright (c) 2009 Standards for Technology for Automotive Retail and others. >+ * All rights reserved. 
This program and the accompanying materials >+ * are made available under the terms of the Eclipse Public License v1.0 >+ * which accompanies this distribution, and is available at >+ * http://www.eclipse.org/legal/epl-v10.html >+ * >+ * Contributors: >+ * David Carver (STAR) - initial API and implementation >+ * Jesper Steen Moeller - bug 282096 - special case for a test which works >+ * differently in XPath2 than in XQuery >+ *******************************************************************************/ >+ > package org.eclipse.wst.xml.xpath2.processor.testsuite.functions; > > import java.io.IOException; >@@ -12,19 +24,6 @@ > > > public class NormalizeUnicodeFuncTest extends AbstractPsychoPathTest { >- >- @Override >- public String extractXPathExpression(String xqFile, String inputFile) { >- String body = super.extractXPathExpression(xqFile, inputFile); >- try { >- body = resolveCharacterReferences(body); >- } catch (DOMLoaderException e) { >- e.printStackTrace(); >- } catch (IOException e) { >- e.printStackTrace(); >- } >- return body; >- } > > //Test normalize-unicode with simple text input. 
> public void test_fn_normalize_unicode1args_1() throws Exception { >Index: src/org/eclipse/wst/xml/xpath2/processor/testsuite/core/LiteralsTest.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/testsuite/core/LiteralsTest.java,v >retrieving revision 1.2 >diff -u -r1.2 LiteralsTest.java >--- src/org/eclipse/wst/xml/xpath2/processor/testsuite/core/LiteralsTest.java 12 Jul 2009 05:16:21 -0000 1.2 >+++ src/org/eclipse/wst/xml/xpath2/processor/testsuite/core/LiteralsTest.java 9 Aug 2009 23:27:55 -0000 >@@ -7,6 +7,8 @@ > * > * Contributors: > * David Carver (STAR) - initial API and implementation >+ * Jesper Steen Moeller - bug 282096 - special case for a test which works >+ * differently in XPath2 than in XQuery > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.testsuite.core; >@@ -2013,7 +2015,7 @@ > String inputFile = "/TestSources/emptydoc.xml"; > String xqFile = "/Queries/XQuery/Expressions/PrimaryExpr/Literals/Literals057.xq"; > String resultFile = "/ExpectedTestResults/Expressions/PrimaryExpr/Literals/Literals057.txt"; >- String expectedResult = getExpectedResult(resultFile); >+ String expectedResult = getExpectedResultNoEscape(resultFile); > URL fileURL = bundle.getEntry(inputFile); > loadDOMDocument(fileURL); > >@@ -2022,7 +2024,7 @@ > > DynamicContext dc = setupDynamicContext(schema); > >- String xpath = extractXPathExpression(xqFile, inputFile); >+ String xpath = extractXPathExpressionNoEscape(xqFile, inputFile); > String actual = null; > try { > XPath path = compileXPath(dc, xpath); >#P org.eclipse.wst.xml.xpath2.processor >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTokenize.java >=================================================================== >RCS file: 
/cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTokenize.java,v >retrieving revision 1.2 >diff -u -r1.2 FnTokenize.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTokenize.java 3 Aug 2009 02:24:10 -0000 1.2 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTokenize.java 9 Aug 2009 23:27:57 -0000 >@@ -7,17 +7,16 @@ > * > * Contributors: > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; > > import java.util.*; > import java.util.regex.*; >@@ -71,8 +70,6 @@ > String str1 = ""; > if (!arg1.empty()) { > str1 = ((XSString) arg1.first()).value(); >- str1 = SurrogateUtils.decodeXML(str1); >- str1 = StringEscapeUtils.unescapeXml(str1); > } > > ResultSequence arg2 = (ResultSequence) argiter.next(); >@@ -92,7 +89,7 @@ > ArrayList<String> ret = tokenize(pattern, flags, str1); > > for(String token : ret) { >- rs.add(new XSString(StringEscapeUtils.escapeXml(token))); >+ rs.add(new XSString(token)); > } > } catch (PatternSyntaxException err) { > throw DynamicError.regex_error(null); >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringLength.java >=================================================================== >RCS file: 
/cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringLength.java,v >retrieving revision 1.5 >diff -u -r1.5 FnStringLength.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringLength.java 1 Aug 2009 17:16:02 -0000 1.5 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringLength.java 9 Aug 2009 23:27:57 -0000 >@@ -11,18 +11,17 @@ > * Mukul Gandhi - bug 274805 - improvements to xs:integer data type > * Jesper Steen Moeller - bug 285145 - implement full arity checking > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicContext; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; > > import java.math.BigInteger; > import java.util.*; >@@ -98,10 +97,9 @@ > } > > String str = ((XSString) arg1.first()).value(); >- str = SurrogateUtils.decodeXML(str); > > ResultSequence rs = ResultSequenceFactory.create_new(); >- rs.add(new XSInteger(BigInteger.valueOf(str.length()))); >+ rs.add(new XSInteger(BigInteger.valueOf(str.codePointCount(0, str.length())))); > > return rs; > } >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTranslate.java >=================================================================== >RCS file: 
/cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTranslate.java,v >retrieving revision 1.2 >diff -u -r1.2 FnTranslate.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTranslate.java 1 Aug 2009 17:16:02 -0000 1.2 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnTranslate.java 9 Aug 2009 23:27:57 -0000 >@@ -7,17 +7,19 @@ > * > * Contributors: > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 >+ * Jesper Steen Moeller - bug 282096 - clean up string storage and make >+ * translate function surrogate aware > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; >+import org.eclipse.wst.xml.xpath2.processor.internal.utils.CodePointIterator; >+import org.eclipse.wst.xml.xpath2.processor.internal.utils.StringCodePointIterator; > > import java.util.*; > >@@ -110,40 +112,61 @@ > } > > String str = ((XSString) arg1.first()).value(); >- str = SurrogateUtils.decodeXML(str); > String mapstr = ((XSString) arg2.first()).value(); >- mapstr = SurrogateUtils.decodeXML(mapstr); > String transstr = ((XSString) arg3.first()).value(); >- transstr = SurrogateUtils.decodeXML(transstr); > >- String result = new String(str); >- >- // ok the spec says that first occurence decides how to change >- // it... 
>- Map repmap = new Hashtable(256); >- for (int i = 0; i < mapstr.length(); i++) { >- String replace = ""; >- String chartofind = "" + mapstr.charAt(i); >- >- if (transstr.length() > i) >- replace += transstr.charAt(i); >- >- if (repmap.containsKey(chartofind)) >- replace = (String) repmap.get(chartofind); >- >- else >- repmap.put(chartofind, replace); >- >- result = result.replaceAll(chartofind, replace); >+ Map<Integer, Integer> replacements = buildReplacementMap(mapstr, transstr); >+ >+ StringBuffer sb = new StringBuffer(str.length()); >+ CodePointIterator strIter = new StringCodePointIterator(str); >+ for (int input = strIter.current(); input != CodePointIterator.DONE; input = strIter.next()) { >+ Integer inputCodepoint = Integer.valueOf(input); >+ if (replacements.containsKey(inputCodepoint)) { >+ Integer replaceWith = (Integer)replacements.get(inputCodepoint); >+ if (replaceWith != null) { >+ sb.appendCodePoint(replaceWith.intValue()); >+ } >+ } else { >+ sb.appendCodePoint(input); >+ } > } > >- result = StringEscapeUtils.escapeXml(result); >- rs.add(new XSString(result)); >+ rs.add(new XSString(sb.toString())); > > return rs; > } > > /** >+ * Build a replacement map from the mapstr and the transstr for translation. The function returns a Map<Integer, Integer> mapping each codepoint >+ * mentioned in the mapstr into the corresponding codepoint in transstr, or null if there is no matching mapping in transstr. 
>+ * >+ * @param mapstr The "mapping from" string >+ * @param transstr The "mapping into" string >+ * @return A map which maps input codepoint to output codepoint (or null) >+ */ >+ private static Map<Integer, Integer> buildReplacementMap(String mapstr, String transstr) { >+ // Build mapping (map from codepoint -> codepoint) >+ Map<Integer, Integer> replacements = new HashMap<Integer, Integer>(mapstr.length() * 4); >+ >+ CodePointIterator mapIter = new StringCodePointIterator(mapstr); >+ CodePointIterator transIter = new StringCodePointIterator(transstr); >+ // Iterate through both mapIter and transIter and produce the mapping >+ int mapFrom = mapIter.current(); >+ int mapTo = transIter.current(); >+ while (mapFrom != CodePointIterator.DONE) { >+ Integer codepointFrom = Integer.valueOf(mapFrom); >+ if (! replacements.containsKey(codepointFrom)) { >+ // only overwrite if it doesn't exist already >+ Integer replacement = mapTo != CodePointIterator.DONE ? Integer.valueOf(mapTo) : null; >+ replacements.put(codepointFrom, replacement); >+ } >+ mapFrom = mapIter.next(); >+ mapTo = transIter.next(); >+ } >+ return replacements; >+ } >+ >+ /** > * Calculate the expected arguments. > * > * @return The expected arguments. 
>Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnCodepointsToString.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnCodepointsToString.java,v >retrieving revision 1.4 >diff -u -r1.4 FnCodepointsToString.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnCodepointsToString.java 1 Aug 2009 17:16:02 -0000 1.4 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnCodepointsToString.java 9 Aug 2009 23:27:57 -0000 >@@ -9,11 +9,11 @@ > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 > * Mukul Gandhi - improvements to the function implementation > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; >@@ -83,7 +83,6 @@ > // "new String(int[] codePoints, int offset, int count)" is a facility > // introduced in Java 1.5 > String str = new String(codePointArray, 0, codePointArray.length); >- str = StringEscapeUtils.escapeXml(str); > rs.add(new XSString(str)); > > return rs; >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnLowerCase.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnLowerCase.java,v >retrieving revision 1.2 >diff -u -r1.2 FnLowerCase.java >--- 
src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnLowerCase.java 1 Aug 2009 17:16:02 -0000 1.2 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnLowerCase.java 9 Aug 2009 23:27:57 -0000 >@@ -8,17 +8,16 @@ > * Contributors: > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; > > import java.util.*; > >@@ -89,9 +88,8 @@ > } > > String str = ((XSString) arg1.first()).value(); >- str = SurrogateUtils.decodeXML(str); > >- rs.add(new XSString(StringEscapeUtils.escapeXml(str.toLowerCase()))); >+ rs.add(new XSString(str.toLowerCase())); > > return rs; > } >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstring.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstring.java,v >retrieving revision 1.8 >diff -u -r1.8 FnSubstring.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstring.java 1 Aug 2009 17:16:02 -0000 1.8 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstring.java 9 Aug 2009 23:27:57 -0000 >@@ -10,17 +10,18 @@ > * Mukul Gandhi - bug 273795 - improvements 
to function, substring > * Jesper Steen Moeller - bug 285145 - implement full arity checking > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - reimplemented to be surrogate sensitive > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; >+import org.eclipse.wst.xml.xpath2.processor.internal.utils.CodePointIterator; >+import org.eclipse.wst.xml.xpath2.processor.internal.utils.StringCodePointIterator; > > import java.util.*; > >@@ -100,76 +101,42 @@ > } > > String str = ((XSString) stringArg.first()).value(); >- str = SurrogateUtils.decodeXML(str); > double dstart = ((XSDouble) startPosArg.first()).double_value(); > >- if (Double.NaN == dstart) { >+ // is start is NaN, no chars are returned >+ if (Double.NaN == dstart || Double.NEGATIVE_INFINITY == dstart) { > return emptyString(rs); > } >- >- int start = (int) Math.round(dstart); >- >- if (isStartOutOfBounds(str, start)) { >- return emptyString(rs); >- } >+ long istart = Math.round(dstart); > >- start = adjustStartPosition(start); >- >+ long ilength = Long.MAX_VALUE; > if (lengthArg != null) { >- return substringLength(rs, lengthArg, dstart, start, str); >+ double dlength = ((XSDouble) lengthArg.first()).double_value(); >+ if (Double.NaN == dlength) >+ return emptyString(rs); >+ // Switch to the rounded kind >+ ilength = Math.round(dlength); >+ if (ilength <= 0) >+ return emptyString(rs); > } > >- rs.add(new 
XSString(StringEscapeUtils.escapeXml(str.substring(start)))); >- >- return rs; >- } >- >- private static ResultSequence substringLength(ResultSequence rs, ResultSequence lengthArg, double dstart, int start, String str) { >- int length = adjustLength(lengthArg, dstart, start); >- int endpos = start + length; >- if (isEndPosOutOfBounds(endpos) || isStartOutOfBounds(str, endpos)) { >- return emptyString(rs); >- } >- >- if (start == 0 && endpos == 0) { >- rs.add(new XSString(StringEscapeUtils.escapeXml(str.substring(start)))); >- } else { >- rs.add(new XSString(StringEscapeUtils.escapeXml(str.substring(start, endpos)))); >- } >- return rs; >- } >- >- private static boolean isEndPosOutOfBounds(int endpos) { >- return endpos < 0; >- } >- >- private static int adjustLength(ResultSequence arg3, double dstart, >- int start) { >- double dlength = ((XSDouble) arg3.first()).double_value(); >- int length = (int) Math.round(dlength); >- if (dstart < 0) { >- length = (int)( dlength - (Math.abs(dstart) + 1)); >- } else if (dstart == 0) { >- length = length - 1; >- } else if (isEndPosOutOfBounds(length)) { >- length = start - Math.abs(length); >- } >- return length; >- } >+ >+ // could guess too short in cases supplementary chars >+ StringBuilder sb = new StringBuilder((int) Math.min(str.length(), ilength)); > >- private static int adjustStartPosition(int start) { >- if (start <= 0) { >- start = 0; >- } else { >- start = start - 1; >+ // This looks like an inefficient way to iterate, but due to surrogate handling, >+ // string indexes are no good here. Welcome to UTF-16! 
>+ >+ CodePointIterator strIter = new StringCodePointIterator(str); >+ for (long p = 1; strIter.current() != CodePointIterator.DONE; ++p, strIter.next()) { >+ if (istart <= p && p - istart < ilength) >+ sb.appendCodePoint(strIter.current()); > } >- return start; >- } >+ rs.add(new XSString(sb.toString())); > >- private static boolean isStartOutOfBounds(String str, int start) { >- return start > str.length(); >+ return rs; > } >- >+ > private static ResultSequence emptyString(ResultSequence rs) { > rs.add(new XSString("")); > return rs; >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringAfter.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringAfter.java,v >retrieving revision 1.2 >diff -u -r1.2 FnSubstringAfter.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringAfter.java 1 Aug 2009 17:16:02 -0000 1.2 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringAfter.java 9 Aug 2009 23:27:57 -0000 >@@ -8,17 +8,16 @@ > * Contributors: > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import 
org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; > > import java.util.*; > >@@ -74,13 +73,11 @@ > String str2 = ""; > if (!arg1.empty()) { > str1 = ((XSString) arg1.first()).value(); >- str1 = SurrogateUtils.decodeXML(str1); > } > > ResultSequence arg2 = (ResultSequence) argiter.next(); > if (!arg2.empty()) { > str2 = ((XSString) arg2.first()).value(); >- str2 = SurrogateUtils.decodeXML(str2); > } > > int str1len = str1.length(); >@@ -104,7 +101,7 @@ > result = str1.substring(index, str1len); > } > >- rs.add(new XSString(StringEscapeUtils.escapeXml(result))); >+ rs.add(new XSString(result)); > > return rs; > } >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnMatches.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnMatches.java,v >retrieving revision 1.4 >diff -u -r1.4 FnMatches.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnMatches.java 3 Aug 2009 02:24:10 -0000 1.4 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnMatches.java 9 Aug 2009 23:27:57 -0000 >@@ -9,17 +9,16 @@ > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 > * David Carver - bug 282096 - improvements for surrogate handling > * David Carver - bug 262765 - improvements to Regular Expression >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import 
org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; > > import java.util.*; > import java.util.regex.*; >@@ -73,13 +72,10 @@ > String str1 = ""; > if (!arg1.empty()) { > str1 = ((XSString) arg1.first()).value(); >- str1 = SurrogateUtils.decodeXML(str1); >- str1 = StringEscapeUtils.unescapeXml(str1); > } > > ResultSequence arg2 = (ResultSequence) argiter.next(); > String pattern = ((XSString) arg2.first()).value(); >- pattern = SurrogateUtils.decodeXML(pattern); > String flags = null; > > if (argiter.hasNext()) { >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringToCodepoints.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringToCodepoints.java,v >retrieving revision 1.4 >diff -u -r1.4 FnStringToCodepoints.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringToCodepoints.java 1 Aug 2009 17:16:02 -0000 1.4 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnStringToCodepoints.java 9 Aug 2009 23:27:57 -0000 >@@ -9,6 +9,7 @@ > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 > * Mukul Gandhi - bug 280554 - improvements to the function implementation > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - clean up string storage and fix surrogate handling > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; >@@ -18,7 +19,8 @@ > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import 
org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; >+import org.eclipse.wst.xml.xpath2.processor.internal.utils.CodePointIterator; >+import org.eclipse.wst.xml.xpath2.processor.internal.utils.StringCodePointIterator; > > import java.math.BigInteger; > import java.util.*; >@@ -71,13 +73,11 @@ > return rs; > > XSString xstr = (XSString) arg1.first(); >- String str = xstr.value(); >- str = SurrogateUtils.decodeXML(str); > >- for (int i = 0; i < str.length(); i++) { >- // Character.codePointAt API, is introduced in Java 1.5 >- int codePointValue = Character.codePointAt(str, i); >- rs.add(new XSInteger(BigInteger.valueOf(codePointValue))); >+ CodePointIterator cpi = new StringCodePointIterator(xstr.value()); >+ >+ for (int codePoint = cpi.current(); codePoint != CodePointIterator.DONE; codePoint = cpi.next()) { >+ rs.add(new XSInteger(BigInteger.valueOf(codePoint))); > } > > return rs; >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringBefore.java >=================================================================== >RCS file: /cvsroot/webtools/sourceediting/plugins/org.eclipse.wst.xml.xpath2.processor/src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringBefore.java,v >retrieving revision 1.2 >diff -u -r1.2 FnSubstringBefore.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringBefore.java 1 Aug 2009 17:16:02 -0000 1.2 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/function/FnSubstringBefore.java 9 Aug 2009 23:27:57 -0000 >@@ -8,17 +8,16 @@ > * Contributors: > * Andrea Bittau - initial API and implementation from the PsychoPath XPath 2.0 > * David Carver - bug 282096 - improvements for surrogate handling >+ * Jesper Steen Moeller - bug 282096 - clean up string storage > *******************************************************************************/ > > package org.eclipse.wst.xml.xpath2.processor.internal.function; > >-import org.apache.commons.lang.StringEscapeUtils; > import 
org.eclipse.wst.xml.xpath2.processor.DynamicError; > import org.eclipse.wst.xml.xpath2.processor.ResultSequence; > import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory; > import org.eclipse.wst.xml.xpath2.processor.internal.*; > import org.eclipse.wst.xml.xpath2.processor.internal.types.*; >-import org.eclipse.wst.xml.xpath2.processor.internal.utils.SurrogateUtils; > > import java.util.*; > >@@ -74,13 +73,11 @@ > String str2 = ""; > if (!arg1.empty()) { > str1 = ((XSString) arg1.first()).value(); >- str1 = SurrogateUtils.decodeXML(str1); > } > > ResultSequence arg2 = (ResultSequence) argiter.next(); > if (!arg2.empty()) { > str2 = ((XSString) arg2.first()).value(); >- str2 = SurrogateUtils.decodeXML(str2); > } > > int str1len = str1.length(); >@@ -98,7 +95,7 @@ > } > > >- rs.add(new XSString(StringEscapeUtils.escapeXml(str1.substring(0, index)))); >+ rs.add(new XSString(str1.substring(0, index))); > > return rs; > } >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/utils/SurrogateUtils.java >=================================================================== >RCS file: src/org/eclipse/wst/xml/xpath2/processor/internal/utils/SurrogateUtils.java >diff -N src/org/eclipse/wst/xml/xpath2/processor/internal/utils/SurrogateUtils.java >--- src/org/eclipse/wst/xml/xpath2/processor/internal/utils/SurrogateUtils.java 1 Aug 2009 17:16:02 -0000 1.1 >+++ /dev/null 1 Jan 1970 00:00:00 -0000 >@@ -1,34 +0,0 @@ >-/******************************************************************************* >- * Copyright (c) 2009 Standards for Technology in Automotive Retail and others. >- * All rights reserved. 
This program and the accompanying materials >- * are made available under the terms of the Eclipse Public License v1.0 >- * which accompanies this distribution, and is available at >- * http://www.eclipse.org/legal/epl-v10.html >- * >- * Contributors: >- * David Carver (STAR) - bug 282096 - initial API and implementation >- *******************************************************************************/ >-package org.eclipse.wst.xml.xpath2.processor.internal.utils; >- >-public class SurrogateUtils { >- >- /** >- * This class will decode a surrogate entity into it's string representation >- * @param str >- * @return The decoded string >- * @since 1.1 >- */ >- public static String decodeXML(String str) { >- String decodeString = str; >- while (decodeString.contains("")) { >- int startpos = decodeString.indexOf(""); >- String starthex = decodeString.substring(startpos); >- int semipos = starthex.indexOf(';'); >- String hexValue = starthex.substring(4, semipos); >- int i = Integer.parseInt(hexValue, 16); >- char c = (char)i; >- decodeString = decodeString.replaceAll("" + hexValue + ";", "" + c); >- } >- return decodeString; >- } >-} >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/utils/CodePointIterator.java >=================================================================== >RCS file: src/org/eclipse/wst/xml/xpath2/processor/internal/utils/CodePointIterator.java >diff -N src/org/eclipse/wst/xml/xpath2/processor/internal/utils/CodePointIterator.java >--- /dev/null 1 Jan 1970 00:00:00 -0000 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/utils/CodePointIterator.java 1 Jan 1970 00:00:00 -0000 >@@ -0,0 +1,75 @@ >+/******************************************************************************* >+ * Copyright (c) 2009 Jesper Steen Moeller and others. >+ * All rights reserved. 
This program and the accompanying materials >+ * are made available under the terms of the Eclipse Public License v1.0 >+ * which accompanies this distribution, and is available at >+ * http://www.eclipse.org/legal/epl-v10.html >+ * >+ * Contributors: >+ * Jesper Steen Moeller - bug 282096 - initial API and implementation >+ *******************************************************************************/ >+ >+package org.eclipse.wst.xml.xpath2.processor.internal.utils; >+ >+public interface CodePointIterator extends Cloneable >+{ >+ /** >+ * Sentinel value returned from iterator when the end is reached. >+ * The value is -1 which will never be a valid codepoint. >+ */ >+ public static final int DONE = -1; >+ >+ /** >+ * Resets the position to 0 and returns the first code point. >+ * @return the first code point in the text, or DONE if the text is empty >+ */ >+ public int first(); >+ >+ /** >+ * Sets the position to the last possible position (or 0 if the text is empty) >+ * and returns the code point at that position. >+ * @return the last code point in the text, or DONE if the text is empty >+ * @see #getEndIndex() >+ */ >+ public int last(); >+ >+ /** >+ * Gets the code point at the current position (as returned by getIndex()). >+ * @return the code point at the current position or DONE if the current >+ * position is off the end of the text. >+ * @see #getIndex() >+ */ >+ public int current(); >+ >+ /** >+ * Increments the iterator's code point index by one and returns the code point >+ * at the new index. If the resulting index is at the end of the string, the >+ * index is not incremented, and DONE is returned. >+ * @return the code point at the new position or DONE if the new >+ * position is after the text range. >+ */ >+ public int next(); >+ >+ /** >+ * Decrements the iterator's index by one and returns the code point >+ * at the new index. If the current index is 0, the index >+ * remains at 0 and a value of DONE is returned.
>+ * >+ * @return the code point at the new position (or DONE if the current >+ * position is 0) >+ */ >+ public int previous(); >+ >+ /** >+ * Returns the current index (as a codepoint, not a string index). >+ * @return the current index. >+ */ >+ public int getIndex(); >+ >+ /** >+ * Create a copy of this code point iterator >+ * @return A copy of this >+ */ >+ public Object clone(); >+ >+} >Index: src/org/eclipse/wst/xml/xpath2/processor/internal/utils/StringCodePointIterator.java >=================================================================== >RCS file: src/org/eclipse/wst/xml/xpath2/processor/internal/utils/StringCodePointIterator.java >diff -N src/org/eclipse/wst/xml/xpath2/processor/internal/utils/StringCodePointIterator.java >--- /dev/null 1 Jan 1970 00:00:00 -0000 >+++ src/org/eclipse/wst/xml/xpath2/processor/internal/utils/StringCodePointIterator.java 1 Jan 1970 00:00:00 -0000 >@@ -0,0 +1,188 @@ >+/******************************************************************************* >+ * Copyright (c) 2009 Jesper Steen Moeller and others. >+ * All rights reserved. This program and the accompanying materials >+ * are made available under the terms of the Eclipse Public License v1.0 >+ * which accompanies this distribution, and is available at >+ * http://www.eclipse.org/legal/epl-v10.html >+ * >+ * Contributors: >+ * Jesper Steen Moeller - bug 282096 - initial API and implementation >+ *******************************************************************************/ >+ >+package org.eclipse.wst.xml.xpath2.processor.internal.utils; >+ >+public final class StringCodePointIterator implements CodePointIterator >+{ >+ private String text; >+ private int end; >+ // invariant: 0 <= pos <= end >+ private int pos; >+ private int cpPos; >+ >+ /** >+ * Constructs an iterator with an initial index of 0. 
>+ */ >+ public StringCodePointIterator(String text) >+ { >+ if (text == null) >+ throw new NullPointerException(); >+ >+ this.text = text; >+ this.end = text.length(); >+ if (end > 0 && Character.isHighSurrogate(text.charAt(end-1))) >+ throw new IllegalArgumentException("Invalid UTF-16 sequence ending with a high surrogate"); >+ >+ this.pos = 0; >+ this.cpPos = 0; >+ } >+ >+ /** >+ * Reset this iterator to point to a new string. This public >+ * method lets callers reuse an existing iterator rather than allocating >+ * a new StringCodePointIterator object every time the text to iterate >+ * changes. >+ * >+ * @param text The String to be iterated over >+ * @since 1.2 >+ */ >+ public void setText(String text) { >+ if (text == null) >+ throw new NullPointerException(); >+ this.text = text; >+ this.end = text.length(); >+ this.pos = 0; >+ this.cpPos = 0; >+ } >+ >+ /** >+ * Implements CodePointIterator.first() for String. >+ * @see CodePointIterator#first >+ */ >+ public int first() >+ { >+ pos = 0; >+ cpPos = 0; >+ return current(); >+ } >+ >+ /** >+ * Implements CodePointIterator.last() for String. >+ * @see CodePointIterator#last >+ */ >+ public int last() >+ { >+ pos = end; >+ cpPos = Character.codePointCount(text, 0, pos); >+ return previous(); >+ } >+ >+ /** >+ * Implements CodePointIterator.current() for String. >+ * @see CodePointIterator#current >+ */ >+ public int current() >+ { >+ if (pos < end) { >+ char ch1 = text.charAt(pos); >+ if (Character.isHighSurrogate(ch1)) return Character.toCodePoint(ch1, text.charAt(pos+1)); >+ return ch1; >+ } >+ else { >+ return DONE; >+ } >+ } >+ >+ /** >+ * Implements CodePointIterator.next() for String. >+ * @see CodePointIterator#next >+ */ >+ public int next() >+ { >+ if (pos < end - 1) { >+ pos++; >+ if (Character.isLowSurrogate(text.charAt(pos))) pos++; >+ cpPos++; >+ return current(); >+ } >+ else { >+ pos = end; >+ return DONE; >+ } >+ } >+ >+ /** >+ * Implements CodePointIterator.previous() for String.
>+ * @see CodePointIterator#previous >+ */ >+ public int previous() >+ { >+ if (pos > 0) { >+ pos--; >+ if (Character.isLowSurrogate(text.charAt(pos))) pos--; >+ cpPos--; >+ return current(); >+ } >+ else { >+ return DONE; >+ } >+ } >+ >+ /** >+ * Implements CodePointIterator.getIndex() for String. >+ * @see CodePointIterator#getIndex >+ */ >+ public int getIndex() >+ { >+ return cpPos; >+ } >+ >+ /** >+ * Compares the equality of two StringCodePointIterator objects. >+ * @param obj the StringCodePointIterator object to be compared with. >+ * @return true if the given obj is the same as this >+ * StringCodePointIterator object; false otherwise. >+ */ >+ public boolean equals(Object obj) >+ { >+ if (this == obj) >+ return true; >+ if (!(obj instanceof StringCodePointIterator)) >+ return false; >+ >+ StringCodePointIterator that = (StringCodePointIterator) obj; >+ >+ if (hashCode() != that.hashCode()) >+ return false; >+ if (!text.equals(that.text)) >+ return false; >+ if (pos != that.pos || end != that.end) >+ return false; >+ return true; >+ } >+ >+ /** >+ * Computes a hashcode for this iterator. >+ * @return A hash code >+ */ >+ public int hashCode() >+ { >+ return text.hashCode() ^ pos ^ end; >+ } >+ >+ /** >+ * Creates a copy of this iterator. >+ * @return A copy of this >+ */ >+ public Object clone() >+ { >+ try { >+ StringCodePointIterator other >+ = (StringCodePointIterator) super.clone(); >+ return other; >+ } >+ catch (CloneNotSupportedException e) { >+ throw new InternalError(); >+ } >+ } >+ >+}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Flags:
d_a_carver
:
iplog+
Actions:
View
|
Diff
Attachments on
bug 282096
: 143859