Revision 5747
Added by ben leinfelder about 14 years ago
test/edu/ucsb/nceas/metacattest/ReaderWriterTest.java | ||
---|---|---|
26 | 26 |
package edu.ucsb.nceas.metacattest; |
27 | 27 |
|
28 | 28 |
import java.io.*; |
29 |
import java.nio.charset.Charset; |
|
29 | 30 |
import java.util.*; |
30 | 31 |
|
31 | 32 |
import org.apache.commons.io.IOUtils; |
33 |
import org.apache.commons.io.input.XmlStreamReader; |
|
32 | 34 |
|
33 | 35 |
import edu.ucsb.nceas.MCTestCase; |
34 | 36 |
import edu.ucsb.nceas.metacat.client.MetacatFactory; |
... | ... | |
208 | 210 |
} |
209 | 211 |
} |
210 | 212 |
|
213 |
/** |
|
214 |
* test Commons IO detection |
|
215 |
*/ |
|
216 |
public void testXMLEncodingDectection() |
|
217 |
{ |
|
218 |
try |
|
219 |
{ |
|
220 |
System.out.println("default charset:" + Charset.defaultCharset().displayName()); |
|
221 |
String sampleXML = "<?xml version='1.0' encoding='UTF-8'><test>my content 你</test>"; |
|
222 |
// get bytes using different encoding - shouldn't matter what we use for the prolog |
|
223 |
XmlStreamReader xsr = |
|
224 |
new XmlStreamReader( |
|
225 |
new BufferedInputStream(new ByteArrayInputStream(sampleXML.getBytes("ISO-8859-1")))); |
|
226 |
|
|
227 |
System.out.println("detected encoding:" + xsr.getEncoding()); |
|
228 |
|
|
229 |
// read the string [again] using the detected encoding |
|
230 |
// NOTE: XmlStreamReader consumes the entire stream and does not suport reset() |
|
231 |
// Besides, we'd have the wrong bytes anyway |
|
232 |
String result = IOUtils.toString(new ByteArrayInputStream(sampleXML.getBytes(xsr.getEncoding()))); |
|
233 |
System.out.println(result); |
|
234 |
|
|
235 |
assertTrue(result.equals(sampleXML)); |
|
236 |
} |
|
237 |
catch(Exception e) |
|
238 |
{ |
|
239 |
e.printStackTrace(); |
|
240 |
fail("Unexpected error in testXMLEncodingDectection: " + e.getMessage()); |
|
241 |
} |
|
242 |
|
|
243 |
} |
|
244 |
|
|
211 | 245 |
public void initialize() |
212 | 246 |
{ |
213 | 247 |
assert(1 == 1); |
Also available in: Unified diff
add XmlStreamReader test for determining encoding