// CatalogReader.java - Read OASIS Catalog files
// Written by Norman Walsh, nwalsh@arbortext.com
// NO WARRANTY! This class is in the public domain.

package com.arbortext.catalog;

import java.lang.Integer;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.net.URL;
import java.net.MalformedURLException;
import com.arbortext.catalog.CatalogEntry;
import com.arbortext.catalog.InvalidCatalogEntryTypeException;
import com.arbortext.catalog.InvalidCatalogEntryException;

/**
18
 * <p>Parses OASIS Open Catalog files.</p>
19
 *
20
 * <blockquote>
21
 * <em>This module, both source code and documentation, is in the
22
 * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
23
 * </blockquote>
24
 *
25
 * <p>This class reads OASIS Open Catalog files, returning a stream
26
 * of tokens.</p>
27
 *
28
 * <p>This code interrogates the following non-standard system properties:</p>
29
 *
30
 * <dl>
31
 * <dt><b>xml.catalog.debug</b></dt>
32
 * <dd><p>Sets the debug level. A value of 0 is assumed if the
33
 * property is not set or is not a number.</p></dd>
34
 * </dl>
35
 *
36
 * @see Catalog
37
 *
38
 * @author Arbortext, Inc.
39
 * @author Norman Walsh
40
 *         <a href="mailto:nwalsh@arbortext.com">nwalsh@arbortext.com</a>
41
 * @version 1.0
42
 */
43
public class CatalogReader {
44
    // These are class variables so that several methods can access them
45
    /** The filename (URL) of the catalog being read */
46
    private String catfilename = null;
47

    
48
    /** The input stream used to read the catalog */
49
    private DataInputStream catfile = null;
50

    
51
    /**
52
     * Lookahead stack. Reading a catalog sometimes requires up to
53
     * two characters of lookahead.
54
     */
55
    private int[] stack = new int[3];
56

    
57
    /** The current position on the lookahead stack */
58
    private int top = -1;
59

    
60
    /**
61
     * <p>The debug level</p>
62
     *
63
     * <p>In general, higher numbers produce more information:</p>
64
     * <ul>
65
     * <li>0, no messages
66
     * <li>1, minimal messages (high-level status)
67
     * <li>2, detailed messages
68
     * </ul>
69
     */
70
    public int debug = 0;
71

    
72
    /**
73
     * <p>Construct a CatalogReader object.</p>
74
     */
75
    public CatalogReader() {
76
	String property = System.getProperty("xml.catalog.debug");
77

    
78
	if (property != null) {
79
	    try {
80
		debug = Integer.parseInt(property);
81
	    } catch (NumberFormatException e) {
82
		debug = 0;
83
	    }
84
	}
85
    }
86

    
87
    /**
88
     * <p>Start parsing an OASIS Open Catalog file. The file is
89
     * actually read and parsed
90
     * as needed by <code>nextEntry</code>.</p>
91
     *
92
     * @param fileUrl  The URL or filename of the catalog file to process
93
     *
94
     * @throws MalformedURLException Improper fileUrl
95
     * @throws IOException Error reading catalog file
96
     */
97
    public void parseCatalog(String fileUrl)
98
	throws MalformedURLException, IOException {
99
	catfilename = fileUrl;
100
	URL catalog;
101

    
102
	try {
103
	    catalog = new URL(fileUrl);
104
	} catch (MalformedURLException e) {
105
	    catalog = new URL("file:///" + fileUrl);
106
	}
107

    
108
	try {
109
	    catfile = new DataInputStream(catalog.openStream());
110
	} catch (FileNotFoundException e) {
111
	    debug(1, "Failed to load catalog, file not found", catalog.toString());
112
	}
113
    }
114

    
115
    /**
116
     * <p>The destructor.</p>
117
     *
118
     * <p>Makes sure the catalog file is closed.</p>
119
     */
120
    protected void finalize() throws IOException {
121
	if (catfile != null) {
122
	    catfile.close();
123
	}
124
	catfile = null;
125
    }
126

    
127
    /**
128
     * <p>Get the next entry from the file</p>
129
     *
130
     * @throws IOException Error reading catalog file
131
     * @return A CatalogEntry object for the next entry in the catalog
132
     */
133
    public CatalogEntry nextEntry() throws IOException {
134
	if (catfile == null) {
135
	    return null;
136
	}
137

    
138
	boolean confused = false;
139

    
140
	while (true) {
141
	    String token = nextToken();
142

    
143
	    if (token == null) {
144
		catfile.close();
145
		catfile = null;
146
		return null;
147
	    }
148

    
149
	    if (token.equalsIgnoreCase("BASE")
150
		|| token.equalsIgnoreCase("CATALOG")
151
		|| token.equalsIgnoreCase("DOCUMENT")
152
		|| token.equalsIgnoreCase("OVERRIDE")
153
		|| token.equalsIgnoreCase("SGMLDECL")) {
154
		String spec = nextToken();
155
		confused = false;
156

    
157
		try {
158
		    if (token.equalsIgnoreCase("BASE")) {
159
			return new CatalogEntry(CatalogEntry.BASE, spec);
160
		    }
161
		    if (token.equalsIgnoreCase("CATALOG")) {
162
			return new CatalogEntry(CatalogEntry.CATALOG, spec);
163
		    }
164
		    if (token.equalsIgnoreCase("DOCUMENT")) {
165
			return new CatalogEntry(CatalogEntry.DOCUMENT, spec);
166
		    }
167
		    if (token.equalsIgnoreCase("OVERRIDE")) {
168
			return new CatalogEntry(CatalogEntry.OVERRIDE, spec);
169
		    }
170
		    if (token.equalsIgnoreCase("SGMLDECL")) {
171
			return new CatalogEntry(CatalogEntry.SGMLDECL, spec);
172
		    }
173
		} catch (InvalidCatalogEntryTypeException icete) {
174
		    debug(1, "Invalid catalog entry type", token);
175
		    confused = true;
176
		} catch (InvalidCatalogEntryException icete) {
177
		    debug(1, "Invalid catalog entry", token, spec);
178
		    confused = true;
179
		}
180
	    }
181

    
182
	    if (token.equalsIgnoreCase("DELEGATE")
183
		|| token.equalsIgnoreCase("DOCTYPE")
184
		|| token.equalsIgnoreCase("DTDDECL")
185
		|| token.equalsIgnoreCase("ENTITY")
186
		|| token.equalsIgnoreCase("LINKTYPE")
187
		|| token.equalsIgnoreCase("NOTATION")
188
		|| token.equalsIgnoreCase("PUBLIC")
189
		|| token.equalsIgnoreCase("SYSTEM")) {
190
		String spec1 = nextToken();
191
		String spec2 = nextToken();
192
		confused = false;
193
		try {
194
		    if (token.equalsIgnoreCase("DELEGATE")) {
195
			return new CatalogEntry(CatalogEntry.DELEGATE,
196
						normalize(spec1), spec2);
197
		    }
198
		    if (token.equalsIgnoreCase("DOCTYPE")) {
199
			return new CatalogEntry(CatalogEntry.DOCTYPE,
200
						spec1, spec2);
201
		    }
202
		    if (token.equalsIgnoreCase("DTDDECL")) {
203
			return new CatalogEntry(CatalogEntry.DTDDECL,
204
						normalize(spec1), spec2);
205
		    }
206
		    if (token.equalsIgnoreCase("ENTITY")) {
207
			return new CatalogEntry(CatalogEntry.ENTITY,
208
						spec1, spec2);
209
		    }
210
		    if (token.equalsIgnoreCase("LINKTYPE")) {
211
			return new CatalogEntry(CatalogEntry.LINKTYPE,
212
						spec1, spec2);
213
		    }
214
		    if (token.equalsIgnoreCase("NOTATION")) {
215
			return new CatalogEntry(CatalogEntry.NOTATION,
216
						spec1, spec2);
217
		    }
218
		    if (token.equalsIgnoreCase("PUBLIC")) {
219
			return new CatalogEntry(CatalogEntry.PUBLIC,
220
						normalize(spec1), spec2);
221
		    }
222
		    if (token.equalsIgnoreCase("SYSTEM")) {
223
			return new CatalogEntry(CatalogEntry.SYSTEM,
224
						spec1, spec2);
225
		    }
226
		} catch (InvalidCatalogEntryTypeException icete) {
227
		    debug(1, "Invalid catalog entry type", token);
228
		    confused = true;
229
		} catch (InvalidCatalogEntryException icete) {
230
		    debug(1, "Invalid catalog entry", token, spec1, spec2);
231
		    confused = true;
232
		}
233
	    }
234

    
235
	    if (!confused) {
236
		if (debug > 1) {
237
		    System.out.println("Unrecognized token parsing catalog: '"
238
				       + catfilename
239
				       + "': "
240
				       + token);
241
		    System.out.println("\tSkipping to next recognized token.");
242
		}
243
		confused = true;
244
	    }
245
	}
246
    }
247

    
248
    // -----------------------------------------------------------------
249

    
250
    /**
251
     * <p>Normalize a public identifier.</p>
252
     *
253
     * <p>Public identifiers must be normalized according to the following
254
     * rules before comparisons between them can be made:</p>
255
     *
256
     * <ul>
257
     * <li>Whitespace characters are normalized to spaces (e.g., line feeds,
258
     * tabs, etc. become spaces).</li>
259
     * <li>Leading and trailing whitespace is removed.</li>
260
     * <li>Multiple internal whitespaces are normalized to a single
261
     * space.</li>
262
     * </ul>
263
     *
264
     * <p>This method is declared static so that other classes
265
     * can use it directly.</p>
266
     *
267
     * @param publicId The unnormalized public identifier.
268
     *
269
     * @return The normalized identifier.
270
     */
271
    public static String normalize(String publicId) {
272
	String normal = publicId.replace('\t', ' ');
273
	normal = normal.replace('\r', ' ');
274
	normal = normal.replace('\n', ' ');
275
	normal = normal.trim();
276

    
277
	int pos;
278

    
279
	while ((pos = normal.indexOf("  ")) >= 0) {
280
	    normal = normal.substring(0, pos) + normal.substring(pos+1);
281
	}
282

    
283
	return normal;
284
    }
285

    
286
    // -----------------------------------------------------------------
287

    
288
    /**
289
     * <p>Return the next token in the catalog file.</p>
290
     *
291
     * @return The Catalog file token from the input stream.
292
     * @throws IOException If an error occurs reading from the stream.
293
     */
294
    private String nextToken() throws IOException {
295
	String token = "";
296
	int ch, nextch;
297

    
298
	// Skip over leading whitespace and comments
299
	while (true) {
300
	    // skip leading whitespace
301
	    ch = catfile.read();
302
	    while (ch <= ' ') {      // all ctrls are whitespace
303
		ch = catfile.read();
304
		if (ch < 0) {
305
		    return null;
306
		}
307
	    }
308

    
309
	    // now 'ch' is the current char from the file
310
	    nextch = catfile.read();
311
	    if (nextch < 0) {
312
		return null;
313
	    }
314

    
315
	    if (ch == '-' && nextch == '-') {
316
		// we've found a comment, skip it...
317
		ch = ' ';
318
		nextch = nextChar();
319
		while (ch != '-' || nextch != '-') {
320
		    ch = nextch;
321
		    nextch = nextChar();
322
		}
323

    
324
		// Ok, we've found the end of the comment,
325
		// loop back to the top and start again...
326
	    } else {
327
		stack[++top] = nextch;
328
		stack[++top] = ch;
329
		break;
330
	    }
331
	}
332

    
333
	ch = nextChar();
334
	if (ch == '"' || ch == '\'') {
335
	    int quote = ch;
336
	    while ((ch = nextChar()) != quote) {
337
		char[] chararr = new char[1];
338
		chararr[0] = (char) ch;
339
		String s = new String(chararr);
340
		token = token.concat(s);
341
	    }
342
	    return token;
343
	} else {
344
	    // return the next whitespace or comment delimited
345
	    // string
346
	    while (ch > ' ') {
347
		nextch = nextChar();
348
		if (ch == '-' && nextch == '-') {
349
		    stack[++top] = ch;
350
		    stack[++top] = nextch;
351
		    return token;
352
		} else {
353
		    char[] chararr = new char[1];
354
		    chararr[0] = (char) ch;
355
		    String s = new String(chararr);
356
		    token = token.concat(s);
357
		    ch = nextch;
358
		}
359
	    }
360
	    return token;
361
	}
362
    }
363

    
364
    /**
365
     * <p>Return the next logical character from the input stream.</p>
366
     *
367
     * @return The next (logical) character from the input stream. The
368
     * character may be buffered from a previous lookahead.
369
     *
370
     * @throws IOException If an error occurs reading from the stream.
371
     */
372
    private int nextChar() throws IOException {
373
	if (top < 0) {
374
	    return catfile.read();
375
	} else {
376
	    return stack[top--];
377
	}
378
    }
379

    
380
    /**
381
     * <p>Print debug message (if the debug level is high enough).</p>
382
     *
383
     * @param level The debug level of this message. This message
384
     * will only be
385
     * displayed if the current debug level is at least equal to this
386
     * value.
387
     * @param message The text of the message.
388
     * @param token The catalog file token being processed.
389
     */
390
    private void debug(int level, String message, String token) {
391
	if (debug >= level) {
392
	    System.out.println(message + ": " + token);
393
	}
394
    }
395

    
396
    /**
397
     * <p>Print debug message (if the debug level is high enough).</p>
398
     *
399
     * @param level The debug level of this message. This message
400
     * will only be
401
     * displayed if the current debug level is at least equal to this
402
     * value.
403
     * @param message The text of the message.
404
     * @param token The catalog file token being processed.
405
     * @param spec The argument to the token.
406
     */
407
    private void debug(int level, String message, String token, String spec) {
408
	if (debug >= level) {
409
	    System.out.println(message + ": " + token + " " + spec);
410
	}
411
    }
412

    
413
    /**
414
     * <p>Print debug message (if the debug level is high enough).</p>
415
     *
416
     * @param level The debug level of this message. This message
417
     * will only be
418
     * displayed if the current debug level is at least equal to this
419
     * value.
420
     * @param message The text of the message.
421
     * @param token The catalog file token being processed.
422
     * @param spec1 The first argument to the token.
423
     * @param spec2 The second argument to the token.
424
     */
425
    private void debug(int level, String message,
426
		       String token, String spec1, String spec2) {
427
	if (debug >= level) {
428
	    System.out.println(message + ": " + token + " " + spec1);
429
	    System.out.println("\t" + spec2);
430
	}
431
    }
432
}
// (4-4/11)