View Javadoc

1   package com.wutka.dtd;
2   
3   import java.io.*;
4   import java.util.*;
5   
/**
 * Lexical scanner for DTDs.
 *
 * @author Mark Wutka
 * @version $Revision: 1.19 $ $Date: 2002/07/31 00:19:10 $ by $Author: wutka $
 */
11  
12  class Scanner
13  {
14  	public static final TokenType LTQUES = new TokenType(0, "LTQUES");
15  	public static final TokenType IDENTIFIER = new TokenType(1, "IDENTIFIER");
16  	public static final TokenType EQUAL = new TokenType(2, "EQUAL");
17  	public static final TokenType LPAREN = new TokenType(3, "LPAREN");
18  	public static final TokenType RPAREN = new TokenType(4, "RPAREN");
19  	public static final TokenType COMMA = new TokenType(5, "COMMA");
20  	public static final TokenType STRING = new TokenType(6, "STRING");
21  	public static final TokenType QUESGT = new TokenType(7, "QUESGT");
22  	public static final TokenType LTBANG = new TokenType(8, "LTBANG");
23  	public static final TokenType GT = new TokenType(9, "GT");
24  	public static final TokenType PIPE = new TokenType(10, "PIPE");
25  	public static final TokenType QUES = new TokenType(11, "QUES");
26  	public static final TokenType PLUS = new TokenType(12, "PLUS");
27  	public static final TokenType ASTERISK = new TokenType(13, "ASTERISK");
28  	public static final TokenType LT = new TokenType(14, "LT");
29  	public static final TokenType EOF = new TokenType(15, "EOF");
30  	public static final TokenType COMMENT = new TokenType(16, "COMMENT");
31  	public static final TokenType PERCENT = new TokenType(17, "PERCENT");
32  	public static final TokenType CONDITIONAL =
33          new TokenType(18, "CONDITIONAL");
34  	public static final TokenType ENDCONDITIONAL =
35          new TokenType(19, "ENDCONDITIONAL");
36      public static final TokenType NMTOKEN = new TokenType(20, "NMTOKEN");
37  
38      protected class StreamInfo
39      {
40          String      id;
41          Reader      in;
42          int         lineNumber = 1;
43          int         column = 1;
44  
45          StreamInfo(String id, Reader in)
46          {
47              this.id = id;
48              this.in = in;
49          }
50      };
51  
    /** Source currently being read. */
    protected StreamInfo in;
    /** Suspended outer sources; created lazily by expandEntity() and popped by readNextChar() when the current source ends. */
    protected Stack inputStreams;
	/** One-token lookahead filled by peek() and cleared by get(); null when nothing is buffered. */
	protected Token nextToken;
	/** One-character lookahead; 0 means nothing is buffered. */
	protected int nextChar;
    /** Set once readNextToken() has returned the EOF token; a further read past it throws. */
    protected boolean atEOF;
    /** When true, read() echoes every consumed character to System.out. */
    protected boolean trace;
    /** Pending in-memory replacement text, read in preference to the stream; null when there is none. */
    protected char[] expandBuffer;
    /** Index of the next unread character in expandBuffer; set to -1 when the buffer is cleared. */
    protected int expandPos;
    /** Maps reference text of the form "%name;" to its replacement text (filled by addEntity()). */
    protected Hashtable entityExpansion;
    /** Resolver asked by expandEntity() for references not found in entityExpansion. */
    protected EntityExpansion expander;
62  
63  	public Scanner(Reader inReader, EntityExpansion anExpander)
64  	{
65          this(inReader, false, anExpander);
66  	}
67  
68  	public Scanner(Reader inReader, boolean doTrace, EntityExpansion anExpander)
69      {
70          in = new StreamInfo("", inReader);
71          atEOF = false;
72          trace = doTrace;
73          expandBuffer = null;
74          entityExpansion = new Hashtable();
75          expander = anExpander;
76      }
77  
78  	public Token peek()
79  		throws IOException
80  	{
81  		if (nextToken == null)
82  		{
83  			nextToken = readNextToken();
84  		}
85  
86  		return nextToken;
87  	}
88  
89  	public Token get()
90  		throws IOException
91  	{
92  		if (nextToken == null)
93  		{
94  			nextToken = readNextToken();
95  		}
96  
97  		Token retval = nextToken;
98  		nextToken = null;
99  
100 		return retval;
101 	}
102 
103     protected int readNextChar()
104         throws IOException
105     {
106         int ch = in.in.read();
107 
108         if (ch < 0)
109         {
110             if ((inputStreams != null) && (!inputStreams.empty()))
111             {
112                 in.in.close();
113                 in = (StreamInfo) inputStreams.pop();
114                 return readNextChar();
115             }
116         }
117         return ch;
118     }
119 
120 	protected int peekChar()
121 		throws IOException
122 	{
123         if (expandBuffer != null)
124         {
125             return (int) expandBuffer[expandPos];
126         }
127 
128 		if (nextChar == 0)
129 		{
130 			nextChar = readNextChar();
131             in.column++;
132             if (nextChar == '\n')
133             {
134                 in.lineNumber++;
135                 in.column=1;
136             }
137 		}
138 
139 		return nextChar;
140 	}
141 
142 	protected int read()
143 		throws IOException
144 	{
145         if (expandBuffer != null)
146         {
147             int expNextChar = expandBuffer[expandPos++];
148             if (expandPos >= expandBuffer.length)
149             {
150                 expandPos = -1;
151                 expandBuffer = null;
152             }
153             if (trace)
154             {
155                 System.out.print((char) expNextChar);
156             }
157             return expNextChar;
158         }
159 		if (nextChar == 0)
160 		{
161 			peekChar();
162 		}
163 
164 		int retval = nextChar;
165 		nextChar = 0;
166 
167         if (trace)
168         {
169             System.out.print((char) retval);
170         }
171 		return retval;
172 	}
173 
174     public String getUntil(char stopChar)
175         throws IOException
176     {
177         StringBuffer out = new StringBuffer();
178 
179         int ch;
180 
181         while ((ch = read()) >= 0)
182         {
183             if (ch == stopChar)
184             {
185                 return out.toString();
186             }
187             out.append((char) ch);
188         }
189         return out.toString();
190     }
191 
192     public void skipUntil(char stopChar)
193         throws IOException
194     {
195         int ch;
196 
197         while ((ch = read()) >= 0)
198         {
199             if (ch == stopChar)
200             {
201                 return;
202             }
203         }
204         return;
205     }
206 
207 	protected Token readNextToken()
208 		throws IOException
209 	{
210 		for (;;)
211 		{
212 			int ch = read();
213 
214 			if (ch == '<')
215 			{
216 				ch = peekChar();
217 				if (ch == '!')
218 				{
219 					read();
220 
221                     if (peekChar() == '[')
222                     {
223                         read();
224 
225                         return new Token(CONDITIONAL);
226                     }
227 
228 					if (peekChar() != '-')
229 					{
230 						return new Token(LTBANG);
231 					}
232 					else
233 					{
234 						read();
235 						if (peekChar() != '-')
236 						{
237                             throw new DTDParseException(getUriId(),
238 								"Invalid character sequence <!-"+read(),
239                                 getLineNumber(), getColumn());
240 						}
241 						read();
242 
243 						StringBuffer buff = new StringBuffer();
244 						for (;;)
245 						{
246                             if (peekChar() < 0)
247                             {
248                                 throw new DTDParseException(getUriId(),
249                                     "Unterminated comment: <!--"+
250                                     buff.toString(),
251                                     getLineNumber(), getColumn());
252                             }
253 
254 							if (peekChar() != '-')
255 							{
256 								buff.append((char) read());
257 							}
258 							else
259 							{
260 								read();
261                                 if (peekChar() < 0)
262                                 {
263                                     throw new DTDParseException(getUriId(),
264                                         "Unterminated comment: <!--"+
265                                         buff.toString(),
266                                         getLineNumber(), getColumn());
267                                 }
268 								if (peekChar() == '-')
269 								{
270 									read();
271 									if (peekChar() != '>')
272 									{
273                                         throw new DTDParseException(getUriId(),
274 											"Invalid character sequence --"+
275 											read(), getLineNumber(), getColumn());
276 									}
277 									read();
278 									return new Token(COMMENT, buff.toString());
279 								}
280 								else
281 								{
282 									buff.append('-');
283 								}
284 							}
285 						}
286 					}
287 				}
288 				else if (ch == '?')
289 				{
290 					read();
291 					return new Token(LTQUES);
292 				}
293 				else
294 				{
295 					return new Token(LT);
296 				}
297 			}
298 			else if (ch == '?')
299 			{
300 // Need to treat ?> as two separate tokens because
301 // <!ELEMENT blah (foo)?> needs the ? as a QUES, not QUESGT
302 /*				ch = peekChar();
303 
304 				if (ch == '>')
305 				{
306 					read();
307 					return new Token(QUESGT);
308 				}
309 				else
310 				{
311 					return new Token(QUES);
312 				}*/
313 				return new Token(QUES);
314 			}
315 			else if ((ch == '"') || (ch == '\''))
316 			{
317 				int quoteChar = ch;
318 
319 				StringBuffer buff = new StringBuffer();
320 				while (peekChar() != quoteChar)
321 				{
322 					ch = read();
323 					if (ch == '//')
324 					{
325 						buff.append((char) read());
326 					}
327                     else if (ch < 0)
328                     {
329                         break;  // IF EOF before getting end quote
330                     }
331 					else
332 					{
333 						buff.append((char) ch);
334 					}
335 				}
336 				read();
337 				return new Token(STRING, buff.toString());
338 			}
339 			else if (ch == '(')
340 			{
341 				return new Token(LPAREN);
342 			}
343 			else if (ch == ')')
344 			{
345 				return new Token(RPAREN);
346 			}
347 			else if (ch == '|')
348 			{
349 				return new Token(PIPE);
350 			}
351 			else if (ch == '>')
352 			{
353 				return new Token(GT);
354 			}
355 			else if (ch == '=')
356 			{
357 				return new Token(EQUAL);
358 			}
359 			else if (ch == '*')
360 			{
361 				return new Token(ASTERISK);
362 			}
363             else if (ch == ']')
364             {
365                 if (read() != ']')
366                 {
367                     throw new DTDParseException(getUriId(),
368                         "Illegal character in input stream: "+ch,
369                         getLineNumber(), getColumn());
370                 }
371                 if (read() != '>')
372                 {
373                     throw new DTDParseException(getUriId(),
374                         "Illegal character in input stream: "+ch,
375                         getLineNumber(), getColumn());
376                 }
377 
378                 return new Token(ENDCONDITIONAL);
379             }
380 			else if (ch == '#')
381 			{
382 				StringBuffer buff = new StringBuffer();
383 				buff.append((char) ch);
384 
385                 if (isIdentifierChar((char) peekChar()))
386                 {
387                     buff.append((char) read());
388 
389 				    while (isNameChar((char) peekChar()))
390 				    {
391 					    buff.append((char) read());
392 				    }
393                 }
394 				return new Token(IDENTIFIER, buff.toString());
395 			}
396 			else if ((ch == '&') || (ch == '%'))
397 			{
398                 if ((ch == '%') && Character.isWhitespace((char)peekChar()))
399                 {
400                     return new Token(PERCENT);
401                 }
402 
403                 boolean peRef = (ch == '%');
404 
405 				StringBuffer buff = new StringBuffer();
406 				buff.append((char) ch);
407 
408                 if (isIdentifierChar((char) peekChar()))
409                 {
410                     buff.append((char) read());
411 				    while (isNameChar((char) peekChar()))
412 				    {
413 					    buff.append((char) read());
414 				    }
415                 }
416 
417 				if (read() != ';')
418 				{
419                     throw new DTDParseException(getUriId(),
420                                 "Expected ';' after reference "+
421                                 buff.toString()+", found '"+ch+"'",
422                                 getLineNumber(), getColumn());
423 				}
424                 buff.append(';');
425 
426                 if (peRef)
427                 {
428                     if (expandEntity(buff.toString()))
429                     {
430                         continue;
431                     }
432                     else
433                     {
434                         // MAW: Added version 1.17
435                         // If the entity can't be expanded, don't return it, skip it
436                         continue;
437                     }
438                 }
439 				return new Token(IDENTIFIER, buff.toString());
440 			}
441 			else if (ch == '+')
442 			{
443 				return new Token(PLUS);
444 			}
445 			else if (ch == ',')
446 			{
447 				return new Token(COMMA);
448 			}
449 			else if (isIdentifierChar((char) ch))
450 			{
451 				StringBuffer buff = new StringBuffer();
452 				buff.append((char) ch);
453 
454 				while (isNameChar((char) peekChar()))
455 				{
456 					buff.append((char) read());
457 				}
458 				return new Token(IDENTIFIER, buff.toString());
459 			}
460 			else if (isNameChar((char) ch))
461 			{
462 				StringBuffer buff = new StringBuffer();
463 				buff.append((char) ch);
464 
465 				while (isNameChar((char) peekChar()))
466 				{
467 					buff.append((char) read());
468 				}
469 				return new Token(NMTOKEN, buff.toString());
470 			}
471 			else if (ch < 0)
472 			{
473                 if (atEOF)
474                 {
475                     throw new IOException("Read past EOF");
476                 }
477                 atEOF = true;
478 				return new Token(EOF);
479 			}
480 			else if (Character.isWhitespace((char) ch))
481 			{
482 				continue;
483 			}
484 			else
485 			{
486                 throw new DTDParseException(getUriId(),
487                                 "Illegal character in input stream: "+ch,
488                                 getLineNumber(), getColumn());
489 			}
490 		}
491 	}
492 
493     public void skipConditional()
494         throws IOException
495     {
496 // 070401 MAW: Fix for nested conditionals provided by Noah Fike
497         // BEGIN CHANGE
498         int ch = 0;
499         int nestingDepth = 0; // Add nestingDepth parameter
500 
501 //    Everything is ignored within an ignored section, except the
502 //    sub-section delimiters '<![' and ']]>'. These must be balanced,
503 //    but no section keyword is required:
504 //    Conditional Section
505 //[61] conditionalSect ::=  includeSect | ignoreSect
506 //[62] includeSect ::=  '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
507 //[63] ignoreSect ::=  '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
508 //[64] ignoreSectContents ::=  Ignore ('<![' ignoreSectContents ']]>' Ignore)*
509 //[65] Ignore ::=  Char* - (Char* ('<![' | ']]>') Char*)
510 
511         for (;;)
512         {
513             if ( ch != ']' )
514             {
515                 ch = read();
516             }
517             if (ch == ']')
518             {
519                 ch = read();
520                 if (ch == ']')
521                 {
522                     ch = read();
523                     if (ch == '>')
524                     {
525                         if ( nestingDepth == 0)
526                         {
527                             // The end of the IGNORE conditional section
528                             // has been found.  Break out of for loop.
529                             break;
530                         }
531                         else
532                         {
533                             // We are within an ignoreSectContents section.  Decrement
534                             // the nesting depth to represent that this section has
535                             // been ended.
536                             nestingDepth--;
537                         }
538                     }
539                 }
540             }
541             // See if this is the first character of the beginning of a new section.
542             if (ch == '<')
543             {
544                 ch = read();
545                 if ( ch == '!' )
546                 {
547                     ch = read();
548                     if ( ch == '[' )
549                     {
550                         // The beginning of a new ignoreSectContents section
551                         // has been found.  Increment nesting depth.
552                         nestingDepth++;
553                     }
554                 }
555             }
556         }
557 // END CHANGE
558     }
559 
560     public String getUriId() { return(in.id); }
561     public int getLineNumber() { return in.lineNumber; }
562     public int getColumn() { return in.column; }
563 
564 	public boolean isIdentifierChar(char ch)
565 	{
566 		if (isLetter(ch) ||
567 			(ch == '_') || (ch == ':'))
568 		{
569 			return true;
570 		}
571 		return false;
572 	}
573 
574 	public boolean isNameChar(char ch)
575 	{
576 		if (isLetter(ch) || isDigit(ch) ||
577 			(ch == '-') || (ch == '_') || (ch == '.') || (ch == ':')
578 			|| isCombiningChar(ch) || isExtender(ch))
579 		{
580 			return true;
581 		}
582 		return false;
583 	}
584 
585     public boolean isLetter(char ch)
586     {
587         return isBaseChar(ch) || isIdeographic(ch);
588     }
589 
590     public boolean isBaseChar(char ch)
591     {
592         for (int i=0; i < letterRanges.length; i++)
593         {
594             if (ch < letterRanges[i][0]) return false;
595             if ((ch >= letterRanges[i][0]) &&
596                 (ch <= letterRanges[i][1])) return true;
597         }
598         return false;
599     }
600 
601     public boolean isIdeographic(char ch)
602     {
603         if (ch < 0x4e00) return false;
604         if ((ch >= 0x4e00) && (ch <= 0x9fa5)) return true;
605         if (ch == 0x3007) return true;
606         if ((ch >= 0x3021) && (ch <= 0x3029)) return true;
607         return false;
608     }
609 
610     public boolean isDigit(char ch)
611     {
612         if ((ch >= 0x0030) && (ch <= 0x0039)) return true;
613         if (ch < 0x0660) return false;
614         if ((ch >= 0x0660) && (ch <= 0x0669)) return true;
615         if (ch < 0x06f0) return false;
616         if ((ch >= 0x06f0) && (ch <= 0x06f9)) return true;
617         if (ch < 0x0966) return false;
618         if ((ch >= 0x0966) && (ch <= 0x096f)) return true;
619         if (ch < 0x09e6) return false;
620         if ((ch >= 0x09e6) && (ch <= 0x09ef)) return true;
621         if (ch < 0x0a66) return false;
622         if ((ch >= 0x0a66) && (ch <= 0x0a6f)) return true;
623         if (ch < 0x0ae6) return false;
624         if ((ch >= 0x0ae6) && (ch <= 0x0aef)) return true;
625         if (ch < 0x0b66) return false;
626         if ((ch >= 0x0b66) && (ch <= 0x0b6f)) return true;
627         if (ch < 0x0be7) return false;
628         if ((ch >= 0x0be7) && (ch <= 0x0bef)) return true;
629         if (ch < 0x0c66) return false;
630         if ((ch >= 0x0c66) && (ch <= 0x0c6f)) return true;
631         if (ch < 0x0ce6) return false;
632         if ((ch >= 0x0ce6) && (ch <= 0x0cef)) return true;
633         if (ch < 0x0d66) return false;
634         if ((ch >= 0x0d66) && (ch <= 0x0d6f)) return true;
635         if (ch < 0x0e50) return false;
636         if ((ch >= 0x0e50) && (ch <= 0x0e59)) return true;
637         if (ch < 0x0ed0) return false;
638         if ((ch >= 0x0ed0) && (ch <= 0x0ed9)) return true;
639         if (ch < 0x0f20) return false;
640         if ((ch >= 0x0f20) && (ch <= 0x0f29)) return true;
641         return false;
642     }
643 
644 	public boolean isCombiningChar(char ch)
645 	{
646 		if (ch < 0x0300) return false;
647 		if ((ch >= 0x0300) && (ch <= 0x0345)) return true;
648 		if ((ch >= 0x0360) && (ch <= 0x0361)) return true;
649 		if ((ch >= 0x0483) && (ch <= 0x0486)) return true;
650 		if ((ch >= 0x0591) && (ch <= 0x05a1)) return true;
651 		if ((ch >= 0x05a3) && (ch <= 0x05b9)) return true;
652 		if ((ch >= 0x05bb) && (ch <= 0x05bd)) return true;
653 		if (ch == 0x05bf) return true;
654 		if ((ch >= 0x05c1) && (ch <= 0x05c2)) return true;
655 		if (ch == 0x05c4) return true;
656 		if ((ch >= 0x064b) && (ch <= 0x0652)) return true;
657 		if (ch == 0x0670) return true;
658 		if ((ch >= 0x06d6) && (ch <= 0x06dc)) return true;
659 		if ((ch >= 0x06dd) && (ch <= 0x06df)) return true;
660 		if ((ch >= 0x06e0) && (ch <= 0x06e4)) return true;
661 		if ((ch >= 0x06e7) && (ch <= 0x06e8)) return true;
662 		if ((ch >= 0x06ea) && (ch <= 0x06ed)) return true;
663 		if ((ch >= 0x0901) && (ch <= 0x0903)) return true;
664 		if (ch == 0x093c) return true;
665 		if ((ch >= 0x093e) && (ch <= 0x094c)) return true;
666 		if (ch == 0x094d) return true;
667 		if ((ch >= 0x0951) && (ch <= 0x0954)) return true;
668 		if ((ch >= 0x0962) && (ch <= 0x0963)) return true;
669 		if ((ch >= 0x0981) && (ch <= 0x0983)) return true;
670 		if (ch == 0x09bc) return true;
671 		if (ch == 0x09be) return true;
672 		if (ch == 0x09bf) return true;
673 		if ((ch >= 0x09c0) && (ch <= 0x09c4)) return true;
674 		if ((ch >= 0x09c7) && (ch <= 0x09c8)) return true;
675 		if ((ch >= 0x09cb) && (ch <= 0x09cd)) return true;
676 		if (ch == 0x09d7) return true;
677 		if ((ch >= 0x09e2) && (ch <= 0x09e3)) return true;
678 		if (ch == 0x0a02) return true;
679 		if (ch == 0x0a3c) return true;
680 		if (ch == 0x0a3e) return true;
681 		if (ch == 0x0a3f) return true;
682 		if ((ch >= 0x0a40) && (ch <= 0x0a42)) return true;
683 		if ((ch >= 0x0a47) && (ch <= 0x0a48)) return true;
684 		if ((ch >= 0x0a4b) && (ch <= 0x0a4d)) return true;
685 		if ((ch >= 0x0a70) && (ch <= 0x0a71)) return true;
686 		if ((ch >= 0x0a81) && (ch <= 0x0a83)) return true;
687 		if (ch == 0x0abc) return true;
688 		if ((ch >= 0x0abe) && (ch <= 0x0ac5)) return true;
689 		if ((ch >= 0x0ac7) && (ch <= 0x0ac9)) return true;
690 		if ((ch >= 0x0acb) && (ch <= 0x0acd)) return true;
691 		if ((ch >= 0x0b01) && (ch <= 0x0b03)) return true;
692 		if (ch == 0x0b3c) return true;
693 		if ((ch >= 0x0b3e) && (ch <= 0x0b43)) return true;
694 		if ((ch >= 0x0b47) && (ch <= 0x0b48)) return true;
695 		if ((ch >= 0x0b4b) && (ch <= 0x0b4d)) return true;
696 		if ((ch >= 0x0b56) && (ch <= 0x0b57)) return true;
697 		if ((ch >= 0x0b82) && (ch <= 0x0b83)) return true;
698 		if ((ch >= 0x0bbe) && (ch <= 0x0bc2)) return true;
699 		if ((ch >= 0x0bc6) && (ch <= 0x0bc8)) return true;
700 		if ((ch >= 0x0bca) && (ch <= 0x0bcd)) return true;
701 		if (ch == 0x0bd7) return true;
702 		if ((ch >= 0x0c01) && (ch <= 0x0c03)) return true;
703 		if ((ch >= 0x0c3e) && (ch <= 0x0c44)) return true;
704 		if ((ch >= 0x0c46) && (ch <= 0x0c48)) return true;
705 		if ((ch >= 0x0c4a) && (ch <= 0x0c4d)) return true;
706 		if ((ch >= 0x0c55) && (ch <= 0x0c56)) return true;
707 		if ((ch >= 0x0c82) && (ch <= 0x0c83)) return true;
708 		if ((ch >= 0x0cbe) && (ch <= 0x0cc4)) return true;
709 		if ((ch >= 0x0cc6) && (ch <= 0x0cc8)) return true;
710 		if ((ch >= 0x0cca) && (ch <= 0x0ccd)) return true;
711 		if ((ch >= 0x0cd5) && (ch <= 0x0cd6)) return true;
712 		if ((ch >= 0x0d02) && (ch <= 0x0d03)) return true;
713 		if ((ch >= 0x0d3e) && (ch <= 0x0d43)) return true;
714 		if ((ch >= 0x0d46) && (ch <= 0x0d48)) return true;
715 		if ((ch >= 0x0d4a) && (ch <= 0x0d4d)) return true;
716 		if (ch == 0x0d57) return true;
717 		if (ch == 0x0e31) return true;
718 		if ((ch >= 0x0e34) && (ch <= 0x0e3a)) return true;
719 		if ((ch >= 0x0e47) && (ch <= 0x0e4e)) return true;
720 		if (ch == 0x0eb1) return true;
721 		if ((ch >= 0x0eb4) && (ch <= 0x0eb9)) return true;
722 		if ((ch >= 0x0ebb) && (ch <= 0x0ebc)) return true;
723 		if ((ch >= 0x0ec8) && (ch <= 0x0ecd)) return true;
724 		if ((ch >= 0x0f18) && (ch <= 0x0f19)) return true;
725 		if (ch == 0x0f35) return true;
726 		if (ch == 0x0f37) return true;
727 		if (ch == 0x0f39) return true;
728 		if (ch == 0x0f3e) return true;
729 		if (ch == 0x0f3f) return true;
730 		if ((ch >= 0x0f71) && (ch <= 0x0f84)) return true;
731 		if ((ch >= 0x0f86) && (ch <= 0x0f8b)) return true;
732 		if ((ch >= 0x0f90) && (ch <= 0x0f95)) return true;
733 		if (ch == 0x0f97) return true;
734 		if ((ch >= 0x0f99) && (ch <= 0x0fad)) return true;
735 		if ((ch >= 0x0fb1) && (ch <= 0x0fb7)) return true;
736 		if (ch == 0x0fb9) return true;
737 		if ((ch >= 0x20d0) && (ch <= 0x20dc)) return true;
738 		if (ch == 0x20e1) return true;
739 		if ((ch >= 0x302a) && (ch <= 0x302f)) return true;
740 		if (ch == 0x3099) return true;
741 		if (ch == 0x309a) return true;
742 
743 		return false;
744 	}
745 
746 	public boolean isExtender(char ch)
747 	{
748 		if (ch < 0x00b7) return false;
749 
750 		if ((ch == 0x00b7) || (ch == 0x02d0) || (ch == 0x02d1) ||
751 			(ch == 0x0387) || (ch == 0x0640) || (ch == 0x0e46) ||
752 			((ch >= 0x3031) && (ch <= 0x3035)) ||
753 			((ch >= 0x309d) && (ch <= 0x309e)) ||
754 			((ch >= 0x30fc) && (ch <= 0x30fe))) return true;
755 
756 		return false;
757 	}
758 
759     public boolean expandEntity(String entityName)
760         throws IOException
761     {
762         String entity = (String) entityExpansion.get(entityName);
763         if (entity != null)
764         {
765             expand(entity.toCharArray());
766             return true;
767         }
768 
769         entityName = entityName.substring(1, entityName.length()-1);
770 
771         //System.out.println("Trying to expand: "+entityName);
772         DTDEntity realEntity = expander.expandEntity(entityName);
773         if (realEntity != null)
774         {
775             //System.out.println("Expanded: "+entityName);
776             Reader entityIn = realEntity.getReader();
777             if (entityIn != null)
778             {
779                 if (inputStreams == null)
780                 {
781                     inputStreams = new Stack();
782                 }
783 
784                 inputStreams.push(in);
785                 in = new StreamInfo(realEntity.getExternalId(), entityIn);
786 
787                 return true;
788             }
789         }
790 
791         return false;
792     }
793 
794     public void expand(char[] expandChars)
795     {
796         if (expandBuffer != null)
797         {
798             int oldCharsLeft = expandBuffer.length - expandPos;
799 
800             char[] newExp = new char[oldCharsLeft + expandChars.length];
801             System.arraycopy(expandChars, 0, newExp, 0,
802                 expandChars.length);
803             System.arraycopy(expandBuffer, expandPos, newExp,
804                 expandChars.length, oldCharsLeft);
805             expandPos = 0;
806             expandBuffer = newExp;
807             if (expandBuffer.length == 0)
808             {
809                 expandBuffer = null;
810                 expandPos = -1;
811             }
812         }
813         else
814         {
815             expandBuffer = expandChars;
816             expandPos = 0;
817             if (expandBuffer.length == 0)
818             {
819                 expandBuffer = null;
820                 expandPos = -1;
821             }
822         }
823     }
824 
825     public void addEntity(String entityName, String entityValue)
826     {
827         entityExpansion.put("%"+entityName+";", entityValue);
828     }
829 
    /**
     * Inclusive { first, last } character ranges consulted by isBaseChar().
     * A single character is listed as a range whose two ends are equal.
     * The entries must stay in ascending order: isBaseChar() stops scanning
     * at the first range that starts above the character being tested.
     * NOTE(review): looks like the XML 1.0 BaseChar production - confirm
     * against the specification before editing.
     */
    public static char letterRanges[][] = {
		{ 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00C0, 0x00D6 },
		{ 0x00D8, 0x00F6 }, { 0x00F8, 0x00FF }, { 0x0100, 0x0131 },
		{ 0x0134, 0x013E }, { 0x0141, 0x0148 }, { 0x014A, 0x017E },
		{ 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 }, { 0x01F4, 0x01F5 },
		{ 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, { 0x02BB, 0x02C1 },
		{ 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
		{ 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
		{ 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
		{ 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, { 0x0401, 0x040C },
		{ 0x040E, 0x044F }, { 0x0451, 0x045C }, { 0x045E, 0x0481 },
		{ 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 }, { 0x04CB, 0x04CC },
		{ 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, { 0x04F8, 0x04F9 },
		{ 0x0531, 0x0556 }, { 0x0559, 0x0559 }, { 0x0561, 0x0586 },
		{ 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 }, { 0x0621, 0x063A },
		{ 0x0641, 0x064A }, { 0x0671, 0x06B7 }, { 0x06BA, 0x06BE },
		{ 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 }, { 0x06D5, 0x06D5 },
		{ 0x06E5, 0x06E6 }, { 0x0905, 0x0939 }, { 0x093D, 0x093D },
		{ 0x0958, 0x0961 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 },
		{ 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 },
		{ 0x09B6, 0x09B9 }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 },
		{ 0x09F0, 0x09F1 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
		{ 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
		{ 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C },
		{ 0x0A5E, 0x0A5E }, { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B },
		{ 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 },
		{ 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 },
		{ 0x0ABD, 0x0ABD }, { 0x0AE0, 0x0AE0 }, { 0x0B05, 0x0B0C },
		{ 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 },
		{ 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D },
		{ 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A },
		{ 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A },
		{ 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 },
		{ 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 },
		{ 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
		{ 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },
		{ 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
		{ 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE },
		{ 0x0CE0, 0x0CE1 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 },
		{ 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },
		{ 0x0E01, 0x0E2E }, { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 },
		{ 0x0E40, 0x0E45 }, { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 },
		{ 0x0E87, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
		{ 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
		{ 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB },
		{ 0x0EAD, 0x0EAE }, { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 },
		{ 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 },
		{ 0x0F49, 0x0F69 }, { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
		{ 0x1100, 0x1100 }, { 0x1102, 0x1103 }, { 0x1105, 0x1107 },
		{ 0x1109, 0x1109 }, { 0x110B, 0x110C }, { 0x110E, 0x1112 },
		{ 0x113C, 0x113C }, { 0x113E, 0x113E }, { 0x1140, 0x1140 },
		{ 0x114C, 0x114C }, { 0x114E, 0x114E }, { 0x1150, 0x1150 },
		{ 0x1154, 0x1155 }, { 0x1159, 0x1159 }, { 0x115F, 0x1161 },
		{ 0x1163, 0x1163 }, { 0x1165, 0x1165 }, { 0x1167, 0x1167 },
		{ 0x1169, 0x1169 }, { 0x116D, 0x116E }, { 0x1172, 0x1173 },
		{ 0x1175, 0x1175 }, { 0x119E, 0x119E }, { 0x11A8, 0x11A8 },
		{ 0x11AB, 0x11AB }, { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 },
		{ 0x11BA, 0x11BA }, { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB },
		{ 0x11F0, 0x11F0 }, { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B },
		{ 0x1EA0, 0x1EF9 }, { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D },
		{ 0x1F20, 0x1F45 }, { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 },
		{ 0x1F59, 0x1F59 }, { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D },
		{ 0x1F5F, 0x1F7D }, { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC },
		{ 0x1FBE, 0x1FBE }, { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC },
		{ 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC },
		{ 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 },
		{ 0x212A, 0x212B }, { 0x212E, 0x212E }, { 0x2180, 0x2182 },
		{ 0x3041, 0x3094 }, { 0x30A1, 0x30FA }, { 0x3105, 0x312C },
		{ 0xAC00, 0xD7A3 }
    };
900 }