1 package com.wutka.dtd;
2
3 import java.io.*;
4 import java.util.*;
5
/**
 * Lexical scanner for DTDs.
 *
 * @author Mark Wutka
 * @version $Revision: 1.19 $ $Date: 2002/07/31 00:19:10 $ by $Author: wutka $
 */
11
12 class Scanner
13 {
14 public static final TokenType LTQUES = new TokenType(0, "LTQUES");
15 public static final TokenType IDENTIFIER = new TokenType(1, "IDENTIFIER");
16 public static final TokenType EQUAL = new TokenType(2, "EQUAL");
17 public static final TokenType LPAREN = new TokenType(3, "LPAREN");
18 public static final TokenType RPAREN = new TokenType(4, "RPAREN");
19 public static final TokenType COMMA = new TokenType(5, "COMMA");
20 public static final TokenType STRING = new TokenType(6, "STRING");
21 public static final TokenType QUESGT = new TokenType(7, "QUESGT");
22 public static final TokenType LTBANG = new TokenType(8, "LTBANG");
23 public static final TokenType GT = new TokenType(9, "GT");
24 public static final TokenType PIPE = new TokenType(10, "PIPE");
25 public static final TokenType QUES = new TokenType(11, "QUES");
26 public static final TokenType PLUS = new TokenType(12, "PLUS");
27 public static final TokenType ASTERISK = new TokenType(13, "ASTERISK");
28 public static final TokenType LT = new TokenType(14, "LT");
29 public static final TokenType EOF = new TokenType(15, "EOF");
30 public static final TokenType COMMENT = new TokenType(16, "COMMENT");
31 public static final TokenType PERCENT = new TokenType(17, "PERCENT");
32 public static final TokenType CONDITIONAL =
33 new TokenType(18, "CONDITIONAL");
34 public static final TokenType ENDCONDITIONAL =
35 new TokenType(19, "ENDCONDITIONAL");
36 public static final TokenType NMTOKEN = new TokenType(20, "NMTOKEN");
37
38 protected class StreamInfo
39 {
40 String id;
41 Reader in;
42 int lineNumber = 1;
43 int column = 1;
44
45 StreamInfo(String id, Reader in)
46 {
47 this.id = id;
48 this.in = in;
49 }
50 };
51
52 protected StreamInfo in;
53 protected Stack inputStreams;
54 protected Token nextToken;
55 protected int nextChar;
56 protected boolean atEOF;
57 protected boolean trace;
58 protected char[] expandBuffer;
59 protected int expandPos;
60 protected Hashtable entityExpansion;
61 protected EntityExpansion expander;
62
/**
 * Creates a scanner with tracing switched off.
 *
 * @param inReader   reader supplying the DTD text
 * @param anExpander resolver passed through to the three-argument constructor
 */
public Scanner(Reader inReader, EntityExpansion anExpander)
{
    this(inReader, /* doTrace */ false, anExpander);
}
67
/**
 * Creates a scanner over the given reader.
 *
 * @param inReader   reader supplying the DTD text; registered with an empty id
 * @param doTrace    when true, every character consumed is echoed to System.out
 * @param anExpander resolver used by expandEntity() for entities that were
 *                   not registered through addEntity()
 */
public Scanner(Reader inReader, boolean doTrace, EntityExpansion anExpander)
{
    this.in = new StreamInfo("", inReader);
    this.expander = anExpander;
    this.entityExpansion = new Hashtable();
    this.expandBuffer = null;
    this.trace = doTrace;
    this.atEOF = false;
}
77
78 public Token peek()
79 throws IOException
80 {
81 if (nextToken == null)
82 {
83 nextToken = readNextToken();
84 }
85
86 return nextToken;
87 }
88
89 public Token get()
90 throws IOException
91 {
92 if (nextToken == null)
93 {
94 nextToken = readNextToken();
95 }
96
97 Token retval = nextToken;
98 nextToken = null;
99
100 return retval;
101 }
102
103 protected int readNextChar()
104 throws IOException
105 {
106 int ch = in.in.read();
107
108 if (ch < 0)
109 {
110 if ((inputStreams != null) && (!inputStreams.empty()))
111 {
112 in.in.close();
113 in = (StreamInfo) inputStreams.pop();
114 return readNextChar();
115 }
116 }
117 return ch;
118 }
119
120 protected int peekChar()
121 throws IOException
122 {
123 if (expandBuffer != null)
124 {
125 return (int) expandBuffer[expandPos];
126 }
127
128 if (nextChar == 0)
129 {
130 nextChar = readNextChar();
131 in.column++;
132 if (nextChar == '\n')
133 {
134 in.lineNumber++;
135 in.column=1;
136 }
137 }
138
139 return nextChar;
140 }
141
142 protected int read()
143 throws IOException
144 {
145 if (expandBuffer != null)
146 {
147 int expNextChar = expandBuffer[expandPos++];
148 if (expandPos >= expandBuffer.length)
149 {
150 expandPos = -1;
151 expandBuffer = null;
152 }
153 if (trace)
154 {
155 System.out.print((char) expNextChar);
156 }
157 return expNextChar;
158 }
159 if (nextChar == 0)
160 {
161 peekChar();
162 }
163
164 int retval = nextChar;
165 nextChar = 0;
166
167 if (trace)
168 {
169 System.out.print((char) retval);
170 }
171 return retval;
172 }
173
174 public String getUntil(char stopChar)
175 throws IOException
176 {
177 StringBuffer out = new StringBuffer();
178
179 int ch;
180
181 while ((ch = read()) >= 0)
182 {
183 if (ch == stopChar)
184 {
185 return out.toString();
186 }
187 out.append((char) ch);
188 }
189 return out.toString();
190 }
191
192 public void skipUntil(char stopChar)
193 throws IOException
194 {
195 int ch;
196
197 while ((ch = read()) >= 0)
198 {
199 if (ch == stopChar)
200 {
201 return;
202 }
203 }
204 return;
205 }
206
207 protected Token readNextToken()
208 throws IOException
209 {
210 for (;;)
211 {
212 int ch = read();
213
214 if (ch == '<')
215 {
216 ch = peekChar();
217 if (ch == '!')
218 {
219 read();
220
221 if (peekChar() == '[')
222 {
223 read();
224
225 return new Token(CONDITIONAL);
226 }
227
228 if (peekChar() != '-')
229 {
230 return new Token(LTBANG);
231 }
232 else
233 {
234 read();
235 if (peekChar() != '-')
236 {
237 throw new DTDParseException(getUriId(),
238 "Invalid character sequence <!-"+read(),
239 getLineNumber(), getColumn());
240 }
241 read();
242
243 StringBuffer buff = new StringBuffer();
244 for (;;)
245 {
246 if (peekChar() < 0)
247 {
248 throw new DTDParseException(getUriId(),
249 "Unterminated comment: <!--"+
250 buff.toString(),
251 getLineNumber(), getColumn());
252 }
253
254 if (peekChar() != '-')
255 {
256 buff.append((char) read());
257 }
258 else
259 {
260 read();
261 if (peekChar() < 0)
262 {
263 throw new DTDParseException(getUriId(),
264 "Unterminated comment: <!--"+
265 buff.toString(),
266 getLineNumber(), getColumn());
267 }
268 if (peekChar() == '-')
269 {
270 read();
271 if (peekChar() != '>')
272 {
273 throw new DTDParseException(getUriId(),
274 "Invalid character sequence --"+
275 read(), getLineNumber(), getColumn());
276 }
277 read();
278 return new Token(COMMENT, buff.toString());
279 }
280 else
281 {
282 buff.append('-');
283 }
284 }
285 }
286 }
287 }
288 else if (ch == '?')
289 {
290 read();
291 return new Token(LTQUES);
292 }
293 else
294 {
295 return new Token(LT);
296 }
297 }
298 else if (ch == '?')
299 {
300
301
302
303
304
305
306
307
308
309
310
311
312
313 return new Token(QUES);
314 }
315 else if ((ch == '"') || (ch == '\''))
316 {
317 int quoteChar = ch;
318
319 StringBuffer buff = new StringBuffer();
320 while (peekChar() != quoteChar)
321 {
322 ch = read();
323 if (ch == '//')
324 {
325 buff.append((char) read());
326 }
327 else if (ch < 0)
328 {
329 break;
330 }
331 else
332 {
333 buff.append((char) ch);
334 }
335 }
336 read();
337 return new Token(STRING, buff.toString());
338 }
339 else if (ch == '(')
340 {
341 return new Token(LPAREN);
342 }
343 else if (ch == ')')
344 {
345 return new Token(RPAREN);
346 }
347 else if (ch == '|')
348 {
349 return new Token(PIPE);
350 }
351 else if (ch == '>')
352 {
353 return new Token(GT);
354 }
355 else if (ch == '=')
356 {
357 return new Token(EQUAL);
358 }
359 else if (ch == '*')
360 {
361 return new Token(ASTERISK);
362 }
363 else if (ch == ']')
364 {
365 if (read() != ']')
366 {
367 throw new DTDParseException(getUriId(),
368 "Illegal character in input stream: "+ch,
369 getLineNumber(), getColumn());
370 }
371 if (read() != '>')
372 {
373 throw new DTDParseException(getUriId(),
374 "Illegal character in input stream: "+ch,
375 getLineNumber(), getColumn());
376 }
377
378 return new Token(ENDCONDITIONAL);
379 }
380 else if (ch == '#')
381 {
382 StringBuffer buff = new StringBuffer();
383 buff.append((char) ch);
384
385 if (isIdentifierChar((char) peekChar()))
386 {
387 buff.append((char) read());
388
389 while (isNameChar((char) peekChar()))
390 {
391 buff.append((char) read());
392 }
393 }
394 return new Token(IDENTIFIER, buff.toString());
395 }
396 else if ((ch == '&') || (ch == '%'))
397 {
398 if ((ch == '%') && Character.isWhitespace((char)peekChar()))
399 {
400 return new Token(PERCENT);
401 }
402
403 boolean peRef = (ch == '%');
404
405 StringBuffer buff = new StringBuffer();
406 buff.append((char) ch);
407
408 if (isIdentifierChar((char) peekChar()))
409 {
410 buff.append((char) read());
411 while (isNameChar((char) peekChar()))
412 {
413 buff.append((char) read());
414 }
415 }
416
417 if (read() != ';')
418 {
419 throw new DTDParseException(getUriId(),
420 "Expected ';' after reference "+
421 buff.toString()+", found '"+ch+"'",
422 getLineNumber(), getColumn());
423 }
424 buff.append(';');
425
426 if (peRef)
427 {
428 if (expandEntity(buff.toString()))
429 {
430 continue;
431 }
432 else
433 {
434
435
436 continue;
437 }
438 }
439 return new Token(IDENTIFIER, buff.toString());
440 }
441 else if (ch == '+')
442 {
443 return new Token(PLUS);
444 }
445 else if (ch == ',')
446 {
447 return new Token(COMMA);
448 }
449 else if (isIdentifierChar((char) ch))
450 {
451 StringBuffer buff = new StringBuffer();
452 buff.append((char) ch);
453
454 while (isNameChar((char) peekChar()))
455 {
456 buff.append((char) read());
457 }
458 return new Token(IDENTIFIER, buff.toString());
459 }
460 else if (isNameChar((char) ch))
461 {
462 StringBuffer buff = new StringBuffer();
463 buff.append((char) ch);
464
465 while (isNameChar((char) peekChar()))
466 {
467 buff.append((char) read());
468 }
469 return new Token(NMTOKEN, buff.toString());
470 }
471 else if (ch < 0)
472 {
473 if (atEOF)
474 {
475 throw new IOException("Read past EOF");
476 }
477 atEOF = true;
478 return new Token(EOF);
479 }
480 else if (Character.isWhitespace((char) ch))
481 {
482 continue;
483 }
484 else
485 {
486 throw new DTDParseException(getUriId(),
487 "Illegal character in input stream: "+ch,
488 getLineNumber(), getColumn());
489 }
490 }
491 }
492
493 public void skipConditional()
494 throws IOException
495 {
496
497
498 int ch = 0;
499 int nestingDepth = 0;
500
501
502
503
504
505
506
507
508
509
510
511 for (;;)
512 {
513 if ( ch != ']' )
514 {
515 ch = read();
516 }
517 if (ch == ']')
518 {
519 ch = read();
520 if (ch == ']')
521 {
522 ch = read();
523 if (ch == '>')
524 {
525 if ( nestingDepth == 0)
526 {
527
528
529 break;
530 }
531 else
532 {
533
534
535
536 nestingDepth--;
537 }
538 }
539 }
540 }
541
542 if (ch == '<')
543 {
544 ch = read();
545 if ( ch == '!' )
546 {
547 ch = read();
548 if ( ch == '[' )
549 {
550
551
552 nestingDepth++;
553 }
554 }
555 }
556 }
557
558 }
559
560 public String getUriId() { return(in.id); }
561 public int getLineNumber() { return in.lineNumber; }
562 public int getColumn() { return in.column; }
563
564 public boolean isIdentifierChar(char ch)
565 {
566 if (isLetter(ch) ||
567 (ch == '_') || (ch == ':'))
568 {
569 return true;
570 }
571 return false;
572 }
573
574 public boolean isNameChar(char ch)
575 {
576 if (isLetter(ch) || isDigit(ch) ||
577 (ch == '-') || (ch == '_') || (ch == '.') || (ch == ':')
578 || isCombiningChar(ch) || isExtender(ch))
579 {
580 return true;
581 }
582 return false;
583 }
584
585 public boolean isLetter(char ch)
586 {
587 return isBaseChar(ch) || isIdeographic(ch);
588 }
589
590 public boolean isBaseChar(char ch)
591 {
592 for (int i=0; i < letterRanges.length; i++)
593 {
594 if (ch < letterRanges[i][0]) return false;
595 if ((ch >= letterRanges[i][0]) &&
596 (ch <= letterRanges[i][1])) return true;
597 }
598 return false;
599 }
600
601 public boolean isIdeographic(char ch)
602 {
603 if (ch < 0x4e00) return false;
604 if ((ch >= 0x4e00) && (ch <= 0x9fa5)) return true;
605 if (ch == 0x3007) return true;
606 if ((ch >= 0x3021) && (ch <= 0x3029)) return true;
607 return false;
608 }
609
610 public boolean isDigit(char ch)
611 {
612 if ((ch >= 0x0030) && (ch <= 0x0039)) return true;
613 if (ch < 0x0660) return false;
614 if ((ch >= 0x0660) && (ch <= 0x0669)) return true;
615 if (ch < 0x06f0) return false;
616 if ((ch >= 0x06f0) && (ch <= 0x06f9)) return true;
617 if (ch < 0x0966) return false;
618 if ((ch >= 0x0966) && (ch <= 0x096f)) return true;
619 if (ch < 0x09e6) return false;
620 if ((ch >= 0x09e6) && (ch <= 0x09ef)) return true;
621 if (ch < 0x0a66) return false;
622 if ((ch >= 0x0a66) && (ch <= 0x0a6f)) return true;
623 if (ch < 0x0ae6) return false;
624 if ((ch >= 0x0ae6) && (ch <= 0x0aef)) return true;
625 if (ch < 0x0b66) return false;
626 if ((ch >= 0x0b66) && (ch <= 0x0b6f)) return true;
627 if (ch < 0x0be7) return false;
628 if ((ch >= 0x0be7) && (ch <= 0x0bef)) return true;
629 if (ch < 0x0c66) return false;
630 if ((ch >= 0x0c66) && (ch <= 0x0c6f)) return true;
631 if (ch < 0x0ce6) return false;
632 if ((ch >= 0x0ce6) && (ch <= 0x0cef)) return true;
633 if (ch < 0x0d66) return false;
634 if ((ch >= 0x0d66) && (ch <= 0x0d6f)) return true;
635 if (ch < 0x0e50) return false;
636 if ((ch >= 0x0e50) && (ch <= 0x0e59)) return true;
637 if (ch < 0x0ed0) return false;
638 if ((ch >= 0x0ed0) && (ch <= 0x0ed9)) return true;
639 if (ch < 0x0f20) return false;
640 if ((ch >= 0x0f20) && (ch <= 0x0f29)) return true;
641 return false;
642 }
643
644 public boolean isCombiningChar(char ch)
645 {
646 if (ch < 0x0300) return false;
647 if ((ch >= 0x0300) && (ch <= 0x0345)) return true;
648 if ((ch >= 0x0360) && (ch <= 0x0361)) return true;
649 if ((ch >= 0x0483) && (ch <= 0x0486)) return true;
650 if ((ch >= 0x0591) && (ch <= 0x05a1)) return true;
651 if ((ch >= 0x05a3) && (ch <= 0x05b9)) return true;
652 if ((ch >= 0x05bb) && (ch <= 0x05bd)) return true;
653 if (ch == 0x05bf) return true;
654 if ((ch >= 0x05c1) && (ch <= 0x05c2)) return true;
655 if (ch == 0x05c4) return true;
656 if ((ch >= 0x064b) && (ch <= 0x0652)) return true;
657 if (ch == 0x0670) return true;
658 if ((ch >= 0x06d6) && (ch <= 0x06dc)) return true;
659 if ((ch >= 0x06dd) && (ch <= 0x06df)) return true;
660 if ((ch >= 0x06e0) && (ch <= 0x06e4)) return true;
661 if ((ch >= 0x06e7) && (ch <= 0x06e8)) return true;
662 if ((ch >= 0x06ea) && (ch <= 0x06ed)) return true;
663 if ((ch >= 0x0901) && (ch <= 0x0903)) return true;
664 if (ch == 0x093c) return true;
665 if ((ch >= 0x093e) && (ch <= 0x094c)) return true;
666 if (ch == 0x094d) return true;
667 if ((ch >= 0x0951) && (ch <= 0x0954)) return true;
668 if ((ch >= 0x0962) && (ch <= 0x0963)) return true;
669 if ((ch >= 0x0981) && (ch <= 0x0983)) return true;
670 if (ch == 0x09bc) return true;
671 if (ch == 0x09be) return true;
672 if (ch == 0x09bf) return true;
673 if ((ch >= 0x09c0) && (ch <= 0x09c4)) return true;
674 if ((ch >= 0x09c7) && (ch <= 0x09c8)) return true;
675 if ((ch >= 0x09cb) && (ch <= 0x09cd)) return true;
676 if (ch == 0x09d7) return true;
677 if ((ch >= 0x09e2) && (ch <= 0x09e3)) return true;
678 if (ch == 0x0a02) return true;
679 if (ch == 0x0a3c) return true;
680 if (ch == 0x0a3e) return true;
681 if (ch == 0x0a3f) return true;
682 if ((ch >= 0x0a40) && (ch <= 0x0a42)) return true;
683 if ((ch >= 0x0a47) && (ch <= 0x0a48)) return true;
684 if ((ch >= 0x0a4b) && (ch <= 0x0a4d)) return true;
685 if ((ch >= 0x0a70) && (ch <= 0x0a71)) return true;
686 if ((ch >= 0x0a81) && (ch <= 0x0a83)) return true;
687 if (ch == 0x0abc) return true;
688 if ((ch >= 0x0abe) && (ch <= 0x0ac5)) return true;
689 if ((ch >= 0x0ac7) && (ch <= 0x0ac9)) return true;
690 if ((ch >= 0x0acb) && (ch <= 0x0acd)) return true;
691 if ((ch >= 0x0b01) && (ch <= 0x0b03)) return true;
692 if (ch == 0x0b3c) return true;
693 if ((ch >= 0x0b3e) && (ch <= 0x0b43)) return true;
694 if ((ch >= 0x0b47) && (ch <= 0x0b48)) return true;
695 if ((ch >= 0x0b4b) && (ch <= 0x0b4d)) return true;
696 if ((ch >= 0x0b56) && (ch <= 0x0b57)) return true;
697 if ((ch >= 0x0b82) && (ch <= 0x0b83)) return true;
698 if ((ch >= 0x0bbe) && (ch <= 0x0bc2)) return true;
699 if ((ch >= 0x0bc6) && (ch <= 0x0bc8)) return true;
700 if ((ch >= 0x0bca) && (ch <= 0x0bcd)) return true;
701 if (ch == 0x0bd7) return true;
702 if ((ch >= 0x0c01) && (ch <= 0x0c03)) return true;
703 if ((ch >= 0x0c3e) && (ch <= 0x0c44)) return true;
704 if ((ch >= 0x0c46) && (ch <= 0x0c48)) return true;
705 if ((ch >= 0x0c4a) && (ch <= 0x0c4d)) return true;
706 if ((ch >= 0x0c55) && (ch <= 0x0c56)) return true;
707 if ((ch >= 0x0c82) && (ch <= 0x0c83)) return true;
708 if ((ch >= 0x0cbe) && (ch <= 0x0cc4)) return true;
709 if ((ch >= 0x0cc6) && (ch <= 0x0cc8)) return true;
710 if ((ch >= 0x0cca) && (ch <= 0x0ccd)) return true;
711 if ((ch >= 0x0cd5) && (ch <= 0x0cd6)) return true;
712 if ((ch >= 0x0d02) && (ch <= 0x0d03)) return true;
713 if ((ch >= 0x0d3e) && (ch <= 0x0d43)) return true;
714 if ((ch >= 0x0d46) && (ch <= 0x0d48)) return true;
715 if ((ch >= 0x0d4a) && (ch <= 0x0d4d)) return true;
716 if (ch == 0x0d57) return true;
717 if (ch == 0x0e31) return true;
718 if ((ch >= 0x0e34) && (ch <= 0x0e3a)) return true;
719 if ((ch >= 0x0e47) && (ch <= 0x0e4e)) return true;
720 if (ch == 0x0eb1) return true;
721 if ((ch >= 0x0eb4) && (ch <= 0x0eb9)) return true;
722 if ((ch >= 0x0ebb) && (ch <= 0x0ebc)) return true;
723 if ((ch >= 0x0ec8) && (ch <= 0x0ecd)) return true;
724 if ((ch >= 0x0f18) && (ch <= 0x0f19)) return true;
725 if (ch == 0x0f35) return true;
726 if (ch == 0x0f37) return true;
727 if (ch == 0x0f39) return true;
728 if (ch == 0x0f3e) return true;
729 if (ch == 0x0f3f) return true;
730 if ((ch >= 0x0f71) && (ch <= 0x0f84)) return true;
731 if ((ch >= 0x0f86) && (ch <= 0x0f8b)) return true;
732 if ((ch >= 0x0f90) && (ch <= 0x0f95)) return true;
733 if (ch == 0x0f97) return true;
734 if ((ch >= 0x0f99) && (ch <= 0x0fad)) return true;
735 if ((ch >= 0x0fb1) && (ch <= 0x0fb7)) return true;
736 if (ch == 0x0fb9) return true;
737 if ((ch >= 0x20d0) && (ch <= 0x20dc)) return true;
738 if (ch == 0x20e1) return true;
739 if ((ch >= 0x302a) && (ch <= 0x302f)) return true;
740 if (ch == 0x3099) return true;
741 if (ch == 0x309a) return true;
742
743 return false;
744 }
745
746 public boolean isExtender(char ch)
747 {
748 if (ch < 0x00b7) return false;
749
750 if ((ch == 0x00b7) || (ch == 0x02d0) || (ch == 0x02d1) ||
751 (ch == 0x0387) || (ch == 0x0640) || (ch == 0x0e46) ||
752 ((ch >= 0x3031) && (ch <= 0x3035)) ||
753 ((ch >= 0x309d) && (ch <= 0x309e)) ||
754 ((ch >= 0x30fc) && (ch <= 0x30fe))) return true;
755
756 return false;
757 }
758
759 public boolean expandEntity(String entityName)
760 throws IOException
761 {
762 String entity = (String) entityExpansion.get(entityName);
763 if (entity != null)
764 {
765 expand(entity.toCharArray());
766 return true;
767 }
768
769 entityName = entityName.substring(1, entityName.length()-1);
770
771
772 DTDEntity realEntity = expander.expandEntity(entityName);
773 if (realEntity != null)
774 {
775
776 Reader entityIn = realEntity.getReader();
777 if (entityIn != null)
778 {
779 if (inputStreams == null)
780 {
781 inputStreams = new Stack();
782 }
783
784 inputStreams.push(in);
785 in = new StreamInfo(realEntity.getExternalId(), entityIn);
786
787 return true;
788 }
789 }
790
791 return false;
792 }
793
794 public void expand(char[] expandChars)
795 {
796 if (expandBuffer != null)
797 {
798 int oldCharsLeft = expandBuffer.length - expandPos;
799
800 char[] newExp = new char[oldCharsLeft + expandChars.length];
801 System.arraycopy(expandChars, 0, newExp, 0,
802 expandChars.length);
803 System.arraycopy(expandBuffer, expandPos, newExp,
804 expandChars.length, oldCharsLeft);
805 expandPos = 0;
806 expandBuffer = newExp;
807 if (expandBuffer.length == 0)
808 {
809 expandBuffer = null;
810 expandPos = -1;
811 }
812 }
813 else
814 {
815 expandBuffer = expandChars;
816 expandPos = 0;
817 if (expandBuffer.length == 0)
818 {
819 expandBuffer = null;
820 expandPos = -1;
821 }
822 }
823 }
824
825 public void addEntity(String entityName, String entityValue)
826 {
827 entityExpansion.put("%"+entityName+";", entityValue);
828 }
829
830 public static char letterRanges[][] = {
831 { 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00C0, 0x00D6 },
832 { 0x00D8, 0x00F6 }, { 0x00F8, 0x00FF }, { 0x0100, 0x0131 },
833 { 0x0134, 0x013E }, { 0x0141, 0x0148 }, { 0x014A, 0x017E },
834 { 0x0180, 0x01C3 }, { 0x01CD, 0x01F0 }, { 0x01F4, 0x01F5 },
835 { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, { 0x02BB, 0x02C1 },
836 { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
837 { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
838 { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
839 { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, { 0x0401, 0x040C },
840 { 0x040E, 0x044F }, { 0x0451, 0x045C }, { 0x045E, 0x0481 },
841 { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 }, { 0x04CB, 0x04CC },
842 { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, { 0x04F8, 0x04F9 },
843 { 0x0531, 0x0556 }, { 0x0559, 0x0559 }, { 0x0561, 0x0586 },
844 { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 }, { 0x0621, 0x063A },
845 { 0x0641, 0x064A }, { 0x0671, 0x06B7 }, { 0x06BA, 0x06BE },
846 { 0x06C0, 0x06CE }, { 0x06D0, 0x06D3 }, { 0x06D5, 0x06D5 },
847 { 0x06E5, 0x06E6 }, { 0x0905, 0x0939 }, { 0x093D, 0x093D },
848 { 0x0958, 0x0961 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 },
849 { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 },
850 { 0x09B6, 0x09B9 }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 },
851 { 0x09F0, 0x09F1 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
852 { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
853 { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C },
854 { 0x0A5E, 0x0A5E }, { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8B },
855 { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 },
856 { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 },
857 { 0x0ABD, 0x0ABD }, { 0x0AE0, 0x0AE0 }, { 0x0B05, 0x0B0C },
858 { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 },
859 { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B3D },
860 { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A },
861 { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A },
862 { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 },
863 { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 },
864 { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
865 { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },
866 { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
867 { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CDE, 0x0CDE },
868 { 0x0CE0, 0x0CE1 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 },
869 { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },
870 { 0x0E01, 0x0E2E }, { 0x0E30, 0x0E30 }, { 0x0E32, 0x0E33 },
871 { 0x0E40, 0x0E45 }, { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 },
872 { 0x0E87, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
873 { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
874 { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB },
875 { 0x0EAD, 0x0EAE }, { 0x0EB0, 0x0EB0 }, { 0x0EB2, 0x0EB3 },
876 { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 }, { 0x0F40, 0x0F47 },
877 { 0x0F49, 0x0F69 }, { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
878 { 0x1100, 0x1100 }, { 0x1102, 0x1103 }, { 0x1105, 0x1107 },
879 { 0x1109, 0x1109 }, { 0x110B, 0x110C }, { 0x110E, 0x1112 },
880 { 0x113C, 0x113C }, { 0x113E, 0x113E }, { 0x1140, 0x1140 },
881 { 0x114C, 0x114C }, { 0x114E, 0x114E }, { 0x1150, 0x1150 },
882 { 0x1154, 0x1155 }, { 0x1159, 0x1159 }, { 0x115F, 0x1161 },
883 { 0x1163, 0x1163 }, { 0x1165, 0x1165 }, { 0x1167, 0x1167 },
884 { 0x1169, 0x1169 }, { 0x116D, 0x116E }, { 0x1172, 0x1173 },
885 { 0x1175, 0x1175 }, { 0x119E, 0x119E }, { 0x11A8, 0x11A8 },
886 { 0x11AB, 0x11AB }, { 0x11AE, 0x11AF }, { 0x11B7, 0x11B8 },
887 { 0x11BA, 0x11BA }, { 0x11BC, 0x11C2 }, { 0x11EB, 0x11EB },
888 { 0x11F0, 0x11F0 }, { 0x11F9, 0x11F9 }, { 0x1E00, 0x1E9B },
889 { 0x1EA0, 0x1EF9 }, { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D },
890 { 0x1F20, 0x1F45 }, { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 },
891 { 0x1F59, 0x1F59 }, { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D },
892 { 0x1F5F, 0x1F7D }, { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC },
893 { 0x1FBE, 0x1FBE }, { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC },
894 { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC },
895 { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC }, { 0x2126, 0x2126 },
896 { 0x212A, 0x212B }, { 0x212E, 0x212E }, { 0x2180, 0x2182 },
897 { 0x3041, 0x3094 }, { 0x30A1, 0x30FA }, { 0x3105, 0x312C },
898 { 0xAC00, 0xD7A3 }
899 };
900 }