1 package com.wutka.dtd;
2
3 import java.util.*;
4 import java.io.*;
5 import java.net.*;
6
7 /*** Parses a DTD file and returns a DTD object
8 *
9 * @author Mark Wutka
10 * @version $Revision: 1.19 $ $Date: 2002/10/01 12:48:47 $ by $Author: wutka $
11 */
12 public class DTDParser implements EntityExpansion
13 {
14 protected Scanner scanner;
15 protected DTD dtd;
16 protected Object defaultLocation;
17
/**
 * Creates a parser that reads a DTD from the given Reader, with token
 * tracing switched off.
 *
 * @param in The character stream to read the DTD from
 */
public DTDParser(Reader in)
{
    this(in, false);
}
24
/**
 * Creates a parser that reads a DTD from the given Reader.
 * No default location is recorded for this kind of source.
 *
 * @param in The character stream to read the DTD from
 * @param trace True if the parser should print out tokens as it reads them
 *        (used for debugging the parser)
 */
public DTDParser(Reader in, boolean trace)
{
    // The parser passes itself to the scanner as the third constructor argument.
    this.scanner = new Scanner(in, trace, this);

    // Start from an empty DTD; the parse methods fill it in.
    this.dtd = new DTD();
}
35
/**
 * Creates a parser that reads a DTD from the given File, with token
 * tracing switched off.
 *
 * @param in The file to read
 * @throws IOException if the file cannot be opened for reading
 */
public DTDParser(File in)
    throws IOException
{
    this(in, false);
}
46
47 /*** Creates a parser that will read from the specified File object
48 * @param in The file to read
49 * @param trace True if the parser should print out tokens as it reads them
50 * (used for debugging the parser)
51 */
52 public DTDParser(File in, boolean trace)
53 throws IOException
54 {
55 defaultLocation = in.getParentFile();
56
57 scanner = new Scanner(new BufferedReader(new FileReader(in)),
58 trace, this);
59 dtd = new DTD();
60 }
61
/**
 * Creates a parser that reads a DTD from the given URL, with token
 * tracing switched off.
 *
 * @param in The URL to read
 * @throws IOException if the URL cannot be opened
 */
public DTDParser(URL in)
    throws IOException
{
    this(in, false);
}
76
77 /*** Creates a parser that will read from the specified URL object
78 * @param in The URL to read
79 * @param trace True if the parser should print out tokens as it reads them
80 * (used for debugging the parser)
81 */
82 public DTDParser(URL in, boolean trace)
83 throws IOException
84 {
85
86
87
88 String file = in.getFile();
89 defaultLocation = new URL(in.getProtocol(), in.getHost(), in.getPort(), file.substring(0, file.lastIndexOf('/') + 1));
90
91
92 scanner = new Scanner(new BufferedReader(
93 new InputStreamReader(in.openStream())), trace, this);
94 dtd = new DTD();
95 }
96
97 /*** Parses the DTD file and returns a DTD object describing the DTD.
98 This invocation of parse does not try to guess the root element
99 (for efficiency reasons) */
100 public DTD parse()
101 throws IOException
102 {
103 return parse(false);
104 }
105
106 /*** Parses the DTD file and returns a DTD object describing the DTD.
107 * @param guessRootElement If true, tells the parser to try to guess the
108 root element of the document by process of elimination
109 */
110 public DTD parse(boolean guessRootElement)
111 throws IOException
112 {
113 Token token;
114
115 for (;;)
116 {
117 token = scanner.peek();
118
119 if (token.type == Scanner.EOF) break;
120
121 parseTopLevelElement();
122 }
123
124 if (guessRootElement)
125 {
126 Hashtable roots = new Hashtable();
127
128 Enumeration e = dtd.elements.elements();
129
130 while (e.hasMoreElements())
131 {
132 DTDElement element = (DTDElement) e.nextElement();
133 roots.put(element.name, element);
134 }
135
136 e = dtd.elements.elements();
137 while (e.hasMoreElements())
138 {
139 DTDElement element = (DTDElement) e.nextElement();
140 if (!(element.content instanceof DTDContainer)) continue;
141
142 Enumeration items = ((DTDContainer) element.content).
143 getItemsVec(). elements();
144
145 while (items.hasMoreElements())
146 {
147 removeElements(roots, dtd, (DTDItem) items.nextElement());
148 }
149 }
150
151 if (roots.size() == 1)
152 {
153 e = roots.elements();
154 dtd.rootElement = (DTDElement) e.nextElement();
155 }
156 else
157 {
158 dtd.rootElement = null;
159 }
160 }
161 else
162 {
163 dtd.rootElement = null;
164 }
165
166 return dtd;
167 }
168
169 protected void removeElements(Hashtable h, DTD dtd, DTDItem item)
170 {
171 if (item instanceof DTDName)
172 {
173 h.remove(((DTDName) item).value);
174 }
175 else if (item instanceof DTDContainer)
176 {
177 Enumeration e = ((DTDContainer) item).getItemsVec().elements();
178
179 while (e.hasMoreElements())
180 {
181 removeElements(h, dtd, (DTDItem) e.nextElement());
182 }
183 }
184 }
185
186 protected void parseTopLevelElement()
187 throws IOException
188 {
189 Token token = scanner.get();
190
191
192 if (token.type == Scanner.LTQUES)
193 {
194 StringBuffer textBuffer = new StringBuffer();
195
196 for (;;)
197 {
198 String text = scanner.getUntil('?');
199 textBuffer.append(text);
200
201 token = scanner.peek();
202 if (token.type == Scanner.GT)
203 {
204 scanner.get();
205 break;
206 }
207 textBuffer.append('?');
208 }
209 DTDProcessingInstruction instruct =
210 new DTDProcessingInstruction(textBuffer.toString());
211
212 dtd.items.addElement(instruct);
213
214 return;
215 }
216 else if (token.type == Scanner.CONDITIONAL)
217 {
218 token = expect(Scanner.IDENTIFIER);
219
220 if (token.value.equals("IGNORE"))
221 {
222 scanner.skipConditional();
223 }
224 else
225 {
226 if (token.value.equals("INCLUDE"))
227 {
228 scanner.skipUntil('[');
229 }
230 else
231 {
232 throw new DTDParseException(scanner.getUriId(),
233 "Invalid token in conditional: "+token.value,
234 scanner.getLineNumber(), scanner.getColumn());
235 }
236 }
237 }
238 else if (token.type == Scanner.ENDCONDITIONAL)
239 {
240
241 }
242 else if (token.type == Scanner.COMMENT)
243 {
244 dtd.items.addElement(
245 new DTDComment(token.value));
246 }
247 else if (token.type == Scanner.LTBANG)
248 {
249
250 token = expect(Scanner.IDENTIFIER);
251
252 if (token.value.equals("ELEMENT"))
253 {
254 parseElement();
255 }
256 else if (token.value.equals("ATTLIST"))
257 {
258 parseAttlist();
259 }
260 else if (token.value.equals("ENTITY"))
261 {
262 parseEntity();
263 }
264 else if (token.value.equals("NOTATION"))
265 {
266 parseNotation();
267 }
268 else
269 {
270 skipUntil(Scanner.GT);
271 }
272 }
273 else
274 {
275
276
277
278 throw new DTDParseException(scanner.getUriId(),
279 "Unexpected token: "+ token.type.name+"("+token.value+")",
280 scanner.getLineNumber(), scanner.getColumn());
281 }
282
283 }
284
285 protected void skipUntil(TokenType stopToken)
286 throws IOException
287 {
288 Token token = scanner.get();
289
290 while (token.type != stopToken)
291 {
292 token = scanner.get();
293 }
294 }
295
296 protected Token expect(TokenType expected)
297 throws IOException
298 {
299 Token token = scanner.get();
300
301 if (token.type != expected)
302 {
303 if (token.value == null)
304 {
305 throw new DTDParseException(scanner.getUriId(),
306 "Expected "+expected.name+" instead of "+token.type.name,
307 scanner.getLineNumber(), scanner.getColumn());
308 }
309 else
310 {
311 throw new DTDParseException(scanner.getUriId(),
312 "Expected "+expected.name+
313 " instead of "+ token.type.name+"("+token.value+")",
314 scanner.getLineNumber(), scanner.getColumn());
315 }
316 }
317
318 return token;
319 }
320
321 protected void parseElement()
322 throws IOException
323 {
324 Token name = expect(Scanner.IDENTIFIER);
325
326 DTDElement element = (DTDElement) dtd.elements.get(name.value);
327
328 if (element == null)
329 {
330 element = new DTDElement(name.value);
331 dtd.elements.put(element.name, element);
332 }
333 else if (element.content != null)
334 {
335
336
337
338
339
340 throw new DTDParseException(scanner.getUriId(),
341 "Found second definition of element: "+name.value,
342 scanner.getLineNumber(), scanner.getColumn());
343 }
344
345 dtd.items.addElement(element);
346 parseContentSpec(scanner, element);
347
348 expect(Scanner.GT);
349 }
350
351 protected void parseContentSpec(Scanner scanner, DTDElement element)
352 throws IOException
353 {
354 Token token = scanner.get();
355
356 if (token.type == Scanner.IDENTIFIER)
357 {
358 if (token.value.equals("EMPTY"))
359 {
360 element.content = new DTDEmpty();
361 }
362 else if (token.value.equals("ANY"))
363 {
364 element.content = new DTDAny();
365 }
366 else
367 {
368 throw new DTDParseException(scanner.getUriId(),
369 "Invalid token in entity content spec "+
370 token.value,
371 scanner.getLineNumber(), scanner.getColumn());
372 }
373 }
374 else if (token.type == Scanner.LPAREN)
375 {
376 token = scanner.peek();
377
378 if (token.type == Scanner.IDENTIFIER)
379 {
380 if (token.value.equals("#PCDATA"))
381 {
382 parseMixed(element);
383 }
384 else
385 {
386 parseChildren(element);
387 }
388 }
389 else if (token.type == Scanner.LPAREN)
390 {
391 parseChildren(element);
392 }
393 }
394 }
395
396 protected void parseMixed(DTDElement element)
397 throws IOException
398 {
399
400
401
402
403 boolean isPcdataOnly = true;
404
405 DTDMixed mixed = new DTDMixed();
406
407 mixed.add(new DTDPCData());
408
409 scanner.get();
410
411 element.content = mixed;
412
413 for (;;)
414 {
415 Token token = scanner.get();
416
417 if (token.type == Scanner.RPAREN)
418 {
419 token = scanner.peek();
420
421 if (token.type == Scanner.ASTERISK)
422 {
423 scanner.get();
424 mixed.cardinal = DTDCardinal.ZEROMANY;
425 }
426 else
427 {
428 if (!isPcdataOnly)
429 {
430 throw new DTDParseException(scanner.getUriId(),
431 "Invalid token in Mixed content type, '*' required after (#PCDATA|xx ...): "+
432 token.type.name, scanner.getLineNumber(), scanner.getColumn());
433 }
434
435 mixed.cardinal = DTDCardinal.NONE;
436 }
437
438 return;
439 }
440 else if (token.type == Scanner.PIPE)
441 {
442 token = scanner.get();
443
444 mixed.add(new DTDName(token.value));
445
446
447 isPcdataOnly = false;
448 }
449 else
450 {
451 throw new DTDParseException(scanner.getUriId(),
452 "Invalid token in Mixed content type: "+
453 token.type.name, scanner.getLineNumber(), scanner.getColumn());
454 }
455 }
456 }
457
458 protected void parseChildren(DTDElement element)
459 throws IOException
460 {
461 DTDContainer choiceSeq = parseChoiceSequence();
462
463 Token token = scanner.peek();
464
465 choiceSeq.cardinal = parseCardinality();
466
467 if (token.type == Scanner.QUES)
468 {
469 choiceSeq.cardinal = DTDCardinal.OPTIONAL;
470 }
471 else if (token.type == Scanner.ASTERISK)
472 {
473 choiceSeq.cardinal = DTDCardinal.ZEROMANY;
474 }
475 else if (token.type == Scanner.PLUS)
476 {
477 choiceSeq.cardinal = DTDCardinal.ONEMANY;
478 }
479 else
480 {
481 choiceSeq.cardinal = DTDCardinal.NONE;
482 }
483
484 element.content = choiceSeq;
485 }
486
487 protected DTDContainer parseChoiceSequence()
488 throws IOException
489 {
490 TokenType separator = null;
491
492 DTDContainer cs = null;
493
494 for (;;)
495 {
496 DTDItem item = parseCP();
497
498 Token token = scanner.get();
499
500 if ((token.type == Scanner.PIPE) ||
501 (token.type == Scanner.COMMA))
502 {
503 if ((separator != null) && (separator != token.type))
504 {
505 throw new DTDParseException(scanner.getUriId(),
506 "Can't mix separators in a choice/sequence",
507 scanner.getLineNumber(), scanner.getColumn());
508 }
509 separator = token.type;
510
511 if (cs == null)
512 {
513 if (token.type == Scanner.PIPE)
514 {
515 cs = new DTDChoice();
516 }
517 else
518 {
519 cs = new DTDSequence();
520 }
521 }
522 cs.add(item);
523 }
524 else if (token.type == Scanner.RPAREN)
525 {
526 if (cs == null)
527 {
528 cs = new DTDSequence();
529 }
530 cs.add(item);
531 return cs;
532 }
533 else
534 {
535 throw new DTDParseException(scanner.getUriId(),
536 "Found invalid token in sequence: "+
537 token.type.name, scanner.getLineNumber(), scanner.getColumn());
538 }
539 }
540 }
541
542 protected DTDItem parseCP()
543 throws IOException
544 {
545 Token token = scanner.get();
546
547 DTDItem item = null;
548
549 if (token.type == Scanner.IDENTIFIER)
550 {
551 item = new DTDName(token.value);
552 }
553 else if (token.type == Scanner.LPAREN)
554 {
555 item = parseChoiceSequence();
556 }
557 else
558 {
559 throw new DTDParseException(scanner.getUriId(),
560 "Found invalid token in sequence: "+
561 token.type.name, scanner.getLineNumber(),
562 scanner.getColumn());
563 }
564
565 item.cardinal = parseCardinality();
566
567 return item;
568 }
569
570 protected DTDCardinal parseCardinality()
571 throws IOException
572 {
573 Token token = scanner.peek();
574
575 if (token.type == Scanner.QUES)
576 {
577 scanner.get();
578 return DTDCardinal.OPTIONAL;
579 }
580 else if (token.type == Scanner.ASTERISK)
581 {
582 scanner.get();
583 return DTDCardinal.ZEROMANY;
584 }
585 else if (token.type == Scanner.PLUS)
586 {
587 scanner.get();
588 return DTDCardinal.ONEMANY;
589 }
590 else
591 {
592 return DTDCardinal.NONE;
593 }
594 }
595
596 protected void parseAttlist()
597 throws IOException
598 {
599 Token token = expect(Scanner.IDENTIFIER);
600
601 DTDElement element = (DTDElement) dtd.elements.get(token.value);
602
603 DTDAttlist attlist = new DTDAttlist(token.value);
604
605 dtd.items.addElement(attlist);
606
607 if (element == null)
608 {
609 element = new DTDElement(token.value);
610 dtd.elements.put(token.value, element);
611 }
612
613 token = scanner.peek();
614
615 while (token.type != Scanner.GT)
616 {
617 parseAttdef(scanner, element, attlist);
618 token = scanner.peek();
619 }
620
621
622
623
624
625 expect(Scanner.GT);
626 }
627
628 protected void parseAttdef(Scanner scanner, DTDElement element,
629 DTDAttlist attlist)
630 throws IOException
631 {
632 Token token = expect(Scanner.IDENTIFIER);
633
634 DTDAttribute attr = new DTDAttribute(token.value);
635
636 attlist.attributes.addElement(attr);
637
638 element.attributes.put(token.value, attr);
639
640 token = scanner.get();
641
642 if (token.type == Scanner.IDENTIFIER)
643 {
644 if (token.value.equals("NOTATION"))
645 {
646 attr.type = parseNotationList();
647 }
648 else
649 {
650 attr.type = token.value;
651 }
652 }
653 else if (token.type == Scanner.LPAREN)
654 {
655 attr.type = parseEnumeration();
656 }
657
658 token = scanner.peek();
659
660 if (token.type == Scanner.IDENTIFIER)
661 {
662 scanner.get();
663 if (token.value.equals("#FIXED"))
664 {
665 attr.decl = DTDDecl.FIXED;
666
667 token = scanner.get();
668 attr.defaultValue = token.value;
669 }
670 else if (token.value.equals("#REQUIRED"))
671 {
672 attr.decl = DTDDecl.REQUIRED;
673 }
674 else if (token.value.equals("#IMPLIED"))
675 {
676 attr.decl = DTDDecl.IMPLIED;
677 }
678 else
679 {
680 throw new DTDParseException(scanner.getUriId(),
681 "Invalid token in attribute declaration: "+
682 token.value, scanner.getLineNumber(), scanner.getColumn());
683 }
684 }
685 else if (token.type == Scanner.STRING)
686 {
687 scanner.get();
688 attr.decl = DTDDecl.VALUE;
689 attr.defaultValue = token.value;
690 }
691 }
692
693 protected DTDNotationList parseNotationList()
694 throws IOException
695 {
696 DTDNotationList notation = new DTDNotationList();
697
698 Token token = scanner.get();
699 if (token.type != Scanner.LPAREN)
700 {
701 throw new DTDParseException(scanner.getUriId(),
702 "Invalid token in notation: "+
703 token.type.name, scanner.getLineNumber(),
704 scanner.getColumn());
705 }
706
707 for (;;)
708 {
709 token = scanner.get();
710
711 if (token.type != Scanner.IDENTIFIER)
712 {
713 throw new DTDParseException(scanner.getUriId(),
714 "Invalid token in notation: "+
715 token.type.name, scanner.getLineNumber(),
716 scanner.getColumn());
717 }
718
719 notation.add(token.value);
720
721 token = scanner.peek();
722
723 if (token.type == Scanner.RPAREN)
724 {
725 scanner.get();
726 return notation;
727 }
728 else if (token.type != Scanner.PIPE)
729 {
730 throw new DTDParseException(scanner.getUriId(),
731 "Invalid token in notation: "+
732 token.type.name, scanner.getLineNumber(),
733 scanner.getColumn());
734 }
735 scanner.get();
736 }
737 }
738
739 protected DTDEnumeration parseEnumeration()
740 throws IOException
741 {
742 DTDEnumeration enumeration = new DTDEnumeration();
743
744 for (;;)
745 {
746 Token token = scanner.get();
747
748 if ((token.type != Scanner.IDENTIFIER) &&
749 (token.type != Scanner.NMTOKEN))
750 {
751 throw new DTDParseException(scanner.getUriId(),
752 "Invalid token in enumeration: "+
753 token.type.name, scanner.getLineNumber(),
754 scanner.getColumn());
755 }
756
757 enumeration.add(token.value);
758
759 token = scanner.peek();
760
761 if (token.type == Scanner.RPAREN)
762 {
763 scanner.get();
764 return enumeration;
765 }
766 else if (token.type != Scanner.PIPE)
767 {
768 throw new DTDParseException(scanner.getUriId(),
769 "Invalid token in enumeration: "+
770 token.type.name, scanner.getLineNumber(),
771 scanner.getColumn());
772 }
773 scanner.get();
774 }
775 }
776
777 protected void parseEntity()
778 throws IOException
779 {
780 boolean isParsed = false;
781
782 Token name = scanner.get();
783
784 if (name.type == Scanner.PERCENT)
785 {
786 isParsed = true;
787 name = expect(Scanner.IDENTIFIER);
788 }
789 else if (name.type != Scanner.IDENTIFIER)
790 {
791 throw new DTDParseException(scanner.getUriId(),
792 "Invalid entity declaration",
793 scanner.getLineNumber(), scanner.getColumn());
794 }
795
796 DTDEntity entity = (DTDEntity) dtd.entities.get(name.value);
797
798 boolean skip = false;
799
800 if (entity == null)
801 {
802 entity = new DTDEntity(name.value, defaultLocation);
803 dtd.entities.put(entity.name, entity);
804 }
805 else
806 {
807
808
809
810 entity = new DTDEntity(name.value, defaultLocation);
811 skip = true;
812 }
813
814 dtd.items.addElement(entity);
815
816 entity.isParsed = isParsed;
817
818 parseEntityDef(entity);
819
820 if (entity.isParsed && (entity.value != null) && !skip)
821 {
822 scanner.addEntity(entity.name, entity.value);
823 }
824 }
825
826 protected void parseEntityDef(DTDEntity entity)
827 throws IOException
828 {
829 Token token = scanner.get();
830
831 if (token.type == Scanner.STRING)
832 {
833
834
835
836 if (entity.value == null)
837 {
838 entity.value = token.value;
839 }
840 }
841 else if (token.type == Scanner.IDENTIFIER)
842 {
843 if (token.value.equals("SYSTEM"))
844 {
845 DTDSystem sys = new DTDSystem();
846 token = expect(Scanner.STRING);
847
848 sys.system = token.value;
849 entity.externalID = sys;
850 }
851 else if (token.value.equals("PUBLIC"))
852 {
853 DTDPublic pub = new DTDPublic();
854
855 token = expect(Scanner.STRING);
856 pub.pub = token.value;
857 token = expect(Scanner.STRING);
858 pub.system = token.value;
859 entity.externalID = pub;
860 }
861 else
862 {
863 throw new DTDParseException(scanner.getUriId(),
864 "Invalid External ID specification",
865 scanner.getLineNumber(), scanner.getColumn());
866 }
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888 if (!entity.isParsed)
889 {
890 token = scanner.peek();
891 if (token.type == Scanner.IDENTIFIER)
892 {
893 if (!token.value.equals("NDATA"))
894 {
895 throw new DTDParseException(scanner.getUriId(),
896 "Invalid NData declaration",
897 scanner.getLineNumber(), scanner.getColumn());
898 }
899
900
901 token = scanner.get();
902
903 token = expect(Scanner.IDENTIFIER);
904
905 entity.ndata = token.value;
906 }
907 }
908 }
909 else
910 {
911 throw new DTDParseException(scanner.getUriId(),
912 "Invalid entity definition",
913 scanner.getLineNumber(), scanner.getColumn());
914 }
915
916 expect(Scanner.GT);
917 }
918
919 protected void parseNotation()
920 throws java.io.IOException
921 {
922 DTDNotation notation = new DTDNotation();
923
924 Token token = expect(Scanner.IDENTIFIER);
925
926 notation.name = token.value;
927
928 dtd.notations.put(notation.name, notation);
929 dtd.items.addElement(notation);
930
931 token = expect(Scanner.IDENTIFIER);
932
933 if (token.value.equals("SYSTEM"))
934 {
935 DTDSystem sys = new DTDSystem();
936 token = expect(Scanner.STRING);
937
938 sys.system = token.value;
939 notation.externalID = sys;
940 }
941 else if (token.value.equals("PUBLIC"))
942 {
943 DTDPublic pub = new DTDPublic();
944 token = expect(Scanner.STRING);
945
946 pub.pub = token.value;
947 pub.system = null;
948
949
950
951 token = scanner.peek();
952 if (token.type == Scanner.STRING)
953 {
954 token = scanner.get();
955 pub.system = token.value;
956 }
957
958 notation.externalID = pub;
959 }
960 expect(Scanner.GT);
961 }
962
963 public DTDEntity expandEntity(String name)
964 {
965 return (DTDEntity) dtd.entities.get(name);
966 }
967 }