1
23package org.jzuul.dtdparser;
24
25import java.io.BufferedReader;
26import java.io.FileInputStream;
27import java.io.FileNotFoundException;
28import java.io.IOException;
29import java.io.InputStream;
30import java.io.InputStreamReader;
31import java.io.OutputStream;
32import java.util.ArrayList;
33import java.util.Arrays;
34import java.util.HashMap;
35import java.util.Iterator;
36import java.util.List;
37import java.util.Vector;
38import java.util.regex.Matcher;
39import java.util.regex.Pattern;
40
41
49public class DTDParser {
50
51 protected static void debug(String message) {
52 if (false) System.out.println(message);
53 }
54
55 private DTDTreeElement root;
56
57 private List flattenedElements;
58
59 private HashMap entities, attributes, orphanedElements, elements;
60
61 protected DTDParser() {
62 flattenedElements = new Vector();
63 entities = new HashMap();
64 attributes = new HashMap();
65 orphanedElements = new HashMap();
66 elements = new HashMap();
67 }
68
69 public DTDParser(String filename) throws DTDParserException, FileNotFoundException, IOException {
70 this();
71 InputStream is = new FileInputStream(filename);
72 this.parseStream(is);
73 }
74
75 public DTDParser(InputStream file) throws DTDParserException, IOException {
76 this();
77 this.parseStream(file);
78 }
79
80 private void parseStream(InputStream s) throws IOException, DTDParserException {
81 StringBuffer fileContents = new StringBuffer(1024 * 1024); BufferedReader br = new BufferedReader(new InputStreamReader(s));
95 String line;
96 while ((line = br.readLine()) != null) {
97 fileContents.append(line + " ");
98 }
99 String strFileContents = fileContents.toString();
00 strFileContents = strFileContents.replaceAll("\\n+", " ");
01 strFileContents = strFileContents.replaceAll("\\t+", " ");
02 strFileContents = strFileContents.replaceAll("\\r+", " ");
03 strFileContents = strFileContents.replaceAll("\\f+", " ");
04 strFileContents = strFileContents.replaceAll(" +", " ");
05
06 List tags = tokeniseString(strFileContents);
07
08 buildEntityMap(tags);
09
10 buildAttributeList(getAttributeTokens(tags));
11 buildElementTree(getElementTokens(tags));
12
13
14 }
15
16 private List tokeniseString(String contents) {
17 contents = contents.replaceAll("<!--.+?-->", "");
18
19 Vector tokens = new Vector(Arrays.asList(contents.split(">")));
20 for (Iterator iter = tokens.iterator(); iter.hasNext();) {
21
22 String element = (String) iter.next();
23 element = element.replaceAll("^\\s+", "");
24 if (element.equals("")) {
25 iter.remove();
26 } else {
27 element += ">";
28 DTDParser.debug(element);
29 }
30 }
31 return tokens;
32 }
33
34 public DTDTreeElement getRoot() {
35 return root;
36 }
37
38 public void writeTo(String filename) {
39 throw new UnsupportedOperationException("Not yet implemented");
40 }
41
42 public void writeTo(OutputStream out) {
43 throw new UnsupportedOperationException("Not yet implemented");
44 }
45
46 protected List getTokenByPrefix(String prefix, List tokens) {
47 ArrayList tokenList = new ArrayList();
48 for (Iterator iter = tokens.iterator(); iter.hasNext();) {
49 String element = (String) iter.next();
50 if (element.matches("^\\s*" + prefix + ".+")) {
51 tokenList.add(element);
52 }
53 }
54 DTDParser.debug("Found " + tokenList.size() + " for prefix " + prefix);
55 return tokenList;
56 }
57
58 private List getElementTokens(List tokens) {
59 return getTokenByPrefix("<!ELEMENT", tokens);
60 }
61
62 private List getAttributeTokens(List tokens) {
63 return getTokenByPrefix("<!ATTLIST", tokens);
64 }
65
66 private void buildElementTree(List elmentTokens) throws DTDParserException {
67 final String ow = "\\s*";
68
69 final String begintag = ow + "<!ELEMENT" + ow;
70
71 final String identifier = "[-\\w:]+";
72 final String quantifier = "(?>\\?|\\*|\\+)?";
73
74 final String subelementsep = ow + "(\\||,)?" + ow;
75
76 final String op = "\\(";
77 final String cp = "\\)";
78
79 final String subelements = op + ".+" + cp + quantifier;
80
81 final String contents = "(EMPTY|ANY|" + subelements + ")";
82
83 final String pattern = begintag + "(" + identifier + ")" + ow + contents + ow;
84
85 DTDParser.debug("Pattern is: " + pattern);
86
87 Pattern elementPattern = Pattern.compile(pattern);
88 Pattern subelPattern = Pattern.compile("(" + identifier + ")(" + quantifier + ")(" + subelementsep + ")");
89
90 for (Iterator iter = elmentTokens.iterator(); iter.hasNext();) {
91 String element = (String) iter.next();
92 element = resolveEntity(element);
93 if (element == null) break;
94 Matcher m = elementPattern.matcher(element);
95 if (m.matches()) {
96 DTDParser.debug("MATCH: " + element);
97 for (int i = 1; i <= m.groupCount(); i++) {
98 DTDParser.debug(i + ": " + m.group(i));
99 }
00 String name = m.group(1);
01 String subels = m.group(2);
02
03 DTDTreeElement e = getOrCreateElement(name);
04
05 if (subels.equalsIgnoreCase("EMPTY")) {
06 e.setType(DTDElement.EMPTY);
07 } else { Matcher n = subelPattern.matcher(subels);
09 while (n.find()) {
10 DTDParser.debug("\t:" + n.group());
11 String subname = n.group(1);
12 String cardinal = n.group(2);
13 DTDTreeElement subel = null;
14 if ((subel = findOrphanedElement(subname)) == null) {
15 subel = new DTDTreeElement(subname);
16 this.flattenedElements.add(subel);
17 this.elements.put(subname,subel);
18 }
19 subel.setParent(e);
20 subel.setCardinality(cardinal);
21 e.addChild(subel);
22 }
23 }
24 Vector v = (Vector)attributes.get(e.getName());
25 if (v != null) {
26 for (Iterator iterator = v.iterator(); iterator.hasNext();) {
27 DTDAttribute att = (DTDAttribute) iterator.next();
28 assignAttribute(e.getName(), att);
29 }
30 }
31 mergeSubElements(e);
32
33 findRoot(e);
34 } else {
35 String message = "Syntax error at contents |" + element + "|\n";
36 throw new DTDParserException(message);
37 }
38 }
39 }
40
41 public void printTree() {
42 if (root == null) throw new IllegalStateException("No root Element found");
43 debug("Starting to print tree");
44 System.out.println(root.toString());
45 }
46
47 protected void findRoot(DTDTreeElement e) {
48 while (e.getParent() != null) {
49 if (e.getParent().getName().equals(e.name)) break;
50 e = e.getParent();
51 }
52 root = e;
53 DTDParser.debug("Root seems to be " + root.getName());
54
55 }
56
57 protected DTDTreeElement findOrphanedElement(String name) {
58 if (name == null) throw new IllegalArgumentException("Name must not be null");
59 DTDTreeElement el = (DTDTreeElement)orphanedElements.get(name);
60 if (el != null ) {
61 orphanedElements.remove(name);
62 flattenedElements.add(el);
63 elements.put(name,el);
64 }
65 return el;
66 }
67
68 protected DTDTreeElement getOrCreateElement(String name) {
69 DTDElement el = getElement(name);
70 if (el != null) { return (DTDTreeElement)el; }
71
72 DTDTreeElement newEl = new DTDTreeElement(name);
73 orphanedElements.put(name,newEl);
74 return newEl;
75 }
76
77 protected DTDElement getElement(String name) {
78 DTDTreeElement el = (DTDTreeElement)elements.get(name);
79 if (el != null) { return el; }
80
81 DTDParser.debug("Element " + name + " not found!");
82 return null;
83 }
84
85 protected void mergeSubElements(DTDElement e) {
86
87 debug("Merging sub elements for " + e.getName());
88 for (Iterator iter = flattenedElements.iterator(); iter.hasNext();) {
89 DTDTreeElement element = (DTDTreeElement) iter.next();
90 if (element.getName().equals(e.getName())) {
91 element.mergeSubElements(e);
92 }
93 }
94 }
95
96 protected void buildAttributeList(List tokens) throws DTDParserException {
97
98 final String whitespace = "\\s*";
99
00 final String begintag = whitespace + "<!ATTLIST" + whitespace;
01
02 final String identifier = "[-\\w:]+";
03
04 final String enumeration = "\\(" + whitespace + identifier + whitespace +
05 "\\)|\\((?>" + whitespace + identifier + whitespace + "\\|)+" + whitespace + identifier
06 + whitespace + "\\)";
07
08 final String valuetype = "(CDATA|" + enumeration + ")";
09
10 final String defvalue = "(?>\"[^\"]+\")";
11 final String modifier = "(?>#REQUIRED|#IMPLIED|#FIXED)";
15 final String flag = "(" + defvalue + "|" + modifier + ")";
16 final String attdef = "(?>" + whitespace + "(" + identifier + ")" + whitespace + valuetype + whitespace + flag
17 + whitespace + flag + "?)";
18
19 final String pattern = begintag + "(" + identifier + ")" + "(" + ".+" + ")" + whitespace;
20
21 DTDParser.debug("Pattern is: " + pattern);
22
23 Pattern elementPattern = Pattern.compile(pattern);
24 Pattern enumValuePattern = Pattern.compile("(" + identifier + ")" + "\\|?");
25 Pattern attributePattern = Pattern.compile(attdef);
26
27 for (Iterator iter = tokens.iterator(); iter.hasNext();) {
28 String line = (String) iter.next();
29 line = resolveEntity(line);
30 if (line == null) break;
31 Matcher m = elementPattern.matcher(line);
32 if (m.matches()) {
33 DTDParser.debug("MATCH: |" + line + "|");
34 for (int i = 1; i <= m.groupCount(); i++) {
35 DTDParser.debug(i + ": " + m.group(i));
36 }
37 String element = m.group(1);
38 String attributeLine = m.group(2);
39 Matcher o = attributePattern.matcher(attributeLine);
40 while (o.find()) {
41 for (int i = 1; i <= o.groupCount(); i++) {
42 DTDParser.debug("\t" + i + ": " + o.group(i));
43 }
44
45 String name = o.group(1);
46 String type = o.group(2);
47 String defvalOrFlag = o.group(3);
48
49 DTDAttribute a = new DTDAttribute(name);
50 a.setElementName(element);
51 if (type.equalsIgnoreCase("CDATA")) {
52 a.setType(DTDAttribute.CDATA);
53 } else {
54 Matcher n = enumValuePattern.matcher(type);
55 while (n.find()) {
56 a.addEnumValue(n.group(1));
57 }
58 }
59
60 if (defvalOrFlag.startsWith("#")) {
61 a.setFlag(defvalOrFlag);
62 if (defvalOrFlag.equals("#FIXED")) {
63 a.setDefaultValue(o.group(4));
64 }
65 } else {
66 a.setDefaultValue(defvalOrFlag.replaceAll("\"", ""));
67 }
68
69 addAttribute(a);
70 }
71
72 } else {
73 String message = "Syntax error at contents |" + line + "|\n";
74 throw new DTDParserException(message);
75
76 }
77 }
78
79
80 }
81
82 protected void assignAttribute(String elementName, DTDAttribute attribute) {
83 DTDElement e = (DTDElement)orphanedElements.get(elementName);
84 if (e == null) {
85 e = getElement(elementName);
86 }
87 e.addAttribute(attribute);
88 }
89
90 private void buildEntityMap(List tokens) throws DTDParserException {
91 final String whitespace = "\\s*";
92 final String reftoken = "%?";
93
94 final String name = "[-\\w\\d._]+";
95 final String pedef = "\"([^\"]*)\"";
96
97 final String prefix = whitespace + "<!ENTITY" + whitespace + reftoken + whitespace + "(" + name + ")"
98 + whitespace;
99
00 final String peDeclTail = pedef + whitespace;
01
02 final String geDeclTail = "(?>SYSTEM|PUBLIC)" + whitespace + pedef + whitespace + pedef + "?" + whitespace;
03
04 final String pattern = prefix + "(?>" + peDeclTail + "|" + geDeclTail + ")";
05
06 DTDParser.debug("Pattern is: " + pattern);
07
08 Pattern entityPattern = Pattern.compile(pattern);
09 for (Iterator iter = tokens.iterator(); iter.hasNext();) {
10 String element = (String) iter.next();
11 String newelement = resolveEntity(element);
12 if (newelement == null) {
13 break;
14 }
15 if (newelement.matches(".+INCLUDE\\[.+")) System.exit(9);
16 Matcher m = entityPattern.matcher(newelement);
17 if (m.matches()) {
18 String nameVal = m.group(1);
19 String pedefVal = m.group(2);
20 for (int i = 1; i <= m.groupCount(); i++) {
21 DTDParser.debug(i + ": " + m.group(i));
22 }
23 if (pedefVal == null) pedefVal = m.group(4);
24 DTDParser.debug("Adding |" + nameVal + "|->" + pedefVal);
25 entities.put(nameVal, pedefVal);
26 } else {
27 }
31
32 }
33
34 }
35
36 protected String resolveEntity(String element) throws DTDParserException {
37 String newelement = new String(element);
38 final String entref = "%([^%;\"]+);";
39
40 Pattern entity = Pattern.compile(entref);
41
42 if (newelement.matches("^\\s*<!\\[IGNORE.+")) return null;
43
44 DTDParser.debug(newelement);
45 Matcher m = entity.matcher(newelement);
46 while (m.find()) {
47 String name = m.group(1);
48 DTDParser.debug("Resolving :" + name + ":");
49 String replace = (String) entities.get(name);
50 if (replace != null) newelement = newelement.replaceAll("%" + name + ";", replace);
53 DTDParser.debug(newelement);
54 }
55 DTDParser.debug(newelement);
56 newelement = newelement.replaceAll("<!\\[INCLUDE\\[","");
57 DTDParser.debug("newelement is now: " + newelement);
58
59 return newelement;
60 }
61
62 protected void addAttribute(DTDAttribute attribute) {
63 if (attributes.containsKey(attribute.getElementName())) {
64 ((Vector)attributes.get(attribute.getElementName())).add(attribute);
65 } else {
66 Vector v = new Vector();
67 v.add(attribute);
68 attributes.put(attribute.getElementName(),v);
69 }
70
71 }
72
73}