1 /*
2  *  CVS: $Id: DTDParser.java,v 1.6 2004/07/16 16:22:34 marcus Exp $
3  * 
4  *  This file is part of zuul.
5  *
6  *  zuul is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10 *
11 *  zuul is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 *  GNU General Public License for more details.
15 *
16 *  You should have received a copy of the GNU General Public License
17 *  along with zuul; if not, write to the Free Software
18 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 * 
20 *  Copyright 2004 by marcus, leh
21 * 
22 */
23package org.jzuul.dtdparser;
24
25import java.io.BufferedReader;
26import java.io.FileInputStream;
27import java.io.FileNotFoundException;
28import java.io.IOException;
29import java.io.InputStream;
30import java.io.InputStreamReader;
31import java.io.OutputStream;
32import java.util.ArrayList;
33import java.util.Arrays;
34import java.util.HashMap;
35import java.util.Iterator;
36import java.util.List;
37import java.util.Vector;
38import java.util.regex.Matcher;
39import java.util.regex.Pattern;
40
41/**
42 * TODO Document new class
43 * 
44 * Created on Jun 7, 2004
45 * 
46 * 
47 * @version $Revision: 1.6 $
48 */
49public class DTDParser {
50    
51    protected static void debug(String message) {
52        if (false) System.out.println(message);
53    }
54
55    private DTDTreeElement root;
56
57    private List flattenedElements;
58
59    private HashMap entities, attributes, orphanedElements, elements;
60
61    protected DTDParser() {
62        flattenedElements = new Vector();
63        entities = new HashMap();
64        attributes = new HashMap();
65        orphanedElements = new HashMap();
66        elements = new HashMap();
67    }
68
69    public DTDParser(String filename) throws DTDParserException, FileNotFoundException, IOException {
70        this();
71        InputStream is = new FileInputStream(filename);
72        this.parseStream(is);
73    }
74
75    public DTDParser(InputStream file) throws DTDParserException, IOException {
76        this();
77        this.parseStream(file);
78    }
79
80    private void parseStream(InputStream s) throws IOException, DTDParserException {
81        StringBuffer fileContents = new StringBuffer(1024 * 1024); // Reserve 1
82                                                                   // MB,
83                                                                   // everything
84                                                                   // else is
85                                                                   // fucking
86                                                                   // slow
87                                                                   // because he
88                                                                   // keeps
89                                                                   // resizing
90                                                                   // the buffer
91                                                                   // under the
92                                                                   // String all
93                                                                   // the time
94        BufferedReader br = new BufferedReader(new InputStreamReader(s));
95        String line;
96        while ((line = br.readLine()) != null) {
97            fileContents.append(line + " ");
98        }
99        String strFileContents = fileContents.toString();
00        strFileContents = strFileContents.replaceAll("\\n+", " ");
01        strFileContents = strFileContents.replaceAll("\\t+", " ");
02        strFileContents = strFileContents.replaceAll("\\r+", " ");
03        strFileContents = strFileContents.replaceAll("\\f+", " ");
04        strFileContents = strFileContents.replaceAll(" +", " ");
05
06        List tags = tokeniseString(strFileContents);
07
08        buildEntityMap(tags);
09
10        buildAttributeList(getAttributeTokens(tags));
11        buildElementTree(getElementTokens(tags));
12        
13   
14    }
15
16    private List tokeniseString(String contents) {
17        contents = contents.replaceAll("<!--.+?-->", "");
18
19        Vector tokens = new Vector(Arrays.asList(contents.split(">")));
20        for (Iterator iter = tokens.iterator(); iter.hasNext();) {
21
22            String element = (String) iter.next();
23            element = element.replaceAll("^\\s+", "");
24            if (element.equals("")) {
25                iter.remove();
26            } else {
27                element += ">";
28                DTDParser.debug(element);
29            }
30        }
31        return tokens;
32    }
33
34    public DTDTreeElement getRoot() {
35        return root;
36    }
37
38    public void writeTo(String filename) {
39        throw new UnsupportedOperationException("Not yet implemented");
40    }
41
42    public void writeTo(OutputStream out) {
43        throw new UnsupportedOperationException("Not yet implemented");
44    }
45
46    protected List getTokenByPrefix(String prefix, List tokens) {
47        ArrayList tokenList = new ArrayList();
48        for (Iterator iter = tokens.iterator(); iter.hasNext();) {
49            String element = (String) iter.next();
50            if (element.matches("^\\s*" + prefix + ".+")) {
51                tokenList.add(element);
52            }
53        }
54        DTDParser.debug("Found " + tokenList.size() + " for prefix " + prefix);
55        return tokenList;
56    }
57
58    private List getElementTokens(List tokens) {
59        return getTokenByPrefix("<!ELEMENT", tokens);
60    }
61
62    private List getAttributeTokens(List tokens) {
63        return getTokenByPrefix("<!ATTLIST", tokens);
64    }
65
66    private void buildElementTree(List elmentTokens) throws DTDParserException {
67        final String ow = "\\s*";
68
69        final String begintag = ow + "<!ELEMENT" + ow;
70
71        final String identifier = "[-\\w:]+";
72        final String quantifier = "(?>\\?|\\*|\\+)?";
73
74        final String subelementsep = ow + "(\\||,)?" + ow;
75        
76        final String op = "\\(";
77        final String cp = "\\)";
78        
79        final String subelements = op + ".+" + cp + quantifier; 
80        
81        final String contents = "(EMPTY|ANY|" + subelements + ")";
82
83        final String pattern = begintag + "(" + identifier + ")" + ow + contents + ow;
84
85        DTDParser.debug("Pattern is: " + pattern);
86
87        Pattern elementPattern = Pattern.compile(pattern);
88        Pattern subelPattern = Pattern.compile("(" + identifier + ")(" + quantifier + ")(" + subelementsep + ")");
89
90        for (Iterator iter = elmentTokens.iterator(); iter.hasNext();) {
91            String element = (String) iter.next();
92            element = resolveEntity(element);
93            if (element == null) break;
94            Matcher m = elementPattern.matcher(element);
95            if (m.matches()) {
96                DTDParser.debug("MATCH: " + element);
97                for (int i = 1; i <= m.groupCount(); i++) {
98                    DTDParser.debug(i + ": " + m.group(i));
99                }
00                String name = m.group(1);
01                String subels = m.group(2);
02                
03                DTDTreeElement e = getOrCreateElement(name);
04
05                if (subels.equalsIgnoreCase("EMPTY")) {
06                    e.setType(DTDElement.EMPTY);
07                } else { // we got some subelements
08                    Matcher n = subelPattern.matcher(subels);
09                    while (n.find()) {
10                        DTDParser.debug("\t:" + n.group());
11                        String subname = n.group(1);
12                        String cardinal = n.group(2);
13                        DTDTreeElement subel = null;
14                        if ((subel = findOrphanedElement(subname)) == null) {
15                            subel = new DTDTreeElement(subname);
16                            this.flattenedElements.add(subel);
17                            this.elements.put(subname,subel);
18                        }
19                        subel.setParent(e);
20                        subel.setCardinality(cardinal);
21                        e.addChild(subel);
22                    }
23                }
24                Vector v = (Vector)attributes.get(e.getName());
25                if (v != null) {
26                    for (Iterator iterator = v.iterator(); iterator.hasNext();) {
27                        DTDAttribute att = (DTDAttribute) iterator.next();
28                        assignAttribute(e.getName(), att);
29                    }
30                }
31                mergeSubElements(e);
32                
33                findRoot(e);
34            } else {
35                String message = "Syntax error at contents |" + element + "|\n";
36                throw new DTDParserException(message);
37            }
38        }
39    }
40
41    public void printTree() {
42        if (root == null) throw new IllegalStateException("No root Element found");
43        debug("Starting to print tree");
44        System.out.println(root.toString());
45    }
46
47    protected void findRoot(DTDTreeElement e) {
48        while (e.getParent() != null) {
49            if (e.getParent().getName().equals(e.name)) break;
50            e = e.getParent();
51        }
52        root = e;
53        DTDParser.debug("Root seems to be " + root.getName());
54
55    }
56
57    protected DTDTreeElement findOrphanedElement(String name) {
58        if (name == null) throw new IllegalArgumentException("Name must not be null");
59        DTDTreeElement el = (DTDTreeElement)orphanedElements.get(name);
60        if (el != null ) {
61            orphanedElements.remove(name);
62            flattenedElements.add(el);
63            elements.put(name,el);
64        }
65        return el;
66    }
67
68    protected DTDTreeElement getOrCreateElement(String name) {
69        DTDElement el = getElement(name);
70        if (el != null) { return (DTDTreeElement)el; }
71        
72        DTDTreeElement newEl = new DTDTreeElement(name);
73        orphanedElements.put(name,newEl);
74        return newEl;
75    }
76
77    protected DTDElement getElement(String name) {
78        DTDTreeElement el = (DTDTreeElement)elements.get(name);
79        if (el != null) { return el; }
80   
81        DTDParser.debug("Element " + name + " not found!");
82        return null;
83    }
84
85    protected void mergeSubElements(DTDElement e) {
86      
87        debug("Merging sub elements for " + e.getName());
88        for (Iterator iter = flattenedElements.iterator(); iter.hasNext();) {
89            DTDTreeElement element = (DTDTreeElement) iter.next();
90            if (element.getName().equals(e.getName())) {
91                element.mergeSubElements(e);
92            }
93        }
94    }
95
96    protected void buildAttributeList(List tokens) throws DTDParserException {
97        
98        final String whitespace = "\\s*";
99
00        final String begintag = whitespace + "<!ATTLIST" + whitespace;
01
02        final String identifier = "[-\\w:]+";
03
04        final String enumeration = "\\(" + whitespace + identifier + whitespace +
05            "\\)|\\((?>" + whitespace + identifier + whitespace  + "\\|)+" + whitespace + identifier
06            + whitespace + "\\)";
07
08        final String valuetype = "(CDATA|" + enumeration + ")";
09
10        final String defvalue = "(?>\"[^\"]+\")";
11        final String modifier = "(?>#REQUIRED|#IMPLIED|#FIXED)"; // FIXME There
12        // are some
13        // more
14
15        final String flag = "(" + defvalue + "|" + modifier + ")";
16        final String attdef = "(?>" + whitespace + "(" + identifier + ")" + whitespace + valuetype + whitespace + flag
17                + whitespace + flag + "?)";
18
19        final String pattern = begintag + "(" + identifier + ")" + "(" + ".+" + ")" + whitespace;
20
21        DTDParser.debug("Pattern is: " + pattern);
22
23        Pattern elementPattern = Pattern.compile(pattern);
24        Pattern enumValuePattern = Pattern.compile("(" + identifier + ")" + "\\|?");
25        Pattern attributePattern = Pattern.compile(attdef);
26
27        for (Iterator iter = tokens.iterator(); iter.hasNext();) {
28            String line = (String) iter.next();
29            line = resolveEntity(line);
30            if (line == null) break;
31            Matcher m = elementPattern.matcher(line);
32            if (m.matches()) {
33                DTDParser.debug("MATCH: |" + line + "|");
34                for (int i = 1; i <= m.groupCount(); i++) {
35                    DTDParser.debug(i + ": " + m.group(i));
36                }
37                String element = m.group(1);
38                String attributeLine = m.group(2);
39                Matcher o = attributePattern.matcher(attributeLine);
40                while (o.find()) {
41                    for (int i = 1; i <= o.groupCount(); i++) {
42                        DTDParser.debug("\t" + i + ": " + o.group(i));
43                    }
44
45                    String name = o.group(1);
46                    String type = o.group(2);
47                    String defvalOrFlag = o.group(3);
48
49                    DTDAttribute a = new DTDAttribute(name);
50                    a.setElementName(element);
51                    if (type.equalsIgnoreCase("CDATA")) {
52                        a.setType(DTDAttribute.CDATA);
53                    } else {
54                        Matcher n = enumValuePattern.matcher(type);
55                        while (n.find()) {
56                            a.addEnumValue(n.group(1));
57                        }
58                    }
59
60                    if (defvalOrFlag.startsWith("#")) {
61                        a.setFlag(defvalOrFlag);
62                        if (defvalOrFlag.equals("#FIXED")) {
63                            a.setDefaultValue(o.group(4));
64                        }
65                    } else {
66                        a.setDefaultValue(defvalOrFlag.replaceAll("\"", ""));
67                    }
68
69                    addAttribute(a);
70                }
71
72            } else {
73                String message = "Syntax error at contents |" + line + "|\n";
74                throw new DTDParserException(message);
75
76            }
77        }
78
79        
80    }
81
82    protected void assignAttribute(String elementName, DTDAttribute attribute) {
83        DTDElement e = (DTDElement)orphanedElements.get(elementName);
84        if (e == null) {
85            e = getElement(elementName);
86        }
87        e.addAttribute(attribute);
88    }
89
90    private void buildEntityMap(List tokens) throws DTDParserException {
91        final String whitespace = "\\s*";
92        final String reftoken = "%?";
93
94        final String name = "[-\\w\\d._]+";
95        final String pedef = "\"([^\"]*)\"";
96
97        final String prefix = whitespace + "<!ENTITY" + whitespace + reftoken + whitespace + "(" + name + ")"
98                + whitespace;
99
00        final String peDeclTail = pedef + whitespace;
01
02        final String geDeclTail = "(?>SYSTEM|PUBLIC)" + whitespace + pedef + whitespace + pedef + "?" + whitespace;
03
04        final String pattern = prefix + "(?>" + peDeclTail + "|" + geDeclTail + ")";
05
06        DTDParser.debug("Pattern is: " + pattern);
07
08        Pattern entityPattern = Pattern.compile(pattern);
09        for (Iterator iter = tokens.iterator(); iter.hasNext();) {
10            String element = (String) iter.next();
11            String newelement = resolveEntity(element);
12            if (newelement == null) {
13                break;
14            }
15            if (newelement.matches(".+INCLUDE\\[.+")) System.exit(9);
16            Matcher m = entityPattern.matcher(newelement);
17            if (m.matches()) {
18                String nameVal = m.group(1);
19                String pedefVal = m.group(2);
20                for (int i = 1; i <= m.groupCount(); i++) {
21                    DTDParser.debug(i + ": " + m.group(i));
22                }
23                if (pedefVal == null) pedefVal = m.group(4);
24                DTDParser.debug("Adding |" + nameVal + "|->" + pedefVal);
25                entities.put(nameVal, pedefVal);
26            } else {
27                // String message = "Syntax error at contents |" + newelement +
28                // "|\n";
29                // throw new DTDParserException(message);
30            }
31
32        }
33
34    }
35
36    protected String resolveEntity(String element) throws DTDParserException {
37        String newelement = new String(element);
38        final String entref = "%([^%;\"]+);";
39
40        Pattern entity = Pattern.compile(entref);
41        
42        if (newelement.matches("^\\s*<!\\[IGNORE.+")) return null;
43        
44        DTDParser.debug(newelement);
45        Matcher m = entity.matcher(newelement);
46        while (m.find()) {
47            String name = m.group(1);
48            DTDParser.debug("Resolving :" + name + ":");
49            String replace = (String) entities.get(name);
50            if (replace != null)// throw new DTDParserException("No entity " +
51                                  //name + " near " + newelement);
52                    newelement = newelement.replaceAll("%" + name + ";", replace);
53            DTDParser.debug(newelement);
54        }
55        DTDParser.debug(newelement);
56        newelement = newelement.replaceAll("<!\\[INCLUDE\\[","");
57        DTDParser.debug("newelement is now: " + newelement);
58        
59        return newelement;
60    }
61
62    protected void addAttribute(DTDAttribute attribute) {
63        if (attributes.containsKey(attribute.getElementName())) {
64            ((Vector)attributes.get(attribute.getElementName())).add(attribute);
65        } else {
66            Vector v = new Vector();
67            v.add(attribute);
68            attributes.put(attribute.getElementName(),v);
69        }
70        
71    }
72    
73}