001    /*
002     * 
003     * $Revision: 15270 $ $Date: 2009-05-25 17:27:57 +0200 (Mon, 25 May 2009) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.common.xml;
025    
026    import java.io.ByteArrayInputStream;
027    import java.io.IOException;
028    import java.io.InputStream;
029    import java.io.StringReader;
030    import java.net.URI;
031    import java.net.URL;
032    import java.net.URLConnection;
033    
034    import org.apache.log4j.Logger;
035    import org.jdom.Document;
036    import org.jdom.input.SAXBuilder;
037    import org.xml.sax.ErrorHandler;
038    import org.xml.sax.InputSource;
039    import org.xml.sax.SAXException;
040    import org.xml.sax.SAXParseException;
041    
042    import org.mycore.common.MCRConfiguration;
043    import org.mycore.common.MCRException;
044    
045    /**
046     * Implements the MCRParserInterface using the Xerces XML to parse XML streams
047     * to a DOM document.
048     * 
049     * @author Jens Kupferschmidt
050     * @author Frank Lützenkirchen
051     * @author Thomas Scheffler (yagee)
052     * 
053     * @version $Revision: 15270 $ $Date: 2009-05-25 17:27:57 +0200 (Mon, 25 May 2009) $
054     */
055    public class MCRParserXerces implements MCRParserInterface, ErrorHandler {
056    
057        /** The logger */
058        private final static Logger LOGGER = Logger.getLogger(MCRParserXerces.class);
059    
060        /** A xerces parser instance that will validate */
061        SAXBuilder builderValid;
062    
063        /** A xerces parser instance that will not validate */
064        SAXBuilder builder;
065    
066        /** By default, validate xml or not? */
067        private static boolean FLAG_VALIDATION = false;
068    
069        private static String FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces";
070    
071        private static String FEATURE_SCHEMA_SUPPORT = "http://apache.org/xml/features/validation/schema";
072    
073        private static String FEATURE_FULL_SCHEMA_SUPPORT = "http://apache.org/xml/features/validation/schema-full-checking";
074    
075        /**
076         * Constructor for the Xerces parser. Sets default validation flag as
077         * specified by the property MCR.XMLParser.ValidateSchema in
078         * mycore.properties
079         */
080        public MCRParserXerces() {
081            FLAG_VALIDATION = MCRConfiguration.instance().getBoolean("MCR.XMLParser.ValidateSchema", FLAG_VALIDATION);
082            builderValid = new SAXBuilder("org.apache.xerces.parsers.SAXParser", true);
083            builderValid.setFeature(FEATURE_NAMESPACES, true);
084            builderValid.setFeature(FEATURE_SCHEMA_SUPPORT, true);
085            builderValid.setFeature(FEATURE_FULL_SCHEMA_SUPPORT, false);
086            builderValid.setReuseParser(false);
087            builderValid.setErrorHandler(this);
088            builderValid.setEntityResolver(MCRURIResolver.instance());
089            builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser", false);
090            builder.setFeature(FEATURE_NAMESPACES, true);
091            builder.setFeature(FEATURE_SCHEMA_SUPPORT, false);
092            builder.setFeature(FEATURE_FULL_SCHEMA_SUPPORT, false);
093            builder.setReuseParser(false);
094            builder.setErrorHandler(this);
095            builder.setEntityResolver(MCRURIResolver.instance());
096        }
097    
098        /**
099         * Parses the XML byte stream with xerces parser and returns a DOM document.
100         * Uses the validation flag from mycore.properties.
101         * 
102         * @param uri
103         *            the URI of the XML input stream
104         * @throws MCRException
105         *             if XML could not be parsed
106         * @return the parsed XML stream as a DOM document
107         */
108        public Document parseURI(URI uri) {
109            return parseURI(uri, FLAG_VALIDATION);
110        }
111    
112        /**
113         * Parses the XML byte stream with xerces parser and returns a DOM document.
114         * Uses the validation flag given.
115         * 
116         * @param uri
117         *            the URI of the XML input stream
118         * @param validate
119         *            if true, will validate against XML Schema
120         * @throws MCRException
121         *             if XML could not be parsed
122         * @return the parsed XML stream as a DOM document
123         * @throws IOException 
124         * @throws SAXException 
125         */
126        public Document parseURI(URI uri, boolean validate) {
127            InputSource inputSource = null;
128            try {
129                //use uri as a SystemID
130                inputSource = new InputSource(uri.toString());
131            } catch (Exception e) {
132                throw new MCRException(msg + uri, e);
133            }
134            if (inputSource == null)
135                throw new MCRException("Could not get " + uri);
136            return parse(inputSource, validate);
137        }
138    
139        /**
140         * Parses the XML byte stream with xerces parser and returns a DOM document.
141         * Uses the validation flag from mycore.properties
142         * 
143         * @param xml
144         *            the XML byte stream
145         * @throws MCRException
146         *             if XML could not be parsed
147         * @return the parsed XML stream as a DOM document
148         */
149        public Document parseXML(String xml) {
150            return parseXML(xml, FLAG_VALIDATION);
151        }
152    
153        /**
154         * Parses the XML byte stream with xerces parser and returns a DOM document.
155         * Uses the validation flag given.
156         * 
157         * @param xml
158         *            the XML byte stream
159         * @param validate
160         *            if true, will validate against XML Schema
161         * @throws MCRException
162         *             if XML could not be parsed
163         * @return the parsed XML stream as a DOM document
164         */
165        public Document parseXML(String xml, boolean validate) {
166            InputSource source = new InputSource(new StringReader(xml));
167    
168            return parse(source, validate);
169        }
170    
171        /**
172         * Parses the XML byte stream with xerces parser and returns a DOM document.
173         * Uses the validation flag from mycore.properties
174         * 
175         * @param xml
176         *            the XML byte stream
177         * @throws MCRException
178         *             if XML could not be parsed
179         * @return the parsed XML stream as a DOM document
180         */
181        public Document parseXML(byte[] xml) {
182            return parseXML(xml, FLAG_VALIDATION);
183        }
184    
185        /**
186         * Parses the XML byte stream with xerces parser and returns a DOM document.
187         * Uses the given validation flag.
188         * 
189         * @param xml
190         *            the XML byte stream
191         * @param validate
192         *            if true, will validate against XML Schema
193         * @throws MCRException
194         *             if XML could not be parsed
195         * @return the parsed XML stream as a DOM document
196         */
197        public Document parseXML(byte[] xml, boolean validate) {
198            InputSource source = new InputSource(new ByteArrayInputStream(xml));
199            return parse(source, validate);
200        }
201    
202        public Document parseXML(InputStream input) throws MCRException {
203            return parseXML(input, FLAG_VALIDATION);
204        }
205    
206        public Document parseXML(InputStream input, boolean validate) throws MCRException {
207            InputSource source = new InputSource(input);
208            return parse(source, validate);
209        }
210    
211        /**
212         * Parses the InputSource with xerces parser and returns a DOM document.
213         * Uses the given validation flag.
214         * 
215         * @param source
216         *            the XML InputSource
217         * @param validate
218         *            if true, will validate against XML Schema
219         * @throws MCRException
220         *             if XML could not be parsed
221         * @return the parsed XML stream as a DOM document
222         */
223        private Document parse(InputSource source, boolean validate) {
224            SAXBuilder builder = (validate ? this.builderValid : this.builder);
225    
226            try {
227                return builder.build(source);
228            } catch (Exception ex) {
229                throw new MCRException(msg, ex);
230            }
231        }
232    
233        private final static String msg = "Error while parsing XML document: ";
234    
235        /**
236         * Handles parser warnings
237         */
238        public void warning(SAXParseException ex) {
239            LOGGER.warn(getSAXErrorMessage(ex), ex);
240        }
241    
242        /**
243         * Handles parse errors
244         */
245        public void error(SAXParseException ex) {
246            LOGGER.error(getSAXErrorMessage(ex), ex);
247            throw new MCRException(msg + getSAXErrorMessage(ex), ex);
248        }
249    
250        /**
251         * Handles fatal parse errors
252         */
253        public void fatalError(SAXParseException ex) {
254            LOGGER.fatal(getSAXErrorMessage(ex));
255            throw new MCRException(msg + getSAXErrorMessage(ex), ex);
256        }
257    
258        /**
259         * Returns a text indicating at which line and column the error occured.
260         * 
261         * @param ex
262         *            the SAXParseException exception
263         * @return the location string
264         */
265        private String getSAXErrorMessage(SAXParseException ex) {
266            StringBuffer str = new StringBuffer();
267    
268            String systemId = ex.getSystemId();
269            if (systemId != null) {
270                int index = systemId.lastIndexOf('/');
271    
272                if (index != -1) {
273                    systemId = systemId.substring(index + 1);
274                }
275    
276                str.append(systemId).append(": ");
277            }
278    
279            str.append("line ").append(ex.getLineNumber());
280            str.append(", column ").append(ex.getColumnNumber());
281            str.append(", ");
282            str.append(ex.getLocalizedMessage());
283    
284            return str.toString();
285        }
286    }