001 /*
002 *
003 * $Revision: 15270 $ $Date: 2009-05-25 17:27:57 +0200 (Mon, 25 May 2009) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.common.xml;
025
026 import java.io.ByteArrayInputStream;
027 import java.io.IOException;
028 import java.io.InputStream;
029 import java.io.StringReader;
030 import java.net.URI;
031 import java.net.URL;
032 import java.net.URLConnection;
033
034 import org.apache.log4j.Logger;
035 import org.jdom.Document;
036 import org.jdom.input.SAXBuilder;
037 import org.xml.sax.ErrorHandler;
038 import org.xml.sax.InputSource;
039 import org.xml.sax.SAXException;
040 import org.xml.sax.SAXParseException;
041
042 import org.mycore.common.MCRConfiguration;
043 import org.mycore.common.MCRException;
044
045 /**
046 * Implements the MCRParserInterface using the Xerces XML to parse XML streams
047 * to a DOM document.
048 *
049 * @author Jens Kupferschmidt
050 * @author Frank Lützenkirchen
051 * @author Thomas Scheffler (yagee)
052 *
053 * @version $Revision: 15270 $ $Date: 2009-05-25 17:27:57 +0200 (Mon, 25 May 2009) $
054 */
055 public class MCRParserXerces implements MCRParserInterface, ErrorHandler {
056
057 /** The logger */
058 private final static Logger LOGGER = Logger.getLogger(MCRParserXerces.class);
059
060 /** A xerces parser instance that will validate */
061 SAXBuilder builderValid;
062
063 /** A xerces parser instance that will not validate */
064 SAXBuilder builder;
065
066 /** By default, validate xml or not? */
067 private static boolean FLAG_VALIDATION = false;
068
069 private static String FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces";
070
071 private static String FEATURE_SCHEMA_SUPPORT = "http://apache.org/xml/features/validation/schema";
072
073 private static String FEATURE_FULL_SCHEMA_SUPPORT = "http://apache.org/xml/features/validation/schema-full-checking";
074
075 /**
076 * Constructor for the Xerces parser. Sets default validation flag as
077 * specified by the property MCR.XMLParser.ValidateSchema in
078 * mycore.properties
079 */
080 public MCRParserXerces() {
081 FLAG_VALIDATION = MCRConfiguration.instance().getBoolean("MCR.XMLParser.ValidateSchema", FLAG_VALIDATION);
082 builderValid = new SAXBuilder("org.apache.xerces.parsers.SAXParser", true);
083 builderValid.setFeature(FEATURE_NAMESPACES, true);
084 builderValid.setFeature(FEATURE_SCHEMA_SUPPORT, true);
085 builderValid.setFeature(FEATURE_FULL_SCHEMA_SUPPORT, false);
086 builderValid.setReuseParser(false);
087 builderValid.setErrorHandler(this);
088 builderValid.setEntityResolver(MCRURIResolver.instance());
089 builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser", false);
090 builder.setFeature(FEATURE_NAMESPACES, true);
091 builder.setFeature(FEATURE_SCHEMA_SUPPORT, false);
092 builder.setFeature(FEATURE_FULL_SCHEMA_SUPPORT, false);
093 builder.setReuseParser(false);
094 builder.setErrorHandler(this);
095 builder.setEntityResolver(MCRURIResolver.instance());
096 }
097
098 /**
099 * Parses the XML byte stream with xerces parser and returns a DOM document.
100 * Uses the validation flag from mycore.properties.
101 *
102 * @param uri
103 * the URI of the XML input stream
104 * @throws MCRException
105 * if XML could not be parsed
106 * @return the parsed XML stream as a DOM document
107 */
108 public Document parseURI(URI uri) {
109 return parseURI(uri, FLAG_VALIDATION);
110 }
111
112 /**
113 * Parses the XML byte stream with xerces parser and returns a DOM document.
114 * Uses the validation flag given.
115 *
116 * @param uri
117 * the URI of the XML input stream
118 * @param validate
119 * if true, will validate against XML Schema
120 * @throws MCRException
121 * if XML could not be parsed
122 * @return the parsed XML stream as a DOM document
123 * @throws IOException
124 * @throws SAXException
125 */
126 public Document parseURI(URI uri, boolean validate) {
127 InputSource inputSource = null;
128 try {
129 //use uri as a SystemID
130 inputSource = new InputSource(uri.toString());
131 } catch (Exception e) {
132 throw new MCRException(msg + uri, e);
133 }
134 if (inputSource == null)
135 throw new MCRException("Could not get " + uri);
136 return parse(inputSource, validate);
137 }
138
139 /**
140 * Parses the XML byte stream with xerces parser and returns a DOM document.
141 * Uses the validation flag from mycore.properties
142 *
143 * @param xml
144 * the XML byte stream
145 * @throws MCRException
146 * if XML could not be parsed
147 * @return the parsed XML stream as a DOM document
148 */
149 public Document parseXML(String xml) {
150 return parseXML(xml, FLAG_VALIDATION);
151 }
152
153 /**
154 * Parses the XML byte stream with xerces parser and returns a DOM document.
155 * Uses the validation flag given.
156 *
157 * @param xml
158 * the XML byte stream
159 * @param validate
160 * if true, will validate against XML Schema
161 * @throws MCRException
162 * if XML could not be parsed
163 * @return the parsed XML stream as a DOM document
164 */
165 public Document parseXML(String xml, boolean validate) {
166 InputSource source = new InputSource(new StringReader(xml));
167
168 return parse(source, validate);
169 }
170
171 /**
172 * Parses the XML byte stream with xerces parser and returns a DOM document.
173 * Uses the validation flag from mycore.properties
174 *
175 * @param xml
176 * the XML byte stream
177 * @throws MCRException
178 * if XML could not be parsed
179 * @return the parsed XML stream as a DOM document
180 */
181 public Document parseXML(byte[] xml) {
182 return parseXML(xml, FLAG_VALIDATION);
183 }
184
185 /**
186 * Parses the XML byte stream with xerces parser and returns a DOM document.
187 * Uses the given validation flag.
188 *
189 * @param xml
190 * the XML byte stream
191 * @param validate
192 * if true, will validate against XML Schema
193 * @throws MCRException
194 * if XML could not be parsed
195 * @return the parsed XML stream as a DOM document
196 */
197 public Document parseXML(byte[] xml, boolean validate) {
198 InputSource source = new InputSource(new ByteArrayInputStream(xml));
199 return parse(source, validate);
200 }
201
202 public Document parseXML(InputStream input) throws MCRException {
203 return parseXML(input, FLAG_VALIDATION);
204 }
205
206 public Document parseXML(InputStream input, boolean validate) throws MCRException {
207 InputSource source = new InputSource(input);
208 return parse(source, validate);
209 }
210
211 /**
212 * Parses the InputSource with xerces parser and returns a DOM document.
213 * Uses the given validation flag.
214 *
215 * @param source
216 * the XML InputSource
217 * @param validate
218 * if true, will validate against XML Schema
219 * @throws MCRException
220 * if XML could not be parsed
221 * @return the parsed XML stream as a DOM document
222 */
223 private Document parse(InputSource source, boolean validate) {
224 SAXBuilder builder = (validate ? this.builderValid : this.builder);
225
226 try {
227 return builder.build(source);
228 } catch (Exception ex) {
229 throw new MCRException(msg, ex);
230 }
231 }
232
233 private final static String msg = "Error while parsing XML document: ";
234
235 /**
236 * Handles parser warnings
237 */
238 public void warning(SAXParseException ex) {
239 LOGGER.warn(getSAXErrorMessage(ex), ex);
240 }
241
242 /**
243 * Handles parse errors
244 */
245 public void error(SAXParseException ex) {
246 LOGGER.error(getSAXErrorMessage(ex), ex);
247 throw new MCRException(msg + getSAXErrorMessage(ex), ex);
248 }
249
250 /**
251 * Handles fatal parse errors
252 */
253 public void fatalError(SAXParseException ex) {
254 LOGGER.fatal(getSAXErrorMessage(ex));
255 throw new MCRException(msg + getSAXErrorMessage(ex), ex);
256 }
257
258 /**
259 * Returns a text indicating at which line and column the error occured.
260 *
261 * @param ex
262 * the SAXParseException exception
263 * @return the location string
264 */
265 private String getSAXErrorMessage(SAXParseException ex) {
266 StringBuffer str = new StringBuffer();
267
268 String systemId = ex.getSystemId();
269 if (systemId != null) {
270 int index = systemId.lastIndexOf('/');
271
272 if (index != -1) {
273 systemId = systemId.substring(index + 1);
274 }
275
276 str.append(systemId).append(": ");
277 }
278
279 str.append("line ").append(ex.getLineNumber());
280 str.append(", column ").append(ex.getColumnNumber());
281 str.append(", ");
282 str.append(ex.getLocalizedMessage());
283
284 return str.toString();
285 }
286 }