View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.common.content.transformer;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  import java.nio.ByteBuffer;
24  import java.util.Arrays;
25  import java.util.Base64;
26  import java.util.LinkedList;
27  import java.util.Optional;
28  import java.util.Properties;
29  
30  import javax.xml.parsers.ParserConfigurationException;
31  import javax.xml.transform.OutputKeys;
32  import javax.xml.transform.Result;
33  import javax.xml.transform.Templates;
34  import javax.xml.transform.TransformerConfigurationException;
35  import javax.xml.transform.TransformerException;
36  import javax.xml.transform.TransformerFactory;
37  import javax.xml.transform.TransformerFactoryConfigurationError;
38  import javax.xml.transform.sax.SAXResult;
39  import javax.xml.transform.sax.SAXSource;
40  import javax.xml.transform.sax.SAXTransformerFactory;
41  import javax.xml.transform.sax.TransformerHandler;
42  import javax.xml.transform.stream.StreamResult;
43  
44  import org.apache.commons.io.FilenameUtils;
45  import org.apache.logging.log4j.LogManager;
46  import org.apache.logging.log4j.Logger;
47  import org.mycore.common.MCRCache;
48  import org.mycore.common.MCRClassTools;
49  import org.mycore.common.MCRException;
50  import org.mycore.common.config.MCRConfiguration2;
51  import org.mycore.common.config.MCRConfigurationBase;
52  import org.mycore.common.config.MCRConfigurationException;
53  import org.mycore.common.content.MCRByteContent;
54  import org.mycore.common.content.MCRContent;
55  import org.mycore.common.content.MCRWrappedContent;
56  import org.mycore.common.content.streams.MCRByteArrayOutputStream;
57  import org.mycore.common.xml.MCREntityResolver;
58  import org.mycore.common.xml.MCRURIResolver;
59  import org.mycore.common.xml.MCRXMLParserFactory;
60  import org.mycore.common.xsl.MCRErrorListener;
61  import org.mycore.common.xsl.MCRParameterCollector;
62  import org.mycore.common.xsl.MCRTemplatesSource;
63  import org.xml.sax.SAXException;
64  import org.xml.sax.XMLReader;
65  
66  /**
67   * Transforms XML content using a static XSL stylesheet. The stylesheet is configured via
68   * <code>MCR.ContentTransformer.{ID}.Stylesheet</code>. You may choose your own instance of
69   * {@link SAXTransformerFactory} via <code>MCR.ContentTransformer.{ID}.TransformerFactoryClass</code>.
70   * The default transformer factory implementation {@link org.apache.xalan.processor.TransformerFactoryImpl}
71   * is configured with <code>MCR.LayoutService.TransformerFactoryClass</code>.
72   *
73   * @author Frank L\u00FCtzenkirchen
74   */
75  public class MCRXSLTransformer extends MCRParameterizedTransformer {
76  
77      private static final int INITIAL_BUFFER_SIZE = 32 * 1024;
78  
79      private static final MCRURIResolver URI_RESOLVER = MCRURIResolver.instance();
80  
81      private static final MCREntityResolver ENTITY_RESOLVER = MCREntityResolver.instance();
82  
83      private static Logger LOGGER = LogManager.getLogger(MCRXSLTransformer.class);
84  
85      private static MCRCache<String, MCRXSLTransformer> INSTANCE_CACHE = new MCRCache<>(100,
86          "MCRXSLTransformer instance cache");
87  
88      private static long CHECK_PERIOD = MCRConfiguration2.getLong("MCR.LayoutService.LastModifiedCheckPeriod")
89          .orElse(60000l);
90  
91      private static final Class<? extends TransformerFactory> DEFAULT_FACTORY_CLASS = MCRConfiguration2
92          .<TransformerFactory>getClass("MCR.LayoutService.TransformerFactoryClass")
93          .orElseGet(TransformerFactory.newInstance()::getClass);
94  
95      /** The compiled XSL stylesheet */
96      protected MCRTemplatesSource[] templateSources;
97  
98      protected Templates[] templates;
99  
100     protected long[] modified;
101 
102     protected long modifiedChecked;
103 
104     protected SAXTransformerFactory tFactory;
105 
106     public MCRXSLTransformer(String... stylesheets) {
107         this(DEFAULT_FACTORY_CLASS);
108         setStylesheets(stylesheets);
109     }
110 
111     public MCRXSLTransformer() {
112         this(DEFAULT_FACTORY_CLASS);
113     }
114 
115     public MCRXSLTransformer(Class<? extends TransformerFactory> tfClass) {
116         super();
117         setTransformerFactory(tfClass.getName());
118     }
119 
120     public synchronized void setTransformerFactory(String factoryClass) throws TransformerFactoryConfigurationError {
121         TransformerFactory transformerFactory = Optional.ofNullable(factoryClass)
122             .map(c -> TransformerFactory.newInstance(c, MCRClassTools.getClassLoader()))
123             .orElseGet(TransformerFactory::newInstance);
124         LOGGER.debug("Transformerfactory: {}", transformerFactory.getClass().getName());
125         transformerFactory.setURIResolver(URI_RESOLVER);
126         transformerFactory.setErrorListener(MCRErrorListener.getInstance());
127         if (transformerFactory.getFeature(SAXSource.FEATURE) && transformerFactory.getFeature(SAXResult.FEATURE)) {
128             this.tFactory = (SAXTransformerFactory) transformerFactory;
129         } else {
130             throw new MCRConfigurationException("Transformer Factory " + transformerFactory.getClass().getName()
131                 + " does not implement SAXTransformerFactory");
132         }
133     }
134 
135     public static MCRXSLTransformer getInstance(String... stylesheets) {
136         return getInstance(DEFAULT_FACTORY_CLASS, stylesheets);
137     }
138 
139     public static MCRXSLTransformer getInstance(Class<? extends TransformerFactory> tfClass, String... stylesheets) {
140         String key = tfClass.getName() + "_"
141             + (stylesheets.length == 1 ? stylesheets[0] : Arrays.toString(stylesheets));
142         MCRXSLTransformer instance = INSTANCE_CACHE.get(key);
143         if (instance == null) {
144             instance = new MCRXSLTransformer(tfClass);
145             instance.setStylesheets(stylesheets);
146             INSTANCE_CACHE.put(key, instance);
147         }
148         return instance;
149     }
150 
151     @Override
152     public void init(String id) {
153         super.init(id);
154         String property = "MCR.ContentTransformer." + id + ".Stylesheet";
155         String[] stylesheets = MCRConfiguration2.getStringOrThrow(property).split(",");
156         setStylesheets(stylesheets);
157         MCRConfiguration2.getString("MCR.ContentTransformer." + id + ".TransformerFactoryClass")
158             .ifPresent(this::setTransformerFactory);
159     }
160 
161     public void setStylesheets(String... stylesheets) {
162         this.templateSources = new MCRTemplatesSource[stylesheets.length];
163         for (int i = 0; i < stylesheets.length; i++) {
164             this.templateSources[i] = new MCRTemplatesSource(stylesheets[i].trim());
165         }
166         this.modified = new long[templateSources.length];
167         this.modifiedChecked = 0;
168         this.templates = new Templates[templateSources.length];
169     }
170 
171     private void checkTemplateUptodate()
172         throws TransformerConfigurationException, SAXException, ParserConfigurationException {
173         boolean check = System.currentTimeMillis() - modifiedChecked > CHECK_PERIOD;
174         boolean useCache = MCRConfiguration2.getBoolean("MCR.UseXSLTemplateCache").orElse(true);
175 
176         if (check || !useCache) {
177             for (int i = 0; i < templateSources.length; i++) {
178                 long lastModified = templateSources[i].getLastModified();
179                 if (templates[i] == null || modified[i] < lastModified || !useCache) {
180                     SAXSource source = templateSources[i].getSource();
181                     templates[i] = tFactory.newTemplates(source);
182                     if (templates[i] == null) {
183                         throw new TransformerConfigurationException(
184                             "XSLT Stylesheet could not be compiled: " + templateSources[i].getURL());
185                     }
186                     modified[i] = lastModified;
187                 }
188             }
189             modifiedChecked = System.currentTimeMillis();
190         }
191     }
192 
193     @Override
194     public String getEncoding() throws TransformerException, SAXException, ParserConfigurationException {
195         return getOutputProperties().getProperty("encoding", "UTF-8");
196     }
197 
198     @Override
199     public String getMimeType() throws TransformerException, SAXException, ParserConfigurationException {
200         return getOutputProperties().getProperty("media-type", "text/xml");
201     }
202 
203     @Override
204     public MCRContent transform(MCRContent source) throws IOException {
205         return transform(source, new MCRParameterCollector());
206     }
207 
208     @Override
209     public MCRContent transform(MCRContent source, MCRParameterCollector parameter) throws IOException {
210         try {
211             LinkedList<TransformerHandler> transformHandlerList = getTransformHandlerList(parameter);
212             XMLReader reader = getXMLReader(transformHandlerList);
213             TransformerHandler lastTransformerHandler = transformHandlerList.getLast();
214             return transform(source, reader, lastTransformerHandler, parameter);
215         } catch (TransformerException | SAXException | ParserConfigurationException e) {
216             throw new IOException(e);
217         }
218     }
219 
220     @Override
221     public void transform(MCRContent source, OutputStream out) throws IOException {
222         transform(source, out, new MCRParameterCollector());
223     }
224 
225     @Override
226     public void transform(MCRContent source, OutputStream out, MCRParameterCollector parameter) throws IOException {
227         MCRErrorListener el = null;
228         try {
229             LinkedList<TransformerHandler> transformHandlerList = getTransformHandlerList(parameter);
230             XMLReader reader = getXMLReader(transformHandlerList);
231             TransformerHandler lastTransformerHandler = transformHandlerList.getLast();
232             el = (MCRErrorListener) lastTransformerHandler.getTransformer().getErrorListener();
233             StreamResult result = new StreamResult(out);
234             lastTransformerHandler.setResult(result);
235             reader.parse(source.getInputSource());
236         } catch (TransformerConfigurationException | SAXException | IllegalArgumentException
237             | ParserConfigurationException e) {
238             throw new IOException(e);
239         } catch (RuntimeException e) {
240             if (el != null && e.getCause() == null && el.getExceptionThrown() != null) {
241                 //typically if a RuntimeException has no cause,
242                 //we can get the "real cause" from MCRErrorListener, yeah!!!
243                 throw new IOException(el.getExceptionThrown());
244             }
245             throw e;
246         }
247     }
248 
249     protected MCRContent transform(MCRContent source, XMLReader reader, TransformerHandler transformerHandler,
250         MCRParameterCollector parameter)
251         throws IOException, SAXException, TransformerException, ParserConfigurationException {
252         return new MCRTransformedContent(source, reader, transformerHandler, getLastModified(), parameter,
253             getFileName(source), getMimeType(), getEncoding(), this);
254     }
255 
256     protected MCRContent getTransformedContent(MCRContent source, XMLReader reader,
257         TransformerHandler transformerHandler) throws IOException, SAXException {
258         MCRByteArrayOutputStream baos = new MCRByteArrayOutputStream(INITIAL_BUFFER_SIZE);
259         StreamResult serializer = new StreamResult(baos);
260         transformerHandler.setResult(serializer);
261         // Parse the source XML, and send the parse events to the
262         // TransformerHandler.
263         LOGGER.debug("Start transforming: {}", source.getSystemId() == null ? source.getName() : source.getSystemId());
264         reader.parse(source.getInputSource());
265         return new MCRByteContent(baos.getBuffer(), 0, baos.size());
266     }
267 
268     private String getFileName(MCRContent content)
269         throws TransformerException, SAXException, ParserConfigurationException {
270         String fileName = content.getName();
271         if (fileName == null) {
272             return null;
273         }
274         //MCR-2254, ':' in fileName causes problems on Windows
275         fileName = fileName.replace(':', '_');
276         return FilenameUtils.removeExtension(fileName) + "." + getFileExtension();
277     }
278 
279     private long getLastModified() {
280         long lastModified = -1;
281         for (long current : modified) {
282             if (current < 0) {
283                 return -1;
284             }
285             lastModified = Math.max(lastModified, current);
286         }
287         return lastModified;
288     }
289 
290     protected LinkedList<TransformerHandler> getTransformHandlerList(MCRParameterCollector parameterCollector)
291         throws TransformerConfigurationException, SAXException, ParserConfigurationException {
292         checkTemplateUptodate();
293         LinkedList<TransformerHandler> xslSteps = new LinkedList<>();
294         //every transformhandler shares the same ErrorListener instance
295         MCRErrorListener errorListener = MCRErrorListener.getInstance();
296         for (Templates template : templates) {
297             TransformerHandler handler = tFactory.newTransformerHandler(template);
298             parameterCollector.setParametersTo(handler.getTransformer());
299             handler.getTransformer().setErrorListener(errorListener);
300             if (!xslSteps.isEmpty()) {
301                 Result result = new SAXResult(handler);
302                 xslSteps.getLast().setResult(result);
303             }
304             xslSteps.add(handler);
305         }
306         return xslSteps;
307     }
308 
309     protected XMLReader getXMLReader(LinkedList<TransformerHandler> transformHandlerList)
310         throws SAXException, ParserConfigurationException {
311         XMLReader reader = MCRXMLParserFactory.getNonValidatingParser().getXMLReader();
312         reader.setEntityResolver(ENTITY_RESOLVER);
313         reader.setContentHandler(transformHandlerList.getFirst());
314         return reader;
315     }
316 
317     public Properties getOutputProperties()
318         throws TransformerConfigurationException, SAXException, ParserConfigurationException {
319         checkTemplateUptodate();
320         Templates lastTemplate = templates[templates.length - 1];
321         Properties outputProperties = lastTemplate.getOutputProperties();
322         return outputProperties;
323     }
324 
325     /* (non-Javadoc)
326      * @see org.mycore.common.content.transformer.MCRContentTransformer#getFileExtension()
327      */
328     @Override
329     public String getFileExtension() throws TransformerException, SAXException, ParserConfigurationException {
330         String fileExtension = super.fileExtension;
331         if (fileExtension != null && !getDefaultExtension().equals(fileExtension)) {
332             return fileExtension;
333         }
334         Properties outputProperties = getOutputProperties();
335         //until we have a better solution
336         String definedMimeType = getMimeType();
337         if ("text/html".equals(definedMimeType) || "html".equals(outputProperties.getProperty(OutputKeys.METHOD))) {
338             return "html";
339         }
340         if ("text/xml".equals(definedMimeType)) {
341             return "xml";
342         }
343         return getDefaultExtension();
344     }
345 
346     static class MCRTransformedContent extends MCRWrappedContent {
347         private MCRContent source;
348 
349         private XMLReader reader;
350 
351         private TransformerHandler transformerHandler;
352 
353         private MCRContent transformed;
354 
355         private String eTag;
356 
357         private MCRXSLTransformer instance;
358 
359         MCRTransformedContent(MCRContent source, XMLReader reader, TransformerHandler transformerHandler,
360             long transformerLastModified, MCRParameterCollector parameter, String fileName, String mimeType,
361             String encoding, MCRXSLTransformer instance) throws IOException {
362             this.source = source;
363             this.reader = reader;
364             this.transformerHandler = transformerHandler;
365             LOGGER.debug("Transformer lastModified: {}", transformerLastModified);
366             LOGGER.debug("Source lastModified     : {}", source.lastModified());
367             this.lastModified = (transformerLastModified >= 0 && source.lastModified() >= 0)
368                 ? Math.max(transformerLastModified, source.lastModified())
369                 : -1;
370             this.eTag = generateETag(source, lastModified, parameter.hashCode());
371             this.name = fileName;
372             this.mimeType = mimeType;
373             this.encoding = encoding;
374             this.instance = instance;
375         }
376 
377         @Override
378         public String getMimeType() throws IOException {
379             return mimeType;
380         }
381 
382         @Override
383         public String getName() {
384             return name;
385         }
386 
387         private String generateETag(MCRContent content, final long lastModified, final int parameterHashCode)
388             throws IOException {
389             //parameterHashCode is stable for this session and current request URL
390             long systemLastModified = MCRConfigurationBase.getSystemLastModified();
391             StringBuilder b = new StringBuilder("\"");
392             byte[] unencodedETag = ByteBuffer.allocate(Long.SIZE / 4).putLong(lastModified ^ parameterHashCode)
393                 .putLong(systemLastModified ^ parameterHashCode).array();
394             b.append(Base64.getEncoder().encodeToString(unencodedETag));
395             b.append('"');
396             return b.toString();
397         }
398 
399         @Override
400         public MCRContent getBaseContent() {
401             if (transformed == null) {
402                 try {
403                     transformed = instance.getTransformedContent(source, reader, transformerHandler);
404                     transformed.setLastModified(lastModified);
405                     transformed.setName(name);
406                     transformed.setMimeType(mimeType);
407                     transformed.setEncoding(encoding);
408                 } catch (IOException | SAXException e) {
409                     throw new MCRException(e);
410                 } catch (RuntimeException e) {
411                     MCRErrorListener el = (MCRErrorListener) transformerHandler.getTransformer().getErrorListener();
412                     if (el != null && e.getCause() == null && el.getExceptionThrown() != null) {
413                         //typically if a RuntimeException has no cause,
414                         //we can get the "real cause" from MCRErrorListener, yeah!!!
415                         throw new RuntimeException(MCRErrorListener.getMyMessageAndLocation(el.getExceptionThrown()),
416                             el.getExceptionThrown());
417                     }
418                     throw e;
419                 } finally {
420                     try {
421                         transformerHandler.getTransformer().clearParameters();
422                         transformerHandler.getTransformer().reset();
423                     } catch (UnsupportedOperationException e) {
424                         //expected and safely ignored
425                     }
426                 }
427 
428             }
429             return transformed;
430         }
431 
432         @Override
433         public long lastModified() throws IOException {
434             return lastModified;
435         }
436 
437         @Override
438         public String getETag() throws IOException {
439             return eTag;
440         }
441 
442         @Override
443         public boolean isUsingSession() {
444             return true;
445         }
446 
447         @Override
448         public String getEncoding() {
449             return transformed == null ? encoding : getBaseContent().getEncoding();
450         }
451     }
452 }