View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  package org.mycore.datamodel.common;
19  
import java.io.ByteArrayOutputStream;
import java.io.InvalidObjectException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
50  
51  /**
52   * Represents the data URL scheme (<a href="https://tools.ietf.org/html/rfc2397">RFC2397</a>).
53   *
54   * @author Ren\u00E9 Adler (eagle)
55   *
56   */
57  public class MCRDataURL implements Serializable {
58  
59      private static final long serialVersionUID = 1L;
60  
61      private static final String SCHEME = "data:";
62  
63      private static final String DEFAULT_MIMETYPE = "text/plain";
64  
65      private static final Pattern PATTERN_MIMETYPE = Pattern.compile("^([a-z0-9\\-\\+]+)\\/([a-z0-9\\-\\+]+)$");
66  
67      private static final String CHARSET_PARAM = "charset";
68  
69      private static final String TOKEN_SEPARATOR = ";";
70  
71      private static final String DATA_SEPARATOR = ",";
72  
73      private static final String PARAM_SEPARATOR = "=";
74  
75      private final String mimeType;
76  
77      private final Map<String, String> parameters;
78  
79      private final Charset charset;
80  
81      private final MCRDataURLEncoding encoding;
82  
83      private final byte[] data;
84  
85      /**
86       * Build a "data" URL for given {@link Document}, encoding, mime-type and charset.
87       * Should encoding be <code>null</code>, it is detect from mime-type.
88       *
89       * @param document the document
90       * @param encoding the {@link MCRDataURLEncoding}
91       * @param mimeType the mime-type
92       * @param charset the charset
93       * @return a string with "data" URL
94       * @throws TransformerException
95       * @throws MalformedURLException
96       */
97      public static String build(final Document document, final String encoding, final String mimeType,
98          final String charset) throws TransformerException, MalformedURLException {
99          return build(document.getChildNodes(), encoding, mimeType, charset);
100     }
101 
102     /**
103      * Build a "data" URL for given {@link NodeList}, encoding, mime-type and charset.
104      * Should encoding be <code>null</code>, it is detect from mime-type.
105      *
106      * @param nodeList the node list
107      * @param encoding the {@link MCRDataURLEncoding}
108      * @param mimeType the mime-type
109      * @param charset the charset
110      * @return a string with "data" URL
111      * @throws TransformerException
112      * @throws MalformedURLException
113      */
114     public static String build(final NodeList nodeList, final String encoding, final String mimeType,
115         final String charset) throws TransformerException, MalformedURLException {
116         Node node = Optional.ofNullable(nodeList.item(0)).filter(n -> n.getNodeName().equals("#document"))
117             .orElseGet(() -> Optional.of(nodeList).filter(nl -> nl.getLength() == 1).map(nl -> nl.item(0))
118                 .orElseThrow(() -> new IllegalArgumentException("Nodelist must have an single root element.")));
119 
120         final TransformerFactory transformerFactory = TransformerFactory.newInstance();
121         final Transformer transformer = transformerFactory.newTransformer();
122 
123         MCRDataURLEncoding enc = encoding != null ? MCRDataURLEncoding.fromValue(encoding) : null;
124         String method = "xml";
125 
126         final Matcher mtm = PATTERN_MIMETYPE.matcher(mimeType);
127         if (mtm.matches()) {
128             if (enc == null) {
129                 if ("text".equals(mtm.group(1))) {
130                     enc = MCRDataURLEncoding.URL;
131                 } else {
132                     enc = MCRDataURLEncoding.BASE64;
133                 }
134             }
135 
136             if ("plain".equals(mtm.group(2))) {
137                 method = "text";
138             } else if ("html".equals(mtm.group(2))) {
139                 method = "html";
140             } else if ("xml|xhtml+xml".contains(mtm.group(2))) {
141                 method = "xml";
142             } else {
143                 method = null;
144             }
145         }
146 
147         if (method != null) {
148             transformer.setOutputProperty(OutputKeys.METHOD, method);
149         }
150 
151         transformer.setOutputProperty(OutputKeys.INDENT, "no");
152         transformer.setOutputProperty(OutputKeys.MEDIA_TYPE, mimeType);
153         transformer.setOutputProperty(OutputKeys.ENCODING, charset);
154 
155         DOMSource source = new DOMSource(node);
156         ByteArrayOutputStream bao = new ByteArrayOutputStream();
157         StreamResult result = new StreamResult(bao);
158         transformer.transform(source, result);
159 
160         final MCRDataURL dataURL = new MCRDataURL(bao.toByteArray(), enc, mimeType, charset);
161 
162         return dataURL.toString();
163     }
164 
165     /**
166      * Build a "data" URL for given {@link String}, encoding, mime-type and charset.
167      * Should encoding be <code>null</code>, it is detect from mime-type.
168      *
169      * @param str the value
170      * @param encoding the {@link MCRDataURLEncoding}
171      * @param mimeType the mime-type
172      * @param charset the charset
173      * @return a string with "data" URL
174      * @throws TransformerException
175      * @throws MalformedURLException
176      */
177     public static String build(final String str, final String encoding, final String mimeType, final String charset)
178         throws TransformerException, MalformedURLException {
179         MCRDataURLEncoding enc = encoding != null ? MCRDataURLEncoding.fromValue(encoding) : null;
180 
181         final Matcher mtm = PATTERN_MIMETYPE.matcher(mimeType);
182         if (mtm.matches()) {
183             if (enc == null) {
184                 if ("text".equals(mtm.group(1))) {
185                     enc = MCRDataURLEncoding.URL;
186                 } else {
187                     enc = MCRDataURLEncoding.BASE64;
188                 }
189             }
190         }
191 
192         final MCRDataURL dataURL = new MCRDataURL(str.getBytes(Charset.forName(charset)), enc, mimeType, charset);
193 
194         return dataURL.toString();
195     }
196 
197     /**
198      * Build a "data" URL for given {@link Document}, mime-type and <code>UTF-8</code> as charset.
199      *
200      * @param document the document
201      * @param mimeType the mime-type
202      * @return a string with "data" URL
203      * @throws TransformerException
204      * @throws MalformedURLException
205      */
206     public static String build(final Document document, final String mimeType)
207         throws TransformerException, MalformedURLException {
208         return build(document, null, mimeType, "UTF-8");
209     }
210 
211     /**
212      * Build a "data" URL for given {@link NodeList}, mime-type and <code>UTF-8</code> as charset.
213      *
214      * @param nodeList the node list
215      * @param mimeType the mime-type
216      * @return a string with "data" URL
217      * @throws TransformerException
218      * @throws MalformedURLException
219      */
220     public static String build(final NodeList nodeList, final String mimeType)
221         throws TransformerException, MalformedURLException {
222         return build(nodeList, null, mimeType, "UTF-8");
223     }
224 
225     /**
226      * Build a "data" URL for given {@link String}, mime-type and <code>UTF-8</code> as charset.
227      *
228      * @param str the string
229      * @param mimeType the mime-type
230      * @return a string with "data" URL
231      * @throws TransformerException
232      * @throws MalformedURLException
233      */
234     public static String build(final String str, final String mimeType)
235         throws TransformerException, MalformedURLException {
236         return build(str, null, mimeType, "UTF-8");
237     }
238 
239     /**
240      * Build a "data" URL for given {@link Document} with mime-type based encoding,
241      * <code>text/xml</code> as mime-type and <code>UTF-8</code> as charset.
242      *
243      * @param document the document
244      * @return a string with "data" URL
245      * @throws TransformerException
246      * @throws MalformedURLException
247      */
248     public static String build(final Document document) throws TransformerException, MalformedURLException {
249         return build(document, null, "text/xml", "UTF-8");
250     }
251 
252     /**
253      * Build a "data" URL for given {@link NodeList} with mime-type based encoding,
254      * <code>text/xml</code> as mime-type and <code>UTF-8</code> as charset.
255      *
256      * @param nodeList the node list
257      * @return a string with "data" URL
258      * @throws TransformerException
259      * @throws MalformedURLException
260      */
261     public static String build(final NodeList nodeList) throws TransformerException, MalformedURLException {
262         return build(nodeList, null, "text/xml", "UTF-8");
263     }
264 
265     /**
266      * Build a "data" URL for given {@link String} with mime-type based encoding,
267      * <code>text/xml</code> as mime-type and <code>UTF-8</code> as charset.
268      *
269      * @param str the node list
270      * @return a string with "data" URL
271      * @throws TransformerException
272      * @throws MalformedURLException
273      */
274     public static String build(final String str) throws TransformerException, MalformedURLException {
275         return build(str, null, "text/palin", "UTF-8");
276     }
277 
278     /**
279      * Parse a {@link String} to {@link MCRDataURL}.
280      *
281      * @param dataURL the data url string
282      * @return a {@link MCRDataURL} object
283      * @throws MalformedURLException
284      */
285     public static MCRDataURL parse(final String dataURL) throws MalformedURLException {
286         final String url = dataURL.trim();
287         if (url.startsWith(SCHEME)) {
288             String[] parts = url.substring(SCHEME.length()).split(DATA_SEPARATOR, 2);
289             if (parts.length == 2) {
290                 String[] tokens = parts[0].split(TOKEN_SEPARATOR);
291                 List<String> token = Arrays.stream(tokens).filter(s -> !s.contains(PARAM_SEPARATOR))
292                     .collect(Collectors.toList());
293                 Map<String, String> params = Arrays.stream(tokens).filter(s -> s.contains(PARAM_SEPARATOR))
294                     .map(s -> s.split(PARAM_SEPARATOR, 2)).collect(Collectors.toMap(sl -> sl[0], sl -> {
295                         try {
296                             return decode(sl[1], StandardCharsets.UTF_8);
297                         } catch (Exception e) {
298                             throw new RuntimeException("Error encoding the parameter value \"" + sl[1]
299                                 + "\". Error: " + e.getMessage());
300                         }
301                     }));
302 
303                 final String mimeType = !token.isEmpty() ? token.get(0) : null;
304 
305                 if (mimeType != null && !mimeType.isEmpty() && !PATTERN_MIMETYPE.matcher(mimeType).matches()) {
306                     throw new MalformedURLException("Unknown mime type.");
307                 }
308 
309                 final MCRDataURLEncoding encoding;
310                 try {
311                     encoding = !token.isEmpty() && token.size() > 1 ? MCRDataURLEncoding.fromValue(token.get(1))
312                         : MCRDataURLEncoding.URL;
313                 } catch (IllegalArgumentException e) {
314                     throw new MalformedURLException("Unknown encoding.");
315                 }
316 
317                 Charset charset = params.containsKey(CHARSET_PARAM) ? Charset.forName(params.get(CHARSET_PARAM))
318                     : StandardCharsets.US_ASCII;
319 
320                 byte[] data;
321                 try {
322                     data = encoding == MCRDataURLEncoding.BASE64 ? Base64.getDecoder().decode(parts[1])
323                         : decode(parts[1], charset).getBytes(StandardCharsets.UTF_8);
324                 } catch (IllegalArgumentException | UnsupportedEncodingException e) {
325                     throw new MalformedURLException("Error decoding the data. " + e.getMessage());
326                 }
327 
328                 return new MCRDataURL(data, encoding, mimeType, params);
329             } else {
330                 throw new MalformedURLException("Error parse data url: " + url);
331             }
332         } else {
333             throw new MalformedURLException("Wrong protocol");
334         }
335 
336     }
337 
338     /**
339      * Constructs a new {@link MCRDataURL}.
340      *
341      * @param data the data
342      * @param encoding the encoding of data url
343      * @param mimeType the mimeType of data url
344      * @param parameters a list of paramters of data url
345      * @throws MalformedURLException
346      */
347     public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType,
348         final Map<String, String> parameters) throws MalformedURLException {
349         this.data = data;
350         this.encoding = encoding != null ? encoding : MCRDataURLEncoding.URL;
351         this.mimeType = mimeType != null && !mimeType.isEmpty() ? mimeType : DEFAULT_MIMETYPE;
352 
353         if (!PATTERN_MIMETYPE.matcher(this.mimeType).matches()) {
354             throw new MalformedURLException("Unknown mime type.");
355         }
356 
357         if (parameters != null) {
358             this.parameters = Collections.unmodifiableMap(new LinkedHashMap<>(parameters.entrySet()
359                 .stream()
360                 .filter(
361                     e -> !CHARSET_PARAM.equals(e.getKey()))
362                 .collect(Collectors.toMap(
363                     Map.Entry::getKey,
364                     Map.Entry::getValue))));
365             this.charset = parameters.containsKey(CHARSET_PARAM) && parameters.get(CHARSET_PARAM) != null
366                 && !parameters.get(CHARSET_PARAM).isEmpty() ? Charset.forName(parameters.get(CHARSET_PARAM))
367                     : StandardCharsets.US_ASCII;
368         } else {
369             this.parameters = Collections.emptyMap();
370             this.charset = StandardCharsets.US_ASCII;
371         }
372     }
373 
374     /**
375      * Constructs a new {@link MCRDataURL}.
376      *
377      * @param data the data
378      * @param encoding the encoding of data url
379      * @param mimeType the mimeType of data url
380      * @param charset the charset of data url
381      * @throws MalformedURLException
382      */
383     public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType,
384         final Charset charset) throws MalformedURLException {
385         this.data = data;
386         this.encoding = encoding != null ? encoding : MCRDataURLEncoding.URL;
387         this.mimeType = mimeType != null && !mimeType.isEmpty() ? mimeType : DEFAULT_MIMETYPE;
388 
389         if (!PATTERN_MIMETYPE.matcher(this.mimeType).matches()) {
390             throw new MalformedURLException("Unknown mime type.");
391         }
392 
393         this.parameters = Collections.emptyMap();
394         this.charset = charset != null ? charset : StandardCharsets.US_ASCII;
395     }
396 
397     /**
398      * Constructs a new {@link MCRDataURL}.
399      *
400      * @param data the data
401      * @param encoding the encoding of data url
402      * @param mimeType the mimeType of data url
403      * @param charset the charset of data url
404      * @throws MalformedURLException
405      */
406     public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType, final String charset)
407         throws MalformedURLException {
408         this(data, encoding, mimeType, Charset.forName(charset));
409     }
410 
411     /**
412      * Constructs a new {@link MCRDataURL}.
413      *
414      * @param data the data
415      * @param encoding the encoding of data url
416      * @param mimeType the mimeType of data url
417      * @throws MalformedURLException
418      */
419     public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType)
420         throws MalformedURLException {
421         this(data, encoding, DEFAULT_MIMETYPE, StandardCharsets.US_ASCII);
422     }
423 
424     /**
425      * Constructs a new {@link MCRDataURL}.
426      *
427      * @param data the data of data url
428      * @param encoding the encoding of data url
429      * @throws MalformedURLException
430      */
431     public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding) throws MalformedURLException {
432         this(data, encoding, DEFAULT_MIMETYPE, StandardCharsets.US_ASCII);
433     }
434 
435     /**
436      * Constructs a new {@link MCRDataURL}.
437      *
438      * @param data the data of data url
439      * @throws MalformedURLException
440      */
441     public MCRDataURL(final byte[] data) throws MalformedURLException {
442         this(data, MCRDataURLEncoding.URL, DEFAULT_MIMETYPE, StandardCharsets.US_ASCII);
443     }
444 
445     /**
446      * @return the mimeType
447      */
448     public String getMimeType() {
449         return mimeType;
450     }
451 
452     /**
453      * @return the parameters
454      */
455     public Map<String, String> getParameters() {
456         return parameters;
457     }
458 
459     /**
460      * @return the charset
461      */
462     public Charset getCharset() {
463         return charset;
464     }
465 
466     /**
467      * @return the encoding
468      */
469     public MCRDataURLEncoding getEncoding() {
470         return encoding;
471     }
472 
473     /**
474      * @return the data
475      */
476     public byte[] getData() {
477         return data;
478     }
479 
480     /**
481      * Returns a {@link String} of a {@link MCRDataURL} object .
482      *
483      * @return the data url as string
484      * @throws RuntimeException
485      */
486     @Override
487     public String toString() {
488         StringBuffer sb = new StringBuffer(SCHEME);
489 
490         if (!DEFAULT_MIMETYPE.equals(mimeType) || charset != StandardCharsets.US_ASCII) {
491             sb.append(mimeType);
492         }
493 
494         if (charset != StandardCharsets.US_ASCII) {
495             sb.append(TOKEN_SEPARATOR + CHARSET_PARAM + PARAM_SEPARATOR).append(charset.name());
496         }
497 
498         parameters.forEach((key, value) -> {
499             try {
500                 sb.append(TOKEN_SEPARATOR)
501                     .append(key)
502                     .append(PARAM_SEPARATOR)
503                     .append(encode(value, StandardCharsets.UTF_8));
504             } catch (UnsupportedEncodingException e) {
505                 throw new RuntimeException(
506                     "Error encoding the parameter value \"" + value + "\". Error: " + e.getMessage());
507             }
508         });
509 
510         if (encoding == MCRDataURLEncoding.BASE64) {
511             sb.append(TOKEN_SEPARATOR).append(encoding.value());
512             sb.append(DATA_SEPARATOR).append(Base64.getEncoder().withoutPadding().encodeToString(data));
513         } else {
514             try {
515                 sb.append(DATA_SEPARATOR).append(encode(new String(data, charset), charset));
516             } catch (UnsupportedEncodingException e) {
517                 throw new RuntimeException("Error encoding the data. Error: " + e.getMessage());
518             }
519         }
520 
521         return sb.toString();
522     }
523 
524     /* (non-Javadoc)
525      * @see java.lang.Object#hashCode()
526      */
527     @Override
528     public int hashCode() {
529         final int prime = 31;
530         int result = 1;
531         result = prime * result + ((charset == null) ? 0 : charset.hashCode());
532         result = prime * result + ((data == null) ? 0 : Arrays.hashCode(data));
533         result = prime * result + ((encoding == null) ? 0 : encoding.hashCode());
534         result = prime * result + ((mimeType == null) ? 0 : mimeType.hashCode());
535         result = prime * result + ((parameters == null) ? 0 : parameters.hashCode());
536         return result;
537     }
538 
539     /* (non-Javadoc)
540      * @see java.lang.Object#equals(java.lang.Object)
541      */
542     @Override
543     public boolean equals(Object obj) {
544         if (this == obj) {
545             return true;
546         }
547         if (obj == null) {
548             return false;
549         }
550         if (!(obj instanceof MCRDataURL)) {
551             return false;
552         }
553         MCRDataURL other = (MCRDataURL) obj;
554         if (charset == null) {
555             if (other.charset != null) {
556                 return false;
557             }
558         } else if (!charset.equals(other.charset)) {
559             return false;
560         }
561         if (data == null) {
562             if (other.data != null) {
563                 return false;
564             }
565         } else if (!MessageDigest.isEqual(data, other.data)) {
566             return false;
567         }
568         if (encoding != other.encoding) {
569             return false;
570         }
571         if (mimeType == null) {
572             if (other.mimeType != null) {
573                 return false;
574             }
575         } else if (!mimeType.equals(other.mimeType)) {
576             return false;
577         }
578         if (parameters == null) {
579             return other.parameters == null;
580         } else {
581             return parameters.equals(other.parameters);
582         }
583     }
584 
585     private static String encode(final String str, final Charset charset) throws UnsupportedEncodingException {
586         return URLEncoder.encode(str, charset.name()).replace("+", "%20");
587     }
588 
589     private static String decode(final String str, final Charset charset) throws UnsupportedEncodingException {
590         return URLDecoder.decode(str.replace("%20", "+"), charset.name());
591     }
592 }