1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.mycore.datamodel.common;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.Serializable;
22 import java.io.UnsupportedEncodingException;
23 import java.net.MalformedURLException;
24 import java.net.URLDecoder;
25 import java.net.URLEncoder;
26 import java.nio.charset.Charset;
27 import java.nio.charset.StandardCharsets;
28 import java.security.MessageDigest;
29 import java.util.Arrays;
30 import java.util.Base64;
31 import java.util.Collections;
32 import java.util.LinkedHashMap;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.Optional;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38 import java.util.stream.Collectors;
39
40 import javax.xml.transform.OutputKeys;
41 import javax.xml.transform.Transformer;
42 import javax.xml.transform.TransformerException;
43 import javax.xml.transform.TransformerFactory;
44 import javax.xml.transform.dom.DOMSource;
45 import javax.xml.transform.stream.StreamResult;
46
47 import org.w3c.dom.Document;
48 import org.w3c.dom.Node;
49 import org.w3c.dom.NodeList;
50
51
52
53
54
55
56
57 public class MCRDataURL implements Serializable {
58
59 private static final long serialVersionUID = 1L;
60
61 private static final String SCHEME = "data:";
62
63 private static final String DEFAULT_MIMETYPE = "text/plain";
64
65 private static final Pattern PATTERN_MIMETYPE = Pattern.compile("^([a-z0-9\\-\\+]+)\\/([a-z0-9\\-\\+]+)$");
66
67 private static final String CHARSET_PARAM = "charset";
68
69 private static final String TOKEN_SEPARATOR = ";";
70
71 private static final String DATA_SEPARATOR = ",";
72
73 private static final String PARAM_SEPARATOR = "=";
74
75 private final String mimeType;
76
77 private final Map<String, String> parameters;
78
79 private final Charset charset;
80
81 private final MCRDataURLEncoding encoding;
82
83 private final byte[] data;
84
85
86
87
88
89
90
91
92
93
94
95
96
97 public static String build(final Document document, final String encoding, final String mimeType,
98 final String charset) throws TransformerException, MalformedURLException {
99 return build(document.getChildNodes(), encoding, mimeType, charset);
100 }
101
102
103
104
105
106
107
108
109
110
111
112
113
114 public static String build(final NodeList nodeList, final String encoding, final String mimeType,
115 final String charset) throws TransformerException, MalformedURLException {
116 Node node = Optional.ofNullable(nodeList.item(0)).filter(n -> n.getNodeName().equals("#document"))
117 .orElseGet(() -> Optional.of(nodeList).filter(nl -> nl.getLength() == 1).map(nl -> nl.item(0))
118 .orElseThrow(() -> new IllegalArgumentException("Nodelist must have an single root element.")));
119
120 final TransformerFactory transformerFactory = TransformerFactory.newInstance();
121 final Transformer transformer = transformerFactory.newTransformer();
122
123 MCRDataURLEncoding enc = encoding != null ? MCRDataURLEncoding.fromValue(encoding) : null;
124 String method = "xml";
125
126 final Matcher mtm = PATTERN_MIMETYPE.matcher(mimeType);
127 if (mtm.matches()) {
128 if (enc == null) {
129 if ("text".equals(mtm.group(1))) {
130 enc = MCRDataURLEncoding.URL;
131 } else {
132 enc = MCRDataURLEncoding.BASE64;
133 }
134 }
135
136 if ("plain".equals(mtm.group(2))) {
137 method = "text";
138 } else if ("html".equals(mtm.group(2))) {
139 method = "html";
140 } else if ("xml|xhtml+xml".contains(mtm.group(2))) {
141 method = "xml";
142 } else {
143 method = null;
144 }
145 }
146
147 if (method != null) {
148 transformer.setOutputProperty(OutputKeys.METHOD, method);
149 }
150
151 transformer.setOutputProperty(OutputKeys.INDENT, "no");
152 transformer.setOutputProperty(OutputKeys.MEDIA_TYPE, mimeType);
153 transformer.setOutputProperty(OutputKeys.ENCODING, charset);
154
155 DOMSource source = new DOMSource(node);
156 ByteArrayOutputStream bao = new ByteArrayOutputStream();
157 StreamResult result = new StreamResult(bao);
158 transformer.transform(source, result);
159
160 final MCRDataURL dataURL = new MCRDataURL(bao.toByteArray(), enc, mimeType, charset);
161
162 return dataURL.toString();
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177 public static String build(final String str, final String encoding, final String mimeType, final String charset)
178 throws TransformerException, MalformedURLException {
179 MCRDataURLEncoding enc = encoding != null ? MCRDataURLEncoding.fromValue(encoding) : null;
180
181 final Matcher mtm = PATTERN_MIMETYPE.matcher(mimeType);
182 if (mtm.matches()) {
183 if (enc == null) {
184 if ("text".equals(mtm.group(1))) {
185 enc = MCRDataURLEncoding.URL;
186 } else {
187 enc = MCRDataURLEncoding.BASE64;
188 }
189 }
190 }
191
192 final MCRDataURL dataURL = new MCRDataURL(str.getBytes(Charset.forName(charset)), enc, mimeType, charset);
193
194 return dataURL.toString();
195 }
196
197
198
199
200
201
202
203
204
205
206 public static String build(final Document document, final String mimeType)
207 throws TransformerException, MalformedURLException {
208 return build(document, null, mimeType, "UTF-8");
209 }
210
211
212
213
214
215
216
217
218
219
220 public static String build(final NodeList nodeList, final String mimeType)
221 throws TransformerException, MalformedURLException {
222 return build(nodeList, null, mimeType, "UTF-8");
223 }
224
225
226
227
228
229
230
231
232
233
234 public static String build(final String str, final String mimeType)
235 throws TransformerException, MalformedURLException {
236 return build(str, null, mimeType, "UTF-8");
237 }
238
239
240
241
242
243
244
245
246
247
248 public static String build(final Document document) throws TransformerException, MalformedURLException {
249 return build(document, null, "text/xml", "UTF-8");
250 }
251
252
253
254
255
256
257
258
259
260
261 public static String build(final NodeList nodeList) throws TransformerException, MalformedURLException {
262 return build(nodeList, null, "text/xml", "UTF-8");
263 }
264
265
266
267
268
269
270
271
272
273
274 public static String build(final String str) throws TransformerException, MalformedURLException {
275 return build(str, null, "text/palin", "UTF-8");
276 }
277
278
279
280
281
282
283
284
285 public static MCRDataURL parse(final String dataURL) throws MalformedURLException {
286 final String url = dataURL.trim();
287 if (url.startsWith(SCHEME)) {
288 String[] parts = url.substring(SCHEME.length()).split(DATA_SEPARATOR, 2);
289 if (parts.length == 2) {
290 String[] tokens = parts[0].split(TOKEN_SEPARATOR);
291 List<String> token = Arrays.stream(tokens).filter(s -> !s.contains(PARAM_SEPARATOR))
292 .collect(Collectors.toList());
293 Map<String, String> params = Arrays.stream(tokens).filter(s -> s.contains(PARAM_SEPARATOR))
294 .map(s -> s.split(PARAM_SEPARATOR, 2)).collect(Collectors.toMap(sl -> sl[0], sl -> {
295 try {
296 return decode(sl[1], StandardCharsets.UTF_8);
297 } catch (Exception e) {
298 throw new RuntimeException("Error encoding the parameter value \"" + sl[1]
299 + "\". Error: " + e.getMessage());
300 }
301 }));
302
303 final String mimeType = !token.isEmpty() ? token.get(0) : null;
304
305 if (mimeType != null && !mimeType.isEmpty() && !PATTERN_MIMETYPE.matcher(mimeType).matches()) {
306 throw new MalformedURLException("Unknown mime type.");
307 }
308
309 final MCRDataURLEncoding encoding;
310 try {
311 encoding = !token.isEmpty() && token.size() > 1 ? MCRDataURLEncoding.fromValue(token.get(1))
312 : MCRDataURLEncoding.URL;
313 } catch (IllegalArgumentException e) {
314 throw new MalformedURLException("Unknown encoding.");
315 }
316
317 Charset charset = params.containsKey(CHARSET_PARAM) ? Charset.forName(params.get(CHARSET_PARAM))
318 : StandardCharsets.US_ASCII;
319
320 byte[] data;
321 try {
322 data = encoding == MCRDataURLEncoding.BASE64 ? Base64.getDecoder().decode(parts[1])
323 : decode(parts[1], charset).getBytes(StandardCharsets.UTF_8);
324 } catch (IllegalArgumentException | UnsupportedEncodingException e) {
325 throw new MalformedURLException("Error decoding the data. " + e.getMessage());
326 }
327
328 return new MCRDataURL(data, encoding, mimeType, params);
329 } else {
330 throw new MalformedURLException("Error parse data url: " + url);
331 }
332 } else {
333 throw new MalformedURLException("Wrong protocol");
334 }
335
336 }
337
338
339
340
341
342
343
344
345
346
347 public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType,
348 final Map<String, String> parameters) throws MalformedURLException {
349 this.data = data;
350 this.encoding = encoding != null ? encoding : MCRDataURLEncoding.URL;
351 this.mimeType = mimeType != null && !mimeType.isEmpty() ? mimeType : DEFAULT_MIMETYPE;
352
353 if (!PATTERN_MIMETYPE.matcher(this.mimeType).matches()) {
354 throw new MalformedURLException("Unknown mime type.");
355 }
356
357 if (parameters != null) {
358 this.parameters = Collections.unmodifiableMap(new LinkedHashMap<>(parameters.entrySet()
359 .stream()
360 .filter(
361 e -> !CHARSET_PARAM.equals(e.getKey()))
362 .collect(Collectors.toMap(
363 Map.Entry::getKey,
364 Map.Entry::getValue))));
365 this.charset = parameters.containsKey(CHARSET_PARAM) && parameters.get(CHARSET_PARAM) != null
366 && !parameters.get(CHARSET_PARAM).isEmpty() ? Charset.forName(parameters.get(CHARSET_PARAM))
367 : StandardCharsets.US_ASCII;
368 } else {
369 this.parameters = Collections.emptyMap();
370 this.charset = StandardCharsets.US_ASCII;
371 }
372 }
373
374
375
376
377
378
379
380
381
382
383 public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType,
384 final Charset charset) throws MalformedURLException {
385 this.data = data;
386 this.encoding = encoding != null ? encoding : MCRDataURLEncoding.URL;
387 this.mimeType = mimeType != null && !mimeType.isEmpty() ? mimeType : DEFAULT_MIMETYPE;
388
389 if (!PATTERN_MIMETYPE.matcher(this.mimeType).matches()) {
390 throw new MalformedURLException("Unknown mime type.");
391 }
392
393 this.parameters = Collections.emptyMap();
394 this.charset = charset != null ? charset : StandardCharsets.US_ASCII;
395 }
396
397
398
399
400
401
402
403
404
405
/**
 * Creates a data URL without additional parameters, with the charset given by name.
 * <p>
 * A {@code null} or empty charset name falls back to the default charset (US-ASCII), matching the way
 * the parameter-map constructor treats a missing or empty {@code charset} parameter.
 * </p>
 *
 * @param data the payload bytes (stored as is, not copied)
 * @param encoding the encoding, or {@code null}
 * @param mimeType the MIME type, or {@code null}
 * @param charset the charset name, or {@code null}
 * @throws MalformedURLException if the MIME type is not well-formed
 * @throws IllegalArgumentException if the charset name is illegal or not supported
 */
public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType, final String charset)
    throws MalformedURLException {
    // the cast keeps overload resolution on the Charset based constructor
    this(data, encoding, mimeType,
        charset != null && !charset.isEmpty() ? Charset.forName(charset) : (Charset) null);
}
410
411
412
413
414
415
416
417
418
/**
 * Creates a US-ASCII data URL without additional parameters.
 *
 * @param data the payload bytes (stored as is, not copied)
 * @param encoding the encoding, or {@code null}
 * @param mimeType the MIME type, or {@code null} for {@code text/plain}
 * @throws MalformedURLException if the MIME type is not well-formed
 */
public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding, final String mimeType)
    throws MalformedURLException {
    // pass the caller's MIME type on; it used to be replaced by the default unconditionally
    this(data, encoding, mimeType, StandardCharsets.US_ASCII);
}
423
424
425
426
427
428
429
430
/**
 * Creates a US-ASCII {@code text/plain} data URL without additional parameters.
 *
 * @param data the payload bytes (stored as is, not copied)
 * @param encoding the encoding, or {@code null}
 * @throws MalformedURLException if the data URL cannot be created
 */
public MCRDataURL(final byte[] data, final MCRDataURLEncoding encoding) throws MalformedURLException {
    this(data, encoding, DEFAULT_MIMETYPE);
}
434
435
436
437
438
439
440
/**
 * Creates a URL-encoded, US-ASCII {@code text/plain} data URL without additional parameters.
 *
 * @param data the payload bytes (stored as is, not copied)
 * @throws MalformedURLException if the data URL cannot be created
 */
public MCRDataURL(final byte[] data) throws MalformedURLException {
    this(data, MCRDataURLEncoding.URL);
}
444
445
446
447
448 public String getMimeType() {
449 return mimeType;
450 }
451
452
453
454
455 public Map<String, String> getParameters() {
456 return parameters;
457 }
458
459
460
461
462 public Charset getCharset() {
463 return charset;
464 }
465
466
467
468
469 public MCRDataURLEncoding getEncoding() {
470 return encoding;
471 }
472
473
474
475
476 public byte[] getData() {
477 return data;
478 }
479
480
481
482
483
484
485
486 @Override
487 public String toString() {
488 StringBuffer sb = new StringBuffer(SCHEME);
489
490 if (!DEFAULT_MIMETYPE.equals(mimeType) || charset != StandardCharsets.US_ASCII) {
491 sb.append(mimeType);
492 }
493
494 if (charset != StandardCharsets.US_ASCII) {
495 sb.append(TOKEN_SEPARATOR + CHARSET_PARAM + PARAM_SEPARATOR).append(charset.name());
496 }
497
498 parameters.forEach((key, value) -> {
499 try {
500 sb.append(TOKEN_SEPARATOR)
501 .append(key)
502 .append(PARAM_SEPARATOR)
503 .append(encode(value, StandardCharsets.UTF_8));
504 } catch (UnsupportedEncodingException e) {
505 throw new RuntimeException(
506 "Error encoding the parameter value \"" + value + "\". Error: " + e.getMessage());
507 }
508 });
509
510 if (encoding == MCRDataURLEncoding.BASE64) {
511 sb.append(TOKEN_SEPARATOR).append(encoding.value());
512 sb.append(DATA_SEPARATOR).append(Base64.getEncoder().withoutPadding().encodeToString(data));
513 } else {
514 try {
515 sb.append(DATA_SEPARATOR).append(encode(new String(data, charset), charset));
516 } catch (UnsupportedEncodingException e) {
517 throw new RuntimeException("Error encoding the data. Error: " + e.getMessage());
518 }
519 }
520
521 return sb.toString();
522 }
523
524
525
526
527 @Override
528 public int hashCode() {
529 final int prime = 31;
530 int result = 1;
531 result = prime * result + ((charset == null) ? 0 : charset.hashCode());
532 result = prime * result + ((data == null) ? 0 : Arrays.hashCode(data));
533 result = prime * result + ((encoding == null) ? 0 : encoding.hashCode());
534 result = prime * result + ((mimeType == null) ? 0 : mimeType.hashCode());
535 result = prime * result + ((parameters == null) ? 0 : parameters.hashCode());
536 return result;
537 }
538
539
540
541
542 @Override
543 public boolean equals(Object obj) {
544 if (this == obj) {
545 return true;
546 }
547 if (obj == null) {
548 return false;
549 }
550 if (!(obj instanceof MCRDataURL)) {
551 return false;
552 }
553 MCRDataURL other = (MCRDataURL) obj;
554 if (charset == null) {
555 if (other.charset != null) {
556 return false;
557 }
558 } else if (!charset.equals(other.charset)) {
559 return false;
560 }
561 if (data == null) {
562 if (other.data != null) {
563 return false;
564 }
565 } else if (!MessageDigest.isEqual(data, other.data)) {
566 return false;
567 }
568 if (encoding != other.encoding) {
569 return false;
570 }
571 if (mimeType == null) {
572 if (other.mimeType != null) {
573 return false;
574 }
575 } else if (!mimeType.equals(other.mimeType)) {
576 return false;
577 }
578 if (parameters == null) {
579 return other.parameters == null;
580 } else {
581 return parameters.equals(other.parameters);
582 }
583 }
584
585 private static String encode(final String str, final Charset charset) throws UnsupportedEncodingException {
586 return URLEncoder.encode(str, charset.name()).replace("+", "%20");
587 }
588
589 private static String decode(final String str, final Charset charset) throws UnsupportedEncodingException {
590 return URLDecoder.decode(str.replace("%20", "+"), charset.name());
591 }
592 }