1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.mycore.frontend.xeditor;
20
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.Iterator;
24 import java.util.List;
25 import java.util.Map;
26
27 import org.jdom2.Attribute;
28 import org.jdom2.Document;
29 import org.jdom2.Element;
30 import org.jdom2.filter.Filters;
31 import org.jdom2.xpath.XPathExpression;
32 import org.jdom2.xpath.XPathFactory;
33 import org.mycore.common.MCRConstants;
34
35 public class MCRXMLCleaner {
36
37 private static final MCRCleaningRule REMOVE_EMPTY_ATTRIBUTES = new MCRCleaningRule("//@*", "string-length(.) > 0");
38
39 private static final MCRCleaningRule REMOVE_EMPTY_ELEMENTS = new MCRCleaningRule("//*",
40 "@* or * or (string-length(text()) > 0)");
41
42 private static final MCRCleaningRule PRESERVE_STRUCTURE_AND_SERVICE = new MCRCleaningRule(
43 "/mycoreobject/structure|/mycoreobject/service", "true()");
44
45 private List<MCRCleaningRule> rules = new ArrayList<>();
46
47 private Map<Object, MCRCleaningRule> nodes2rules = new HashMap<>();
48
49 public MCRXMLCleaner() {
50 addRule(REMOVE_EMPTY_ATTRIBUTES);
51 addRule(REMOVE_EMPTY_ELEMENTS);
52 addRule(PRESERVE_STRUCTURE_AND_SERVICE);
53 }
54
55 public void addRule(String xPathExprNodesToInspect, String xPathExprRelevancyTest) {
56 addRule(new MCRCleaningRule(xPathExprNodesToInspect, xPathExprRelevancyTest));
57 }
58
59 public void addRule(MCRCleaningRule rule) {
60 rules.remove(rule);
61 rules.add(rule);
62 }
63
64 public Document clean(Document xml) {
65 Document clone = xml.clone();
66 do {
67 mapNodesToRules(clone);
68 } while (clean(clone.getRootElement()));
69 return clone;
70 }
71
72 private void mapNodesToRules(Document xml) {
73 nodes2rules.clear();
74 for (MCRCleaningRule rule : rules) {
75 for (Object object : rule.getNodesToInspect(xml)) {
76 nodes2rules.put(object, rule);
77 }
78 }
79 }
80
81 private boolean clean(Element element) {
82 boolean changed = false;
83
84 for (Iterator<Element> children = element.getChildren().iterator(); children.hasNext();) {
85 Element child = children.next();
86 if (clean(child)) {
87 changed = true;
88 }
89 if (!isRelevant(child)) {
90 changed = true;
91 children.remove();
92 }
93 }
94
95 for (Iterator<Attribute> attributes = element.getAttributes().iterator(); attributes.hasNext();) {
96 Attribute attribute = attributes.next();
97 if (!isRelevant(attribute)) {
98 changed = true;
99 attributes.remove();
100 }
101 }
102
103 return changed;
104 }
105
106 private boolean isRelevant(Object node) {
107 MCRCleaningRule rule = nodes2rules.get(node);
108 return (rule == null || rule.isRelevant(node));
109 }
110 }
111
112 class MCRCleaningRule {
113
114 private String xPathExprNodesToInspect;
115
116 private XPathExpression<Object> xPathNodesToInspect;
117
118 private XPathExpression<Object> xPathRelevancyTest;
119
120 MCRCleaningRule(String xPathExprNodesToInspect, String xPathExprRelevancyTest) {
121 this.xPathExprNodesToInspect = xPathExprNodesToInspect;
122 this.xPathNodesToInspect = XPathFactory.instance().compile(xPathExprNodesToInspect, Filters.fpassthrough(),
123 null,
124 MCRConstants.getStandardNamespaces());
125 this.xPathRelevancyTest = XPathFactory.instance().compile(xPathExprRelevancyTest, Filters.fpassthrough(), null,
126 MCRConstants.getStandardNamespaces());
127 }
128
129 public List<Object> getNodesToInspect(Document xml) {
130 return xPathNodesToInspect.evaluate(xml);
131 }
132
133 public boolean isRelevant(Object node) {
134 Object found = xPathRelevancyTest.evaluateFirst(node);
135 if (found == null) {
136 return false;
137 } else if (found instanceof Boolean) {
138 return (Boolean) found;
139 } else {
140 return true;
141 }
142 }
143
144 @Override
145 public boolean equals(Object obj) {
146 if (obj instanceof MCRCleaningRule) {
147 return xPathExprNodesToInspect.equals(((MCRCleaningRule) obj).xPathExprNodesToInspect);
148 } else {
149 return false;
150 }
151 }
152
153 @Override
154 public int hashCode() {
155 return xPathExprNodesToInspect.hashCode();
156 }
157 }