View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.tools;
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.nio.file.Files;
24  import java.nio.file.Path;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.HashMap;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Objects;
31  import java.util.Set;
32  import java.util.TreeMap;
33  import java.util.TreeSet;
34  import java.util.stream.Collectors;
35  
36  import javax.xml.stream.XMLInputFactory;
37  import javax.xml.stream.XMLStreamConstants;
38  import javax.xml.stream.XMLStreamException;
39  import javax.xml.stream.XMLStreamReader;
40  
41  import org.apache.logging.log4j.LogManager;
42  import org.apache.logging.log4j.Logger;
43  import org.mycore.datamodel.common.MCRLinkTableManager;
44  import org.mycore.datamodel.metadata.MCRObjectID;
45  
46  import com.google.common.collect.BiMap;
47  import com.google.common.collect.HashBiMap;
48  
49  /**
50   * This class implements an algorithm for topological ordering.
51   * It can be used to retrieve the order in which MyCoRe object can be imported to be
52   * sure that parent objects are imported first.
53   *
54   * It also checks for circular dependencies and will throw an exception if it occurs.
55   *
56   * The doTopoSort() method can only be called once, since it processes the internal data.
57   * Afterwards prepareData() must be called again or a new object has to be used.
58   *
59   * For performance reasons each node label will be mapped to an integer (position in node list)
60   *
61   * The algorithm is described in
62   * http://en.wikipedia.org/wiki/Topological_sorting
63   *
64   * @author Robert Stephan
65   * @version $Revision: 28688 $ $Date: 2013-12-18 15:27:20 +0100 (Mi, 18 Dez 2013) $
66   *
67   */
68  public class MCRTopologicalSort<T> {
69      private static final Logger LOGGER = LogManager.getLogger(MCRTopologicalSort.class);
70  
71      /** 
72       * store the edges as adjacent list
73       *  for each target node a list of corresponding source node is stored
74       */
75      Map<Integer, TreeSet<Integer>> edgeSources = new TreeMap<>();
76  
77      /**
78       * store the position of elements to sort in BiMap
79       */
80      BiMap<Integer, T> nodes = HashBiMap.create();
81  
82      boolean dirty = false;
83  
84      /**
85       * parses MCRObject xml files for parent links
86       * and creates the graph
87       *
88       * uses StAX cursor API (higher performance)
89       *
90       * @param ts - the topological sort data structure
91       * @param files - a list of file names
92       * @param dir - the directory where the files can be found 
93       */
94      public static void prepareData(MCRTopologicalSort<String> ts, String[] files, Path dir) {
95          ts.reset();
96  
97          String file = null;
98          Map<Integer, List<String>> parentNames = new HashMap<>();
99          XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
100         for (int i = 0; i < files.length; i++) {
101             file = files[i];
102 
103             try (BufferedReader br = Files.newBufferedReader(dir.resolve(file))) {
104                 XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(br);
105                 while (xmlStreamReader.hasNext()) {
106                     switch (xmlStreamReader.getEventType()) {
107                     case XMLStreamConstants.START_ELEMENT:
108                         if (xmlStreamReader.getLocalName().equals("mycoreobject")) {
109                             ts.getNodes().forcePut(i, xmlStreamReader
110                                 .getAttributeValue(null, "ID"));
111                         } else {
112                             String href = xmlStreamReader
113                                 .getAttributeValue("http://www.w3.org/1999/xlink", "href");
114                             if (xmlStreamReader.getLocalName().equals("parent")) {
115                                 List<String> dependencyList = parentNames.computeIfAbsent(i,
116                                     e -> new ArrayList<>());
117                                 dependencyList.add(
118                                     href);
119                             } else if (xmlStreamReader.getLocalName().equals("relatedItem")) {
120                                 if (MCRObjectID.isValid(
121                                     href)) {
122                                     List<String> dependencyList = parentNames
123                                         .computeIfAbsent(i, e -> new ArrayList<>());
124                                     dependencyList.add(
125                                         href);
126                                 }
127                             } else if (xmlStreamReader.getLocalName().equals("metadata")) {
128                                 break;
129                             }
130                         }
131                         break;
132 
133                     case XMLStreamConstants.END_ELEMENT:
134                         if (xmlStreamReader.getLocalName().equals("parents")) {
135                             break;
136                         } else if (xmlStreamReader.getLocalName().equals("relatedItem")) {
137                             break;
138                         }
139                         break;
140                     }
141                     xmlStreamReader.next();
142                 }
143 
144             } catch (XMLStreamException | IOException e) {
145                 e.printStackTrace();
146             }
147         }
148 
149         //build edges
150         for (int source : parentNames.keySet()) {
151             parentNames.get(source)
152                 .stream()
153                 .map(ts.getNodes().inverse()::get)
154                 .filter(Objects::nonNull)
155                 .forEach(target -> ts.addEdge(source, target));
156         }
157     }
158 
159     /**
160      * reads MCRObjectIDs as Strings, retrieves parent links from MCRLinkTableManager
161      * and creates the graph
162      *
163      * uses StAX cursor API (higher performance)
164      */
165     public static void prepareMCRObjects(MCRTopologicalSort<String> ts, String[] mcrids) {
166         ts.reset();
167         for (int i = 0; i < mcrids.length; i++) {
168             ts.getNodes().forcePut(i, mcrids[i]);
169         }
170         for (int i = 0; i < mcrids.length; i++) {
171             Collection<String> parents = MCRLinkTableManager.instance().getDestinationOf(String.valueOf(mcrids[i]),
172                 MCRLinkTableManager.ENTRY_TYPE_PARENT);
173             for (String p : parents) {
174                 Integer target = ts.getNodes().inverse().get(p);
175                 if (target != null) {
176                     ts.addEdge(i, target);
177                 }
178             }
179             Collection<String> refs = MCRLinkTableManager.instance().getDestinationOf(String.valueOf(mcrids[i]),
180                 MCRLinkTableManager.ENTRY_TYPE_REFERENCE);
181             for (String r : refs) {
182                 Integer target = ts.getNodes().inverse().get(r);
183                 if (target != null) {
184                     ts.addEdge(i, target);
185                 }
186             }
187         }
188     }
189 
190     /**
191      * resets the topological sort data structure
192      */
193     public void reset() {
194         nodes.clear();
195         edgeSources.clear();
196         dirty = false;
197     }
198 
199     /**
200      * return the Map of Integer to NodeObjects
201      */
202     public BiMap<Integer, T> getNodes() {
203         return nodes;
204     }
205 
206     /**
207      * add a node to the graph
208      * @param name - the node name
209      */
210 
211     public void addNode(T name) {
212         if (!nodes.containsValue(name)) {
213             nodes.put(nodes.size(), name);
214         }
215     }
216 
217     /**
218      * returns a node id for a given node
219      *
220      * @param name - the node name
221      * @return the node id
222      */
223     public Integer getNodeID(T name) {
224         return nodes.inverse().get(name);
225     }
226 
227     /**
228      * return the name of the given node
229      * @param id - the node id
230      * @return the node name
231      */
232     public T getNodeName(Integer id) {
233         return nodes.get(id);
234     }
235 
236     /**
237      *  add an edge to the graph
238      * @param from - the source node
239      * @param to - the target node
240      */
241     public void addEdge(Integer from, Integer to) {
242         edgeSources.computeIfAbsent(to, k -> new TreeSet<>()).add(from);
243     }
244 
245     /**
246      * removes an edge from grapn
247      *
248      * @param from - the source node id
249      * @param to - the target node id
250      * @return true, if there are no more incoming edges on the [to]-node
251      *               ([to] = leaf node)
252      */
253     public boolean removeEdge(Integer from, Integer to) {
254         TreeSet<Integer> ts = edgeSources.get(to);
255         if (ts != null) {
256             ts.remove(from);
257             if (ts.isEmpty()) {
258                 edgeSources.remove(to);
259                 return true;
260             } else {
261                 return false;
262             }
263         }
264         return true;
265     }
266 
267     /**
268      * based upon first pseudo code in
269      * http://en.wikipedia.org/w/index.php?title=Topological_sorting&amp;oldid=611829125
270      *
271      * The algorithm will destroy the input data -&gt; the method can only be called once
272      *
273      * @return an array of ids which define the order
274      *         in which the elements have to be retrieved from the given input list
275      *         or null if an error occured
276      */
277     public int[] doTopoSort() {
278         if (dirty) {
279             LOGGER.error(
280                 "The data of this instance is inconsistent."
281                     + " Please call prepareData() again or start with a new instance!");
282             return null;
283         }
284         dirty = true;
285         // L <-array that will contain the sorted elements
286         int[] result = new int[nodes.size()];
287         // S <- Set of all nodes with no incoming edges
288         List<Integer> leafs = nodes.keySet()
289             .stream()
290             .filter(i -> !edgeSources.containsKey(i))
291             .sorted()
292             .collect(Collectors.toList());
293         int cursor = result.length - 1;
294 
295         // while S is non-empty do
296         while (!leafs.isEmpty()) {
297             // remove a node n from S
298             Integer node = leafs.remove(0);
299             // add n to tail of L (we use head, because we need an inverted list !!)
300             result[cursor--] = node;
301             // for each node m with an edge e from n to m do
302             for (Integer to : new TreeSet<>(edgeSources.keySet())) {
303                 Set<Integer> ts = edgeSources.get(to);
304                 if (ts != null && ts.contains(node)) {
305                     // remove edge e from the graph
306                     if (removeEdge(node, to)) {
307                         // if m has no other incoming edges then insert m  into S
308                         leafs.add(to);
309                     }
310                 }
311             }
312         }
313         // if graph has edges then return error (graph has at least one cycle)
314         if (!edgeSources.isEmpty()) {
315             LOGGER.error("The input contained circular dependencies: \n{}", toString());
316             return null;
317             // else return L (a topologically sorted order)
318         } else {
319             return result;
320         }
321     }
322 
323     /**
324      * @return a string representation of the underlying graph
325      */
326     @Override
327     public String toString() {
328         StringBuilder result = new StringBuilder("[");
329         for (Integer to : edgeSources.keySet()) {
330             for (Integer from : edgeSources.get(to)) {
331                 result.append('[').append(nodes.get(from)).append("->").append(nodes.get(to)).append(']');
332             }
333         }
334         result.append(']');
335         return result.toString();
336     }
337 }