View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.datamodel.ifs2;
20  
21  import java.io.IOException;
22  import java.net.URI;
23  import java.net.URISyntaxException;
24  import java.nio.file.Files;
25  import java.nio.file.Path;
26  import java.nio.file.Paths;
27  import java.nio.file.attribute.BasicFileAttributes;
28  import java.text.NumberFormat;
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.Iterator;
32  import java.util.List;
33  import java.util.Locale;
34  import java.util.NoSuchElementException;
35  import java.util.Spliterator;
36  import java.util.Spliterators;
37  import java.util.StringTokenizer;
38  import java.util.function.Function;
39  import java.util.stream.IntStream;
40  import java.util.stream.Stream;
41  import java.util.stream.StreamSupport;
42  
43  import org.apache.logging.log4j.LogManager;
44  import org.apache.logging.log4j.Logger;
45  import org.mycore.common.MCRException;
46  import org.mycore.common.config.MCRConfigurationException;
47  import org.mycore.datamodel.niofs.utils.MCRRecursiveDeleter;
48  
49  /**
50   * Stores metadata files or file collections containing files and directories in
51   * a persistent store implemented using a local filesystem.
52   * 
53   * For better filesystem performance, the store can build slot subdirectories
54   * (containing other subdirectories and so on) so that not all objects are
55   * stored in the same filesystem directory. Directories containing a very large
56   * number of files typically show bad performance.
57   * 
58   * The slot layout of the store defines the usage of subdirectories within the
59   * base directory. A layout of "8" would mean no subdirectories will be used,
60   * the maximum ID size is 8 digits, and therefore up to 99999999 objects can be
61   * stored all in the same base directory. A layout of "2-2-4" would mean data is
62   * stored using two levels of subdirectories, where the first subdirectory
63   * contains up to 100 (00-99) subdirectories, the second subdirectory level
64   * below contains up to 100 subdirectories, too, and below the data is stored,
65   * with up to 10000 data objects in the subdirectory. Using this slot layout,
66   * the data of ID 10485 would be stored in the file object "/00/01/00010485",
67   * for example. Using layout "4-2-2", data would be stored in
68   * "/0001/04/00010485", and so on.
69   * 
70   * The slot file name itself may optionally have a prefix and suffix. With
71   * prefix "derivate-", the slot name would be "derivate-00010485". With prefix
72   * "DocPortal_document_" and suffix ".xml", the slot name would be
73   * "DocPortal_document_00010485.xml" for example.
74   * 
75   * MCR.IFS2.Store.ID.Class=org.mycore.datamodel.ifs2.MCRFileStore
76   * MCR.IFS2.Store.ID.BaseDir=/foo/bar
77   * MCR.IFS2.Store.ID.SlotLayout=4-2-2
78   * 
79   * @author Frank Lützenkirchen
80   */
81  public abstract class MCRStore {
82  
    /**
     * Indicates ascending order when listing IDs
     */
    public static final boolean ASCENDING = true;

    /**
     * Indicates descending order when listing IDs
     */
    public static final boolean DESCENDING = false;

    /** The ID of the store */
    protected String id;

    /** The base directory containing the stored data */
    protected Path baseDirectory;

    /** The maximum length of IDs, i.e. the sum of all numbers in the slot layout */
    protected int idLength;

    /**
     * The slot subdirectory layout, which is the number of digits used at each
     * subdirectory level to build the filename.
     */
    protected int[] slotLength;

    /** The prefix of slot names */
    protected String prefix = "";

    /** The suffix of slot names */
    protected String suffix = "";

    /** The configuration this store was initialized with */
    private MCRStoreConfig storeConfig;

    /**
     * Converts a relative slot path that uses '/' as separator into a path
     * string using the separator of the base directory's file system.
     */
    private Function<String, String> toNativePath;

    /**
     * Offset to add to the maximum ID found in the store to build the new ID.
     * This is normally 1, but initially higher to avoid reassigning the same ID
     * after system restarts. Consider the following example:
     * 
     * 1) User creates new document, ID assigned is 10. 2) User deletes document
     * 10. 3) Web application is restarted. 4) User creates new document, ID
     * assigned is 20. If offset would always be 1, ID assigned would have been
     * 10 again, and that is not nice, because we can not distinguish the two
     * creates easily.
     */
    protected int offset = 11; // safety margin: initially 11, reset to 1 after the first ID was assigned

    /**
     * The last ID assigned by this store.
     */
    protected int lastID = 0;

    /** Logger shared by this class */
    public static final Logger LOGGER = LogManager.getLogger();
137 
138     /**
139      * Deletes the data stored under the given ID from the store
140      * 
141      * @param id
142      *            the ID of the document to be deleted
143      */
144     public void delete(final int id) throws IOException {
145         delete(getSlot(id));
146     }
147 
148     /**
149      * Returns true if data for the given ID is existing in the store.
150      * 
151      * @param id
152      *            the ID of the data
153      * @return true, if data for the given ID is existing in the store.
154      */
155     public boolean exists(final int id) throws IOException {
156         return Files.exists(getSlot(id));
157     }
158 
159     public synchronized int getHighestStoredID() {
160         try {
161             String max = findMaxID(baseDirectory, 0);
162             if (max != null) {
163                 return slot2id(max);
164             }
165         } catch (final IOException e) {
166             LOGGER.error("Error while getting highest stored ID in " + baseDirectory, e);
167         }
168         return 0;
169     }
170 
171     /**
172      * Returns the ID of this store
173      */
174     public String getID() {
175         return getStoreConfig().getID();
176     }
177 
178     /**
179      * Returns the next free ID that can be used to store data. Call as late as
180      * possible to avoid that another process, for example from batch import, in
181      * the meantime already used that ID.
182      * 
183      * @return the next free ID that can be used to store data
184      */
185     public synchronized int getNextFreeID() {
186         lastID = Math.max(getHighestStoredID(), lastID);
187         lastID += lastID > 0 ? offset : 1;
188         offset = 1;
189         return lastID;
190     }
191 
192     public boolean isEmpty() {
193         try (Stream<Path> streamBaseDirectory = Files.list(baseDirectory)) {
194             return streamBaseDirectory.findAny().isEmpty();
195         } catch (final IOException e) {
196             LOGGER.error("Error while checking if base directory is empty: " + baseDirectory, e);
197             return false;
198         }
199     }
200 
201     /**
202      * @return all Ids of this store
203      */
204     public IntStream getStoredIDs() {
205         int characteristics = Spliterator.ORDERED | Spliterator.DISTINCT | Spliterator.SORTED;
206         return StreamSupport
207             .stream(() -> Spliterators
208                 .spliteratorUnknownSize(listIDs(ASCENDING), characteristics),
209                 characteristics,
210                 false)
211             .mapToInt(Integer::intValue);
212     }
213 
214     /**
215      * Lists all IDs currently used in the store, in ascending or descending
216      * order
217      * 
218      * @see #ASCENDING
219      * @see #DESCENDING
220      * 
221      * @param order
222      *            the order in which IDs should be returned.
223      * @return all IDs currently used in the store
224      */
225     public Iterator<Integer> listIDs(final boolean order) {
226         return new Iterator<Integer>() {
227             /**
228              * List of files or directories in store not yet handled
229              */
230             List<Path> files = new ArrayList<>();
231 
232             /**
233              * The next ID to return, when 0, all IDs have been returned
234              */
235             int nextID;
236 
237             /**
238              * The last ID that was returned
239              */
240             int lastID;
241 
242             /**
243              * The order in which the IDs should be returned, ascending or
244              * descending
245              */
246             boolean order;
247 
248             @Override
249             public boolean hasNext() {
250                 return nextID > 0;
251             }
252 
253             @Override
254             public Integer next() {
255                 if (nextID < 1) {
256                     throw new NoSuchElementException();
257                 }
258 
259                 lastID = nextID;
260                 nextID = findNextID();
261                 return lastID;
262             }
263 
264             @Override
265             public void remove() {
266                 if (lastID == 0) {
267                     throw new IllegalStateException();
268                 }
269                 try {
270                     MCRStore.this.delete(lastID);
271                 } catch (final Exception ex) {
272                     throw new MCRException("Could not delete " + MCRStore.this.getID() + " " + lastID, ex);
273                 }
274                 lastID = 0;
275             }
276 
277             /**
278              * Initializes the enumeration and searches for the first ID to
279              * return
280              * 
281              * @param order
282              *            the return order, ascending or descending
283              */
284             Iterator<Integer> init(final boolean order) {
285                 this.order = order;
286                 try {
287                     addChildren(baseDirectory);
288                 } catch (final IOException e) {
289                     LOGGER.error("Error while iterating over children of " + baseDirectory, e);
290                 }
291                 nextID = findNextID();
292                 return this;
293             }
294 
295             /**
296              * Adds children of the given directory to the list of files to
297              * handle next. Depending on the return sort order, ascending or
298              * descending file name order is used.
299              * 
300              * @param dir
301              *            the directory thats children should be added
302              * @throws IOException
303              */
304             private void addChildren(final Path dir) throws IOException {
305                 if (Files.isDirectory(dir)) {
306                     try (Stream<Path> steamDir = Files.list(dir)) {
307                         final Path[] children = steamDir.toArray(Path[]::new);
308                         Arrays.sort(children, new MCRPathComparator());
309 
310                         for (int i = 0; i < children.length; i++) {
311                             files.add(order ? i : 0, children[i]);
312                         }
313                     }
314                 }
315             }
316 
317             /**
318              * Finds the next ID used in the store.
319              * 
320              * @return the next ID, or 0 if there is no other ID any more
321              */
322             private int findNextID() {
323                 if (files.isEmpty()) {
324                     return 0;
325                 }
326 
327                 final Path first = files.remove(0);
328                 // checks basename length against prefix (projectId_typeId), file suffix (.xml) and configured id length
329                 // if they match it should be a parseable id
330                 String fileName = first.getFileName().toString();
331                 if (fileName.length() == idLength + prefix.length() + suffix.length()) {
332                     return MCRStore.this.slot2id(fileName);
333                 }
334 
335                 try {
336                     addChildren(first);
337                 } catch (final IOException e) {
338                     LOGGER.error("Error while finding next id.", e);
339                 }
340                 return findNextID();
341             }
342         }.init(order);
343     }
344 
345     /**
346      * Deletes the data stored in the given file object from the store
347      *
348      * @see <a href="https://stackoverflow.com/questions/39628328/trying-to-create-a-directory-immediately-after-a-successful-deleteifexists-throw">stackoverflow</a>
349      * @param path
350      *            the file object to be deleted
351      */
352     void delete(final Path path) throws IOException {
353         if (!path.startsWith(baseDirectory)) {
354             throw new IllegalArgumentException(path + " is not in the base directory " + baseDirectory);
355         }
356         Path current = path;
357         Path parent = path.getParent();
358         Files.walkFileTree(path, MCRRecursiveDeleter.instance());
359 
360         while (!Files.isSameFile(baseDirectory, parent)) {
361 
362             // Prevent access denied error in windows with closing the stream correctly
363             try (Stream<Path> streamParent = Files.list(parent)) {
364                 if (streamParent.findAny().isPresent()) {
365                     break;
366                 }
367                 current = parent;
368                 parent = current.getParent();
369                 Files.delete(current);
370             }
371         }
372     }
373 
374     /**
375      * @return the absolute path of the local base directory
376      */
377     public Path getBaseDirectory() {
378         return baseDirectory.toAbsolutePath();
379     }
380 
381     /**
382      * Returns the absolute path of the local base directory
383      * 
384      * @return the base directory storing the data
385      */
386     String getBaseDirURI() {
387         return baseDirectory.toAbsolutePath().toUri().toString();
388     }
389 
390     /** Returns the maximum length of any ID stored in this store */
391     int getIDLength() {
392         return idLength;
393     }
394 
395     /**
396      * Returns the relative path used to store data for the given id within the
397      * store base directory
398      * 
399      * @param id
400      *            the id of the data
401      * @return the relative path storing that data
402      */
403     String getSlotPath(final int id) {
404         final String[] paths = getSlotPaths(id);
405         return paths[paths.length - 1];
406     }
407 
408     /**
409      * Returns the paths of all subdirectories and the slot itself used to store
410      * data for the given id relative to the store base directory
411      * 
412      * @param id
413      *            the id of the data
414      * @return the directory and file names of the relative path storing that
415      *         data
416      */
417     String[] getSlotPaths(final int id) {
418         final String paddedId = createIDWithLeadingZeros(id);
419 
420         final String[] paths = new String[slotLength.length + 1];
421         final StringBuilder path = new StringBuilder();
422         int offset = 0;
423         for (int i = 0; i < paths.length - 1; i++) {
424             path.append(paddedId, offset, offset + slotLength[i]);
425             paths[i] = path.toString();
426             path.append("/");
427             offset += slotLength[i];
428         }
429         path.append(prefix).append(paddedId).append(suffix);
430         paths[paths.length - 1] = path.toString();
431         return paths;
432     }
433 
434     /**
435      * Extracts the numerical ID contained in the slot filename.
436      * 
437      * @param slot
438      *            the file name of the slot containing the data
439      * @return the ID of that data
440      */
441     int slot2id(String slot) {
442         slot = slot.substring(prefix.length());
443         slot = slot.substring(0, idLength);
444         return Integer.parseInt(slot);
445     }
446 
447     /**
448      * Returns the slot file object used to store data for the given id. This
449      * may be a file or directory, depending on the subclass of MCRStore that is
450      * used.
451      * 
452      * @param id
453      *            the id of the data
454      * @return the file object storing that data
455      */
456     protected Path getSlot(final int id) throws IOException {
457         String slotPath = getSlotPath(id);
458         return baseDirectory.resolve(toNativePath.apply(slotPath));
459     }
460 
461     protected MCRStoreConfig getStoreConfig() {
462         return storeConfig;
463     }
464 
465     protected void init(final MCRStoreConfig config) {
466         setStoreConfig(config);
467 
468         idLength = 0;
469 
470         final StringTokenizer st = new StringTokenizer(getStoreConfig().getSlotLayout(), "-");
471         slotLength = new int[st.countTokens() - 1];
472 
473         int i = 0;
474         while (st.countTokens() > 1) {
475             slotLength[i] = Integer.parseInt(st.nextToken());
476             idLength += slotLength[i++];
477         }
478         idLength += Integer.parseInt(st.nextToken());
479         prefix = config.getPrefix();
480 
481         try {
482             try {
483                 URI uri = new URI(getStoreConfig().getBaseDir());
484                 if (uri.getScheme() != null) {
485                     baseDirectory = Paths.get(uri);
486                 }
487             } catch (URISyntaxException e) {
488                 //not a uri, handle as relative path
489             }
490             if (baseDirectory == null) {
491                 baseDirectory = Paths.get(getStoreConfig().getBaseDir());
492             }
493 
494             String separator = baseDirectory.getFileSystem().getSeparator();
495             if (separator.equals("/")) {
496                 toNativePath = s -> s;
497             } else {
498                 toNativePath = s -> {
499                     if (s.contains("/")) {
500                         if (s.contains(separator)) {
501                             throw new IllegalArgumentException(
502                                 s + " may not contain both '/' and '" + separator + "'.");
503                         }
504                         return s.replace("/", separator);
505                     }
506                     return s;
507                 };
508             }
509 
510             try {
511                 BasicFileAttributes attrs = Files.readAttributes(baseDirectory, BasicFileAttributes.class);
512                 if (!attrs.isDirectory()) {
513                     final String msg = "Store " + getStoreConfig().getBaseDir() + " is not a directory";
514                     throw new MCRConfigurationException(msg);
515                 }
516 
517                 if (!Files.isReadable(baseDirectory)) {
518                     final String msg = "Store directory " + getStoreConfig().getBaseDir() + " is not readable";
519                     throw new MCRConfigurationException(msg);
520                 }
521             } catch (IOException e) {
522                 //does not exist;
523                 Files.createDirectories(baseDirectory);
524             }
525         } catch (final IOException e) {
526             LOGGER.error("Could not initialize store " + config.getID() + " correctly.", e);
527         }
528     }
529 
530     /**
531      * Initializes a new store instance
532      */
533     protected void init(final String id) {
534         init(new MCRStoreDefaultConfig(id));
535     }
536 
537     protected void setStoreConfig(final MCRStoreConfig storeConfig) {
538         this.storeConfig = storeConfig;
539     }
540 
541     private String createIDWithLeadingZeros(final int id) {
542         final NumberFormat numWithLeadingZerosFormat = NumberFormat.getIntegerInstance(Locale.ROOT);
543         numWithLeadingZerosFormat.setMinimumIntegerDigits(idLength);
544         numWithLeadingZerosFormat.setGroupingUsed(false);
545         return numWithLeadingZerosFormat.format(id);
546     }
547 
548     /**
549      * Recursively searches for the highest ID, which is the greatest slot file
550      * name currently used in the store.
551      * 
552      * @param dir
553      *            the directory to search
554      * @param depth
555      *            the subdirectory depth level of the dir
556      * @return the highest slot file name / ID currently stored
557      */
558     private String findMaxID(final Path dir, final int depth) throws IOException {
559 
560         final Path[] children;
561 
562         try (Stream<Path> streamDirectory = Files.list(dir)) {
563             children = streamDirectory.toArray(Path[]::new);
564         }
565 
566         if (children.length == 0) {
567             return null;
568         }
569 
570         Arrays.sort(children, new MCRPathComparator());
571 
572         if (depth == slotLength.length) {
573             return children[children.length - 1].getFileName().toString();
574         }
575 
576         for (int i = children.length - 1; i >= 0; i--) {
577             final Path child = children[i];
578 
579             try (Stream<Path> streamChild = Files.list(child)) {
580                 if (!Files.isDirectory(child) || streamChild.findAny().isEmpty()) {
581                     continue;
582                 }
583             }
584 
585             final String found = findMaxID(child, depth + 1);
586             if (found != null) {
587                 return found;
588             }
589         }
590         return null;
591     }
592 
593     public interface MCRStoreConfig {
594         String getBaseDir();
595 
596         String getID();
597 
598         String getPrefix();
599 
600         String getSlotLayout();
601     }
602 }