001    /*
002     * $Revision: 15620 $ 
003     * $Date: 2009-07-25 08:29:12 +0200 (Sat, 25 Jul 2009) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.datamodel.ifs2;
025    
026    import java.io.File;
027    import java.util.ArrayList;
028    import java.util.Arrays;
029    import java.util.HashMap;
030    import java.util.Iterator;
031    import java.util.List;
032    import java.util.NoSuchElementException;
033    import java.util.StringTokenizer;
034    
035    import org.apache.commons.vfs.FileObject;
036    import org.apache.commons.vfs.Selectors;
037    import org.apache.commons.vfs.VFS;
038    import org.mycore.common.MCRConfiguration;
039    import org.mycore.common.MCRConfigurationException;
040    import org.mycore.common.MCRException;
041    
042    /**
043     * Stores metadata files or file collections containing files and directories in
044     * a persistent store implemented using a local filesystem.
045     * 
046     * For better filesystem performance, the store can build slot subdirectories
047     * (containing other subdirectories and so on) so that not all objects are
048     * stored in the same filesystem directory. Directories containing a very large
049     * number of files typically show bad performance.
050     * 
051     * The slot layout of the store defines the usage of subdirectories within the
052     * base directory. A layout of "8" would mean no subdirectories will be used,
053     * the maximum ID size is 8 digits, and therefore up to 99999999 objects can be
054     * stored all in the same base directory. A layout of "2-2-4" would mean data is
055     * stored using two levels of subdirectories, where the first subdirectory
056     * contains up to 100 (00-99) subdirectories, the second subdirectory level
057     * below contains up to 100 subdirectories, too, and below the data is stored,
058     * with up to 10000 data objects in the subdirectory. Using this slot layout,
059     * the data of ID 10485 would be stored in the file object "/00/01/00010485",
060     * for example. Using layout "4-2-2", data would be stored in
061     * "/0001/04/00010485", and so on.
062     * 
063     * The slot file name itself may optionally have a prefix and suffix. With
064     * prefix "derivate-", the slot name would be "derivate-00010485". With prefix
065     * "DocPortal_document_" and suffix ".xml", the slot name would be
066     * "DocPortal_document_00010485.xml" for example.
067     * 
068     * MCR.IFS2.Store.ID.Class=org.mycore.datamodel.ifs2.MCRFileStore
069     * 
070     * MCR.IFS2.Store.ID.BaseDir=/foo/bar
071     * 
072     * MCR.IFS2.Store.ID.SlotLayout=4-2-2
073     * 
074     * @author Frank Lützenkirchen
075     */
076    public abstract class MCRStore {
077    
078        /**
079         * Map of defined stores, where store ID is the map key.
080         */
081        protected static HashMap<String, MCRStore> stores = new HashMap<String, MCRStore>();
082    
083        /**
084         * Returns the store with the given ID
085         * 
086         * @param ID
087         *            the ID of the store
088         */
089        protected static MCRStore getStore(String ID) {
090            if (!stores.containsKey(ID)) {
091                MCRStore store = (MCRStore) (MCRConfiguration.instance().getInstanceOf("MCR.IFS2.Store." + ID + ".Class"));
092                store.init(ID);
093            }
094            return stores.get(ID);
095        }
096    
097        /** The ID of the store */
098        protected String id;
099    
100        /** The base directory containing the stored data */
101        protected File dir;
102    
103        /** The maximum length of IDs **/
104        protected int idLength;
105    
106        /**
107         * The slot subdirectory layout, which is the number of digits used at each
108         * subdirectory level to build the filename.
109         */
110        protected int[] slotLength;
111    
112        /** The prefix of slot names */
113        protected String prefix = "";
114    
115        /** The suffix of slot names */
116        protected String suffix = "";
117    
118        /**
119         * Initializes a new store instance
120         */
121        protected void init(String id) {
122            stores.put(id, this);
123            this.id = id;
124    
125            String cfg = "MCR.IFS2.Store." + id + ".";
126            MCRConfiguration config = MCRConfiguration.instance();
127            String baseDir = config.getString(cfg + "BaseDir");
128            String slotLayout = config.getString(cfg + "SlotLayout");
129    
130            this.idLength = 0;
131    
132            StringTokenizer st = new StringTokenizer(slotLayout, "-");
133            slotLength = new int[st.countTokens() - 1];
134    
135            int i = 0;
136            while (st.countTokens() > 1) {
137                slotLength[i] = Integer.parseInt(st.nextToken());
138                idLength += slotLength[i++];
139            }
140            idLength += Integer.parseInt(st.nextToken());
141    
142            dir = new File(baseDir);
143            if (!dir.exists()) {
144                try {
145                    boolean created = dir.mkdirs();
146                    if (!created) {
147                        String msg = "Unable to create store directory " + baseDir;
148                        throw new MCRConfigurationException(msg);
149                    }
150                } catch (Exception ex) {
151                    String msg = "Exception while creating store directory " + baseDir;
152                    throw new MCRConfigurationException(msg, ex);
153                }
154            } else {
155                if (!dir.canRead()) {
156                    String msg = "Store directory " + baseDir + " is not readable";
157                    throw new MCRConfigurationException(msg);
158                }
159                if (!dir.isDirectory()) {
160                    String msg = "Store " + baseDir + " is a file, not a directory";
161                    throw new MCRConfigurationException(msg);
162                }
163            }
164        }
165    
166        /**
167         * Returns the ID of this store
168         */
169        public String getID() {
170            return id;
171        }
172    
173        /**
174         * Returns the absolute path of the local base directory
175         * 
176         * @return the base directory storing the data
177         */
178        String getBaseDir() {
179            return dir.getAbsolutePath();
180        }
181    
182        /**
183         * Used to fill small IDs with leading zeros
184         */
185        private static String nulls = "00000000000000000000000000000000";
186    
187        /**
188         * Returns the slot file object used to store data for the given ID. This
189         * may be a file or directory, depending on the subclass of MCRStore that is
190         * used.
191         * 
192         * @param ID
193         *            the ID of the data
194         * @return the file object storing that data
195         */
196        FileObject getSlot(int ID) throws Exception {
197            return VFS.getManager().resolveFile(dir, getSlotPath(ID));
198        }
199    
200        /**
201         * Returns the relative path used to store data for the given ID within the
202         * store base directory
203         * 
204         * @param ID
205         *            the ID of the data
206         * @return the relative path storing that data
207         */
208        String getSlotPath(int ID) {
209            String[] paths = getSlotPaths(ID);
210            return paths[paths.length - 1];
211        }
212    
213        /**
214         * Returns the paths of all subdirectories and the slot itself used to store
215         * data for the given ID relative to the store base directory
216         * 
217         * @param ID
218         *            the ID of the data
219         * @return the directory and file names of the relative path storing that
220         *         data
221         */
222        String[] getSlotPaths(int ID) {
223            String id = nulls + String.valueOf(ID);
224            id = id.substring(id.length() - idLength);
225    
226            String[] paths = new String[slotLength.length + 1];
227            StringBuffer path = new StringBuffer();
228            int offset = 0;
229            for (int i = 0; i < paths.length - 1; i++) {
230                path.append(id.substring(offset, offset + slotLength[i]));
231                paths[i] = path.toString();
232                path.append("/");
233                offset += slotLength[i];
234            }
235            path.append(prefix).append(id).append(suffix);
236            paths[paths.length - 1] = path.toString();
237            return paths;
238        }
239    
240        /**
241         * Returns true if data for the given ID is existing in the store.
242         * 
243         * @param id
244         *            the ID of the data
245         * @return true, if data for the given ID is existing in the store.
246         */
247        public boolean exists(int id) throws Exception {
248            return getSlot(id).exists();
249        }
250    
251        /**
252         * Offset to add to the maximum ID found in the store to build the new ID.
253         * This is normally 1, but initially higher to avoid reassigning the same ID
254         * after system restarts. Consider the following example:
255         * 
256         * 1) User creates new document, ID assigned is 10. 2) User deletes document
257         * 10. 3) Web application is restarted. 4) User creates new document, ID
258         * assigned is 20. If offset would always be 1, ID assigned would have been
259         * 10 again, and that is not nice, because we can not distinguish the two
260         * creates easily.
261         */
262        protected int offset = 11; // Sicherheitsabstand, initially 11, later 1
263    
264        /**
265         * The last ID assigned by this store.
266         */
267        protected int lastID = 0;
268    
269        /**
270         * Returns the next free ID that can be used to store data. Call as late as
271         * possible to avoid that another process, for example from batch import, in
272         * the meantime already used that ID.
273         * 
274         * @return the next free ID that can be used to store data
275         */
276        public synchronized int getNextFreeID() {
277            lastID = Math.max(getHighestStoredID(), lastID);
278            lastID += (lastID > 0 ? offset : 1);
279            offset = 1;
280            return lastID;
281        }
282        
283        public synchronized int getHighestStoredID() {
284            int found = 0;
285            String max = findMaxID(dir, 0);
286            if (max != null)
287                found = slot2id(max);
288            return found;
289        }
290    
291        /**
292         * Extracts the numerical ID contained in the slot filename.
293         * 
294         * @param slot
295         *            the file name of the slot containing the data
296         * @return the ID of that data
297         */
298        private int slot2id(String slot) {
299            slot = slot.substring(prefix.length());
300            slot = slot.substring(0, idLength);
301            return Integer.parseInt(slot);
302        }
303    
304        /**
305         * Recursively searches for the highest ID, which is the greatest slot file
306         * name currently used in the store.
307         * 
308         * @param dir
309         *            the directory to search
310         * @param depth
311         *            the subdirectory depth level of the dir
312         * @return the highest slot file name / ID currently stored
313         */
314        private String findMaxID(File dir, int depth) {
315            String[] children = dir.list();
316    
317            if ((children == null) || (children.length == 0))
318                return null;
319    
320            Arrays.sort(children);
321    
322            if (depth == slotLength.length)
323                return children[children.length - 1];
324    
325            for (int i = children.length - 1; i >= 0; i--) {
326                File child = new File(dir, children[i]);
327                if (!child.isDirectory())
328                    continue;
329                String found = findMaxID(child, depth + 1);
330                if (found != null)
331                    return found;
332            }
333            return null;
334        }
335    
336        /**
337         * Indicates ascending order when listing IDs
338         */
339        public final static boolean ASCENDING = true;
340    
341        /**
342         * Indicates descending order when listing IDs
343         */
344        public final static boolean DESCENDING = false;
345    
346        /**
347         * Lists all IDs currently used in the store, in ascending or descending
348         * order
349         * 
350         * @see #ASCENDING
351         * @see #DESCENDING
352         * 
353         * @param order
354         *            the order in which IDs should be returned.
355         * @return all IDs currently used in the store
356         */
357        public Iterator<Integer> listIDs(boolean order) {
358            return new Iterator<Integer>() {
359                /**
360                 * List of files or directories in store not yet handled
361                 */
362                List<File> files = new ArrayList<File>();
363    
364                /**
365                 * The next ID to return, when 0, all IDs have been returned
366                 */
367                int nextID;
368    
369                /**
370                 * The last ID that was returned
371                 */
372                int lastID;
373    
374                /**
375                 * The order in which the IDs should be returned, ascending or
376                 * descending
377                 */
378                boolean order;
379    
380                /**
381                 * Initializes the enumeration and searches for the first ID to
382                 * return
383                 * 
384                 * @param order
385                 *            the return order, ascending or descending
386                 */
387                Iterator<Integer> init(boolean order) {
388                    this.order = order;
389                    addChildren(dir);
390                    nextID = findNextID();
391                    return this;
392                }
393    
394                /**
395                 * Adds children of the given directory to the list of files to
396                 * handle next. Depending on the return sort order, ascending or
397                 * descending file name order is used.
398                 * 
399                 * @param dir
400                 *            the directory thats children should be added
401                 */
402                private void addChildren(File dir) {
403                    String[] children = dir.list();
404                    if ((children == null) || (children.length == 0))
405                        return;
406    
407                    Arrays.sort(children);
408                    for (int i = 0; i < children.length; i++)
409                        files.add((order ? i : 0), new File(dir, children[i]));
410                }
411    
412                public boolean hasNext() {
413                    return (nextID > 0);
414                }
415    
416                public Integer next() {
417                    if (nextID < 1)
418                        throw new NoSuchElementException();
419    
420                    lastID = nextID;
421                    nextID = findNextID();
422                    return lastID;
423                }
424    
425                public void remove() {
426                    if (lastID == 0)
427                        throw new IllegalStateException();
428                    try {
429                        MCRStore.this.delete(lastID);
430                    } catch (Exception ex) {
431                        throw new MCRException("Could not delete " + MCRStore.this.getID() + " " + lastID, ex);
432                    }
433                    lastID = 0;
434                }
435    
436                /**
437                 * Finds the next ID used in the store.
438                 * 
439                 * @return the next ID, or 0 if there is no other ID any more
440                 */
441                private int findNextID() {
442                    if (files.isEmpty())
443                        return 0;
444    
445                    File first = files.remove(0);
446                    if (first.getName().length() == idLength + prefix.length() + suffix.length())
447                        return MCRStore.this.slot2id(first.getName());
448    
449                    addChildren(first);
450                    return findNextID();
451                }
452            }.init(order);
453        }
454    
455        /**
456         * Deletes the data stored under the given ID from the store
457         * 
458         * @param id
459         *            the ID of the document to be deleted
460         */
461        public void delete(int id) throws Exception {
462            delete(getSlot(id));
463        }
464    
465        /**
466         * Deletes the data stored in the given file object from the store
467         * 
468         * @param fo
469         *            the file object to be deleted
470         */
471        void delete(FileObject fo) throws Exception {
472            FileObject parent = fo.getParent();
473            fo.delete(Selectors.SELECT_ALL);
474    
475            FileObject base = VFS.getManager().resolveFile(dir.getAbsolutePath());
476            while (!parent.equals(base)) {
477                FileObject[] children = parent.getChildren();
478                if (children.length > 0)
479                    break;
480                fo = parent;
481                parent = fo.getParent();
482                fo.delete();
483            }
484        }
485    }