001    /*
002     * 
003     * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.datamodel.ifs.extractors;
025    
026    import java.io.BufferedInputStream;
027    import java.io.FileInputStream;
028    import java.io.InputStream;
029    
030    import org.apache.log4j.Logger;
031    import org.jdom.Element;
032    import org.jdom.output.Format;
033    import org.jdom.output.XMLOutputter;
034    import org.mycore.common.events.MCREvent;
035    import org.mycore.common.events.MCREventHandlerBase;
036    import org.mycore.datamodel.ifs.MCRFile;
037    
038    /**
039     * Event handler that extracts data like technical metadata (ID3 from MP3, EXIF
040     * from JPEG etc.) whenever an MCRFile's content is changed. The extracted data
041     * is stored in MCRFile's additional xml data.
042     * 
043     * @see org.mycore.datamodel.ifs.MCRFilesystemNode#getAdditionalData()
044     * 
045     * @author Frank Lützenkirchen
046     * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
047     */
048    public abstract class MCRDataExtractor extends MCREventHandlerBase {
049    
050        /** The logger */
051        private final static Logger LOGGER = Logger.getLogger(MCRDataExtractorJPEG.class);
052    
053        /**
054         * Convenience method that prints out extracted data of a local file
055         * 
056         * @param filePath
057         *            the path of the file to be tested
058         */
059        protected void testLocalFile(String filePath) {
060            try {
061                InputStream in = new BufferedInputStream(new FileInputStream(filePath));
062                String name = getClass().getName();
063                Element data = new Element(name.substring(name.lastIndexOf('.') + 1));
064                extractData(data, in);
065                System.out.println(outputData(data));
066                in.close();
067            } catch (Exception ex) {
068                ex.printStackTrace();
069            }
070        }
071    
072        /**
073         * Returns the XML data element as a String
074         * 
075         * @param data
076         *            the extracted data as XML element
077         * @return the XML pretty-outputted as a String
078         */
079        protected String outputData(Element data) {
080            XMLOutputter xout = new XMLOutputter();
081            xout.setFormat(Format.getPrettyFormat().setEncoding("ISO-8859-1"));
082            return xout.outputString(data);
083        }
084    
085        protected void handleFileCreated(MCREvent evt, MCRFile file) {
086            String supported = " " + getSupportedContentTypeIDs() + " ";
087            if (supported.indexOf(" " + file.getContentTypeID() + " ") == -1)
088                return;
089    
090            try {
091                InputStream in = new BufferedInputStream(file.getContentAsInputStream());
092                String name = getClass().getName();
093                Element data = new Element(name.substring(name.lastIndexOf('.') + 1));
094                extractData(data, in);
095                if (LOGGER.isDebugEnabled())
096                    LOGGER.debug(outputData(data));
097                if (data.getChildren().size() > 0)
098                    file.setAdditionalData(data);
099            } catch (Exception ex) {
100                ex.printStackTrace();
101            }
102        }
103    
104        protected void handleFileUpdated(MCREvent evt, MCRFile file) {
105            handleFileCreated(evt, file);
106        }
107    
108        /**
109         * Extracts metadata from a file. This method must be overwritten by
110         * subclasses.
111         * 
112         * @param container
113         *            empty XML element that the extractor should fill with data
114         * @param in
115         *            the InputStream to read the file's content from
116         * @return the XML element containing the extracted data
117         * @throws Exception
118         */
119        protected abstract void extractData(Element container, InputStream in) throws Exception;
120    
121        /**
122         * Returns the IDs of the FileContentTypes that are supported by this
123         * metadata extractor. Only if the given file matches one of these types,
124         * metadata is extracted.
125         * 
126         * @return a String of supported MCRFileContentType ID(s), separated by
127         *         spaces
128         */
129        protected abstract String getSupportedContentTypeIDs();
130    
131        /**
132         * Adds extracted metadata value to the resulting XML output, if it is not
133         * null or empty.
134         */
135        protected void addDataValue(Element parent, String name, String value) {
136            if (value == null)
137                return;
138            value = value.trim();
139            if (value.length() == 0)
140                return;
141            if (value.equals("0") || value.equals("0.0"))
142                return;
143            parent.addContent(new Element(name).setText(value));
144        }
145    }