001 /*
002 *
003 * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.datamodel.ifs.extractors;
025
026 import java.io.BufferedInputStream;
027 import java.io.FileInputStream;
028 import java.io.InputStream;
029
030 import org.apache.log4j.Logger;
031 import org.jdom.Element;
032 import org.jdom.output.Format;
033 import org.jdom.output.XMLOutputter;
034 import org.mycore.common.events.MCREvent;
035 import org.mycore.common.events.MCREventHandlerBase;
036 import org.mycore.datamodel.ifs.MCRFile;
037
038 /**
039 * Event handler that extracts data like technical metadata (ID3 from MP3, EXIF
040 * from JPEG etc.) whenever an MCRFile's content is changed. The extracted data
041 * is stored in MCRFile's additional xml data.
042 *
043 * @see org.mycore.datamodel.ifs.MCRFilesystemNode#getAdditionalData()
044 *
045 * @author Frank Lützenkirchen
046 * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
047 */
048 public abstract class MCRDataExtractor extends MCREventHandlerBase {
049
050 /** The logger */
051 private final static Logger LOGGER = Logger.getLogger(MCRDataExtractorJPEG.class);
052
053 /**
054 * Convenience method that prints out extracted data of a local file
055 *
056 * @param filePath
057 * the path of the file to be tested
058 */
059 protected void testLocalFile(String filePath) {
060 try {
061 InputStream in = new BufferedInputStream(new FileInputStream(filePath));
062 String name = getClass().getName();
063 Element data = new Element(name.substring(name.lastIndexOf('.') + 1));
064 extractData(data, in);
065 System.out.println(outputData(data));
066 in.close();
067 } catch (Exception ex) {
068 ex.printStackTrace();
069 }
070 }
071
072 /**
073 * Returns the XML data element as a String
074 *
075 * @param data
076 * the extracted data as XML element
077 * @return the XML pretty-outputted as a String
078 */
079 protected String outputData(Element data) {
080 XMLOutputter xout = new XMLOutputter();
081 xout.setFormat(Format.getPrettyFormat().setEncoding("ISO-8859-1"));
082 return xout.outputString(data);
083 }
084
085 protected void handleFileCreated(MCREvent evt, MCRFile file) {
086 String supported = " " + getSupportedContentTypeIDs() + " ";
087 if (supported.indexOf(" " + file.getContentTypeID() + " ") == -1)
088 return;
089
090 try {
091 InputStream in = new BufferedInputStream(file.getContentAsInputStream());
092 String name = getClass().getName();
093 Element data = new Element(name.substring(name.lastIndexOf('.') + 1));
094 extractData(data, in);
095 if (LOGGER.isDebugEnabled())
096 LOGGER.debug(outputData(data));
097 if (data.getChildren().size() > 0)
098 file.setAdditionalData(data);
099 } catch (Exception ex) {
100 ex.printStackTrace();
101 }
102 }
103
104 protected void handleFileUpdated(MCREvent evt, MCRFile file) {
105 handleFileCreated(evt, file);
106 }
107
108 /**
109 * Extracts metadata from a file. This method must be overwritten by
110 * subclasses.
111 *
112 * @param container
113 * empty XML element that the extractor should fill with data
114 * @param in
115 * the InputStream to read the file's content from
116 * @return the XML element containing the extracted data
117 * @throws Exception
118 */
119 protected abstract void extractData(Element container, InputStream in) throws Exception;
120
121 /**
122 * Returns the IDs of the FileContentTypes that are supported by this
123 * metadata extractor. Only if the given file matches one of these types,
124 * metadata is extracted.
125 *
126 * @return a String of supported MCRFileContentType ID(s), separated by
127 * spaces
128 */
129 protected abstract String getSupportedContentTypeIDs();
130
131 /**
132 * Adds extracted metadata value to the resulting XML output, if it is not
133 * null or empty.
134 */
135 protected void addDataValue(Element parent, String name, String value) {
136 if (value == null)
137 return;
138 value = value.trim();
139 if (value.length() == 0)
140 return;
141 if (value.equals("0") || value.equals("0.0"))
142 return;
143 parent.addContent(new Element(name).setText(value));
144 }
145 }