001 /*
002 * $Revision: 14844 $
003 * $Date: 2009-03-10 12:17:43 +0100 (Tue, 10 Mar 2009) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.datamodel.ifs2;
025
026 import java.io.FilterInputStream;
027 import java.io.IOException;
028 import java.io.InputStream;
029 import java.security.DigestInputStream;
030 import java.security.MessageDigest;
031 import java.security.NoSuchAlgorithmException;
032
033 import org.mycore.common.MCRConfigurationException;
034 import org.mycore.common.MCRException;
035
036 /**
037 * This input stream is used by the MyCoRe filesystem classes to read the
038 * content of a file and import it into the System. MCRContentInputStream
039 * provides the header of the file that is read (the first 64k) for content type
040 * detection purposes, counts the number of bytes read and builds an MD5
041 * checksum String while content goes through this input stream.
042 *
043 * @author Frank Lützenkirchen
044 */
045 public class MCRContentInputStream extends FilterInputStream {
046 /** The number of bytes that will be read for content type detection */
047 protected final static int headerSize = 65536;
048
049 /** The MD5 checksum of all bytes read through this stream */
050 protected MessageDigest digest = null;
051
052 /** The total number of bytes read so far */
053 protected long length;
054
055 /** The header of the file read */
056 protected byte[] header;
057
058 /**
059 * Constructs a new MCRContentInputStream
060 *
061 * @param in
062 * the InputStream to read from
063 * @throws MCRConfigurationException
064 * if java classes supporting MD5 checksums are not found
065 */
066 public MCRContentInputStream(InputStream in) throws MCRException {
067 super(null);
068
069 digest = buildMD5Digest();
070
071 DigestInputStream dis = new DigestInputStream(in, digest);
072 MCRBlockingInputStream bis = new MCRBlockingInputStream(dis, headerSize);
073
074 byte[] buffer = new byte[headerSize];
075
076 try {
077 int num = bis.read(buffer, 0, buffer.length);
078 header = new byte[Math.max(0, num)];
079
080 if (num > 0) {
081 System.arraycopy(buffer, 0, header, 0, num);
082 }
083 } catch (IOException ex) {
084 String msg = "Error while reading content input stream header";
085 throw new MCRException(msg, ex);
086 }
087
088 this.in = bis;
089 }
090
091 public int read() throws IOException {
092 int b;
093
094 // if current position is in header buffer, return value from there
095 if ((header.length > 0) && (length < header.length)) {
096 b = header[(int) length];
097 length++;
098 } else {
099 b = super.read();
100 if (b != -1) {
101 length++;
102 }
103 }
104
105 return b;
106 }
107
108 public int read(byte[] buf, int off, int len) throws IOException {
109 // if current position is in header buffer, return bytes from there
110 if ((header.length > 0) && (length < header.length)) {
111 int numAvail = header.length - (int) length;
112 len = Math.min(len, numAvail);
113 System.arraycopy(header, (int) length, buf, off, len);
114 length += len;
115 return len;
116 } else {
117 len = super.read(buf, off, len);
118 if (len != -1) {
119 length += len;
120 }
121 return len;
122 }
123 }
124
125 /**
126 * Returns the first 64 k of the underlying input stream. This is used for
127 * content type detection during file import into MyCoRe.
128 *
129 * @return the first 64 k of the input stream
130 */
131 public byte[] getHeader() {
132 return header;
133 }
134
135 /**
136 * Returns the number of bytes read so far
137 *
138 * @return the number of bytes read
139 */
140 public long getLength() {
141 return length;
142 }
143
144 /**
145 * Returns the MD5 message digest that has been built during reading of the
146 * underlying input stream.
147 *
148 * @return the MD5 message digest checksum of all bytes that have been read
149 */
150 public byte[] getMD5() {
151 return digest.digest();
152 }
153
154 /**
155 * Returns the MD5 checksum as a String
156 *
157 * @return the MD5 checksum as a String of hex digits
158 */
159 public String getMD5String() {
160 return getMD5String(digest);
161 }
162
163 /**
164 * Given an MD5 message digest, returns the MD5 checksum as a String
165 *
166 * @return the MD5 checksum as a String of hex digits
167 */
168 public static String getMD5String(MessageDigest digest) {
169 byte[] bytes = digest.digest();
170 StringBuffer sb = new StringBuffer();
171
172 for (int i = 0; i < bytes.length; i++) {
173 String sValue = "0" + Integer.toHexString(bytes[i]);
174 sb.append(sValue.substring(sValue.length() - 2));
175 }
176
177 return sb.toString();
178 }
179
180 /**
181 * Builds a MessageDigest instance for MD5 checksum computation.
182 *
183 * @throws MCRConfigurationException
184 * if no java classes that support MD5 algorithm could be found
185 */
186 public static MessageDigest buildMD5Digest() {
187 try {
188 return MessageDigest.getInstance("MD5");
189 } catch (NoSuchAlgorithmException exc) {
190 String msg = "Could not find java classes that support MD5 checksum algorithm";
191 throw new MCRConfigurationException(msg, exc);
192 }
193 }
194 }