001 /*
002 *
003 * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.datamodel.ifs;
025
026 import java.io.FilterInputStream;
027 import java.io.IOException;
028 import java.io.InputStream;
029 import java.security.DigestInputStream;
030 import java.security.MessageDigest;
031 import java.security.NoSuchAlgorithmException;
032
033 import org.mycore.common.MCRArgumentChecker;
034 import org.mycore.common.MCRConfigurationException;
035 import org.mycore.common.MCRException;
036
037 /**
038 * This input stream is used by the MyCoRe filesystem classes to read the
039 * content of a file and import it into the System. MCRContentInputStream
040 * provides the header of the file that is read (the first 64k) for content type
041 * detection purposes, counts the number of bytes read and builds an MD5
042 * checksum String while content goes through this input stream.
043 *
044 * @author Frank Lützenkirchen
045 * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
046 */
047 public class MCRContentInputStream extends FilterInputStream {
048 /** The number of bytes that will be read for content type detection */
049 protected final static int headerSize = 65536;
050
051 /** The MD5 checksum of all bytes read through this stream */
052 protected MessageDigest digest = null;
053
054 /** The total number of bytes read so far */
055 protected long length;
056
057 /** The header of the file read */
058 protected byte[] header;
059
060 /**
061 * Constructs a new MCRContentInputStream
062 *
063 * @param in
064 * the InputStream to read from
065 * @throws MCRConfigurationException
066 * if java classes supporting MD5 checksums are not found
067 */
068 public MCRContentInputStream(InputStream in) throws MCRException {
069 super(null);
070
071 MCRArgumentChecker.ensureNotNull(in, "InputStream");
072
073 digest = buildMD5Digest();
074
075 DigestInputStream dis = new DigestInputStream(in, digest);
076 MCRBlockingInputStream bis = new MCRBlockingInputStream(dis, headerSize);
077
078 byte[] buffer = new byte[headerSize];
079
080 try {
081 int num = bis.read(buffer, 0, buffer.length);
082 header = new byte[Math.max(0, num)];
083
084 if (num > 0) {
085 System.arraycopy(buffer, 0, header, 0, num);
086 }
087 } catch (IOException ex) {
088 String msg = "Error while reading content input stream header";
089 throw new MCRException(msg, ex);
090 }
091
092 this.in = bis;
093 }
094
095 public int read() throws IOException {
096 int b;
097
098 // if current position is in header buffer, return value from there
099 if( ( header.length > 0 ) && ( length < header.length ) )
100 {
101 b = header[(int)length];
102 length++;
103 }
104 else
105 {
106 b = super.read();
107 if (b != -1) {
108 length++;
109 }
110 }
111
112 return b;
113 }
114
115 public int read(byte[] buf, int off, int len) throws IOException {
116 // if current position is in header buffer, return bytes from there
117 if( ( header.length > 0 ) && ( length < header.length ) )
118 {
119 int numAvail = header.length - (int)length;
120 len = Math.min( len, numAvail );
121 System.arraycopy(header,(int)length,buf,off,len);
122 length += len;
123 return len;
124 }
125 else
126 {
127 len = super.read(buf, off, len);
128 if (len != -1) {
129 length += len;
130 }
131 return len;
132 }
133 }
134
135 /**
136 * Returns the first 64 k of the underlying input stream. This is used for
137 * content type detection during file import into MyCoRe.
138 *
139 * @return the first 64 k of the input stream
140 */
141 public byte[] getHeader() {
142 return header;
143 }
144
145 /**
146 * Returns the number of bytes read so far
147 *
148 * @return the number of bytes read
149 */
150 public long getLength() {
151 return length;
152 }
153
154 /**
155 * Returns the MD5 message digest that has been built during reading of the
156 * underlying input stream.
157 *
158 * @return the MD5 message digest checksum of all bytes that have been read
159 */
160 public byte[] getMD5() {
161 return digest.digest();
162 }
163
164 /**
165 * Returns the MD5 checksum as a String
166 *
167 * @return the MD5 checksum as a String of hex digits
168 */
169 public String getMD5String() {
170 return getMD5String(digest);
171 }
172
173 /**
174 * Given an MD5 message digest, returns the MD5 checksum as a String
175 *
176 * @return the MD5 checksum as a String of hex digits
177 */
178 public static String getMD5String(MessageDigest digest) {
179 byte[] bytes = digest.digest();
180 StringBuffer sb = new StringBuffer();
181
182 for (int i = 0; i < bytes.length; i++) {
183 String sValue = "0" + Integer.toHexString(bytes[i]);
184 sb.append(sValue.substring(sValue.length() - 2));
185 }
186
187 return sb.toString();
188 }
189
190 /**
191 * Builds a MessageDigest instance for MD5 checksum computation.
192 *
193 * @throws MCRConfigurationException
194 * if no java classes that support MD5 algorithm could be found
195 */
196 public static MessageDigest buildMD5Digest() {
197 try {
198 return MessageDigest.getInstance("MD5");
199 } catch (NoSuchAlgorithmException exc) {
200 String msg = "Could not find java classes that support MD5 checksum algorithm";
201 throw new MCRConfigurationException(msg, exc);
202 }
203 }
204 }