View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.datamodel.ifs;
20  
21  import java.io.FilterInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.security.DigestInputStream;
25  import java.security.MessageDigest;
26  import java.security.NoSuchAlgorithmException;
27  
28  import org.mycore.common.MCRException;
29  import org.mycore.common.config.MCRConfigurationException;
30  import org.mycore.common.content.streams.MCRBlockingInputStream;
31  
32  /**
33   * This input stream is used by the MyCoRe filesystem classes to read the
34   * content of a file and import it into the System. MCRContentInputStream
35   * provides the header of the file that is read (the first 64k) for content type
36   * detection purposes, counts the number of bytes read and builds an MD5
37   * checksum String while content goes through this input stream.
38   * 
39   * @author Frank Lützenkirchen
40   * @version $Revision$ $Date$
41   */
42  public class MCRContentInputStream extends FilterInputStream {
43      /** The number of bytes that will be read for content type detection */
44      protected static final int HEADER_SIZE = 65536;
45  
46      /** The MD5 checksum of all bytes read through this stream */
47      protected byte[] md5 = null;
48  
49      /** The message digest to build the MD5 checksum */
50      protected MessageDigest digest = null;
51  
52      /** The total number of bytes read so far */
53      protected long length;
54  
55      /** The header of the file read */
56      protected byte[] header;
57  
58      /**
59       * Constructs a new MCRContentInputStream
60       * 
61       * @param in
62       *            the InputStream to read from
63       * @throws MCRConfigurationException
64       *             if java classes supporting MD5 checksums are not found
65       */
66      public MCRContentInputStream(InputStream in) throws MCRException {
67          super(null);
68  
69          digest = buildMD5Digest();
70  
71          DigestInputStream dis = new DigestInputStream(in, digest);
72          MCRBlockingInputStream bis = new MCRBlockingInputStream(dis, HEADER_SIZE);
73  
74          byte[] buffer = new byte[HEADER_SIZE];
75  
76          try {
77              int num = bis.read(buffer, 0, buffer.length);
78              header = new byte[Math.max(0, num)];
79  
80              if (num > 0) {
81                  System.arraycopy(buffer, 0, header, 0, num);
82              }
83          } catch (IOException ex) {
84              String msg = "Error while reading content input stream header";
85              throw new MCRException(msg, ex);
86          }
87  
88          this.in = bis;
89      }
90  
91      public int consume() throws IOException {
92          byte[] buffer = new byte[4096];
93          int numRead, totalRead = 0;
94          do {
95              numRead = read(buffer);
96              if (numRead > 0) {
97                  totalRead += numRead;
98              }
99          } while (numRead != -1);
100         return totalRead;
101     }
102 
103     @Override
104     public int read() throws IOException {
105         int b;
106 
107         // if current position is in header buffer, return value from there
108         if (header.length > 0 && length < header.length) {
109             b = header[(int) length];
110             length++;
111         } else {
112             b = super.read();
113             if (b != -1) {
114                 length++;
115             }
116         }
117 
118         return b;
119     }
120 
121     @Override
122     public int read(byte[] buf, int off, int len) throws IOException {
123         // if current position is in header buffer, return bytes from there
124         if (header.length > 0 && length < header.length) {
125             int numAvail = header.length - (int) length;
126             len = Math.min(len, numAvail);
127             System.arraycopy(header, (int) length, buf, off, len);
128             length += len;
129             return len;
130         } else {
131             len = super.read(buf, off, len);
132             if (len != -1) {
133                 length += len;
134             }
135             return len;
136         }
137     }
138 
139     /**
140      * Returns the first 64 k of the underlying input stream. This is used for
141      * content type detection during file import into MyCoRe.
142      * 
143      * @return the first 64 k of the input stream
144      */
145     public byte[] getHeader() {
146         return header;
147     }
148 
149     /**
150      * Returns the number of bytes read so far
151      * 
152      * @return the number of bytes read
153      */
154     public long getLength() {
155         return length;
156     }
157 
158     /**
159      * Returns the MD5 message digest that has been built during reading of the
160      * underlying input stream.
161      * 
162      * @return the MD5 message digest checksum of all bytes that have been read
163      */
164     public byte[] getMD5() {
165         if (md5 == null) {
166             md5 = digest.digest();
167         }
168         return md5;
169     }
170 
171     /**
172      * Returns the MD5 checksum as a String
173      * 
174      * @return the MD5 checksum as a String of hex digits
175      */
176     public String getMD5String() {
177         return getMD5String(getMD5());
178     }
179 
180     /**
181      * Given an MD5 message digest, returns the MD5 checksum as a String
182      * 
183      * @return the MD5 checksum as a String of hex digits
184      */
185     public static String getMD5String(byte[] digest) {
186         StringBuilder md5SumBuilder = new StringBuilder();
187         for (byte b : digest) {
188             md5SumBuilder.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1));
189         }
190         return md5SumBuilder.toString();
191     }
192 
193     /**
194      * Builds a MessageDigest instance for MD5 checksum computation.
195      * 
196      * @throws MCRConfigurationException
197      *             if no java classes that support MD5 algorithm could be found
198      */
199     private static MessageDigest buildMD5Digest() {
200         try {
201             return MessageDigest.getInstance("MD5");
202         } catch (NoSuchAlgorithmException exc) {
203             String msg = "Could not find java classes that support MD5 checksum algorithm";
204             throw new MCRConfigurationException(msg, exc);
205         }
206     }
207 }