001    /*
002     * 
003     * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.datamodel.ifs;
025    
026    import java.io.FilterInputStream;
027    import java.io.IOException;
028    import java.io.InputStream;
029    import java.security.DigestInputStream;
030    import java.security.MessageDigest;
031    import java.security.NoSuchAlgorithmException;
032    
033    import org.mycore.common.MCRArgumentChecker;
034    import org.mycore.common.MCRConfigurationException;
035    import org.mycore.common.MCRException;
036    
037    /**
038     * This input stream is used by the MyCoRe filesystem classes to read the
039     * content of a file and import it into the System. MCRContentInputStream
040     * provides the header of the file that is read (the first 64k) for content type
041     * detection purposes, counts the number of bytes read and builds an MD5
042     * checksum String while content goes through this input stream.
043     * 
044     * @author Frank Lützenkirchen
045     * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
046     */
047    public class MCRContentInputStream extends FilterInputStream {
048        /** The number of bytes that will be read for content type detection */
049        protected final static int headerSize = 65536;
050    
051        /** The MD5 checksum of all bytes read through this stream */
052        protected MessageDigest digest = null;
053    
054        /** The total number of bytes read so far */
055        protected long length;
056        
057        /** The header of the file read */
058        protected byte[] header;
059    
060        /**
061         * Constructs a new MCRContentInputStream
062         * 
063         * @param in
064         *            the InputStream to read from
065         * @throws MCRConfigurationException
066         *             if java classes supporting MD5 checksums are not found
067         */
068        public MCRContentInputStream(InputStream in) throws MCRException {
069            super(null);
070    
071            MCRArgumentChecker.ensureNotNull(in, "InputStream");
072    
073            digest = buildMD5Digest();
074    
075            DigestInputStream dis = new DigestInputStream(in, digest);
076            MCRBlockingInputStream bis = new MCRBlockingInputStream(dis, headerSize);
077    
078            byte[] buffer = new byte[headerSize];
079    
080            try {
081                int num = bis.read(buffer, 0, buffer.length);
082                header = new byte[Math.max(0, num)];
083    
084                if (num > 0) {
085                    System.arraycopy(buffer, 0, header, 0, num);
086                }
087            } catch (IOException ex) {
088                String msg = "Error while reading content input stream header";
089                throw new MCRException(msg, ex);
090            }
091    
092            this.in = bis;
093        }
094    
095        public int read() throws IOException {
096            int b;
097            
098            // if current position is in header buffer, return value from there
099            if( ( header.length > 0 ) && ( length < header.length ) )
100            {
101              b = header[(int)length];
102              length++;
103            }
104            else
105            {
106              b = super.read();
107              if (b != -1) {
108                  length++;
109              }
110            }
111    
112            return b;
113        }
114    
115        public int read(byte[] buf, int off, int len) throws IOException {
116            // if current position is in header buffer, return bytes from there
117            if( ( header.length > 0 ) && ( length < header.length ) )
118            {
119              int numAvail = header.length - (int)length;
120              len = Math.min( len, numAvail );
121              System.arraycopy(header,(int)length,buf,off,len);
122              length += len;
123              return len;
124            }
125            else
126            {
127              len = super.read(buf, off, len);
128              if (len != -1) {
129                  length += len;
130              }
131              return len;
132            }
133        }
134    
135        /**
136         * Returns the first 64 k of the underlying input stream. This is used for
137         * content type detection during file import into MyCoRe.
138         * 
139         * @return the first 64 k of the input stream
140         */
141        public byte[] getHeader() {
142            return header;
143        }
144    
145        /**
146         * Returns the number of bytes read so far
147         * 
148         * @return the number of bytes read
149         */
150        public long getLength() {
151            return length;
152        }
153    
154        /**
155         * Returns the MD5 message digest that has been built during reading of the
156         * underlying input stream.
157         * 
158         * @return the MD5 message digest checksum of all bytes that have been read
159         */
160        public byte[] getMD5() {
161            return digest.digest();
162        }
163    
164        /**
165         * Returns the MD5 checksum as a String
166         * 
167         * @return the MD5 checksum as a String of hex digits
168         */
169        public String getMD5String() {
170            return getMD5String(digest);
171        }
172    
173        /**
174         * Given an MD5 message digest, returns the MD5 checksum as a String
175         * 
176         * @return the MD5 checksum as a String of hex digits
177         */
178        public static String getMD5String(MessageDigest digest) {
179            byte[] bytes = digest.digest();
180            StringBuffer sb = new StringBuffer();
181    
182            for (int i = 0; i < bytes.length; i++) {
183                String sValue = "0" + Integer.toHexString(bytes[i]);
184                sb.append(sValue.substring(sValue.length() - 2));
185            }
186    
187            return sb.toString();
188        }
189    
190        /**
191         * Builds a MessageDigest instance for MD5 checksum computation.
192         * 
193         * @throws MCRConfigurationException
194         *             if no java classes that support MD5 algorithm could be found
195         */
196        public static MessageDigest buildMD5Digest() {
197            try {
198                return MessageDigest.getInstance("MD5");
199            } catch (NoSuchAlgorithmException exc) {
200                String msg = "Could not find java classes that support MD5 checksum algorithm";
201                throw new MCRConfigurationException(msg, exc);
202            }
203        }
204    }