001    /*
002     * $Revision: 14844 $ 
003     * $Date: 2009-03-10 12:17:43 +0100 (Tue, 10 Mar 2009) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.datamodel.ifs2;
025    
026    import java.io.FilterInputStream;
027    import java.io.IOException;
028    import java.io.InputStream;
029    import java.security.DigestInputStream;
030    import java.security.MessageDigest;
031    import java.security.NoSuchAlgorithmException;
032    
033    import org.mycore.common.MCRConfigurationException;
034    import org.mycore.common.MCRException;
035    
036    /**
037     * This input stream is used by the MyCoRe filesystem classes to read the
038     * content of a file and import it into the System. MCRContentInputStream
039     * provides the header of the file that is read (the first 64k) for content type
040     * detection purposes, counts the number of bytes read and builds an MD5
041     * checksum String while content goes through this input stream.
042     * 
043     * @author Frank Lützenkirchen
044     */
045    public class MCRContentInputStream extends FilterInputStream {
046        /** The number of bytes that will be read for content type detection */
047        protected final static int headerSize = 65536;
048    
049        /** The MD5 checksum of all bytes read through this stream */
050        protected MessageDigest digest = null;
051    
052        /** The total number of bytes read so far */
053        protected long length;
054    
055        /** The header of the file read */
056        protected byte[] header;
057    
058        /**
059         * Constructs a new MCRContentInputStream
060         * 
061         * @param in
062         *            the InputStream to read from
063         * @throws MCRConfigurationException
064         *             if java classes supporting MD5 checksums are not found
065         */
066        public MCRContentInputStream(InputStream in) throws MCRException {
067            super(null);
068    
069            digest = buildMD5Digest();
070    
071            DigestInputStream dis = new DigestInputStream(in, digest);
072            MCRBlockingInputStream bis = new MCRBlockingInputStream(dis, headerSize);
073    
074            byte[] buffer = new byte[headerSize];
075    
076            try {
077                int num = bis.read(buffer, 0, buffer.length);
078                header = new byte[Math.max(0, num)];
079    
080                if (num > 0) {
081                    System.arraycopy(buffer, 0, header, 0, num);
082                }
083            } catch (IOException ex) {
084                String msg = "Error while reading content input stream header";
085                throw new MCRException(msg, ex);
086            }
087    
088            this.in = bis;
089        }
090    
091        public int read() throws IOException {
092            int b;
093    
094            // if current position is in header buffer, return value from there
095            if ((header.length > 0) && (length < header.length)) {
096                b = header[(int) length];
097                length++;
098            } else {
099                b = super.read();
100                if (b != -1) {
101                    length++;
102                }
103            }
104    
105            return b;
106        }
107    
108        public int read(byte[] buf, int off, int len) throws IOException {
109            // if current position is in header buffer, return bytes from there
110            if ((header.length > 0) && (length < header.length)) {
111                int numAvail = header.length - (int) length;
112                len = Math.min(len, numAvail);
113                System.arraycopy(header, (int) length, buf, off, len);
114                length += len;
115                return len;
116            } else {
117                len = super.read(buf, off, len);
118                if (len != -1) {
119                    length += len;
120                }
121                return len;
122            }
123        }
124    
125        /**
126         * Returns the first 64 k of the underlying input stream. This is used for
127         * content type detection during file import into MyCoRe.
128         * 
129         * @return the first 64 k of the input stream
130         */
131        public byte[] getHeader() {
132            return header;
133        }
134    
135        /**
136         * Returns the number of bytes read so far
137         * 
138         * @return the number of bytes read
139         */
140        public long getLength() {
141            return length;
142        }
143    
144        /**
145         * Returns the MD5 message digest that has been built during reading of the
146         * underlying input stream.
147         * 
148         * @return the MD5 message digest checksum of all bytes that have been read
149         */
150        public byte[] getMD5() {
151            return digest.digest();
152        }
153    
154        /**
155         * Returns the MD5 checksum as a String
156         * 
157         * @return the MD5 checksum as a String of hex digits
158         */
159        public String getMD5String() {
160            return getMD5String(digest);
161        }
162    
163        /**
164         * Given an MD5 message digest, returns the MD5 checksum as a String
165         * 
166         * @return the MD5 checksum as a String of hex digits
167         */
168        public static String getMD5String(MessageDigest digest) {
169            byte[] bytes = digest.digest();
170            StringBuffer sb = new StringBuffer();
171    
172            for (int i = 0; i < bytes.length; i++) {
173                String sValue = "0" + Integer.toHexString(bytes[i]);
174                sb.append(sValue.substring(sValue.length() - 2));
175            }
176    
177            return sb.toString();
178        }
179    
180        /**
181         * Builds a MessageDigest instance for MD5 checksum computation.
182         * 
183         * @throws MCRConfigurationException
184         *             if no java classes that support MD5 algorithm could be found
185         */
186        public static MessageDigest buildMD5Digest() {
187            try {
188                return MessageDigest.getInstance("MD5");
189            } catch (NoSuchAlgorithmException exc) {
190                String msg = "Could not find java classes that support MD5 checksum algorithm";
191                throw new MCRConfigurationException(msg, exc);
192            }
193        }
194    }