001    /*
002     * 
003     * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.backend.lucene;
025    
026    import java.io.File;
027    import java.io.StringReader;
028    
029    import org.apache.log4j.Logger;
030    import org.apache.lucene.analysis.Analyzer;
031    import org.apache.lucene.analysis.Token;
032    import org.apache.lucene.analysis.TokenStream;
033    import org.apache.lucene.analysis.de.GermanAnalyzer;
034    import org.apache.lucene.index.IndexReader;
035    import org.apache.lucene.index.IndexWriter;
036    import org.apache.lucene.index.Term;
037    import org.apache.lucene.search.Hits;
038    import org.apache.lucene.search.IndexSearcher;
039    import org.apache.lucene.search.TermQuery;
040    import org.mycore.common.MCRConfiguration;
041    import java.util.Map;
042    import java.util.HashMap;
043    
044    /**
045     * Use Lucene Analyzer to normalize strings
046     * 
047     * @author Harald Richter
048     * 
049     * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
050     * 
051     */
052    public class MCRLuceneTools {
053        MCRConfiguration config = MCRConfiguration.instance();
054        private static Map<String,Analyzer> analyzerMap = new HashMap<String,Analyzer>();
055        /** The logger */
056        private final static Logger LOGGER = Logger.getLogger(MCRLuceneTools.class);
057    
058        /**
059         * Use Lucene Analyzer to normalize strings
060         * 
061         * @param value
062         *            string to convert
063         * @param ID
064         *            The classes that do the normalization come from the lucene package
065         *            and are configured by the property
066         *            <tt>MCR.Lucene.Analyzer.<ID>.Class</tt> in mycore.properties.
067         * 
068         * @return the normalized string
069         */
070        public static String luceneNormalize(String value, String ID) throws Exception {
071          Analyzer analyzer = analyzerMap.get( ID );
072          if (null == analyzer)
073          {
074            analyzer = (Analyzer)MCRConfiguration.instance().getInstanceOf("MCR.Lucene.Analyzer." + ID + ".Class");
075            analyzerMap.put(ID, analyzer);
076          }
077          
078          StringBuffer sb = new StringBuffer();
079    
080          TokenStream ts = analyzer.tokenStream(null, new StringReader(value));
081          Token to;
082    
083          while ((to = ts.next()) != null) 
084          {
085            if ( sb.length() > 0)
086              sb.append(" ");
087            sb.append(to.termText());
088          }
089    
090          return sb.toString();
091        }
092    
093        /**
094         * Get Lucene Writer
095         * 
096         * @param indexDir
097         *            directory where lucene index is store first check existance of
098         *            index directory, if it does nor exist create it
099         * 
100         * @return the lucene writer, calling programm must close writer
101         */
102        public static IndexWriter getLuceneWriter(String indexDir, boolean first) throws Exception {
103            IndexWriter writer;
104            Analyzer analyzer = new GermanAnalyzer();
105    
106            // does directory for text index exist, if not build it
107            if (first) {
108                File file = new File(indexDir);
109    
110                if (!file.exists()) {
111                    LOGGER.info("The Directory doesn't exist: " + indexDir + " try to build it");
112    
113                    IndexWriter writer2 = new IndexWriter(indexDir, analyzer, true);
114                    writer2.close();
115                } else if (file.isDirectory()) {
116                    if (0 == file.list().length) {
117                        LOGGER.info("No Entries in Directory, initialize: " + indexDir);
118    
119                        IndexWriter writer2 = new IndexWriter(indexDir, analyzer, true);
120                        writer2.close();
121                    }
122                }
123            } // if ( first
124    
125            writer = new IndexWriter(indexDir, analyzer, false);
126            // writer.mergeFactor = 200;
127            writer.setMergeFactor(200);
128            // writer.maxMergeDocs = 2000;
129            writer.setMaxMergeDocs(2000);
130    
131            return writer;
132        }
133        
134        /**
135         * Delete all documents in Lucene with id
136         * 
137         * @param fieldname
138         *            string name of lucene field with stored id
139         * @param id
140         *            string document id
141         * @param indexDir *
142         *            the directory where index is stored
143         * 
144         */
145        public static synchronized void deleteLuceneDocument(String fieldname, String id, String indexDir) throws Exception {
146            IndexSearcher searcher = new IndexSearcher(indexDir);
147    
148            if (null == searcher) {
149                return;
150            }
151    
152            Term te1 = new Term(fieldname, id);
153    
154            TermQuery qu = new TermQuery(te1);
155    
156            LOGGER.info("Searching for: " + qu.toString(""));
157    
158            Hits hits = searcher.search(qu);
159    
160            LOGGER.info("Number of documents found : " + hits.length());
161    
162            if (hits.length() > 0) {
163                IndexReader reader = IndexReader.open(indexDir);
164                for (int i = 0; i < hits.length(); i++) {
165                    // reader.delete(hits.id(i));
166                    reader.deleteDocument(hits.id(i));
167                }
168                LOGGER.info("DELETE: " + id);
169                reader.close();
170            }
171    
172            searcher.close();
173        }
174      
175    }