001 /*
002 *
003 * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.backend.lucene;
025
026 import java.io.File;
027 import java.io.StringReader;
028
029 import org.apache.log4j.Logger;
030 import org.apache.lucene.analysis.Analyzer;
031 import org.apache.lucene.analysis.Token;
032 import org.apache.lucene.analysis.TokenStream;
033 import org.apache.lucene.analysis.de.GermanAnalyzer;
034 import org.apache.lucene.index.IndexReader;
035 import org.apache.lucene.index.IndexWriter;
036 import org.apache.lucene.index.Term;
037 import org.apache.lucene.search.Hits;
038 import org.apache.lucene.search.IndexSearcher;
039 import org.apache.lucene.search.TermQuery;
040 import org.mycore.common.MCRConfiguration;
041 import java.util.Map;
042 import java.util.HashMap;
043
044 /**
045 * Use Lucene Analyzer to normalize strings
046 *
047 * @author Harald Richter
048 *
049 * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
050 *
051 */
052 public class MCRLuceneTools {
053 MCRConfiguration config = MCRConfiguration.instance();
054 private static Map<String,Analyzer> analyzerMap = new HashMap<String,Analyzer>();
055 /** The logger */
056 private final static Logger LOGGER = Logger.getLogger(MCRLuceneTools.class);
057
058 /**
059 * Use Lucene Analyzer to normalize strings
060 *
061 * @param value
062 * string to convert
063 * @param ID
064 * The classes that do the normalization come from the lucene package
065 * and are configured by the property
066 * <tt>MCR.Lucene.Analyzer.<ID>.Class</tt> in mycore.properties.
067 *
068 * @return the normalized string
069 */
070 public static String luceneNormalize(String value, String ID) throws Exception {
071 Analyzer analyzer = analyzerMap.get( ID );
072 if (null == analyzer)
073 {
074 analyzer = (Analyzer)MCRConfiguration.instance().getInstanceOf("MCR.Lucene.Analyzer." + ID + ".Class");
075 analyzerMap.put(ID, analyzer);
076 }
077
078 StringBuffer sb = new StringBuffer();
079
080 TokenStream ts = analyzer.tokenStream(null, new StringReader(value));
081 Token to;
082
083 while ((to = ts.next()) != null)
084 {
085 if ( sb.length() > 0)
086 sb.append(" ");
087 sb.append(to.termText());
088 }
089
090 return sb.toString();
091 }
092
093 /**
094 * Get Lucene Writer
095 *
096 * @param indexDir
097 * directory where lucene index is store first check existance of
098 * index directory, if it does nor exist create it
099 *
100 * @return the lucene writer, calling programm must close writer
101 */
102 public static IndexWriter getLuceneWriter(String indexDir, boolean first) throws Exception {
103 IndexWriter writer;
104 Analyzer analyzer = new GermanAnalyzer();
105
106 // does directory for text index exist, if not build it
107 if (first) {
108 File file = new File(indexDir);
109
110 if (!file.exists()) {
111 LOGGER.info("The Directory doesn't exist: " + indexDir + " try to build it");
112
113 IndexWriter writer2 = new IndexWriter(indexDir, analyzer, true);
114 writer2.close();
115 } else if (file.isDirectory()) {
116 if (0 == file.list().length) {
117 LOGGER.info("No Entries in Directory, initialize: " + indexDir);
118
119 IndexWriter writer2 = new IndexWriter(indexDir, analyzer, true);
120 writer2.close();
121 }
122 }
123 } // if ( first
124
125 writer = new IndexWriter(indexDir, analyzer, false);
126 // writer.mergeFactor = 200;
127 writer.setMergeFactor(200);
128 // writer.maxMergeDocs = 2000;
129 writer.setMaxMergeDocs(2000);
130
131 return writer;
132 }
133
134 /**
135 * Delete all documents in Lucene with id
136 *
137 * @param fieldname
138 * string name of lucene field with stored id
139 * @param id
140 * string document id
141 * @param indexDir *
142 * the directory where index is stored
143 *
144 */
145 public static synchronized void deleteLuceneDocument(String fieldname, String id, String indexDir) throws Exception {
146 IndexSearcher searcher = new IndexSearcher(indexDir);
147
148 if (null == searcher) {
149 return;
150 }
151
152 Term te1 = new Term(fieldname, id);
153
154 TermQuery qu = new TermQuery(te1);
155
156 LOGGER.info("Searching for: " + qu.toString(""));
157
158 Hits hits = searcher.search(qu);
159
160 LOGGER.info("Number of documents found : " + hits.length());
161
162 if (hits.length() > 0) {
163 IndexReader reader = IndexReader.open(indexDir);
164 for (int i = 0; i < hits.length(); i++) {
165 // reader.delete(hits.id(i));
166 reader.deleteDocument(hits.id(i));
167 }
168 LOGGER.info("DELETE: " + id);
169 reader.close();
170 }
171
172 searcher.close();
173 }
174
175 }