001 /*
002 *
003 * $Revision: 15593 $ $Date: 2009-07-23 13:14:34 +0200 (Thu, 23 Jul 2009) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.services.fieldquery;
025
026 import java.util.ArrayList;
027 import java.util.HashMap;
028 import java.util.Iterator;
029 import java.util.LinkedList;
030 import java.util.List;
031 import java.util.Map;
032 import java.util.NoSuchElementException;
033
034 import org.apache.log4j.Logger;
035 import org.mycore.common.MCRUsageException;
036 import org.mycore.parsers.bool.MCRAndCondition;
037 import org.mycore.parsers.bool.MCRCondition;
038 import org.mycore.parsers.bool.MCRFalseCondition;
039 import org.mycore.parsers.bool.MCRNotCondition;
040 import org.mycore.parsers.bool.MCROrCondition;
041 import org.mycore.parsers.bool.MCRSetCondition;
042 import org.mycore.parsers.bool.MCRTrueCondition;
043
044 /**
045 * Executes queries on all configured searchers and returns query results.
046 *
047 * @author Frank Lützenkirchen
048 */
049 public class MCRQueryManager {
050
051 protected static final Logger LOGGER = Logger.getLogger(MCRQueryManager.class);
052
053 /**
054 * Executes a query and returns the query results. If the query contains
055 * fields from different indexes or should span across multiple hosts, the
056 * results of multiple searchers are combined.
057 *
058 * @param query
059 * the query
060 *
061 * @return the query results
062 */
063 public static MCRResults search(MCRQuery query) {
064 return search(query, false);
065 }
066
067 /**
068 * Executes a query and returns the query results. If the query contains
069 * fields from different indexes or should span across multiple hosts, the
070 * results of multiple searchers are combined.
071 *
072 * @param query
073 * the query
074 * @param comesFromRemoteHost
075 * if true, this query is originated from a remote host, so no
076 * sorting of results is done for performance reasons
077 *
078 * @return the query results
079 */
080 public static MCRResults search(MCRQuery query, boolean comesFromRemoteHost) {
081 long start = System.currentTimeMillis();
082
083 int maxResults = query.getMaxResults();
084
085 // Build results of local query
086 LOGGER.info("Query: " + query.getCondition().toString());
087 MCRResults results = buildResults(query.getCondition(), maxResults, query.getSortBy(), comesFromRemoteHost);
088 if (results.isReadonly() && !query.getHosts().isEmpty()) {
089 //need to produce a mergeable MCRResults
090 MCRResults mResults = new MCRResults();
091 for (MCRHit hit : results) {
092 mResults.addHit(hit);
093 }
094 results = mResults;
095 }
096
097 // Add results of remote query
098 MCRQueryClient.search(query, results);
099
100 // Add missing sort data and sort results, if not already sorted
101 sortResults(query, results);
102
103 // After sorting, cut result list to maxResults if needed
104 results.cutResults(maxResults);
105
106 long qtime = System.currentTimeMillis() - start;
107 LOGGER.debug("total query time: " + qtime);
108
109 return results;
110 }
111
112 /**
113 * Sorts the results if not already done and if the query contains sort
114 * criteria. Data needed for sorting is automatically added to the hits if
115 * not present.
116 *
117 * @param query
118 * the original query
119 * @param results
120 * the result list to be sorted
121 */
122 private static void sortResults(MCRQuery query, final MCRResults results) {
123 List<MCRSortBy> sortBy = query.getSortBy();
124 if ((results.getNumHits() == 0) || results.isSorted() || sortBy.isEmpty())
125 return;
126
127 // Iterator over all MCRHits that have no sort data set
128 Iterator<MCRHit> hitIterator = new Iterator<MCRHit>() {
129 private int i = 0;
130
131 private int max = results.getNumHits();
132
133 public void remove() {
134 throw new UnsupportedOperationException();
135 }
136
137 public boolean hasNext() {
138 for (; i < max; i++)
139 if (!results.getHit(i).hasSortData())
140 return true;
141
142 return false;
143 }
144
145 public MCRHit next() {
146 if (!hasNext())
147 throw new NoSuchElementException();
148
149 return results.getHit(i++);
150 }
151 };
152
153 String index = sortBy.get(0).getField().getIndex();
154 MCRSearcher searcher = MCRSearcherFactory.getSearcherForIndex(index);
155 searcher.addSortData(hitIterator, sortBy);
156 results.sortBy(query.getSortBy());
157 }
158
159 /**
160 * If a condition references fields from multiple indexes, this constant is
161 * returned
162 */
163 private final static String mixed = "--mixed--";
164
165 /**
166 * Returns the ID of the index of all fields referenced in this condition.
167 * If the fields come from multiple indexes, the constant mixed is returned.
168 */
169 private static String getIndex(MCRCondition cond) {
170 if (cond instanceof MCRQueryCondition)
171 return ((MCRQueryCondition) cond).getField().getIndex();
172 else if (cond instanceof MCRNotCondition)
173 return getIndex(((MCRNotCondition) cond).getChild());
174
175 List<MCRCondition> children = ((MCRSetCondition) cond).getChildren();
176
177 String index = getIndex(children.get(0));
178 for (int i = 1; i < children.size(); i++) {
179 String other = getIndex(children.get(i));
180 if (!index.equals(other))
181 return mixed; // mixed indexes here!
182 }
183 return index;
184 }
185
186 /** Executes query, if necessary splits into subqueries for each index */
187 private static MCRResults buildResults(MCRCondition cond, int maxResults, List<MCRSortBy> sortBy, boolean addSortData) {
188 if (cond instanceof MCRTrueCondition || cond instanceof MCRFalseCondition) {
189 String msg = "Your query makes no sense. What do you mean when you search for '" + cond.toString() + "'?";
190 throw new MCRUsageException(msg);
191 }
192 String index = getIndex(cond);
193 if (index != mixed) {
194 // All fields are from same index, just one searcher
195 MCRSearcher searcher = MCRSearcherFactory.getSearcherForIndex(index);
196 // Filter sort criteria only for those fields of the same index
197 List<MCRSortBy> sortByCopy = new ArrayList<MCRSortBy>();
198 for (MCRSortBy sb : sortBy)
199 if (sb.getField().getIndex().equals(index))
200 sortByCopy.add(sb);
201 return searcher.search(cond, maxResults, sortByCopy, addSortData);
202 } else if (cond instanceof MCRSetCondition) {
203 return buildCombinedResults((MCRSetCondition) cond, sortBy, false);
204 } else { // Move not down: not(a and/or b)=(not a) and/or (not b)
205 MCRCondition child = ((MCRNotCondition) cond).getChild();
206 return buildCombinedResults((MCRSetCondition) child, sortBy, true);
207 }
208 }
209
210 /** Split query into subqueries for each index, recombine results */
211 private static MCRResults buildCombinedResults(MCRSetCondition cond, List<MCRSortBy> sortBy, boolean not) {
212 boolean and = (cond instanceof MCRAndCondition);
213 HashMap<String, List<MCRCondition>> table = groupConditionsByIndex(cond);
214 List<MCRResults> results = new LinkedList<MCRResults>();
215
216 for (Map.Entry<String, List<MCRCondition>> mapEntry : table.entrySet()) {
217 List<MCRCondition> conditions = mapEntry.getValue();
218 String index = mapEntry.getKey();
219 if (!index.equals(mixed)) {
220 MCRCondition subCond = buildSubCondition(conditions, and, not);
221 results.add(buildResults(subCond, 0, sortBy, true));
222 } else
223 for (MCRCondition subCond : conditions) {
224 if (not)
225 subCond = new MCRNotCondition(subCond);
226 results.add(buildResults(subCond, 0, sortBy, true));
227 }
228 }
229
230 if (and)
231 return MCRResults.intersect(results.toArray(new MCRResults[0]));
232 else
233 return MCRResults.union(results.toArray(new MCRResults[0]));
234 }
235
236 /**
237 * Build a table from index ID to a List of conditions referencing this
238 * index
239 */
240 private static HashMap<String, List<MCRCondition>> groupConditionsByIndex(MCRSetCondition cond) {
241 HashMap<String, List<MCRCondition>> table = new HashMap<String, List<MCRCondition>>();
242 List<MCRCondition> children = cond.getChildren();
243
244 for (MCRCondition child : children) {
245 String index = getIndex(child);
246 List<MCRCondition> conditions = table.get(index);
247 if (conditions == null) {
248 conditions = new ArrayList<MCRCondition>();
249 table.put(index, conditions);
250 }
251 conditions.add(child);
252 }
253 return table;
254 }
255
256 /** Builds a new condition for all fields from one single index */
257 private static MCRCondition buildSubCondition(List<MCRCondition> conditions, boolean and, boolean not) {
258 MCRCondition subCond;
259 if (conditions.size() == 1)
260 subCond = conditions.get(0);
261 else if (and)
262 subCond = new MCRAndCondition().addAll(conditions);
263 else
264 subCond = new MCROrCondition().addAll(conditions);
265 if (not)
266 subCond = new MCRNotCondition(subCond);
267 return subCond;
268 }
269 }