001 /*
002 *
003 * $Revision: 14943 $ $Date: 2009-03-18 12:07:51 +0100 (Wed, 18 Mar 2009) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.backend.jdom;
025
026 import java.io.ByteArrayOutputStream;
027 import java.util.Enumeration;
028 import java.util.HashMap;
029 import java.util.Iterator;
030 import java.util.List;
031 import java.util.Properties;
032 import java.util.StringTokenizer;
033 import java.util.regex.Pattern;
034
035 import javax.xml.transform.Source;
036 import javax.xml.transform.Transformer;
037 import javax.xml.transform.TransformerConfigurationException;
038 import javax.xml.transform.TransformerFactory;
039 import javax.xml.transform.stream.StreamResult;
040
041 import org.apache.log4j.Logger;
042 import org.jdom.Document;
043 import org.jdom.Element;
044 import org.jdom.Namespace;
045 import org.jdom.output.Format;
046 import org.jdom.output.XMLOutputter;
047 import org.jdom.transform.JDOMSource;
048 import org.mycore.common.MCRConfiguration;
049 import org.mycore.common.MCRConstants;
050 import org.mycore.common.MCRException;
051 import org.mycore.datamodel.metadata.MCRObject;
052 import org.mycore.datamodel.metadata.MCRObjectID;
053 import org.mycore.datamodel.common.MCRXMLTableManager;
054 import org.mycore.frontend.editor.MCRInputValidator;
055 import org.mycore.parsers.bool.MCRAndCondition;
056 import org.mycore.parsers.bool.MCRCondition;
057 import org.mycore.parsers.bool.MCRNotCondition;
058 import org.mycore.parsers.bool.MCROrCondition;
059 import org.mycore.services.fieldquery.MCRData2Fields;
060 import org.mycore.services.fieldquery.MCRFieldDef;
061 import org.mycore.services.fieldquery.MCRFieldValue;
062 import org.mycore.services.fieldquery.MCRHit;
063 import org.mycore.services.fieldquery.MCRResults;
064 import org.mycore.services.fieldquery.MCRSearcher;
065 import org.mycore.services.fieldquery.MCRQueryCondition;
066 import org.mycore.services.fieldquery.MCRSortBy;
067
068 /**
069 * Implements a searcher and indexer for MCRObject metadata using only data in
070 * memory without any persistent structures. When data is indexed, the values
071 * are stored as XML document in memory. When data is searched, the query is
072 * transformed to a XSL condition and run against the XML in memory. Before
073 * first use of instances of this class, all MCRObject metadata is loaded from
074 * persistent store and indexed in memory. This class may also be useful for
075 * learning how to implement MCRSearchers and indexers.
076 *
077 * @author Frank Lützenkirchen
078 */
079 public class MCRJDOMSearcher extends MCRSearcher {
080 /** The logger */
081 private final static Logger LOGGER = Logger.getLogger(MCRJDOMSearcher.class);
082
083 /**
084 * Map where key is entryID and value is XML document containing indexed
085 * data
086 */
087 private HashMap<String,Document> map = new HashMap<String,Document>();
088
089 /** XSL transformer factory */
090 private TransformerFactory factory = TransformerFactory.newInstance();
091
092 public void init(String ID) {
093 super.init(ID);
094
095 MCRXMLTableManager mcr_xml = MCRXMLTableManager.instance();
096
097 // Find all types of MCRObject data:
098 String cfgPrefix = "MCR.Metadata.Config.";
099 Properties props = MCRConfiguration.instance().getProperties(cfgPrefix);
100 for (Enumeration keys = props.keys(); keys.hasMoreElements();) {
101 String key = (String) (keys.nextElement());
102 String type = key.substring(cfgPrefix.length());
103 if ("derivate".equals(type))
104 continue;
105
106 LOGGER.debug("Now indexing metadata of all stored MCRObjects from type " + type);
107
108 try {
109 List IDs = mcr_xml.retrieveAllIDs(type);
110 int numObjects = IDs.size();
111 for (int i = 0; i < numObjects; i++) {
112 String sid = (String) (IDs.get(i));
113 MCRObject obj = new MCRObject();
114 MCRObjectID oid = new MCRObjectID(sid);
115 obj.setId(oid);
116 obj.setFromXML(mcr_xml.retrieveAsXML(oid), false);
117 List fields = MCRData2Fields.buildFields(obj, index);
118 addToIndex(sid, sid, fields);
119 }
120 } catch (Exception ex) {
121 LOGGER.error(ex);
122 }
123 }
124 }
125
126 public void addToIndex(String entryID, String returnID, List fields) {
127 if ((fields == null) || (fields.size() == 0)) {
128 return;
129 }
130
131 LOGGER.info("MCRJDOMSearcher indexing data of " + entryID);
132 Element data = new Element("data");
133 data.setAttribute("returnID", returnID);
134
135 for (int i = 0; i < fields.size(); i++) {
136 MCRFieldValue fv = (MCRFieldValue) (fields.get(i));
137 Element field = new Element(fv.getField().getName());
138 field.addContent(fv.getValue());
139 data.addContent(field);
140 }
141
142 if (LOGGER.isDebugEnabled()) {
143 String s = new XMLOutputter(Format.getPrettyFormat()).outputString(data);
144 LOGGER.debug("----------" + entryID + "----------");
145 LOGGER.debug(s);
146 LOGGER.debug("-----------------------------------");
147 }
148
149 map.put(entryID, new Document(data));
150 }
151
152 public void removeFromIndex(String entryID) {
153 LOGGER.info("MCRJDOMSearcher removing indexed data of " + entryID);
154 map.remove(entryID);
155 }
156
157 public MCRResults search(MCRCondition condition, int maxResults, List sortBy, boolean addSortData) {
158 String xslCondition = buildXSLCondition(condition);
159 LOGGER.debug("MCRJDOMSearcher searching for " + xslCondition);
160
161 Transformer transformer = buildStylesheet(xslCondition);
162 ByteArrayOutputStream out = new ByteArrayOutputStream();
163
164 MCRResults results = new MCRResults();
165
166 for (Iterator keys = map.keySet().iterator(); keys.hasNext();) {
167 String entryID = (String) (keys.next());
168 Document xml = map.get(entryID);
169
170 if (matches(xml, transformer, out)) {
171 String returnID = xml.getRootElement().getAttributeValue("returnID");
172 MCRHit hit = new MCRHit(returnID);
173
174 // Add values of all fields that may be sort criteria
175 for (int i = 0; i < sortBy.size(); i++) {
176 MCRSortBy by = (MCRSortBy) (sortBy.get(i));
177
178 List values = xml.getRootElement().getChildren(by.getField().getName());
179 for (Iterator itv = values.iterator(); itv.hasNext();) {
180 Element value = (Element) (itv.next());
181 MCRFieldDef def = MCRFieldDef.getDef(value.getName());
182 hit.addSortData(new MCRFieldValue(def, value.getText()));
183 }
184 }
185
186 results.addHit(hit);
187 }
188
189 if (sortBy.isEmpty() && (maxResults > 0) && (results.getNumHits() >= maxResults))
190 break;
191 }
192
193 LOGGER.debug("MCRJDOMSearcher results completed");
194 return results;
195 }
196
197 /**
198 * Returns true if the xml input document matches the xsl when condition in
199 * the xsl stylesheet.
200 */
201 private boolean matches(Document xml, Transformer transformer, ByteArrayOutputStream out) {
202 Source xmlsrc = new JDOMSource(xml);
203
204 try {
205 out.reset();
206 transformer.transform(xmlsrc, new StreamResult(out));
207 out.flush();
208
209 return "t".equals(out.toString("UTF-8"));
210 } catch (Exception ex) {
211 LOGGER.warn("Exception while testing indexed data with XSL condition", ex);
212
213 return false;
214 }
215 }
216
217 /**
218 * XSL stylesheet template where only the when test attribute has to be
219 * added
220 */
221 private Document xslTemplate = null;
222
223 /** Prepares an XSL stylesheet in memory used as template */
224 private Document prepareStylesheet() {
225 Namespace extns = Namespace.getNamespace("ext", "xalan://org.mycore.backend.jdom.MCRJDOMSearcher");
226
227 Element stylesheet = new Element("stylesheet");
228 stylesheet.setAttribute("version", "1.0");
229 stylesheet.setNamespace(MCRConstants.XSL_NAMESPACE);
230 stylesheet.addNamespaceDeclaration(MCRFieldDef.xalanns);
231 stylesheet.addNamespaceDeclaration(extns);
232 stylesheet.setAttribute("extension-element-prefixes", "ext");
233
234 Element output = new Element("output", MCRConstants.XSL_NAMESPACE);
235 output.setAttribute("method", "text");
236 stylesheet.addContent(output);
237
238 Element template = new Element("template", MCRConstants.XSL_NAMESPACE);
239 template.setAttribute("match", "/data");
240 stylesheet.addContent(template);
241
242 Element choose = new Element("choose", MCRConstants.XSL_NAMESPACE);
243 template.addContent(choose);
244
245 Element when = new Element("when", MCRConstants.XSL_NAMESPACE);
246 when.addContent("t");
247
248 Element otherwise = new Element("otherwise", MCRConstants.XSL_NAMESPACE);
249 otherwise.addContent("f");
250 choose.addContent(when).addContent(otherwise);
251
252 return new Document(stylesheet);
253 }
254
255 /** Adds the condition as xsl when test attribute to the stylesheet template */
256 private Transformer buildStylesheet(String condition) {
257 if (xslTemplate == null) {
258 xslTemplate = prepareStylesheet();
259 }
260
261 Document xsl = (Document) (xslTemplate.clone());
262 xsl.getRootElement().getChild("template", MCRConstants.XSL_NAMESPACE).getChild("choose", MCRConstants.XSL_NAMESPACE).getChild("when", MCRConstants.XSL_NAMESPACE).setAttribute("test", condition);
263 Source xslsrc = new JDOMSource(xsl);
264 Transformer transformer;
265 try {
266 transformer = factory.newTransformer(xslsrc);
267 } catch (TransformerConfigurationException ex) {
268 String msg = "Could not compile XSL stylesheet to be used for searching";
269 throw new MCRException(msg, ex);
270 }
271
272 return transformer;
273 }
274
275 /** Converter from MCRCondition to XSL test condition */
276 private String buildXSLCondition(MCRCondition cond) {
277 if (cond instanceof MCRQueryCondition) {
278 MCRQueryCondition sc = (MCRQueryCondition) cond;
279 StringBuffer sb = new StringBuffer(sc.getField().getName());
280 sb.append("[");
281
282 if ("= < > <= >=".indexOf(sc.getOperator()) >= 0) {
283 String type = sc.getField().getDataType();
284
285 if ("integer".equals(type) || "decimal".equals(type)) {
286 sb.append("number(text()) ");
287 sb.append(sc.getOperator());
288 sb.append(" ");
289 sb.append(sc.getValue());
290 } else {
291 sb.append("ext:compare(text(),'");
292 sb.append(sc.getValue());
293 sb.append("','");
294 sb.append(sc.getOperator());
295 sb.append("')");
296 }
297 } else if ("phrase".equals(sc.getOperator())) {
298 sb.append("contains(text(),'");
299 sb.append(sc.getValue()).append("')");
300 } else if ("contains".equals(sc.getOperator())) {
301 sb.append("ext:contains(text(),'");
302 sb.append(sc.getValue()).append("')");
303 } else if ("like".equals(sc.getOperator())) {
304 sb.append("ext:like(text(),'");
305 sb.append(sc.getValue()).append("')");
306 }
307
308 sb.append("]");
309
310 return sb.toString();
311 } else if (cond instanceof MCRNotCondition) {
312 MCRNotCondition nc = (MCRNotCondition) cond;
313 return "not(" + buildXSLCondition(nc.getChild()) + ")";
314 } else if (cond instanceof MCRAndCondition) {
315 MCRAndCondition ac = (MCRAndCondition) cond;
316 return buildXSLCondition(ac.getChildren(), "and");
317 } else if (cond instanceof MCROrCondition) {
318 MCROrCondition oc = (MCROrCondition) cond;
319 return buildXSLCondition(oc.getChildren(), "or");
320 } else {
321 return "";
322 }
323 }
324
325 /** Builds a combined and/or XSL condition */
326 private String buildXSLCondition(List children, String operator) {
327 StringBuffer sb = new StringBuffer();
328 sb.append("(");
329
330 for (int i = 0; i < children.size(); i++) {
331 MCRCondition sc = (MCRCondition) (children.get(i));
332 sb.append(buildXSLCondition(sc));
333
334 if (i < (children.size() - 1)) {
335 sb.append(" ").append(operator).append(" ");
336 }
337 }
338
339 sb.append(")");
340 return sb.toString();
341 }
342
343 /** Implements the contains operator as Xalan function extension */
344 public static boolean contains(String value, String words) {
345 if ((value == null) || (value.trim().length() == 0)) {
346 return false;
347 }
348
349 if ((words == null) || (words.trim().length() == 0)) {
350 return true;
351 }
352
353 StringTokenizer st = new StringTokenizer(words);
354 while (st.hasMoreTokens())
355
356 if (value.indexOf(st.nextToken()) == -1) {
357 return false;
358 }
359
360 return true;
361 }
362
363 /** Implements the like operator as Xalan function extension */
364 public static boolean like(String value, String pattern) {
365 if ((value == null) || (value.trim().length() == 0)) {
366 return false;
367 }
368
369 if ((pattern == null) || (pattern.trim().length() == 0)) {
370 return true;
371 }
372
373 if (!pattern.endsWith("*"))
374 pattern = pattern + "*";
375 if (!pattern.startsWith("*"))
376 pattern = "*" + pattern;
377
378 pattern = pattern.replaceAll("\\?", ".");
379 pattern = pattern.replaceAll("\\*", "(.*)");
380
381 LOGGER.debug("Search regex " + pattern + " in text \"" + value + "\"");
382
383 return Pattern.matches(pattern, value);
384 }
385
386 /** Implements a string compare operator as Xalan function extension */
387 public static boolean compare(String valueA, String valueB, String operator) {
388 return MCRInputValidator.instance().compare(valueA, valueB, operator, "string", null);
389 }
390
391 public void addSortData(Iterator<MCRHit> hits, List<MCRSortBy> sortBy) {
392 while (hits.hasNext()) {
393 MCRHit hit = (MCRHit) hits.next();
394 Document data = map.get(hit.getID());
395
396 for (int j = 0; j < sortBy.size(); j++) {
397 MCRFieldDef fd = sortBy.get(j).getField();
398 List values = data.getRootElement().getChildren(fd.getName());
399 for (Iterator itv = values.iterator(); itv.hasNext();) {
400 Element value = (Element) (itv.next());
401 hit.addSortData(new MCRFieldValue(fd, value.getText()));
402 }
403 }
404 }
405 }
406 }