001 /*
002 *
003 * $Revision: 14971 $ $Date: 2009-03-20 09:05:02 +0100 (Fri, 20 Mar 2009) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.services.fieldquery;
025
026 import java.util.ArrayList;
027 import java.util.Calendar;
028 import java.util.List;
029 import java.util.StringTokenizer;
030 import java.util.regex.Matcher;
031 import java.util.regex.Pattern;
032
033 import com.ibm.icu.util.GregorianCalendar;
034
035 import org.apache.log4j.Logger;
036 import org.jdom.Element;
037 import org.mycore.common.MCRCalendar;
038 import org.mycore.datamodel.metadata.MCRMetaISO8601Date;
039 import org.mycore.parsers.bool.MCRAndCondition;
040 import org.mycore.parsers.bool.MCRBooleanClauseParser;
041 import org.mycore.parsers.bool.MCRCondition;
042 import org.mycore.parsers.bool.MCRNotCondition;
043 import org.mycore.parsers.bool.MCROrCondition;
044 import org.mycore.parsers.bool.MCRParseException;
045 import org.mycore.parsers.bool.MCRSetCondition;
046
047 /**
048 * Parses query conditions for use in MCRSearcher.
049 *
050 * @see MCRSearcher
051 *
052 * @author Frank L\u00fctzenkirchen
053 */
054 public class MCRQueryParser extends MCRBooleanClauseParser {
055
056 private final static Logger LOGGER = Logger.getLogger(MCRQueryParser.class);
057
058 /**
059 * Parses XML element containing a simple query condition
060 *
061 * @param e
062 * the 'condition' element
063 * @return the parsed MCRQueryCondition object
064 */
065 protected MCRCondition parseSimpleCondition(Element e) throws MCRParseException {
066 String name = e.getName();
067
068 if (!name.equals("condition"))
069 throw new MCRParseException("Not a valid <" + name + ">");
070
071 String field = e.getAttributeValue("field");
072 String opera = e.getAttributeValue("operator");
073 String value = e.getAttributeValue("value");
074
075 return buildConditions(field, opera, value);
076 }
077
078 /**
079 * Builds a new MCRCondition from parsed elements
080 *
081 * @param field
082 * one or more field names, separated by comma
083 * @param oper
084 * the condition operator
085 * @param value
086 * the condition value
087 * @return
088 */
089 private MCRCondition buildConditions(String field, String oper, String value) {
090 if (field.contains(","))
091 { // Multiple fields in one condition, combine with OR
092 StringTokenizer st = new StringTokenizer(field, ", ");
093 MCROrCondition oc = new MCROrCondition();
094 while (st.hasMoreTokens())
095 oc.addChild(buildConditions(st.nextToken(), oper, value));
096 return oc;
097 } else if (field.contains("-"))
098 { // date and MCRMetaHistoryDate condition von-bis
099 StringTokenizer st = new StringTokenizer(field, "- ");
100 String fieldFrom = st.nextToken();
101 String fieldTo = st.nextToken();
102 if (oper.equals("=")) {
103 // von-bis = x --> (von <= x) AND (bis >= x)
104 MCRAndCondition ac = new MCRAndCondition();
105 ac.addChild(buildCondition(fieldFrom, "<=", value, true));
106 ac.addChild(buildCondition(fieldTo, ">=", value, true ));
107 return ac;
108 } else if (oper.contains("<"))
109 return buildCondition(fieldFrom, oper, value, true);
110 else
111 // oper.contains( ">" )
112 return buildCondition(fieldTo, oper, value, true);
113 } else
114 return buildCondition(field, oper, value, false);
115 }
116
117 /**
118 * Builds a new MCRQueryCondition
119 *
120 * @param field
121 * the name of the search field
122 * @param oper
123 * the condition operator
124 * @param value
125 * the condition value
126 * @param vonbis
127 * is a 'from to' query, used for date and MetaHistoryDate
128 * @return
129 */
130 private MCRQueryCondition buildCondition(String field, String oper, String value, boolean vonbis) {
131 MCRFieldDef def = MCRFieldDef.getDef(field);
132 if (def == null)
133 throw new MCRParseException("Field not defined: <" + field + ">");
134 String datatype = def.getDataType();
135 if (!"date".equals(datatype) && vonbis)
136 value = normalizeHistoryDate(oper,value);
137 LOGGER.debug(value);
138 if ("date".equals(datatype) && "TODAY".equals(value))
139 value = getToday();
140 return new MCRQueryCondition(def, oper, value);
141 }
142
143 private String getToday() {
144 GregorianCalendar cal = new GregorianCalendar();
145 int year = cal.get(Calendar.YEAR);
146 int month = cal.get(Calendar.MONTH) + 1;
147 int day = cal.get(Calendar.DAY_OF_MONTH);
148 return String.valueOf(day) + "." + String.valueOf(month) + "." + String.valueOf(year);
149 }
150
151 /** Pattern for MCRQueryConditions expressed as String */
152 private static Pattern pattern = Pattern.compile("([^ \t\r\n]+)\\s+([^ \t\r\n]+)\\s+([^ \"\t\r\n]+|\"[^\"]*\")");
153
154 /**
155 * Parses a String containing a simple query condition, for example: (title
156 * contains "Java") and (creatorID = "122132131")
157 *
158 * @param s
159 * the condition as a String
160 * @return the parsed MCRQueryCondition object
161 */
162 protected MCRCondition parseSimpleCondition(String s) throws MCRParseException {
163 Matcher m = pattern.matcher(s);
164
165 if (!m.find())
166 throw new MCRParseException("Not a valid condition: " + s);
167
168 String field = m.group(1);
169 String operator = m.group(2);
170 String value = m.group(3);
171
172 if (value.startsWith("\"") && value.endsWith("\"")) {
173 value = value.substring(1, value.length() - 1);
174 }
175
176 return buildConditions(field, operator, value);
177 }
178
179 public MCRCondition parse(Element condition) throws MCRParseException {
180 MCRCondition cond = super.parse(condition);
181 return normalizeCondition(cond);
182 }
183
184 public MCRCondition parse(String s) throws MCRParseException {
185 MCRCondition cond = super.parse(s);
186 return normalizeCondition(cond);
187 }
188
189 /**
190 * Normalizes a parsed query condition. AND/OR conditions that just have one
191 * child will be replaced with that child. NOT(NOT(X)) will be normalized to X.
192 * (A AND (b AND c)) will be normalized to (A AND B AND C), same for nested ORs.
193 * AND/OR/NOT conditions with no child conditions will be removed.
194 * Conditions that use the operator "contains" will be splitted into multiple
195 * simpler conditions if the condition value contains phrases surrounded
196 * by '...' or wildcard search with * or ?.
197 */
198 static MCRCondition normalizeCondition(MCRCondition cond) {
199 if (cond == null) return null;
200 else if (cond instanceof MCRSetCondition) {
201 MCRSetCondition sc = (MCRSetCondition) cond;
202 List<MCRCondition> children = sc.getChildren();
203 sc = ( sc instanceof MCRAndCondition ? new MCRAndCondition() : new MCROrCondition() );
204 for (MCRCondition child : children )
205 {
206 child = normalizeCondition(child);
207 if( child == null )
208 continue; // Remove empty child conditions
209 else if ( (child instanceof MCRSetCondition) && sc.getOperator().equals( ((MCRSetCondition)child).getOperator() ) )
210 {
211 // Replace (a AND (b AND c)) with (a AND b AND c), same for OR
212 sc.addAll(((MCRSetCondition)child).getChildren());
213 }
214 else sc.addChild( child );
215 }
216 children = sc.getChildren();
217 if (children.size() == 0)
218 return null; // Completely remove empty AND condition
219 else if( children.size() == 1 )
220 return children.get(0); // Replace AND with just one child
221 else
222 return sc;
223 } else if (cond instanceof MCRNotCondition) {
224 MCRNotCondition nc = (MCRNotCondition) cond;
225 MCRCondition child = normalizeCondition( nc.getChild() );
226 if(child == null )
227 return null; // Remove empty NOT
228 else if( child instanceof MCRNotCondition ) // Replace NOT(NOT(x)) with x
229 return normalizeCondition( ((MCRNotCondition)child).getChild() );
230 else
231 return new MCRNotCondition(child);
232 } else if (cond instanceof MCRQueryCondition) {
233 MCRQueryCondition qc = (MCRQueryCondition) cond;
234
235 // Normalize values in date conditions
236 if (qc.getField().getDataType().equals("date")) {
237 try {
238 MCRMetaISO8601Date iDate = new MCRMetaISO8601Date();
239 iDate.setDate(qc.getValue());
240 String sDate = iDate.getISOString().substring(0, 10);
241 return new MCRQueryCondition(qc.getField(), qc.getOperator(), sDate);
242 } catch (Exception ex) {
243 LOGGER.debug(ex);
244 return qc;
245 }
246 }
247
248 if (!qc.getOperator().equals("contains"))
249 return qc;
250
251 // Normalize value when contains operator is used
252 List<String> values = new ArrayList<String>();
253
254 String phrase = null;
255 StringTokenizer st = new StringTokenizer(qc.getValue(), " ");
256 while (st.hasMoreTokens()) {
257 String value = st.nextToken();
258 if ((phrase != null)) // we are within phrase
259 {
260 if (value.endsWith("'")) // end of phrase
261 {
262 value = phrase + " " + value;
263 values.add(value);
264 phrase = null;
265 } else // in middle of phrase
266 {
267 phrase = phrase + " " + value;
268 }
269 } else if (value.startsWith("'")) // begin of phrase
270 {
271 if (value.endsWith("'")) // one-word phrase
272 {
273 values.add(value.substring(1, value.length() - 1));
274 } else {
275 phrase = value;
276 }
277 } else if (value.startsWith("-'")) // begin of NOT phrase
278 {
279 if (value.endsWith("'")) // one-word phrase
280 {
281 values.add("-" + value.substring(2, value.length() - 1));
282 } else {
283 phrase = value;
284 }
285 } else
286 values.add(value);
287 }
288
289 MCRAndCondition ac = new MCRAndCondition();
290 for (int i = 0; i < values.size(); i++) {
291 String value = values.get(i);
292 if (value.startsWith("'")) // phrase
293 ac.addChild(new MCRQueryCondition(qc.getField(), "phrase", value.substring(1, value.length() - 1)));
294 else if (value.startsWith("-'")) // NOT phrase
295 ac.addChild( new MCRNotCondition(new MCRQueryCondition(qc.getField(), "phrase", value.substring(2, value.length() - 1))));
296 else if ((value.indexOf("*") >= 0) || (value.indexOf("?") >= 0)) // like
297 ac.addChild(new MCRQueryCondition(qc.getField(), "like", value));
298 else if (value.startsWith("-")) // -word means "NOT word"
299 {
300 MCRCondition subCond = new MCRQueryCondition(qc.getField(), "contains", value.substring(1));
301 ac.addChild(new MCRNotCondition(subCond));
302 } else
303 ac.addChild(new MCRQueryCondition(qc.getField(), "contains", value));
304 }
305
306 if (values.size() == 1)
307 return (MCRCondition) (ac.getChildren().get(0));
308 else
309 return ac;
310 } else
311 return cond;
312 }
313
314 /** Used for input validation in editor search form */
315 public static boolean validateQueryExpression(String query) {
316 try {
317 MCRCondition cond = new MCRQueryParser().parse(query);
318 return (cond != null);
319 } catch (Throwable t) {
320 return false;
321 }
322 }
323
324 /**
325 * Normalizes MCRMetaHistoryDate values used in a query. If the
326 * date is incomplete (for example, only the year given), it depends
327 * on the search operator used, whether the upper (31th Dec of year)
328 * or lower (1st Jan of year) bound is used.
329 *
330 * @param operator the search operator, one of >, >=, <, <=
331 * @param date the date to search for
332 * @return the julian day number, as a String
333 */
334 private static String normalizeHistoryDate(String operator, String date) {
335 GregorianCalendar cal = null;
336 if (operator.equals(">"))
337 cal = MCRCalendar.getGregorianHistoryDate(date, true);
338 if (operator.equals("<"))
339 cal = MCRCalendar.getGregorianHistoryDate(date, false);
340 if (operator.equals(">="))
341 cal = MCRCalendar.getGregorianHistoryDate(date, false);
342 if (operator.equals("<="))
343 cal = MCRCalendar.getGregorianHistoryDate(date, true);
344 return String.valueOf(MCRCalendar.getJulianDayNumber(cal));
345 }
346 }