/*
 * This file is part of ***  M y C o R e  ***
 * See http://www.mycore.de/ for details.
 *
 * MyCoRe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * MyCoRe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
 */
18  
19  package org.mycore.services.fieldquery;
20  
21  import java.time.LocalDate;
22  import java.time.ZoneOffset;
23  import java.time.format.DateTimeFormatter;
24  import java.util.ArrayList;
25  import java.util.List;
26  import java.util.Locale;
27  import java.util.StringTokenizer;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.jdom2.Element;
32  import org.mycore.parsers.bool.MCRAndCondition;
33  import org.mycore.parsers.bool.MCRBooleanClauseParser;
34  import org.mycore.parsers.bool.MCRCondition;
35  import org.mycore.parsers.bool.MCRNotCondition;
36  import org.mycore.parsers.bool.MCROrCondition;
37  import org.mycore.parsers.bool.MCRParseException;
38  import org.mycore.parsers.bool.MCRSetCondition;
39  
40  /**
41   * Parses query conditions for use in MCRSearcher.
42   *
43   * @author Frank Lützenkirchen
44   */
45  public class MCRQueryParser extends MCRBooleanClauseParser<Void> {
46  
47      /**
48       * Parses XML element containing a simple query condition
49       *
50       * @param e
51       *            the 'condition' element
52       * @return the parsed MCRQueryCondition object
53       */
54      @Override
55      protected MCRCondition<Void> parseSimpleCondition(Element e) throws MCRParseException {
56          String name = e.getName();
57  
58          if (!name.equals("condition")) {
59              throw new MCRParseException("Not a valid <" + name + ">");
60          }
61  
62          String field = e.getAttributeValue("field");
63          String opera = e.getAttributeValue("operator");
64          String value = e.getAttributeValue("value");
65  
66          return buildConditions(field, opera, value);
67      }
68  
69      /**
70       * Builds a new MCRCondition from parsed elements
71       *
72       * @param field
73       *            one or more field names, separated by comma
74       * @param oper
75       *            the condition operator
76       * @param value
77       *            the condition value
78       * @return
79       */
80      private MCRCondition<Void> buildConditions(String field, String oper, String value) {
81          if (field.contains(",")) { // Multiple fields in one condition, combine with OR
82              StringTokenizer st = new StringTokenizer(field, ", ");
83              MCROrCondition<Void> oc = new MCROrCondition<>();
84              while (st.hasMoreTokens()) {
85                  oc.addChild(buildConditions(st.nextToken(), oper, value));
86              }
87              return oc;
88          } else if (field.contains("-")) { // date condition von-bis
89              StringTokenizer st = new StringTokenizer(field, "- ");
90              String fieldFrom = st.nextToken();
91              String fieldTo = st.nextToken();
92              if (oper.equals("=")) {
93                  // von-bis = x --> (von <= x) AND (bis >= x)
94                  MCRAndCondition<Void> ac = new MCRAndCondition<>();
95                  ac.addChild(buildCondition(fieldFrom, "<=", value));
96                  ac.addChild(buildCondition(fieldTo, ">=", value));
97                  return ac;
98              } else if (oper.contains("<")) {
99                  return buildCondition(fieldFrom, oper, value);
100             } else {
101                 // oper.contains( ">" )
102                 return buildCondition(fieldTo, oper, value);
103             }
104         } else {
105             return buildCondition(field, oper, value);
106         }
107     }
108 
109     /**
110      * Builds a new MCRQueryCondition
111      *
112      * @param field
113      *            the name of the search field
114      * @param oper
115      *            the condition operator
116      * @param value
117      *            the condition value
118      * @return
119      */
120     private MCRQueryCondition buildCondition(String field, String oper, String value) {
121         if ("TODAY".equals(value)) {
122             value = getToday();
123         }
124         return new MCRQueryCondition(field, oper, value);
125     }
126 
127     private String getToday() {
128         return LocalDate.now(ZoneOffset.systemDefault())
129             .format(DateTimeFormatter.ofPattern("dd.MM.yyyy", Locale.GERMANY));
130     }
131 
132     /** Pattern for MCRQueryConditions expressed as String */
133     private static Pattern pattern = Pattern.compile("([^ \t\r\n]+)\\s+([^ \t\r\n]+)\\s+([^ \"\t\r\n]+|\"[^\"]*\")");
134 
135     /**
136      * Parses a String containing a simple query condition, for example: (title
137      * contains "Java") and (creatorID = "122132131")
138      *
139      * @param s
140      *            the condition as a String
141      * @return the parsed MCRQueryCondition object
142      */
143     @Override
144     protected MCRCondition<Void> parseSimpleCondition(String s) throws MCRParseException {
145         Matcher m = pattern.matcher(s);
146 
147         if (!m.find()) {
148             return super.parseSimpleCondition(s);
149         }
150 
151         String field = m.group(1);
152         String operator = m.group(2);
153         String value = m.group(3);
154 
155         if (value.startsWith("\"") && value.endsWith("\"")) {
156             value = value.substring(1, value.length() - 1);
157         }
158 
159         return buildConditions(field, operator, value);
160     }
161 
162     @Override
163     public MCRCondition<Void> parse(Element condition) throws MCRParseException {
164         MCRCondition<Void> cond = super.parse(condition);
165         return normalizeCondition(cond);
166     }
167 
168     @Override
169     public MCRCondition<Void> parse(String s) throws MCRParseException {
170         MCRCondition<Void> cond = super.parse(s);
171         return normalizeCondition(cond);
172     }
173 
174     /**
175      * Normalizes a parsed query condition. AND/OR conditions that just have one
176      * child will be replaced with that child. NOT(NOT(X)) will be normalized to X.
177      * (A AND (b AND c)) will be normalized to (A AND B AND C), same for nested ORs.
178      * AND/OR/NOT conditions with no child conditions will be removed.
179      * Conditions that use the operator "contains" will be splitted into multiple
180      * simpler conditions if the condition value contains phrases surrounded
181      * by '...' or wildcard search with * or ?.
182      */
183     public static MCRCondition<Void> normalizeCondition(MCRCondition<Void> cond) {
184         if (cond == null) {
185             return null;
186         } else if (cond instanceof MCRSetCondition) {
187             MCRSetCondition<Void> sc = (MCRSetCondition<Void>) cond;
188             List<MCRCondition<Void>> children = sc.getChildren();
189             sc = sc instanceof MCRAndCondition ? new MCRAndCondition<>() : new MCROrCondition<>();
190             for (MCRCondition<Void> child : children) {
191                 MCRCondition<Void> normalizedChild = normalizeCondition(child);
192                 if (normalizedChild != null) {
193                     if (normalizedChild instanceof MCRSetCondition
194                         && sc.getOperator().equals(((MCRSetCondition) normalizedChild).getOperator())) {
195                         // Replace (a AND (b AND c)) with (a AND b AND c), same for OR
196                         sc.addAll(((MCRSetCondition<Void>) normalizedChild).getChildren());
197                     } else {
198                         sc.addChild(normalizedChild);
199                     }
200                 }
201             }
202             children = sc.getChildren();
203             if (children.size() == 0) {
204                 return null; // Completely remove empty AND condition
205             } else if (children.size() == 1) {
206                 return children.get(0); // Replace AND with just one child
207             } else {
208                 return sc;
209             }
210         } else if (cond instanceof MCRNotCondition) {
211             MCRNotCondition<Void> nc = (MCRNotCondition<Void>) cond;
212             MCRCondition<Void> child = normalizeCondition(nc.getChild());
213             if (child == null) {
214                 return null; // Remove empty NOT
215             } else if (child instanceof MCRNotCondition) {
216                 return normalizeCondition(((MCRNotCondition<Void>) child).getChild());
217             } else {
218                 return new MCRNotCondition<>(child);
219             }
220         } else if (cond instanceof MCRQueryCondition) {
221             MCRQueryCondition qc = (MCRQueryCondition) cond;
222 
223             if (!qc.getOperator().equals("contains")) {
224                 return qc;
225             }
226 
227             // Normalize value when contains operator is used
228             List<String> values = new ArrayList<>();
229 
230             StringBuilder phrase = null;
231             StringTokenizer st = new StringTokenizer(qc.getValue(), " ");
232             while (st.hasMoreTokens()) {
233                 String value = st.nextToken();
234                 if (phrase != null) {
235                     // we are within phrase
236                     if (value.endsWith("'")) {
237                         // end of phrase
238                         value = phrase + " " + value;
239                         values.add(value);
240                         phrase = null;
241                     } else {
242                         // in middle of phrase
243                         phrase.append(' ').append(value);
244                     }
245                 } else if (value.startsWith("'")) {
246                     // begin of phrase
247                     if (value.endsWith("'")) {
248                         // one-word phrase
249                         values.add(value.substring(1, value.length() - 1));
250                     } else {
251                         phrase = new StringBuilder(value);
252                     }
253                 } else if (value.startsWith("-'")) {
254                     // begin of NOT phrase
255                     if (value.endsWith("'")) {
256                         // one-word phrase
257                         values.add("-" + value.substring(2, value.length() - 1));
258                     } else {
259                         phrase = new StringBuilder(value);
260                     }
261                 } else {
262                     values.add(value);
263                 }
264             }
265 
266             MCRAndCondition<Void> ac = new MCRAndCondition<>();
267             for (String value : values) {
268                 if (value.startsWith("'")) {
269                     ac.addChild(new MCRQueryCondition(qc.getFieldName(), "phrase", value.substring(1,
270                         value.length() - 1)));
271                 } else if (value.startsWith("-'")) {
272                     ac.addChild(new MCRNotCondition<>(
273                         new MCRQueryCondition(qc.getFieldName(), "phrase", value.substring(2, value.length() - 1))));
274                 } else if (value.contains("*") || value.contains("?")) {
275                     ac.addChild(new MCRQueryCondition(qc.getFieldName(), "like", value));
276                 } else if (value.startsWith("-")) {
277                     // -word means "NOT word"
278                     MCRCondition<Void> subCond = new MCRQueryCondition(qc.getFieldName(), "contains",
279                         value.substring(1));
280                     ac.addChild(new MCRNotCondition<>(subCond));
281                 } else {
282                     ac.addChild(new MCRQueryCondition(qc.getFieldName(), "contains", value));
283                 }
284             }
285 
286             if (values.size() == 1) {
287                 return ac.getChildren().get(0);
288             } else {
289                 return ac;
290             }
291         } else {
292             return cond;
293         }
294     }
295 
296     /** Used for input validation in editor search form */
297     public static boolean validateQueryExpression(String query) {
298         try {
299             MCRCondition<Void> cond = new MCRQueryParser().parse(query);
300             return cond != null;
301         } catch (Throwable t) {
302             return false;
303         }
304     }
305 }