View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.parsers.bool;
20  
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.Locale;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.jdom2.Element;
28  
29  /**
30   * Class for parsing Boolean clauses
31   * 
32   * @author Matthias Kramm
33   * @author Christoph Neidahl (OPNA2608)
34   */
35  public class MCRBooleanClauseParser<T> {
36      private static Pattern bracket = Pattern.compile("\\([^)(]*\\)");
37  
38      private static Pattern apostrophe = Pattern.compile("\"[^\"]*?\"");
39  
40      private static Pattern and = Pattern.compile("[)\\s]+[aA][nN][dD][\\s(]+");
41  
42      private static Pattern or = Pattern.compile("[)\\s]+[oO][rR][\\s(]+");
43  
44      private static Pattern bracket_marker = Pattern.compile("@<([0-9]*)>@");
45  
46      /**
47       * This both strings are for temporary bracket substitution in case of brackets 
48       * in a text string in a condition like 'title contains "foo (and bar)".
49       */
50      private static String opening_bracket = "%%%%%%%%%%";
51  
52      private static String closing_bracket = "##########";
53  
54      private static String extendClauses(final String s, final List<String> l) {
55          String sintern = s;
56          while (true) {
57              Matcher m = bracket_marker.matcher(sintern);
58  
59              if (m.find()) {
60                  String c = m.group();
61                  String clause = l.get(Integer.parseInt(m.group(1)));
62                  sintern = sintern.replaceAll(c, clause);
63              } else {
64                  break;
65              }
66          }
67  
68          return sintern;
69      }
70  
71      /**
72       * Parse a complex or simple condition in XML format and put it in an condition object.
73       * 
74       * @param condition a MyCoRe condition object in XML format
75       * @return a MyCoRe condition object in the MCRCondition format
76       */
77      public MCRCondition<T> parse(Element condition) {
78          if (condition == null) {
79              return defaultRule();
80          }
81  
82          if (condition.getName().equalsIgnoreCase("boolean")) {
83              String operator = condition.getAttributeValue("operator");
84              if (operator == null) {
85                  throw new MCRParseException("Syntax error: attribute operator not found");
86              }
87  
88              if (operator.equalsIgnoreCase("not")) {
89                  Element child = condition.getChildren().get(0);
90                  return new MCRNotCondition<>(parse(child));
91              } else if (operator.equalsIgnoreCase("and") || operator.equalsIgnoreCase("or")) {
92                  List<Element> children = condition.getChildren();
93                  MCRCondition<T> cond;
94  
95                  if (operator.equalsIgnoreCase("and")) {
96                      MCRAndCondition<T> acond = new MCRAndCondition<>();
97  
98                      for (Object aChildren : children) {
99                          Element child = (Element) aChildren;
100                         acond.addChild(parse(child));
101                     }
102 
103                     cond = acond;
104                 } else {
105                     MCROrCondition<T> ocond = new MCROrCondition<>();
106 
107                     for (Object aChildren : children) {
108                         Element child = (Element) aChildren;
109                         ocond.addChild(parse(child));
110                     }
111 
112                     cond = ocond;
113                 }
114 
115                 return cond;
116             } else {
117                 return parseSimpleCondition(condition);
118             }
119         }
120         return parseSimpleCondition(condition);
121     }
122 
123     /**
124      * Parse a complex or simple condition in String format and put it in an condition object.
125      * 
126      * @param s a MyCoRe condition object in String format
127      * @return a MyCoRe condition object in the MCRCondition format
128      */
129     public MCRCondition<T> parse(String s) throws MCRParseException {
130         s = s.replaceAll("\t", " ").replaceAll("\n", " ").replaceAll("\r", " ");
131 
132         if (s.trim().length() == 0 || s.equals("()")) {
133             return defaultRule();
134         }
135 
136         return parse(s, null);
137     }
138 
139     private MCRCondition<T> parse(String s, List<String> l) throws MCRParseException {
140         // initialize if start parsing
141         if (l == null) {
142             l = new ArrayList<>();
143         }
144 
145         // a empty condition
146         s = s.trim();
147         if (s.equals("()")) {
148             s = "(true)";
149         }
150 
151         while (true) {
152             // replace all bracket expressions with $n
153             while (s.charAt(0) == '(' && s.charAt(s.length() - 1) == ')'
154                 && s.substring(1, s.length() - 1).indexOf('(') < 0 && s.substring(1, s.length() - 1).indexOf(')') < 0) {
155                 s = s.trim().substring(1, s.length() - 1).trim();
156             }
157 
158             // replace brackets in texts inside "..." with temporary strings 
159             Matcher a = apostrophe.matcher(s); // find bracket pairs
160             if (a.find()) {
161                 String clause = a.group();
162                 clause = clause.replaceAll("\\(", opening_bracket);
163                 clause = clause.replaceAll("\\)", closing_bracket);
164                 s = s.substring(0, a.start()) + clause + s.substring(a.end());
165             }
166 
167             // find bracket pairs and replace text inside brackets with  @<number>@
168             Matcher m = bracket.matcher(s);
169             if (m.find()) {
170                 String clause = m.group();
171                 s = s.substring(0, m.start()) + "@<" + l.size() + ">@" + s.substring(m.end());
172                 l.add(extendClauses(clause, l));
173             } else {
174                 break;
175             }
176         }
177 
178         // after replacing bracket pairs check for unmatched parenthis
179         if ((s.indexOf('(') >= 0) ^ (s.indexOf(')') >= 0)) { // missing opening or closing bracket?
180             throw new MCRParseException("Syntax error: missing bracket in \"" + s + "\"");
181         }
182 
183         /* handle OR */
184         Matcher m = or.matcher(s);
185         int last = 0;
186         MCROrCondition<T> orclause = new MCROrCondition<>();
187         while (m.find()) {
188             int l1 = m.start();
189             if (last >= l1) {
190                 throw new MCRParseException("subclause of OR missing while parsing \"" + s + "\"");
191             }
192             MCRCondition<T> c = parse(extendClauses(s.substring(last, l1), l), l);
193             last = m.end();
194             orclause.addChild(c);
195         }
196         if (last != 0) {
197             MCRCondition<T> c = parse(extendClauses(s.substring(last), l), l);
198             orclause.addChild(c);
199             return orclause;
200         }
201 
202         /* handle AND */
203         m = and.matcher(s);
204         last = 0;
205         MCRAndCondition<T> andclause = new MCRAndCondition<>();
206         while (m.find()) {
207             int l1 = m.start();
208             if (last >= l1) {
209                 throw new MCRParseException("subclause of AND missing while parsing \"" + s + "\"");
210             }
211             MCRCondition<T> c = parse(extendClauses(s.substring(last, l1), l), l);
212             last = m.end();
213             andclause.addChild(c);
214         }
215         if (last != 0) {
216             MCRCondition<T> c = parse(extendClauses(s.substring(last), l), l);
217             andclause.addChild(c);
218             return andclause;
219         }
220 
221         /* handle NOT */
222         s = s.trim();
223         if (s.toLowerCase(Locale.ROOT).startsWith("not ")) {
224             MCRCondition<T> inverse = parse(extendClauses(s.substring(4), l), l);
225             return new MCRNotCondition<>(inverse);
226         }
227 
228         // expands tokens with previously analysed expressions
229         s = extendClauses(s, l);
230 
231         // recusion ONLY if parenthis (can) match
232         if ((s.indexOf('(') >= 0) && (s.indexOf(')') >= 0)) {
233             return parse(s, l);
234         } else {
235             // replace back brackets in apostrophe
236             s = s.replaceAll(opening_bracket, "(");
237             s = s.replaceAll(closing_bracket, ")");
238             return parseSimpleCondition(s);
239         }
240     }
241 
242     protected MCRCondition<T> parseSimpleCondition(String s) throws MCRParseException {
243         /* handle specific rules */
244         if (s.equalsIgnoreCase("true")) {
245             return new MCRTrueCondition<>();
246         }
247 
248         if (s.equalsIgnoreCase("false")) {
249             return new MCRFalseCondition<>();
250         }
251 
252         throw new MCRParseException("Syntax error: " + s); // extendClauses(s,
253         // l));
254     }
255 
256     protected MCRCondition<T> parseSimpleCondition(Element element) throws MCRParseException {
257         // <boolean operator="true|false" />
258         String name;
259         try {
260             name = element.getAttributeValue("operator").toLowerCase(Locale.ROOT);
261         } catch (Exception e) {
262             throw new MCRParseException("Syntax error: attribute operator not found");
263         }
264 
265         if (name.equals("true")) {
266             return new MCRTrueCondition<>();
267         }
268 
269         if (name.equals("false")) {
270             return new MCRFalseCondition<>();
271         }
272 
273         throw new MCRParseException("Syntax error: <" + name + ">");
274     }
275 
276     protected MCRCondition<T> defaultRule() {
277         return new MCRTrueCondition<>();
278     }
279 
280 }