001    /*
002     * 
003     * $Revision: 14971 $ $Date: 2009-03-20 09:05:02 +0100 (Fri, 20 Mar 2009) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.services.fieldquery;
025    
026    import java.util.ArrayList;
027    import java.util.Calendar;
028    import java.util.List;
029    import java.util.StringTokenizer;
030    import java.util.regex.Matcher;
031    import java.util.regex.Pattern;
032    
033    import com.ibm.icu.util.GregorianCalendar;
034    
035    import org.apache.log4j.Logger;
036    import org.jdom.Element;
037    import org.mycore.common.MCRCalendar;
038    import org.mycore.datamodel.metadata.MCRMetaISO8601Date;
039    import org.mycore.parsers.bool.MCRAndCondition;
040    import org.mycore.parsers.bool.MCRBooleanClauseParser;
041    import org.mycore.parsers.bool.MCRCondition;
042    import org.mycore.parsers.bool.MCRNotCondition;
043    import org.mycore.parsers.bool.MCROrCondition;
044    import org.mycore.parsers.bool.MCRParseException;
045    import org.mycore.parsers.bool.MCRSetCondition;
046    
047    /**
048     * Parses query conditions for use in MCRSearcher.
049     * 
050     * @see MCRSearcher
051     * 
052     * @author Frank L\u00fctzenkirchen
053     */
054    public class MCRQueryParser extends MCRBooleanClauseParser {
055    
056        private final static Logger LOGGER = Logger.getLogger(MCRQueryParser.class);
057    
058        /**
059         * Parses XML element containing a simple query condition
060         * 
061         * @param e
062         *            the 'condition' element
063         * @return the parsed MCRQueryCondition object
064         */
065        protected MCRCondition parseSimpleCondition(Element e) throws MCRParseException {
066            String name = e.getName();
067    
068            if (!name.equals("condition"))
069                throw new MCRParseException("Not a valid <" + name + ">");
070    
071            String field = e.getAttributeValue("field");
072            String opera = e.getAttributeValue("operator");
073            String value = e.getAttributeValue("value");
074    
075            return buildConditions(field, opera, value);
076        }
077    
078        /**
079         * Builds a new MCRCondition from parsed elements
080         * 
081         * @param field
082         *            one or more field names, separated by comma
083         * @param oper
084         *            the condition operator
085         * @param value
086         *            the condition value
087         * @return
088         */
089        private MCRCondition buildConditions(String field, String oper, String value) {
090            if (field.contains(",")) 
091            { // Multiple fields in one condition, combine with OR
092                StringTokenizer st = new StringTokenizer(field, ", ");
093                MCROrCondition oc = new MCROrCondition();
094                while (st.hasMoreTokens())
095                    oc.addChild(buildConditions(st.nextToken(), oper, value));
096                return oc;
097            } else if (field.contains("-")) 
098            { // date and MCRMetaHistoryDate condition von-bis
099                StringTokenizer st = new StringTokenizer(field, "- ");
100                String fieldFrom = st.nextToken();
101                String fieldTo = st.nextToken();
102                if (oper.equals("=")) {
103                    // von-bis = x --> (von <= x) AND (bis >= x)
104                    MCRAndCondition ac = new MCRAndCondition();
105                    ac.addChild(buildCondition(fieldFrom, "<=", value, true));
106                    ac.addChild(buildCondition(fieldTo, ">=", value, true ));
107                    return ac;
108                } else if (oper.contains("<"))
109                    return buildCondition(fieldFrom, oper, value, true);
110                else
111                    // oper.contains( ">" )
112                    return buildCondition(fieldTo, oper, value, true);
113            } else
114                return buildCondition(field, oper, value, false);
115        }
116    
117        /**
118         * Builds a new MCRQueryCondition
119         * 
120         * @param field
121         *            the name of the search field
122         * @param oper
123         *            the condition operator
124         * @param value
125         *            the condition value
126         * @param vonbis
127         *            is a 'from to' query, used for date and MetaHistoryDate
128         * @return
129         */
130        private MCRQueryCondition buildCondition(String field, String oper, String value, boolean vonbis) {
131            MCRFieldDef def = MCRFieldDef.getDef(field);
132            if (def == null)
133                throw new MCRParseException("Field not defined: <" + field + ">");
134            String datatype = def.getDataType();
135            if (!"date".equals(datatype) && vonbis)
136              value = normalizeHistoryDate(oper,value);
137            LOGGER.debug(value);
138            if ("date".equals(datatype) && "TODAY".equals(value))
139              value = getToday();
140            return new MCRQueryCondition(def, oper, value);
141        }
142        
143        private String getToday()  {
144           GregorianCalendar cal = new GregorianCalendar();
145           int year = cal.get(Calendar.YEAR);
146           int month = cal.get(Calendar.MONTH) + 1;
147           int day = cal.get(Calendar.DAY_OF_MONTH);
148           return String.valueOf(day) + "." + String.valueOf(month) + "." + String.valueOf(year);
149        }
150    
151        /** Pattern for MCRQueryConditions expressed as String */
152        private static Pattern pattern = Pattern.compile("([^ \t\r\n]+)\\s+([^ \t\r\n]+)\\s+([^ \"\t\r\n]+|\"[^\"]*\")");
153    
154        /**
155         * Parses a String containing a simple query condition, for example: (title
156         * contains "Java") and (creatorID = "122132131")
157         * 
158         * @param s
159         *            the condition as a String
160         * @return the parsed MCRQueryCondition object
161         */
162        protected MCRCondition parseSimpleCondition(String s) throws MCRParseException {
163            Matcher m = pattern.matcher(s);
164    
165            if (!m.find())
166                throw new MCRParseException("Not a valid condition: " + s);
167    
168            String field = m.group(1);
169            String operator = m.group(2);
170            String value = m.group(3);
171    
172            if (value.startsWith("\"") && value.endsWith("\"")) {
173                value = value.substring(1, value.length() - 1);
174            }
175    
176            return buildConditions(field, operator, value);
177        }
178    
179        public MCRCondition parse(Element condition) throws MCRParseException {
180            MCRCondition cond = super.parse(condition);
181            return normalizeCondition(cond);
182        }
183    
184        public MCRCondition parse(String s) throws MCRParseException {
185            MCRCondition cond = super.parse(s);
186            return normalizeCondition(cond);
187        }
188    
189        /**
190         * Normalizes a parsed query condition. AND/OR conditions that just have one
191         * child will be replaced with that child. NOT(NOT(X)) will be normalized to X.
192         * (A AND (b AND c)) will be normalized to (A AND B AND C), same for nested ORs.
193         * AND/OR/NOT conditions with no child conditions will be removed.
194         * Conditions that use the operator "contains" will be splitted into multiple 
195         * simpler conditions if the condition value contains phrases surrounded 
196         * by '...' or wildcard search with * or ?.
197         */
198        static MCRCondition normalizeCondition(MCRCondition cond) {
199            if (cond == null) return null;
200            else if (cond instanceof MCRSetCondition) {
201                MCRSetCondition sc = (MCRSetCondition) cond;
202                List<MCRCondition> children = sc.getChildren();
203                sc = ( sc instanceof MCRAndCondition ? new MCRAndCondition() : new MCROrCondition() );  
204                for (MCRCondition child : children )
205                {    
206                  child = normalizeCondition(child);
207                  if( child == null ) 
208                    continue; // Remove empty child conditions
209                  else if ( (child instanceof MCRSetCondition) && sc.getOperator().equals( ((MCRSetCondition)child).getOperator() ) ) 
210                  {
211                    // Replace (a AND (b AND c)) with (a AND b AND c), same for OR
212                    sc.addAll(((MCRSetCondition)child).getChildren());
213                  }
214                  else sc.addChild( child ); 
215                }
216                children = sc.getChildren();
217                if (children.size() == 0)
218                  return null; // Completely remove empty AND condition
219                else if( children.size() == 1 )
220                  return children.get(0); // Replace AND with just one child
221                else 
222                  return sc;  
223            } else if (cond instanceof MCRNotCondition) {
224                MCRNotCondition nc = (MCRNotCondition) cond;
225                MCRCondition child = normalizeCondition( nc.getChild() ); 
226                if(child == null )
227                  return null; // Remove empty NOT
228                else if( child instanceof MCRNotCondition ) // Replace NOT(NOT(x)) with x
229                  return normalizeCondition( ((MCRNotCondition)child).getChild() );
230                else 
231                  return new MCRNotCondition(child);
232            } else if (cond instanceof MCRQueryCondition) {
233                MCRQueryCondition qc = (MCRQueryCondition) cond;
234    
235                // Normalize values in date conditions
236                if (qc.getField().getDataType().equals("date")) {
237                    try {
238                        MCRMetaISO8601Date iDate = new MCRMetaISO8601Date();
239                        iDate.setDate(qc.getValue());
240                        String sDate = iDate.getISOString().substring(0, 10);
241                        return new MCRQueryCondition(qc.getField(), qc.getOperator(), sDate);
242                    } catch (Exception ex) {
243                        LOGGER.debug(ex);
244                        return qc;
245                    }
246                }
247    
248                if (!qc.getOperator().equals("contains"))
249                    return qc;
250    
251                // Normalize value when contains operator is used
252                List<String> values = new ArrayList<String>();
253    
254                String phrase = null;
255                StringTokenizer st = new StringTokenizer(qc.getValue(), " ");
256                while (st.hasMoreTokens()) {
257                    String value = st.nextToken();
258                    if ((phrase != null)) // we are within phrase
259                    {
260                        if (value.endsWith("'")) // end of phrase
261                        {
262                            value = phrase + " " + value;
263                            values.add(value);
264                            phrase = null;
265                        } else // in middle of phrase
266                        {
267                            phrase = phrase + " " + value;
268                        }
269                    } else if (value.startsWith("'")) // begin of phrase
270                    {
271                        if (value.endsWith("'")) // one-word phrase
272                        {
273                            values.add(value.substring(1, value.length() - 1));
274                        } else {
275                            phrase = value;
276                        }
277                    } else if (value.startsWith("-'")) // begin of NOT phrase
278                    {
279                        if (value.endsWith("'")) // one-word phrase
280                        {
281                            values.add("-" + value.substring(2, value.length() - 1));
282                        } else {
283                            phrase = value;
284                        }
285                    } else
286                        values.add(value);
287                }
288    
289                MCRAndCondition ac = new MCRAndCondition();
290                for (int i = 0; i < values.size(); i++) {
291                    String value = values.get(i);
292                    if (value.startsWith("'")) // phrase
293                        ac.addChild(new MCRQueryCondition(qc.getField(), "phrase", value.substring(1, value.length() - 1)));
294                    else if (value.startsWith("-'")) // NOT phrase
295                        ac.addChild( new MCRNotCondition(new MCRQueryCondition(qc.getField(), "phrase", value.substring(2, value.length() - 1))));
296                    else if ((value.indexOf("*") >= 0) || (value.indexOf("?") >= 0)) // like
297                        ac.addChild(new MCRQueryCondition(qc.getField(), "like", value));
298                    else if (value.startsWith("-")) // -word means "NOT word"
299                    {
300                        MCRCondition subCond = new MCRQueryCondition(qc.getField(), "contains", value.substring(1));
301                        ac.addChild(new MCRNotCondition(subCond));
302                    } else
303                        ac.addChild(new MCRQueryCondition(qc.getField(), "contains", value));
304                }
305    
306                if (values.size() == 1)
307                    return (MCRCondition) (ac.getChildren().get(0));
308                else
309                    return ac;
310            } else
311                return cond;
312        }
313    
314        /** Used for input validation in editor search form */
315        public static boolean validateQueryExpression(String query) {
316            try {
317                MCRCondition cond = new MCRQueryParser().parse(query);
318                return (cond != null);
319            } catch (Throwable t) {
320                return false;
321            }
322        }
323        
324        /**
325         * Normalizes MCRMetaHistoryDate values used in a query. If the
326         * date is incomplete (for example, only the year given), it depends
327         * on the search operator used, whether the upper (31th Dec of year)
328         * or lower (1st Jan of year) bound is used.
329         * 
330         * @param operator the search operator, one of >, >=, <, <=
331         * @param date the date to search for
332         * @return the julian day number, as a String
333         */
334        private static String normalizeHistoryDate(String operator, String date) {
335            GregorianCalendar cal = null;
336            if (operator.equals(">"))
337                cal = MCRCalendar.getGregorianHistoryDate(date, true);
338            if (operator.equals("<"))
339                cal = MCRCalendar.getGregorianHistoryDate(date, false);
340            if (operator.equals(">="))
341                cal = MCRCalendar.getGregorianHistoryDate(date, false);
342            if (operator.equals("<="))
343                cal = MCRCalendar.getGregorianHistoryDate(date, true);
344            return String.valueOf(MCRCalendar.getJulianDayNumber(cal));
345        }
346    }