001    /*
002     * 
003     * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004     *
005     * This file is part of ***  M y C o R e  ***
006     * See http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, in a file called gpl.txt or license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     */
023    
024    package org.mycore.services.z3950;
025    
026    /**
027     * Builds a MyCoRe query condition from a Z39.50 query string in PQF (Prefix Query Format).
028     * Based on code from jzkit2 (http://developer.k-int.com/jzkit2/).
029     *
030     * @author Harald Richter
031     * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
032     */
033    
034    import java.io.Reader;
035    import java.io.StringReader;
036    import java.util.Vector;
037    import java.util.Properties;
038    
039    import org.hibernate.Transaction;
040    import org.mycore.parsers.bool.MCRAndCondition;
041    import org.mycore.parsers.bool.MCROrCondition;
042    import org.mycore.parsers.bool.MCRNotCondition;
043    import org.mycore.parsers.bool.MCRCondition;
044    import org.mycore.services.fieldquery.MCRFieldDef;
045    import org.mycore.services.fieldquery.MCRQueryCondition;
046    import org.mycore.backend.hibernate.MCRHIBConnection;
047    import org.mycore.common.MCRException;
048    import org.mycore.common.MCRConfiguration;
049    
050    public class MCRZ3950PrefixQueryParser
051    {
052    
053      private int token = 0;
054    
055      private MCRZ3950PrefixQueryLexer p;
056    
057      private String defaultAttrset = "bib-1";
058    
059      private static Properties default_conversion_rules = null;
060    
061      public static Properties getDefaultConversionRules()
062      {
063        if (default_conversion_rules == null)
064        {
065          default_conversion_rules = MCRConfiguration.instance().getProperties("MCR.z3950");
066        }
067    
068        return default_conversion_rules;
069      }
070    
071      public MCRZ3950PrefixQueryParser(Reader r)
072      {
073        p = new MCRZ3950PrefixQueryLexer(r);
074      }
075    
076      public MCRCondition parse() throws MCRException
077      {
078        MCRAndCondition cAnd = new MCRAndCondition();
079        MCRCondition condition = null;
080        token = p.nextToken();
081    
082        if (token == MCRZ3950PrefixQueryLexer.ATTRSET)
083        {
084          // Consume the Attrset token
085          token = p.nextToken();
086          defaultAttrset = p.getString();
087          System.err.println("Setting attrset " + defaultAttrset);
088    
089          // Consume the namespace value token
090          token = p.nextToken();
091          condition = visitPrefixQuery(defaultAttrset);
092        } else
093        {
094          condition = visitPrefixQuery(null);
095        }
096    
097        if (token != MCRZ3950PrefixQueryLexer.EOF)
098          throw new MCRException("Unparsed text at end of PQF expression: " + p.getString());
099    
100        cAnd.addChild(condition);
101        return cAnd;
102      }
103    
104      private MCRCondition visitPrefixQuery(String currentAttrset) throws MCRException
105      {
106        MCRCondition qn = null;
107    
108        switch (token)
109        {
110        case MCRZ3950PrefixQueryLexer.AND:
111          token = p.nextToken();
112          MCRAndCondition andc = new MCRAndCondition();
113          MCRCondition anda = visitPrefixQuery(currentAttrset);
114          if (null != anda)
115            andc.addChild(anda);
116          anda = visitPrefixQuery(currentAttrset);
117          if (null != anda)
118            andc.addChild(anda);
119          return andc;
120    
121        case MCRZ3950PrefixQueryLexer.OR:
122          token = p.nextToken();
123          MCROrCondition orc = new MCROrCondition();
124          MCRCondition ora = visitPrefixQuery(currentAttrset);
125          if (null != ora)
126            orc.addChild(ora);
127          ora = visitPrefixQuery(currentAttrset);
128          if (null != ora)
129            orc.addChild(ora);
130          return orc;
131    
132        case MCRZ3950PrefixQueryLexer.NOT:
133          token = p.nextToken();
134          MCRAndCondition notc = new MCRAndCondition();
135          MCRCondition nota = visitPrefixQuery(currentAttrset);
136          if (null != nota)
137            notc.addChild(nota);
138          nota = visitPrefixQuery(currentAttrset);
139          if (null != nota)
140            notc.addChild(new MCRNotCondition(nota));
141          return notc;
142    
143        case MCRZ3950PrefixQueryLexer.TERM:
144          qn = visitQueryNode(currentAttrset);
145          break;
146    
147        case MCRZ3950PrefixQueryLexer.ATTR:
148          qn = visitQueryNode(currentAttrset);
149          break;
150        }
151    
152        return qn;
153      }
154    
155      private MCRQueryCondition visitQueryNode(String currentAttrset) throws MCRException
156      {
157    
158        Vector terms = new Vector();
159    
160        String use = null, relation = null, structure = null, truncation = null;
161        while (token == MCRZ3950PrefixQueryLexer.ATTR)
162        {
163          int attrType = 0;
164          Object attrVal = null;
165          String localAttrset = "MCR.z3950";
166    
167          // Consume the @attr and see what's next
168          token = p.nextToken();
169    
170          // See if there is an attrset, as in "@attr gils 1=2016"
171          if (token == MCRZ3950PrefixQueryLexer.TERM)
172          {
173            // It must be an attribute set identifier, since attr types are always numeric
174            localAttrset = p.getString();
175            token = p.nextToken();
176          }
177    
178          // Process the attribute as in @attr 1=4
179          if (token == MCRZ3950PrefixQueryLexer.NUMBER)
180          {
181            attrType = p.getInt();
182            // Consume the attribute token
183            token = p.nextToken();
184          } else
185            throw new MCRException("Unexpected error processing RPN query, expected attribute type");
186    
187          // Ensure that there is an equals
188          if (token == MCRZ3950PrefixQueryLexer.EQUALS)
189          {
190            // Consume it
191            token = p.nextToken();
192          } else
193            throw new MCRException("Unexpected error processing RPN query, expected =");
194    
195          // Ensure there is a value
196          if (token == MCRZ3950PrefixQueryLexer.NUMBER)
197          {
198            attrVal = java.math.BigInteger.valueOf(p.getInt());
199            // Consume It
200            token = p.nextToken();
201          } else if (token == MCRZ3950PrefixQueryLexer.TERM)
202          {
203            // With the new attribute set architecture we will start to get string values... Deal here
204            attrVal = p.getString();
205            // Consume It
206            token = p.nextToken();
207          } else
208            throw new MCRException(
209                "Unexpected error processing RPN query, expected str or num attribute");
210    
211          // Use the config to figure out which one of
212          // ACCESS_POINT_ATTR = "AccessPoint"; "Relation"; "Position"; "Structure"; "Truncation"; "Completeness";
213          // the selected attr relates to
214    
215          String attrType_str = "" + attrType;
216    
217          String lookupStr = null;
218          if (localAttrset != null)
219            lookupStr = localAttrset + "." + attrType;
220          else
221            lookupStr = currentAttrset + "." + attrType;
222    
223          String internalAttrType = getDefaultConversionRules().getProperty(lookupStr);
224    
225          if (internalAttrType == null)
226            throw new MCRException("Query attribute not found in properties: " + lookupStr);
227    
228          //      System.err.println("++attrType_str="+attrType_str + " attrValue="+attrVal);
229          lookupStr = localAttrset + "." + attrType_str + "." + attrVal;
230          String mcr = getDefaultConversionRules().getProperty(lookupStr);
231          if (null == mcr)
232            throw new MCRException("Query attribute not found in properties: " + lookupStr);
233    
234          switch (attrType)
235          {
236          case 1:
237            use = mcr;
238            break;
239          case 2:
240            relation = mcr;
241            break;
242          case 4:
243            structure = mcr;
244            break;
245          case 5:
246            truncation = mcr;
247            break;
248          }
249        }
250    
251        // See if we have an element name
252        if (token == MCRZ3950PrefixQueryLexer.ELEMENTNAME)
253        {
254          // Consume the element name token and move on to the actual element name
255          token = p.nextToken();
256    
257          // Consume the actual element name
258          token = p.nextToken();
259        }
260    
261        // Process any terms following the attrs
262    
263        // System.err.println("Expecting terms . Next token type = "+token);
264        while ((token == MCRZ3950PrefixQueryLexer.TERM) || (token == MCRZ3950PrefixQueryLexer.NUMBER))
265        {
266    
267          // Handle the term
268          if (token == MCRZ3950PrefixQueryLexer.TERM)
269            terms.addElement(p.getString());
270          else
271            terms.addElement("" + p.getNumber());
272    
273          //      System.err.println("Processing Term(s)"+p.getString());
274    
275          token = p.nextToken();
276        }
277    
278        String value = "";
279        if (terms.size() > 0)
280        {
281          for (int i = 0; i < terms.size(); i++)
282          {
283            if (i > 0)
284              value = value + " ";
285            value = value + terms.get(i);
286          }
287        } else
288          throw new MCRException("No Terms");
289    
290        String operator;
291        
292        MCRFieldDef fd = MCRFieldDef.getDef(use);
293        if ("identifier".equals(fd.getDataType()))
294          operator = "=";
295        else 
296          operator = "contains";
297        
298        if (null != relation)               // <, <=, =, >, <=
299          operator = relation;
300        if (null != structure)              // phrase, word/worlist
301          operator = structure;
302        if (null != truncation)
303        {
304          operator = "like";
305          if ("1".equals(truncation) )      // right
306            value = value + "*";
307          else if ("2".equals(truncation) ) // left
308            value = "*" + value;
309          else if ("3".equals(truncation) ) // left and right
310            value = "*" + value + "*";
311        }
312        
313        return new MCRQueryCondition(fd, operator, value);
314      }
315    
316      public static void main(String args[])
317      {
318        //OK    String pqfQuery = "@attrset bib-1 @and @attr 4=1 @attr 1=1 \"bob dylan\" @or @attr 1=4 or1 @attr 1=4 or2 xxx";
319        String pqfQuery = "@attrset bib-1 @not  @attr 4=1 @attr 1=1 \"bob dßylanä\" @attr 1=4 äöüWert3 ÄÖÜ?*";
320        //OK    String pqfQuery = "@attrset bib-1 @and @attr 1=4 Wert1 @not @attr 1=4 Wert2    @attr 1=4 Wert3";
321        //OK    String pqfQuery = "@attrset bib-1 @or @attr 1=4 Wert1 @not @attr 1=4 Wert2    @attr 1=4 Wert3";
322        //OK    String pqfQuery = "@attrset bib-1 @or @attr 1=4 Wert1 @not @attr 1=4 Wert2    @attr 1=4 Wert3a Wert3b";
323        //OK    String pqfQuery = "@attrset bib-1 @or @attr 1=1 Wert1 @not @attr 1=4 Wert2    @attr 1=4 @attr 4=1 Wert3a Wert3b";
324        //OK   String pqfQuery = "@attrset bib-1 @and @attr 1=1 Wert1 @not @attr 1=4 Wert2    @attr 1=4 @attr 4=1 Wert3a Wert3b";
325        //OK    String pqfQuery = "@attrset bib-1 @not @attr 1=1 Wert1 @and @attr 1=4 Wert2    @attr 1=4 @attr 4=1 Wert3a Wert3b";
326        //    String pqfQuery = "@attrset bib-1 @not @attr 4=1 @attr 1=1 \"bob dylan\" @attr 1=4 not2 aaouAOU";
327        if (1 == args.length)
328          pqfQuery = args[0];
329    
330        Transaction tx = MCRHIBConnection.instance().getSession().beginTransaction();
331        MCRZ3950PrefixQueryParser pqs = new MCRZ3950PrefixQueryParser(new StringReader(
332            pqfQuery));
333        try
334        {
335          MCRCondition result = pqs.parse();
336          System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
337          System.out.println("pqf query    : " + pqfQuery);
338          System.out.println("mycore query : " + result.toString());
339          tx.commit();
340        } catch (Exception e)
341        {
342          tx.rollback();
343          e.printStackTrace();
344        }
345      }
346    }