001 /*
002 *
003 * $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
004 *
005 * This file is part of *** M y C o R e ***
006 * See http://www.mycore.de/ for details.
007 *
008 * This program is free software; you can use it, redistribute it
009 * and / or modify it under the terms of the GNU General Public License
010 * (GPL) as published by the Free Software Foundation; either version 2
011 * of the License or (at your option) any later version.
012 *
013 * This program is distributed in the hope that it will be useful, but
014 * WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program, in a file called gpl.txt or license.txt.
020 * If not, write to the Free Software Foundation Inc.,
021 * 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA
022 */
023
024 package org.mycore.services.z3950;
025
026 /**
027 * Build MyCoRe query condition from Z39.50-query string in PQF (Prefix Query Format)
028 * based on Code from jzkit2 (http://developer.k-int.com/jzkit2/)
029
030 * @author Harald Richter
031 * @version $Revision: 13085 $ $Date: 2008-02-06 18:27:24 +0100 (Mi, 06 Feb 2008) $
032 */
033
034 import java.io.Reader;
035 import java.io.StringReader;
036 import java.util.Vector;
037 import java.util.Properties;
038
039 import org.hibernate.Transaction;
040 import org.mycore.parsers.bool.MCRAndCondition;
041 import org.mycore.parsers.bool.MCROrCondition;
042 import org.mycore.parsers.bool.MCRNotCondition;
043 import org.mycore.parsers.bool.MCRCondition;
044 import org.mycore.services.fieldquery.MCRFieldDef;
045 import org.mycore.services.fieldquery.MCRQueryCondition;
046 import org.mycore.backend.hibernate.MCRHIBConnection;
047 import org.mycore.common.MCRException;
048 import org.mycore.common.MCRConfiguration;
049
050 public class MCRZ3950PrefixQueryParser
051 {
052
053 private int token = 0;
054
055 private MCRZ3950PrefixQueryLexer p;
056
057 private String defaultAttrset = "bib-1";
058
059 private static Properties default_conversion_rules = null;
060
061 public static Properties getDefaultConversionRules()
062 {
063 if (default_conversion_rules == null)
064 {
065 default_conversion_rules = MCRConfiguration.instance().getProperties("MCR.z3950");
066 }
067
068 return default_conversion_rules;
069 }
070
071 public MCRZ3950PrefixQueryParser(Reader r)
072 {
073 p = new MCRZ3950PrefixQueryLexer(r);
074 }
075
076 public MCRCondition parse() throws MCRException
077 {
078 MCRAndCondition cAnd = new MCRAndCondition();
079 MCRCondition condition = null;
080 token = p.nextToken();
081
082 if (token == MCRZ3950PrefixQueryLexer.ATTRSET)
083 {
084 // Consume the Attrset token
085 token = p.nextToken();
086 defaultAttrset = p.getString();
087 System.err.println("Setting attrset " + defaultAttrset);
088
089 // Consume the namespace value token
090 token = p.nextToken();
091 condition = visitPrefixQuery(defaultAttrset);
092 } else
093 {
094 condition = visitPrefixQuery(null);
095 }
096
097 if (token != MCRZ3950PrefixQueryLexer.EOF)
098 throw new MCRException("Unparsed text at end of PQF expression: " + p.getString());
099
100 cAnd.addChild(condition);
101 return cAnd;
102 }
103
104 private MCRCondition visitPrefixQuery(String currentAttrset) throws MCRException
105 {
106 MCRCondition qn = null;
107
108 switch (token)
109 {
110 case MCRZ3950PrefixQueryLexer.AND:
111 token = p.nextToken();
112 MCRAndCondition andc = new MCRAndCondition();
113 MCRCondition anda = visitPrefixQuery(currentAttrset);
114 if (null != anda)
115 andc.addChild(anda);
116 anda = visitPrefixQuery(currentAttrset);
117 if (null != anda)
118 andc.addChild(anda);
119 return andc;
120
121 case MCRZ3950PrefixQueryLexer.OR:
122 token = p.nextToken();
123 MCROrCondition orc = new MCROrCondition();
124 MCRCondition ora = visitPrefixQuery(currentAttrset);
125 if (null != ora)
126 orc.addChild(ora);
127 ora = visitPrefixQuery(currentAttrset);
128 if (null != ora)
129 orc.addChild(ora);
130 return orc;
131
132 case MCRZ3950PrefixQueryLexer.NOT:
133 token = p.nextToken();
134 MCRAndCondition notc = new MCRAndCondition();
135 MCRCondition nota = visitPrefixQuery(currentAttrset);
136 if (null != nota)
137 notc.addChild(nota);
138 nota = visitPrefixQuery(currentAttrset);
139 if (null != nota)
140 notc.addChild(new MCRNotCondition(nota));
141 return notc;
142
143 case MCRZ3950PrefixQueryLexer.TERM:
144 qn = visitQueryNode(currentAttrset);
145 break;
146
147 case MCRZ3950PrefixQueryLexer.ATTR:
148 qn = visitQueryNode(currentAttrset);
149 break;
150 }
151
152 return qn;
153 }
154
155 private MCRQueryCondition visitQueryNode(String currentAttrset) throws MCRException
156 {
157
158 Vector terms = new Vector();
159
160 String use = null, relation = null, structure = null, truncation = null;
161 while (token == MCRZ3950PrefixQueryLexer.ATTR)
162 {
163 int attrType = 0;
164 Object attrVal = null;
165 String localAttrset = "MCR.z3950";
166
167 // Consume the @attr and see what's next
168 token = p.nextToken();
169
170 // See if there is an attrset, as in "@attr gils 1=2016"
171 if (token == MCRZ3950PrefixQueryLexer.TERM)
172 {
173 // It must be an attribute set identifier, since attr types are always numeric
174 localAttrset = p.getString();
175 token = p.nextToken();
176 }
177
178 // Process the attribute as in @attr 1=4
179 if (token == MCRZ3950PrefixQueryLexer.NUMBER)
180 {
181 attrType = p.getInt();
182 // Consume the attribute token
183 token = p.nextToken();
184 } else
185 throw new MCRException("Unexpected error processing RPN query, expected attribute type");
186
187 // Ensure that there is an equals
188 if (token == MCRZ3950PrefixQueryLexer.EQUALS)
189 {
190 // Consume it
191 token = p.nextToken();
192 } else
193 throw new MCRException("Unexpected error processing RPN query, expected =");
194
195 // Ensure there is a value
196 if (token == MCRZ3950PrefixQueryLexer.NUMBER)
197 {
198 attrVal = java.math.BigInteger.valueOf(p.getInt());
199 // Consume It
200 token = p.nextToken();
201 } else if (token == MCRZ3950PrefixQueryLexer.TERM)
202 {
203 // With the new attribute set architecture we will start to get string values... Deal here
204 attrVal = p.getString();
205 // Consume It
206 token = p.nextToken();
207 } else
208 throw new MCRException(
209 "Unexpected error processing RPN query, expected str or num attribute");
210
211 // Use the config to figure out which one of
212 // ACCESS_POINT_ATTR = "AccessPoint"; "Relation"; "Position"; "Structure"; "Truncation"; "Completeness";
213 // the selected attr relates to
214
215 String attrType_str = "" + attrType;
216
217 String lookupStr = null;
218 if (localAttrset != null)
219 lookupStr = localAttrset + "." + attrType;
220 else
221 lookupStr = currentAttrset + "." + attrType;
222
223 String internalAttrType = getDefaultConversionRules().getProperty(lookupStr);
224
225 if (internalAttrType == null)
226 throw new MCRException("Query attribute not found in properties: " + lookupStr);
227
228 // System.err.println("++attrType_str="+attrType_str + " attrValue="+attrVal);
229 lookupStr = localAttrset + "." + attrType_str + "." + attrVal;
230 String mcr = getDefaultConversionRules().getProperty(lookupStr);
231 if (null == mcr)
232 throw new MCRException("Query attribute not found in properties: " + lookupStr);
233
234 switch (attrType)
235 {
236 case 1:
237 use = mcr;
238 break;
239 case 2:
240 relation = mcr;
241 break;
242 case 4:
243 structure = mcr;
244 break;
245 case 5:
246 truncation = mcr;
247 break;
248 }
249 }
250
251 // See if we have an element name
252 if (token == MCRZ3950PrefixQueryLexer.ELEMENTNAME)
253 {
254 // Consume the element name token and move on to the actual element name
255 token = p.nextToken();
256
257 // Consume the actual element name
258 token = p.nextToken();
259 }
260
261 // Process any terms following the attrs
262
263 // System.err.println("Expecting terms . Next token type = "+token);
264 while ((token == MCRZ3950PrefixQueryLexer.TERM) || (token == MCRZ3950PrefixQueryLexer.NUMBER))
265 {
266
267 // Handle the term
268 if (token == MCRZ3950PrefixQueryLexer.TERM)
269 terms.addElement(p.getString());
270 else
271 terms.addElement("" + p.getNumber());
272
273 // System.err.println("Processing Term(s)"+p.getString());
274
275 token = p.nextToken();
276 }
277
278 String value = "";
279 if (terms.size() > 0)
280 {
281 for (int i = 0; i < terms.size(); i++)
282 {
283 if (i > 0)
284 value = value + " ";
285 value = value + terms.get(i);
286 }
287 } else
288 throw new MCRException("No Terms");
289
290 String operator;
291
292 MCRFieldDef fd = MCRFieldDef.getDef(use);
293 if ("identifier".equals(fd.getDataType()))
294 operator = "=";
295 else
296 operator = "contains";
297
298 if (null != relation) // <, <=, =, >, <=
299 operator = relation;
300 if (null != structure) // phrase, word/worlist
301 operator = structure;
302 if (null != truncation)
303 {
304 operator = "like";
305 if ("1".equals(truncation) ) // right
306 value = value + "*";
307 else if ("2".equals(truncation) ) // left
308 value = "*" + value;
309 else if ("3".equals(truncation) ) // left and right
310 value = "*" + value + "*";
311 }
312
313 return new MCRQueryCondition(fd, operator, value);
314 }
315
316 public static void main(String args[])
317 {
318 //OK String pqfQuery = "@attrset bib-1 @and @attr 4=1 @attr 1=1 \"bob dylan\" @or @attr 1=4 or1 @attr 1=4 or2 xxx";
319 String pqfQuery = "@attrset bib-1 @not @attr 4=1 @attr 1=1 \"bob dßylanä\" @attr 1=4 äöüWert3 ÄÖÜ?*";
320 //OK String pqfQuery = "@attrset bib-1 @and @attr 1=4 Wert1 @not @attr 1=4 Wert2 @attr 1=4 Wert3";
321 //OK String pqfQuery = "@attrset bib-1 @or @attr 1=4 Wert1 @not @attr 1=4 Wert2 @attr 1=4 Wert3";
322 //OK String pqfQuery = "@attrset bib-1 @or @attr 1=4 Wert1 @not @attr 1=4 Wert2 @attr 1=4 Wert3a Wert3b";
323 //OK String pqfQuery = "@attrset bib-1 @or @attr 1=1 Wert1 @not @attr 1=4 Wert2 @attr 1=4 @attr 4=1 Wert3a Wert3b";
324 //OK String pqfQuery = "@attrset bib-1 @and @attr 1=1 Wert1 @not @attr 1=4 Wert2 @attr 1=4 @attr 4=1 Wert3a Wert3b";
325 //OK String pqfQuery = "@attrset bib-1 @not @attr 1=1 Wert1 @and @attr 1=4 Wert2 @attr 1=4 @attr 4=1 Wert3a Wert3b";
326 // String pqfQuery = "@attrset bib-1 @not @attr 4=1 @attr 1=1 \"bob dylan\" @attr 1=4 not2 aaouAOU";
327 if (1 == args.length)
328 pqfQuery = args[0];
329
330 Transaction tx = MCRHIBConnection.instance().getSession().beginTransaction();
331 MCRZ3950PrefixQueryParser pqs = new MCRZ3950PrefixQueryParser(new StringReader(
332 pqfQuery));
333 try
334 {
335 MCRCondition result = pqs.parse();
336 System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
337 System.out.println("pqf query : " + pqfQuery);
338 System.out.println("mycore query : " + result.toString());
339 tx.commit();
340 } catch (Exception e)
341 {
342 tx.rollback();
343 e.printStackTrace();
344 }
345 }
346 }