001    /**
002     * 
003     * $Revision: 15160 $ $Date: 2009-05-08 09:35:46 +0200 (Fri, 08 May 2009) $
004     *
005     * This file is part of ** M y C o R e **
006     * Visit our homepage at http://www.mycore.de/ for details.
007     *
008     * This program is free software; you can use it, redistribute it
009     * and / or modify it under the terms of the GNU General Public License
010     * (GPL) as published by the Free Software Foundation; either version 2
011     * of the License or (at your option) any later version.
012     *
013     * This program is distributed in the hope that it will be useful, but
014     * WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program, normally in the file license.txt.
020     * If not, write to the Free Software Foundation Inc.,
021     * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
022     *
023     **/
024    package org.mycore.datamodel.metadata;
025    
026    import java.io.IOException;
027    import java.io.StringWriter;
028    import java.text.DateFormat;
029    import java.text.ParseException;
030    import java.util.Date;
031    import java.util.Locale;
032    import java.util.StringTokenizer;
033    import java.util.TimeZone;
034    import java.util.regex.Matcher;
035    import java.util.regex.Pattern;
036    
037    import org.apache.log4j.Logger;
038    import org.jdom.Element;
039    import org.jdom.Namespace;
040    import org.jdom.output.Format;
041    import org.jdom.output.XMLOutputter;
042    import org.joda.time.DateTime;
043    import org.joda.time.DateTimeZone;
044    import org.joda.time.format.DateTimeFormat;
045    import org.joda.time.format.DateTimeFormatter;
046    import org.joda.time.format.ISODateTimeFormat;
047    
048    import org.mycore.common.MCRConfiguration;
049    import org.mycore.common.MCRException;
050    
051    /**
052     * provides support for a restricted range of formats, all of which are valid
053     * ISO 8601 dates and times.
054     * 
055     * The range of supported formats is exactly the same range that is suggested by
056     * the W3C <a href="http://www.w3.org/TR/NOTE-datetime">datetime profile</a> in
057     * its version from 1997-09-15.
058     * 
059     * @author Thomas Scheffler (yagee)
060     * 
061     * @version $Revision: 15160 $ $Date: 2009-05-08 09:35:46 +0200 (Fri, 08 May 2009) $
062     * @since 1.3
063     */
064    public final class MCRMetaISO8601Date extends MCRMetaDefault {
065    
066        private Element export;
067    
068        private boolean changed = true;
069    
070        private static final Namespace DEFAULT_NAMESPACE = Namespace.NO_NAMESPACE;
071    
072        private DateTime dt;
073    
074        private IsoFormat isoFormat;
075    
/**
 * The date and time layouts this class can read and write.
 *
 * Every constant is paired with one of the <code>F_*</code> pattern strings
 * below. {@link #toString()} maps a constant to its pattern string and
 * {@link #getFormat(String)} maps a pattern string back to its constant.
 */
public enum IsoFormat {
    YEAR, YEAR_MONTH, COMPLETE, COMPLETE_HH_MM, COMPLETE_HH_MM_SS, COMPLETE_HH_MM_SS_SSS;

    public static final String F_YEAR = "YYYY";

    public static final String F_YEAR_MONTH = "YYYY-MM";

    public static final String F_COMPLETE = "YYYY-MM-DD";

    public static final String F_COMPLETE_HH_MM = "YYYY-MM-DDThh:mmTZD";

    public static final String F_COMPLETE_HH_MM_SS = "YYYY-MM-DDThh:mm:ssTZD";

    public static final String F_COMPLETE_HH_MM_SS_SSS = "YYYY-MM-DDThh:mm:ss.sTZD";

    /**
     * Returns the pattern string (one of the <code>F_*</code> constants)
     * that belongs to this constant.
     *
     * @return the pattern string of this format
     */
    @Override
    public String toString() {
        switch (this) {
        case YEAR:
            return F_YEAR;
        case YEAR_MONTH:
            return F_YEAR_MONTH;
        case COMPLETE:
            return F_COMPLETE;
        case COMPLETE_HH_MM:
            return F_COMPLETE_HH_MM;
        case COMPLETE_HH_MM_SS:
            return F_COMPLETE_HH_MM_SS;
        case COMPLETE_HH_MM_SS_SSS:
            return F_COMPLETE_HH_MM_SS_SSS;
        default:
            // unreachable as long as every constant has a case above
            return null;
        }
    }

    /**
     * Looks up the constant whose pattern string equals <code>format</code>.
     *
     * The comparison is done by value, so the argument does not have to be
     * one of the <code>F_*</code> constant instances.
     *
     * @param format
     *            a pattern string, may be null
     * @return the matching constant, or null if <code>format</code> is null
     *         or is not one of the known pattern strings
     */
    public static IsoFormat getFormat(String format) {
        if (format == null) {
            return null;
        }
        for (IsoFormat candidate : values()) {
            if (candidate.toString().equals(format)) {
                return candidate;
            }
        }
        // unknown pattern string
        return null;
    }

}
132    
133        private DateTimeFormatter dateTimeFormatter = FormatChooser.getFormatter(null, null);
134    
135        private boolean valid = false;
136    
137        private static final Logger LOGGER = Logger.getLogger(MCRMetaISO8601Date.class);
138    
139        /**
140         * constructs a empty instance.
141         * 
142         * @see MCRMetaDefault#MCRMetaDefault()
143         */
144        public MCRMetaISO8601Date() {
145            super();
146        }
147    
/**
 * Same as the five-argument superclass constructor, except that the
 * language argument is always passed as <code>null</code>.
 * 
 * @param set_datapart
 *            passed on as the first superclass argument
 * @param set_subtag
 *            passed on as the second superclass argument
 * @param set_type
 *            passed on as the fourth superclass argument
 * @param set_inherted
 *            passed on as the fifth superclass argument
 * @see MCRMetaDefault#MCRMetaDefault(String, String, String, String, int)
 */
public MCRMetaISO8601Date(String set_datapart, String set_subtag, String set_type, int set_inherted) {
    super(set_datapart, set_subtag, null, set_type, set_inherted);
}
156    
157        /*
158         * (non-Javadoc)
159         * 
160         * @see org.mycore.datamodel.metadata.MCRMetaDefault#createXML()
161         */
162        public Element createXML() throws MCRException {
163            if (!changed) {
164                return (Element) export.clone();
165            }
166            if (!isValid()) {
167                debug();
168                throw new MCRException("The content of MCRMetaISO8601Date is not valid.");
169            }
170            export = new org.jdom.Element(subtag, DEFAULT_NAMESPACE);
171            export.setAttribute("inherited", Integer.toString(inherited));
172            if (!(this.isoFormat == null || this.isoFormat == IsoFormat.COMPLETE_HH_MM_SS_SSS)) {
173                export.setAttribute("format", this.isoFormat.toString());
174            }
175            if ((type != null) && ((type = type.trim()).length() != 0)) {
176                export.setAttribute("type", type);
177            }
178            export.setText(getISOString());
179            changed = false;
180            return (Element) export.clone();
181        }
182    
183        /*
184         * (non-Javadoc)
185         * 
186         * @see org.mycore.datamodel.metadata.MCRMetaDefault#setFromDOM(org.jdom.Element)
187         */
188        public void setFromDOM(org.jdom.Element element) {
189            super.setFromDOM(element);
190            setFormat(element.getAttributeValue("format"));
191            setDate(element.getTextTrim());
192            this.export = (Element) element.clone();
193        }
194    
195        /**
196         * returns the namespace of this element
197         * 
198         * @return Returns the ns.
199         */
200        protected static Namespace getNs() {
201            return DEFAULT_NAMESPACE;
202        }
203    
204        /**
205         * sets the date for this meta data object
206         * 
207         * @param isoString
208         *            Date in any form that is a valid W3C dateTime
209         */
210        public final void setDate(String isoString) {
211            DateTime dt = null;
212            try {
213                dt = getDateTime(FormatChooser.cropSecondFractions(isoString));
214            } catch (RuntimeException e) {
215                boolean strictParsingEnabled = MCRConfiguration.instance().getBoolean("MCR.Metadata.SimpleDateFormat.StrictParsing", true);
216                if (!strictParsingEnabled) {
217                    /*
218                     * Last line of defence against the worst dates of the universe ;o)
219                     */
220                    LOGGER.warn("Strict date parsing is disabled. This may result in incorrect dates.");
221                    dt = guessDateTime(isoString);
222                } else {
223                    LOGGER.debug("Error while parsing date, set date to NULL.", e);
224                    dt = null;
225                }
226            }
227            setDateTime(dt);
228        }
229    
230        private DateTime guessDateTime(String date) {
231            String locales = MCRConfiguration.instance().getString("MCR.Metadata.SimpleDateFormat.Locales", "de_DE");
232            StringTokenizer tok = new StringTokenizer(locales, ",");
233            while (tok.hasMoreTokens()) {
234                Locale locale = getLocale(tok.nextToken());
235                DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
236                df.setTimeZone(TimeZone.getTimeZone("UTC"));
237                df.setLenient(true);
238                DateTime result = null;
239                try {
240                    Date pDate = df.parse(date);
241                    result = new DateTime(pDate.getTime());
242                    return result;
243                } catch (ParseException e) {
244                    LOGGER.warn("Date guess failed for locale: " + locale);
245                    //we need no big exception in the logs, if we can't guess what it is, a warning should be enough
246                }
247            }
248            LOGGER.error("Error trying to guess date for string: " + date);
249            return null;
250        }
251    
252        private static Locale getLocale(String locale) {
253            String lang = "", country = "";
254            int pos = locale.indexOf("_");
255            if (pos > 0) {
256                lang = locale.substring(0, pos);
257                country = locale.substring(pos + 1);
258            } else {
259                lang = locale;
260            }
261            return new Locale(lang, country);
262        }
263    
264        /**
265         * returns the Date representing this element.
266         * 
267         * @return a new Date instance of the time set in this element
268         */
269        public final Date getDate() {
270            return (dt == null) ? null : (Date) dt.toDate().clone();
271        }
272    
273        /**
274         * sets the date for this meta data object
275         * 
276         * @param dt
277         *            Date object representing date String in Element
278         */
279        public void setDate(Date dt) {
280            if (dt == null) {
281                this.dt = null;
282                valid = false;
283            } else {
284                this.dt = new DateTime(dt.getTime());
285                valid = true;
286            }
287            changed = true;
288        }
289    
290        private void setDateTime(DateTime dt) {
291            if (dt == null) {
292                this.dt = null;
293                valid = false;
294            } else {
295                this.dt = dt;
296                valid = true;
297            }
298            changed = true;
299        }
300    
301        private DateTime getDateTime(String timeString) {
302            dateTimeFormatter = FormatChooser.getFormatter(timeString, this.isoFormat);
303            return dateTimeFormatter.parseDateTime(timeString);
304        }
305    
306        /**
307         * returns a ISO 8601 conform String using the current set format.
308         * 
309         * @return date in ISO 8601 format, or null if date is unset.
310         */
311        public final String getISOString() {
312            return (dt == null) ? null : dateTimeFormatter.print(this.dt);
313        }
314    
315        /**
316         * sets the input and output format.
317         * 
318         * please use only the formats defined on the <a
319         * href="http://www.w3.org/TR/NOTE-datetime">W3C Page</a>, which are also
320         * exported as static fields by this class.
321         * 
322         * @param format
323         *            a format string that is valid conforming to xsd:duration
324         *            schema type.
325         * 
326         */
327        public void setFormat(String format) {
328            setFormat(IsoFormat.getFormat(format));
329        }
330    
331        /**
332         * sets the input and output format.
333         * 
334         * please use only the formats defined on the <a
335         * href="http://www.w3.org/TR/NOTE-datetime">W3C Page</a>, which are also
336         * exported as static fields by this class.
337         * 
338         */
339        public void setFormat(IsoFormat isoFormat) {
340            this.isoFormat = isoFormat;
341            dateTimeFormatter = FormatChooser.getFormatter(null, this.isoFormat);
342        }
343    
344        /**
345         * formats the date to a String
346         * @param format as in {@link DateTimeFormat}
347         * @param locale used by format process
348         * @return null if date is not set yet
349         */
350        public String format(String format, Locale locale) {
351            DateTimeFormatter df = DateTimeFormat.forPattern(format);
352            if (locale != null)
353                df = df.withLocale(locale);
354            return (this.dt == null) ? null : df.print(this.dt);
355        }
356    
357        /**
358         * gets the input and output format.
359         * 
360         * this is a String that is also exported as static fields by this class, or
361         * null if not defined.
362         * 
363         * @return a format string that is valid conforming to xsd:duration schema
364         *         type, or null if not defined.
365         * 
366         */
367        public IsoFormat getFormat() {
368            return this.isoFormat;
369        }
370    
371        /**
372         * This method put debug data to the logger (for the debug mode).
373         */
374        public void debug() {
375            LOGGER.debug("Start Class : MCRMetaISO8601Date");
376            super.debugDefault();
377            LOGGER.debug("Date=" + ((dt == null) ? null : dateTimeFormatter.print(dt)));
378            LOGGER.debug("Format=" + this.isoFormat);
379            XMLOutputter xout = new XMLOutputter(Format.getPrettyFormat());
380            StringWriter sw = new StringWriter();
381            try {
382                xout.output(this.export, sw);
383                LOGGER.debug("JDOM=" + sw.toString());
384            } catch (IOException e) {
385                //ignore
386            }
387        }
388    
389        /**
390         * clone of this instance
391         * 
392         * you will get a (deep) clone of this element
393         * 
394         * @see java.lang.Object#clone()
395         */
396        public Object clone() {
397            MCRMetaISO8601Date out = new MCRMetaISO8601Date();
398            out.setFromDOM((Element) createXML().clone());
399            return out;
400        }
401    
402        /**
403         * checks the formal correctness of this element.
404         * 
405         * This check includes:
406         * <ol>
407         * <li>the included date is set</li>
408         * <li>the super implementation returns true</li>
409         * </ol>
410         * 
411         * @see MCRMetaDefault#isValid()
412         * @return false, if any test fails and the instance should not be used for
413         *         persistence purposes
414         */
415        public boolean isValid() {
416            if (!valid || !super.isValid()) {
417                return false;
418            }
419            return true;
420        }
421    
422        /**
423         * is a helper class for MCRMetaISO8601Date.
424         * 
425         * Please be aware that this class is not supported. It may disappear some day or methods get removed.
426         * 
427         * @author Thomas Scheffler (yagee)
428         *
429         * @version $Revision: 15160 $ $Date: 2009-05-08 09:35:46 +0200 (Fri, 08 May 2009) $
430         * @since 1.3
431         */
432        protected static final class FormatChooser {
433    
434            protected final static DateTimeFormatter YEAR_FORMAT = ISODateTimeFormat.year();
435    
436            protected final static DateTimeFormatter YEAR_MONTH_FORMAT = ISODateTimeFormat.yearMonth();
437    
438            protected final static DateTimeFormatter COMPLETE_FORMAT = ISODateTimeFormat.date();
439    
440            protected final static DateTimeFormatter COMPLETE_HH_MM_FORMAT = ISODateTimeFormat.dateHourMinute();
441    
442            protected final static DateTimeFormatter COMPLETE_HH_MM_SS_FORMAT = ISODateTimeFormat.dateTimeNoMillis();
443    
444            protected final static DateTimeFormatter COMPLETE_HH_MM_SS_SSS_FORMAT = ISODateTimeFormat.dateTime();
445    
446            protected final static DateTimeFormatter UTC_YEAR_FORMAT = ISODateTimeFormat.year().withZone(DateTimeZone.UTC);
447    
448            protected final static DateTimeFormatter UTC_YEAR_MONTH_FORMAT = ISODateTimeFormat.yearMonth().withZone(DateTimeZone.UTC);
449    
450            protected final static DateTimeFormatter UTC_COMPLETE_FORMAT = ISODateTimeFormat.date().withZone(DateTimeZone.UTC);
451    
452            protected final static DateTimeFormatter UTC_COMPLETE_HH_MM_FORMAT = ISODateTimeFormat.dateHourMinute().withZone(DateTimeZone.UTC);
453    
454            protected final static DateTimeFormatter UTC_COMPLETE_HH_MM_SS_FORMAT = ISODateTimeFormat.dateTimeNoMillis().withZone(
455                    DateTimeZone.UTC);
456    
457            protected final static DateTimeFormatter UTC_COMPLETE_HH_MM_SS_SSS_FORMAT = ISODateTimeFormat.dateTime().withZone(DateTimeZone.UTC);
458    
459            private static final Pattern MILLI_CHECK_PATTERN = Pattern.compile("\\.\\d{4,}\\+");
460    
461            private static final boolean USE_UTC = true;
462    
463            /**
464             * returns a DateTimeFormatter for the given isoString or format.
465             * 
466             * This method prefers the format parameter. So if it's not null or not
467             * zero length this method will interpret the format string. You can
468             * also get a formatter for e specific iso String. In either case if the
469             * underlying algorithm can not determine an exact matching formatter it
470             * will allway fall back to a default. So this method will never return
471             * null.
472             * 
473             * @param isoString
474             *            an ISO 8601 formatted time String, or null
475             * @param isoFormat
476             *            a valid format String, or null
477             * @return returns a specific DateTimeFormatter
478             */
479            public static DateTimeFormatter getFormatter(String isoString, IsoFormat isoFormat) {
480                DateTimeFormatter df;
481                if (isoFormat != null) {
482                    df = getFormatterForFormat(isoFormat);
483                } else if ((isoString != null) && (isoString.length() != 0)) {
484                    String normalized = (isoString.charAt(0) == '-') ? isoString.substring(1) : isoString;
485                    df = getFormatterForDuration(normalized);
486                } else {
487                    df = COMPLETE_HH_MM_SS_SSS_FORMAT;
488                }
489                if (USE_UTC) {
490                    df = df.withZone(DateTimeZone.UTC);
491                }
492                return df;
493            }
494    
495            private static DateTimeFormatter getFormatterForFormat(IsoFormat isoFormat) {
496                switch (isoFormat) {
497                case YEAR:
498                    return USE_UTC ? UTC_YEAR_FORMAT : YEAR_FORMAT;
499                case YEAR_MONTH:
500                    return USE_UTC ? UTC_YEAR_MONTH_FORMAT : YEAR_MONTH_FORMAT;
501                case COMPLETE:
502                    return USE_UTC ? UTC_COMPLETE_FORMAT : COMPLETE_FORMAT;
503                case COMPLETE_HH_MM:
504                    return USE_UTC ? UTC_COMPLETE_HH_MM_FORMAT : COMPLETE_HH_MM_FORMAT;
505                case COMPLETE_HH_MM_SS:
506                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_FORMAT : COMPLETE_HH_MM_SS_FORMAT;
507                case COMPLETE_HH_MM_SS_SSS:
508                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_SSS_FORMAT : COMPLETE_HH_MM_SS_SSS_FORMAT;
509                default:
510                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_SSS_FORMAT : COMPLETE_HH_MM_SS_SSS_FORMAT;
511                }
512            }
513    
514            private static DateTimeFormatter getFormatterForDuration(String isoString) {
515                boolean test = false;
516                switch (isoString.length()) {
517                case 1:
518                case 2:
519                case 3:
520                    return USE_UTC ? UTC_YEAR_FORMAT : YEAR_FORMAT;
521                case 4:
522                    if (isoString.indexOf('-') == -1)
523                        return USE_UTC ? UTC_YEAR_FORMAT : YEAR_FORMAT;
524                case 5:
525                case 6:
526                case 7:
527                    return USE_UTC ? UTC_YEAR_MONTH_FORMAT : YEAR_MONTH_FORMAT;
528                case 10:
529                    return USE_UTC ? UTC_COMPLETE_FORMAT : COMPLETE_FORMAT;
530                case 17: // YYYY-MM-DDThh:mm'Z'
531                    test = true;
532                case 22:
533                    if (test || !isoString.endsWith("Z")) {
534                        // YYYY-MM-DDThh:mm[+-]hh:mm
535                        return USE_UTC ? UTC_COMPLETE_HH_MM_FORMAT : COMPLETE_HH_MM_FORMAT;
536                    }
537                    // YYYY-MM-DDThh:mm:ss.s'Z'
538                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_SSS_FORMAT : COMPLETE_HH_MM_SS_SSS_FORMAT;
539                case 20: // YYYY-MM-DDThh:mm:ss'Z'
540                case 25: // YYYY-MM-DDThh:mm:ss[+-]hh:mm
541                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_FORMAT : COMPLETE_HH_MM_SS_FORMAT;
542                case 23: // YYYY-MM-DDThh:mm:ss.ss'Z'
543                case 24: // YYYY-MM-DDThh:mm:ss.sss'Z'
544                case 27: // YYYY-MM-DDThh:mm:ss.s[+-]hh:mm
545                case 28: // YYYY-MM-DDThh:mm:ss.ss[+-]hh:mm
546                case 29: // YYYY-MM-DDThh:mm:ss.ss[+-]hh:mm
547                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_SSS_FORMAT : COMPLETE_HH_MM_SS_SSS_FORMAT;
548                default:
549                    return USE_UTC ? UTC_COMPLETE_HH_MM_SS_SSS_FORMAT : COMPLETE_HH_MM_SS_SSS_FORMAT;
550                }
551            }
552    
553            /**
554             * returns a String that has not more than 3 digits representing
555             * "fractions of a second".
556             * 
557             * If isoString has no or not more than 3 digits this method simply
558             * returns isoString.
559             * 
560             * @param isoString
561             *            an ISO 8601 formatted time String
562             * @return an ISO 8601 formatted time String with at max 3 digits for
563             *         fractions of a second
564             */
565            public final static String cropSecondFractions(String isoString) {
566                Matcher matcher = MILLI_CHECK_PATTERN.matcher(isoString);
567                boolean result = matcher.find();
568                if (result) {
569                    return matcher.replaceFirst(isoString.substring(matcher.start(), matcher.start() + 4) + "+");
570                }
571                return isoString;
572            }
573    
574        }
575    }