View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.orcid.works;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.util.ArrayList;
24  import java.util.List;
25  import java.util.Optional;
26  
27  import org.apache.commons.lang3.StringUtils;
28  import org.apache.logging.log4j.LogManager;
29  import org.apache.logging.log4j.Logger;
30  import org.jdom2.Element;
31  import org.jdom2.JDOMException;
32  import org.mycore.common.MCRConstants;
33  import org.mycore.common.config.MCRConfiguration2;
34  import org.mycore.common.content.MCRContent;
35  import org.mycore.common.content.MCRStreamContent;
36  import org.mycore.common.content.MCRStringContent;
37  import org.mycore.common.content.transformer.MCRContentTransformer;
38  import org.mycore.common.content.transformer.MCRContentTransformerFactory;
39  import org.mycore.mods.merger.MCRMergeTool;
40  import org.mycore.orcid.MCRORCIDConstants;
41  import org.mycore.orcid.MCRORCIDProfile;
42  import org.mycore.orcid.oauth.MCRReadPublicTokenFactory;
43  import org.xml.sax.SAXException;
44  
45  import jakarta.ws.rs.client.WebTarget;
46  import jakarta.ws.rs.core.Response;
47  
48  /**
49   * Provides functionality to fetch work groups, work summaries and work details
50   * from a remote ORCID profile
51   *
52   * @author Frank L\u00FCtzenkirchen
53   */
54  public class MCRWorksFetcher {
55  
56      private static final Logger LOGGER = LogManager.getLogger(MCRWorksFetcher.class);
57  
58      /** The maximum number of works to fetch at once in a bulk request */
59      private static final int BULK_FETCH_SIZE = MCRConfiguration2
60          .getOrThrow("MCR.ORCID.Works.BulkFetchSize", Integer::parseInt);
61  
62      /** Transformer used to convert ORCID's work XML schema to MODS and a representation we use here */
63      private static final MCRContentTransformer T_WORK2MCR = MCRContentTransformerFactory.getTransformer("Work2MyCoRe");
64  
65      /** Transformer used to parse bibTeX to MODS */
66      private static final MCRContentTransformer T_BIBTEX2MODS = MCRContentTransformerFactory
67          .getTransformer("BibTeX2MODS");
68  
69      private MCRORCIDProfile orcid;
70  
71      public MCRWorksFetcher(MCRORCIDProfile orcid) {
72          this.orcid = orcid;
73      }
74  
75      List<MCRGroupOfWorks> fetchGroups(MCRWorksSection worksSection)
76          throws JDOMException, IOException, SAXException {
77          WebTarget target = orcid.getWebTarget().path("works");
78          Element worksXML = fetchWorksXML(target);
79  
80          List<MCRGroupOfWorks> groups = new ArrayList<>();
81          for (Element groupXML : worksXML.getChildren("group", MCRORCIDConstants.NS_ACTIVITIES)) {
82              MCRGroupOfWorks group = new MCRGroupOfWorks();
83              groups.add(group);
84  
85              for (Element workSummary : groupXML.getChildren("work-summary", MCRORCIDConstants.NS_WORK)) {
86                  String putCode = workSummary.getAttributeValue("put-code");
87                  MCRWork work = worksSection.getWork(putCode);
88                  if (work == null) {
89                      work = new MCRWork(orcid, putCode);
90                      setFromWorkXML(work, workSummary);
91                  }
92                  group.add(work);
93              }
94          }
95          return groups;
96      }
97  
98      void fetchDetails(MCRWorksSection worksSection) throws IOException, JDOMException, SAXException {
99          List<String> putCodes = new ArrayList<>();
100         worksSection.getWorks().forEach(work -> putCodes.add(work.getPutCode()));
101 
102         for (int offset = 0; offset < putCodes.size(); offset += BULK_FETCH_SIZE) {
103             int chunkEndIndex = Math.min(offset + BULK_FETCH_SIZE, putCodes.size());
104             String joinedPutCodes = StringUtils.join(putCodes.subList(offset, chunkEndIndex), ',');
105             WebTarget target = orcid.getWebTarget().path("works").path(joinedPutCodes);
106             Element bulk = fetchWorksXML(target);
107 
108             for (Element workXML : bulk.getChildren("work", MCRORCIDConstants.NS_WORK)) {
109                 String putCode = workXML.getAttributeValue("put-code");
110                 workXML.setAttribute("path", "/" + orcid.getORCID() + "/work/" + putCode);
111                 MCRWork work = worksSection.getWork(putCode);
112                 setFromWorkXML(work, workXML);
113             }
114         }
115     }
116 
117     void fetchDetails(MCRWork work) throws JDOMException, IOException, SAXException {
118         WebTarget target = orcid.getWebTarget().path("work").path(work.getPutCode());
119         Element workXML = fetchWorksXML(target);
120         setFromWorkXML(work, workXML);
121     }
122 
123     private Element fetchWorksXML(WebTarget target) throws JDOMException, IOException, SAXException {
124         Response r = getResponse(target, orcid.getAccessToken() != null);
125         MCRContent response = new MCRStreamContent(r.readEntity(InputStream.class));
126         MCRContent transformed = T_WORK2MCR.transform(response);
127         return transformed.asXML().detachRootElement();
128     }
129 
130     private Response getResponse(WebTarget target, boolean usePersonalToken) {
131         LOGGER.info("get {}", target.getUri());
132         Response r = target.request().accept(MCRORCIDConstants.ORCID_XML_MEDIA_TYPE)
133             .header("Authorization",
134                 "Bearer " + (usePersonalToken ? orcid.getAccessToken() : MCRReadPublicTokenFactory.getToken()))
135             .get();
136         if (r.getStatusInfo().getFamily() == Response.Status.Family.SUCCESSFUL) {
137             return r;
138         } else if (!usePersonalToken) {
139             LOGGER.warn("Bad request with public ORDiD token. "
140                 + "Please check respective setting in mycore.properties.");
141             return r;
142         } else {
143             LOGGER.info("Bad request with personal ORCiD token. Using public token instead.");
144             return getResponse(target, false);
145         }
146     }
147 
148     /** Sets the work's properties from the pre-processed, transformed works XML */
149     private void setFromWorkXML(MCRWork work, Element workXML) {
150         Element mods = workXML.getChild("mods", MCRConstants.MODS_NAMESPACE).detach();
151 
152         String bibTeX = workXML.getChildTextTrim("bibTeX");
153         Optional<Element> modsFromBibTeX = bibTeX2MODS(bibTeX);
154         modsFromBibTeX.ifPresent(m -> MCRMergeTool.merge(mods, m));
155 
156         work.setMODS(mods);
157         String sourceID = workXML.getAttributeValue("source");
158         work.setSource(MCRWorkSource.getInstance(sourceID));
159     }
160 
161     /**
162      * Parses the bibTeX code that may be included in the work entry
163      * and returns its transformation to MODS
164      */
165     private Optional<Element> bibTeX2MODS(String bibTeX) {
166         if ((bibTeX != null) && !bibTeX.isEmpty()) {
167             try {
168                 MCRContent result = T_BIBTEX2MODS.transform(new MCRStringContent(bibTeX));
169                 Element modsCollection = result.asXML().getRootElement();
170                 Element modsFromBibTeX = modsCollection.getChild("mods", MCRConstants.MODS_NAMESPACE);
171                 // Remove mods:extension containing the original BibTeX:
172                 modsFromBibTeX.removeChildren("extension", MCRConstants.MODS_NAMESPACE);
173                 return Optional.of(modsFromBibTeX);
174             } catch (Exception ex) {
175                 String msg = "Exception parsing BibTeX: " + bibTeX;
176                 LOGGER.warn("{} {}", msg, ex.getMessage());
177             }
178         }
179         return Optional.empty();
180     }
181 }