1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.mycore.orcid.works;
20
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.util.ArrayList;
24 import java.util.List;
25 import java.util.Optional;
26
27 import org.apache.commons.lang3.StringUtils;
28 import org.apache.logging.log4j.LogManager;
29 import org.apache.logging.log4j.Logger;
30 import org.jdom2.Element;
31 import org.jdom2.JDOMException;
32 import org.mycore.common.MCRConstants;
33 import org.mycore.common.config.MCRConfiguration2;
34 import org.mycore.common.content.MCRContent;
35 import org.mycore.common.content.MCRStreamContent;
36 import org.mycore.common.content.MCRStringContent;
37 import org.mycore.common.content.transformer.MCRContentTransformer;
38 import org.mycore.common.content.transformer.MCRContentTransformerFactory;
39 import org.mycore.mods.merger.MCRMergeTool;
40 import org.mycore.orcid.MCRORCIDConstants;
41 import org.mycore.orcid.MCRORCIDProfile;
42 import org.mycore.orcid.oauth.MCRReadPublicTokenFactory;
43 import org.xml.sax.SAXException;
44
45 import jakarta.ws.rs.client.WebTarget;
46 import jakarta.ws.rs.core.Response;
47
48
49
50
51
52
53
54 public class MCRWorksFetcher {
55
56 private static final Logger LOGGER = LogManager.getLogger(MCRWorksFetcher.class);
57
58
59 private static final int BULK_FETCH_SIZE = MCRConfiguration2
60 .getOrThrow("MCR.ORCID.Works.BulkFetchSize", Integer::parseInt);
61
62
63 private static final MCRContentTransformer T_WORK2MCR = MCRContentTransformerFactory.getTransformer("Work2MyCoRe");
64
65
66 private static final MCRContentTransformer T_BIBTEX2MODS = MCRContentTransformerFactory
67 .getTransformer("BibTeX2MODS");
68
69 private MCRORCIDProfile orcid;
70
71 public MCRWorksFetcher(MCRORCIDProfile orcid) {
72 this.orcid = orcid;
73 }
74
75 List<MCRGroupOfWorks> fetchGroups(MCRWorksSection worksSection)
76 throws JDOMException, IOException, SAXException {
77 WebTarget target = orcid.getWebTarget().path("works");
78 Element worksXML = fetchWorksXML(target);
79
80 List<MCRGroupOfWorks> groups = new ArrayList<>();
81 for (Element groupXML : worksXML.getChildren("group", MCRORCIDConstants.NS_ACTIVITIES)) {
82 MCRGroupOfWorks group = new MCRGroupOfWorks();
83 groups.add(group);
84
85 for (Element workSummary : groupXML.getChildren("work-summary", MCRORCIDConstants.NS_WORK)) {
86 String putCode = workSummary.getAttributeValue("put-code");
87 MCRWork work = worksSection.getWork(putCode);
88 if (work == null) {
89 work = new MCRWork(orcid, putCode);
90 setFromWorkXML(work, workSummary);
91 }
92 group.add(work);
93 }
94 }
95 return groups;
96 }
97
98 void fetchDetails(MCRWorksSection worksSection) throws IOException, JDOMException, SAXException {
99 List<String> putCodes = new ArrayList<>();
100 worksSection.getWorks().forEach(work -> putCodes.add(work.getPutCode()));
101
102 for (int offset = 0; offset < putCodes.size(); offset += BULK_FETCH_SIZE) {
103 int chunkEndIndex = Math.min(offset + BULK_FETCH_SIZE, putCodes.size());
104 String joinedPutCodes = StringUtils.join(putCodes.subList(offset, chunkEndIndex), ',');
105 WebTarget target = orcid.getWebTarget().path("works").path(joinedPutCodes);
106 Element bulk = fetchWorksXML(target);
107
108 for (Element workXML : bulk.getChildren("work", MCRORCIDConstants.NS_WORK)) {
109 String putCode = workXML.getAttributeValue("put-code");
110 workXML.setAttribute("path", "/" + orcid.getORCID() + "/work/" + putCode);
111 MCRWork work = worksSection.getWork(putCode);
112 setFromWorkXML(work, workXML);
113 }
114 }
115 }
116
117 void fetchDetails(MCRWork work) throws JDOMException, IOException, SAXException {
118 WebTarget target = orcid.getWebTarget().path("work").path(work.getPutCode());
119 Element workXML = fetchWorksXML(target);
120 setFromWorkXML(work, workXML);
121 }
122
123 private Element fetchWorksXML(WebTarget target) throws JDOMException, IOException, SAXException {
124 Response r = getResponse(target, orcid.getAccessToken() != null);
125 MCRContent response = new MCRStreamContent(r.readEntity(InputStream.class));
126 MCRContent transformed = T_WORK2MCR.transform(response);
127 return transformed.asXML().detachRootElement();
128 }
129
130 private Response getResponse(WebTarget target, boolean usePersonalToken) {
131 LOGGER.info("get {}", target.getUri());
132 Response r = target.request().accept(MCRORCIDConstants.ORCID_XML_MEDIA_TYPE)
133 .header("Authorization",
134 "Bearer " + (usePersonalToken ? orcid.getAccessToken() : MCRReadPublicTokenFactory.getToken()))
135 .get();
136 if (r.getStatusInfo().getFamily() == Response.Status.Family.SUCCESSFUL) {
137 return r;
138 } else if (!usePersonalToken) {
139 LOGGER.warn("Bad request with public ORDiD token. "
140 + "Please check respective setting in mycore.properties.");
141 return r;
142 } else {
143 LOGGER.info("Bad request with personal ORCiD token. Using public token instead.");
144 return getResponse(target, false);
145 }
146 }
147
148
149 private void setFromWorkXML(MCRWork work, Element workXML) {
150 Element mods = workXML.getChild("mods", MCRConstants.MODS_NAMESPACE).detach();
151
152 String bibTeX = workXML.getChildTextTrim("bibTeX");
153 Optional<Element> modsFromBibTeX = bibTeX2MODS(bibTeX);
154 modsFromBibTeX.ifPresent(m -> MCRMergeTool.merge(mods, m));
155
156 work.setMODS(mods);
157 String sourceID = workXML.getAttributeValue("source");
158 work.setSource(MCRWorkSource.getInstance(sourceID));
159 }
160
161
162
163
164
165 private Optional<Element> bibTeX2MODS(String bibTeX) {
166 if ((bibTeX != null) && !bibTeX.isEmpty()) {
167 try {
168 MCRContent result = T_BIBTEX2MODS.transform(new MCRStringContent(bibTeX));
169 Element modsCollection = result.asXML().getRootElement();
170 Element modsFromBibTeX = modsCollection.getChild("mods", MCRConstants.MODS_NAMESPACE);
171
172 modsFromBibTeX.removeChildren("extension", MCRConstants.MODS_NAMESPACE);
173 return Optional.of(modsFromBibTeX);
174 } catch (Exception ex) {
175 String msg = "Exception parsing BibTeX: " + bibTeX;
176 LOGGER.warn("{} {}", msg, ex.getMessage());
177 }
178 }
179 return Optional.empty();
180 }
181 }