1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.mycore.mods.rss;
20
21 import java.io.IOException;
22 import java.net.URL;
23 import java.text.MessageFormat;
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 import java.util.regex.PatternSyntaxException;
31
32 import org.apache.logging.log4j.LogManager;
33 import org.apache.logging.log4j.Logger;
34 import org.apache.solr.client.solrj.SolrClient;
35 import org.apache.solr.client.solrj.SolrQuery;
36 import org.apache.solr.common.SolrDocumentList;
37 import org.jdom2.Document;
38 import org.jdom2.Element;
39 import org.jdom2.filter.ElementFilter;
40 import org.mycore.access.MCRAccessException;
41 import org.mycore.common.MCRConstants;
42 import org.mycore.common.MCRException;
43 import org.mycore.common.MCRMailer;
44 import org.mycore.common.MCRPersistenceException;
45 import org.mycore.common.config.MCRConfiguration2;
46 import org.mycore.common.config.MCRConfigurationException;
47 import org.mycore.common.xml.MCRURIResolver;
48 import org.mycore.datamodel.metadata.MCRMetadataManager;
49 import org.mycore.datamodel.metadata.MCRObject;
50 import org.mycore.datamodel.metadata.MCRObjectID;
51 import org.mycore.mods.MCRMODSWrapper;
52 import org.mycore.solr.MCRSolrClientFactory;
53 import org.mycore.solr.MCRSolrUtils;
54
55 import com.rometools.rome.feed.synd.SyndEntry;
56 import com.rometools.rome.feed.synd.SyndFeed;
57 import com.rometools.rome.io.FeedException;
58 import com.rometools.rome.io.SyndFeedInput;
59 import com.rometools.rome.io.XmlReader;
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92 public class MCRRSSFeedImporter {
93
94 private String sourceSystemID;
95
96 private String feedURL;
97
98 private Pattern pattern2findID;
99
100 private String field2queryID;
101
102 private String importURI;
103
104 private String xsl2BuildNotificationMail;
105
106 private static final String STATUS_FLAG = "imported";
107
108 private static final String PROPERTY_MAIL_ADDRESS = "MCR.Mail.Address";
109
110 private static final Logger LOGGER = LogManager.getLogger(MCRRSSFeedImporter.class);
111
112 public static void importFromFeed(String sourceSystemID, String projectID) throws Exception {
113 MCRRSSFeedImporter importer = new MCRRSSFeedImporter(sourceSystemID);
114 importer.importPublications(projectID);
115 }
116
117 public MCRRSSFeedImporter(String sourceSystemID) {
118 this.sourceSystemID = sourceSystemID;
119
120 String prefix = "MCR.MODS.RSSFeedImporter." + sourceSystemID + ".";
121
122 feedURL = MCRConfiguration2.getStringOrThrow(prefix + "FeedURL");
123 importURI = MCRConfiguration2.getStringOrThrow(prefix + "PublicationURI");
124 field2queryID = MCRConfiguration2.getStringOrThrow(prefix + "Field2QueryID");
125 xsl2BuildNotificationMail = MCRConfiguration2.getString(prefix + "XSL2BuildNotificationMail").orElse(null);
126
127 getPattern2FindID(prefix);
128 }
129
130 private void getPattern2FindID(String prefix) {
131 String patternProperty = prefix + "Pattern2FindID";
132 try {
133 String pattern = MCRConfiguration2.getStringOrThrow(patternProperty);
134 pattern2findID = Pattern.compile(pattern);
135 } catch (PatternSyntaxException ex) {
136 String msg = "Regular expression syntax error: " + patternProperty;
137 throw new MCRConfigurationException(msg, ex);
138 }
139 }
140
141 public void importPublications(String projectID) throws Exception {
142 LOGGER.info("Getting new publications from {} RSS feed...", sourceSystemID);
143 SyndFeed feed = retrieveFeed();
144
145 List<MCRObject> importedObjects = new ArrayList<>();
146 for (SyndEntry entry : feed.getEntries()) {
147 MCRObject importedObject = handleFeedEntry(entry, projectID);
148 if (importedObject != null) {
149 importedObjects.add(importedObject);
150 }
151 }
152
153 int numPublicationsImported = importedObjects.size();
154 LOGGER.info("imported {} publications.", numPublicationsImported);
155
156 if ((numPublicationsImported > 0) && (xsl2BuildNotificationMail != null)) {
157 sendNotificationMail(importedObjects);
158 }
159 }
160
161 private SyndFeed retrieveFeed() throws IOException, FeedException {
162 XmlReader feedReader = new XmlReader(new URL(feedURL));
163 SyndFeedInput input = new SyndFeedInput();
164 return input.build(feedReader);
165 }
166
167 private MCRObject handleFeedEntry(SyndEntry entry, String projectID)
168 throws MCRPersistenceException, MCRAccessException {
169 String publicationID = getPublicationID(entry);
170 if (publicationID == null) {
171 return null;
172 }
173
174 if (isAlreadyStored(publicationID)) {
175 LOGGER.info("publication with ID {} already existing, will not import.", publicationID);
176 return null;
177 }
178
179 LOGGER.info("publication with ID {} does not exist yet, retrieving data...", publicationID);
180 Element publicationXML = retrieveAndConvertPublication(publicationID);
181 if (shouldIgnore(publicationXML)) {
182 LOGGER.info("publication will be ignored, do not store.");
183 return null;
184 }
185
186 MCRObject obj = buildMCRObject(publicationXML, projectID);
187 MCRMetadataManager.create(obj);
188 return obj;
189 }
190
191 private String getPublicationID(SyndEntry entry) {
192 String link = entry.getLink();
193 if (link == null) {
194 LOGGER.warn("no link found in feed entry");
195 return null;
196 }
197 link = link.trim();
198 Matcher m = pattern2findID.matcher(link);
199 if (m.matches()) {
200 return m.group(1);
201 } else {
202 LOGGER.warn("no publication ID found in link {}", link);
203 return null;
204 }
205 }
206
207 private boolean isAlreadyStored(String publicationID) {
208 SolrClient solrClient = MCRSolrClientFactory.getMainSolrClient();
209 SolrQuery query = new SolrQuery();
210 query.setQuery(field2queryID + ":" + MCRSolrUtils.escapeSearchValue(publicationID));
211 query.setRows(0);
212 SolrDocumentList results;
213 try {
214 results = solrClient.query(query).getResults();
215 return (results.getNumFound() > 0);
216 } catch (Exception ex) {
217 throw new MCRException(ex);
218 }
219 }
220
221 private Element retrieveAndConvertPublication(String externalID) {
222 String uri = new MessageFormat(importURI, Locale.ROOT).format(new String[] { externalID });
223 return MCRURIResolver.instance().resolve(uri);
224 }
225
226
227 private static boolean shouldIgnore(Element publication) {
228 return !publication.getDescendants(new ElementFilter("genre", MCRConstants.MODS_NAMESPACE)).hasNext();
229 }
230
231 private MCRObject buildMCRObject(Element publicationXML, String projectID) {
232 MCRObject obj = new MCRObject(new Document(publicationXML));
233 MCRMODSWrapper wrapper = new MCRMODSWrapper(obj);
234 wrapper.setServiceFlag("status", STATUS_FLAG);
235 MCRObjectID oid = MCRObjectID.getNextFreeId(projectID, "mods");
236 obj.setId(oid);
237 return obj;
238 }
239
240 private void sendNotificationMail(List<MCRObject> importedObjects) throws Exception {
241 Element xml = new Element(STATUS_FLAG).setAttribute("source", this.sourceSystemID);
242 for (MCRObject obj : importedObjects) {
243 xml.addContent(obj.createXML().detachRootElement());
244 }
245
246 HashMap<String, String> parameters = new HashMap<>();
247 parameters.put(PROPERTY_MAIL_ADDRESS, MCRConfiguration2.getStringOrThrow(PROPERTY_MAIL_ADDRESS));
248 MCRMailer.sendMail(new Document(xml), xsl2BuildNotificationMail, parameters);
249 }
250 }