Open Monograph Press  1.1
 All Classes Namespaces Functions Variables Groups Pages
WorldcatNlm30CitationSchemaFilter.inc.php
1 <?php
2 
23 import('lib.pkp.plugins.metadata.nlm30.filter.Nlm30CitationSchemaFilter');
24 import('lib.pkp.classes.filter.FilterSetting');
25 
//
// WorldCat web service endpoints used by this filter
//

// Web search (results are XHTML pages that are scraped for OCLC numbers).
// TODO: Might wish to change this if the publication type is
// NLM30_PUBLICATION_TYPE_BOOK, etc. for advanced search
define('WORLDCAT_WEBSERVICE_SEARCH', 'http://www.worldcat.org/search');

// Catalog content lookup by OCLC number. We look up MARCXML, which has
// better granularity than Dublin Core.
define('WORLDCAT_WEBSERVICE_EXTRACT', 'http://www.worldcat.org/webservices/catalog/content/');

// xID lookups: by OCLC number and by ISBN
define('WORLDCAT_WEBSERVICE_OCLC', 'http://xisbn.worldcat.org/webservices/xid/oclcnum/');
define('WORLDCAT_WEBSERVICE_XISBN', 'http://xisbn.worldcat.org/webservices/xid/isbn/');

// TODO: Should we use OCLC basic API as fallback (see <http://www.worldcat.org/devnet/wiki/BasicAPIDetails>)?
33 
39  function WorldcatNlm30CitationSchemaFilter($filterGroup) {
40  $this->setDisplayName('WorldCat');
41 
42  // Instantiate the settings of this filter
43  $apiKeySetting = new FilterSetting('apiKey',
44  'metadata.filters.worldcat.settings.apiKey.displayName',
45  'metadata.filters.worldcat.settings.apiKey.validationMessage',
46  FORM_VALIDATOR_OPTIONAL_VALUE);
47  $this->addSetting($apiKeySetting);
48 
49  parent::Nlm30CitationSchemaFilter($filterGroup, array(NLM30_PUBLICATION_TYPE_BOOK));
50  }
51 
52  //
53  // Getters and Setters
54  //
59  function getApiKey() {
60  return $this->getData('apiKey');
61  }
62 
63 
64  //
65  // Implement template methods from PersistableFilter
66  //
70  function getClassName() {
71  return 'lib.pkp.plugins.citationLookup.worldcat.filter.WorldcatNlm30CitationSchemaFilter';
72  }
73 
74 
75  //
76  // Implement template methods from Filter
77  //
83  function &process(&$citationDescription) {
84  $nullVar = null;
85 
86  // Get the search strings
87  $searchTemplates =& $this->_getSearchTemplates();
88  $searchStrings = $this->constructSearchStrings($searchTemplates, $citationDescription);
89 
90  // Run the searches, in order, until we have a result
91  $searchParams = array('qt' => 'worldcat_org_all');
92  foreach ($searchStrings as $searchString) {
93  $searchParams['q'] = $searchString;
94  // Worldcat Web search; results are (mal-formed) XHTML
95  if (is_null($result = $this->callWebService(WORLDCAT_WEBSERVICE_SEARCH, $searchParams, XSL_TRANSFORMER_DOCTYPE_STRING))) return $nullVar;
96 
97  // parse the OCLC numbers from search results
98  String::regexp_match_all('/id="itemid_(\d+)"/', $result, $matches);
99  if (!empty($matches[1])) break;
100  }
101 
102  // If we don't have an OCLC number, then we cannot get any metadata
103  if (empty($matches[1])) return $nullVar;
104 
105  // use xISBN because it's free
106  foreach($matches[1] as $oclcId) {
107  $isbns = $this->_oclcToIsbns($oclcId);
108  if (is_array($isbns)) break;
109  }
110  if (is_null($isbns)) return $nullVar;
111 
112  $apiKey = $this->getApiKey();
113  if (empty($apiKey)) {
114  // Use the first ISBN if we have multiple
115  $citationDescription =& $this->_lookupXIsbn($isbns[0]);
116  return $citationDescription;
117  } elseif (!empty($isbns[0])) {
118  // Worldcat lookup only works with an API key
119  if (is_null($citationDescription =& $this->_lookupWorldcat($matches[1][0]))) return $nullVar;
120 
121  // Prefer ISBN from xISBN if possible
122  if (!empty($isbns[0])) $citationDescription->addStatement('ibsn', $isbns[0], null, true);
123  return $citationDescription;
124  }
125 
126  // Nothing found
127  return $nullVar;
128  }
129 
130  //
131  // Private methods
132  //
138  function _oclcToIsbns($oclcId) {
139  $nullVar = null;
140  $lookupParams = array(
141  'method' => 'getMetadata',
142  'format' => 'xml',
143  'fl' => '*'
144  );
145  if (is_null($resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_OCLC.urlencode($oclcId), $lookupParams))) return $nullVar;
146 
147  // Extract ISBN from response
148  $oclcnumNodes = $resultDOM->getElementsByTagName('oclcnum');
149  $oclcnumFirstNode = $oclcnumNodes->item(0);
150 
151  if (isset($oclcnumFirstNode)) {
152  return explode(' ', $oclcnumFirstNode->getAttribute('isbn'));
153  } else {
154  return null;
155  }
156  }
157 
164  function &_lookupWorldcat($oclcId) {
165  $nullVar = null;
166  $lookupParams = array('wskey' => $this->getApiKey());
167  if (is_null($resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_EXTRACT.urlencode($oclcId), $lookupParams))) return $nullVar;
168 
169  if (is_null($metadata = $this->transformWebServiceResults($resultDOM, dirname(__FILE__).DIRECTORY_SEPARATOR.'worldcat.xsl'))) return $nullVar;
170  // FIXME: Use MARC parsed author field in XSL rather than full name
171 
172  // Clean non-numerics from ISBN
173  if (!empty($metadata['isbn'])) $metadata['isbn'] = String::regexp_replace('/[^\dX]*/', '', $metadata['isbn']);
174 
175  // Clean non-numerics from issued date (year)
176  if (!empty($metadata['date'])) {
177  $metadata['date'] = String::regexp_replace('/,.*/', ', ', $metadata['date']);
178  $metadata['date'] = String::regexp_replace('/[^\d{4}]/', '', $metadata['date']);
179  }
180 
181  $citationDescription =& $this->getNlm30CitationDescriptionFromMetadataArray($metadata);
182  return $citationDescription;
183  }
184 
191  function &_lookupXIsbn($isbn) {
192  $nullVar = null;
193  $lookupParams = array(
194  'method' => 'getMetadata',
195  'format' => 'xml',
196  'fl' => '*'
197  );
198  if (is_null($resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_XISBN.urlencode($isbn), $lookupParams))) return $nullVar;
199 
200  // Extract metadata from response
201  $recordNodes = $resultDOM->getElementsByTagName('isbn');
202  if (is_null($recordNode = $recordNodes->item(0))) return $nullVar;
203 
204  $metadata['isbn'] = $isbn;
205  $metadata['date'] = $recordNode->getAttribute('year');
206  $metadata['edition'] = $recordNode->getAttribute('ed');
207  $metadata['source'] = $recordNode->getAttribute('title');
208  $metadata['publisher-name'] = $recordNode->getAttribute('publisher');
209  $metadata['publisher-loc'] = $recordNode->getAttribute('city');
210  // Authors are of low quality in xISBN compared to Worldcat's MARC records
211  $metadata['author'] = $recordNode->getAttribute('author');
212 
213  // Clean and process the meta-data
214  $metadata =& $this->postProcessMetadataArray($metadata);
215  $citationDescription =& $this->getNlm30CitationDescriptionFromMetadataArray($metadata);
216  return $citationDescription;
217  }
218 
219  //
220  // Private methods
221  //
226  function &_getSearchTemplates() {
227  $searchTemplates = array(
228  '%isbn%',
229  '%aulast% %title% %date%',
230  '%title% %date%',
231  '%aulast% %date%',
232  '%aulast% %title%',
233  );
234  return $searchTemplates;
235  }
236 }
237 ?>
static regexp_replace($pattern, $replacement, $subject, $limit=-1)
Definition: String.inc.php:377
Class that describes a configurable filter setting.
constructSearchStrings(&$searchTemplates, &$citationDescription)
setDisplayName($displayName)
Definition: Filter.inc.php:140
& transformWebServiceResults(&$xmlResult, $xslFileName)
& getData($key, $locale=null)
Citation lookup filter that uses the OCLC Worldcat Search API and xISBN services to search for book c...
Abstract base class for all filters that transform NLM citation metadata descriptions.
& postProcessMetadataArray(&$preliminaryNlm30Array)
static regexp_match_all($pattern, $subject, &$matches)
Definition: String.inc.php:364
& getNlm30CitationDescriptionFromMetadataArray(&$metadataArray)
& callWebService($url, &$params, $returnType=XSL_TRANSFORMER_DOCTYPE_DOM, $method= 'GET')