Open Monograph Press  1.1
 All Classes Namespaces Functions Variables Groups Pages
ParaciteRawCitationNlm30CitationSchemaFilter.inc.php
1 <?php
32 import('lib.pkp.plugins.metadata.nlm30.filter.Nlm30CitationSchemaFilter');
33 import('lib.pkp.plugins.metadata.nlm30.filter.Openurl10Nlm30CitationSchemaCrosswalkFilter');
34 import('lib.pkp.classes.filter.SetFilterSetting');
35 
/*
 * Names of the ParaCite citation parser modules. These are the values
 * returned by getSupportedCitationModules(); process() passes the selected
 * one to the paracite.pl wrapper script.
 */
// The 'Standard' module.
define('CITATION_PARSER_PARACITE_STANDARD', 'Standard');
// The 'Citebase' module.
define('CITATION_PARSER_PARACITE_CITEBASE', 'Citebase');
// The 'Jiao' module.
define('CITATION_PARSER_PARACITE_JIAO', 'Jiao');
39 
/**
 * Constructor
 *
 * Sets the display name, registers the 'citationModule' setting and then
 * delegates to the parent constructor.
 *
 * @param $filterGroup FilterGroup
 */
function ParaciteRawCitationNlm30CitationSchemaFilter($filterGroup) {
	$this->setDisplayName('ParaCite');

	// Instantiate the settings of this filter.
	// NOTE(review): the final argument and the closing parenthesis of this
	// call were missing from the listing. The set of accepted values has
	// been restored from this class' own list of supported modules -
	// confirm against the SetFilterSetting constructor signature.
	$citationModuleSetting = new SetFilterSetting(
		'citationModule',
		'metadata.filters.paracite.settings.citationModule.displayName',
		'metadata.filters.paracite.settings.citationModule.validationMessage',
		ParaciteRawCitationNlm30CitationSchemaFilter::getSupportedCitationModules()
	);
	$this->addSetting($citationModuleSetting);

	// The settings are registered before the parent constructor runs;
	// this order is kept from the original code.
	parent::Nlm30CitationSchemaFilter($filterGroup);
}
57 
58  //
59  // Getters and Setters
60  //
/**
 * Get the citation module configured for this filter.
 *
 * @return mixed the value stored under the 'citationModule' key
 *  (presumably one of the CITATION_PARSER_PARACITE_* constants - TODO confirm)
 */
function getCitationModule() {
	$citationModule = $this->getData('citationModule');
	return $citationModule;
}
68 
69 
70  //
71  // Implement template methods from PersistableFilter
72  //
/**
 * Get the dot-separated, fully qualified name of this filter class.
 *
 * @see PersistableFilter::getClassName()
 * @return string
 */
function getClassName() {
	$className = 'lib.pkp.plugins.citationParser.paracite.filter.ParaciteRawCitationNlm30CitationSchemaFilter';
	return $className;
}
79 
80 
81  //
82  // Implement template methods from Filter
83  //
/**
 * Parse a raw citation string with the ParaCite perl wrapper script and
 * convert the parser result into an NLM citation description.
 *
 * The ParaCite result is first mapped to an OpenURL description (book or
 * journal schema, depending on the genre) and then crosswalked to NLM.
 *
 * @param $input string the raw citation string (not modified)
 * @return MetadataDescription|null null if perl is not available, if the
 *  parser returns no output or if its output cannot be read as XML
 */
function &process(&$input) {
	$nullVar = null;

	// Check the availability of perl
	$perlCommand = Config::getVar('cli', 'perl');
	if (empty($perlCommand) || !file_exists($perlCommand)) return $nullVar;

	// Convert to ASCII - Paracite doesn't handle UTF-8 well.
	// $input is passed by reference, so we work on a copy to
	// avoid overwriting the caller's citation string.
	$citationString = String::utf8_to_ascii($input);

	// Call the paracite parser. Every argument that comes from a
	// setting or from user input is escaped for the shell.
	$wrapperScript = dirname(__FILE__).DIRECTORY_SEPARATOR.'paracite.pl';
	$paraciteCommand = $perlCommand.' '.escapeshellarg($wrapperScript).' '.
			escapeshellarg((string)$this->getCitationModule()).' '.escapeshellarg($citationString);
	$xmlResult = shell_exec($paraciteCommand);
	if (empty($xmlResult)) return $nullVar;

	if ( Config::getVar('i18n', 'charset_normalization') == 'On' && !String::utf8_compliant($xmlResult) ) {
		$xmlResult = String::utf8_normalize($xmlResult);
	}

	// Create a temporary DOM document
	$resultDOM = new DOMDocument();
	$resultDOM->recover = true;
	$resultDOM->loadXML($xmlResult);

	// Without a root element there is nothing to extract.
	if (is_null($resultDOM->documentElement)) return $nullVar;

	// Extract the parser results as an array
	$xmlHelper = new XMLHelper();
	$metadata = $xmlHelper->xmlToArray($resultDOM->documentElement);

	// We have to merge subtitle and title as neither OpenURL
	// nor NLM can handle subtitles.
	if (isset($metadata['subtitle'])) {
		if (isset($metadata['title'])) {
			$metadata['title'] .= '. '.$metadata['subtitle'];
		} else {
			// No title to append to: the subtitle becomes the title.
			$metadata['title'] = $metadata['subtitle'];
		}
		unset($metadata['subtitle']);
	}

	// Break up the authors field
	if (isset($metadata['authors'])) {
		$metadata['authors'] = String::trimPunctuation($metadata['authors']);
		$metadata['authors'] = String::iterativeExplode(array(':', ';'), $metadata['authors']);
	}

	// Convert pages to integers
	foreach(array('spage', 'epage') as $pageProperty) {
		if (isset($metadata[$pageProperty])) $metadata[$pageProperty] = (int)$metadata[$pageProperty];
	}

	// Convert titles to title case
	foreach(array('title', 'chapter', 'publication') as $titleProperty) {
		if (isset($metadata[$titleProperty])) $metadata[$titleProperty] = String::titleCase($metadata[$titleProperty]);
	}

	// Map ParaCite results to OpenURL - null means
	// throw the value away.
	$metadataMapping = array(
		'genre' => 'genre',
		'_class' => null,
		'any' => null,
		'authors' => 'au',
		'aufirst' => 'aufirst',
		'aufull' => null,
		'auinit' => 'auinit',
		'aulast' => 'aulast',
		'atitle' => 'atitle',
		'cappublication' => null,
		'captitle' => null,
		'date' => 'date',
		'epage' => 'epage',
		'featureID' => null,
		'id' => null,
		'issue' => 'issue',
		'jnl_epos' => null,
		'jnl_spos' => null,
		'match' => null,
		'marked' => null,
		'num_of_fig' => null,
		'pages' => 'pages',
		'publisher' => 'pub',
		'publoc' => 'place',
		'ref' => null,
		'rest_text' => null,
		'spage' => 'spage',
		'targetURL' => 'url',
		'text' => null,
		'ucpublication' => null,
		'uctitle' => null,
		'volume' => 'volume',
		'year' => 'date'
	);

	// Ignore 'year' if 'date' is set
	if (isset($metadata['date'])) {
		$metadataMapping['year'] = null;
	}

	// Set default genre
	if (empty($metadata['genre'])) $metadata['genre'] = OPENURL10_GENRE_ARTICLE;

	// Handle title, chapter and publication depending on
	// the (inferred) genre. Also instantiate the target schema.
	switch($metadata['genre']) {
		case OPENURL10_GENRE_BOOK:
		case OPENURL10_GENRE_BOOKITEM:
		case OPENURL10_GENRE_REPORT:
		case OPENURL10_GENRE_DOCUMENT:
			$metadataMapping += array(
				'publication' => 'btitle',
				'chapter' => 'atitle'
			);
			if (isset($metadata['title'])) {
				// A lone title is the book title, otherwise
				// it is used as the chapter title (if missing).
				if (!isset($metadata['publication'])) {
					$metadata['publication'] = $metadata['title'];
				} elseif (!isset($metadata['chapter'])) {
					$metadata['chapter'] = $metadata['title'];
				}
				unset($metadata['title']);
			}
			$openurl10SchemaName = 'lib.pkp.plugins.metadata.openurl10.schema.Openurl10BookSchema';
			$openurl10SchemaClass = 'Openurl10BookSchema';
			break;

		case OPENURL10_GENRE_ARTICLE:
		case OPENURL10_GENRE_JOURNAL:
		case OPENURL10_GENRE_ISSUE:
		case OPENURL10_GENRE_CONFERENCE:
		case OPENURL10_GENRE_PROCEEDING:
		case OPENURL10_GENRE_PREPRINT:
		default:
			$metadataMapping += array('publication' => 'jtitle');
			if (isset($metadata['title'])) {
				// A lone title is the journal title, otherwise
				// it is used as the article title (if missing).
				if (!isset($metadata['publication'])) {
					$metadata['publication'] = $metadata['title'];
				} elseif (!isset($metadata['atitle'])) {
					$metadata['atitle'] = $metadata['title'];
				}
				unset($metadata['title']);
			}
			$openurl10SchemaName = 'lib.pkp.plugins.metadata.openurl10.schema.Openurl10JournalSchema';
			$openurl10SchemaClass = 'Openurl10JournalSchema';
			break;
	}

	// Instantiate an OpenURL description
	$openurl10Description = new MetadataDescription($openurl10SchemaName, ASSOC_TYPE_CITATION);
	$openurl10Schema = new $openurl10SchemaClass();

	// Map the ParaCite result to OpenURL
	foreach ($metadata as $paraciteElementName => $paraciteValue) {
		if (empty($paraciteValue)) continue;

		// Trim punctuation
		if (is_string($paraciteValue)) $paraciteValue = String::trimPunctuation($paraciteValue);

		// Elements without a mapping (e.g. 'chapter' for journal
		// genres) are dropped. The assertion still flags them during
		// development but they no longer cause an undefined index
		// when assertions are disabled.
		assert(array_key_exists($paraciteElementName, $metadataMapping));
		if (!array_key_exists($paraciteElementName, $metadataMapping)) continue;

		// Transfer the value to the OpenURL result array
		$openurl10PropertyName = $metadataMapping[$paraciteElementName];
		if (is_null($openurl10PropertyName) || !$openurl10Schema->hasProperty($openurl10PropertyName)) continue;

		if (is_array($paraciteValue)) {
			foreach($paraciteValue as $singleValue) {
				$success = $openurl10Description->addStatement($openurl10PropertyName, $singleValue);
				assert($success);
			}
		} else {
			$success = $openurl10Description->addStatement($openurl10PropertyName, $paraciteValue);
			assert($success);
		}
	}

	// Crosswalk to NLM
	$crosswalkFilter = new Openurl10Nlm30CitationSchemaCrosswalkFilter();
	$nlm30Description =& $crosswalkFilter->execute($openurl10Description);
	assert(is_a($nlm30Description, 'MetadataDescription'));

	// Add 'rest_text' as NLM comment (if given)
	if (isset($metadata['rest_text'])) {
		$nlm30Description->addStatement('comment', String::trimPunctuation($metadata['rest_text']));
	}

	// Set display name and sequence id in the meta-data description
	// to the corresponding values from the filter. This is important
	// so that we later know which result came from which filter.
	$nlm30Description->setDisplayName($this->getDisplayName());
	$nlm30Description->setSeq($this->getSeq());

	return $nlm30Description;
}
278 
279 
280  //
281  // Private helper methods
282  //
/**
 * Get the list of ParaCite citation parser modules known to this filter.
 *
 * @return array the three CITATION_PARSER_PARACITE_* constants
 */
static function getSupportedCitationModules() {
	return array(
		CITATION_PARSER_PARACITE_STANDARD,
		CITATION_PARSER_PARACITE_CITEBASE,
		CITATION_PARSER_PARACITE_JIAO
	);
}
296 }
297 
298 ?>
static utf8_to_ascii($str)
Definition: String.inc.php:576
Class that describes a configurable filter setting which must be one of a given set of values...
static trimPunctuation($string)
Definition: String.inc.php:685
static iterativeExplode($delimiters, $input)
Definition: String.inc.php:743
static utf8_normalize($str)
Definition: String.inc.php:541
setDisplayName($displayName)
Definition: Filter.inc.php:140
& getData($key, $locale=null)
static getVar($section, $key, $default=null)
Definition: Config.inc.php:35
static titleCase($title)
Definition: String.inc.php:694
Filter that converts from NLM citation to OpenURL schemas.
A class that groups useful XML helper functions.
Abstract base class for all filters that transform NLM citation metadata descriptions.
static utf8_compliant($str)
Definition: String.inc.php:490
Class modeling a description (DCMI abstract model) or subject- predicate-object graph (RDF)...
getDisplayName()
Definition: Filter.inc.php:155