Open Journal Systems  3.3.0
XMLParser.inc.php
1 <?php
2 
// Character encodings handed to the XML parser in XMLParser::createParser().
// Both the source and the target encoding are read from the "client_charset"
// key of the "i18n" configuration section.
define(
	'XML_PARSER_SOURCE_ENCODING',
	Config::getVar('i18n', 'client_charset')
);
define(
	'XML_PARSER_TARGET_ENCODING',
	Config::getVar('i18n', 'client_charset')
);
25 
26 import('lib.pkp.classes.xml.XMLParserDOMHandler');
27 
/**
 * Generic class for parsing an XML document into a data structure.
 *
 * Parsing is event driven: a handler object receives startElement(),
 * endElement() and characterData() callbacks and exposes the outcome through
 * getResult(). When no handler has been set, an XMLParserDOMHandler is used.
 */
class XMLParser {
	/** @var object|null Handler receiving the parser callbacks; null until one is set */
	public $handler;

	/** @var array Error messages collected by addError() */
	public $errors;

	/**
	 * Constructor. Starts with an empty error list.
	 */
	public function __construct() {
		$this->errors = array();
	}

	/**
	 * Parse an XML document held in a string.
	 *
	 * @param $text string the XML text to parse
	 * @return mixed whatever the active handler's getResult() returns
	 */
	public function parseText($text) {
		$parser = $this->createParser();
		$defaultHandler = $this->_attachHandler($parser);

		// The whole document is supplied at once, so this is also the final chunk.
		if (!xml_parse($parser, $text, true)) {
			$this->addError(xml_error_string(xml_get_error_code($parser)));
		}

		$result = $this->handler->getResult();
		$this->destroyParser($parser);
		if ($defaultHandler !== null) {
			// Only a handler created here is destroyed; a caller-supplied one is left alone.
			// NOTE(review): the default handler stays installed in $this->handler
			// after destroy(), so a later parse reuses it -- confirm this is intended.
			$defaultHandler->destroy();
		}
		return $result;
	}

	/**
	 * Parse an XML document read from a local file or a URL.
	 *
	 * @param $file string path or URL of the document
	 * @return mixed the active handler's getResult(), or false if the source
	 *  could not be opened
	 */
	public function parse($file) {
		// Open the source before allocating anything, so that a failure neither
		// leaks a parser nor installs a default handler.
		$stream = $this->_getStream($file);
		if (!$stream) return false;

		$parser = $this->createParser();
		$defaultHandler = $this->_attachHandler($parser);

		try {
			$sawError = false;
			$finalised = false;
			while (($data = $stream->read(16384)) !== '') {
				$finalised = $stream->eof();
				if (!xml_parse($parser, $data, $finalised)) {
					$sawError = true;
					$this->addError(xml_error_string(xml_get_error_code($parser)));
				}
			}

			// eof() may only turn true once a read comes back empty, in which case
			// the loop above never marked a chunk as final. Finalise explicitly so
			// end-of-document errors (truncated or empty input) are reported.
			// Skipped after an earlier error to avoid recording it twice.
			if (!$finalised && !$sawError && !xml_parse($parser, '', true)) {
				$this->addError(xml_error_string(xml_get_error_code($parser)));
			}
		} finally {
			$stream->close();
		}

		$result = $this->handler->getResult();
		$this->destroyParser($parser);
		if ($defaultHandler !== null) {
			$defaultHandler->destroy();
		}
		return $result;
	}

	/**
	 * Install the default handler if none is set and register the active
	 * handler's callbacks on the given parser.
	 *
	 * @param $parser resource|object parser returned by createParser()
	 * @return object|null the default handler when one was created here, else null
	 */
	private function _attachHandler($parser) {
		$defaultHandler = null;
		if (!isset($this->handler)) {
			// Use default handler for parsing
			$defaultHandler = new XMLParserDOMHandler();
			$this->setHandler($defaultHandler);
		}

		// Array callables are used instead of xml_set_object() plus method-name
		// strings, a combination that is deprecated as of PHP 8.4.
		xml_set_element_handler(
			$parser,
			array($this->handler, 'startElement'),
			array($this->handler, 'endElement')
		);
		xml_set_character_data_handler($parser, array($this->handler, 'characterData'));

		return $defaultHandler;
	}

	/**
	 * Open a stream on a local file or a URL.
	 *
	 * Anything parse_url() reports a scheme for is fetched with the
	 * application's HTTP client; otherwise the argument is treated as a local
	 * path.
	 *
	 * @param $filenameOrUrl string
	 * @return object|null a stream as returned by GuzzleHttp\Psr7\stream_for(),
	 *  or null if the local file is missing, unreadable or cannot be opened
	 */
	protected function _getStream($filenameOrUrl) {
		$urlPieces = parse_url($filenameOrUrl);
		if ($urlPieces && isset($urlPieces['scheme'])) {
			// Remote URL.
			$client = Application::get()->getHttpClient();
			$response = $client->request('GET', $filenameOrUrl);
			return GuzzleHttp\Psr7\stream_for($response->getBody());
		}

		if (file_exists($filenameOrUrl) && is_readable($filenameOrUrl)) {
			$resource = fopen($filenameOrUrl, 'r');
			// fopen() can still fail after the checks above; passing its false
			// return value to stream_for() would raise an exception.
			if ($resource !== false) {
				return GuzzleHttp\Psr7\stream_for($resource);
			}
		}
		return null;
	}

	/**
	 * Append a message to the error list.
	 *
	 * @param $error string
	 */
	public function addError($error) {
		$this->errors[] = $error;
	}

	/**
	 * Get the collected error messages.
	 *
	 * @return array
	 */
	public function getErrors() {
		return $this->errors;
	}

	/**
	 * Report whether the error list is empty.
	 *
	 * @return boolean true if no error has been recorded
	 */
	public function getStatus() {
		return empty($this->errors);
	}

	/**
	 * Set the handler that receives the parser callbacks.
	 *
	 * @param $handler object must provide startElement(), endElement(),
	 *  characterData(), getResult() and destroy()
	 */
	public function setHandler($handler) {
		$this->handler = $handler;
	}

	/**
	 * Parse XML text into an array keyed by tag name.
	 *
	 * Each tag name maps to a list with one entry per "open" or "complete"
	 * occurrence of that tag; every entry holds the element's 'attributes'
	 * (array) and 'value' (string, '' when absent).
	 *
	 * @param $text string the XML text to parse
	 * @param $tagsToMatch array optional tag names to keep; empty keeps all tags
	 * @return array|null the structure described above, or null on a parse error
	 */
	public function parseTextStruct($text, $tagsToMatch = array()) {
		$parser = $this->createParser();
		$result = xml_parse_into_struct($parser, $text, $values, $tags);
		if (!$result) {
			// Record the failure like parseText() does instead of dropping it.
			$this->addError(xml_error_string(xml_get_error_code($parser)));
		}
		$this->destroyParser($parser);
		if (!$result) return null;

		// Clean up data struct, removing undesired tags if necessary
		$data = array();
		foreach ($tags as $key => $indices) {
			if (!empty($tagsToMatch) && !in_array($key, $tagsToMatch)) {
				continue;
			}

			$data[$key] = array();

			foreach ($indices as $index) {
				$node = $values[$index];
				// Skip "close" and "cdata" entries; only element starts carry the data.
				if (!isset($node['type']) || !in_array($node['type'], array('open', 'complete'), true)) {
					continue;
				}

				$data[$key][] = array(
					'attributes' => isset($node['attributes']) ? $node['attributes'] : array(),
					'value' => isset($node['value']) ? $node['value'] : ''
				);
			}
		}

		return $data;
	}

	/**
	 * Read a file or URL and parse it with parseTextStruct().
	 *
	 * @param $file string path or URL of the document
	 * @param $tagsToMatch array optional tag names to keep; empty keeps all tags
	 * @return array|null|false the parseTextStruct() result, or false if the
	 *  source could not be opened or yielded no content
	 */
	public function parseStruct($file, $tagsToMatch = array()) {
		$stream = $this->_getStream($file);
		if (!$stream) {
			// _getStream() returns null for a missing or unreadable file.
			return false;
		}

		try {
			$fileContents = $stream->getContents();
		} finally {
			$stream->close();
		}

		if (!$fileContents) {
			return false;
		}
		return $this->parseTextStruct($fileContents, $tagsToMatch);
	}

	/**
	 * Create a parser using the configured source and target encodings, with
	 * case folding of tag names switched off.
	 *
	 * @return resource|object
	 */
	public function createParser() {
		$parser = xml_parser_create(XML_PARSER_SOURCE_ENCODING);
		xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, XML_PARSER_TARGET_ENCODING);
		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
		return $parser;
	}

	/**
	 * Free a parser created by createParser().
	 *
	 * @param $parser resource|object
	 */
	public function destroyParser($parser) {
		xml_parser_free($parser);
	}
}
227 
/**
 * Handler whose callbacks all do nothing and whose result is always null.
 * It declares the full set of methods XMLParser calls on a handler:
 * startElement(), endElement(), characterData(), getResult() and destroy().
 */
class XMLParserHandler {

	/**
	 * Callback for an opening tag. Does nothing.
	 *
	 * @param $parser resource|object the parser issuing the callback
	 * @param $tag string
	 * @param $attributes array
	 */
	public function startElement($parser, $tag, $attributes) {
		// Intentionally empty.
	}

	/**
	 * Callback for a closing tag. Does nothing.
	 *
	 * @param $parser resource|object the parser issuing the callback
	 * @param $tag string
	 */
	public function endElement($parser, $tag) {
		// Intentionally empty.
	}

	/**
	 * Callback for character data. Does nothing.
	 *
	 * @param $parser resource|object the parser issuing the callback
	 * @param $data string
	 */
	public function characterData($parser, $data) {
		// Intentionally empty.
	}

	/**
	 * Result of the parse as seen by this handler.
	 *
	 * @return null always null
	 */
	public function getResult() {
		return null;
	}

	/**
	 * Clean-up hook. Does nothing.
	 */
	public function destroy() {
		// Intentionally empty.
	}
}
XMLParser\createParser
createParser()
Definition: XMLParser.inc.php:218
XMLParserHandler\characterData
characterData($parser, $data)
Definition: XMLParser.inc.php:262
XMLParserHandler\endElement
endElement($parser, $tag)
Definition: XMLParser.inc.php:254
XMLParser\_getStream
_getStream($filenameOrUrl)
Definition: XMLParser.inc.php:115
XMLParserHandler\startElement
startElement($parser, $tag, $attributes)
Definition: XMLParser.inc.php:246
XMLParser\parseTextStruct
parseTextStruct($text, $tagsToMatch=array())
Definition: XMLParser.inc.php:168
XMLParser\__construct
__construct()
Definition: XMLParser.inc.php:45
XMLParser\parseStruct
parseStruct($file, $tagsToMatch=array())
Definition: XMLParser.inc.php:205
XMLParser\parse
parse($file)
Definition: XMLParser.inc.php:80
XMLParser\$handler
$handler
Definition: XMLParser.inc.php:33
Config\getVar
static getVar($section, $key, $default=null)
Definition: Config.inc.php:35
XMLParserHandler\getResult
getResult()
Definition: XMLParser.inc.php:270
XMLParser\parseText
parseText($text)
Definition: XMLParser.inc.php:49
XMLParser\$errors
$errors
Definition: XMLParser.inc.php:39
XMLParser\destroyParser
destroyParser($parser)
Definition: XMLParser.inc.php:229
XMLParserHandler
Definition: XMLParser.inc.php:238
XMLParser
Generic class for parsing an XML document into a data structure.
Definition: XMLParser.inc.php:28
XMLParserDOMHandler
Default handler for XMLParser returning a simple DOM-style object. This handler parses an XML documen...
Definition: XMLParserDOMHandler.inc.php:22
XMLParser\getStatus
getStatus()
Definition: XMLParser.inc.php:149
XMLParser\setHandler
setHandler($handler)
Definition: XMLParser.inc.php:157
GuzzleHttp\Psr7\stream_for
stream_for($resource='', array $options=[])
Definition: guzzlehttp/psr7/src/functions.php:78
PKPApplication\get
static get()
Definition: PKPApplication.inc.php:235
XMLParser\addError
addError($error)
Definition: XMLParser.inc.php:133
XMLParser\getErrors
getErrors()
Definition: XMLParser.inc.php:140
XMLParserHandler\destroy
destroy()
Definition: XMLParser.inc.php:278