16 import(
'lib.pkp.classes.task.FileLoader');
// Double-click filter windows, in seconds. Where these constants are read further down in this
// file, the OTHER window is applied to PDF and OTHER file types and is also used as the largest
// window when old entries are purged from the in-memory map; the HTML window is applied to every
// remaining file type.
20 define(
'COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS_HTML', 10);
21 define(
'COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS_OTHER', 30);
46 $this->_plugin = $plugin;
48 if ($plugin->getSetting(CONTEXT_ID_NONE,
'compressArchives')) {
52 $arg = current($args);
56 if ($plugin->getSetting(0,
'createLogFiles')) {
57 $this->_autoStage =
true;
60 case 'externalLogFiles':
61 $this->_externalLogFiles =
true;
67 $args[0] = $plugin->getFilesPath();
69 parent::__construct($args);
71 if ($plugin->getEnabled()) {
75 import(
'classes.statistics.StatisticsHelper');
77 $geoLocationTool = $statsHelper->getGeoLocationTool();
78 $this->_geoLocationTool = $geoLocationTool;
80 $plugin->import(
'UsageStatsTemporaryRecordDAO');
85 $contextFactory = $contextDao->getAll();
86 $contextsByPath = array();
87 while ($context = $contextFactory->next()) {
88 $contextsByPath[$context->getPath()] = $context;
90 $this->_contextsByPath = $contextsByPath;
102 return __(
'plugins.generic.usageStats.usageStatsLoaderName');
110 if (!$plugin->getEnabled()) {
111 $this->
addExecutionLogEntry(__(
'plugins.generic.usageStats.pluginNotEnabled'), SCHEDULED_TASK_MESSAGE_TYPE_WARNING);
119 $processingDirFiles = glob($this->
getProcessingPath() . DIRECTORY_SEPARATOR .
'*');
120 $processingDirError = is_array($processingDirFiles) && count($processingDirFiles);
121 if ($processingDirError) {
125 if ($this->_autoStage) $this->
autoStage();
127 return (parent::executeActions() && !$processingDirError);
136 $fhandle = fopen($filePath,
'r');
139 throw new Exception(__(
'plugins.generic.usageStats.openFileFailed', array(
'file' => $filePath)));
142 $loadId = basename($filePath);
147 $statsDao->deleteByLoadId($loadId);
149 $lastInsertedEntries = array();
152 while(!feof($fhandle)) {
154 $line = trim(fgets($fhandle));
155 if (empty($line) || substr($line, 0, 1) ===
"#")
continue;
156 $entryData = $this->_getDataFromLogEntry($line);
157 if (!$this->_isLogEntryValid($entryData, $lineNumber)) {
158 throw new Exception(__(
'plugins.generic.usageStats.invalidLogEntry',
159 array(
'file' => $filePath,
'lineNumber' => $lineNumber)));
163 if ($entryData[
'url'] ==
'*')
continue;
166 $sucessfulReturnCodes = array(200, 304);
167 if (!in_array($entryData[
'returnCode'], $sucessfulReturnCodes))
continue;
172 list($assocType, $contextPaths, $page,
$op, $args) = $this->_getUrlMatches($entryData[
'url'], $filePath, $lineNumber);
173 if ($assocType && $contextPaths && $page &&
$op) {
174 list($assocId, $assocType) = $this->
getAssoc($assocType, $contextPaths, $page,
$op, $args);
176 $assocId = $assocType =
null;
179 if(!$assocId || !$assocType)
continue;
181 $countryCode = $cityName = $region =
null;
183 if (!$plugin->getSetting(CONTEXT_ID_NONE,
'dataPrivacyOption')) {
184 list($countryCode, $cityName, $region) = $geoTool ? $geoTool->getGeoLocation($entryData[
'ip']) : array(
null,
null,
null);
186 $optionalColumns = $plugin->getSetting(CONTEXT_ID_NONE,
'optionalColumns');
187 if (!in_array(STATISTICS_DIMENSION_CITY, $optionalColumns)) $cityName =
null;
188 if (!in_array(STATISTICS_DIMENSION_REGION, $optionalColumns)) $cityName = $region =
null;
190 $day = date(
'Ymd', $entryData[
'date']);
195 $entryHash = $assocType . $assocId . $entryData[
'ip'];
199 $biggestTimeFilter = COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS_OTHER;
200 foreach($lastInsertedEntries as $hash => $time) {
201 if ($time + $biggestTimeFilter < $entryData[
'date']) {
202 unset($lastInsertedEntries[$hash]);
207 if (isset($lastInsertedEntries[$entryHash])) {
209 if ($type == STATISTICS_FILE_TYPE_PDF || $type == STATISTICS_FILE_TYPE_OTHER) {
210 $timeFilter = COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS_OTHER;
212 $timeFilter = COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS_HTML;
215 $secondsBetweenRequests = $entryData[
'date'] - $lastInsertedEntries[$entryHash];
216 if ($secondsBetweenRequests < $timeFilter) {
219 $statsDao->deleteRecord($assocType, $assocId, $lastInsertedEntries[$entryHash], $loadId);
223 $lastInsertedEntries[$entryHash] = $entryData[
'date'];
224 $statsDao->insert($assocType, $assocId, $day, $entryData[
'date'], $countryCode, $region, $cityName, $type, $loadId);
228 $loadResult = $this->_loadData($loadId);
229 $statsDao->deleteByLoadId($loadId);
233 array(
'file' => $filePath)), SCHEDULED_TASK_MESSAGE_TYPE_ERROR);
234 return FILE_LOADER_RETURN_TO_STAGING;
250 switch ($applicationName) {
252 return OJS_METRIC_TYPE_COUNTER;
255 return OMP_METRIC_TYPE_COUNTER;
258 return OPS_METRIC_TYPE_COUNTER;
279 $logsDirFiles = glob($plugin->getUsageEventLogsPath() . DIRECTORY_SEPARATOR .
'*');
283 $processingDirFiles = glob($this->
getProcessingPath() . DIRECTORY_SEPARATOR .
'*');
284 if (is_array($logsDirFiles)) {
285 $logFiles = array_merge($logFiles, $logsDirFiles);
288 if (is_array($processingDirFiles)) {
289 $logFiles = array_merge($logFiles, $processingDirFiles);
292 foreach ($logFiles as $filePath) {
294 if ($fileMgr->fileExists($filePath)) {
296 $filename = pathinfo($filePath, PATHINFO_BASENAME);
297 $currentDayFilename = $plugin->getUsageEventCurrentDayLogName();
298 if ($filename == $currentDayFilename)
continue;
316 if ($assocType == ASSOC_TYPE_SUBMISSION_FILE || $assocType == ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER) {
318 $file = $submissionFileDao->getLatestRevision($assocId);
326 switch ($applicationName) {
345 switch ($assocType) {
346 case ASSOC_TYPE_ISSUE_GALLEY:
348 $issueGalley = $issueGalleyDao->getById($assocId);
362 if (!is_a($file,
'PKPFile')) {
363 throw new Exception(
'Wrong object type, expected PKPFile.');
365 $fileType = $file->getFileType();
366 $fileExtension = pathinfo($file->getOriginalFileName(), PATHINFO_EXTENSION);
368 case 'application/pdf':
369 case 'application/x-pdf':
372 $type = STATISTICS_FILE_TYPE_PDF;
374 case 'application/octet-stream':
375 if ($fileExtension ==
'pdf') {
376 $type = STATISTICS_FILE_TYPE_PDF;
378 $type = STATISTICS_FILE_TYPE_OTHER;
381 case 'application/msword':
382 $type = STATISTICS_FILE_TYPE_DOC;
384 case 'application/zip':
385 if ($fileExtension ==
'docx') {
386 $type = STATISTICS_FILE_TYPE_DOC;
388 $type = STATISTICS_FILE_TYPE_OTHER;
392 $type = STATISTICS_FILE_TYPE_HTML;
395 $type = STATISTICS_FILE_TYPE_OTHER;
/**
 * Resolve the object that a logged request refers to.
 *
 * Handles the application-independent association types first and, when those
 * produce nothing, delegates to getOJSAssoc(), getOMPAssoc() or getOPSAssoc()
 * depending on $applicationName.
 *
 * NOTE(review): this listing omits several original lines (DAO lookups, null
 * guards, case labels, closing braces). Comments below describe only what the
 * visible lines do.
 *
 * @param $assocType int Association type guessed from the URL.
 * @param $contextPaths array Context path components taken from the URL.
 * @param $page string Requested page (not read by the visible lines, only forwarded).
 * @param $op string Requested operation (not read by the visible lines, only forwarded).
 * @param $args array Remaining URL arguments; index 0 is read as the submission id.
 * @return array array($assocId, $assocTypeToReturn); both null when nothing was resolved.
 */
411 protected function getAssoc($assocType, $contextPaths, $page,
$op, $args) {
412 $assocId = $assocTypeToReturn =
null;
413 switch ($assocType) {
414 case ASSOC_TYPE_SUBMISSION:
// Without a first URL argument there is no submission id to look up.
415 if (!isset($args[0]))
break;
416 $submissionId = $args[0];
418 $submission = $submissionDao->getById($submissionId);
// NOTE(review): a guard on $submission is presumably on an omitted line - confirm.
420 $assocId = $submission->getId();
421 $assocTypeToReturn = $assocType;
// Second visible branch: the association is the context itself (case label not shown).
427 $assocId = $context->getId();
428 $assocTypeToReturn = $assocType;
// A type without an id is meaningless, so drop the type as well.
434 if (!$assocId) $assocTypeToReturn =
null;
// Nothing resolved so far: try the application-specific resolvers.
436 if (!$assocId && !$assocTypeToReturn) {
439 switch ($applicationName) {
441 list($assocId, $assocTypeToReturn) = $this->
getOJSAssoc($assocType, $contextPaths, $page,
$op, $args);
444 list($assocId, $assocTypeToReturn) = $this->
getOMPAssoc($assocType, $contextPaths, $page,
$op, $args);
447 list($assocId, $assocTypeToReturn) = $this->
getOPSAssoc($assocType, $contextPaths, $page,
$op, $args);
451 return array($assocId, $assocTypeToReturn);
/**
 * Resolve OJS-specific association types (submission files, issues and
 * issue galleys) from the URL arguments.
 *
 * NOTE(review): this listing omits several original lines (DAO lookups, the
 * assignments of $fileId and $issueId, some guards and braces). Comments below
 * describe only what the visible lines do.
 *
 * @param $assocType int Association type guessed from the URL.
 * @param $contextPaths array Context path components; the current element is used as key into $this->_contextsByPath.
 * @param $page string Requested page (not read by the visible lines).
 * @param $op string Requested operation (not read by the visible lines).
 * @param $args array URL arguments; indexes 0, 1 and 2 are checked with isset() before the lookups.
 * @return array array($assocId, $assocTypeToReturn).
 */
464 protected function getOJSAssoc($assocType, $contextPaths, $page,
$op, $args) {
465 $assocId = $assocTypeToReturn =
null;
466 switch ($assocType) {
467 case ASSOC_TYPE_SUBMISSION_FILE:
468 if (!isset($args[0]))
break;
469 $submissionId = $args[0];
// The submission must exist before the file is considered.
471 $article = $submissionDao->getById($submissionId);
472 if (!$article)
break;
// The third URL argument is required for the file lookup.
474 if (!isset($args[2]))
break;
// NOTE(review): $fileId is assigned on an omitted line - presumably from $args[2]; confirm.
477 $articleFile = $articleFileDao->getLatestRevision($fileId);
478 if (!$articleFile)
break;
480 $assocId = $articleFile->getFileId();
// NOTE(review): $genre is dereferenced on the next original line without a null
// check; if getById() can return null for a missing genre this is a fatal error - confirm.
484 $genre = $genreDao->getById($articleFile->getGenreId());
// Files that are not plain document-category files (or are supplementary/dependent)
// are reported under the separate COUNTER "other" association type.
485 if ($genre->getCategory() != GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
486 $assocTypeToReturn = ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
488 $assocTypeToReturn = $assocType;
491 case ASSOC_TYPE_ISSUE:
492 case ASSOC_TYPE_ISSUE_GALLEY:
493 if (!isset($args[0]))
break;
// The issue is looked up within the context identified by the URL's context path.
496 if (isset($this->_contextsByPath[current($contextPaths)])) {
497 $context = $this->_contextsByPath[current($contextPaths)];
498 $issue = $issueDao->getById($issueId, $context->getId());
500 $assocId = $issue->getId();
508 $assocTypeToReturn = $assocType;
// NOTE(review): this repeated case label presumably belongs to a nested switch whose
// header is on an omitted line - confirm against the full file.
510 case ASSOC_TYPE_ISSUE_GALLEY:
// A galley can only be resolved once its issue was found and a galley id is present.
511 if (!isset($issue) || !isset($args[1]))
break;
512 $issueGalleyId = $args[1];
514 $issueGalley = $issueGalleyDao->getById($issueGalleyId, $issue->getId());
516 $assocId = $issueGalley->getId();
// Reset both values (the condition guarding this reset is on an omitted line).
519 $assocId = $assocTypeToReturn =
null;
523 return array($assocId, $assocTypeToReturn);
/**
 * Resolve OMP-specific association types (submission files and series)
 * from the URL arguments.
 *
 * NOTE(review): this listing omits several original lines (DAO lookups, the
 * closure body, some guards and braces). Comments below describe only what the
 * visible lines do.
 *
 * @param $assocType int Association type guessed from the URL.
 * @param $contextPaths array Context path components; the current element is used as key into $this->_contextsByPath.
 * @param $page string Requested page (not read by the visible lines).
 * @param $op string Requested operation (not read by the visible lines).
 * @param $args array URL arguments; index 0 is the submission id or series path, index 2 the "fileId-revision" pair.
 * @return array array($assocId, $assocTypeToReturn).
 */
534 protected function getOMPAssoc($assocType, $contextPaths, $page,
$op, $args) {
535 $assocId = $assocTypeToReturn =
null;
536 switch ($assocType) {
537 case ASSOC_TYPE_SUBMISSION_FILE:
538 if (!isset($args[0]))
break;
539 $submissionId = $args[0];
// The monograph must exist before the file is considered.
541 $monograph = $submissionDao->getById($submissionId);
542 if (!$monograph)
break;
544 if (!isset($args[2]))
break;
545 $fileIdAndRevision = $args[2];
// Split the argument on "-" into file id and revision; each part is passed through a
// closure whose body is on an omitted line.
// NOTE(review): if the argument contains no "-", only one element is produced and
// $revision is left unassigned by list() - confirm whether callers guarantee the format.
546 list($fileId, $revision) = array_map(
function($a) {
548 }, preg_split(
'/-/', $fileIdAndRevision));
551 $monographFile = $monographFileDao->getRevision($fileId, $revision);
552 if ($monographFile) {
553 $assocId = $monographFile->getFileId();
556 $assocTypeToReturn = $assocType;
558 case ASSOC_TYPE_SERIES:
559 if (!isset($args[0]))
break;
560 $seriesPath = $args[0];
// The series is looked up by path within the context identified by the URL's context path.
562 if (isset($this->_contextsByPath[current($contextPaths)])) {
563 $context = $this->_contextsByPath[current($contextPaths)];
564 $series = $seriesDao->getByPath($seriesPath, $context->getId());
566 $assocId = $series->getId();
570 $assocTypeToReturn = $assocType;
574 return array($assocId, $assocTypeToReturn);
/**
 * Resolve OPS-specific association types (submission files) from the URL
 * arguments. The visible lines mirror the submission-file branch of getOJSAssoc().
 *
 * NOTE(review): this listing omits several original lines (DAO lookups, the
 * assignment of $fileId, braces). Comments below describe only what the visible
 * lines do.
 *
 * @param $assocType int Association type guessed from the URL.
 * @param $contextPaths array Context path components (not read by the visible lines).
 * @param $page string Requested page (not read by the visible lines).
 * @param $op string Requested operation (not read by the visible lines).
 * @param $args array URL arguments; indexes 0 and 2 are checked with isset() before the lookups.
 * @return array array($assocId, $assocTypeToReturn).
 */
587 protected function getOPSAssoc($assocType, $contextPaths, $page,
$op, $args) {
588 $assocId = $assocTypeToReturn =
null;
589 switch ($assocType) {
590 case ASSOC_TYPE_SUBMISSION_FILE:
591 if (!isset($args[0]))
break;
592 $submissionId = $args[0];
// The submission must exist before the file is considered.
594 $article = $submissionDao->getById($submissionId);
595 if (!$article)
break;
597 if (!isset($args[2]))
break;
// NOTE(review): $fileId is assigned on an omitted line - presumably from $args[2]; confirm.
600 $articleFile = $articleFileDao->getLatestRevision($fileId);
601 if (!$articleFile)
break;
603 $assocId = $articleFile->getFileId();
// NOTE(review): $genre is dereferenced on the next original line without a null
// check; if getById() can return null for a missing genre this is a fatal error - confirm.
607 $genre = $genreDao->getById($articleFile->getGenreId());
// Files that are not plain document-category files (or are supplementary/dependent)
// are reported under the separate COUNTER "other" association type.
608 if ($genre->getCategory() != GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
609 $assocTypeToReturn = ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
611 $assocTypeToReturn = $assocType;
615 return array($assocId, $assocTypeToReturn);
627 $deepestContextDepthIndex =
$application->getContextDepth() - 1;
628 $contextPath = $contextPaths[$deepestContextDepthIndex];
631 if (isset($this->_contextsByPath[$contextPath])) {
632 $context = $this->_contextsByPath[$contextPath];
652 switch ($applicationName) {
654 $pageAndOp = $pageAndOp + array(
655 ASSOC_TYPE_SUBMISSION_FILE => array(
657 ASSOC_TYPE_SUBMISSION => array(
659 ASSOC_TYPE_ISSUE => array(
661 ASSOC_TYPE_ISSUE_GALLEY => array(
667 $pageAndOp = $pageAndOp + array(
668 ASSOC_TYPE_SUBMISSION_FILE => array(
670 ASSOC_TYPE_MONOGRAPH => array(
672 ASSOC_TYPE_SERIES => array(
678 $pageAndOp = $pageAndOp + array(
679 ASSOC_TYPE_SUBMISSION_FILE => array(
680 'preprint/download'),
681 ASSOC_TYPE_SUBMISSION => array(
/**
 * Validate a parsed log entry. Only the date check is visible in this listing;
 * the remaining checks and the return statements are on omitted lines.
 *
 * @param $entry array Parsed entry; the 'date' key is read here.
 * @param $lineNumber int Line number of the entry in the log file (not read by the visible lines).
 */
700 private function _isLogEntryValid($entry, $lineNumber) {
705 $date = $entry[
'date'];
// NOTE(review): with "&&" this condition is true only for a value that is BOTH
// non-numeric AND compares <= 0 (for example the boolean false). A numeric
// zero or negative timestamp does not satisfy it. If the intent is "reject
// anything that is not a positive number", the operator should be "||" - confirm
// and fix together with the omitted body.
706 if (!is_numeric($date) && $date <= 0) {
/**
 * Parse one raw log line into its ip, date, url, returnCode and userAgent parts.
 *
 * NOTE(review): this listing omits several original lines (the $plugin and
 * $returner initialisation, else branches, the return statement). Comments
 * below describe only what the visible lines do.
 *
 * @param $entry string One line of the log file.
 */
718 private function _getDataFromLogEntry($entry) {
720 $createLogFiles = $plugin->getSetting(0,
'createLogFiles');
// When the plugin does not write its own log files, or external log files were
// requested, the parse expression comes from the plugin settings.
721 if (!$createLogFiles || $this->_externalLogFiles) {
725 $parseRegex = $plugin->getSetting(0,
'accessLogFileParseRegex');
// Built-in expression with named groups; the branch it belongs to is on an omitted
// line - presumably the else of the check above (confirm).
728 $parseRegex =
'/^(?P<ip>\S+) \S+ \S+ "(?P<date>.*?)" (?P<url>\S+) (?P<returnCode>\S+) "(?P<userAgent>.*?)"/';
// Fallback when no expression has been set: bracketed date and quoted request line.
732 if (!$parseRegex) $parseRegex =
'/^(?P<ip>\S+) \S+ \S+ \[(?P<date>.*?)\] "\S+ (?P<url>\S+).*?" (?P<returnCode>\S+) \S+ ".*?" "(?P<userAgent>.*?)"/';
735 if (preg_match($parseRegex, $entry, $m)) {
// A custom expression may use plain numbered groups; named groups are used only
// when the match array contains at least one string key.
736 $associative = count(array_filter(array_keys($m),
'is_string')) > 0;
737 $returner[
'ip'] = $associative ? $m[
'ip'] : $m[1];
// strtotime() returns false for an unparsable date string.
738 $returner[
'date'] = strtotime($associative ? $m[
'date'] : $m[2]);
// The URL is stored percent-decoded.
739 $returner[
'url'] = urldecode($associative ? $m[
'url'] : $m[3]);
740 $returner[
'returnCode'] = $associative ? $m[
'returnCode'] : $m[4];
741 $returner[
'userAgent'] = $associative ? $m[
'userAgent'] : $m[5];
/**
 * Match a logged URL against the expected page/operation pairs and return
 * array($assocType, $contextPaths, $page, $operation, $args), or five nulls
 * when the URL does not correspond to a counted object.
 *
 * NOTE(review): this listing omits several original lines (extraction of
 * $contextPaths, $page and $args, the source of $expectedPageAndOp, loop exits,
 * braces). Comments below describe only what the visible lines do.
 *
 * @param $url string URL taken from the log entry.
 * @param $filePath string Log file being processed; used in the warning message.
 * @param $lineNumber int Line of the entry in the log file; used in the warning message.
 * @return array
 */
757 private function _getUrlMatches($url, $filePath, $lineNumber) {
// Returned whenever nothing matches; callers destructure five values.
758 $noMatchesReturner = array(
null,
null,
null,
null,
null);
// The path-info setting is passed, negated, to the URL-parsing helper below.
762 $pathInfoDisabled =
Config::getVar(
'general',
'disable_path_info');
771 $operation =
Core::getOp($url, !$pathInfoDisabled);
776 array(
'file' => $filePath,
'lineNumber' => $lineNumber)), SCHEDULED_TASK_MESSAGE_TYPE_WARNING);
777 return $noMatchesReturner;
// Special case: context paths present, no page, and the operation is 'index'
// (the body of this branch is on an omitted line).
781 if (is_array($contextPaths) && !$page && $operation ==
'index') {
785 if (empty($contextPaths) || !$page || !$operation)
return $noMatchesReturner;
// Expected entries are compared as a single "page/operation" string.
787 $pageAndOperation = $page .
'/' . $operation;
789 $pageAndOpMatch =
false;
791 foreach ($expectedPageAndOp as $workingAssocType => $workingPageAndOps) {
792 foreach($workingPageAndOps as $workingPageAndOp) {
// NOTE(review): loose "==" comparison; both operands look like strings, so
// "===" would presumably be equivalent - confirm before tightening.
793 if ($pageAndOperation == $workingPageAndOp) {
795 $pageAndOpMatch =
true;
// NOTE(review): $workingAssocType is read after the loops. This is only the matched
// type if both loops are left immediately on a match (a break on an omitted line,
// presumably) - confirm.
801 if ($pageAndOpMatch) {
802 return array($workingAssocType, $contextPaths, $page, $operation, $args);
804 return $noMatchesReturner;
/**
 * Copy the temporary records of one load into the metrics table.
 *
 * NOTE(review): the DAO lookups, the loop body around the insert and the
 * return statement are on lines omitted from this listing.
 *
 * @param $loadId string Identifier of the load batch. Where this method is called earlier
 *  in the file, it is the base name of the processed log file.
 */
816 private function _loadData($loadId) {
// Remove metrics already stored for this load id before inserting its records again.
819 $metricsDao->purgeLoadBatch($loadId);
// Walk the temporary records of this load one by one.
821 while ($record = $statsDao->getNextByLoadId($loadId)) {
823 $metricsDao->insertRecord($record);