Moderators: jmacgreg, michael, John
Index: admin.php
===================================================================
RCS file: /cvs/harvester/admin.php,v
retrieving revision 1.11
retrieving revision 1.14
diff -u -r1.11 -r1.14
--- a/admin.php 8 Jun 2003 22:31:20 -0000 1.11
+++ b/admin.php 23 Feb 2004 00:50:03 -0000 1.14
@@ -294,6 +294,9 @@
</form>
</div>
+
+ <br /><br />
+ <img src="images/arrow.gif" width="10" height="10" alt="" /> <a href="admin.php?op=archives" class="backLink">Back to Archives</a>
<?php
}
@@ -331,13 +334,13 @@
// get admin email, protocol version via OAI protocol to verify it is a valid archive
$xml_data = parseXML($vars[archive_oai]."?verb=Identify");
- if(!empty($xml_data[identify][repositoryname]) && !empty($xml_data[identify][protocolversion]) && !empty($xml_data[identify][adminemail])) {
+ if(isset($xml_data['identify']['repositoryname']) && isset($xml_data['identify']['protocolversion']) && isset($xml_data['identify']['adminemail'])) {
// OAI 1.1
$vars[repositoryname] = $xml_data[identify][repositoryname];
$vars[admin_email] = str_replace("mailto:", "", $xml_data[identify][adminemail]);
$vars[protocol_version] = $xml_data[identify][protocolversion];
- } else if(!empty($xml_data['oai-pmh'][identify][repositoryname]) && !empty($xml_data['oai-pmh'][identify][protocolversion]) && !empty($xml_data['oai-pmh'][identify][adminemail])) {
+ } else if(isset($xml_data['oai-pmh']['identify']['repositoryname']) && isset($xml_data['oai-pmh']['identify']['protocolversion']) && isset($xml_data['oai-pmh']['identify']['adminemail'])) {
// OAI 2.0
$vars[repositoryname] = $xml_data['oai-pmh'][identify][repositoryname];
$vars[admin_email] = str_replace("mailto:", "", $xml_data['oai-pmh'][identify][adminemail]);
@@ -345,7 +348,7 @@
} else {
?>
- The URL you entered does not appear to be a valid OAI 1.1 or 2.0 archive. Please go <a href="javascript:history.go(-1)">back</a> and check that you entered the correct URL.
+ The URL you entered does not appear to be a valid OAI 1.1 or 2.0 archive. Please go <a href="javascript:history.go(-1)">back</a> and verify that you entered the correct URL.
<?php
return;
}
@@ -574,10 +577,6 @@
}
$items = $db->assoc_array($result);
-
- $metadata_result = $db->query("SELECT COUNT(id) AS count FROM $dbtable[metadata] WHERE archive='$archive'");
- $metadata_items = $db->assoc_array($metadata_result);
-
?>
<div class="form">
@@ -623,7 +622,7 @@
<div class="row">
<span class="formLabel">Number of papers indexed:</span>
- <span class="formField"><?php echo $metadata_items[count] ?> (<a href="archives.php?id=<?php echo $items[id] ?>">Browse</a>)</span>
+ <span class="formField"><?php echo $items[num_records] ?> (<a href="archives.php?id=<?php echo $items[id] ?>">Browse</a>)</span>
</div>
<div class="row">
@@ -666,6 +665,7 @@
if($num_archives == 0) {
?>
No archives indexed.
+ <br />
<?php
} else {
?>
@@ -687,6 +687,18 @@
</form>
<?php
}
+ ?>
+
+ <br />
+
+ <hr width="100%" />
+
+ <br />
+
+ <span class="sectionTitle">Add Archive</span>
+ <br /><br />
+ <a href="admin.php?op=newArchive">Click here</a> to add a new archive to your index.
+ <?php
}
@@ -993,7 +1005,7 @@
<br />
- <span class="formTitle">Look</span>
+ <span class="formTitle">Appearance</span>
<br /><br />
@@ -1076,7 +1088,7 @@
<br /><br />
» <a href="admin.php?op=archives" class="title">Archives</a><br />
- View and modify harvested archives, update the metadata index.
+ View and modify harvested archives, and update the metadata index.
<br /><br />
» <a href="admin.php?op=newArchive" class="title">Add Archive</a><br />
Index: archives.php
===================================================================
RCS file: /cvs/harvester/archives.php,v
retrieving revision 1.5
retrieving revision 1.7
diff -u -r1.5 -r1.7
--- a/archives.php 8 Jun 2003 22:31:20 -0000 1.5
+++ b/archives.php 23 Feb 2004 00:50:03 -0000 1.7
@@ -213,10 +213,10 @@
// show list of archives
else {
// get archive list from database
- $result = $db->query("SELECT id, name FROM $dbtable[archives] ORDER BY LOWER(name)");
+ $result = $db->query("SELECT id, name, num_records FROM $dbtable[archives] ORDER BY LOWER(name)");
// get total number of records
- $metadata_result = $db->query("SELECT COUNT(*) FROM $dbtable[metadata]");
+ $metadata_result = $db->query("SELECT SUM(num_records) FROM $dbtable[archives]");
list($num_records) = $db->assoc_array($metadata_result);
?>
@@ -234,11 +234,8 @@
for($i=0, $max=$db->num_rows($result); $i<$max; $i++) {
$items = $db->assoc_array($result);
echo "<br />\n<img src=\"images/arrow.gif\" width=\"10\" height=\"10\" alt=\"\" /> <a href=\"archives.php?id=$items[id]\" class=\"title\">$items[name]</a> (";
- // get number of records in this archive
- $metadata_result = $db->query("SELECT COUNT(id) FROM $dbtable[metadata] WHERE archive='$items[id]'");
- list($num_records) = $db->assoc_array($metadata_result);
- echo $num_records." record";
- if($num_records!=1) { echo "s"; }
+ echo $items[num_records]." record";
+ if($items[num_records] != 1) { echo "s"; }
echo ")\n";
}
Index: index.php
===================================================================
RCS file: /cvs/harvester/index.php,v
retrieving revision 1.5
retrieving revision 1.7
diff -u -r1.5 -r1.7
--- a/index.php 8 Jun 2003 08:46:14 -0000 1.5
+++ b/index.php 23 Feb 2004 00:50:03 -0000 1.7
@@ -36,10 +36,8 @@
include("include/header.inc.php");
// get number of items/archives in index
-$result = $db->query("SELECT COUNT(*) FROM $dbtable[archives]");
-list($num_archives) = $db->assoc_array($result);
-$result = $db->query("SELECT COUNT(*) FROM $dbtable[metadata]");
-list($num_records) = $db->assoc_array($result);
+$result = $db->query("SELECT COUNT(*), SUM(num_records) FROM $dbtable[archives]");
+list($num_archives, $num_records) = $db->assoc_array($result);
unset($result);
?>
Index: install.php
===================================================================
RCS file: /cvs/harvester/install.php,v
retrieving revision 1.6
retrieving revision 1.10
diff -u -r1.6 -r1.10
--- a/install.php 9 Jun 2003 01:06:22 -0000 1.6
+++ b/install.php 23 Feb 2004 01:06:15 -0000 1.10
@@ -66,14 +66,14 @@
// include html footer
-$config['html_footer'] = '<span style="font-size:10px">© 2003 <a href="http://www.pkp.ubc.ca/">Public Knowledge Project</a></span>';
+$config['html_footer'] = '<span style="font-size:10px">© 2003-2004 <a href="http://www.pkp.ubc.ca/">Public Knowledge Project</a></span>';
include("include/footer.inc.php");
// Show installation form
function installForm($vars = array(), $error = '') {
?>
- Thank you for downloading the Public Knowledge Project's <b>Open Archives Harvester v1.0</b>. Before proceeding, please read the <a href="docs/README">README</a> file included with this software. For more information about the Public Knowledge Project and our software, please visit our <a href="http://www.pkp.ubc.ca/">web site</a>. Send bug reports or technical support inquiries about the PKP OAI Harvester to <a href="mailto:harvester@pkp.ubc.ca">harvester@pkp.ubc.ca</a>, or visit our online <a href="http://www.pkp.ubc.ca/bugzilla/">bug reporting system</a>.
+ Thank you for downloading the Public Knowledge Project's <b>Open Archives Harvester v1.0.1</b>. Before proceeding, please read the <a href="docs/README">README</a> file included with this software. For more information about the Public Knowledge Project and our software, please visit our <a href="http://www.pkp.ubc.ca/">web site</a>. Send bug reports or technical support inquiries about the PKP OAI Harvester to <a href="mailto:harvester@pkp.ubc.ca">harvester@pkp.ubc.ca</a>, or visit our online <a href="http://www.pkp.ubc.ca/bugzilla/">bug reporting system</a>.
<br /><br />
@@ -232,7 +232,7 @@
'To ensure that the archive that is accumulating in your Public Knowledge Project system forms part of a globally distributed archive of research databases, register your website with our Public Knowledge Project metadata \"harvester\" that regularly gathers metadata on the research documents in your system\'s database.',
'1',
'<a href=\"http://www.pkp.ubc.ca/harvester/\"><img src=\"images/logo.gif\" width=\"321\" height=\"52\" border=\"0\" alt=\"Public Knowledge Project: Open Archives Harvester\" /></a>',
- '<span class=\"instructions\">© 2003 <a href=\"http://www.pkp.ubc.ca/\">Public Knowledge Project</a></span>')")) {
+ '<span class=\"instructions\">© 2003-2004 <a href=\"http://www.pkp.ubc.ca/\">Public Knowledge Project</a></span>')")) {
installForm($vars, "Error: unable to insert into table \"harvester_config\".");
return;
}
@@ -248,6 +248,7 @@
description TEXT NOT NULL DEFAULT '',
rst VARCHAR(32) NOT NULL DEFAULT '',
index_method VARCHAR(1) NOT NULL DEFAULT 'R',
+ num_records INT NOT NULL DEFAULT 0,
datestamp datetime NOT NULL,
UNIQUE(oai)
)")) {
Index: viewarchive.php
===================================================================
RCS file: /cvs/harvester/viewarchive.php,v
retrieving revision 1.7
retrieving revision 1.11
diff -u -r1.7 -r1.11
--- a/viewarchive.php 8 Jun 2003 23:33:59 -0000 1.7
+++ b/viewarchive.php 4 Mar 2004 14:56:49 -0000 1.11
@@ -49,11 +49,8 @@
include("include/footer.inc.php");
return;
}
-
$items = $db->assoc_array($result);
-$result = $db->query("SELECT COUNT(*) FROM $dbtable[metadata] WHERE archive='$id'");
-list($num_records) = $db->assoc_array($result);
// current document's hierarchy in the site
$site_hierarchy = array("Home"=>"index.php", "Archives"=>"archives.php", $items[name]=>"archives.php?id=$id");
@@ -84,7 +81,7 @@
<div class="row">
<span class="formLabel">Admin email:</span>
- <span class="formField"><?php echo $items[email] ?></span>
+ <span class="formField"><?php echo encode_email($items[email]) ?></span>
</div>
<div class="row">
@@ -109,7 +106,7 @@
<div class="row">
<span class="formLabel">Number of articles indexed from this archive:</span>
- <span class="formField"><?php echo $num_records ?> (<a href="archives.php?id=<?php echo $items[id] ?>">Browse</a>)</span>
+ <span class="formField"><?php echo $items[num_records] ?> (<a href="archives.php?id=<?php echo $items[id] ?>">Browse</a>)</span>
</div>
<div class="row">
Index: viewrecord.php
===================================================================
RCS file: /cvs/harvester/viewrecord.php,v
retrieving revision 1.10
retrieving revision 1.12
diff -u -r1.10 -r1.12
--- a/viewrecord.php 8 Jun 2003 22:31:21 -0000 1.10
+++ b/viewrecord.php 4 Mar 2004 14:56:49 -0000 1.12
@@ -149,7 +149,7 @@
<div class="row">
<span class="formLabel">Author(s):</span>
<span class="formField"><?php
- echo (empty($items[email]) ? "" : "<a href=\"mailto:$items[email]\">") . $items[author] . (empty($items[email]) ? "" : "</a>") . (empty($items[affiliation]) ? "" : " ($items[affiliation])") . "\n";
+ echo (empty($items[email]) ? "" : "<a href=\"". encode_email($items[email], 1). "\">") . $items[author] . (empty($items[email]) ? "" : "</a>") . (empty($items[affiliation]) ? "" : " ($items[affiliation])") . "\n";
// print out additional authors
if($items[add_authors]) {
$add_authors = split("\n",$items[add_authors]);
@@ -158,7 +158,7 @@
for($i=0;$i<count($add_authors);$i++) {
if($add_authors[$i]) {
- echo "<br />" . (empty($add_emails[$i]) ? "" : "<a href=\"mailto:$add_emails[$i]\">") . $add_authors[$i] . (empty($add_emails[$i]) ? "" : "</a>") . (empty($add_affiliations[$i]) ? "" : " ($add_affiliations[$i])") . "\n";
+ echo "<br />" . (empty($add_emails[$i]) ? "" : "<a href=\"" . encode_email($add_emails[$i], 1) . "\">") . $add_authors[$i] . (empty($add_emails[$i]) ? "" : "</a>") . (empty($add_affiliations[$i]) ? "" : " ($add_affiliations[$i])") . "\n";
}
}
}
Index: docs/harvester.sql
===================================================================
RCS file: /cvs/harvester/docs/harvester.sql,v
retrieving revision 1.1
retrieving revision 1.4
diff -u -r1.1 -r1.4
--- a/docs/harvester.sql 8 Jun 2003 00:21:26 -0000 1.1
+++ b/docs/harvester.sql 23 Feb 2004 01:06:15 -0000 1.4
@@ -74,6 +74,7 @@
description TEXT NOT NULL DEFAULT '',
rst VARCHAR(32) NOT NULL DEFAULT '',
index_method VARCHAR(1) NOT NULL DEFAULT 'R',
+ num_records INT NOT NULL DEFAULT 0,
datestamp datetime NOT NULL,
UNIQUE(oai)
);
@@ -160,4 +160,19 @@
}
}
+// Encode email
+//
+// Obfuscates an email address for HTML output by replacing characters with
+// numeric character references.
+//
+// $email  - the address to encode
+// $mailto - when true, "mailto:" is prepended before encoding so the result
+//           can be used directly as an href value
+//
+// Returns the encoded string. Each character is either kept or encoded at
+// random, so the exact output differs between calls; any "@" left untouched
+// by the random pass is always replaced with its character reference.
+function encode_email($email, $mailto = false) {
+    static $preg_callback;
+    if (!isset($preg_callback)) {
+        // seed the generator and build the per-character callback only once
+        mt_srand((double) microtime()*1000000);
+        $preg_callback = create_function('$a', 'return mt_rand(0,1)?$a[0]:\'&#\'.ord($a[0]).\';\';');
+    }
+
+    $obfuscated_email = preg_replace_callback('/./', $preg_callback, $mailto ? 'mailto:' . $email : $email);
+    // the terminating ";" is required: without it a digit following the "@"
+    // (e.g. user@123.example) would be read as part of the character reference
+    $obfuscated_email = str_replace('@', '&#' . ord('@') . ';', $obfuscated_email);
+
+    return $obfuscated_email;
+}
+
?>
@@ -85,7 +85,7 @@
div.main {
margin-left: 10px;
margin-right: 10px;
- background: #fff;
+ background: #FFF;
border: 1px #D2D2D2 solid;
padding: 15px;
overflow: auto;
Index: include/harvester.inc.php
===================================================================
RCS file: /cvs/harvester/include/harvester.inc.php,v
retrieving revision 1.9
retrieving revision 1.12
diff -u -r1.9 -r1.12
--- a/include/harvester.inc.php 8 Jun 2003 22:31:21 -0000 1.9
+++ b/include/harvester.inc.php 23 Feb 2004 00:50:03 -0000 1.12
@@ -135,8 +135,10 @@
} while($resumptiontoken != "");
- // update datestamp for archive
- $db->query("UPDATE archives SET datestamp=NOW() WHERE id='$archive_items[id]'");
+ // update datestamp and record count for archive
+ $count_result = $db->query("SELECT COUNT(*) FROM $dbtable[metadata] WHERE archive='$archive_items[id]'");
+ list($total_records) = $db->assoc_array($count_result);
+ $db->query("UPDATE $dbtable[archives] SET num_records='$total_records', datestamp=NOW() WHERE id='$archive_items[id]'");
echo "<b>done</b> ($num_records new/updated record";
if($num_records != 1) { echo "s"; }
@@ -202,7 +204,7 @@
}
// record title
- $metadata[title] = $dc_metadata[title];
+ $metadata[title] = is_array($dc_metadata[title]) ? $dc_metadata[title][0] : $dc_metadata[title];
// record authors
if(is_array($dc_metadata[creator])) {
@@ -270,8 +272,15 @@
}
// record date
- $metadata[date] = $dc_metadata[date];
- if(preg_match("/^\d+\-\d+$/", $metadata[date])) {
+ if(is_array($dc_metadata[date])) {
+ $metadata[date] = $dc_metadata[date][0];
+ } else {
+ $metadata[date] = $dc_metadata[date];
+ }
+
+ if(preg_match('/^(\d\d\d\d\-\d\d\-\d\d)T(\d\d:\d\d:\d\d)Z$/', $metadata[date])) {
+ $metadata[date] = date("Y-m-d", strtotime(preg_replace('/[TZ]/', ' ', $metadata[date])));
+ } else if(preg_match("/^\d+\-\d+$/", $metadata[date])) {
$metadata[date] .= "-01";
} else if(preg_match("/^\d+$/", $metadata[date])) {
$metadata[date] .= "-01-01";
Index: include/xmlparser.inc.php
===================================================================
RCS file: /cvs/harvester/include/xmlparser.inc.php,v
retrieving revision 1.3
retrieving revision 1.8
diff -u -r1.3 -r1.8
--- a/include/xmlparser.inc.php 8 Jun 2003 00:05:32 -0000 1.3
+++ b/include/xmlparser.inc.php 23 Feb 2004 01:49:46 -0000 1.8
@@ -48,16 +48,56 @@
xml_set_character_data_handler($xml_parser, "characterData");
// Open the XML file for reading
+ /*
+ * Recent versions of Dspace reject connections that do not supply a User Agent
+ * so this method will no longer work.
+ *
$fp = @fopen($url, "r");
if(!$fp) { return; }
socket_set_timeout($fp, 320);
+ */
+
+ $url_parts = parse_url($url);
+ if(empty($url_parts['host'])) {
+ return;
+ }
+
+ if($url_parts['scheme'] != 'http') {
+ // only use direct socket connection for HTTP URLs
+ $fp = @fopen($url, "r");
+ if(!$fp) { return; }
+
+ } else {
+ $fp = @fsockopen($url_parts['host'], isset($url_parts['port']) ? $url_parts['port'] : 80, $errno, $errstr, 30);
+ if(!$fp) { return; }
+ $request = sprintf(
+ "GET %s?%s HTTP/1.0\r\n" .
+ "Host: %s\r\n" .
+ "User-Agent: %s\r\n" .
+ "From: %s\r\n" .
+ "Connection: Close\r\n\r\n",
+
+ $url_parts['path'],
+ $url_parts['query'],
+ $url_parts['host'],
+ 'PKPOAIHarvester/1.0',
+ isset($config['contact_email']) && !empty($config['contact_email']) ? $config['contact_email'] : 'harvester@pkp.ubc.ca'
+ );
+ fputs($fp, $request);
+
+ // trim HTTP headers
+ while(trim(fgets($fp, 4096)));
+
+ }
+
// Read the XML file 4KB at a time
- while ($data = fread($fp, 4096))
- // Parse each 4KB chunk with the XML parser created above
- if(!xml_parse($xml_parser, $data, feof($fp))) {
- echo "Error: " . xml_error_string(xml_get_error_code($xml_parser)) . " ... ";
- return;
+ while($data = fread($fp, 4096)) {
+ // Parse each 4KB chunk with the XML parser created above
+ if(!xml_parse($xml_parser, $data, feof($fp))) {
+ echo "Error: " . xml_error_string(xml_get_error_code($xml_parser)) . " ... ";
+ return;
+ }
}
// Close the XML file
@@ -137,7 +177,13 @@
function characterData($parser, $data) {
global $current_data;
- if(!empty($data)) { $data = htmlentities(unhtmlentities(utf8_decode($data))); }
+ if(!empty($data)) {
+ if(function_exists("html_entity_decode")) {
+ $data = str_replace("&amp;", "&", str_replace("\0", "", htmlentities(html_entity_decode(htmlentities($data, ENT_COMPAT, 'UTF-8')))));
+ } else {
+ $data = str_replace("&amp;", "&", utf8_decode(htmlentities(unhtmlentities($data))));
+ }
+ }
$current_data .= $data;
}
Return to Open Harvester Systems Support and Development
Users browsing this forum: No registered users and 0 guests