Select Git revision
class.GetLUPB.php
Leif Andersson authored
class.GetLUPB.php 18.41 KiB
<?php
// räksmörgås
class GetLUPB {
// Query prefix for each site, keyed by the CMS configuration value 'db_name'.
// The constructor copies the matching entry to $this->queryPart. Each prefix
// ends with an AND operator, so collectURL() appends its own conditions
// directly after it.
private static $LUPqueryPart = array(
'Regler' => '&query=department%20exact%20"v1000253"%20AND%20',
'LCCC' => '&query=(researchGroup%20any+"LCCC"+OR+project+any+"LCCC")+AND+'
);
// Per-site default (same keys as $LUPqueryPart) for whether the
// student-papers URL is built in addition to the main one.
private static $LUPincludeMSc = array('Regler' => TRUE, 'LCCC' => FALSE);
// Matches a YYYY-MM-DD date anywhere in a string (the pattern is not anchored).
private static $datePattern = '/[0-9]{4}-[0-9]{2}-[0-9]{2}/';
// Month number (1-12) => three-letter key into $monthNames.
// Index 0 is the placeholder 'none', which has no entry in $monthNames.
private static $short_months = array(
'none', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
'aug', 'sep', 'oct', 'nov', 'dec');
// Three-letter month key => full English month name stored on a publication.
private static $monthNames = array(
'jan'=>'January', 'feb'=>'February', 'mar'=>'March', 'apr'=>'April',
'may'=>'May', 'jun'=>'June', 'jul'=>'July', 'aug'=>'August',
'sep'=>'September', 'oct'=>'October', 'nov'=>'November', 'dec'=>'December'
);
// Publications decoded so far, keyed by their record identifier (LUPid).
private $pubArray = array();
//private $includeMSc;
//private static $module = 'ReglerPublicationsB';
// Raw response body of the current request; filled chunk by chunk by
// receiveResponse() and parsed by decodeXML().
private $responseXML = '';
/**
 * cURL write callback: collects one chunk of the response body.
 *
 * @param mixed  $curl The cURL handle passed in by cURL (not used here).
 * @param string $data The chunk of response data that was just received.
 * @return int Number of bytes taken from $data, as cURL expects from a
 *             write callback.
 */
private function receiveResponse($curl,$data){
    $chunkLength = strlen($data);
    $this->responseXML = $this->responseXML . $data;
    return $chunkLength;
}
// Base URL used by collectURL() for the main publication search.
private static $LUCRIS = 'https://lup.lub.lu.se/';
// Base URL used by collectURL() for the additional student-papers search.
private static $MSc = 'https://lup.lub.lu.se/student-papers/';
// Fixed part of the SRU request; '###' is replaced by the record limit.
private static $urlPart = 'sru?version=1.1&operation=searchRetrieve&maximumRecords=###&startRecord=1';
private $url = '';
// Options applied to every cURL request. CURLOPT_URL is filled in per request
// by getInfo(); CURLOPT_WRITEFUNCTION is added by the constructor.
private $curlOptions = array(
    CURLOPT_URL => '',
    CURLOPT_HEADER => False,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 10,
    //CURLOPT_WRITEFUNCTION => array($this,'receiveResponse')
);
// The properties below used to be created dynamically on first assignment,
// which is deprecated since PHP 8.2. They are declared public because
// dynamically created properties were publicly accessible.
// Site-specific query prefix (an entry of $LUPqueryPart).
public $queryPart = '';
// Whether collectURL() also builds the student-papers URL.
public $includeMSc = FALSE;
// Names of the parameters declared by the calling module.
public $allParams = array();
// Class name of the calling module.
public $module = '';
// URLs produced by collectURL() and fetched by getInfo(); '' means "skip".
public $urlLUCRIS = '';
public $urlMSc = '';
/**
 * Set up the site-specific query prefix, the cURL write callback and the
 * list of parameter names the calling module declares.
 *
 * @param object $caller The calling module. It must provide GetParameters(),
 *                       returning entries that each carry a 'name' key. Its
 *                       class name is stored and later used to look up
 *                       publication types and to create PublicationB objects.
 * @throws InvalidArgumentException When no caller object is supplied.
 */
public function __construct($caller = null) {
    // The signature keeps its historical null default, but the object is
    // required: fail with a clear message instead of a call on null.
    if (!is_object($caller)) {
        throw new InvalidArgumentException('GetLUPB requires the calling module object.');
    }
    $config = cms_utils::get_config();
    $db_name = $config['db_name'];
    // An unknown db_name falls back to "no prefix / no student papers"
    // instead of raising undefined-index notices.
    $this->queryPart = isset(self::$LUPqueryPart[$db_name])
        ? self::$LUPqueryPart[$db_name] : '';
    $this->includeMSc = isset(self::$LUPincludeMSc[$db_name])
        ? self::$LUPincludeMSc[$db_name] : FALSE;
    // receiveResponse() is an instance method that appends to
    // $this->responseXML, so the callback must be bound to this object.
    // A array(class name, method) callable would be a static call, which
    // PHP 8 rejects for non-static methods.
    $this->curlOptions[CURLOPT_WRITEFUNCTION] = array($this, 'receiveResponse');
    $allParams = array();
    foreach ($caller->GetParameters() as $param) {
        $allParams[] = $param['name'];
    }
    $this->allParams = $allParams;
    $this->module = get_class($caller);
}
/**
 * Build the search URL(s) from the supplied search parameters and store them
 * in $this->urlLUCRIS and $this->urlMSc ('' when student papers are not
 * included).
 *
 * Only parameters whose names the calling module declared (see the
 * constructor) are read from $params. Recognised names: title, author,
 * authorExact, LUCATid, yearFrom, yearTo, articleType, project, annual,
 * abstract, limit.
 *
 * Note: the site prefix ends with an AND operator, so when this method
 * returns 0 the generated query ends with a dangling AND. Callers should
 * check the return value before calling getInfo().
 *
 * @param array $params Search parameters keyed by parameter name.
 * @return int Number of search conditions that were added to the query.
 */
public function collectURL($params) {
    // Explicit whitelist instead of variable-variables: every value is always
    // defined, and a parameter name can never overwrite a local variable.
    $known = array(
        'title', 'author', 'authorExact', 'LUCATid', 'yearFrom', 'yearTo',
        'articleType', 'project', 'annual', 'abstract', 'limit',
    );
    $value = array();
    foreach ($known as $name) {
        $value[$name] = '';
        if (in_array($name, $this->allParams, true) && isset($params[$name])) {
            $value[$name] = trim($params[$name]);
        }
    }
    $articleType = ($value['articleType'] == 'all') ? '' : $value['articleType'];
    // 'annual' overrides any explicit project selection.
    $project = $value['annual'] ? 'annual' : $value['project'];
    $limit = $value['limit'];

    $andArray = array();
    if ($value['title']) {
        // Same text as: title all "<title>", with spaces and quotes encoded.
        $andArray[] = 'title%20all%20%22' . self::encodeTerm($value['title']) . '%22';
    }
    if ($value['author']) {
        $andArray[] = sprintf('author+any+"%s"', self::encodeTerm($value['author']));
    }
    // authorExact and LUCATid produce the same kind of condition.
    foreach (array('authorExact', 'LUCATid') as $name) {
        if ($value[$name]) {
            $term = self::encodeTerm($value[$name]);
            $pattern = '(author+exact+"%s"+or+editor+exact+"%s")';
            $andArray[] = sprintf($pattern, $term, $term);
        }
    }
    // Years are placed in the query unquoted, so accept digits only.
    if ($value['yearFrom'] && ctype_digit($value['yearFrom'])) {
        $andArray[] = sprintf('publishingYear>=%s', $value['yearFrom']);
    }
    if ($value['yearTo'] && ctype_digit($value['yearTo'])) {
        $andArray[] = sprintf('publishingYear<=%s', $value['yearTo']);
    }
    if ($articleType) {
        // An explicit type selection decides on its own whether student
        // papers are searched.
        $this->includeMSc = FALSE;
        $module = $this->module;
        $orArray = array();
        foreach (explode(',', $articleType) as $key) {
            $key = trim($key);
            // Skip types the module does not know instead of producing an
            // undefined-index notice and an empty documentType condition.
            if (!isset($module::$publicationTypesLUP[$key])) continue;
            $type = $module::$publicationTypesLUP[$key];
            if ($type == 'studentPaper') {
                $this->includeMSc = TRUE;
            }
            if ($type == 'book') {
                $orArray[] = 'documentType+exact+"book"';
                $orArray[] = 'documentType+exact+"bookEditor"';
            } else if ($type == 'conference') {
                $orArray[] = 'documentType+exact+"conference"';
                $orArray[] = 'documentType+exact+"contributiontoconference"';
            } else {
                $orArray[] = sprintf('documentType+exact+"%s"', $type);
            }
        }
        // Never emit an empty "()" group.
        if ($orArray) {
            $andArray[] = '(' . join('+or+', $orArray) . ')';
        }
    }
    if ($project) {
        $orArray = array();
        foreach (explode(',', $project) as $proj) {
            $orArray[] = sprintf('project="%s"', self::encodeTerm($proj));
        }
        if (count($orArray) == 1) {
            $andArray[] = $orArray[0];
        } else {
            $andArray[] = '(' . join('+or+', $orArray) . ')';
        }
    }
    if ($value['abstract']) {
        $andArray[] = sprintf('abstract+any+"%s"', self::encodeTerm($value['abstract']));
    }
    // The limit becomes the maximumRecords URL parameter: digits only.
    if (!$limit || !ctype_digit($limit)) $limit = '50';

    $urlPart = str_replace('###', $limit, self::$urlPart);
    $tail = $this->queryPart . join('+AND+', $andArray)
        . '&sortKeys=publishingYear,,0+dateCreated,,0';
    $this->urlLUCRIS = self::$LUCRIS . $urlPart . $tail;
    $this->urlMSc = $this->includeMSc ? self::$MSc . $urlPart . $tail : '';
    return count($andArray);
}

/**
 * Make a user-supplied search term safe to place inside a quoted query
 * value in the URL.
 *
 * Double quotes and backslashes are removed so the term cannot end the
 * quoted value early. Everything else is percent-encoded, with spaces
 * written as '+' as before.
 *
 * @param string $term Raw search term.
 * @return string Encoded term.
 */
private static function encodeTerm($term) {
    $term = str_replace(array('"', '\\'), '', $term);
    return str_replace('%20', '+', rawurlencode($term));
}
/**
 * Fetch the URL(s) prepared by collectURL() and decode every response into
 * $this->pubArray (which is emptied first).
 *
 * On a timeout a short HTML notice is printed. Publications decoded from an
 * earlier, successful request stay in $this->pubArray when a later request
 * fails.
 *
 * @return bool true when every request succeeded, false as soon as one fails.
 */
public function getInfo() {
    $this->pubArray = array();
    // Tolerate getInfo() being called before collectURL() has set the URLs.
    $urls = array(
        isset($this->urlLUCRIS) ? $this->urlLUCRIS : '',
        isset($this->urlMSc) ? $this->urlMSc : '',
    );
    foreach ($urls as $url) {
        if (!$url) continue;
        $this->responseXML = '';
        $this->curlOptions[CURLOPT_URL] = $url;
        $cu = curl_init();
        curl_setopt_array($cu, $this->curlOptions);
        $ok = curl_exec($cu);
        // Read the error code before closing; the handle is now released on
        // the failure path too.
        $errno = curl_errno($cu);
        curl_close($cu);
        if (!$ok) {
            // Error code 28 covers every kind of timeout (connect, resolve,
            // transfer), not only the "Operation timed out" message text.
            if ($errno === CURLE_OPERATION_TIMEOUTED) {
                print('<p>Timeout from central server. You may retry the search.</p>');
            }
            return false;
        }
        $this->decodeXML();
    }
    return true;
}
/**
 * Parse the response held in $this->responseXML and store one PublicationB
 * object per record in $this->pubArray, keyed by the record identifier.
 *
 * For every record the MODS data is read field by field (status, title,
 * year, authors/editors, keywords, abstract, projects, links, publication
 * type) and passed to the PublicationB object through putValue(),
 * putAuthorEditor(), putJournal() and putConference().
 *
 * The script is terminated with 'XML error' when the response is not
 * well-formed XML. Nothing is added when the response reports zero records.
 *
 * NOTE(review): URLs and titles from the response are placed into HTML
 * snippets without escaping; confirm whether PublicationB or the templates
 * escape them.
 *
 * @return void
 */
public function decodeXML() {
    $xml = simplexml_load_string($this->responseXML);
    // Test for false explicitly rather than relying on the truthiness of a
    // SimpleXMLElement.
    if ($xml === false) { die('XML error' . PHP_EOL); }
    $count = (int)$xml->numberOfRecords;
    if (!$count) return;

    foreach ($xml->records->record as $record) {
        $mods = $record->recordData->mods;
        $pub = new PublicationB($this->module);
        $LUPid = (string)$mods->recordInfo->recordIdentifier;
        $date = (string)$mods->recordInfo->recordCreationDate;
        $pub->putValue('LUPid', $LUPid);

        // Publication status
        foreach ($mods->note as $note) {
            if ((string)$note['type'] !== 'publicationStatus') continue;
            $pubStatus = (string)$note;
            switch ($pubStatus) {
                case 'published':
                    break;
                case 'inpress':
                    $pubStatus = 'Accepted for publication';
                    break;
                case 'submitted':
                    $pubStatus = 'In submission';
                    break;
                default:
                    $pubStatus = 'no';
                    break;
            }
            $pub->putValue('published', $pubStatus);
        }

        $pub->putValue('title', (string)$mods->titleInfo->title);

        // The year is the leading four digits of dateIssued. An empty or
        // malformed date yields '' instead of a fatal error from calling
        // format() on a failed date_create().
        $dateIssued = (string)$mods->originInfo->dateIssued;
        $year = '';
        if (preg_match('/^\s*([0-9]{4})/', $dateIssued, $yearMatch) === 1) {
            $year = $yearMatch[1];
        }
        $pub->putValue('year', $year);

        // Authors and editors
        foreach ($mods->name as $name) {
            if ((string)$name['type'] !== 'personal') continue;
            $last = ''; $first = '';
            foreach ($name->namePart as $nn) {
                $partType = (string)$nn['type'];
                if ($partType === 'given') $first = trim((string)$nn);
                if ($partType === 'family') $last = trim((string)$nn);
            }
            $role = (string)$name->role->roleTerm;
            if ($role === 'author' || $role === 'editor') {
                $pub->putAuthorEditor($last, $first, $role);
            }
        }

        // Keywords: only subjects without an 'authority' attribute
        foreach ($mods->subject as $subject) {
            if ($subject['authority']) continue;
            $keywords = array();
            foreach ($subject->topic as $keyword) {
                $keywords[] = (string)$keyword;
            }
            if ($keywords) {
                $pub->putValue('keywords', join(', ', $keywords));
            }
        }

        // Abstract
        $abstract = (string)$mods->abstract;
        if ($abstract) { $pub->putValue('abstract', $abstract); }

        // Projects: corporate names with role 'project', plus "project = x"
        // lines found in additionalInfo notes
        $projects = array();
        foreach ($mods->name as $name) {
            if ((string)$name['type'] !== 'corporate') continue;
            if ((string)$name->role->roleTerm != 'project') continue;
            // Store the text, not the SimpleXMLElement object.
            $projects[] = (string)$name->namePart;
        }
        foreach ($mods->note as $note) {
            if ((string)$note['type'] !== 'additionalInfo') continue;
            // \R splits on any line-break style, independent of the
            // platform's PHP_EOL.
            foreach (preg_split('/\R/', (string)$note) as $line) {
                $line = trim($line);
                if (preg_match('/project *= */', $line) == 1) {
                    $projects[] = preg_replace('/project *= */', '', $line);
                }
            }
        }
        $pub->putValue('project', join(', ', $projects));

        // Links: host URL, full-text download, popular-science version
        foreach ($mods->relatedItem as $item) {
            $itemType = (string)$item['type'];
            if ($itemType == 'host') {
                $url = (string)$item->location->url;
                if ($url) {
                    // Links to www.control.lth.se/Publication are not shown.
                    if (strpos($url, 'www.control.lth.se/Publication') !== false) continue;
                    $pub->putValue('html', sprintf('<a href="%s">%s</a>', $url, $url));
                }
            } elseif ($itemType == 'constituent') {
                $url = (string)$item->location->url;
                $displayLabel = (string)$item->location->url['displayLabel'];
                if ($url) {
                    foreach ($item->accessCondition as $cond) {
                        if ((string)$cond['type'] != 'restrictionOnAccess') continue;
                        switch ((string)$cond) {
                            case 'no':
                                $pat = '<a href="%s">Download %s</a>';
                                $pub->putValue('fullText', sprintf($pat, $url, $displayLabel));
                                break;
                            case 'LU/LTH access':
                                $pub->putValue('mailURL', $url);
                                break;
                        }
                    }
                }
            } elseif ($itemType == 'isPopularScience') {
                $url = (string)$item->location->url;
                $popTitle = (string)$item->titleInfo->title;
                $pub->putValue('popsciLink', sprintf('<a href="%s">%s</a>', $url, $popTitle));
            }
        }

        // Publication type: translate the genre into the internal type code.
        // Genres not listed here are passed through unchanged.
        $genre = $mods->genre;
        $aa = (string)$genre[0];
        $bb = (string)$genre['type'];
        $arttype = $aa;
        switch ($aa) {
            case 'book': $arttype = 'book'; break;
            case 'book chapter': $arttype = 'inbook'; break;
            case 'theses':
                $arttype = ($bb == 'masters') ? 'msc' : 'phd';
                break;
            case 'licentiate theses': $arttype = 'lic'; break;
            case 'journalArticle':
                if (!$bb || $bb == 'originalArticle' || $bb == 'letter') {
                    $arttype = 'art';
                }
                break;
            case 'conference publication': $arttype = 'proc'; break;
            case 'conference paper': $arttype = 'inproc'; break;
            case 'conference abstract': $arttype = 'inproc'; break;
            case 'report': $arttype = 'techrep'; break;
            case 'studentPublicationsH3': $arttype = 'msc'; break;
        }
        $pub->putValue('arttype', $arttype);

        // Type-specific fields
        switch ($arttype) {
            case 'inbook':
                foreach ($mods->relatedItem as $item) {
                    if ((string)$item['type'] != 'host') continue;
                    $booktitle = (string)$item->titleInfo->title;
                    if ($booktitle) { $pub->putValue('booktitle', $booktitle); }
                }
                $publisher = (string)$mods->originInfo->publisher;
                if ($publisher) { $pub->putValue('publisher', $publisher); }
                break;

            case 'book':
                $publisher = (string)$mods->originInfo->publisher;
                if ($publisher) { $pub->putValue('publisher', $publisher); }
                foreach ($mods->relatedItem as $item) {
                    if ((string)$item['type'] != 'host') continue;
                    $series = (string)$item->titleInfo->title;
                    if ($series) { $pub->putValue('series', $series); }
                }
                break;

            case 'phd': // The falling-through is intentional.
            case 'lic':
            case 'msc':
            case 'techrep':
                // Month taken from the record creation date
                $monthName = self::monthNameFromDate($date);
                if ($monthName !== '') { $pub->putValue('month', $monthName); }
                $schoolinstitution = ($arttype == 'techrep') ? 'institution' : 'school';
                foreach ($mods->name as $name) {
                    if ((string)$name['type'] !== 'corporate') continue;
                    if ((string)$name->identifier['type'] != 'lucatorg') continue;
                    if ((string)$name->identifier != 'v1000253') continue;
                    $inst = sprintf('%s, Lund University, Sweden', (string)$name->namePart);
                    $pub->putValue($schoolinstitution, $inst);
                }
                foreach ($mods->relatedItem as $item) {
                    if ((string)$item['type'] != 'host') continue;
                    foreach ($item->identifier as $ident) {
                        if ((string)$ident['type'] != 'other') continue;
                        $pub->putValue('isrn', (string)$ident);
                    }
                    foreach ($item->part as $part) {
                        $detailType = (string)$part->detail['type'];
                        if ($detailType === 'volume' || $detailType === 'reportNumber') {
                            $pub->putValue('number', (string)$part->detail->number);
                        }
                    }
                }
                break;

            case 'art':
                // Reset per record so an article without a host title cannot
                // inherit the journal of a previously decoded record.
                $journal = '';
                foreach ($mods->relatedItem as $jj) {
                    if ((string)$jj['type'] == 'host') {
                        if (!$jj->titleInfo->title) continue;
                        $journal = (string)$jj->titleInfo->title;
                        $pub->putValue('journal', $journal);
                    }
                    $pp = $jj->part;
                    if (!$pp) continue;
                    foreach ($pp->detail as $detail) {
                        $detailType = (string)$detail['type'];
                        if ($detailType == 'volume') {
                            $pub->putValue('volume', (string)$detail->number);
                        }
                        if ($detailType == 'issue') {
                            $pub->putValue('number', (string)$detail->number);
                        }
                    }
                    if ((string)$jj->part->extent['unit'] == 'pages') {
                        $pagestart = (string)$jj->part->extent->start;
                        $pageend = (string)$jj->part->extent->end;
                        $pub->putValue('pages', sprintf('%s–%s', $pagestart, $pageend));
                    }
                }
                $pub->putJournal($journal);
                break;

            case 'inproc': // falling through
            case 'poster':
                $conference = '';
                foreach ($mods->name as $name) {
                    if ((string)$name['type'] !== 'conference') continue;
                    $conference = (string)$name->namePart;
                    $pub->putConference($conference);
                }
                foreach ($mods->originInfo as $oi) {
                    foreach ($oi->dateOther as $dateOther) {
                        if ((string)$dateOther['type'] !== 'conferenceDate') continue;
                        // Month taken from the conference date
                        $monthName = self::monthNameFromDate((string)$dateOther);
                        if ($monthName !== '') { $pub->putValue('month', $monthName); }
                    }
                    foreach ($oi->place as $place) {
                        if ((string)$place->placeTerm) {
                            $pub->putValue('address', (string)$place->placeTerm);
                        }
                    }
                }
                // The host title is used as booktitle only when no
                // conference name was found.
                foreach ($mods->relatedItem as $item) {
                    if ((string)$item['type'] != 'host') continue;
                    if ($conference != '') continue;
                    $booktitle = (string)$item->titleInfo->title;
                    if ($booktitle) { $pub->putValue('booktitle', $booktitle); }
                }
                break;

            default:
                foreach ($mods->relatedItem as $item) {
                    if ((string)$item['type'] != 'host') continue;
                    $booktitle = (string)$item->titleInfo->title;
                    if ($booktitle) { $pub->putValue('booktitle', $booktitle); }
                }
                $publisher = (string)$mods->originInfo->publisher;
                if ($publisher) { $pub->putValue('publisher', $publisher); }
                break;
        } // switch ($arttype)

        $this->pubArray[$LUPid] = $pub;
    } // foreach ($xml->records->record as $record)
}

/**
 * Return the full English month name for the first YYYY-MM-DD date found in
 * a string.
 *
 * @param string $date Text containing a date.
 * @return string Month name ('January' .. 'December'), or '' when the text
 *                holds no such date or the month is outside 1..12.
 */
private static function monthNameFromDate($date) {
    if (preg_match('/[0-9]{4}-([0-9]{2})-[0-9]{2}/', $date, $parts) !== 1) {
        return '';
    }
    $monthNumber = (int)$parts[1];
    if ($monthNumber < 1 || $monthNumber > 12) {
        return '';
    }
    return self::$monthNames[self::$short_months[$monthNumber]];
}
/**
 * Return the publications decoded so far.
 *
 * @return array Publication objects keyed by their record identifier.
 */
public function getPubArray() {
    $publications = $this->pubArray;
    return $publications;
}
}
?>