#!/usr/bin/perl

# sru-server.cgi - the beginings of an SRU server

# Eric Lease Morgan <emorgan@nd.edu>

# January 27, 2006 - Moving to the MyLibrary distro
# December 23, 2004 - tweaking zeerex record; got diagnostics to work
# December 17, 2004 - moving to ockham.org
# December 15, 2004 - playing with no stylesheets and CQL toSwish
# September 7, 2004 - hacking w/ a flu, sort of
# September 5, 2004 - first investigations

=head1 NAME

sru-server.cgi - a rudimentary SRU server


=head1 DESCRIPTION

This is a rudimentary Search/Retrieve via URL (SRU) server, and it is used to search an index created by swish-e. The server only supports the explain and searchRetrieve operations, not scan. If you want to test the server, then a simple client was distributed with this script (client.html) for this purpose.


=head1 CONFIGURATION

Configuration requires you to set the value of SWISH_INDEX. This constant needs to point to the full path of your swish-e index. That is all you have to do.

If you are using this script in conjunction with the MyLibrary Portal application, then the value of SWISH_INDEX will be the same as the value for IndexFile defined in etc/swish.cfg. As written, this script only outputs title, description, and identifier Dublin Core data. If you changed the ways MyLibrary indexes its data, then you will probably want to enhance how this script does output. Read the code in the search subroutine to see how this is done.

If this server is intended to be used as a production-level service, then you will want to edit and enhance the record subroutine. This subroutine outputs the SRU explain response and as distributed it outputs information that is not valid for your institution.


=head1 AUTHOR

Eric Lease Morgan

=cut


# require the necessary modules
use lib '../lib';
use CGI qw(-oldstyle_urls);
use CGI::Carp qw(fatalsToBrowser);
use CQL::Parser;
use SRU::Request;
use SRU::Response;
use MyLibrary::Core;
use strict;
use SWISH::API;
use warnings;

# change this to point to the full path of your swish-e index
use constant SWISH_INDEX => '/usr/local/apache/htdocs/morgan/zagreb/etc/portal.idx';

# initlize the necessary objects
my $cgi      = CGI->new();
my $request  = SRU::Request->newFromCGI($cgi)          || &error;
my $response = SRU::Response->newFromRequest($request) || &error;

# check for type of response; explain
if ($response->type() eq 'explain') {
	
	# fill up the response's record 
	$response->record(SRU::Response::Record->new(recordSchema => 'http://explain.z3950.org/dtd/2.0/',
	                                             recordData   => &record));

}

# scan
elsif ($response->type() eq 'scan') {
	
	$response->addDiagnostic(SRU::Response::Diagnostic->newFromCode(4, 'Scan operation is not supported at this stage.'));
	$response->asXML();
}

# search
else {
	
	# parse the query and check it
	my $parser = CQL::Parser->new;
	eval { $parser->parse($request->query) };
    if ($@) {
    
    	# bummer, invalid query
    	$response->addDiagnostic(SRU::Response::Diagnostic->newFromCode(10,$@));
        
        print $cgi->header('text/xml');
		print $response->asXML();
		exit;

     }
     
    my $node = $parser->parse($request->query);
	
	# search
	my @results = &search($node->toSwish());
	
	# process each result
	for (my $i = 0; $i <= $#results; $i++) {

		# check for maximum records
		if (defined($request->maximumRecords)) { last if ($i >= $request->maximumRecords) }
		
		# create a records object and add it to the response
		my $record = SRU::Response::Record->new(recordSchema => 'info:srw/schema/1/dc-v1.1', recordData => $results[$i]);
		$response->addRecord($record);
	
	}
	
}

# done; output the result
print $cgi->header(-type => 'text/xml', -charset => 'utf-8');
print $response->asXML();
exit;


######################################################
# local subroutines; should I make these into methods?

sub record {
	
	return <<EOF
<explain xmlns="http://explain.z3950.org/dtd/2.0/">
	<serverInfo protocol='SRU' version='1.1'>
		<host>
			dewey.library.nd.edu
		</host>
		<port>
			80
		</port>
		<database>
			/mylibrary/portal/sru/sru-server.cgi
		</database>
	</serverInfo>
	<databaseInfo>
		<title lang='en' primary='true'>
			MyLibrary Portal SRU Interface
		</title>
		<description lang='en' primary='true'>
			This is a simple SRU server used to search the contents of a MyLibrary portal implementation.
		</description>
		<author lang='en' primary='true'>
			Eric Lease Morgan, University Libraries of Notre Dame
		</author>
	</databaseInfo>
	<metaInfo>
		<dateModified>
			2004-12-23 16:12:04
		</dateModified>
	</metaInfo>
	<indexInfo>
		<set identifier='info:srw/cql-context-set/1/dc-v1.1' name='dc' />
		<index>
			<title>
				title
			</title>
			<map>
				<name set='dc'>
					title
				</name>
			</map>
		</index>
		<index>
			<title>
				creator
			</title>
			<map>
				<name set='dc'>
					creator
				</name>
			</map>
		</index>
		<index>
			<title>
				description
			</title>
			<map>
				<name set='dc'>
					description
				</name>
			</map>
		</index>
	</indexInfo>
	<schemaInfo>
		<schema identifier='info:srw/schema/1/dc-v1.1' sort='false' name='dc'>
			<title>
				Dublin Core
			</title>
		</schema>
	</schemaInfo>
	<configInfo>
		<default type='numberOfRecords'>
			250
		</default>
	</configInfo>
</explain>

EOF

}


sub search {

	# initialize
	my $query = $_[0];
	my @results;
	
	# open the index
	my $swish = SWISH::API->new(SWISH_INDEX);
	
	# do a search with the command line input
	my $results = $swish->Query($query);
	
	# check for hits
	if ($results->Hits > 0) {
	
		# process each result
		while (my $r = $results->NextResult) {
		
			# get the id
			my $resource = MyLibrary::Resource->new(id => $r->Property("swishdocpath"));
			
			# get the location
			my $location;
			my @locations = $resource->resource_locations();
			foreach (@locations) { $location .= $_->location }

			# push on to results
			if ($resource) {push @results, '<srw_dc:dc xmlns="http://www.w3.org/TR/xhtml1/strict" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:srw_dc="info:srw/schema/1/dc-v1.1"><dc:title>' . &escape_entities($resource->name) . '</dc:title><dc:description>' . &escape_entities($resource->note) . '</dc:description><dc:identifier>' . &escape_entities($location) . '</dc:identifier></srw_dc:dc>'; }
			
		}
				
	}
	
	# done; return it
	return @results;
	
}


sub escape_entities {

	# get the input
	my $s = shift;
	
	# escape
	$s =~ s/&/&amp;/g;
	$s =~ s/</&lt;/g;
	$s =~ s/>/&gt;/g;
	$s =~ s/"/&quot;/g;
	$s =~ s/'/&apos;/g;

	# done
	return $s;
	
}


sub error {

	print $cgi->header();
	print $SRU::Error;
	exit;
	
}

