# esip_fedsearch.pl
# From Earth Science Information Partners (ESIP)
#!/usr/local/bin/perl
# N.B.: minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)
use Getopt::Long;
use LWP::Simple;
use Time::Local;
use strict;
# Parse command line
my ($osdd_url, $keywords, $bbox, $start, $end, $help);
my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords,
    "bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end,
    "help" => \$help);

# GetOptions returns false after it has reported a bad option.  The usage
# text documents both --osdd and --keywords as required, so a missing one
# ends in the usage message instead of a failed fetch of an empty URL.
usage() if (!$result || $help || !$osdd_url || !$keywords);

# Default window: the usage text promises "yesterday" for --start and
# start+1day for --end, i.e. the 24 hours ending now.
$start ||= epoch2ccsds(time() - 86400);
$end ||= epoch2ccsds(ccsds2epoch($start) + 86400);
warn("start: $start\nend: $end\n");

# Run the dataset-level search and collect the OpenSearch Description
# Document URL advertised by each returned dataset entry
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end);
my @osdd = extract_links($datasets, "search", "opensearchdescription");

# Loop through returned dataset OpenSearch Description Documents and run
# the same search against each one to obtain granule data links
foreach my $osdd (@osdd) {
    my $granules = opensearch($osdd, $keywords, $bbox, $start, $end);
    my @links = grep { defined } extract_links($granules, "/data#");

    # Terminate every link with a newline so that the last link of one
    # dataset is not glued to the first link of the next dataset.
    print map { "$_\n" } @links;
}
# Extract links from Atom document based on rel and type values
# extract_links($doc, $rel_target, $type_target)
#   Scans the Atom text in $doc with regular expressions (no XML parser)
#   and returns a list holding, for each <entry>, the href of the first
#   <link> whose rel attribute matches $rel_target and whose type attribute
#   matches $type_target.  Both targets are applied as regular expressions;
#   a false target (undef, '' or 0) disables that test.  Entries with no
#   matching link contribute nothing.  Returns an empty list when $doc is
#   undefined.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless defined $doc;

    # The five predefined XML entities.  Decoding is done in a single pass
    # so that "&amp;lt;" correctly becomes "&lt;" and not "<".
    my %char_for = (
        lt => '<', gt => '>', quot => '"', apos => "'", amp => '&',
    );

    # Value of attribute $name inside the tag text $tag (single- or
    # double-quoted), or undef when the attribute is absent.  The
    # look-behind keeps "rel" from matching inside names like "data-rel".
    my $attribute = sub {
        my ($tag, $name) = @_;
        my (undef, $value) =
            ($tag =~ m/(?<![\w:-])\Q$name\E\s*=\s*(["'])(.*?)\1/is);
        return $value;
    };

    # Loop through <entry> elements.  The start tag may carry a namespace
    # prefix and attributes (e.g. <entry xml:lang="en">); a self-closing
    # <entry/> is not treated as a start tag.
    while ($doc =~ m/<((?:[\w-]+:)?entry)\b[^>]*(?<!\/)>(.*?)<\/\1\s*>/sg) {
        my $entry = $2;

        # Loop through <link> elements; \b keeps other tag names that
        # merely begin with "link" from being examined.
        LINK:
        while ($entry =~ m/<(?:[\w-]+:)?link\b(.*?)>/sg) {
            my $link = $1;

            if ($rel_target) {
                my $rel = $attribute->($link, 'rel');
                $rel = '' unless defined $rel;
                next LINK if $rel !~ /$rel_target/;
            }
            if ($type_target) {
                my $type = $attribute->($link, 'type');
                $type = '' unless defined $type;
                next LINK if $type !~ /$type_target/;
            }

            # A matching link without a usable href is skipped rather than
            # pushed as undef, so a later link in the entry can still match.
            my $href = $attribute->($link, 'href');
            next LINK unless defined $href && length $href;

            # Attribute values are XML-escaped; "&amp;" between query
            # parameters must become "&" before the URL can be fetched.
            $href =~ s/&(lt|gt|quot|apos|amp);/$char_for{$1}/g;

            push @links, $href;
            last LINK;    # only the first matching link of each entry
        }
    }
    return @links;
}
# opensearch: given a URL to an OpenSearch Description Document and the search values,
# fetch the OSDD and execute the search
# opensearch($osdd_url, $keywords, $bbox, $start, $end)
#   Fetches the OpenSearch Description Document at $osdd_url, picks the
#   search template of the <Url> element whose type mentions "atom", fills
#   it with the search values via fill_template() and fetches the result.
#   Returns the response body, or undef (after a warning) when no Atom
#   template exists or the search request returns nothing.
#   Dies when the description document itself cannot be fetched.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end) = @_;

    # Fetch OpenSearch Description Document
    my $osdd = get($osdd_url) or die "Could not get $osdd_url";

    # Extract template for Atom response.  Each <Url> element is examined
    # on its own: a single greedy pattern over the whole document can pair
    # the Atom <Url> with the template attribute of a later non-Atom <Url>.
    my $template;
    URL_ELEMENT:
    while ($osdd =~ m/<(?:[\w-]+:)?Url\s([^>]*)>/isg) {
        my $attributes = $1;

        my (undef, $type) =
            ($attributes =~ m/(?<![\w:-])type\s*=\s*(["'])(.*?)\1/is);
        next URL_ELEMENT unless defined $type && $type =~ /atom/i;

        my (undef, $candidate) =
            ($attributes =~ m/(?<![\w:-])template\s*=\s*(["'])(.*?)\1/is);
        next URL_ELEMENT unless defined $candidate && length $candidate;

        $template = $candidate;
        last URL_ELEMENT;
    }
    if (!defined $template) {
        # Best effort: one unusable description document must not abort a
        # run that loops over many datasets.
        warn "No Atom search template found in $osdd_url\n";
        return;
    }

    # The template is an XML attribute value, so "&" between query
    # parameters arrives as "&amp;".  Decode in one pass to avoid
    # double-decoding sequences such as "&amp;lt;".
    my %char_for = (
        lt => '<', gt => '>', quot => '"', apos => "'", amp => '&',
    );
    $template =~ s/&(lt|gt|quot|apos|amp);/$char_for{$1}/g;

    # Fill template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end);

    # Fetch results
    my $results = get($url) or warn "No results returned for $url";
    return $results;
}
# fill_template: fill in an OpenSearch template with values from command line
# fill_template($template, $keywords, $bbox, $start, $end)
#   Returns $template with the OpenSearch placeholders {searchTerms},
#   {geo:box}, {time:start} and {time:end} (each optionally marked with a
#   trailing "?") replaced by the corresponding value.  Values are inserted
#   verbatim, without URL-encoding.  Query parameters whose placeholder was
#   not filled are removed together with their separator, and any remaining
#   optional placeholder is replaced by the empty string.
#   The template is logged to STDERR before and after filling.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end) = @_;
    warn "Before: $template\n";
    my $url = $template;

    my @fill_ins = (
        [ 'time:start'  => $start ],
        [ 'time:end'    => $end ],
        [ 'geo:box'     => $bbox ],
        [ 'searchTerms' => $keywords ],
    );
    foreach my $fill_in (@fill_ins) {
        my ($name, $value) = @{$fill_in};
        next unless defined $value && length $value;
        $url =~ s/\{\Q$name\E\?*\}/$value/g;
    }

    my $placeholder = qr/\{[\w:]+\?*\}/;    # {name} or {name?}
    my $separator   = qr/&(?:amp;)?/;       # tolerate an undecoded "&amp;"
    my $param_name  = qr/[\w.:-]+/;

    # Drop unfilled "&name={placeholder}" pairs first ...
    $url =~ s/$separator$param_name=$placeholder//g;
    # ... then an unfilled first pair, keeping the "?" that introduces the
    # query string so that the parameters behind it stay valid.
    $url =~ s/\?$param_name=$placeholder(?:$separator)?/?/g;
    # Nothing left in the query string: drop the dangling "?".
    $url =~ s/\?\z//;
    # An optional placeholder outside a name=value pair (e.g. in the path)
    # is replaced by the empty string.
    $url =~ s/\{[\w:]+\?\}//g;

    warn "After: $url\n";
    return $url;
}
# ccsds2epoch($timestamp)
#   Converts a UTC timestamp of the form yyyy-mm-ddThh:mm:ss (anything after
#   the seconds, such as a trailing "Z", is ignored) to epoch seconds.  A
#   date without a time part is taken as 00:00:00.
#   Dies with a descriptive message when no date can be found in the input.
sub ccsds2epoch {
    my ($timestamp) = @_;
    $timestamp = '' unless defined $timestamp;

    my ($y, $m, $d, $h, $min, $s) =
        ($timestamp =~ m/(\d{4})-(\d\d)-(\d\d)(?:T(\d\d):(\d\d):(\d\d))?/);
    die "Cannot parse time '$timestamp' (expected yyyy-mm-ddThh:mm:ss)\n"
        unless defined $y;

    # Date-only input: the optional time group did not participate.
    ($h, $min, $s) = (0, 0, 0) unless defined $h;

    # Pass the four-digit year as is.  Subtracting 1900 would hand
    # Time::Local a value in 0..99 for the years 1900-1999, which it
    # treats as a two-digit year subject to century windowing.
    return timegm($s, $min, $h, $d, $m - 1, $y);
}
# epoch2ccsds($epoch)
#   Formats epoch seconds as a UTC timestamp yyyy-mm-ddThh:mm:ss
#   (no trailing zone designator).
sub epoch2ccsds {
    my ($epoch) = @_;

    # gmtime yields the month as 0..11 and the year as an offset from 1900.
    my ($sec, $minute, $hour, $mday, $month, $year) = gmtime($epoch);

    return sprintf(
        "%04d-%02d-%02dT%02d:%02d:%02d",
        $year + 1900, $month + 1, $mday, $hour, $minute, $sec
    );
}
# usage()
#   Terminates the script by raising the command-line synopsis with die;
#   never returns.  The message ends in a newline, so perl does not append
#   a file/line suffix to it.
#   Declared without a prototype: the sub is called before its definition,
#   where a prototype cannot be checked and only produces a "called too
#   early to check prototype" warning.
sub usage {
    my @help_lines = (
        "esip_fedsearch.pl [options]",
        "--osdd=url URL of dataset-level OpenSearch",
        "Description Document (Required)",
        "--bbox=lon,lat,lon,lat Bounding box of search area",
        "--start=yyyy-mm-ddThh:mm:ssZ Start time of search (Default=yesterday)",
        "--end=yyyy-mm-ddThh:mm:ssZ End time of search (Default = start+1day)",
        "--keywords=word+word+word... Keywords, separated by '+' (Required)",
    );
    die join("\n", @help_lines) . "\n";
}