Esip fedsearch.pl

From Earth Science Information Partners (ESIP)
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/local/ActivePerl-5.8/bin/perl
# N.B.:  minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)
# Example:
# esip_fedsearch.pl \
#    --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \
#    --bbox=-130,25,-60,50 \
#    --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \
#    --keywords=microwave --max_gran=1 --verbose
use Getopt::Long;
use LWP::Simple;
use Time::Local;
use strict;

# Parse command line
my ($osdd_url, $keywords, $bbox, $start, $end, $help);
my $max_ds = 1;      # maximum number of datasets to search
my $max_gran = 1;    # maximum number of granules requested per dataset
our $verbose = 0;    # package global: also read by fill_template()
my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords,
   "bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end,
   "max_ds=i" => \$max_ds, "max_gran=i" => \$max_gran,
   "verbose" => \$verbose, "help" => \$help);

# --osdd and --keywords are both documented as required; also bail out with
# the usage text when GetOptions rejected the command line.
usage() if ($help || !$result || !$osdd_url || !$keywords);

# Default search window is the 24 hours starting yesterday, as documented in
# the usage text (start = yesterday, end = start + 1 day).
$start ||= epoch2ccsds(time() - 86400);
$end ||= epoch2ccsds(ccsds2epoch($start)+86400);
warn ("start: $start\nend:   $end\n") if $verbose;

# Dataset-level search: the response entries link to one OpenSearch
# Description Document per matching dataset.
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds);
my @osdd = extract_links($datasets, "search", "opensearchdescription");

# Granule-level search: query each dataset's OSDD and print the data links,
# one per line.
my $n = 0;
foreach my $dataset_osdd (@osdd) {
    my $granules = opensearch($dataset_osdd, $keywords, $bbox, $start, $end, $max_gran);
    my @links = extract_links($granules, "/data#", '');
    print join("\n", @links, '');
    $n++;
    last if ($n >= $max_ds);   # In case count is not supported at dataset level
}

# extract_links: extract link URLs from an Atom document based on rel and
# type values.
#   $doc          Atom document as a string (undef yields an empty list)
#   $rel_target   pattern the link's rel attribute must match ('' = any rel)
#   $type_target  pattern the link's type attribute must match ('' = any type)
# Returns a list with, for each <entry>, the href of the first <link> that
# passes both filters.  Entries without such a link contribute nothing.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless defined $doc;
    # Loop through <entry> elements (with or without a namespace prefix)
    while ($doc =~ m/<[\w:]*entry>(.*?)[:\/]entry>/isg) {
        my $entry = $1;
        # Loop through <link> elements; $attrs is the attribute text of one link
        while ($entry =~ m/<[\w:]*link(.*?)>/sg) {
            my $attrs = $1;
            # A link lacking the attribute can never satisfy a non-empty filter
            my ($rel) = ($attrs =~ m/rel="(.*?)"/is);
            next if ($rel_target && !(defined $rel && $rel =~ /$rel_target/));
            my ($type) = ($attrs =~ m/type="(.*?)"/is);
            next if ($type_target && !(defined $type && $type =~ /$type_target/));
            # A link without an href is useless; keep looking in this entry
            my ($link_href) = ($attrs =~ m/href="(.*?)"/);
            next unless defined $link_href;
            # The href is an XML attribute value, so '&' arrives as '&amp;'
            $link_href =~ s/&amp;/&/g;
            push @links, $link_href;
            last;   # only the first matching link per entry
        }
    }
    return @links;
}
# opensearch: given a URL to an OpenSearch Description Document and the search values,
# fetch the OSDD and execute the search
#   $osdd_url   URL of the OpenSearch Description Document
#   $keywords, $bbox, $start, $end, $count
#               search values handed to fill_template()
# Returns the body of the search response, or undef (with a warning) when the
# OSDD holds no search template or the search request returns nothing.
# Dies when the OSDD itself cannot be fetched.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_;
    # Fetch OpenSearch Description Document
    my $osdd = get($osdd_url) or die "Could not get $osdd_url";
    # Extract template for Atom response: callers parse <entry> elements, so
    # take the first Url element whose type mentions atom.  When no Url
    # declares an Atom type, fall back to the last template in the document.
    my $template;
    while ($osdd =~ m/<[\w:]*Url\s([^>]*)>/isg) {
        my $attrs = $1;
        my ($candidate) = ($attrs =~ m/template="(.*?)"/is);
        next unless defined $candidate;
        $template = $candidate;
        my ($type) = ($attrs =~ m/type="(.*?)"/is);
        last if (defined $type && $type =~ m/atom/i);
    }
    unless (defined $template) {
        warn "No search template found in $osdd_url";
        return;
    }
    # Fill template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end, $count);
    # Fetch results
    my $results = get($url);
    warn "No results returned for $url" unless $results;
    return $results;
}
# fill_template:  fill in an OpenSearch template with values from command line
#   $template   URL template as it appears in the OSDD (XML-escaped)
#   $keywords   replaces {searchTerms}
#   $bbox       replaces {geo:box}
#   $start      replaces {time:start}
#   $end        replaces {time:end}
#   $count      replaces {count}
# A false value leaves its placeholder unfilled.  Query parameters whose
# placeholder stays unfilled are removed from the returned URL.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end, $count) = @_;
    our $verbose;   # package global set by --verbose on the command line
    warn "Before: $template\n" if ($verbose);
    # The template is an XML attribute value, so '&' arrives as '&amp;'
    $template =~ s/&amp;/&/g;
    $template =~ s/\{time:start\?*\}/$start/ if ($start);
    $template =~ s/\{time:end\?*\}/$end/ if ($end);
    $template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox);
    $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords);
    $template =~ s/\{count\?*\}/$count/ if ($count);
    # Remove unfilled placeholders.  Parameters introduced by '&' are dropped
    # whole; an unfilled first parameter must leave the '?' behind so that
    # the parameters following it still form a valid query string.
    $template =~ s/&\w+=\{[\w:]+\?*\}//g;
    $template =~ s/\?\w+=\{[\w:]+\?*\}&?/?/;
    $template =~ s/\?$//;   # nothing left after the '?'
    warn "After: $template\n" if ($verbose);
    return $template;
}
# ccsds2epoch: convert a yyyy-mm-ddThh:mm:ss timestamp (anything after the
# seconds, such as a trailing Z, is ignored) to epoch seconds, reading the
# fields as UTC.  Dies with a descriptive message when the timestamp does not
# have that form.
sub ccsds2epoch {
    my $stamp = defined $_[0] ? $_[0] : '';
    my @fields = ($stamp =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)/);
    die "Unrecognized time '$stamp' (expected yyyy-mm-ddThh:mm:ss)\n" unless @fields;
    my ($y, $m, $d, $h, $min, $s) = @fields;
    # Pass the four-digit year unchanged: timegm treats years below 100 as
    # two-digit shorthand within a rolling century, so year-1900 would move
    # dates before the window (e.g. 1970) a hundred years forward.
    return timegm($s, $min, $h, $d, $m-1, $y);
}
# epoch2ccsds: format epoch seconds as a yyyy-mm-ddThh:mm:ss string in UTC
sub epoch2ccsds {
    my ($epoch) = @_;
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($epoch);
    # gmtime counts months from 0 and years from 1900
    my $date = sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday);
    my $time = sprintf("%02d:%02d:%02d", $hour, $min, $sec);
    return $date . "T" . $time;
}
# usage: print the command-line help on STDERR and exit by way of die.
# Declared without a prototype: the only call precedes this definition, so a
# prototype would never be applied.  The message ends in a newline so that
# die does not append a file/line suffix.
sub usage {
    die join("\n",
        "esip_fedsearch.pl [options]",
        "  --osdd=url                     URL of dataset-level OpenSearch ",
        "                                 Description Document (Required)",
        "  --bbox=lon,lat,lon,lat         Bounding box of search area",
        "  --start=yyyy-mm-ddThh:mm:ssZ   Start time of search (Default=yesterday)",
        "  --end=yyyy-mm-ddThh:mm:ssZ     End time of search (Default = start+1day)",
        "  --max_ds=N                     Maximum number of datasets (Default = 1)",
        "  --max_gran=N                   Maximum number of granules ",
        "                                 per dataset (Default = 1)",
        "  --verbose                      Print some diagnostic messages",
        "  --keywords=word+word+word...   Keywords, separated by '+' (Required)",
        "");
}