Difference between revisions of "Esip fedsearch.pl"

From Earth Science Information Partners (ESIP)
Line 1: Line 1:
 
<pre>
 
<pre>
#!/usr/local/bin/perl
+
#!/usr/local/ActivePerl-5.8/bin/perl
 
# N.B.:  minimal included modules for portability
 
# N.B.:  minimal included modules for portability
 
# (Could be more efficient with XML/Atom parsing and XPath.)
 
# (Could be more efficient with XML/Atom parsing and XPath.)
 +
# Example:
 +
# esip_fedsearch.pl \
 +
#    --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \
 +
#    --bbox=-130,25,-60,50 \
 +
#    --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \
 +
#    --keywords=microwave --max_gran=1 --verbose
 
use Getopt::Long;
 
use Getopt::Long;
 
use LWP::Simple;
 
use LWP::Simple;
Line 10: Line 16:
 
# Parse command line
 
# Parse command line
 
my ($osdd_url, $keywords, $bbox, $start, $end, $help);
 
my ($osdd_url, $keywords, $bbox, $start, $end, $help);
 +
my $max_ds = 1;
 +
my $max_gran = 1;
 +
our $verbose = 0;
 
my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords,
 
my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords,
 
   "bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end,  
 
   "bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end,  
   "help" => \$help);
+
   "max_ds=i" => \$max_ds, "max_gran=i" => \$max_gran,
 +
  "verbose" => \$verbose, "help" => \$help);
  
 
usage() if ($help || !$keywords);
 
usage() if ($help || !$keywords);
 
$start ||= epoch2ccsds(time());
 
$start ||= epoch2ccsds(time());
 
$end ||= epoch2ccsds(ccsds2epoch($start)+86400);
 
$end ||= epoch2ccsds(ccsds2epoch($start)+86400);
warn ("start: $start\nend:  $end\n");
+
warn ("start: $start\nend:  $end\n") if $verbose;
  
 
# Get Dataset Open Search Description Document
 
# Get Dataset Open Search Description Document
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end);
+
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds);
 
my @osdd = extract_links($datasets, "search", "opensearchdescription");
 
my @osdd = extract_links($datasets, "search", "opensearchdescription");
  
 
# Loop through returned dataset OpenSearch Description Documents
 
# Loop through returned dataset OpenSearch Description Documents
 +
my $n = 0;
 
foreach my $osdd (@osdd) {
 
foreach my $osdd (@osdd) {
     my $granules=opensearch($osdd, $keywords, $bbox, $start,$end);
+
     my $granules=opensearch($osdd, $keywords, $bbox, $start, $end, $max_gran);
 
     my @links = extract_links($granules, "/data#", '');
 
     my @links = extract_links($granules, "/data#", '');
 
     print join("\n", @links, '');
 
     print join("\n", @links, '');
 +
    $n++;
 +
    last if ($n >= $max_ds);  # In case count is not supported at dataset level
 
}
 
}
  
Line 35: Line 48:
 
     my @links;
 
     my @links;
 
     # Loop through <entry> elements
 
     # Loop through <entry> elements
     while ($doc =~ m/<entry>(.*?)<\/entry>/sg) {
+
     while ($doc =~ m/<[\w:]*entry>(.*?)[:\/]entry>/isg) {
 
         my $entry = $1;
 
         my $entry = $1;
 
         # Loop through <link> elements
 
         # Loop through <link> elements
         while ($entry =~ m/<link(.*?)>/sg) {
+
         while ($entry =~ m/<[\w:]*link(.*?)>/sg) {
 
             my $link = $1;
 
             my $link = $1;
 
             my $match = 1;
 
             my $match = 1;
Line 47: Line 60:
 
             if ($match) {
 
             if ($match) {
 
                 my ($link_href) = ($link =~ m/href="(.*?)"/);
 
                 my ($link_href) = ($link =~ m/href="(.*?)"/);
 +
                $link_href =~ s/\&amp;/\&/g;
 
                 push @links, $link_href;
 
                 push @links, $link_href;
 
                 last;
 
                 last;
Line 57: Line 71:
 
# fetch the OSDD and execute the search
 
# fetch the OSDD and execute the search
 
sub opensearch {
 
sub opensearch {
     my ($osdd_url, $keywords, $bbox, $start, $end) = @_;
+
     my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_;
 
     # Fetch OpenSearch Description Document
 
     # Fetch OpenSearch Description Document
 
     my $osdd = get($osdd_url) or die "Could not get $osdd_url";
 
     my $osdd = get($osdd_url) or die "Could not get $osdd_url";
 
     # Extract template for Atom response
 
     # Extract template for Atom response
     my ($template) = ($osdd =~ /<[\w:]*Url .*atom.*template="(.*?)"/is);
+
     my ($template) = ($osdd =~ /<[\w:]*Url .*template="(.*?)"/is);
 
     # Fill template in with values
 
     # Fill template in with values
     my $url = fill_template($template, $keywords, $bbox, $start, $end);
+
     my $url = fill_template($template, $keywords, $bbox, $start, $end, $count);
 
     # Fetch results
 
     # Fetch results
 
     my $results = get($url) or warn "No results returned for $url";
 
     my $results = get($url) or warn "No results returned for $url";
Line 70: Line 84:
 
# fill_template:  fill in an OpenSearch template with values from command line
 
# fill_template:  fill in an OpenSearch template with values from command line
 
sub fill_template {
 
sub fill_template {
     my ($template, $keywords, $bbox, $start, $end) = @_;
+
     my ($template, $keywords, $bbox, $start, $end, $count) = @_;
     warn "Before: $template\n";
+
     warn "Before: $template\n" if ($verbose);
 
     my $url = $template;
 
     my $url = $template;
 +
    $template =~ s/\&amp;/\&/g;  # Unescape
 
     $template =~ s/\{time:start\?*\}/$start/ if ($start);
 
     $template =~ s/\{time:start\?*\}/$start/ if ($start);
 
     $template =~ s/\{time:end\?*\}/$end/ if ($end);
 
     $template =~ s/\{time:end\?*\}/$end/ if ($end);
 
     $template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox);
 
     $template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox);
     $template =~ s/\{searchTerms\}/$keywords/ if ($keywords);
+
     $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords);
     $template =~ s/(\&amp;|\?)\w+?=\{[\w:]+?\}//g;
+
    $template =~ s/\{count\?*\}/$count/ if ($count);
     warn "After: $template\n";
+
     $template =~ s/(\&|\?)\w+?=\{[\w:]+\?*\}//g; # rm unfilled placeholders
 +
     warn "After: $template\n" if ($verbose);
 
     return $template;
 
     return $template;
 
}
 
}
Line 97: Line 113:
 
   --start=yyyy-mm-ddThh:mm:ssZ  Start time of search (Default=yesterday)\
 
   --start=yyyy-mm-ddThh:mm:ssZ  Start time of search (Default=yesterday)\
 
   --end=yyyy-mm-ddThh:mm:ssZ    End time of search (Default = start+1day)\
 
   --end=yyyy-mm-ddThh:mm:ssZ    End time of search (Default = start+1day)\
 +
  --max_ds=N                    Maximum number of datasets (Default = 1)\
 +
  --max_gran=N                  Maximum number of granules
 +
                                per dataset (Default = 1)\
 +
  --verbose                      Print some diagnostic messages
 
   --keywords=word+word+word...  Keywords, separated by '+' (Required)
 
   --keywords=word+word+word...  Keywords, separated by '+' (Required)
 
";
 
";
 
}
 
}
 
</pre>
 
</pre>

Revision as of 10:04, January 26, 2010

#!/usr/local/ActivePerl-5.8/bin/perl
# N.B.:  minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)
# Example:
# esip_fedsearch.pl \
#    --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \
#    --bbox=-130,25,-60,50 \
#    --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \
#    --keywords=microwave --max_gran=1 --verbose
use Getopt::Long;
use LWP::Simple;
use Time::Local;
use strict;

# Parse command line
my ($osdd_url, $keywords, $bbox, $start, $end, $help);
my $max_ds = 1;      # maximum number of dataset-level results to process
my $max_gran = 1;    # maximum number of granules requested per dataset
our $verbose = 0;    # package variable: fill_template reads it as well
my $result = GetOptions("osdd=s" => \$osdd_url, "keywords=s" => \$keywords,
   "bbox=s" => \$bbox, "start=s" => \$start, "end=s" => \$end,
   "max_ds=i" => \$max_ds, "max_gran=i" => \$max_gran,
   "verbose" => \$verbose, "help" => \$help);

# Print usage and stop on unparsable options, on --help, or when either of
# the options documented as "Required" (--osdd, --keywords) is missing.
usage() if (!$result || $help || !$keywords || !$osdd_url);

# Default search window is the last 24 hours: start defaults to yesterday
# (as the usage text documents) and end defaults to start + 1 day.
$start ||= epoch2ccsds(time() - 86400);
$end ||= epoch2ccsds(ccsds2epoch($start)+86400);
warn ("start: $start\nend:   $end\n") if $verbose;

# Get Dataset Open Search Description Document
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds);
# Each dataset entry is expected to link to its own granule-level OSDD
my @osdd = extract_links($datasets, "search", "opensearchdescription");

# Loop through returned dataset OpenSearch Description Documents
my $n = 0;
foreach my $osdd (@osdd) {
    my $granules=opensearch($osdd, $keywords, $bbox, $start, $end, $max_gran);
    # Data links are recognised by a rel value containing "/data#"
    my @links = extract_links($granules, "/data#", '');
    # One link per line; the trailing '' makes join() end the output with "\n"
    print join("\n", @links, '');
    $n++;
    last if ($n >= $max_ds);   # In case count is not supported at dataset level
}

# Extract links from Atom document based on rel and type values.
#
#   $doc          text of the Atom response (may be undef after a failed fetch)
#   $rel_target   regex the link's rel attribute must match; false = any rel
#   $type_target  regex the link's type attribute must match; false = any type
#
# Returns a list with at most one href per entry: the first link in that
# entry that satisfies both filters and carries an href.  XML-escaped
# ampersands (&amp;) in the href are converted back to a plain '&'.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless defined $doc;    # nothing to scan
    # Loop through <entry> elements (an optional namespace prefix is allowed)
    while ($doc =~ m/<[\w:]*entry>(.*?)[:\/]entry>/isg) {
        my $entry = $1;
        # Loop through <link> elements
        while ($entry =~ m/<[\w:]*link(.*?)>/sg) {
            my $link = $1;    # attribute text of this link element
            my ($rel) = ($link =~ m/rel="(.*?)"/is);
            next if ($rel_target && (!defined $rel || $rel !~ /$rel_target/));
            my ($type) = ($link =~ m/type="(.*?)"/is);
            next if ($type_target && (!defined $type || $type !~ /$type_target/));
            my ($link_href) = ($link =~ m/href="(.*?)"/);
            # A matching link without an href is useless; try the next link
            next unless defined $link_href;
            $link_href =~ s/\&amp;/\&/g;    # unescape XML-encoded ampersands
            push @links, $link_href;
            last;    # only the first matching link of each entry is kept
        }
    }
    return @links;
}
# opensearch: given a URL to an OpenSearch Description Document and the search values,
# fetch the OSDD and execute the search
#
#   $osdd_url   URL of the OpenSearch Description Document
#   $keywords, $bbox, $start, $end, $count
#               search values handed unchanged to fill_template
#
# Returns the body of the search response, or undef when the OSDD offers no
# usable template or the search request returns nothing (a warning is
# printed in both cases).  Dies when the OSDD itself cannot be fetched.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_;
    # Fetch OpenSearch Description Document
    my $osdd = get($osdd_url) or die "Could not get $osdd_url";
    # Extract template for Atom response.  Each Url element is inspected on
    # its own so that a template is never paired with another element's
    # attributes.  A Url whose type mentions "atom" wins; when none does,
    # the last template in the document is used.
    my ($template, $fallback);
    while ($osdd =~ m/<[\w:]*Url\s([^>]*)>/ig) {
        my $attrs = $1;
        my ($candidate) = ($attrs =~ m/template="(.*?)"/is);
        next unless defined $candidate;
        $fallback = $candidate;
        if ($attrs =~ m/type="[^"]*atom/i) {
            $template = $candidate;
            last;
        }
    }
    $template = $fallback unless defined $template;
    unless (defined $template) {
        # Keep going with other datasets rather than aborting the whole run
        warn "No search template found in $osdd_url\n";
        return;
    }
    # Fill template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end, $count);
    # Fetch results
    my $results = get($url) or warn "No results returned for $url";
    return $results;
}
# fill_template:  fill in an OpenSearch template with values from command line
#
#   $template   URL template taken from an OSDD; may still be XML-escaped
#   $keywords   replaces {searchTerms} / {searchTerms?}
#   $bbox       replaces {geo:box} / {geo:box?}
#   $start      replaces {time:start} / {time:start?}
#   $end        replaces {time:end} / {time:end?}
#   $count      replaces {count} / {count?}
#
# A value that is false (undef, '', 0) leaves its placeholder unfilled.
# Query parameters whose placeholder is still unfilled at the end are
# removed from the URL.  Returns the resulting URL.
# Prints the template before and after filling when $verbose is set.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end, $count) = @_;
    warn "Before: $template\n" if ($verbose);
    $template =~ s/\&amp;/\&/g;  # Unescape XML-encoded ampersands
    $template =~ s/\{time:start\?*\}/$start/ if ($start);
    $template =~ s/\{time:end\?*\}/$end/ if ($end);
    $template =~ s/\{geo:box\?*\}/$bbox/ if ($bbox);
    $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords);
    $template =~ s/\{count\?*\}/$count/ if ($count);
    # rm unfilled placeholders.  Parameters introduced by '&' can simply be
    # dropped.  An unfilled first parameter must hand its '?' over to the
    # parameter that follows, otherwise the query string would start with '&'.
    $template =~ s/\&\w+?=\{[\w:]+\?*\}//g;
    $template =~ s/\?\w+?=\{[\w:]+\?*\}\&/?/;
    $template =~ s/\?\w+?=\{[\w:]+\?*\}//;
    warn "After: $template\n" if ($verbose);
    return $template;
}
# ccsds2epoch: convert a yyyy-mm-ddThh:mm:ss[Z] time string (taken as UTC)
# into seconds since the Unix epoch.
# Dies with a descriptive message when the string does not contain a time
# in that layout.
sub ccsds2epoch {
    my ($stamp) = @_;
    $stamp = '' unless defined $stamp;
    my ($y, $m, $d, $h, $min, $s) = ($stamp =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)/);
    die "Invalid time '$stamp': expected yyyy-mm-ddThh:mm:ss\n" unless defined $y;
    # Pass the four-digit year as is.  Time::Local reads year values 0..99
    # as two-digit years in a rolling century around today, so $y-1900
    # would silently move e.g. 1970 to 2070.
    return timegm($s, $min, $h, $d, $m-1, $y);
}
# epoch2ccsds: format seconds since the Unix epoch as a UTC time string
# of the form yyyy-mm-ddThh:mm:ss (no trailing "Z").
sub epoch2ccsds {
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($_[0]);
    # gmtime reports years since 1900 and zero-based months
    my $date  = sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday);
    my $clock = sprintf("%02d:%02d:%02d", $hour, $min, $sec);
    return $date . "T" . $clock;
}
# usage: report the command-line synopsis and stop the program.
# Never returns: the synopsis is raised through die.  The text ends with a
# newline, so Perl does not append an "at ... line N" suffix to it.
# Declared without a prototype: every call site precedes this definition,
# so a prototype would never be applied anyway.
sub usage {
    die join("\n",
        "esip_fedsearch.pl [options]",
        "  --osdd=url                     URL of dataset-level OpenSearch ",
        "                                 Description Document (Required)",
        "  --bbox=lon,lat,lon,lat         Bounding box of search area",
        "  --start=yyyy-mm-ddThh:mm:ssZ   Start time of search (Default=yesterday)",
        "  --end=yyyy-mm-ddThh:mm:ssZ     End time of search (Default = start+1day)",
        "  --max_ds=N                     Maximum number of datasets (Default = 1)",
        "  --max_gran=N                   Maximum number of granules ",
        "                                 per dataset (Default = 1)",
        "  --verbose                      Print some diagnostic messages",
        "  --keywords=word+word+word...   Keywords, separated by '+' (Required)",
        '');    # empty last element supplies the final newline
}