Esip fedsearch.pl

From Earth Science Information Partners (ESIP)
Revision as of 17:09, December 23, 2009 by Clynnes (talk | contribs) (New page: <code> #!/usr/local/bin/perl # N.B.: minimal included modules for portability # (Could be more efficient with XML/Atom parsing and XPath.) use Getopt::Long; use LWP::Simple; use Time::Loc...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

#!/usr/local/bin/perl
# N.B.: minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)

use Getopt::Long; use LWP::Simple; use Time::Local; use strict;

# Parse command line

my ($osdd_url, $keywords, $bbox, $start, $end, $help);
my $result = GetOptions(
    "osdd=s"     => \$osdd_url,
    "keywords=s" => \$keywords,
    "bbox=s"     => \$bbox,
    "start=s"    => \$start,
    "end=s"      => \$end,
    "help"       => \$help,
);

# Show the usage text on --help, on unparsable options, and whenever one of
# the two options the usage text marks as required (--osdd, --keywords) is
# missing.
usage() if ($help || !$result || !$osdd_url || !$keywords);

# Default search window: start defaults to 24 hours ago (the usage text
# promises "yesterday"), end defaults to one day after start.
$start ||= epoch2ccsds(time() - 86400);
$end   ||= epoch2ccsds(ccsds2epoch($start) + 86400);
warn("start: $start\nend: $end\n");

# Get Dataset Open Search Description Document
my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end);
my @osdd = extract_links($datasets, "search", "opensearchdescription");

# Loop through returned dataset OpenSearch Description Documents
foreach my $osdd (@osdd) {
    my $granules = opensearch($osdd, $keywords, $bbox, $start, $end);
    my @links = extract_links($granules, "/data#");

    # One link per line, each newline-terminated, so the output of
    # consecutive datasets does not run together on a single line.
    print "$_\n" for @links;
}

# Extract links from Atom document based on rel and type values

# extract_links($doc, $rel_target, $type_target)
#
# Scans the text of an Atom document with regular expressions (no XML parser)
# and returns a list of href values, at most one per <entry>.
#
#   $doc          Atom document as a string; undef yields an empty list.
#   $rel_target   Optional regex pattern the link's rel attribute must match.
#   $type_target  Optional regex pattern the link's type attribute must match.
#
# For each <entry>, the first <link> that satisfies the supplied targets and
# carries a non-empty href is used. The href is XML-unescaped before it is
# returned, so "&amp;" in the document becomes "&" in the URL.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless defined $doc;

    # The five predefined XML entities that may appear in attribute values.
    my %char_for = (
        amp  => '&',
        lt   => '<',
        gt   => '>',
        quot => '"',
        apos => "'",
    );

    # Loop through <entry> elements (attributes on <entry> are tolerated)
    ENTRY:
    while ($doc =~ m{<entry\b[^>]*>(.*?)</entry>}sg) {
        my $entry = $1;

        # Loop through <link> elements
        while ($entry =~ m{<link\b(.*?)>}sg) {
            my $attrs = $1;
            my ($rel)  = ($attrs =~ m/\brel="(.*?)"/is);
            my ($type) = ($attrs =~ m/\btype="(.*?)"/is);
            my ($href) = ($attrs =~ m/\bhref="(.*?)"/is);

            # A target that is supplied must be matched by a present attribute.
            next if ($rel_target
                && (!defined $rel || $rel !~ /$rel_target/));
            next if ($type_target
                && (!defined $type || $type !~ /$type_target/));

            # Never hand an undefined or empty URL back to the caller.
            next unless defined $href && length $href;

            # Single-pass decode, so "&amp;lt;" correctly becomes "&lt;".
            $href =~ s/&(amp|lt|gt|quot|apos);/$char_for{$1}/g;
            push @links, $href;

            # Only the first matching link of each entry is wanted.
            next ENTRY;
        }
    }
    return @links;
}

# opensearch: given a URL to an OpenSearch Description Document and the search values,
# fetch the OSDD and execute the search

# opensearch($osdd_url, $keywords, $bbox, $start, $end)
#
# Fetches the OpenSearch Description Document at $osdd_url, picks the URL
# template of the <Url> element whose type mentions "atom", fills it in via
# fill_template() and fetches the resulting search URL.
#
# Returns the body of the search response, or undef when the OSDD offers no
# Atom template or the search request returns nothing (a warning is issued in
# both cases). Dies when the OSDD itself cannot be fetched.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end) = @_;

    # Fetch OpenSearch Description Document
    my $osdd = get($osdd_url) or die "Could not get $osdd_url";

    # The five predefined XML entities that may appear in attribute values.
    my %char_for = (
        amp  => '&',
        lt   => '<',
        gt   => '>',
        quot => '"',
        apos => "'",
    );

    # Extract template for Atom response. Each <Url ...> tag is examined on
    # its own, so a template can never be taken from a different (e.g. HTML)
    # Url element than the one whose type mentions "atom".
    my $template;
    while ($osdd =~ m{<(?:\w+:)?Url\b([^>]*)>}gis) {
        my $attrs = $1;
        next unless $attrs =~ m/\btype\s*=\s*["'][^"']*atom/i;
        next unless $attrs =~ m/\btemplate\s*=\s*(["'])(.*?)\1/is;
        $template = $2;

        # The template is an XML attribute value: "&amp;" stands for "&".
        $template =~ s/&(amp|lt|gt|quot|apos);/$char_for{$1}/g;
        last;
    }
    if (!defined $template) {
        warn "No Atom search template found in $osdd_url\n";
        return;
    }

    # Fill template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end);

    # Fetch results; a failed search is reported but is not fatal
    my $results = get($url) or warn "No results returned for $url";
    return $results;
}

# fill_template: fill in an OpenSearch template with values from command line

# fill_template($template, $keywords, $bbox, $start, $end)
#
# Substitutes the supplied values for the {searchTerms}, {geo:box},
# {time:start} and {time:end} placeholders of an OpenSearch URL template.
# A placeholder may carry the optional marker "?" (e.g. {geo:box?}).
# Query parameters whose placeholder was not filled are removed from the URL.
# The template before and after filling is reported with warn().
#
# Returns the filled-in URL, or undef when $template is undefined.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end) = @_;
    return unless defined $template;
    warn "Before: $template\n";

    # Placeholder name => value, in a fixed order.
    my @fills = (
        [ 'time:start',  $start ],
        [ 'time:end',    $end ],
        [ 'geo:box',     $bbox ],
        [ 'searchTerms', $keywords ],
    );
    for my $fill (@fills) {
        my ($name, $value) = @{$fill};
        next unless defined $value && length $value;
        $template =~ s/\{\Q$name\E\??\}/$value/g;
    }

    # Drop every parameter that still holds a placeholder, including optional
    # ones such as "&bbox={geo:box?}". When the removed parameter opened the
    # query string, keep the "?" so the remaining parameters stay attached.
    $template =~ s/([&?])[\w.-]+=\{[\w:]+\??\}/$1 eq '?' ? '?' : ''/ge;
    $template =~ s/\?&+/?/;
    $template =~ s/\?\z//;

    warn "After: $template\n";
    return $template;
}

# ccsds2epoch($timestamp)
#
# Converts a "yyyy-mm-ddThh:mm:ss" timestamp (interpreted as UTC) to seconds
# since the epoch. Dies when the argument does not contain such a timestamp.
sub ccsds2epoch {
    my $stamp = defined $_[0] ? $_[0] : '';
    my @fields =
        ($stamp =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)/);
    die "Invalid time '$stamp' (expected yyyy-mm-ddThh:mm:ss)"
        unless @fields == 6;
    my ($y, $m, $d, $h, $min, $s) = @fields;

    # Pass the four-digit year as is: year-1900 would turn 1900-1999 into a
    # two-digit year, which Time::Local maps into a rolling century window.
    return timegm($s, $min, $h, $d, $m - 1, $y);
}

# epoch2ccsds($epoch)
#
# Formats seconds since the epoch as a UTC "yyyy-mm-ddThh:mm:ss" timestamp.
sub epoch2ccsds {
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($_[0]);
    return sprintf("%04d-%02d-%02dT%02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

# usage()
#
# Terminates the program via die() with the command-line help text.
sub usage {
    die join("\n",
        "esip_fedsearch.pl [options]",
        " --osdd=url                     URL of dataset-level OpenSearch ",
        "                                Description Document (Required)",
        " --bbox=lon,lat,lon,lat         Bounding box of search area",
        " --start=yyyy-mm-ddThh:mm:ssZ   Start time of search (Default=yesterday)",
        " --end=yyyy-mm-ddThh:mm:ssZ     End time of search (Default = start+1day)",
        " --keywords=word+word+word...   Keywords, separated by '+' (Required)",
        "",
        "");
}