#!/usr/bin/env perl
#
# esip_fedsearch.pl: two-step OpenSearch client.
#
#   1. Fetch a dataset-level OpenSearch Description Document (OSDD), fill in
#      its URL template and run the dataset search.
#   2. For every dataset entry returned, follow its rel="search" link to the
#      dataset's own OSDD, run the granule search, and print the data links.
#
# N.B.: minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)
#
# Example:
#   esip_fedsearch.pl \
#     --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \
#     --bbox=-130,25,-60,50 \
#     --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \
#     --keywords=microwave --max_gran=1 --verbose

use strict;
use warnings;

use Getopt::Long;
use LWP::Simple;
use Time::Local;

# Diagnostic level shared by all subs: 0 = quiet, 1 = progress, 2+ = dump docs.
our $verbose = 0;

# main: parse the command line, run the dataset search, then the granule
# search for each dataset, printing one granule link per line on STDOUT.
sub main {
    my ($osdd_url, $keywords, $bbox, $start, $end, $help);
    my $max_ds   = 1;
    my $max_gran = 1;

    my $ok = GetOptions(
        "osdd=s"             => \$osdd_url,
        "keywords=s"         => \$keywords,
        "bbox=s"             => \$bbox,
        "start=s"            => \$start,
        "end=s"              => \$end,
        # Documented spelling first; the historical spelling stays as an alias.
        "max_ds|maxds=i"     => \$max_ds,
        "max_gran|maxgran=i" => \$max_gran,
        # ":1" => a bare --verbose means level 1 (":i" would assign 0).
        "verbose:1"          => \$verbose,
        "help"               => \$help,
    );
    usage() if (!$ok || $help || !$keywords || !$osdd_url);

    $start ||= epoch2ccsds(time() - 86400);
    $end   ||= epoch2ccsds(time());
    print("start: $start\nend: $end\n") if $verbose;

    # Search datasets
    my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds);
    if ($verbose > 1) {
        my $dump = defined $datasets ? $datasets : '';
        print "Datasets\n===========\n$dump\n==========\n";
    }

    # Extract OpenSearch Description Document links from the dataset-level results
    my @dataset_osdds = extract_links($datasets, "search", "opensearchdescription");

    # Loop through returned dataset OpenSearch Description Documents
    my $n = 0;
    foreach my $dataset_osdd (@dataset_osdds) {
        my $granules = opensearch($dataset_osdd, $keywords, $bbox, $start, $end, $max_gran);
        if ($verbose > 1) {
            my $dump = defined $granules ? $granules : '';
            print "Granules\n===========\n$dump\n==========\n";
        }

        # Extract granule (file) links
        my @links = extract_links($granules, "/data#", '');
        print join("\n", @links, '');

        $n++;
        last if ($n >= $max_ds);    # In case count is not supported at dataset level
    }
    return;
}

# _attr_value: return the value of attribute $name from the raw attribute
# text of one tag, or undef/empty list if the attribute is absent.
# Accepts single- or double-quoted values.
sub _attr_value {
    my ($attrs, $name) = @_;
    return unless defined $attrs;
    if ($attrs =~ m/(?:^|\s)\Q$name\E\s*=\s*(["'])(.*?)\1/is) {
        return $2;
    }
    return;
}

# _unescape_amp: decode the XML-escaped ampersand (named or numeric form)
# back to a literal "&" so the string can be used as a URL.
sub _unescape_amp {
    my ($text) = @_;
    return $text unless defined $text;
    $text =~ s/&(?:amp|#0*38|#x0*26);/&/gi;
    return $text;
}

# extract_links: pull link hrefs out of an Atom document.
#   $doc         - Atom document text (undef/empty yields an empty list)
#   $rel_target  - regex the link's rel must match (false = don't care)
#   $type_target - regex the link's type must match (false = don't care)
# Returns at most one href per <entry>: the first link that satisfies both
# filters and has an href.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless (defined $doc && length $doc);

    my $atom_ns  = get_namespace($doc, 'http://www.w3.org/2005/Atom');
    my $tag      = quotemeta($atom_ns) . 'entry';
    my $entry_re = qr/<$tag\b.*?>(.*?)<\/$tag>/is;

    # Loop through <entry> elements
    while ($doc =~ m/$entry_re/g) {
        my $entry = $1;

        # Loop through <link> elements of this entry
        while ($entry =~ m/<[\w:]*link\b(.*?)>/sg) {
            my $attrs = $1;

            my $rel = _attr_value($attrs, 'rel');
            $rel = '' unless defined $rel;
            next if ($rel_target && $rel !~ /$rel_target/);

            my $type = _attr_value($attrs, 'type');
            $type = '' unless defined $type;
            next if ($type_target && $type !~ /$type_target/);

            my $href = _attr_value($attrs, 'href');
            next unless defined $href;

            push @links, _unescape_amp($href);
            last;    # one link per entry
        }
    }
    return @links;
}

# opensearch: given a URL to an OpenSearch Description Document and the
# search values, fetch the OSDD and execute the search.
# Dies if the OSDD itself cannot be fetched; warns and returns undef if the
# OSDD has no URL template or the search returns nothing.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_;

    # Fetch OpenSearch Description Document
    my $osdd_name = defined $osdd_url ? $osdd_url : '';
    my $osdd = get_file($osdd_url) or die "Could not get $osdd_name";

    # Extract URL template for Atom response: prefer a <Url> whose type
    # mentions "atom"; otherwise fall back to the last template in the
    # document (which is what the earlier greedy match selected).
    my ($template, $fallback);
    while ($osdd =~ m/<[\w:]*Url\b(.*?)>/isg) {
        my $attrs = $1;
        my $candidate = _attr_value($attrs, 'template');
        next unless defined $candidate;
        $fallback = $candidate;
        my $type = _attr_value($attrs, 'type');
        if (defined $type && $type =~ /atom/i) {
            $template = $candidate;
            last;
        }
    }
    $template = $fallback unless defined $template;
    if (!defined $template) {
        warn "No URL template found in $osdd_name\n";
        return;
    }

    # Fill URL template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end, $count);

    # Fetch results
    my $results = get_file($url) or warn "No results returned for $url\n";
    return $results;
}

# fill_template: fill in an OpenSearch URL template with the search values.
# Placeholders whose value is false are dropped together with their query
# parameter. Returns the resulting URL.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end, $count) = @_;
    return $template unless defined $template;
    print "Before: $template\n" if ($verbose);

    # Strip namespace stuff out of template: crude but effective
    $template =~ s/\{\w+:/{/g;
    $template = _unescape_amp($template);

    $template =~ s/\{start\?*\}/$start/          if ($start);
    $template =~ s/\{end\?*\}/$end/              if ($end);
    $template =~ s/\{box\?*\}/$bbox/             if ($bbox);
    $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords);
    $template =~ s/\{count\?*\}/$count/          if ($count);

    # Remove unfilled placeholders. An unfilled *first* parameter that is
    # followed by others must leave the "?" behind, otherwise the query
    # string would start with "&".
    1 while $template =~ s/\?\w+?=\{[\w:]+\?*\}&/?/;
    $template =~ s/[&?]\w+?=\{[\w:]+\?*\}//g;

    print "After: $template\n" if ($verbose);
    return $template;
}

# ccsds2epoch: convert "yyyy-mm-ddThh:mm:ss" (trailing "Z" optional) to
# epoch seconds, UTC. Returns undef (empty list) if the string does not parse.
sub ccsds2epoch {
    my ($stamp) = @_;
    return unless defined $stamp;
    my ($y, $m, $d, $h, $min, $s) =
        ($stamp =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)/);
    return unless defined $y;

    # Pass the four-digit year as is: Time::Local applies a rolling-century
    # guess to two-digit years, which is what "$y - 1900" produced for any
    # year before 2000.
    return timegm($s, $min, $h, $d, $m - 1, $y);
}

# epoch2ccsds: convert epoch seconds to "yyyy-mm-ddThh:mm:ssZ" (UTC), the
# format documented for --start and --end.
sub epoch2ccsds {
    my @t = gmtime($_[0]);
    return sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
        $t[5] + 1900, $t[4] + 1, $t[3], $t[2], $t[1], $t[0]);
}

# get_file: fetch a file, either remotely or from the local file system.
# Returns the content as one string, or undef (empty list) on failure.
sub get_file {
    my $url = shift;
    if (!defined $url || $url eq '') {
        warn("No URL or file name given\n");
        return;
    }

    # Read from internet if it starts with a protocol
    if ($url =~ m/^(?:https?|ftp):/i) {
        return get($url);
    }

    # Otherwise assume it's a local file. Three-argument open so the name
    # can never be interpreted as a mode or a pipe.
    my $fh;
    if (!open($fh, '<', $url)) {
        warn("Cannot open $url: $!\n");
        return;
    }
    local $/ = undef;    # slurp mode
    my $content = readline($fh);
    close $fh;
    return $content;
}

# get_namespace: find the namespace prefix the document binds to $ns_uri.
# Returns "prefix:" when a prefix is declared, or '' when the URI is the
# default namespace or is not declared at all.
sub get_namespace {
    my ($doc, $ns_uri) = @_;
    my $ns = '';

    # The prefix is restricted to name characters so that a match can never
    # span from an earlier, unrelated xmlns declaration.
    if (defined $doc
        && $doc =~ m/\bxmlns(?::([\w.\-]+))?\s*=\s*(["'])\Q$ns_uri\E\2/is)
    {
        $ns = $1 if defined $1;
    }
    print "Namespace for $ns_uri: $ns\n" if $verbose;
    $ns .= ':' if length $ns;
    return $ns;
}

# usage: print the help text on STDERR and exit with a failure status.
sub usage {
    die join("\n",
        "esip_fedsearch.pl [options]",
        "  --osdd=url                     URL of dataset-level OpenSearch Description Document (Required)",
        "  --bbox=lon,lat,lon,lat         Bounding box of search area",
        "  --start=yyyy-mm-ddThh:mm:ssZ   Start time of search (Default=1 day ago)",
        "  --end=yyyy-mm-ddThh:mm:ssZ     End time of search (Default = now)",
        "  --max_ds=N                     Maximum number of datasets (Default = 1)",
        "  --max_gran=N                   Maximum number of granules per dataset (Default = 1)",
        "  --verbose                      Print some diagnostic messages",
        "  --keywords=word+word+word...   Keywords, separated by '+' (Required)",
        "  --help                         Print this message",
        "");
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without triggering a search.
main() unless caller;

1;