esip_fedsearch.pl
From Earth Science Information Partners (ESIP)
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/local/ActivePerl-5.8/bin/perl
# esip_fedsearch.pl: two-step ESIP federated OpenSearch client.
#
# Step 1 queries a dataset-level OpenSearch Description Document (OSDD) and
# collects the granule-level OSDD links from the Atom response.  Step 2 runs
# the same search against each granule-level OSDD and prints the data links.
#
# N.B.: minimal included modules for portability
# (Could be more efficient with XML/Atom parsing and XPath.)
# Example:
# esip_fedsearch.pl \
#   --osdd=http://mirador.gsfc.nasa.gov/mirador_dataset_opensearch.xml \
#   --bbox=-130,25,-60,50 \
#   --start=1998-01-01T00:00:00Z --end=2002-12-31T23:59:59Z \
#   --keywords=microwave --max_gran=1 --verbose
use strict;
use warnings;
use Getopt::Long;
use LWP::Simple;
use Time::Local;

# Shared with fill_template() for diagnostic output.
our $verbose = 0;

# Run only when executed as a script, so the subs can be loaded on their own.
main() unless caller;

# main: parse the command line, run the dataset search, then the granule
# search for each dataset returned, printing one data URL per line.
sub main {
    # Parse command line
    my ($osdd_url, $keywords, $bbox, $start, $end, $help);
    my $max_ds   = 1;
    my $max_gran = 1;
    my $parsed = GetOptions(
        "osdd=s"     => \$osdd_url,
        "keywords=s" => \$keywords,
        "bbox=s"     => \$bbox,
        "start=s"    => \$start,
        "end=s"      => \$end,
        "max_ds=i"   => \$max_ds,
        "max_gran=i" => \$max_gran,
        "verbose"    => \$verbose,
        "help"       => \$help,
    );
    # Both --osdd and --keywords are documented as required.
    usage() if (!$parsed || $help || !$osdd_url || !$keywords);

    # Defaults as documented in usage(): start = yesterday, end = start + 1 day.
    $start ||= epoch2ccsds(time() - 86400);
    $end   ||= epoch2ccsds(ccsds2epoch($start) + 86400);
    warn("start: $start\nend: $end\n") if $verbose;

    # Get Dataset Open Search Description Document
    my $datasets = opensearch($osdd_url, $keywords, $bbox, $start, $end, $max_ds);
    my @osdd = extract_links($datasets, "search", "opensearchdescription");

    # Loop through returned dataset OpenSearch Description Documents
    my $n = 0;
    foreach my $osdd (@osdd) {
        my $granules = opensearch($osdd, $keywords, $bbox, $start, $end, $max_gran);
        my @links = extract_links($granules, "/data#", '');
        print join("\n", @links, '');
        $n++;
        last if ($n >= $max_ds);    # In case count is not supported at dataset level
    }
    return;
}

# _attr: return the value of attribute $name from the text of an XML start
# tag (everything after the element name), or nothing if it is absent.
# Accepts double- or single-quoted values.
sub _attr {
    my ($attrs, $name) = @_;
    if ($attrs =~ m/(?:\A|\s)\Q$name\E\s*=\s*(["'])(.*?)\1/is) {
        return $2;
    }
    return;
}

# Extract links from Atom document based on rel and type values.
#   $doc         - Atom document text (may be undef if the fetch failed)
#   $rel_target  - pattern the link's rel attribute must match ('' = any)
#   $type_target - pattern the link's type attribute must match ('' = any)
# Returns the href of the first matching <link> of each <entry>, with
# &amp; unescaped to &.
sub extract_links {
    my ($doc, $rel_target, $type_target) = @_;
    my @links;
    return @links unless defined $doc;

    # Loop through <entry> elements
    while ($doc =~ m{<[\w:]*entry\b[^>]*>(.*?)</[\w:]*entry>}isg) {
        my $entry = $1;

        # Loop through <link> elements
        while ($entry =~ m/<[\w:]*link\b(.*?)>/sg) {
            my $link = $1;

            my $rel = _attr($link, 'rel');
            next if ($rel_target && !(defined $rel && $rel =~ /$rel_target/));

            my $type = _attr($link, 'type');
            next if ($type_target && !(defined $type && $type =~ /$type_target/));

            my $link_href = _attr($link, 'href');
            next unless defined $link_href;

            $link_href =~ s/&amp;/&/g;    # Unescape XML-encoded ampersands
            push @links, $link_href;
            last;                         # Only the first matching link per entry
        }
    }
    return @links;
}

# _atom_template: pick the search URL template out of an OSDD.  Prefers the
# <Url> element whose type mentions atom+xml; if none declares that type,
# falls back to the last template found.
sub _atom_template {
    my ($osdd) = @_;
    my $fallback;
    while ($osdd =~ m/<[\w:]*Url\s(.*?)>/isg) {
        my $attrs = " $1";
        my $template = _attr($attrs, 'template');
        next unless defined $template;
        my $type = _attr($attrs, 'type');
        return $template if (defined $type && $type =~ m{atom\+xml}i);
        $fallback = $template;
    }
    return $fallback;
}

# opensearch: given a URL to an OpenSearch Description Document and the search values,
# fetch the OSDD and execute the search.
# Dies if the OSDD cannot be fetched or contains no template; warns and
# returns undef if the search itself returns nothing.
sub opensearch {
    my ($osdd_url, $keywords, $bbox, $start, $end, $count) = @_;

    # Fetch OpenSearch Description Document
    my $osdd = get($osdd_url) or die "Could not get $osdd_url";

    # Extract template for Atom response
    my $template = _atom_template($osdd);
    die "No search template found in $osdd_url" unless $template;

    # Fill template in with values
    my $url = fill_template($template, $keywords, $bbox, $start, $end, $count);

    # Fetch results
    my $results = get($url);
    warn "No results returned for $url" unless $results;
    return $results;
}

# fill_template: fill in an OpenSearch template with values from command line.
# Placeholders with no value supplied are removed together with their
# query parameter.  Returns the resulting URL.
sub fill_template {
    my ($template, $keywords, $bbox, $start, $end, $count) = @_;
    $template = '' unless defined $template;
    warn "Before: $template\n" if ($verbose);

    $template =~ s/&amp;/&/g;    # Unescape
    $template =~ s/\{time:start\?*\}/$start/     if ($start);
    $template =~ s/\{time:end\?*\}/$end/         if ($end);
    $template =~ s/\{geo:box\?*\}/$bbox/         if ($bbox);
    $template =~ s/\{searchTerms\?*\}/$keywords/ if ($keywords);
    $template =~ s/\{count\?*\}/$count/          if ($count);

    # rm unfilled placeholders.  Parameters introduced by '&' can simply be
    # dropped; one introduced by '?' must leave the '?' in place for whatever
    # parameter follows it, otherwise the query string is lost.
    $template =~ s/&\w+=\{[\w:]+\?*\}//g;
    $template =~ s/\?\w+=\{[\w:]+\?*\}&?/?/;
    $template =~ s/\?\z//;

    warn "After: $template\n" if ($verbose);
    return $template;
}

# ccsds2epoch: convert a yyyy-mm-ddThh:mm:ss[Z] time string to epoch seconds
# (UTC).  Dies if the string does not have that shape.
sub ccsds2epoch {
    my $ccsds = defined $_[0] ? $_[0] : '';
    my @parts = ($ccsds =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)/);
    die "Invalid time '$ccsds' (expected yyyy-mm-ddThh:mm:ssZ)\n" unless @parts;
    my ($y, $m, $d, $h, $min, $s) = @parts;

    # Pass the four-digit year: Time::Local treats values 0-99 as two-digit
    # years in a rolling century window, so $y-1900 misplaces older dates.
    return timegm($s, $min, $h, $d, $m - 1, $y);
}

# epoch2ccsds: convert epoch seconds to a yyyy-mm-ddThh:mm:ss string (UTC).
sub epoch2ccsds {
    my @t = gmtime($_[0]);
    return sprintf("%04d-%02d-%02dT%02d:%02d:%02d",
        $t[5] + 1900, $t[4] + 1, $t[3], $t[2], $t[1], $t[0]);
}

# usage: print the command-line synopsis to STDERR and exit non-zero.
sub usage {
    die <<'END_USAGE';
esip_fedsearch.pl [options]
 --osdd=url                    URL of dataset-level OpenSearch Description Document (Required)
 --bbox=lon,lat,lon,lat        Bounding box of search area
 --start=yyyy-mm-ddThh:mm:ssZ  Start time of search (Default=yesterday)
 --end=yyyy-mm-ddThh:mm:ssZ    End time of search (Default = start+1day)
 --max_ds=N                    Maximum number of datasets (Default = 1)
 --max_gran=N                  Maximum number of granules per dataset (Default = 1)
 --verbose                     Print some diagnostic messages
 --keywords=word+word+word...  Keywords, separated by '+' (Required)
END_USAGE
}