DataFed Service Chain Example

WCS python demos by Hoijarvi

Service Chaining Example

This example uses python to

get point data
render it as an image
store the image on local computer

See WCS page SOAPifying_WCS

Getting Demo Script

Download http://datafed.net/demo/soapchain.txt and save it with extension .py

From the command line, type python soapchain.py and watch it run.

There are no parameters, the script is hard coded for demo purposes.

function execute_chain

This is the main program for the service chain.

Query data with WCS,
Render it,
save locally

There are three problems with SOAP and large amounts of data.

First, data has to be passed as XML, which results in enormous amounts of data for large tables, causing performance problems.
Second, SOAP defines a 4 MB size limit for messages, so large data transfers have to be partitioned, making things more complicated.
Third, WCS data can be binary; although binary encoding with MIME attachments is possible, I do not know of any system that uses it.

Datafed services have solved these issues by using a common two-phase pattern. The SOAP services return an envelope with a small amount of metadata describing the result, and a URI pointer to the cached result.

Datafed SOAP services also use this pattern when passing data into services. For example, in this case the table url is passed from WCS to RenderMapPoint directly. Since the services are located on the same machine, they can access the table directly, without ever turning it into XML.

def execute_chain():
    """Run the demo chain: query a table, render it, save the image locally.

    Each SOAP call returns a url that points at the result cached on the
    server, and that url is what gets handed to the next step.  Only the
    final image is fetched by this script and written to "soapchain.png".
    """
    print("querying table")
    # The table url looks like
    #   http://webapps.datafed.net/storage.aspx?ID=GetCoverage_91
    # so the rendering service can read the table straight from the cache
    # with the ID GetCoverage_91.
    table_ref = get_table_url()

    print("rendering table " + table_ref)
    # The image url looks like
    #   http://webapps.datafed.net/storage.aspx?ID=RenderMapPoint_92
    # and image processing services could in turn get the big image
    # directly from the server cache with the ID RenderMapPoint_92.
    image_ref = get_image_url(table_ref)

    # Finally pull the result from the server and store it on the local drive.
    print("fetching image " + image_ref)
    response = urllib2.urlopen(image_ref)
    try:
        print("writing image to file")
        dump_stream(response, "soapchain.png")
    finally:
        response.close()

# SOAP envelope template for the WCS GetCoverage request.
# The [[name]] markers are placeholders that replace_parameters() fills in
# before the envelope is sent.  The XML comments inside the string are part
# of the request text itself.

get_coverage_query = """
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
    <soap:Body>
        <wcs:GetCoverage version="1.0.0" service="WCS"
                xmlns:gml="http://www.opengis.net/gml" xmlns:wcs="http://www.opengis.net/wcs">
            <!-- datafed dataset_abbr.param_abbr defines the coverage name -->
            <wcs:sourceCoverage>[[dataset_abbr]].[[param_abbr]]</wcs:sourceCoverage> 
            <wcs:domainSubset>
                <wcs:spatialSubset>
                    <!--
                        This element queries the USA.
                        The dataset has no elevation, so only lat and lon
                        limits are needed. 
                    -->
                    <gml:Envelope srsName="WGS84(DD)">
                        <gml:pos>[[lon_min]] [[lat_min]]</gml:pos>
                        <gml:pos>[[lon_max]] [[lat_max]]</gml:pos>
                    </gml:Envelope>
                    <gml:Grid dimension="2">
                        <gml:limits>
                            <!--
                                grid size. This is a point dataset, so these numbers have no meaning.
                            -->
                            <gml:GridEnvelope>
                                <gml:low>0 0</gml:low>
                                <gml:high>99 99</gml:high>
                            </gml:GridEnvelope>
                        </gml:limits>
                        <gml:axisName>lat</gml:axisName>
                        <gml:axisName>lon</gml:axisName>
                    </gml:Grid>
                </wcs:spatialSubset>
                <wcs:temporalSubset>
                    <!--
                        query data for one time only.
                    -->
                    <gml:timePosition>[[datetime]]</gml:timePosition>
                </wcs:temporalSubset>
            </wcs:domainSubset>
            <wcs:output>
                <!--
                .NET dataset is a good format for point datasets
                -->
                <wcs:format>dataset-schema</wcs:format>
            </wcs:output>
        </wcs:GetCoverage>
    </soap:Body>
</soap:Envelope>
"""

# SOAP envelope template for the RenderMapPoint Render call.
# [[tableref]] is replaced with the table url in get_image_url(); the other
# [[name]] markers are filled in by replace_parameters().

render_point_query = """
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
    <soap:Body>
        <rmp:Render xmlns:rmp="http://datafed.net/xs/RenderMapPoint">
            <Table xmlns="http://datafed.net/xs/Table">
                <TableRef>[[tableref]]</TableRef>
            </Table>
            <rmp:Settings xmlns:mi="http://datafed.net/xs/MapImageLatLon"
                 xmlns:ip="http://datafed.net/xs/ImagePrimitives">
                <rmp:image_desc>
                    <mi:zoom>
                        <mi:image_width>400</mi:image_width>
                        <mi:image_height>200</mi:image_height>
                        <mi:lat_min>[[lat_min]]</mi:lat_min>
                        <mi:lat_max>[[lat_max]]</mi:lat_max>
                        <mi:lon_min>[[lon_min]]</mi:lon_min>
                        <mi:lon_max>[[lon_max]]</mi:lon_max>
                    </mi:zoom>
                    <mi:bgcolor>0xE1FFF0</mi:bgcolor>
                    <mi:image_format>image/png</mi:image_format>
                </rmp:image_desc>
                <rmp:data_column>[[param_abbr]]</rmp:data_column>
                <rmp:scale_min>0</rmp:scale_min>
                <rmp:scale_max>[[scale_max]]</rmp:scale_max>
                <rmp:sqrt>false</rmp:sqrt>
                <rmp:symbol>
                    <ip:width>10</ip:width>
                    <ip:height>10</ip:height>
                    <ip:offset_x>0</ip:offset_x>
                    <ip:offset_y>0</ip:offset_y>
                    <ip:shape>circle</ip:shape>
                    <ip:num_of_sides>4</ip:num_of_sides>
                    <ip:baseline>false</ip:baseline>
                </rmp:symbol>
                <rmp:pen>
                    <ip:width>0.5</ip:width>
                    <ip:style>solid</ip:style>
                    <ip:color>red</ip:color>
                </rmp:pen>
                <rmp:brush>
                    <ip:style>solid</ip:style>
                    <ip:color>yellow</ip:color>
                </rmp:brush>
                <rmp:script>
                    used.symbol.width=symbol.width*norm_param_value;
                    used.symbol.height=symbol.height*norm_param_value;
                </rmp:script>
            </rmp:Settings>
        </rmp:Render>
    </soap:Body>
</soap:Envelope>
"""

# This function performs a SOAP query against the datafed web services.
# INPUT:  service url path, soap action, input envelope as text
# OUTPUT: the response envelope parsed into an xml.dom.minidom document;
#         callers unlink() it when they are done with it
# RAISES: httplib.HTTPException when the server answers with any status
#         other than 200

def query_datafed(service, action, soap_in_text):
    # open connection to std port 80
    c = httplib.HTTPConnection("webapps.datafed.net", 80)
    c.connect()
    try:
        # the Web Service Definition Language definition is at
        # http://webapps.datafed.net/WCS.asmx?WSDL
        c.putrequest("POST", "/" + service)
        c.putheader("soapAction", action)
        c.putheader("content-type", "text/xml")
        # str(), not repr(): repr() of a long integer carries an "L" suffix
        c.putheader("content-length", str(len(soap_in_text)))
        c.endheaders()
        c.send(soap_in_text)
        r = c.getresponse()
        # 200 means OK. Anything else is a failure.
        if r.status != 200:
            msg = r.read()
            # Raise a real exception object: string exceptions
            # (raise 'http-error', ...) are rejected with a TypeError since
            # Python 2.6, which would hide the actual HTTP failure.
            raise httplib.HTTPException(
                'http-error: %r %r %s' % (r.status, r.reason, msg))
        # parse while the connection is still open; r is read as a stream
        return xml.dom.minidom.parse(r)
    finally:
        # closing expensive resources ASAP is a good practice
        c.close()

# This script is for demo purposes, so the query parameters are hard coded.
# Every key is the name of a [[key]] placeholder in the SOAP templates.

parameters = {
    # coverage name: dataset_abbr.param_abbr
    "dataset_abbr": "AIRNOW",
    "param_abbr": "pmfine",
    # lat/lon limits of the query and of the rendered map
    "lat_min": "24",
    "lat_max": "50",
    "lon_min": "-130",
    "lon_max": "-65",
    # the single time the data is queried for
    "datetime": "2006-04-15T12:00:00",
    # upper end of the rendering scale
    "scale_max": "25",
}

def replace_parameters(raw_text, params=None):
    """Fill the [[name]] placeholders of a template with their values.

    raw_text -- template text containing [[name]] markers
    params   -- optional mapping of placeholder name to replacement string.
                When omitted, the module-level ``parameters`` dict is used,
                which is the original behaviour.

    Returns the text with every marker whose name is a key of the mapping
    replaced by the corresponding value.  Markers without a matching key
    are left untouched.
    """
    if params is None:
        params = parameters
    text = raw_text
    for name, value in params.items():
        text = text.replace("[[" + name + "]]", value)
    return text

def look_for_ns_name(xmldoc, ns, name):
    """Return the first element of xmldoc with the given namespace and name.

    xmldoc -- DOM document (or element) to search
    ns     -- namespace uri of the wanted element
    name   -- local name of the wanted element

    Raises LookupError when no such element exists.
    """
    for node in xmldoc.getElementsByTagNameNS(ns, name):
        return node
    # Raise explicitly rather than "assert False": assert statements are
    # stripped when python runs with -O, which would make this function
    # silently return None.
    raise LookupError('could not find node %s:%s' % (ns, name))

def dump_stream(stream, filename, chunk_size=64 * 1024):
    """Copy everything that can be read from stream into a binary file.

    stream     -- object with a read(size) method returning bytes
    filename   -- path of the file to write; an existing file is overwritten
    chunk_size -- number of bytes requested per read, so a large download
                  is never held in memory as a whole
    """
    # "wb": (w) create or truncate for writing, (b) binary mode.
    # The "+" of the old "wb+" mode only added read access, which is not
    # needed here; truncation comes from "w".
    outfile = open(filename, "wb")
    try:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:
                # an empty read means the stream is exhausted
                break
            outfile.write(chunk)
    finally:
        # close() flushes whatever is still buffered
        outfile.close()

def get_table_url():
    """Query the coverage and return the url of the resulting data table.

    The output envelope does not contain the data itself but a reference
    to it, so the value returned here is that reference url.
    """
    request_text = replace_parameters(get_coverage_query)
    response_doc = query_datafed(
        "WCS.asmx",
        "http://datafed.net/WCS/GetCoverage",
        request_text)
    try:
        data_node = look_for_ns_name(
            response_doc, "http://datafed.net/xs/wcs", "data")
        # the text is read out before the finally clause unlinks the document
        return data_node.firstChild.nodeValue
    finally:
        response_doc.unlink()

def get_image_url(table_url):
    """Render the table behind table_url and return the url of the image.

    The service accepts the table url, fetches the table and renders it.
    As with the table query, the output envelope holds only a reference to
    the image, which stays on the server; that reference url is returned.
    """
    # Fill in the shared parameters first, then the table reference.
    request_text = replace_parameters(render_point_query).replace(
        "[[tableref]]", table_url)
    response_doc = query_datafed(
        "RenderMapPoint.asmx",
        "http://datafed.net/RenderMapPoint/Render",
        request_text)
    try:
        url_node = look_for_ns_name(
            response_doc, "http://datafed.net/xs/MapImageLatLon", "image_url")
        # the text is read out before the finally clause unlinks the document
        return url_node.firstChild.nodeValue
    finally:
        response_doc.unlink()

#
#   Script entry point: the chain is executed only when this file is run
#   from the command line, not when it is imported as a module.
#
if __name__ == "__main__":
    execute_chain()