Difference between revisions of "DataFed Service Chain Example"

Revision as of 22:56, June 6, 2006

WCS python demos by Hoijarvi

Service Chaining Example

This example uses python to

get point data
render it as an image
store the image on local computer

See WCS page SOAPifying_WCS

Getting Demo Script

Download http://datafed.net/demo/soapchain.txt and save it with extension .py

Type python soapchain.py on the command line and watch it run.

There are no parameters; the script is hard-coded for demo purposes.

function execute_chain

This is the main program for the service chain.

Query data with WCS,
Render it,
save locally

There are three problems with SOAP and large amounts of data.

First, data has to be passed as XML, which results in enormous amounts of data for large tables, causing performance problems.
Second, SOAP defines a 4 MB size limit for messages, so large data transfers have to be partitioned, making things more complicated.
Third, WCS data can be binary; although binary encoding with MIME attachments is possible, I do not know of any system that uses it.

Datafed services have solved the issue by using a common two-phase pattern. The SOAP services return an envelope with a small amount of metadata describing the result, and a URI pointer to the cached result.

Datafed SOAP services also use this pattern when passing data into services. For example, in this case the table URL is passed from WCS to RenderMapPoint directly. Since the services are located on the same machine, they can access the table directly, without ever turning it into XML.

def execute_chain():
    """Run the demo chain: query the table, render it, save the image.

    Each SOAP step hands back a URL that points at a result cached on the
    server, so only the final image is downloaded; it is written to
    "soapchain.png".
    """
    print("querying table")
    # e.g. http://webapps.datafed.net/storage.aspx?ID=GetCoverage_91 -- the
    # rendering service can read the table from the cache by that ID.
    table_ref = get_table_url()
    print("rendering table " + table_ref)
    # e.g. http://webapps.datafed.net/storage.aspx?ID=RenderMapPoint_92 -- a
    # further image-processing service could pick the image up the same way.
    picture_ref = get_image_url(table_ref)
    # Only now is a result actually pulled from the server and stored locally.
    print("fetching image " + picture_ref)
    response = urllib2.urlopen(picture_ref)
    try:
        print("writing image to file")
        dump_stream(response, "soapchain.png")
    finally:
        response.close()

# WCS query template.
# SOAP envelope for a WCS 1.0.0 GetCoverage request. The [[name]] markers
# ([[dataset_abbr]], [[param_abbr]], [[lon_min]], [[lat_min]], [[lon_max]],
# [[lat_max]], [[datetime]]) are placeholders that replace_parameters()
# fills in from the `parameters` dict before the text is sent.

get_coverage_query = """
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
    <soap:Body>
        <wcs:GetCoverage version="1.0.0" service="WCS"
                xmlns:gml="http://www.opengis.net/gml" xmlns:wcs="http://www.opengis.net/wcs">
            <!-- datafed dataset_abbr.param_abbr defines the coverage name -->
            <wcs:sourceCoverage>[[dataset_abbr]].[[param_abbr]]</wcs:sourceCoverage> 
            <wcs:domainSubset>
                <wcs:spatialSubset>
                    <!--
                        This element queries the USA.
                        The dataset has no elevation, so only lat and lon
                        limits are needed. 
                    -->
                    <gml:Envelope srsName="WGS84(DD)">
                        <gml:pos>[[lon_min]] [[lat_min]]</gml:pos>
                        <gml:pos>[[lon_max]] [[lat_max]]</gml:pos>
                    </gml:Envelope>
                    <gml:Grid dimension="2">
                        <gml:limits>
                            <!--
                                grid size. This is a point dataset, so these numbers have no meaning.
                            -->
                            <gml:GridEnvelope>
                                <gml:low>0 0</gml:low>
                                <gml:high>99 99</gml:high>
                            </gml:GridEnvelope>
                        </gml:limits>
                        <gml:axisName>lat</gml:axisName>
                        <gml:axisName>lon</gml:axisName>
                    </gml:Grid>
                </wcs:spatialSubset>
                <wcs:temporalSubset>
                    <!--
                        query data for one time only.
                    -->
                    <gml:timePosition>[[datetime]]</gml:timePosition>
                </wcs:temporalSubset>
            </wcs:domainSubset>
            <wcs:output>
                <!--
                .NET dataset is a good format for point datasets
                -->
                <wcs:format>dataset-schema</wcs:format>
            </wcs:output>
        </wcs:GetCoverage>
    </soap:Body>
</soap:Envelope>
"""

# Render call template.
# SOAP envelope for the RenderMapPoint "Render" operation. [[tableref]] is
# filled in by get_image_url() with the table URL; the remaining markers
# ([[lat_min]], [[lat_max]], [[lon_min]], [[lon_max]], [[param_abbr]],
# [[scale_max]]) are filled in by replace_parameters() from `parameters`.

render_point_query = """
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
    <soap:Body>
        <rmp:Render xmlns:rmp="http://datafed.net/xs/RenderMapPoint">
            <Table xmlns="http://datafed.net/xs/Table">
                <TableRef>[[tableref]]</TableRef>
            </Table>
            <rmp:Settings xmlns:mi="http://datafed.net/xs/MapImageLatLon"
                 xmlns:ip="http://datafed.net/xs/ImagePrimitives">
                <rmp:image_desc>
                    <mi:zoom>
                        <mi:image_width>400</mi:image_width>
                        <mi:image_height>200</mi:image_height>
                        <mi:lat_min>[[lat_min]]</mi:lat_min>
                        <mi:lat_max>[[lat_max]]</mi:lat_max>
                        <mi:lon_min>[[lon_min]]</mi:lon_min>
                        <mi:lon_max>[[lon_max]]</mi:lon_max>
                    </mi:zoom>
                    <mi:bgcolor>0xE1FFF0</mi:bgcolor>
                    <mi:image_format>image/png</mi:image_format>
                </rmp:image_desc>
                <rmp:data_column>[[param_abbr]]</rmp:data_column>
                <rmp:scale_min>0</rmp:scale_min>
                <rmp:scale_max>[[scale_max]]</rmp:scale_max>
                <rmp:sqrt>false</rmp:sqrt>
                <rmp:symbol>
                    <ip:width>10</ip:width>
                    <ip:height>10</ip:height>
                    <ip:offset_x>0</ip:offset_x>
                    <ip:offset_y>0</ip:offset_y>
                    <ip:shape>circle</ip:shape>
                    <ip:num_of_sides>4</ip:num_of_sides>
                    <ip:baseline>false</ip:baseline>
                </rmp:symbol>
                <rmp:pen>
                    <ip:width>0.5</ip:width>
                    <ip:style>solid</ip:style>
                    <ip:color>red</ip:color>
                </rmp:pen>
                <rmp:brush>
                    <ip:style>solid</ip:style>
                    <ip:color>yellow</ip:color>
                </rmp:brush>
                <rmp:script>
                    used.symbol.width=symbol.width*norm_param_value;
                    used.symbol.height=symbol.height*norm_param_value;
                </rmp:script>
            </rmp:Settings>
        </rmp:Render>
    </soap:Body>
</soap:Envelope>
"""

# This function performs WCS SOAP query to datafed
# INPUT: Service url path, soap action, input envelope as text
# output: xml envelope

def query_datafed(service, action, soap_in_text):
    """POST a SOAP envelope to a datafed web service and parse the reply.

    Args:
        service: service path on webapps.datafed.net, e.g. "WCS.asmx".
        action: value sent in the soapAction header.
        soap_in_text: the complete request envelope as text.

    Returns:
        The response envelope parsed into an xml.dom.minidom document.
        The callers in this script unlink() it once they are done with it.

    Raises:
        httplib.HTTPException: if the server answers with any status other
            than 200; the message carries the status, reason and body.
    """
    # open connection to std port 80
    c = httplib.HTTPConnection("webapps.datafed.net", 80)
    c.connect()
    try:
        # the Web Service Definition Language definition is at
        # http://webapps.datafed.net/WCS.asmx?WSDL
        c.putrequest("POST", "/" + service)
        c.putheader("soapAction", action)
        c.putheader("content-type", "text/xml")
        c.putheader("content-length", str(len(soap_in_text)))
        c.endheaders()
        c.send(soap_in_text)
        r = c.getresponse()
        # 200 means OK. Anything other is a failure.
        if r.status != 200:
            msg = r.read()
            # String exceptions (raise 'http-error', ...) were removed in
            # Python 2.6 and only produce a TypeError there, hiding the real
            # failure; raise a proper exception object instead.
            raise httplib.HTTPException(
                "http-error %r %r %s" % (r.status, r.reason, msg))
        # Parse while the connection is still open: minidom reads the body
        # straight from the response object.
        return xml.dom.minidom.parse(r)
    finally:
        # closing expensive resources ASAP is a good practice
        c.close()

# this script is for demo purposes, so the query parameters are hard coded.

parameters = {
    # coverage to query: the template joins these as dataset_abbr.param_abbr
    "dataset_abbr": "AIRNOW",
    "param_abbr": "pmfine",
    # latitude / longitude limits of the query and of the rendered map
    "lat_min": "24",
    "lat_max": "50",
    "lon_min": "-130",
    "lon_max": "-65",
    # the single time position that is queried
    "datetime": "2006-04-15T12:00:00",
    # value placed in rmp:scale_max of the render request
    "scale_max": "25",
}

def replace_parameters(raw_text):
    """Return raw_text with every [[key]] marker replaced by its value.

    Keys and values come from the module-level `parameters` dict; markers
    whose key is not in that dict are left untouched.
    """
    filled = raw_text
    for key, value in parameters.items():
        filled = filled.replace("[[%s]]" % key, value)
    return filled

def look_for_ns_name(xmldoc, ns, name):
    """Return the first element called `name` in namespace `ns`.

    Args:
        xmldoc: DOM document or element to search (anything offering
            getElementsByTagNameNS).
        ns: namespace URI of the wanted element.
        name: local name of the wanted element.

    Returns:
        The first matching element.

    Raises:
        AssertionError: if there is no matching element.
    """
    for node in xmldoc.getElementsByTagNameNS(ns, name):
        # only the first match is of interest
        return node
    # Raised explicitly instead of `assert False, ...`: assert statements are
    # removed under `python -O`, which would let this fall through and return
    # None, failing later at the caller with a far less helpful error.
    raise AssertionError('could not find node ' + ns + ':' + name)

def dump_stream(stream, filename):
    """Copy everything that can be read from `stream` into the file `filename`.

    Args:
        stream: file-like object with a read() method that yields bytes,
            e.g. the response returned by urlopen().
        filename: path of the file to write; an existing file is overwritten.
    """
    # Local import so the function carries its own dependency.
    import shutil

    # "wb": (w)rite -- which already truncates an existing file -- in
    # (b)inary mode.  The file is never read back, so "+" is not needed.
    with open(filename, "wb") as outfile:
        # Copy in chunks so a large image is never held in memory as a whole.
        shutil.copyfileobj(stream, outfile)

# get the data as a table.
# Since the output envelope does not contain data,
# but a reference to data, this function returns
# the reference url to the data table
def get_table_url():
    """Run the WCS GetCoverage query and return the URL of the result table.

    The response envelope holds a reference to the data rather than the
    data itself; that reference is the text of the "data" element in the
    http://datafed.net/xs/wcs namespace.
    """
    request_text = replace_parameters(get_coverage_query)
    reply = query_datafed(
        "WCS.asmx", "http://datafed.net/WCS/GetCoverage", request_text)
    try:
        data_node = look_for_ns_name(reply, "http://datafed.net/xs/wcs", "data")
        return data_node.firstChild.nodeValue
    finally:
        # release the DOM tree whether or not the lookup succeeded
        reply.unlink()

# Render the table.
# The service accepts a table url, fetches it and renders it.
# Again, Since the output envelope does not contain an image,
# but a reference to the image, this function returns
# the reference url to the image which is still in the server
def get_image_url(table_url):
    """Ask RenderMapPoint to render the table and return the image URL.

    Args:
        table_url: URL of the table to render, as returned by get_table_url().

    Returns:
        The text of the "image_url" element (namespace
        http://datafed.net/xs/MapImageLatLon) of the response.  The envelope
        carries only this reference; the image itself stays on the server.
    """
    # Imported by name so that no local `xml` binding is created here.
    from xml.sax.saxutils import escape

    render_soap_in_text = replace_parameters(render_point_query)
    # The URL becomes XML element content, so characters such as "&" or "<"
    # have to be escaped or the request envelope would be malformed.
    render_soap_in_text = render_soap_in_text.replace("[[tableref]]", escape(table_url))
    image_soap_out = query_datafed("RenderMapPoint.asmx", "http://datafed.net/RenderMapPoint/Render", render_soap_in_text)
    try:
        image_url = look_for_ns_name(image_soap_out, "http://datafed.net/xs/MapImageLatLon", "image_url").firstChild.nodeValue
    finally:
        # release the DOM tree whether or not the lookup succeeded
        image_soap_out.unlink()
    return image_url

#
#   Entry point: when this file is run as a script from the command line
#   (rather than imported as a module), execute the whole service chain.
#
if __name__ == "__main__":
    execute_chain()

@@ Line 2: / Line 2: @@
 == Service Chaining Example ==
-This example uses popular python to get point data and render it as an image. See WCS page [[SOAPifying_WCS]]
+This example uses python to
+* get point data
+* render it as an image
+* store the image on local computer
+See WCS page [[SOAPifying_WCS]]
 === Getting Demo Script ===
@@ Line 11: / Line 16: @@
 There are no parameters, the script is hard coded for demo purposes.
+== function execute_chain ==
+This is the main program for the service chain.
+* Query data with WCS,
+* Render it,
+* save locally
+There are three problems with SOAP and large amounts of data.
+* First, data has to be passed as XML, which results in enormous amounts of data for large tables, causing performance problems.
+* Second, SOAP defines a 4 MB size limit for messages, so large data transfers have to be partitioned, making things more complicated.
+* Third, WCS data can be binary; although binary encoding with MIME attachments is possible, I do not know of any system that uses it.
+Datafed services have solved the issue by using a common two-phase pattern.
+The SOAP services return an envelope with a small amount of metadata
+describing the result, and a URI pointer to the cached result.
+Datafed SOAP services also use this pattern when passing data
+into services. For example, in this case the table URL is passed
+from WCS to RenderMapPoint directly. Since the services are located
+on the same machine, they can access the table directly, without
+ever turning it into XML.
+<pre>
+def execute_chain():
+    print "querying table"
+    table_url = get_table_url()
+    # table_url == http://webapps.datafed.net/storage.aspx?ID=GetCoverage_91
+    # rendering service can get this directly from the cache with the ID GetCoverage_91
+    print "rendering table " + table_url
+    image_url = get_image_url(table_url)
+    # image_url == http://webapps.datafed.net/storage.aspx?ID=RenderMapPoint_92
+    # image processing services could again get the big image directly from
+    # the server cache with ID RenderMapPoint_92
+    #
+    # Now we get the result from the server and save it on the local drive.
+    print "fetching image " + image_url
+    image_stream = urllib2.urlopen(image_url)
+    try:
+        print "writing image to file"
+        dump_stream(image_stream, "soapchain.png")
+    finally:
+        image_stream.close()
+</pre>
+<pre>
+# WCS query template
+get_coverage_query = """
+<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
+    <soap:Body>
+        <wcs:GetCoverage version="1.0.0" service="WCS"
+                xmlns:gml="http://www.opengis.net/gml" xmlns:wcs="http://www.opengis.net/wcs">
+            <!-- datafed dataset_abbr.param_abbr defines the coverage name -->
+            <wcs:sourceCoverage>[[dataset_abbr]].[[param_abbr]]</wcs:sourceCoverage>
+            <wcs:domainSubset>
+                <wcs:spatialSubset>
+                    <!--
+                        This element queries the USA.
+                        The dataset has no elevation, so only lat and lon
+                        limits are needed.
+                    -->
+                    <gml:Envelope srsName="WGS84(DD)">
+                        <gml:pos>[[lon_min]] [[lat_min]]</gml:pos>
+                        <gml:pos>[[lon_max]] [[lat_max]]</gml:pos>
+                    </gml:Envelope>
+                    <gml:Grid dimension="2">
+                        <gml:limits>
+                            <!--
+                                grid size. This is a point dataset, so these numbers have no meaning.
+                            -->
+                            <gml:GridEnvelope>
+                                <gml:low>0 0</gml:low>
+                                <gml:high>99 99</gml:high>
+                            </gml:GridEnvelope>
+                        </gml:limits>
+                        <gml:axisName>lat</gml:axisName>
+                        <gml:axisName>lon</gml:axisName>
+                    </gml:Grid>
+                </wcs:spatialSubset>
+                <wcs:temporalSubset>
+                    <!--
+                        query data for one time only.
+                    -->
+                    <gml:timePosition>[[datetime]]</gml:timePosition>
+                </wcs:temporalSubset>
+            </wcs:domainSubset>
+            <wcs:output>
+                <!--
+                .NET dataset is a good format for point datasets
+                -->
+                <wcs:format>dataset-schema</wcs:format>
+            </wcs:output>
+        </wcs:GetCoverage>
+    </soap:Body>
+</soap:Envelope>
+"""
+# render call template
+render_point_query = """
+<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
+    <soap:Body>
+        <rmp:Render xmlns:rmp="http://datafed.net/xs/RenderMapPoint">
+            <Table xmlns="http://datafed.net/xs/Table">
+                <TableRef>[[tableref]]</TableRef>
+            </Table>
+            <rmp:Settings xmlns:mi="http://datafed.net/xs/MapImageLatLon"
+                 xmlns:ip="http://datafed.net/xs/ImagePrimitives">
+                <rmp:image_desc>
+                    <mi:zoom>
+                        <mi:image_width>400</mi:image_width>
+                        <mi:image_height>200</mi:image_height>
+                        <mi:lat_min>[[lat_min]]</mi:lat_min>
+                        <mi:lat_max>[[lat_max]]</mi:lat_max>
+                        <mi:lon_min>[[lon_min]]</mi:lon_min>
+                        <mi:lon_max>[[lon_max]]</mi:lon_max>
+                    </mi:zoom>
+                    <mi:bgcolor>0xE1FFF0</mi:bgcolor>
+                    <mi:image_format>image/png</mi:image_format>
+                </rmp:image_desc>
+                <rmp:data_column>[[param_abbr]]</rmp:data_column>
+                <rmp:scale_min>0</rmp:scale_min>
+                <rmp:scale_max>[[scale_max]]</rmp:scale_max>
+                <rmp:sqrt>false</rmp:sqrt>
+                <rmp:symbol>
+                    <ip:width>10</ip:width>
+                    <ip:height>10</ip:height>
+                    <ip:offset_x>0</ip:offset_x>
+                    <ip:offset_y>0</ip:offset_y>
+                    <ip:shape>circle</ip:shape>
+                    <ip:num_of_sides>4</ip:num_of_sides>
+                    <ip:baseline>false</ip:baseline>
+                </rmp:symbol>
+                <rmp:pen>
+                    <ip:width>0.5</ip:width>
+                    <ip:style>solid</ip:style>
+                    <ip:color>red</ip:color>
+                </rmp:pen>
+                <rmp:brush>
+                    <ip:style>solid</ip:style>
+                    <ip:color>yellow</ip:color>
+                </rmp:brush>
+                <rmp:script>
+                    used.symbol.width=symbol.width*norm_param_value;
+                    used.symbol.height=symbol.height*norm_param_value;
+                </rmp:script>
+            </rmp:Settings>
+        </rmp:Render>
+    </soap:Body>
+</soap:Envelope>
+"""
+# This function performs WCS SOAP query to datafed
+# INPUT: Service url path, soap action, input envelope as text
+# output: xml envelope
+def query_datafed(service, action, soap_in_text):
+    # open connection to std port 80
+    c = httplib.HTTPConnection("webapps.datafed.net", 80)
+    c.connect()
+    try:
+        # the Web Service Definition Language definition is at
+        # http://webapps.datafed.net/WCS.asmx?WSDL
+        c.putrequest("POST", "/" + service)
+        c.putheader("soapAction", action)
+        c.putheader("content-type", "text/xml")
+        c.putheader("content-length", repr(len(soap_in_text)))
+        c.endheaders()
+        c.send(soap_in_text)
+        r = c.getresponse()
+        # 200 means OK. Anything other is a failure.
+        if r.status <> 200:
+            msg = r.read()
+            raise 'http-error', repr(r.status) + ' ' + repr(r.reason) + ' ' + msg
+        return xml.dom.minidom.parse(r)
+    finally:
+        #closing expensive resources ASAP is a good practice
+        c.close()
+# this script is for demo purposes, so the query parameters are hard coded.
+parameters = {
+    'dataset_abbr':'AIRNOW',
+    'param_abbr':'pmfine',
+    'lat_min':'24',
+    'lat_max':'50',
+    'lon_min':'-130',
+    'lon_max':'-65',
+    'datetime':'2006-04-15T12:00:00',
+    'scale_max':'25'
+}
+def replace_parameters(raw_text):
+    text = raw_text
+    for p in parameters:
+        text = text.replace("[[" + p + "]]", parameters[p])
+    return text
+def look_for_ns_name(xmldoc, ns, name):
+    for x in xmldoc.getElementsByTagNameNS(ns, name):
+        return x
+    assert False, 'could not find node ' + ns + ':' + name
+def dump_stream(stream, filename):
+    # open for (w)write, (b)binary mode, (+)truncate
+    outfile = open(filename, "wb+")
+    try:
+        outfile.write(stream.read())
+        outfile.flush()
+    finally:
+        outfile.close()
+# get the data as a table.
+# Since the output envelope does not contain data,
+# but a reference to data, this function returns
+# the reference url to the data table
+def get_table_url():
+    data_soap_in_text = replace_parameters(get_coverage_query)
+    data_soap_out = query_datafed("WCS.asmx", "http://datafed.net/WCS/GetCoverage", data_soap_in_text)
+    try:
+        table_url = look_for_ns_name(data_soap_out, "http://datafed.net/xs/wcs", "data").firstChild.nodeValue
+    finally:
+        data_soap_out.unlink()
+    return table_url
+# Render the table.
+# The service accepts a table url, fetches it and renders it.
+# Again, Since the output envelope does not contain an image,
+# but a reference to the image, this function returns
+# the reference url to the image which is still in the server
+def get_image_url(table_url):
+    render_soap_in_text = replace_parameters(render_point_query)
+    render_soap_in_text = render_soap_in_text.replace("[[tableref]]", table_url)
+    image_soap_out = query_datafed("RenderMapPoint.asmx", "http://datafed.net/RenderMapPoint/Render", render_soap_in_text)
+    try:
+        image_url = look_for_ns_name(image_soap_out, "http://datafed.net/xs/MapImageLatLon", "image_url").firstChild.nodeValue
+    finally:
+        image_soap_out.unlink()
+    return image_url
+#
+#   If you run this script from command line, the next will be executed
+#
+if __name__ == "__main__":
+    execute_chain()
+</pre>