Source code for stetl.filters.gmlfeatureextractor
#!/usr/bin/env python
#
# Extracts arrays of etree GML features from a GML etree document.
#
# Author: Just van den Broecke
#
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT
# Module-level logger, used by GmlFeatureExtractor for its info messages.
log = Util.get_log('gmlfeatureextractor')
class GmlFeatureExtractor(Filter):
    """
    Extract arrays of GML feature etree elements from etree docs.

    The element names to extract are read from the ``feature_tags`` config
    option, a comma-separated list of tag names. Elements are matched on their
    local (non-namespaced) name anywhere in the document.

    consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array
    """

    # XPath query base for extracting features by (non-namespaced, thus local-name) tag name
    xpath_base = "//*[local-name() = '%s']"

    def __init__(self, configdict, section='gml_feature_extractor'):
        """
        Set up the filter and build the XPath expression from the configured tag names.

        :param configdict: configuration object, passed on to ``Filter.__init__``
        :param section: name of the config section for this filter
        """
        Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array)
        log.info("cfg = %s" % self.cfg.to_string())

        # Strip whitespace around each tag name so a config value such as "a, b"
        # works: element names cannot contain whitespace, so an unstripped
        # ' b' would never match anything.
        self.feature_tags = [tag.strip() for tag in self.cfg.get('feature_tags').split(',')]

        # Running count of features extracted over all invoke() calls
        self.total_features = 0

        # One XPath union ('|') with a local-name() test per configured tag
        self.xpath_expression = '|'.join(
            GmlFeatureExtractor.xpath_base % tag for tag in self.feature_tags)
        log.info("xpath expression = %s" % self.xpath_expression)

    def invoke(self, packet):
        """
        Replace the packet's document by the list of features found in it.

        Packets without data, or marking end of stream, are passed through
        untouched.

        :param packet: packet whose ``data`` is queried with ``.xpath()``
            (presumably an lxml etree document, per the consumes format)
        :return: the same packet, with ``data`` set to the XPath result
        """
        if packet.data is None or packet.is_end_of_stream():
            return packet

        # Input is an etree doc, so extract all features into an array using XPath
        packet.data = packet.data.xpath(self.xpath_expression)
        self.total_features += len(packet.data)

        log.info('extracted %d features from GML etree doc (total = %d)' % (len(packet.data), self.total_features))
        return packet