# Source code for ocrd_models.ocrd_page_generateds

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Generated Wed Jun 30 17:57:54 2021 by generateDS.py version 2.35.20.
# Python 3.6.9 (default, Jan 26 2021, 15:33:00)  [GCC 8.4.0]
#
# Command line options:
#   ('-f', '')
#   ('--root-element', 'PcGts')
#   ('-o', 'ocrd_models/ocrd_models/ocrd_page_generateds.py')
#   ('--silence', '')
#   ('--export', 'write etree')
#   ('--disable-generatedssuper-lookup', '')
#   ('--user-methods', 'ocrd_models/ocrd_page_user_methods.py')
#
# Command line arguments:
#   ocrd_validators/ocrd_validators/page.xsd
#
# Command line:
#   /home/kba/monorepo/ocrd_all/venv/bin/generateDS -f --root-element="PcGts" -o "ocrd_models/ocrd_models/ocrd_page_generateds.py" --silence --export="write etree" --disable-generatedssuper-lookup --user-methods="ocrd_models/ocrd_page_user_methods.py" ocrd_validators/ocrd_validators/page.xsd
#
# Current working directory (os.getcwd()):
#   core
#

from six.moves import zip_longest
import os
import sys
import re as re_
import base64
import datetime as datetime_
import decimal as decimal_
try:
    from lxml import etree as etree_
except ImportError:
    from xml.etree import ElementTree as etree_


# Module-level switches.  NOTE(review): their consumers are not visible in
# this part of the file -- presumably the generated element classes; confirm.
Validate_simpletypes_ = True
SaveElementTreeNode = True
# Base string type used for isinstance() checks (see quote_xml and
# quote_attrib): ``basestring`` on Python 2, ``str`` otherwise.
if sys.version_info.major == 2:
    BaseStrType_ = basestring
else:
    BaseStrType_ = str


def parsexml_(infile, parser=None, **kwargs):
    """Parse *infile* and return the resulting element tree document.

    If no *parser* is supplied, ``etree_.ETCompatXMLParser`` is tried first
    (the lxml ElementTree-compatible parser, which e.g. ignores comments);
    when ``etree_`` lacks it, ``etree_.XMLParser`` is used instead.
    Extra keyword arguments are forwarded to ``etree_.parse``.
    """
    if parser is None:
        try:
            parser = etree_.ETCompatXMLParser()
        except AttributeError:
            # xml.etree has no ETCompatXMLParser.
            parser = etree_.XMLParser()
    try:
        is_path_like = isinstance(infile, os.PathLike)
        if is_path_like:
            infile = os.path.join(infile)
    except AttributeError:
        # os.PathLike is unavailable on this interpreter; use infile as-is.
        pass
    return etree_.parse(infile, parser=parser, **kwargs)

def parsexmlstring_(instring, parser=None, **kwargs):
    """Parse the XML document in *instring* and return its root element.

    Parser selection mirrors ``parsexml_``: ``ETCompatXMLParser`` when
    ``etree_`` provides it, otherwise ``XMLParser``.  Extra keyword
    arguments are forwarded to ``etree_.fromstring``.
    """
    if parser is None:
        try:
            parser = etree_.ETCompatXMLParser()
        except AttributeError:
            # xml.etree has no ETCompatXMLParser.
            parser = etree_.XMLParser()
    return etree_.fromstring(instring, parser=parser, **kwargs)

#
# Namespace prefix definition table (and other attributes, too)
#
# The module generatedsnamespaces, if it is importable, must contain
# a dictionary named GenerateDSNamespaceDefs.  This Python dictionary
# should map element type names (strings) to XML schema namespace prefix
# definitions.  The export method for any class for which there is
# a namespace prefix definition, will export that definition in the
# XML representation of that element.  See the export method of
# any generated element type class for an example of the use of this
# table.
# A sample table is:
#
#     # File: generatedsnamespaces.py
#
#     GenerateDSNamespaceDefs = {
#         "ElementtypeA": "http://www.xxx.com/namespaceA",
#         "ElementtypeB": "http://www.xxx.com/namespaceB",
#     }
#
# Additionally, the generatedsnamespaces module can contain a python
# dictionary named GenerateDSNamespaceTypePrefixes that associates element
# types with the namespace prefixes that are to be added to the
# "xsi:type" attribute value.  See the exportAttributes method of
# any generated element type and the generation of "xsi:type" for an
# example of the use of this table.
# An example table:
#
#     # File: generatedsnamespaces.py
#
#     GenerateDSNamespaceTypePrefixes = {
#         "ElementtypeC": "aaa:",
#         "ElementtypeD": "bbb:",
#     }
#

# Optional per-element-type namespace definitions; an empty table is used
# when the generatedsnamespaces module (or the name) cannot be imported.
try:
    from generatedsnamespaces import GenerateDSNamespaceDefs as GenerateDSNamespaceDefs_
except ImportError:
    GenerateDSNamespaceDefs_ = {}
# Optional per-element-type prefixes for "xsi:type" values; same fallback.
try:
    from generatedsnamespaces import GenerateDSNamespaceTypePrefixes as GenerateDSNamespaceTypePrefixes_
except ImportError:
    GenerateDSNamespaceTypePrefixes_ = {}

#
# You can replace the following class definition by defining an
# importable module named "generatedscollector" containing a class
# named "GdsCollector".  See the default class definition below for
# clues about the possible content of that class.
#
try:
    from generatedscollector import GdsCollector as GdsCollector_
except ImportError:

    class GdsCollector_(object):
        """Default collector that accumulates warning messages in a list."""

        def __init__(self, messages=None):
            # A caller-supplied list is used as-is (not copied).
            self.messages = [] if messages is None else messages

        def add_message(self, msg):
            """Append *msg* to the collected messages."""
            self.messages.append(msg)

        def get_messages(self):
            """Return the list of collected messages."""
            return self.messages

        def clear_messages(self):
            """Replace the message list with a new empty list."""
            self.messages = []

        def print_messages(self):
            """Print every message to stdout, prefixed with "Warning: "."""
            for message in self.messages:
                print("Warning: {}".format(message))

        def write_messages(self, outstream):
            """Write every message as a "Warning: ..." line to *outstream*."""
            for message in self.messages:
                outstream.write("Warning: {}\n".format(message))


#
# The super-class for enum types
#

try:
    from enum import Enum
except ImportError:
    # No enum module available: fall back to ``object`` so that classes
    # declared as ``class X(str, Enum)`` can still be defined.
    Enum = object

#
# The root super-class for element type classes
#
# Calls to the methods in these classes are generated by generateDS.py.
# You can replace these methods by re-implementing the following class
#   in a module named generatedssuper.py.


class GeneratedsSuper(object):
    __hash__ = object.__hash__
    tzoff_pattern = re_.compile(r'(\+|-)((0\d|1[0-3]):[0-5]\d|14:00)$')
    class _FixedOffsetTZ(datetime_.tzinfo):
        def __init__(self, offset, name):
            self.__offset = datetime_.timedelta(minutes=offset)
            self.__name = name
        def utcoffset(self, dt):
            return self.__offset
        def tzname(self, dt):
            return self.__name
        def dst(self, dt):
            return None
    def gds_format_string(self, input_data, input_name=''):
        return input_data
    def gds_parse_string(self, input_data, node=None, input_name=''):
        return input_data
    def gds_validate_string(self, input_data, node=None, input_name=''):
        if not input_data:
            return ''
        else:
            return input_data
    def gds_format_base64(self, input_data, input_name=''):
        return base64.b64encode(input_data)
    def gds_validate_base64(self, input_data, node=None, input_name=''):
        return input_data
    def gds_format_integer(self, input_data, input_name=''):
        return '%d' % input_data
    def gds_parse_integer(self, input_data, node=None, input_name=''):
        try:
            ival = int(input_data)
        except (TypeError, ValueError) as exp:
            raise_parse_error(node, 'Requires integer value: %s' % exp)
        return ival
    def gds_validate_integer(self, input_data, node=None, input_name=''):
        try:
            value = int(input_data)
        except (TypeError, ValueError):
            raise_parse_error(node, 'Requires integer value')
        return value
    def gds_format_integer_list(self, input_data, input_name=''):
        return '%s' % ' '.join(input_data)
    def gds_validate_integer_list(
            self, input_data, node=None, input_name=''):
        values = input_data.split()
        for value in values:
            try:
                int(value)
            except (TypeError, ValueError):
                raise_parse_error(node, 'Requires sequence of integer valuess')
        return values
    def gds_format_float(self, input_data, input_name=''):
        return ('%.15f' % input_data).rstrip('0')
    def gds_parse_float(self, input_data, node=None, input_name=''):
        try:
            fval_ = float(input_data)
        except (TypeError, ValueError) as exp:
            raise_parse_error(node, 'Requires float or double value: %s' % exp)
        return fval_
    def gds_validate_float(self, input_data, node=None, input_name=''):
        try:
            value = float(input_data)
        except (TypeError, ValueError):
            raise_parse_error(node, 'Requires float value')
        return value
    def gds_format_float_list(self, input_data, input_name=''):
        return '%s' % ' '.join(input_data)
    def gds_validate_float_list(
            self, input_data, node=None, input_name=''):
        values = input_data.split()
        for value in values:
            try:
                float(value)
            except (TypeError, ValueError):
                raise_parse_error(node, 'Requires sequence of float values')
        return values
    def gds_format_decimal(self, input_data, input_name=''):
        return ('%s' % input_data).rstrip('0')
    def gds_parse_decimal(self, input_data, node=None, input_name=''):
        try:
            decimal_value = decimal_.Decimal(input_data)
        except (TypeError, ValueError):
            raise_parse_error(node, 'Requires decimal value')
        return decimal_value
    def gds_validate_decimal(self, input_data, node=None, input_name=''):
        try:
            value = decimal_.Decimal(input_data)
        except (TypeError, ValueError):
            raise_parse_error(node, 'Requires decimal value')
        return value
    def gds_format_decimal_list(self, input_data, input_name=''):
        return '%s' % ' '.join(input_data)
    def gds_validate_decimal_list(
            self, input_data, node=None, input_name=''):
        values = input_data.split()
        for value in values:
            try:
                decimal_.Decimal(value)
            except (TypeError, ValueError):
                raise_parse_error(node, 'Requires sequence of decimal values')
        return values
    def gds_format_double(self, input_data, input_name=''):
        return '%e' % input_data
    def gds_parse_double(self, input_data, node=None, input_name=''):
        try:
            fval_ = float(input_data)
        except (TypeError, ValueError) as exp:
            raise_parse_error(node, 'Requires double or float value: %s' % exp)
        return fval_
    def gds_validate_double(self, input_data, node=None, input_name=''):
        try:
            value = float(input_data)
        except (TypeError, ValueError):
            raise_parse_error(node, 'Requires double or float value')
        return value
    def gds_format_double_list(self, input_data, input_name=''):
        return '%s' % ' '.join(input_data)
    def gds_validate_double_list(
            self, input_data, node=None, input_name=''):
        values = input_data.split()
        for value in values:
            try:
                float(value)
            except (TypeError, ValueError):
                raise_parse_error(
                    node, 'Requires sequence of double or float values')
        return values
    def gds_format_boolean(self, input_data, input_name=''):
        return ('%s' % input_data).lower()
    def gds_parse_boolean(self, input_data, node=None, input_name=''):
        if input_data in ('true', '1'):
            bval = True
        elif input_data in ('false', '0'):
            bval = False
        else:
            raise_parse_error(node, 'Requires boolean value')
        return bval
    def gds_validate_boolean(self, input_data, node=None, input_name=''):
        if input_data not in (True, 1, False, 0, ):
            raise_parse_error(
                node,
                'Requires boolean value '
                '(one of True, 1, False, 0)')
        return input_data
    def gds_format_boolean_list(self, input_data, input_name=''):
        return '%s' % ' '.join(input_data)
    def gds_validate_boolean_list(
            self, input_data, node=None, input_name=''):
        values = input_data.split()
        for value in values:
            if value not in (True, 1, False, 0, ):
                raise_parse_error(
                    node,
                    'Requires sequence of boolean values '
                    '(one of True, 1, False, 0)')
        return values
    def gds_validate_datetime(self, input_data, node=None, input_name=''):
        return input_data
    def gds_format_datetime(self, input_data, input_name=''):
        if input_data.microsecond == 0:
            _svalue = '%04d-%02d-%02dT%02d:%02d:%02d' % (
                input_data.year,
                input_data.month,
                input_data.day,
                input_data.hour,
                input_data.minute,
                input_data.second,
            )
        else:
            _svalue = '%04d-%02d-%02dT%02d:%02d:%02d.%s' % (
                input_data.year,
                input_data.month,
                input_data.day,
                input_data.hour,
                input_data.minute,
                input_data.second,
                ('%f' % (float(input_data.microsecond) / 1000000))[2:],
            )
        if input_data.tzinfo is not None:
            tzoff = input_data.tzinfo.utcoffset(input_data)
            if tzoff is not None:
                total_seconds = tzoff.seconds + (86400 * tzoff.days)
                if total_seconds == 0:
                    _svalue += 'Z'
                else:
                    if total_seconds < 0:
                        _svalue += '-'
                        total_seconds *= -1
                    else:
                        _svalue += '+'
                    hours = total_seconds // 3600
                    minutes = (total_seconds - (hours * 3600)) // 60
                    _svalue += '{0:02d}:{1:02d}'.format(hours, minutes)
        return _svalue
    @classmethod
    def gds_parse_datetime(cls, input_data):
        tz = None
        if input_data[-1] == 'Z':
            tz = GeneratedsSuper._FixedOffsetTZ(0, 'UTC')
            input_data = input_data[:-1]
        else:
            results = GeneratedsSuper.tzoff_pattern.search(input_data)
            if results is not None:
                tzoff_parts = results.group(2).split(':')
                tzoff = int(tzoff_parts[0]) * 60 + int(tzoff_parts[1])
                if results.group(1) == '-':
                    tzoff *= -1
                tz = GeneratedsSuper._FixedOffsetTZ(
                    tzoff, results.group(0))
                input_data = input_data[:-6]
        time_parts = input_data.split('.')
        if len(time_parts) > 1:
            micro_seconds = int(float('0.' + time_parts[1]) * 1000000)
            input_data = '%s.%s' % (
                time_parts[0], "{}".format(micro_seconds).rjust(6, "0"), )
            dt = datetime_.datetime.strptime(
                input_data, '%Y-%m-%dT%H:%M:%S.%f')
        else:
            dt = datetime_.datetime.strptime(
                input_data, '%Y-%m-%dT%H:%M:%S')
        dt = dt.replace(tzinfo=tz)
        return dt
    def gds_validate_date(self, input_data, node=None, input_name=''):
        return input_data
    def gds_format_date(self, input_data, input_name=''):
        _svalue = '%04d-%02d-%02d' % (
            input_data.year,
            input_data.month,
            input_data.day,
        )
        try:
            if input_data.tzinfo is not None:
                tzoff = input_data.tzinfo.utcoffset(input_data)
                if tzoff is not None:
                    total_seconds = tzoff.seconds + (86400 * tzoff.days)
                    if total_seconds == 0:
                        _svalue += 'Z'
                    else:
                        if total_seconds < 0:
                            _svalue += '-'
                            total_seconds *= -1
                        else:
                            _svalue += '+'
                        hours = total_seconds // 3600
                        minutes = (total_seconds - (hours * 3600)) // 60
                        _svalue += '{0:02d}:{1:02d}'.format(
                            hours, minutes)
        except AttributeError:
            pass
        return _svalue
    @classmethod
    def gds_parse_date(cls, input_data):
        tz = None
        if input_data[-1] == 'Z':
            tz = GeneratedsSuper._FixedOffsetTZ(0, 'UTC')
            input_data = input_data[:-1]
        else:
            results = GeneratedsSuper.tzoff_pattern.search(input_data)
            if results is not None:
                tzoff_parts = results.group(2).split(':')
                tzoff = int(tzoff_parts[0]) * 60 + int(tzoff_parts[1])
                if results.group(1) == '-':
                    tzoff *= -1
                tz = GeneratedsSuper._FixedOffsetTZ(
                    tzoff, results.group(0))
                input_data = input_data[:-6]
        dt = datetime_.datetime.strptime(input_data, '%Y-%m-%d')
        dt = dt.replace(tzinfo=tz)
        return dt.date()
    def gds_validate_time(self, input_data, node=None, input_name=''):
        return input_data
    def gds_format_time(self, input_data, input_name=''):
        if input_data.microsecond == 0:
            _svalue = '%02d:%02d:%02d' % (
                input_data.hour,
                input_data.minute,
                input_data.second,
            )
        else:
            _svalue = '%02d:%02d:%02d.%s' % (
                input_data.hour,
                input_data.minute,
                input_data.second,
                ('%f' % (float(input_data.microsecond) / 1000000))[2:],
            )
        if input_data.tzinfo is not None:
            tzoff = input_data.tzinfo.utcoffset(input_data)
            if tzoff is not None:
                total_seconds = tzoff.seconds + (86400 * tzoff.days)
                if total_seconds == 0:
                    _svalue += 'Z'
                else:
                    if total_seconds < 0:
                        _svalue += '-'
                        total_seconds *= -1
                    else:
                        _svalue += '+'
                    hours = total_seconds // 3600
                    minutes = (total_seconds - (hours * 3600)) // 60
                    _svalue += '{0:02d}:{1:02d}'.format(hours, minutes)
        return _svalue
    def gds_validate_simple_patterns(self, patterns, target):
        # pat is a list of lists of strings/patterns.
        # The target value must match at least one of the patterns
        # in order for the test to succeed.
        found1 = True
        for patterns1 in patterns:
            found2 = False
            for patterns2 in patterns1:
                mo = re_.search(patterns2, target)
                if mo is not None and len(mo.group(0)) == len(target):
                    found2 = True
                    break
            if not found2:
                found1 = False
                break
        return found1
    @classmethod
    def gds_parse_time(cls, input_data):
        tz = None
        if input_data[-1] == 'Z':
            tz = GeneratedsSuper._FixedOffsetTZ(0, 'UTC')
            input_data = input_data[:-1]
        else:
            results = GeneratedsSuper.tzoff_pattern.search(input_data)
            if results is not None:
                tzoff_parts = results.group(2).split(':')
                tzoff = int(tzoff_parts[0]) * 60 + int(tzoff_parts[1])
                if results.group(1) == '-':
                    tzoff *= -1
                tz = GeneratedsSuper._FixedOffsetTZ(
                    tzoff, results.group(0))
                input_data = input_data[:-6]
        if len(input_data.split('.')) > 1:
            dt = datetime_.datetime.strptime(input_data, '%H:%M:%S.%f')
        else:
            dt = datetime_.datetime.strptime(input_data, '%H:%M:%S')
        dt = dt.replace(tzinfo=tz)
        return dt.time()
    def gds_check_cardinality_(
            self, value, input_name,
            min_occurs=0, max_occurs=1, required=None):
        if value is None:
            length = 0
        elif isinstance(value, list):
            length = len(value)
        else:
            length = 1
        if required is not None :
            if required and length < 1:
                self.gds_collector_.add_message(
                    "Required value {}{} is missing".format(
                        input_name, self.gds_get_node_lineno_()))
        if length < min_occurs:
            self.gds_collector_.add_message(
                "Number of values for {}{} is below "
                "the minimum allowed, "
                "expected at least {}, found {}".format(
                    input_name, self.gds_get_node_lineno_(),
                    min_occurs, length))
        elif length > max_occurs:
            self.gds_collector_.add_message(
                "Number of values for {}{} is above "
                "the maximum allowed, "
                "expected at most {}, found {}".format(
                    input_name, self.gds_get_node_lineno_(),
                    max_occurs, length))
    def gds_validate_builtin_ST_(
            self, validator, value, input_name,
            min_occurs=None, max_occurs=None, required=None):
        if value is not None:
            try:
                validator(value, input_name=input_name)
            except GDSParseError as parse_error:
                self.gds_collector_.add_message(str(parse_error))
    def gds_validate_defined_ST_(
            self, validator, value, input_name,
            min_occurs=None, max_occurs=None, required=None):
        if value is not None:
            try:
                validator(value)
            except GDSParseError as parse_error:
                self.gds_collector_.add_message(str(parse_error))
    def gds_str_lower(self, instring):
        return instring.lower()
    def get_path_(self, node):
        path_list = []
        self.get_path_list_(node, path_list)
        path_list.reverse()
        path = '/'.join(path_list)
        return path
    Tag_strip_pattern_ = re_.compile(r'\{.*\}')
    def get_path_list_(self, node, path_list):
        if node is None:
            return
        tag = GeneratedsSuper.Tag_strip_pattern_.sub('', node.tag)
        if tag:
            path_list.append(tag)
        self.get_path_list_(node.getparent(), path_list)
    def get_class_obj_(self, node, default_class=None):
        class_obj1 = default_class
        if 'xsi' in node.nsmap:
            classname = node.get('{%s}type' % node.nsmap['xsi'])
            if classname is not None:
                names = classname.split(':')
                if len(names) == 2:
                    classname = names[1]
                class_obj2 = globals().get(classname)
                if class_obj2 is not None:
                    class_obj1 = class_obj2
        return class_obj1
    def gds_build_any(self, node, type_name=None):
        # provide default value in case option --disable-xml is used.
        content = ""
        content = etree_.tostring(node, encoding="unicode")
        return content
    @classmethod
    def gds_reverse_node_mapping(cls, mapping):
        return dict(((v, k) for k, v in mapping.items()))
    @staticmethod
    def gds_encode(instring):
        if sys.version_info.major == 2:
            if ExternalEncoding:
                encoding = ExternalEncoding
            else:
                encoding = 'utf-8'
            return instring.encode(encoding)
        else:
            return instring
    @staticmethod
    def convert_unicode(instring):
        if isinstance(instring, str):
            result = quote_xml(instring)
        elif sys.version_info.major == 2 and isinstance(instring, unicode):
            result = quote_xml(instring).encode('utf8')
        else:
            result = GeneratedsSuper.gds_encode(str(instring))
        return result
    def __eq__(self, other):
        def excl_select_objs_(obj):
            return (obj[0] != 'parent_object_' and
                    obj[0] != 'gds_collector_')
        if type(self) != type(other):
            return False
        return all(x == y for x, y in zip_longest(
            filter(excl_select_objs_, self.__dict__.items()),
            filter(excl_select_objs_, other.__dict__.items())))
    def __ne__(self, other):
        return not self.__eq__(other)
    # Django ETL transform hooks.
    def gds_djo_etl_transform(self):
        pass
    def gds_djo_etl_transform_db_obj(self, dbobj):
        pass
    # SQLAlchemy ETL transform hooks.
    def gds_sqa_etl_transform(self):
        return 0, None
    def gds_sqa_etl_transform_db_obj(self, dbobj):
        pass
    def gds_get_node_lineno_(self):
        if (hasattr(self, "gds_elementtree_node_") and
                self.gds_elementtree_node_ is not None):
            return ' near line {}'.format(
                self.gds_elementtree_node_.sourceline)
        else:
            return ""


def getSubclassFromModule_(module, class_):
    '''Return ``module.<ClassName>Sub`` if it exists, otherwise None.'''
    subclass_name = class_.__name__ + 'Sub'
    return getattr(module, subclass_name, None)


#
# If you have installed IPython you can uncomment and use the following.
# IPython is available from http://ipython.scipy.org/.
#

## from IPython.Shell import IPShellEmbed
## args = ''
## ipshell = IPShellEmbed(args,
##     banner = 'Dropping into IPython',
##     exit_msg = 'Leaving Interpreter, back to program.')

# Then use the following line where and when you want to drop into the
# IPython shell:
#    ipshell('<some message> -- Entering ipshell.\nHit Ctrl-D to exit')

#
# Globals
#

# Encoding used by GeneratedsSuper.gds_encode on Python 2; when empty,
# 'utf-8' is used instead.
ExternalEncoding = ''
# Set this to False to deactivate, during export, the use of
# namespace prefixes captured from the input document.
UseCapturedNS_ = True
CapturedNsmap_ = {}
# Optional "{...}" prefix followed by the rest of the string.
Tag_pattern_ = re_.compile(r'({.*})?(.*)')
# One or more consecutive whitespace characters.
String_cleanup_pat_ = re_.compile(r"[\n\r\s]+")
# "{...}" prefix (group 1) and the remainder (group 2).
Namespace_extract_pat_ = re_.compile(r'{(.*)}(.*)')
# A complete CDATA section; used by quote_xml to leave such sections
# unescaped.
CDATA_pattern_ = re_.compile(r"<!\[CDATA\[.*?\]\]>", re_.DOTALL)

# Change this to redirect the generated superclass module to use a
# specific subclass module.
CurrentSubclassModule_ = None

#
# Support/utility functions.
#


def showIndent(outfile, level, pretty_print=True):
    """Write *level* four-space indents to *outfile* when pretty-printing."""
    if not pretty_print:
        return
    for _ in range(level):
        outfile.write('    ')


def quote_xml(inStr):
    "Escape markup chars, but do not modify CDATA sections."
    if not inStr:
        return ''
    # inStr is truthy here, so a string is used as-is and anything else
    # is converted with '%s'.
    text = inStr if isinstance(inStr, BaseStrType_) else '%s' % inStr
    chunks = []
    cursor = 0
    for cdata in CDATA_pattern_.finditer(text):
        # Escape the text before the CDATA section, then copy the
        # section itself verbatim.
        chunks.append(quote_xml_aux(text[cursor:cdata.start()]))
        chunks.append(text[cdata.start():cdata.end()])
        cursor = cdata.end()
    chunks.append(quote_xml_aux(text[cursor:]))
    return ''.join(chunks)


def quote_xml_aux(inStr):
    """Escape ``&``, ``<`` and ``>`` in *inStr* (ampersand first)."""
    escaped = inStr
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        escaped = escaped.replace(raw, entity)
    return escaped


def quote_attrib(inStr):
    """Escape *inStr* and wrap it in quotes for use as an attribute value.

    ``&``, ``<`` and ``>`` are escaped.  Double quotes are used as the
    delimiter unless the value contains a double quote and no single
    quote, in which case single quotes are used.  If it contains both,
    double quotes inside the value are replaced by ``&quot;``.
    """
    if isinstance(inStr, BaseStrType_) and inStr:
        text = inStr
    else:
        text = '%s' % inStr
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        text = text.replace(raw, entity)
    if '"' not in text:
        return '"%s"' % text
    if "'" in text:
        return '"%s"' % text.replace('"', "&quot;")
    return "'%s'" % text


def quote_python(inStr):
    """Return *inStr* wrapped in quotes as Python literal source text.

    Single quotes are used when the text contains no ``'``.  Otherwise
    double quotes are used and embedded ``"`` are backslash-escaped.
    Text containing a newline gets the triple-quoted form.
    """
    multiline = inStr.find('\n') != -1
    if inStr.find("'") == -1:
        template = "'''%s'''" if multiline else "'%s'"
        return template % inStr
    body = inStr
    if body.find('"') != -1:
        body = body.replace('"', '\\"')
    template = '"""%s"""' if multiline else '"%s"'
    return template % body


def get_all_text_(node):
    """Return the node's own text followed by the tail text of each child."""
    pieces = ['' if node.text is None else node.text]
    for child in node:
        if child.tail is not None:
            pieces.append(child.tail)
    return ''.join(pieces)


def find_attr_value_(attr_name, node):
    """Look up attribute *attr_name* on *node*.

    A plain name is read directly from ``node.attrib``.  A ``prefix:name``
    is resolved through ``node.nsmap`` and read as ``{namespace}name``.
    Returns None when the attribute or the prefix is unknown, or when the
    name contains more than one colon.
    """
    attrs = node.attrib
    pieces = attr_name.split(':')
    if len(pieces) == 1:
        return attrs.get(attr_name)
    if len(pieces) == 2:
        prefix, local_name = pieces
        namespace = node.nsmap.get(prefix)
        if namespace is not None:
            return attrs.get('{%s}%s' % (namespace, local_name, ))
    return None


def encode_str_2_3(instr):
    """Return *instr* unchanged."""
    result = instr
    return result


class GDSParseError(Exception):
    """Raised by ``raise_parse_error`` when a value cannot be parsed or validated."""


def raise_parse_error(node, msg):
    """Raise ``GDSParseError`` with *msg*, annotated with *node*'s location.

    When *node* is given, its tag is appended to the message, together
    with its source line if one is known.  Elements from the ``xml.etree``
    fallback have no ``sourceline`` attribute, and lxml reports ``None``
    for nodes that were not parsed from a document.  In both cases the
    line is omitted rather than letting the message formatting fail.
    """
    if node is not None:
        line = getattr(node, 'sourceline', None)
        if line is None:
            msg = '%s (element %s)' % (msg, node.tag, )
        else:
            msg = '%s (element %s/line %d)' % (msg, node.tag, line, )
    raise GDSParseError(msg)


class MixedContainer:
    """One item of mixed XML content: text, a simple value, or a complex child.

    ``category`` selects how the item is exported (one of the ``Category*``
    constants) and ``content_type`` gives the simple type of ``value`` (one
    of the ``Type*`` constants).
    """
    # Constants for category:
    CategoryNone = 0
    CategoryText = 1
    CategorySimple = 2
    CategoryComplex = 3
    # Constants for content_type:
    TypeNone = 0
    TypeText = 1
    TypeString = 2
    TypeInteger = 3
    TypeFloat = 4
    TypeDecimal = 5
    TypeDouble = 6
    TypeBoolean = 7
    TypeBase64 = 8
    def __init__(self, category, content_type, name, value):
        self.category = category
        self.content_type = content_type
        self.name = name
        self.value = value
    def getCategory(self):
        return self.category
    def getContenttype(self, content_type=None):
        # The parameter was never used; it is now optional so that the
        # getter can be called without arguments while existing callers
        # that pass a value keep working.
        return self.content_type
    def getValue(self):
        return self.value
    def getName(self):
        return self.name
    def _base64_text(self):
        # b64encode returns bytes; on Python 3, formatting bytes with '%s'
        # would emit "b'...'" into the XML, so decode to text first.
        encoded = base64.b64encode(self.value)
        if isinstance(encoded, bytes) and not isinstance(encoded, str):
            return encoded.decode('ascii')
        return encoded
    def export(self, outfile, level, name, namespace,
               pretty_print=True):
        """Write this item to *outfile* according to its category."""
        if self.category == MixedContainer.CategoryText:
            # Prevent exporting empty content as empty lines.
            if self.value.strip():
                outfile.write(self.value)
        elif self.category == MixedContainer.CategorySimple:
            self.exportSimple(outfile, level, name)
        else:    # category == MixedContainer.CategoryComplex
            self.value.export(
                outfile, level, namespace, name_=name,
                pretty_print=pretty_print)
    def exportSimple(self, outfile, level, name):
        """Write ``<name>value</name>`` formatted for the content type."""
        if self.content_type == MixedContainer.TypeString:
            outfile.write('<%s>%s</%s>' % (
                self.name, self.value, self.name))
        elif self.content_type == MixedContainer.TypeInteger or \
                self.content_type == MixedContainer.TypeBoolean:
            outfile.write('<%s>%d</%s>' % (
                self.name, self.value, self.name))
        elif self.content_type == MixedContainer.TypeFloat or \
                self.content_type == MixedContainer.TypeDecimal:
            outfile.write('<%s>%f</%s>' % (
                self.name, self.value, self.name))
        elif self.content_type == MixedContainer.TypeDouble:
            outfile.write('<%s>%g</%s>' % (
                self.name, self.value, self.name))
        elif self.content_type == MixedContainer.TypeBase64:
            outfile.write('<%s>%s</%s>' % (
                self.name,
                self._base64_text(),
                self.name))
    def to_etree(self, element, mapping_=None, nsmap_=None):
        """Add this item to *element* (text, sub-element or complex child)."""
        if self.category == MixedContainer.CategoryText:
            # Prevent exporting empty content as empty lines.
            if self.value.strip():
                if len(element) > 0:
                    # Text after a child belongs to that child's tail.
                    if element[-1].tail is None:
                        element[-1].tail = self.value
                    else:
                        element[-1].tail += self.value
                else:
                    if element.text is None:
                        element.text = self.value
                    else:
                        element.text += self.value
        elif self.category == MixedContainer.CategorySimple:
            subelement = etree_.SubElement(
                element, '%s' % self.name)
            subelement.text = self.to_etree_simple()
        else:    # category == MixedContainer.CategoryComplex
            self.value.to_etree(element)
    def to_etree_simple(self, mapping_=None, nsmap_=None):
        """Return the value as text, formatted for the content type."""
        if self.content_type == MixedContainer.TypeString:
            text = self.value
        elif (self.content_type == MixedContainer.TypeInteger or
                self.content_type == MixedContainer.TypeBoolean):
            text = '%d' % self.value
        elif (self.content_type == MixedContainer.TypeFloat or
                self.content_type == MixedContainer.TypeDecimal):
            text = '%f' % self.value
        elif self.content_type == MixedContainer.TypeDouble:
            text = '%g' % self.value
        elif self.content_type == MixedContainer.TypeBase64:
            text = '%s' % self._base64_text()
        return text
    def exportLiteral(self, outfile, level, name):
        """Write a Python-literal representation of this item to *outfile*."""
        if self.category == MixedContainer.CategoryText:
            showIndent(outfile, level)
            outfile.write(
                'model_.MixedContainer(%d, %d, "%s", "%s"),\n' % (
                    self.category, self.content_type,
                    self.name, self.value))
        elif self.category == MixedContainer.CategorySimple:
            showIndent(outfile, level)
            outfile.write(
                'model_.MixedContainer(%d, %d, "%s", "%s"),\n' % (
                    self.category, self.content_type,
                    self.name, self.value))
        else:    # category == MixedContainer.CategoryComplex
            showIndent(outfile, level)
            outfile.write(
                'model_.MixedContainer(%d, %d, "%s",\n' % (
                    self.category, self.content_type, self.name,))
            self.value.exportLiteral(outfile, level + 1)
            showIndent(outfile, level)
            outfile.write(')\n')


class MemberSpec_(object):
    """Plain record describing one member of a generated class.

    Stores the member's name, its data type (a single value or a list
    forming a type chain), a container flag, an optional flag, a dictionary
    of child attributes and a choice value, each with a getter and setter.
    """

    def __init__(self, name='', data_type='', container=0,
            optional=0, child_attrs=None, choice=None):
        self.name, self.data_type = name, data_type
        self.container, self.optional = container, optional
        self.child_attrs, self.choice = child_attrs, choice

    # -- name ---------------------------------------------------------------
    def set_name(self, name):
        self.name = name

    def get_name(self):
        return self.name

    # -- data type ----------------------------------------------------------
    def set_data_type(self, data_type):
        self.data_type = data_type

    def get_data_type_chain(self):
        """Return the stored data type exactly as given (possibly a list)."""
        return self.data_type

    def get_data_type(self):
        """Return the effective data type.

        For a list this is its last entry, or ``'xs:string'`` when the list
        is empty; any non-list value is returned unchanged.
        """
        chain = self.data_type
        if not isinstance(chain, list):
            return chain
        if chain:
            return chain[-1]
        return 'xs:string'

    # -- container ----------------------------------------------------------
    def set_container(self, container):
        self.container = container

    def get_container(self):
        return self.container

    # -- child attributes ---------------------------------------------------
    def set_child_attrs(self, child_attrs):
        self.child_attrs = child_attrs

    def get_child_attrs(self):
        return self.child_attrs

    # -- choice -------------------------------------------------------------
    def set_choice(self, choice):
        self.choice = choice

    def get_choice(self):
        return self.choice

    # -- optional -----------------------------------------------------------
    def set_optional(self, optional):
        self.optional = optional

    def get_optional(self):
        return self.optional


def _cast(typ, value):
    if typ is None or value is None:
        return value
    return typ(value)

#
# Data representation classes.
#


class AlignSimpleType(str, Enum):
    """Closed set of string constants for ``AlignSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    LEFT = 'left'
    CENTRE = 'centre'
    RIGHT = 'right'
    JUSTIFY = 'justify'


class ChartTypeSimpleType(str, Enum):
    """Closed set of string constants for ``ChartTypeSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    BAR = 'bar'
    LINE = 'line'
    PIE = 'pie'
    SCATTER = 'scatter'
    SURFACE = 'surface'
    OTHER = 'other'


class ColourDepthSimpleType(str, Enum):
    """Closed set of string constants for ``ColourDepthSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    BILEVEL = 'bilevel'
    GREYSCALE = 'greyscale'
    COLOUR = 'colour'
    OTHER = 'other'


class ColourSimpleType(str, Enum):
    """Closed set of string constants for ``ColourSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    BLACK = 'black'
    BLUE = 'blue'
    BROWN = 'brown'
    CYAN = 'cyan'
    GREEN = 'green'
    GREY = 'grey'
    INDIGO = 'indigo'
    MAGENTA = 'magenta'
    ORANGE = 'orange'
    PINK = 'pink'
    RED = 'red'
    TURQUOISE = 'turquoise'
    VIOLET = 'violet'
    WHITE = 'white'
    YELLOW = 'yellow'
    OTHER = 'other'


class GraphicsTypeSimpleType(str, Enum):
    """Closed set of string constants for ``GraphicsTypeSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    LOGO = 'logo'
    LETTERHEAD = 'letterhead'
    DECORATION = 'decoration'
    FRAME = 'frame'
    HANDWRITTENANNOTATION = 'handwritten-annotation'
    STAMP = 'stamp'
    SIGNATURE = 'signature'
    BARCODE = 'barcode'
    PAPERGROW = 'paper-grow'
    PUNCHHOLE = 'punch-hole'
    OTHER = 'other'


class GroupTypeSimpleType(str, Enum):
    """Closed set of string constants for ``GroupTypeSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    PARAGRAPH = 'paragraph'
    LIST = 'list'
    LISTITEM = 'list-item'
    FIGURE = 'figure'
    ARTICLE = 'article'
    DIV = 'div'
    OTHER = 'other'


class PageTypeSimpleType(str, Enum):
    """Closed set of string constants for ``PageTypeSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    FRONTCOVER = 'front-cover'
    BACKCOVER = 'back-cover'
    TITLE = 'title'
    TABLEOFCONTENTS = 'table-of-contents'
    INDEX = 'index'
    CONTENT = 'content'
    BLANK = 'blank'
    OTHER = 'other'


class ProductionSimpleType(str, Enum):
    """Text production type.

    Closed set of string constants; every member is also a ``str`` and
    compares equal to its literal value.
    """
    PRINTED = 'printed'
    TYPEWRITTEN = 'typewritten'
    HANDWRITTENCURSIVE = 'handwritten-cursive'
    HANDWRITTENPRINTSCRIPT = 'handwritten-printscript'
    MEDIEVALMANUSCRIPT = 'medieval-manuscript'
    OTHER = 'other'


class ReadingDirectionSimpleType(str, Enum):
    """Closed set of string constants for ``ReadingDirectionSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    LEFTTORIGHT = 'left-to-right'
    RIGHTTOLEFT = 'right-to-left'
    TOPTOBOTTOM = 'top-to-bottom'
    BOTTOMTOTOP = 'bottom-to-top'


class TextDataTypeSimpleType(str, Enum):
    """Closed set of string constants for ``TextDataTypeSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    The comment after each member gives example lexical forms.
    """
    XSDDECIMAL = 'xsd:decimal'  # Examples: "123.456", "+1234.456", "-1234.456", "-.456", "-456"
    XSDFLOAT = 'xsd:float'  # Examples: "123.456", "+1234.456", "-1.2344e56", "-.45E-6", "INF", "-INF", "NaN"
    XSDINTEGER = 'xsd:integer'  # Examples: "123456", "+00000012", "-1", "-456"
    XSDBOOLEAN = 'xsd:boolean'  # Examples: "true", "false", "1", "0"
    XSDDATE = 'xsd:date'  # Examples: "2001-10-26", "2001-10-26+02:00", "2001-10-26Z", "2001-10-26+00:00", "-2001-10-26", "-20000-04-01"
    XSDTIME = 'xsd:time'  # Examples: "21:32:52", "21:32:52+02:00", "19:32:52Z", "19:32:52+00:00", "21:32:52.12679"
    XSDDATE_TIME = 'xsd:dateTime'  # Examples: "2001-10-26T21:32:52", "2001-10-26T21:32:52+02:00", "2001-10-26T19:32:52Z", "2001-10-26T19:32:52+00:00", "-2001-10-26T21:32:52", "2001-10-26T21:32:52.12679"
    XSDSTRING = 'xsd:string'  # Generic text string
    OTHER = 'other'  # An XSD type that is not listed or a custom type (use dataTypeDetails attribute).


class TextLineOrderSimpleType(str, Enum):
    """Closed set of string constants for ``TextLineOrderSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    TOPTOBOTTOM = 'top-to-bottom'
    BOTTOMTOTOP = 'bottom-to-top'
    LEFTTORIGHT = 'left-to-right'
    RIGHTTOLEFT = 'right-to-left'


class TextTypeSimpleType(str, Enum):
    """Closed set of string constants for ``TextTypeSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    PARAGRAPH = 'paragraph'
    HEADING = 'heading'
    CAPTION = 'caption'
    HEADER = 'header'
    FOOTER = 'footer'
    PAGENUMBER = 'page-number'
    DROPCAPITAL = 'drop-capital'
    CREDIT = 'credit'
    FLOATING = 'floating'
    SIGNATUREMARK = 'signature-mark'
    CATCHWORD = 'catch-word'
    MARGINALIA = 'marginalia'
    FOOTNOTE = 'footnote'
    FOOTNOTECONTINUED = 'footnote-continued'
    ENDNOTE = 'endnote'
    TOCENTRY = 'TOC-entry'
    LISTLABEL = 'list-label'
    OTHER = 'other'


class UnderlineStyleSimpleType(str, Enum):
    """Closed set of string constants for ``UnderlineStyleSimpleType``.

    Every member is also a ``str`` and compares equal to its literal value.
    """
    SINGLE_LINE = 'singleLine'
    DOUBLE_LINE = 'doubleLine'
    OTHER = 'other'


class PcGtsType(GeneratedsSuper):
    """Binding class for ``PcGtsType``.

    Members (see ``member_data_items_``): the optional ``pcGtsId`` attribute
    and the child objects ``Metadata`` and ``Page``.
    """
    # This assignment is overridden by the ``__hash__`` method defined further
    # down in the class body (the later definition wins).
    __hash__ = GeneratedsSuper.__hash__
    member_data_items_ = [
        MemberSpec_('pcGtsId', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('Metadata', 'MetadataType', 0, 0, {'name': 'Metadata', 'type': 'MetadataType'}, None),
        MemberSpec_('Page', 'PageType', 0, 0, {'name': 'Page', 'type': 'PageType'}, None),
    ]
    subclass = None
    superclass = None

    def __init__(self, pcGtsId=None, Metadata=None, Page=None, gds_collector_=None, **kwargs_):
        """Store the attribute and child values.

        ``parent_object_`` is taken from ``kwargs_`` when present; every
        member gets the namespace prefix ``"pc"``.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = None
        self.pcGtsId = _cast(None, pcGtsId)
        self.pcGtsId_nsprefix_ = "pc"
        self.Metadata = Metadata
        self.Metadata_nsprefix_ = "pc"
        self.Page = Page
        self.Page_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Create an instance, preferring a registered subclass.

        Lookup order: a subclass found via ``CurrentSubclassModule_``, then
        the ``subclass`` class attribute, then ``PcGtsType`` itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, PcGtsType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if PcGtsType.subclass:
            return PcGtsType.subclass(*args_, **kwargs_)
        else:
            return PcGtsType(*args_, **kwargs_)

    # Plain accessors for the namespace prefix, the children and the attribute.
    def get_ns_prefix_(self):
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        self.ns_prefix_ = ns_prefix

    def get_Metadata(self):
        return self.Metadata

    def set_Metadata(self, Metadata):
        self.Metadata = Metadata

    def get_Page(self):
        return self.Page

    def set_Page(self, Page):
        self.Page = Page

    def get_pcGtsId(self):
        return self.pcGtsId

    def set_pcGtsId(self, pcGtsId):
        self.pcGtsId = pcGtsId

    def hasContent_(self):
        """Return True when at least one child (``Metadata`` or ``Page``) is set."""
        if (
            self.Metadata is not None or
            self.Page is not None
        ):
            return True
        else:
            return False

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PcGtsType', pretty_print=True):
        """Write this object as an XML element to *outfile*.

        A namespace definition registered for ``'PcGtsType'`` in
        ``GenerateDSNamespaceDefs_`` replaces *namespacedef_*. The tag name
        falls back to ``original_tagname_`` when *name_* is left at its
        default, and a captured ``ns_prefix_`` replaces *namespaceprefix_*
        when ``UseCapturedNS_`` is set. An element without children is
        written in self-closing form.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('PcGtsType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        if pretty_print:
            eol_ = '\n'
        else:
            eol_ = ''
        if self.original_tagname_ is not None and name_ == 'PcGtsType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', ))
        already_processed = set()
        self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PcGtsType')
        if self.hasContent_():
            outfile.write('>%s' % (eol_, ))
            self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='PcGtsType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            outfile.write('/>%s' % (eol_, ))

    def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PcGtsType'):
        """Write the ``pcGtsId`` attribute unless it is unset or already processed."""
        if self.pcGtsId is not None and 'pcGtsId' not in already_processed:
            already_processed.add('pcGtsId')
            outfile.write(' pcGtsId=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.pcGtsId), input_name='pcGtsId')), ))

    def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PcGtsType', fromsubclass_=False, pretty_print=True):
        """Export the ``Metadata`` and ``Page`` children, in that order.

        Each child gets its own ``*_nsprefix_`` as prefix when
        ``UseCapturedNS_`` is set, otherwise no prefix.
        """
        if self.Metadata is not None:
            namespaceprefix_ = self.Metadata_nsprefix_ + ':' if (UseCapturedNS_ and self.Metadata_nsprefix_) else ''
            self.Metadata.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Metadata', pretty_print=pretty_print)
        if self.Page is not None:
            namespaceprefix_ = self.Page_nsprefix_ + ':' if (UseCapturedNS_ and self.Page_nsprefix_) else ''
            self.Page.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Page', pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='PcGtsType', mapping_=None, nsmap_=None):
        """Build and return an element-tree element for this object.

        The element is created standalone, or as a sub-element of
        *parent_element* when one is given. When *mapping_* is provided, the
        new element is recorded in it under ``id(self)``.
        """
        if parent_element is None:
            element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_)
        else:
            element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_)
        if self.pcGtsId is not None:
            element.set('pcGtsId', self.gds_format_string(self.pcGtsId))
        if self.Metadata is not None:
            self.Metadata.to_etree(element, name_='Metadata', mapping_=mapping_, nsmap_=nsmap_)
        if self.Page is not None:
            self.Page.to_etree(element, name_='Page', mapping_=mapping_, nsmap_=nsmap_)
        if mapping_ is not None:
            # ``id`` here is the builtin: the class-level ``id`` property is
            # not visible as a bare name inside a method body.
            mapping_[id(self)] = element
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element *node* and return ``self``.

        Attributes are read first, then every child of *node* is passed to
        ``buildChildren`` with the tag name captured by ``Tag_pattern_``.
        """
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self.buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def buildAttributes(self, node, attrs, already_processed):
        """Read the ``pcGtsId`` attribute from *node* if not already processed."""
        value = find_attr_value_('pcGtsId', node)
        if value is not None and 'pcGtsId' not in already_processed:
            already_processed.add('pcGtsId')
            self.pcGtsId = value

    def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Build the ``Metadata`` or ``Page`` child from *child_*; other names are ignored."""
        if nodeName_ == 'Metadata':
            obj_ = MetadataType.factory(parent_object_=self)
            obj_.build(child_, gds_collector_=gds_collector_)
            self.Metadata = obj_
            obj_.original_tagname_ = 'Metadata'
        elif nodeName_ == 'Page':
            obj_ = PageType.factory(parent_object_=self)
            obj_.build(child_, gds_collector_=gds_collector_)
            self.Page = obj_
            obj_.original_tagname_ = 'Page'

    def __hash__(self):
        """Hash on the ``id`` property."""
        return hash(self.id)

    @property
    def id(self):
        """Return ``pcGtsId`` if the instance has that attribute, else ``imageFilename``.

        ``__init__`` always sets ``pcGtsId``, so the fallback only applies to
        instances that were not initialised through it.
        """
        if hasattr(self, 'pcGtsId'):
            return self.pcGtsId
        return self.imageFilename

    def get_AllAlternativeImagePaths(self, page=True, region=True, line=True, word=True, glyph=True):
        """
        Get all the ``pc:AlternativeImage/@filename`` paths referenced in the PAGE-XML document.

        Arguments:
            page (boolean): Get images on ``pc:Page`` level
            region (boolean): Get images on ``pc:*Region`` level
            line (boolean): Get images on ``pc:TextLine`` level
            word (boolean): Get images on ``pc:Word`` level
            glyph (boolean): Get images on ``pc:Glyph`` level

        Returns:
            a list of image filename strings
        """
        from .constants import NAMESPACES, PAGE_REGION_TYPES # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        from io import StringIO # pylint: disable=import-outside-toplevel
        ret = []
        # XXX Since we're only interested in the **paths** of the images,
        # export, parse and xpath are less convoluted than traversing
        # the generateDS API. Quite possibly not as efficient as could be.
        sio = StringIO()
        self.export(
            outfile=sio,
            level=0,
            name_='PcGts',
            namespaceprefix_='pc:',
            namespacedef_='xmlns:pc="%s" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="%s %s/pagecontent.xsd"' % (
                NAMESPACES['page'],
                NAMESPACES['page'],
                NAMESPACES['page']
            ))
        doc = parsexmlstring_(sio.getvalue()) # pylint: disable=undefined-variable
        # shortcut
        if page and region and line and word and glyph:
            ret += doc.xpath('//page:AlternativeImage/@filename', namespaces=NAMESPACES)
        else:
            if page:
                ret += doc.xpath('/page:PcGts/page:Page/page:AlternativeImage/@filename', namespaces=NAMESPACES)
            if region:
                for class_ in PAGE_REGION_TYPES:
                    ret += doc.xpath('//page:%sRegion/page:AlternativeImage/@filename' % class_, namespaces=NAMESPACES)
            if line:
                ret += doc.xpath('//page:TextLine/page:AlternativeImage/@filename', namespaces=NAMESPACES)
            if word:
                ret += doc.xpath('//page:Word/page:AlternativeImage/@filename', namespaces=NAMESPACES)
            if glyph:
                ret += doc.xpath('//page:Glyph/page:AlternativeImage/@filename', namespaces=NAMESPACES)
        return ret

    def prune_ReadingOrder(self):
        """
        Remove any empty ReadingOrder elements
        """
        ro = self.get_Page().get_ReadingOrder()
        if ro:
            # A group without any members counts as absent.
            og = ro.get_OrderedGroup()
            if og and (not og.get_RegionRefIndexed() and
                       not og.get_OrderedGroupIndexed() and
                       not og.get_UnorderedGroupIndexed()):
                og = None
            ug = ro.get_UnorderedGroup()
            if ug and (not ug.get_RegionRef() and
                       not ug.get_OrderedGroup() and
                       not ug.get_UnorderedGroup()):
                ug = None
            if not og and not ug:
                self.get_Page().set_ReadingOrder(None)
# end class PcGtsType
class MetadataType(GeneratedsSuper):
    """External reference of any kind"""
    # This assignment is overridden by the ``__hash__`` method defined at the
    # end of the class body (the later definition wins).
    __hash__ = GeneratedsSuper.__hash__
    member_data_items_ = [
        MemberSpec_('externalRef', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('Creator', 'string', 0, 0, {'name': 'Creator', 'type': 'string'}, None),
        MemberSpec_('Created', 'dateTime', 0, 0, {'name': 'Created', 'type': 'dateTime'}, None),
        MemberSpec_('LastChange', 'dateTime', 0, 0, {'name': 'LastChange', 'type': 'dateTime'}, None),
        MemberSpec_('Comments', 'string', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'Comments', 'type': 'string'}, None),
        MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None),
        MemberSpec_('MetadataItem', 'MetadataItemType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'MetadataItem', 'type': 'MetadataItemType'}, None),
    ]
    subclass = None
    superclass = None

    def __init__(self, externalRef=None, Creator=None, Created=None, LastChange=None, Comments=None, UserDefined=None, MetadataItem=None, gds_collector_=None, **kwargs_):
        """Store the attribute and child values.

        ``Created`` and ``LastChange`` given as strings are parsed with the
        format ``'%Y-%m-%dT%H:%M:%S'``; other values are stored unchanged.
        ``MetadataItem`` defaults to a new empty list. ``parent_object_`` is
        taken from ``kwargs_`` when present.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = None
        self.externalRef = _cast(None, externalRef)
        self.externalRef_nsprefix_ = "pc"
        self.Creator = Creator
        self.Creator_nsprefix_ = "pc"
        if isinstance(Created, BaseStrType_):
            initvalue_ = datetime_.datetime.strptime(Created, '%Y-%m-%dT%H:%M:%S')
        else:
            initvalue_ = Created
        self.Created = initvalue_
        self.Created_nsprefix_ = "pc"
        if isinstance(LastChange, BaseStrType_):
            initvalue_ = datetime_.datetime.strptime(LastChange, '%Y-%m-%dT%H:%M:%S')
        else:
            initvalue_ = LastChange
        self.LastChange = initvalue_
        self.LastChange_nsprefix_ = "pc"
        self.Comments = Comments
        self.Comments_nsprefix_ = "pc"
        self.UserDefined = UserDefined
        self.UserDefined_nsprefix_ = "pc"
        if MetadataItem is None:
            self.MetadataItem = []
        else:
            self.MetadataItem = MetadataItem
        self.MetadataItem_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Create an instance, preferring a registered subclass.

        Lookup order: a subclass found via ``CurrentSubclassModule_``, then
        the ``subclass`` class attribute, then ``MetadataType`` itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, MetadataType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if MetadataType.subclass:
            return MetadataType.subclass(*args_, **kwargs_)
        else:
            return MetadataType(*args_, **kwargs_)

    # Plain accessors for the namespace prefix, the children and the attribute.
    def get_ns_prefix_(self):
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        self.ns_prefix_ = ns_prefix

    def get_Creator(self):
        return self.Creator

    def set_Creator(self, Creator):
        self.Creator = Creator

    def get_Created(self):
        return self.Created

    def set_Created(self, Created):
        self.Created = Created

    def get_LastChange(self):
        return self.LastChange

    def set_LastChange(self, LastChange):
        self.LastChange = LastChange

    def get_Comments(self):
        return self.Comments

    def set_Comments(self, Comments):
        self.Comments = Comments

    def get_UserDefined(self):
        return self.UserDefined

    def set_UserDefined(self, UserDefined):
        self.UserDefined = UserDefined

    def get_MetadataItem(self):
        return self.MetadataItem

    def set_MetadataItem(self, MetadataItem):
        self.MetadataItem = MetadataItem

    def add_MetadataItem(self, value):
        self.MetadataItem.append(value)

    def insert_MetadataItem_at(self, index, value):
        self.MetadataItem.insert(index, value)

    def replace_MetadataItem_at(self, index, value):
        self.MetadataItem[index] = value

    def get_externalRef(self):
        return self.externalRef

    def set_externalRef(self, externalRef):
        self.externalRef = externalRef

    def hasContent_(self):
        """Return True when any child is set or ``MetadataItem`` is non-empty."""
        if (
            self.Creator is not None or
            self.Created is not None or
            self.LastChange is not None or
            self.Comments is not None or
            self.UserDefined is not None or
            self.MetadataItem
        ):
            return True
        else:
            return False

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:None="http://www.w3.org/2001/XMLSchema" ', name_='MetadataType', pretty_print=True):
        """Write this object as an XML element to *outfile*.

        A namespace definition registered for ``'MetadataType'`` in
        ``GenerateDSNamespaceDefs_`` replaces *namespacedef_*. The tag name
        falls back to ``original_tagname_`` when *name_* is left at its
        default, and a captured ``ns_prefix_`` replaces *namespaceprefix_*
        when ``UseCapturedNS_`` is set. An element without content is
        written in self-closing form.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('MetadataType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        if pretty_print:
            eol_ = '\n'
        else:
            eol_ = ''
        if self.original_tagname_ is not None and name_ == 'MetadataType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', ))
        already_processed = set()
        self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MetadataType')
        if self.hasContent_():
            outfile.write('>%s' % (eol_, ))
            self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MetadataType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            outfile.write('/>%s' % (eol_, ))

    def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MetadataType'):
        """Write the ``externalRef`` attribute unless it is unset or already processed."""
        if self.externalRef is not None and 'externalRef' not in already_processed:
            already_processed.add('externalRef')
            outfile.write(' externalRef=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.externalRef), input_name='externalRef')), ))

    def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:None="http://www.w3.org/2001/XMLSchema" ', name_='MetadataType', fromsubclass_=False, pretty_print=True):
        """Write the children in the order Creator, Created, LastChange,
        Comments, UserDefined, then every MetadataItem.

        Each child gets its own ``*_nsprefix_`` as prefix when
        ``UseCapturedNS_`` is set, otherwise no prefix.
        """
        if pretty_print:
            eol_ = '\n'
        else:
            eol_ = ''
        if self.Creator is not None:
            namespaceprefix_ = self.Creator_nsprefix_ + ':' if (UseCapturedNS_ and self.Creator_nsprefix_) else ''
            showIndent(outfile, level, pretty_print)
            outfile.write('<%sCreator>%s</%sCreator>%s' % (namespaceprefix_ , self.gds_encode(self.gds_format_string(quote_xml(self.Creator), input_name='Creator')), namespaceprefix_ , eol_))
        if self.Created is not None:
            namespaceprefix_ = self.Created_nsprefix_ + ':' if (UseCapturedNS_ and self.Created_nsprefix_) else ''
            showIndent(outfile, level, pretty_print)
            outfile.write('<%sCreated>%s</%sCreated>%s' % (namespaceprefix_ , self.gds_format_datetime(self.Created, input_name='Created'), namespaceprefix_ , eol_))
        if self.LastChange is not None:
            namespaceprefix_ = self.LastChange_nsprefix_ + ':' if (UseCapturedNS_ and self.LastChange_nsprefix_) else ''
            showIndent(outfile, level, pretty_print)
            outfile.write('<%sLastChange>%s</%sLastChange>%s' % (namespaceprefix_ , self.gds_format_datetime(self.LastChange, input_name='LastChange'), namespaceprefix_ , eol_))
        if self.Comments is not None:
            namespaceprefix_ = self.Comments_nsprefix_ + ':' if (UseCapturedNS_ and self.Comments_nsprefix_) else ''
            showIndent(outfile, level, pretty_print)
            outfile.write('<%sComments>%s</%sComments>%s' % (namespaceprefix_ , self.gds_encode(self.gds_format_string(quote_xml(self.Comments), input_name='Comments')), namespaceprefix_ , eol_))
        if self.UserDefined is not None:
            namespaceprefix_ = self.UserDefined_nsprefix_ + ':' if (UseCapturedNS_ and self.UserDefined_nsprefix_) else ''
            self.UserDefined.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UserDefined', pretty_print=pretty_print)
        for MetadataItem_ in self.MetadataItem:
            namespaceprefix_ = self.MetadataItem_nsprefix_ + ':' if (UseCapturedNS_ and self.MetadataItem_nsprefix_) else ''
            MetadataItem_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='MetadataItem', pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='MetadataType', mapping_=None, nsmap_=None):
        """Build and return an element-tree element for this object.

        The element is created standalone, or as a sub-element of
        *parent_element* when one is given. When *mapping_* is provided, the
        new element is recorded in it under ``id(self)``.
        """
        if parent_element is None:
            element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_)
        else:
            element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_)
        if self.externalRef is not None:
            element.set('externalRef', self.gds_format_string(self.externalRef))
        if self.Creator is not None:
            etree_.SubElement(element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}Creator').text = self.gds_format_string(self.Creator)
        if self.Created is not None:
            etree_.SubElement(element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}Created').text = self.gds_format_datetime(self.Created)
        if self.LastChange is not None:
            etree_.SubElement(element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}LastChange').text = self.gds_format_datetime(self.LastChange)
        if self.Comments is not None:
            etree_.SubElement(element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}Comments').text = self.gds_format_string(self.Comments)
        if self.UserDefined is not None:
            self.UserDefined.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_)
        for MetadataItem_ in self.MetadataItem:
            MetadataItem_.to_etree(element, name_='MetadataItem', mapping_=mapping_, nsmap_=nsmap_)
        if mapping_ is not None:
            mapping_[id(self)] = element
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element *node* and return ``self``.

        Attributes are read first, then every child of *node* is passed to
        ``buildChildren`` with the tag name captured by ``Tag_pattern_``.
        """
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self.buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def buildAttributes(self, node, attrs, already_processed):
        """Read the ``externalRef`` attribute from *node* if not already processed."""
        value = find_attr_value_('externalRef', node)
        if value is not None and 'externalRef' not in already_processed:
            already_processed.add('externalRef')
            self.externalRef = value

    def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Set the member named *nodeName_* from *child_*.

        String children are parsed and validated, date-time children are
        parsed with ``gds_parse_datetime``, ``UserDefined`` is built as an
        object and each ``MetadataItem`` is appended to the list. Other
        names are ignored.
        """
        if nodeName_ == 'Creator':
            value_ = child_.text
            value_ = self.gds_parse_string(value_, node, 'Creator')
            value_ = self.gds_validate_string(value_, node, 'Creator')
            self.Creator = value_
        elif nodeName_ == 'Created':
            sval_ = child_.text
            dval_ = self.gds_parse_datetime(sval_)
            self.Created = dval_
        elif nodeName_ == 'LastChange':
            sval_ = child_.text
            dval_ = self.gds_parse_datetime(sval_)
            self.LastChange = dval_
        elif nodeName_ == 'Comments':
            value_ = child_.text
            value_ = self.gds_parse_string(value_, node, 'Comments')
            value_ = self.gds_validate_string(value_, node, 'Comments')
            self.Comments = value_
        elif nodeName_ == 'UserDefined':
            obj_ = UserDefinedType.factory(parent_object_=self)
            obj_.build(child_, gds_collector_=gds_collector_)
            self.UserDefined = obj_
            obj_.original_tagname_ = 'UserDefined'
        elif nodeName_ == 'MetadataItem':
            obj_ = MetadataItemType.factory(parent_object_=self)
            obj_.build(child_, gds_collector_=gds_collector_)
            self.MetadataItem.append(obj_)
            obj_.original_tagname_ = 'MetadataItem'

    def __hash__(self):
        """Hash on ``self.id``."""
        # NOTE(review): no ``id`` attribute or property is defined in this
        # class; unless the base class provides one, hashing an instance
        # raises AttributeError - confirm against GeneratedsSuper.
        return hash(self.id)
# end class MetadataType
[docs]class MetadataItemType(GeneratedsSuper): """Type of metadata (e.g. author) E.g. imagePhotometricInterpretation E.g. RGB""" __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}), MemberSpec_('name', 'string', 0, 1, {'use': 'optional'}), MemberSpec_('value', 'string', 0, 0, {'use': 'required'}), MemberSpec_('date', 'dateTime', 0, 1, {'use': 'optional'}), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), ] subclass = None superclass = None def __init__(self, type_=None, name=None, value=None, date=None, Labels=None, gds_collector_=None, **kwargs_): self.gds_collector_ = gds_collector_ self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') self.ns_prefix_ = None self.type_ = _cast(None, type_) self.type__nsprefix_ = "pc" self.name = _cast(None, name) self.name_nsprefix_ = "pc" self.value = _cast(None, value) self.value_nsprefix_ = "pc" if isinstance(date, BaseStrType_): initvalue_ = datetime_.datetime.strptime(date, '%Y-%m-%dT%H:%M:%S') else: initvalue_ = date self.date = initvalue_ if Labels is None: self.Labels = [] else: self.Labels = Labels self.Labels_nsprefix_ = "pc"
[docs] def factory(*args_, **kwargs_): if CurrentSubclassModule_ is not None: subclass = getSubclassFromModule_( CurrentSubclassModule_, MetadataItemType) if subclass is not None: return subclass(*args_, **kwargs_) if MetadataItemType.subclass: return MetadataItemType.subclass(*args_, **kwargs_) else: return MetadataItemType(*args_, **kwargs_)
factory = staticmethod(factory)
[docs] def get_ns_prefix_(self): return self.ns_prefix_
[docs] def set_ns_prefix_(self, ns_prefix): self.ns_prefix_ = ns_prefix
[docs] def get_Labels(self): return self.Labels
[docs] def set_Labels(self, Labels): self.Labels = Labels
[docs] def add_Labels(self, value): self.Labels.append(value)
[docs] def insert_Labels_at(self, index, value): self.Labels.insert(index, value)
[docs] def replace_Labels_at(self, index, value): self.Labels[index] = value
[docs] def get_type(self): return self.type_
[docs] def set_type(self, type_): self.type_ = type_
[docs] def get_name(self): return self.name
[docs] def set_name(self, name): self.name = name
[docs] def get_value(self): return self.value
[docs] def set_value(self, value): self.value = value
[docs] def get_date(self): return self.date
[docs] def set_date(self, date): self.date = date
[docs] def hasContent_(self): if ( self.Labels ): return True else: return False
[docs] def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataItemType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('MetadataItemType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ if pretty_print: eol_ = '\n' else: eol_ = '' if self.original_tagname_ is not None and name_ == 'MetadataItemType': name_ = self.original_tagname_ if UseCapturedNS_ and self.ns_prefix_: namespaceprefix_ = self.ns_prefix_ + ':' showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MetadataItemType') if self.hasContent_(): outfile.write('>%s' % (eol_, )) self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MetadataItemType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, ))
[docs] def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MetadataItemType'): if self.type_ is not None and 'type_' not in already_processed: already_processed.add('type_') outfile.write(' type=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.type_), input_name='type')), )) if self.name is not None and 'name' not in already_processed: already_processed.add('name') outfile.write(' name=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.name), input_name='name')), )) if self.value is not None and 'value' not in already_processed: already_processed.add('value') outfile.write(' value=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.value), input_name='value')), )) if self.date is not None and 'date' not in already_processed: already_processed.add('date') outfile.write(' date="%s"' % self.gds_format_datetime(self.date, input_name='date'))
[docs] def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataItemType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: eol_ = '' for Labels_ in self.Labels: namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print)
[docs] def to_etree(self, parent_element=None, name_='MetadataItemType', mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) if self.type_ is not None: element.set('type', self.gds_format_string(self.type_)) if self.name is not None: element.set('name', self.gds_format_string(self.name)) if self.value is not None: element.set('value', self.gds_format_string(self.value)) if self.date is not None: element.set('date', self.gds_format_datetime(self.date)) for Labels_ in self.Labels: Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element return element
[docs] def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ if SaveElementTreeNode: self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix self.buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self
[docs] def buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('type', node) if value is not None and 'type' not in already_processed: already_processed.add('type') self.type_ = value value = find_attr_value_('name', node) if value is not None and 'name' not in already_processed: already_processed.add('name') self.name = value value = find_attr_value_('value', node) if value is not None and 'value' not in already_processed: already_processed.add('value') self.value = value value = find_attr_value_('date', node) if value is not None and 'date' not in already_processed: already_processed.add('date') try: self.date = self.gds_parse_datetime(value) except ValueError as exp: raise ValueError('Bad date-time attribute (date): %s' % exp)
[docs] def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Labels': obj_ = LabelsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) self.Labels.append(obj_) obj_.original_tagname_ = 'Labels'
def __hash__(self):
    """Return a hash value for this metadata item.

    The original implementation hashed ``self.id`` unconditionally, but this
    type only carries ``type_``, ``name``, ``value``, ``date`` and ``Labels``,
    so hashing an instance raised ``AttributeError``.  An ``id`` attribute is
    still preferred when one has been set; otherwise the scalar attributes
    are hashed (the ``Labels`` list is left out because lists are unhashable).
    """
    try:
        return hash(self.id)
    except AttributeError:
        # NOTE(review): assumes equality is value based or identity based, so
        # that equal objects share these attribute values -- confirm against
        # GeneratedsSuper.__eq__.
        return hash((self.type_, self.name, self.value, self.date))
# end class MetadataItemType
class LabelsType(GeneratedsSuper):
    """Reference to external model / ontology / schema
    E.g. an RDF resource identifier
    (to be used as subject or object of an RDF triple)
    Prefix for all labels (e.g. first part of an URI)

    Carries the optional string attributes ``externalModel``, ``externalId``,
    ``prefix`` and ``comments`` plus a list of ``Label`` children.
    """
    member_data_items_ = [
        MemberSpec_('externalModel', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('externalId', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('prefix', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('Label', 'LabelType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Label', 'type': 'LabelType'}, None),
    ]
    subclass = None
    superclass = None

    def __init__(self, externalModel=None, externalId=None, prefix=None, comments=None, Label=None, gds_collector_=None, **kwargs_):
        """Store the given attribute values; ``Label`` defaults to a new empty list."""
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = None
        self.externalModel = _cast(None, externalModel)
        self.externalModel_nsprefix_ = "pc"
        self.externalId = _cast(None, externalId)
        self.externalId_nsprefix_ = "pc"
        self.prefix = _cast(None, prefix)
        self.prefix_nsprefix_ = "pc"
        self.comments = _cast(None, comments)
        self.comments_nsprefix_ = "pc"
        # A fresh list per instance avoids sharing one list between objects.
        self.Label = [] if Label is None else Label
        self.Label_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate the registered subclass of ``LabelsType`` if any, else ``LabelsType``."""
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, LabelsType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if LabelsType.subclass:
            return LabelsType.subclass(*args_, **kwargs_)
        return LabelsType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix recorded for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix recorded for this element."""
        self.ns_prefix_ = ns_prefix

    def get_Label(self):
        """Return the list of ``Label`` children (the stored list, not a copy)."""
        return self.Label

    def set_Label(self, Label):
        """Replace the list of ``Label`` children."""
        self.Label = Label

    def add_Label(self, value):
        """Append ``value`` to the ``Label`` children."""
        self.Label.append(value)

    def insert_Label_at(self, index, value):
        """Insert ``value`` into the ``Label`` children at position ``index``."""
        self.Label.insert(index, value)

    def replace_Label_at(self, index, value):
        """Overwrite the ``Label`` child at position ``index`` with ``value``."""
        self.Label[index] = value

    def get_externalModel(self):
        """Return the ``externalModel`` attribute."""
        return self.externalModel

    def set_externalModel(self, externalModel):
        """Set the ``externalModel`` attribute."""
        self.externalModel = externalModel

    def get_externalId(self):
        """Return the ``externalId`` attribute."""
        return self.externalId

    def set_externalId(self, externalId):
        """Set the ``externalId`` attribute."""
        self.externalId = externalId

    def get_prefix(self):
        """Return the ``prefix`` attribute."""
        return self.prefix

    def set_prefix(self, prefix):
        """Set the ``prefix`` attribute."""
        self.prefix = prefix

    def get_comments(self):
        """Return the ``comments`` attribute."""
        return self.comments

    def set_comments(self, comments):
        """Set the ``comments`` attribute."""
        self.comments = comments

    def hasContent_(self):
        """Return True when there is at least one ``Label`` child."""
        return bool(self.Label)

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelsType', pretty_print=True):
        """Write this element, its attributes and its children as XML text to ``outfile``.

        ``level`` is the indentation depth passed to ``showIndent``.  The
        element is written self-closing when ``hasContent_`` is false.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('LabelsType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'LabelsType':
            # Prefer the tag name the element had when it was parsed.
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        nsdef_text = ' ' + namespacedef_ if namespacedef_ else ''
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, nsdef_text, ))
        already_processed = set()
        self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LabelsType')
        if self.hasContent_():
            outfile.write('>%s' % (eol_, ))
            self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LabelsType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            outfile.write('/>%s' % (eol_, ))

    def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LabelsType'):
        """Write every set attribute not yet in ``already_processed`` to ``outfile``."""
        for attr_name in ('externalModel', 'externalId', 'prefix', 'comments'):
            attr_value = getattr(self, attr_name)
            if attr_value is not None and attr_name not in already_processed:
                already_processed.add(attr_name)
                outfile.write(' %s=%s' % (attr_name, self.gds_encode(self.gds_format_string(quote_attrib(attr_value), input_name=attr_name)), ))

    def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelsType', fromsubclass_=False, pretty_print=True):
        """Export every ``Label`` child to ``outfile`` under the tag name ``Label``."""
        # The prefix is the same for every child, so compute it once.
        namespaceprefix_ = self.Label_nsprefix_ + ':' if (UseCapturedNS_ and self.Label_nsprefix_) else ''
        for Label_ in self.Label:
            Label_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Label', pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='LabelsType', mapping_=None, nsmap_=None):
        """Convert this object into an element-tree node and return it.

        The node becomes a child of ``parent_element`` when one is given.
        When ``mapping_`` is given, the node is stored in it under ``id(self)``.
        """
        tag = '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_
        if parent_element is None:
            element = etree_.Element(tag, nsmap=nsmap_)
        else:
            element = etree_.SubElement(parent_element, tag, nsmap=nsmap_)
        for attr_name in ('externalModel', 'externalId', 'prefix', 'comments'):
            attr_value = getattr(self, attr_name)
            if attr_value is not None:
                element.set(attr_name, self.gds_format_string(attr_value))
        for Label_ in self.Label:
            Label_.to_etree(element, name_='Label', mapping_=mapping_, nsmap_=nsmap_)
        if mapping_ is not None:
            mapping_[id(self)] = element
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element-tree ``node`` and return ``self``."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self.buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def buildAttributes(self, node, attrs, already_processed):
        """Copy the attributes present on ``node`` that are not yet in ``already_processed``."""
        for attr_name in ('externalModel', 'externalId', 'prefix', 'comments'):
            value = find_attr_value_(attr_name, node)
            if value is not None and attr_name not in already_processed:
                already_processed.add(attr_name)
                setattr(self, attr_name, value)

    def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Build a ``LabelType`` from ``child_`` when it is a ``Label`` element; ignore others."""
        if nodeName_ == 'Label':
            obj_ = LabelType.factory(parent_object_=self)
            obj_.build(child_, gds_collector_=gds_collector_)
            self.Label.append(obj_)
            obj_.original_tagname_ = 'Label'

    def __hash__(self):
        """Return a hash value for this object.

        This type has no ``id`` attribute, so the previous unconditional
        ``hash(self.id)`` raised ``AttributeError``.  An ``id`` is still
        preferred when one has been set on the instance; otherwise the
        string attributes are hashed (the ``Label`` list is unhashable and
        therefore left out).
        """
        try:
            return hash(self.id)
        except AttributeError:
            # NOTE(review): assumes equal objects share these attribute
            # values -- confirm against GeneratedsSuper.__eq__.
            return hash((self.externalModel, self.externalId, self.prefix, self.comments))
# end class LabelsType
class LabelType(GeneratedsSuper):
    """Semantic label
    The label / tag (e.g. 'person').
    Can be an RDF resource identifier
    (e.g. object of an RDF triple).
    Additional information on the label
    (e.g. 'YYYY-mm-dd' for a date label).
    Can be used as predicate of an RDF triple.

    Carries the attributes ``value`` (required), ``type`` (stored as
    ``type_``) and ``comments``; it has no child elements.
    """
    member_data_items_ = [
        MemberSpec_('value', 'string', 0, 0, {'use': 'required'}),
        MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}),
    ]
    subclass = None
    superclass = None

    def __init__(self, value=None, type_=None, comments=None, gds_collector_=None, **kwargs_):
        """Store the given attribute values on the new label."""
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = None
        self.value = _cast(None, value)
        self.value_nsprefix_ = "pc"
        self.type_ = _cast(None, type_)
        self.type__nsprefix_ = "pc"
        self.comments = _cast(None, comments)
        self.comments_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate the registered subclass of ``LabelType`` if any, else ``LabelType``."""
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, LabelType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if LabelType.subclass:
            return LabelType.subclass(*args_, **kwargs_)
        return LabelType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix recorded for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix recorded for this element."""
        self.ns_prefix_ = ns_prefix

    def get_value(self):
        """Return the ``value`` attribute."""
        return self.value

    def set_value(self, value):
        """Set the ``value`` attribute."""
        self.value = value

    def get_type(self):
        """Return the ``type`` attribute (stored as ``type_``)."""
        return self.type_

    def set_type(self, type_):
        """Set the ``type`` attribute (stored as ``type_``)."""
        self.type_ = type_

    def get_comments(self):
        """Return the ``comments`` attribute."""
        return self.comments

    def set_comments(self, comments):
        """Set the ``comments`` attribute."""
        self.comments = comments

    def hasContent_(self):
        """Return False: this type defines no child elements."""
        return False

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelType', pretty_print=True):
        """Write this element and its attributes as XML text to ``outfile``.

        ``level`` is the indentation depth passed to ``showIndent``.  The
        element is written self-closing when ``hasContent_`` is false.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('LabelType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'LabelType':
            # Prefer the tag name the element had when it was parsed.
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        nsdef_text = ' ' + namespacedef_ if namespacedef_ else ''
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, nsdef_text, ))
        already_processed = set()
        self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LabelType')
        if self.hasContent_():
            outfile.write('>%s' % (eol_, ))
            self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LabelType', pretty_print=pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            outfile.write('/>%s' % (eol_, ))

    def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LabelType'):
        """Write every set attribute not yet in ``already_processed`` to ``outfile``."""
        # (member name, also used as the already_processed key; XML attribute name)
        for member, xml_name in (('value', 'value'), ('type_', 'type'), ('comments', 'comments')):
            attr_value = getattr(self, member)
            if attr_value is not None and member not in already_processed:
                already_processed.add(member)
                outfile.write(' %s=%s' % (xml_name, self.gds_encode(self.gds_format_string(quote_attrib(attr_value), input_name=xml_name)), ))

    def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelType', fromsubclass_=False, pretty_print=True):
        """Do nothing: this type defines no child elements."""
        pass

    def to_etree(self, parent_element=None, name_='LabelType', mapping_=None, nsmap_=None):
        """Convert this label into an element-tree node and return it.

        The node becomes a child of ``parent_element`` when one is given.
        When ``mapping_`` is given, the node is stored in it under ``id(self)``.
        """
        tag = '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_
        if parent_element is None:
            element = etree_.Element(tag, nsmap=nsmap_)
        else:
            element = etree_.SubElement(parent_element, tag, nsmap=nsmap_)
        for member, xml_name in (('value', 'value'), ('type_', 'type'), ('comments', 'comments')):
            attr_value = getattr(self, member)
            if attr_value is not None:
                element.set(xml_name, self.gds_format_string(attr_value))
        if mapping_ is not None:
            mapping_[id(self)] = element
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element-tree ``node`` and return ``self``."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self.buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def buildAttributes(self, node, attrs, already_processed):
        """Copy the attributes present on ``node`` that are not yet in ``already_processed``."""
        # (XML attribute name, also the already_processed key; member name)
        for xml_name, member in (('value', 'value'), ('type', 'type_'), ('comments', 'comments')):
            value = find_attr_value_(xml_name, node)
            if value is not None and xml_name not in already_processed:
                already_processed.add(xml_name)
                setattr(self, member, value)

    def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Do nothing: this type defines no child elements."""
        pass

    def __hash__(self):
        """Return a hash value for this label.

        This type has no ``id`` attribute, so the previous unconditional
        ``hash(self.id)`` raised ``AttributeError``.  An ``id`` is still
        preferred when one has been set on the instance; otherwise the
        three attributes are hashed.
        """
        try:
            return hash(self.id)
        except AttributeError:
            # NOTE(review): assumes equal objects share these attribute
            # values -- confirm against GeneratedsSuper.__eq__.
            return hash((self.value, self.type_, self.comments))
# end class LabelType
[docs]class PageType(GeneratedsSuper): """Contains the image file name including the file extension. Specifies the width of the image.Specifies the height of the image.Specifies the image resolution in width.Specifies the image resolution in height. Specifies the unit of the resolution information referring to a standardised unit of measurement (pixels per inch, pixels per centimeter or other). For generic use The angle the rectangle encapsulating the page (or its Border) has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). (The rotated image can be further referenced via “AlternativeImage”.) Range: -179.999,180 The type of the page within the document (e.g. cover page). The primary language used in the page (lower-level definitions override the page-level definition). The secondary language used in the page (lower-level definitions override the page-level definition). The primary script used in the page (lower-level definitions override the page-level definition). The secondary script used in the page (lower-level definitions override the page-level definition). The direction in which text within lines should be read (order of words and characters), in addition to “textLineOrder” (lower-level definitions override the page-level definition). The order of text lines within a block, in addition to “readingDirection” (lower-level definitions override the page-level definition). 
Confidence value for whole page (between 0 and 1)""" __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('imageFilename', 'string', 0, 0, {'use': 'required'}), MemberSpec_('imageWidth', 'int', 0, 0, {'use': 'required'}), MemberSpec_('imageHeight', 'int', 0, 0, {'use': 'required'}), MemberSpec_('imageXResolution', 'float', 0, 1, {'use': 'optional'}), MemberSpec_('imageYResolution', 'float', 0, 1, {'use': 'optional'}), MemberSpec_('imageResolutionUnit', 'string', 0, 1, {'use': 'optional'}), MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), MemberSpec_('type_', 'pc:PageTypeSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('secondaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('textLineOrder', 'pc:TextLineOrderSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Border', 'BorderType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'Border', 'type': 'BorderType'}, None), MemberSpec_('PrintSpace', 'PrintSpaceType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'PrintSpace', 'type': 'PrintSpaceType'}, None), MemberSpec_('ReadingOrder', 'ReadingOrderType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'ReadingOrder', 'type': 'ReadingOrderType'}, None), MemberSpec_('Layers', 'LayersType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'Layers', 'type': 
'LayersType'}, None), MemberSpec_('Relations', 'RelationsType', 0, 1, {'minOccurs': '0', 'name': 'Relations', 'type': 'RelationsType'}, None), MemberSpec_('TextStyle', 'TextStyleType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'TextStyle', 'type': 'TextStyleType'}, None), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), MemberSpec_('TextRegion', 'TextRegionType', 1, 1, {'name': 'TextRegion', 'type': 'TextRegionType'}, 1), MemberSpec_('ImageRegion', 'ImageRegionType', 1, 1, {'name': 'ImageRegion', 'type': 'ImageRegionType'}, 1), MemberSpec_('LineDrawingRegion', 'LineDrawingRegionType', 1, 1, {'name': 'LineDrawingRegion', 'type': 'LineDrawingRegionType'}, 1), MemberSpec_('GraphicRegion', 'GraphicRegionType', 1, 1, {'name': 'GraphicRegion', 'type': 'GraphicRegionType'}, 1), MemberSpec_('TableRegion', 'TableRegionType', 1, 1, {'name': 'TableRegion', 'type': 'TableRegionType'}, 1), MemberSpec_('ChartRegion', 'ChartRegionType', 1, 1, {'name': 'ChartRegion', 'type': 'ChartRegionType'}, 1), MemberSpec_('MapRegion', 'MapRegionType', 1, 1, {'name': 'MapRegion', 'type': 'MapRegionType'}, 1), MemberSpec_('SeparatorRegion', 'SeparatorRegionType', 1, 1, {'name': 'SeparatorRegion', 'type': 'SeparatorRegionType'}, 1), MemberSpec_('MathsRegion', 'MathsRegionType', 1, 1, {'name': 'MathsRegion', 'type': 'MathsRegionType'}, 1), MemberSpec_('ChemRegion', 'ChemRegionType', 1, 1, {'name': 'ChemRegion', 'type': 'ChemRegionType'}, 1), MemberSpec_('MusicRegion', 'MusicRegionType', 1, 1, {'name': 'MusicRegion', 'type': 'MusicRegionType'}, 1), MemberSpec_('AdvertRegion', 'AdvertRegionType', 1, 1, {'name': 'AdvertRegion', 'type': 'AdvertRegionType'}, 1), MemberSpec_('NoiseRegion', 'NoiseRegionType', 1, 1, {'name': 'NoiseRegion', 'type': 'NoiseRegionType'}, 
1), MemberSpec_('UnknownRegion', 'UnknownRegionType', 1, 1, {'name': 'UnknownRegion', 'type': 'UnknownRegionType'}, 1), MemberSpec_('CustomRegion', 'CustomRegionType', 1, 1, {'name': 'CustomRegion', 'type': 'CustomRegionType'}, 1), ] subclass = None superclass = None def __init__(self, imageFilename=None, imageWidth=None, imageHeight=None, imageXResolution=None, imageYResolution=None, imageResolutionUnit=None, custom=None, orientation=None, type_=None, primaryLanguage=None, secondaryLanguage=None, primaryScript=None, secondaryScript=None, readingDirection=None, textLineOrder=None, conf=None, AlternativeImage=None, Border=None, PrintSpace=None, ReadingOrder=None, Layers=None, Relations=None, TextStyle=None, UserDefined=None, Labels=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, MapRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, gds_collector_=None, **kwargs_): self.gds_collector_ = gds_collector_ self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') self.ns_prefix_ = None self.imageFilename = _cast(None, imageFilename) self.imageFilename_nsprefix_ = "pc" self.imageWidth = _cast(int, imageWidth) self.imageWidth_nsprefix_ = "pc" self.imageHeight = _cast(int, imageHeight) self.imageHeight_nsprefix_ = "pc" self.imageXResolution = _cast(float, imageXResolution) self.imageXResolution_nsprefix_ = "pc" self.imageYResolution = _cast(float, imageYResolution) self.imageYResolution_nsprefix_ = "pc" self.imageResolutionUnit = _cast(None, imageResolutionUnit) self.imageResolutionUnit_nsprefix_ = "pc" self.custom = _cast(None, custom) self.custom_nsprefix_ = "pc" self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.type_ = _cast(None, type_) self.type__nsprefix_ = "pc" self.primaryLanguage = _cast(None, 
primaryLanguage) self.primaryLanguage_nsprefix_ = "pc" self.secondaryLanguage = _cast(None, secondaryLanguage) self.secondaryLanguage_nsprefix_ = "pc" self.primaryScript = _cast(None, primaryScript) self.primaryScript_nsprefix_ = "pc" self.secondaryScript = _cast(None, secondaryScript) self.secondaryScript_nsprefix_ = "pc" self.readingDirection = _cast(None, readingDirection) self.readingDirection_nsprefix_ = "pc" self.textLineOrder = _cast(None, textLineOrder) self.textLineOrder_nsprefix_ = "pc" self.conf = _cast(float, conf) self.conf_nsprefix_ = "pc" if AlternativeImage is None: self.AlternativeImage = [] else: self.AlternativeImage = AlternativeImage self.AlternativeImage_nsprefix_ = "pc" self.Border = Border self.Border_nsprefix_ = "pc" self.PrintSpace = PrintSpace self.PrintSpace_nsprefix_ = "pc" self.ReadingOrder = ReadingOrder self.ReadingOrder_nsprefix_ = "pc" self.Layers = Layers self.Layers_nsprefix_ = "pc" self.Relations = Relations self.Relations_nsprefix_ = "pc" self.TextStyle = TextStyle self.TextStyle_nsprefix_ = "pc" self.UserDefined = UserDefined self.UserDefined_nsprefix_ = "pc" if Labels is None: self.Labels = [] else: self.Labels = Labels self.Labels_nsprefix_ = "pc" if TextRegion is None: self.TextRegion = [] else: self.TextRegion = TextRegion self.TextRegion_nsprefix_ = "pc" if ImageRegion is None: self.ImageRegion = [] else: self.ImageRegion = ImageRegion self.ImageRegion_nsprefix_ = "pc" if LineDrawingRegion is None: self.LineDrawingRegion = [] else: self.LineDrawingRegion = LineDrawingRegion self.LineDrawingRegion_nsprefix_ = "pc" if GraphicRegion is None: self.GraphicRegion = [] else: self.GraphicRegion = GraphicRegion self.GraphicRegion_nsprefix_ = "pc" if TableRegion is None: self.TableRegion = [] else: self.TableRegion = TableRegion self.TableRegion_nsprefix_ = "pc" if ChartRegion is None: self.ChartRegion = [] else: self.ChartRegion = ChartRegion self.ChartRegion_nsprefix_ = "pc" if MapRegion is None: self.MapRegion = [] else: 
self.MapRegion = MapRegion self.MapRegion_nsprefix_ = "pc" if SeparatorRegion is None: self.SeparatorRegion = [] else: self.SeparatorRegion = SeparatorRegion self.SeparatorRegion_nsprefix_ = "pc" if MathsRegion is None: self.MathsRegion = [] else: self.MathsRegion = MathsRegion self.MathsRegion_nsprefix_ = "pc" if ChemRegion is None: self.ChemRegion = [] else: self.ChemRegion = ChemRegion self.ChemRegion_nsprefix_ = "pc" if MusicRegion is None: self.MusicRegion = [] else: self.MusicRegion = MusicRegion self.MusicRegion_nsprefix_ = "pc" if AdvertRegion is None: self.AdvertRegion = [] else: self.AdvertRegion = AdvertRegion self.AdvertRegion_nsprefix_ = "pc" if NoiseRegion is None: self.NoiseRegion = [] else: self.NoiseRegion = NoiseRegion self.NoiseRegion_nsprefix_ = "pc" if UnknownRegion is None: self.UnknownRegion = [] else: self.UnknownRegion = UnknownRegion self.UnknownRegion_nsprefix_ = "pc" if CustomRegion is None: self.CustomRegion = [] else: self.CustomRegion = CustomRegion self.CustomRegion_nsprefix_ = "pc"
@staticmethod
def factory(*args_, **kwargs_):
    """Create a ``PageType`` instance, or an instance of a registered subclass.

    A subclass found through ``getSubclassFromModule_`` takes precedence,
    then ``PageType.subclass``, and finally ``PageType`` itself.  All
    arguments are forwarded unchanged to the chosen constructor.
    """
    if CurrentSubclassModule_ is not None:
        override = getSubclassFromModule_(CurrentSubclassModule_, PageType)
        if override is not None:
            return override(*args_, **kwargs_)
    target = PageType.subclass if PageType.subclass else PageType
    return target(*args_, **kwargs_)
def get_ns_prefix_(self):
    """Return the namespace prefix recorded for this element."""
    return self.ns_prefix_


def set_ns_prefix_(self, ns_prefix):
    """Record ``ns_prefix`` as the namespace prefix of this element."""
    self.ns_prefix_ = ns_prefix
def get_AlternativeImage(self):
    """Return the AlternativeImage children (the stored list, not a copy)."""
    return self.AlternativeImage


def set_AlternativeImage(self, AlternativeImage):
    """Replace the stored AlternativeImage collection."""
    self.AlternativeImage = AlternativeImage


def add_AlternativeImage(self, value):
    """Append ``value`` to the AlternativeImage children."""
    entries = self.AlternativeImage
    entries.append(value)


def insert_AlternativeImage_at(self, index, value):
    """Insert ``value`` into the AlternativeImage children at position ``index``."""
    entries = self.AlternativeImage
    entries.insert(index, value)


def replace_AlternativeImage_at(self, index, value):
    """Overwrite the AlternativeImage child at position ``index`` with ``value``."""
    entries = self.AlternativeImage
    entries[index] = value
def get_Border(self):
    """Return the Border child of this page."""
    return self.Border


def set_Border(self, Border):
    """Store ``Border`` as the Border child of this page."""
    self.Border = Border
def get_PrintSpace(self):
    """Return the PrintSpace child of this page."""
    return self.PrintSpace


def set_PrintSpace(self, PrintSpace):
    """Store ``PrintSpace`` as the PrintSpace child of this page."""
    self.PrintSpace = PrintSpace
def get_ReadingOrder(self):
    """Return the ReadingOrder child of this page."""
    return self.ReadingOrder


def set_ReadingOrder(self, ReadingOrder):
    """Store ``ReadingOrder`` as the ReadingOrder child of this page."""
    self.ReadingOrder = ReadingOrder
def get_Layers(self):
    """Return the Layers child of this page."""
    return self.Layers


def set_Layers(self, Layers):
    """Store ``Layers`` as the Layers child of this page."""
    self.Layers = Layers
def get_Relations(self):
    """Return the Relations child of this page."""
    return self.Relations


def set_Relations(self, Relations):
    """Store ``Relations`` as the Relations child of this page."""
    self.Relations = Relations
def get_TextStyle(self):
    """Return the TextStyle child of this page."""
    return self.TextStyle


def set_TextStyle(self, TextStyle):
    """Store ``TextStyle`` as the TextStyle child of this page."""
    self.TextStyle = TextStyle
def get_UserDefined(self):
    """Return the UserDefined child of this page."""
    return self.UserDefined


def set_UserDefined(self, UserDefined):
    """Store ``UserDefined`` as the UserDefined child of this page."""
    self.UserDefined = UserDefined
def get_Labels(self):
    """Return the Labels children (the stored list, not a copy)."""
    return self.Labels


def set_Labels(self, Labels):
    """Replace the stored Labels collection."""
    self.Labels = Labels


def add_Labels(self, value):
    """Append ``value`` to the Labels children."""
    entries = self.Labels
    entries.append(value)


def insert_Labels_at(self, index, value):
    """Insert ``value`` into the Labels children at position ``index``."""
    entries = self.Labels
    entries.insert(index, value)


def replace_Labels_at(self, index, value):
    """Overwrite the Labels child at position ``index`` with ``value``."""
    entries = self.Labels
    entries[index] = value
def get_TextRegion(self):
    """Return the TextRegion children (the stored list, not a copy)."""
    return self.TextRegion


def set_TextRegion(self, TextRegion):
    """Replace the stored TextRegion collection."""
    self.TextRegion = TextRegion


def add_TextRegion(self, value):
    """Append ``value`` to the TextRegion children."""
    entries = self.TextRegion
    entries.append(value)


def insert_TextRegion_at(self, index, value):
    """Insert ``value`` into the TextRegion children at position ``index``."""
    entries = self.TextRegion
    entries.insert(index, value)


def replace_TextRegion_at(self, index, value):
    """Overwrite the TextRegion child at position ``index`` with ``value``."""
    entries = self.TextRegion
    entries[index] = value
def get_ImageRegion(self):
    """Return the ImageRegion children (the stored list, not a copy)."""
    return self.ImageRegion


def set_ImageRegion(self, ImageRegion):
    """Replace the stored ImageRegion collection."""
    self.ImageRegion = ImageRegion


def add_ImageRegion(self, value):
    """Append ``value`` to the ImageRegion children."""
    entries = self.ImageRegion
    entries.append(value)


def insert_ImageRegion_at(self, index, value):
    """Insert ``value`` into the ImageRegion children at position ``index``."""
    entries = self.ImageRegion
    entries.insert(index, value)


def replace_ImageRegion_at(self, index, value):
    """Overwrite the ImageRegion child at position ``index`` with ``value``."""
    entries = self.ImageRegion
    entries[index] = value
def get_LineDrawingRegion(self):
    """Return the LineDrawingRegion children (the stored list, not a copy)."""
    return self.LineDrawingRegion


def set_LineDrawingRegion(self, LineDrawingRegion):
    """Replace the stored LineDrawingRegion collection."""
    self.LineDrawingRegion = LineDrawingRegion


def add_LineDrawingRegion(self, value):
    """Append ``value`` to the LineDrawingRegion children."""
    entries = self.LineDrawingRegion
    entries.append(value)


def insert_LineDrawingRegion_at(self, index, value):
    """Insert ``value`` into the LineDrawingRegion children at position ``index``."""
    entries = self.LineDrawingRegion
    entries.insert(index, value)


def replace_LineDrawingRegion_at(self, index, value):
    """Overwrite the LineDrawingRegion child at position ``index`` with ``value``."""
    entries = self.LineDrawingRegion
    entries[index] = value
def get_GraphicRegion(self):
    """Return the GraphicRegion children (the stored list, not a copy)."""
    return self.GraphicRegion


def set_GraphicRegion(self, GraphicRegion):
    """Replace the stored GraphicRegion collection."""
    self.GraphicRegion = GraphicRegion


def add_GraphicRegion(self, value):
    """Append ``value`` to the GraphicRegion children."""
    entries = self.GraphicRegion
    entries.append(value)


def insert_GraphicRegion_at(self, index, value):
    """Insert ``value`` into the GraphicRegion children at position ``index``."""
    entries = self.GraphicRegion
    entries.insert(index, value)


def replace_GraphicRegion_at(self, index, value):
    """Overwrite the GraphicRegion child at position ``index`` with ``value``."""
    entries = self.GraphicRegion
    entries[index] = value
def get_TableRegion(self):
    """Return the TableRegion children (the stored list, not a copy)."""
    return self.TableRegion


def set_TableRegion(self, TableRegion):
    """Replace the stored TableRegion collection."""
    self.TableRegion = TableRegion


def add_TableRegion(self, value):
    """Append ``value`` to the TableRegion children."""
    entries = self.TableRegion
    entries.append(value)


def insert_TableRegion_at(self, index, value):
    """Insert ``value`` into the TableRegion children at position ``index``."""
    entries = self.TableRegion
    entries.insert(index, value)


def replace_TableRegion_at(self, index, value):
    """Overwrite the TableRegion child at position ``index`` with ``value``."""
    entries = self.TableRegion
    entries[index] = value
def get_ChartRegion(self):
    """Return the ChartRegion children (the stored list, not a copy)."""
    return self.ChartRegion


def set_ChartRegion(self, ChartRegion):
    """Replace the stored ChartRegion collection."""
    self.ChartRegion = ChartRegion


def add_ChartRegion(self, value):
    """Append ``value`` to the ChartRegion children."""
    entries = self.ChartRegion
    entries.append(value)


def insert_ChartRegion_at(self, index, value):
    """Insert ``value`` into the ChartRegion children at position ``index``."""
    entries = self.ChartRegion
    entries.insert(index, value)


def replace_ChartRegion_at(self, index, value):
    """Overwrite the ChartRegion child at position ``index`` with ``value``."""
    entries = self.ChartRegion
    entries[index] = value
def get_MapRegion(self):
    """Return the MapRegion children (the stored list, not a copy)."""
    return self.MapRegion


def set_MapRegion(self, MapRegion):
    """Replace the stored MapRegion collection."""
    self.MapRegion = MapRegion


def add_MapRegion(self, value):
    """Append ``value`` to the MapRegion children."""
    entries = self.MapRegion
    entries.append(value)


def insert_MapRegion_at(self, index, value):
    """Insert ``value`` into the MapRegion children at position ``index``."""
    entries = self.MapRegion
    entries.insert(index, value)


def replace_MapRegion_at(self, index, value):
    """Overwrite the MapRegion child at position ``index`` with ``value``."""
    entries = self.MapRegion
    entries[index] = value
def get_SeparatorRegion(self):
    """Return the SeparatorRegion children (the stored list, not a copy)."""
    return self.SeparatorRegion


def set_SeparatorRegion(self, SeparatorRegion):
    """Replace the stored SeparatorRegion collection."""
    self.SeparatorRegion = SeparatorRegion


def add_SeparatorRegion(self, value):
    """Append ``value`` to the SeparatorRegion children."""
    entries = self.SeparatorRegion
    entries.append(value)


def insert_SeparatorRegion_at(self, index, value):
    """Insert ``value`` into the SeparatorRegion children at position ``index``."""
    entries = self.SeparatorRegion
    entries.insert(index, value)


def replace_SeparatorRegion_at(self, index, value):
    """Overwrite the SeparatorRegion child at position ``index`` with ``value``."""
    entries = self.SeparatorRegion
    entries[index] = value
def get_MathsRegion(self):
    """Return the MathsRegion children (the stored list, not a copy)."""
    return self.MathsRegion


def set_MathsRegion(self, MathsRegion):
    """Replace the stored MathsRegion collection."""
    self.MathsRegion = MathsRegion


def add_MathsRegion(self, value):
    """Append ``value`` to the MathsRegion children."""
    entries = self.MathsRegion
    entries.append(value)


def insert_MathsRegion_at(self, index, value):
    """Insert ``value`` into the MathsRegion children at position ``index``."""
    entries = self.MathsRegion
    entries.insert(index, value)


def replace_MathsRegion_at(self, index, value):
    """Overwrite the MathsRegion child at position ``index`` with ``value``."""
    entries = self.MathsRegion
    entries[index] = value
def get_ChemRegion(self):
    """Return the ChemRegion children (the stored list, not a copy)."""
    return self.ChemRegion


def set_ChemRegion(self, ChemRegion):
    """Replace the stored ChemRegion collection."""
    self.ChemRegion = ChemRegion


def add_ChemRegion(self, value):
    """Append ``value`` to the ChemRegion children."""
    entries = self.ChemRegion
    entries.append(value)


def insert_ChemRegion_at(self, index, value):
    """Insert ``value`` into the ChemRegion children at position ``index``."""
    entries = self.ChemRegion
    entries.insert(index, value)


def replace_ChemRegion_at(self, index, value):
    """Overwrite the ChemRegion child at position ``index`` with ``value``."""
    entries = self.ChemRegion
    entries[index] = value
def get_MusicRegion(self):
    """Return the MusicRegion children (the stored list, not a copy)."""
    return self.MusicRegion


def set_MusicRegion(self, MusicRegion):
    """Replace the stored MusicRegion collection."""
    self.MusicRegion = MusicRegion


def add_MusicRegion(self, value):
    """Append ``value`` to the MusicRegion children."""
    entries = self.MusicRegion
    entries.append(value)


def insert_MusicRegion_at(self, index, value):
    """Insert ``value`` into the MusicRegion children at position ``index``."""
    entries = self.MusicRegion
    entries.insert(index, value)


def replace_MusicRegion_at(self, index, value):
    """Overwrite the MusicRegion child at position ``index`` with ``value``."""
    entries = self.MusicRegion
    entries[index] = value
def get_AdvertRegion(self):
    """Return the AdvertRegion children (the stored list, not a copy)."""
    return self.AdvertRegion


def set_AdvertRegion(self, AdvertRegion):
    """Replace the stored AdvertRegion collection."""
    self.AdvertRegion = AdvertRegion


def add_AdvertRegion(self, value):
    """Append ``value`` to the AdvertRegion children."""
    entries = self.AdvertRegion
    entries.append(value)


def insert_AdvertRegion_at(self, index, value):
    """Insert ``value`` into the AdvertRegion children at position ``index``."""
    entries = self.AdvertRegion
    entries.insert(index, value)


def replace_AdvertRegion_at(self, index, value):
    """Overwrite the AdvertRegion child at position ``index`` with ``value``."""
    entries = self.AdvertRegion
    entries[index] = value
def get_NoiseRegion(self):
    """Return the NoiseRegion children (the stored list, not a copy)."""
    return self.NoiseRegion


def set_NoiseRegion(self, NoiseRegion):
    """Replace the stored NoiseRegion collection."""
    self.NoiseRegion = NoiseRegion


def add_NoiseRegion(self, value):
    """Append ``value`` to the NoiseRegion children."""
    entries = self.NoiseRegion
    entries.append(value)


def insert_NoiseRegion_at(self, index, value):
    """Insert ``value`` into the NoiseRegion children at position ``index``."""
    entries = self.NoiseRegion
    entries.insert(index, value)


def replace_NoiseRegion_at(self, index, value):
    """Overwrite the NoiseRegion child at position ``index`` with ``value``."""
    entries = self.NoiseRegion
    entries[index] = value
def get_UnknownRegion(self):
    """Return the UnknownRegion children (the stored list, not a copy)."""
    return self.UnknownRegion


def set_UnknownRegion(self, UnknownRegion):
    """Replace the stored UnknownRegion collection."""
    self.UnknownRegion = UnknownRegion


def add_UnknownRegion(self, value):
    """Append ``value`` to the UnknownRegion children."""
    entries = self.UnknownRegion
    entries.append(value)


def insert_UnknownRegion_at(self, index, value):
    """Insert ``value`` into the UnknownRegion children at position ``index``."""
    entries = self.UnknownRegion
    entries.insert(index, value)


def replace_UnknownRegion_at(self, index, value):
    """Overwrite the UnknownRegion child at position ``index`` with ``value``."""
    entries = self.UnknownRegion
    entries[index] = value
def get_CustomRegion(self):
    """Return the CustomRegion children (the stored list, not a copy)."""
    return self.CustomRegion


def set_CustomRegion(self, CustomRegion):
    """Replace the stored CustomRegion collection."""
    self.CustomRegion = CustomRegion


def add_CustomRegion(self, value):
    """Append ``value`` to the CustomRegion children."""
    entries = self.CustomRegion
    entries.append(value)


def insert_CustomRegion_at(self, index, value):
    """Insert ``value`` into the CustomRegion children at position ``index``."""
    entries = self.CustomRegion
    entries.insert(index, value)


def replace_CustomRegion_at(self, index, value):
    """Overwrite the CustomRegion child at position ``index`` with ``value``."""
    entries = self.CustomRegion
    entries[index] = value
def get_imageFilename(self):
    """Return the ``imageFilename`` attribute of this page."""
    return self.imageFilename


def set_imageFilename(self, imageFilename):
    """Set the ``imageFilename`` attribute of this page."""
    self.imageFilename = imageFilename
def get_imageWidth(self):
    """Return the ``imageWidth`` attribute of this page."""
    return self.imageWidth


def set_imageWidth(self, imageWidth):
    """Set the ``imageWidth`` attribute of this page."""
    self.imageWidth = imageWidth
def get_imageHeight(self):
    """Return the ``imageHeight`` attribute of this page."""
    return self.imageHeight


def set_imageHeight(self, imageHeight):
    """Set the ``imageHeight`` attribute of this page."""
    self.imageHeight = imageHeight
def get_imageXResolution(self):
    """Return the ``imageXResolution`` attribute of this page."""
    return self.imageXResolution


def set_imageXResolution(self, imageXResolution):
    """Set the ``imageXResolution`` attribute of this page."""
    self.imageXResolution = imageXResolution
def get_imageYResolution(self):
    """Return the ``imageYResolution`` attribute of this page."""
    return self.imageYResolution


def set_imageYResolution(self, imageYResolution):
    """Set the ``imageYResolution`` attribute of this page."""
    self.imageYResolution = imageYResolution
def get_imageResolutionUnit(self):
    """Return the ``imageResolutionUnit`` attribute of this page."""
    return self.imageResolutionUnit


def set_imageResolutionUnit(self, imageResolutionUnit):
    """Set the ``imageResolutionUnit`` attribute of this page."""
    self.imageResolutionUnit = imageResolutionUnit
def get_custom(self):
    """Return the ``custom`` attribute of this page."""
    return self.custom


def set_custom(self, custom):
    """Set the ``custom`` attribute of this page."""
    self.custom = custom
[docs] def get_orientation(self): return self.orientation
def set_orientation(self, orientation):
    """Store ``orientation`` as the new value of the ``orientation`` attribute."""
    self.orientation = orientation
def get_type(self):
    """Return the page type, which is stored on the instance as ``type_``."""
    current = self.type_
    return current
def set_type(self, type_):
    """Store ``type_`` as the new page type (kept on the instance as ``type_``)."""
    self.type_ = type_
def get_primaryLanguage(self):
    """Return the current value of the ``primaryLanguage`` attribute."""
    current = self.primaryLanguage
    return current
def set_primaryLanguage(self, primaryLanguage):
    """Store ``primaryLanguage`` as the new value of the ``primaryLanguage`` attribute."""
    self.primaryLanguage = primaryLanguage
def get_secondaryLanguage(self):
    """Return the current value of the ``secondaryLanguage`` attribute."""
    current = self.secondaryLanguage
    return current
def set_secondaryLanguage(self, secondaryLanguage):
    """Store ``secondaryLanguage`` as the new value of the ``secondaryLanguage`` attribute."""
    self.secondaryLanguage = secondaryLanguage
def get_primaryScript(self):
    """Return the current value of the ``primaryScript`` attribute."""
    current = self.primaryScript
    return current
def set_primaryScript(self, primaryScript):
    """Store ``primaryScript`` as the new value of the ``primaryScript`` attribute."""
    self.primaryScript = primaryScript
def get_secondaryScript(self):
    """Return the current value of the ``secondaryScript`` attribute."""
    current = self.secondaryScript
    return current
def set_secondaryScript(self, secondaryScript):
    """Store ``secondaryScript`` as the new value of the ``secondaryScript`` attribute."""
    self.secondaryScript = secondaryScript
def get_readingDirection(self):
    """Return the current value of the ``readingDirection`` attribute."""
    current = self.readingDirection
    return current
def set_readingDirection(self, readingDirection):
    """Store ``readingDirection`` as the new value of the ``readingDirection`` attribute."""
    self.readingDirection = readingDirection
def get_textLineOrder(self):
    """Return the current value of the ``textLineOrder`` attribute."""
    current = self.textLineOrder
    return current
def set_textLineOrder(self, textLineOrder):
    """Store ``textLineOrder`` as the new value of the ``textLineOrder`` attribute."""
    self.textLineOrder = textLineOrder
def get_conf(self):
    """Return the current value of the ``conf`` attribute."""
    current = self.conf
    return current
def set_conf(self, conf):
    """Store ``conf`` as the new value of the ``conf`` attribute."""
    self.conf = conf
def validate_PageTypeSimpleType(self, value):
    """Validate ``value`` against pc:PageTypeSimpleType, a restriction on string.

    Checks only run when ``value`` is not None, the module-level
    ``Validate_simpletypes_`` switch is truthy and ``self.gds_collector_``
    is set; otherwise the value is accepted without inspection.  Every
    violation is reported through ``self.gds_collector_.add_message``.

    Returns:
        bool: ``False`` if a violation was reported, ``True`` otherwise.
    """
    # Start optimistic; a failed restriction flips this to False so the
    # outcome is actually handed back to the caller.
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['front-cover', 'back-cover', 'title', 'table-of-contents', 'index', 'content', 'blank', 'other']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on PageTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            result = False
    return result
def validate_LanguageSimpleType(self, value):
    """Validate ``value`` against pc:LanguageSimpleType, a restriction on string.

    Checks only run when ``value`` is not None, the module-level
    ``Validate_simpletypes_`` switch is truthy and ``self.gds_collector_``
    is set; otherwise the value is accepted without inspection.  Every
    violation is reported through ``self.gds_collector_.add_message``.

    Returns:
        bool: ``False`` if a violation was reported, ``True`` otherwise.
    """
    # Start optimistic; a failed restriction flips this to False so the
    # outcome is actually handed back to the caller.
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = [
            'Abkhaz', 'Afar', 'Afrikaans', 'Akan', 'Albanian', 'Amharic', 'Arabic', 'Aragonese',
            'Armenian', 'Assamese', 'Avaric', 'Avestan', 'Aymara', 'Azerbaijani', 'Bambara',
            'Bashkir', 'Basque', 'Belarusian', 'Bengali', 'Bihari', 'Bislama', 'Bosnian', 'Breton',
            'Bulgarian', 'Burmese', 'Cambodian', 'Cantonese', 'Catalan', 'Chamorro', 'Chechen',
            'Chichewa', 'Chinese', 'Chuvash', 'Cornish', 'Corsican', 'Cree', 'Croatian', 'Czech',
            'Danish', 'Divehi', 'Dutch', 'Dzongkha', 'English', 'Esperanto', 'Estonian', 'Ewe',
            'Faroese', 'Fijian', 'Finnish', 'French', 'Fula', 'Gaelic', 'Galician', 'Ganda',
            'Georgian', 'German', 'Greek', 'Guaraní', 'Gujarati', 'Haitian', 'Hausa', 'Hebrew',
            'Herero', 'Hindi', 'Hiri Motu', 'Hungarian', 'Icelandic', 'Ido', 'Igbo', 'Indonesian',
            'Interlingua', 'Interlingue', 'Inuktitut', 'Inupiaq', 'Irish', 'Italian', 'Japanese',
            'Javanese', 'Kalaallisut', 'Kannada', 'Kanuri', 'Kashmiri', 'Kazakh', 'Khmer', 'Kikuyu',
            'Kinyarwanda', 'Kirundi', 'Komi', 'Kongo', 'Korean', 'Kurdish', 'Kwanyama', 'Kyrgyz',
            'Lao', 'Latin', 'Latvian', 'Limburgish', 'Lingala', 'Lithuanian', 'Luba-Katanga',
            'Luxembourgish', 'Macedonian', 'Malagasy', 'Malay', 'Malayalam', 'Maltese', 'Manx',
            'Māori', 'Marathi', 'Marshallese', 'Mongolian', 'Nauru', 'Navajo', 'Ndonga', 'Nepali',
            'North Ndebele', 'Northern Sami', 'Norwegian', 'Norwegian Bokmål', 'Norwegian Nynorsk',
            'Nuosu', 'Occitan', 'Ojibwe', 'Old Church Slavonic', 'Oriya', 'Oromo', 'Ossetian',
            'Pāli', 'Panjabi', 'Pashto', 'Persian', 'Polish', 'Portuguese', 'Punjabi', 'Quechua',
            'Romanian', 'Romansh', 'Russian', 'Samoan', 'Sango', 'Sanskrit', 'Sardinian', 'Serbian',
            'Shona', 'Sindhi', 'Sinhala', 'Slovak', 'Slovene', 'Somali', 'South Ndebele',
            'Southern Sotho', 'Spanish', 'Sundanese', 'Swahili', 'Swati', 'Swedish', 'Tagalog',
            'Tahitian', 'Tajik', 'Tamil', 'Tatar', 'Telugu', 'Thai', 'Tibetan', 'Tigrinya', 'Tonga',
            'Tsonga', 'Tswana', 'Turkish', 'Turkmen', 'Twi', 'Uighur', 'Ukrainian', 'Urdu', 'Uzbek',
            'Venda', 'Vietnamese', 'Volapük', 'Walloon', 'Welsh', 'Western Frisian', 'Wolof',
            'Xhosa', 'Yiddish', 'Yoruba', 'Zhuang', 'Zulu', 'other',
        ]
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on LanguageSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            result = False
    return result
def validate_ScriptSimpleType(self, value):
    """Validate ``value`` against pc:ScriptSimpleType, a restriction on string.

    Checks only run when ``value`` is not None, the module-level
    ``Validate_simpletypes_`` switch is truthy and ``self.gds_collector_``
    is set; otherwise the value is accepted without inspection.  Every
    violation is reported through ``self.gds_collector_.add_message``.

    Returns:
        bool: ``False`` if a violation was reported, ``True`` otherwise.
    """
    # Start optimistic; a failed restriction flips this to False so the
    # outcome is actually handed back to the caller.
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = [
            'Adlm - Adlam', 'Afak - Afaka', 'Aghb - Caucasian Albanian', 'Ahom - Ahom, Tai Ahom',
            'Arab - Arabic', 'Aran - Arabic (Nastaliq variant)', 'Armi - Imperial Aramaic',
            'Armn - Armenian', 'Avst - Avestan', 'Bali - Balinese', 'Bamu - Bamum',
            'Bass - Bassa Vah', 'Batk - Batak', 'Beng - Bengali', 'Bhks - Bhaiksuki',
            'Blis - Blissymbols', 'Bopo - Bopomofo', 'Brah - Brahmi', 'Brai - Braille',
            'Bugi - Buginese', 'Buhd - Buhid', 'Cakm - Chakma',
            'Cans - Unified Canadian Aboriginal Syllabics', 'Cari - Carian', 'Cham - Cham',
            'Cher - Cherokee', 'Cirt - Cirth', 'Copt - Coptic', 'Cprt - Cypriot',
            'Cyrl - Cyrillic', 'Cyrs - Cyrillic (Old Church Slavonic variant)',
            'Deva - Devanagari (Nagari)', 'Dsrt - Deseret (Mormon)',
            'Dupl - Duployan shorthand, Duployan stenography', 'Egyd - Egyptian demotic',
            'Egyh - Egyptian hieratic', 'Egyp - Egyptian hieroglyphs', 'Elba - Elbasan',
            'Ethi - Ethiopic', 'Geok - Khutsuri (Asomtavruli and Nuskhuri)',
            'Geor - Georgian (Mkhedruli)', 'Glag - Glagolitic', 'Goth - Gothic',
            'Gran - Grantha', 'Grek - Greek', 'Gujr - Gujarati', 'Guru - Gurmukhi',
            'Hanb - Han with Bopomofo', 'Hang - Hangul', 'Hani - Han (Hanzi, Kanji, Hanja)',
            'Hano - Hanunoo (Hanunóo)', 'Hans - Han (Simplified variant)',
            'Hant - Han (Traditional variant)', 'Hatr - Hatran', 'Hebr - Hebrew',
            'Hira - Hiragana', 'Hluw - Anatolian Hieroglyphs', 'Hmng - Pahawh Hmong',
            'Hrkt - Japanese syllabaries', 'Hung - Old Hungarian (Hungarian Runic)',
            'Inds - Indus (Harappan)', 'Ital - Old Italic (Etruscan, Oscan etc.)',
            'Jamo - Jamo', 'Java - Javanese', 'Jpan - Japanese', 'Jurc - Jurchen',
            'Kali - Kayah Li', 'Kana - Katakana', 'Khar - Kharoshthi', 'Khmr - Khmer',
            'Khoj - Khojki', 'Kitl - Khitan large script', 'Kits - Khitan small script',
            'Knda - Kannada', 'Kore - Korean (alias for Hangul + Han)', 'Kpel - Kpelle',
            'Kthi - Kaithi', 'Lana - Tai Tham (Lanna)', 'Laoo - Lao',
            'Latf - Latin (Fraktur variant)', 'Latg - Latin (Gaelic variant)', 'Latn - Latin',
            'Leke - Leke', 'Lepc - Lepcha (Róng)', 'Limb - Limbu', 'Lina - Linear A',
            'Linb - Linear B', 'Lisu - Lisu (Fraser)', 'Loma - Loma', 'Lyci - Lycian',
            'Lydi - Lydian', 'Mahj - Mahajani', 'Mand - Mandaic, Mandaean', 'Mani - Manichaean',
            'Marc - Marchen', 'Maya - Mayan hieroglyphs', 'Mend - Mende Kikakui',
            'Merc - Meroitic Cursive', 'Mero - Meroitic Hieroglyphs', 'Mlym - Malayalam',
            'Modi - Modi, Moḍī', 'Mong - Mongolian',
            'Moon - Moon (Moon code, Moon script, Moon type)', 'Mroo - Mro, Mru',
            'Mtei - Meitei Mayek (Meithei, Meetei)', 'Mult - Multani',
            'Mymr - Myanmar (Burmese)', 'Narb - Old North Arabian (Ancient North Arabian)',
            'Nbat - Nabataean', 'Newa - Newa, Newar, Newari', 'Nkgb - Nakhi Geba',
            'Nkoo - N’Ko', 'Nshu - Nüshu', 'Ogam - Ogham',
            'Olck - Ol Chiki (Ol Cemet’, Ol, Santali)', 'Orkh - Old Turkic, Orkhon Runic',
            'Orya - Oriya', 'Osge - Osage', 'Osma - Osmanya', 'Palm - Palmyrene',
            'Pauc - Pau Cin Hau', 'Perm - Old Permic', 'Phag - Phags-pa',
            'Phli - Inscriptional Pahlavi', 'Phlp - Psalter Pahlavi', 'Phlv - Book Pahlavi',
            'Phnx - Phoenician', 'Piqd - Klingon (KLI pIqaD)', 'Plrd - Miao (Pollard)',
            'Prti - Inscriptional Parthian', 'Rjng - Rejang (Redjang, Kaganga)',
            'Roro - Rongorongo', 'Runr - Runic', 'Samr - Samaritan', 'Sara - Sarati',
            'Sarb - Old South Arabian', 'Saur - Saurashtra', 'Sgnw - SignWriting',
            'Shaw - Shavian (Shaw)', 'Shrd - Sharada, Śāradā', 'Sidd - Siddham',
            'Sind - Khudawadi, Sindhi', 'Sinh - Sinhala', 'Sora - Sora Sompeng',
            'Sund - Sundanese', 'Sylo - Syloti Nagri', 'Syrc - Syriac',
            'Syre - Syriac (Estrangelo variant)', 'Syrj - Syriac (Western variant)',
            'Syrn - Syriac (Eastern variant)', 'Tagb - Tagbanwa', 'Takr - Takri',
            'Tale - Tai Le', 'Talu - New Tai Lue', 'Taml - Tamil', 'Tang - Tangut',
            'Tavt - Tai Viet', 'Telu - Telugu', 'Teng - Tengwar', 'Tfng - Tifinagh (Berber)',
            'Tglg - Tagalog (Baybayin, Alibata)', 'Thaa - Thaana', 'Thai - Thai',
            'Tibt - Tibetan', 'Tirh - Tirhuta', 'Ugar - Ugaritic', 'Vaii - Vai',
            'Visp - Visible Speech', 'Wara - Warang Citi (Varang Kshiti)', 'Wole - Woleai',
            'Xpeo - Old Persian', 'Xsux - Cuneiform, Sumero-Akkadian', 'Yiii - Yi',
            'Zinh - Code for inherited script', 'Zmth - Mathematical notation',
            'Zsye - Symbols (Emoji variant)', 'Zsym - Symbols',
            'Zxxx - Code for unwritten documents', 'Zyyy - Code for undetermined script',
            'Zzzz - Code for uncoded script', 'other',
        ]
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ScriptSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            result = False
    return result
def validate_ReadingDirectionSimpleType(self, value):
    """Validate ``value`` against pc:ReadingDirectionSimpleType, a restriction on string.

    Checks only run when ``value`` is not None, the module-level
    ``Validate_simpletypes_`` switch is truthy and ``self.gds_collector_``
    is set; otherwise the value is accepted without inspection.  Every
    violation is reported through ``self.gds_collector_.add_message``.

    Returns:
        bool: ``False`` if a violation was reported, ``True`` otherwise.
    """
    # Start optimistic; a failed restriction flips this to False so the
    # outcome is actually handed back to the caller.
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['left-to-right', 'right-to-left', 'top-to-bottom', 'bottom-to-top']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ReadingDirectionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            result = False
    return result
def validate_TextLineOrderSimpleType(self, value):
    """Validate ``value`` against pc:TextLineOrderSimpleType, a restriction on string.

    Checks only run when ``value`` is not None, the module-level
    ``Validate_simpletypes_`` switch is truthy and ``self.gds_collector_``
    is set; otherwise the value is accepted without inspection.  Every
    violation is reported through ``self.gds_collector_.add_message``.

    Returns:
        bool: ``False`` if a violation was reported, ``True`` otherwise.
    """
    # Start optimistic; a failed restriction flips this to False so the
    # outcome is actually handed back to the caller.
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['top-to-bottom', 'bottom-to-top', 'left-to-right', 'right-to-left']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on TextLineOrderSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            result = False
    return result
def validate_ConfSimpleType(self, value):
    """Validate ``value`` against pc:ConfSimpleType, a restriction on float.

    Checks only run when ``value`` is not None, the module-level
    ``Validate_simpletypes_`` switch is truthy and ``self.gds_collector_``
    is set; otherwise the value is accepted without inspection.  A value
    that is not a ``float``, is below 0 (minInclusive) or above 1
    (maxInclusive) is reported through ``self.gds_collector_.add_message``.

    Returns:
        bool: ``False`` if a violation was reported, ``True`` otherwise.
    """
    # Start optimistic; a failed restriction flips this to False so the
    # outcome is actually handed back to the caller.
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, float):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (float)' % {"value": value, "lineno": lineno, })
            return False
        if value < 0:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd minInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} )
            result = False
        if value > 1:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} )
            result = False
    return result
def hasContent_(self):
    """Report whether this instance holds any child content.

    Returns True when at least one repeatable child collection is
    non-empty or at least one single-valued child is not None, and
    False otherwise.
    """
    return bool(
        self.AlternativeImage
        or self.Border is not None
        or self.PrintSpace is not None
        or self.ReadingOrder is not None
        or self.Layers is not None
        or self.Relations is not None
        or self.TextStyle is not None
        or self.UserDefined is not None
        or self.Labels
        or self.TextRegion
        or self.ImageRegion
        or self.LineDrawingRegion
        or self.GraphicRegion
        or self.TableRegion
        or self.ChartRegion
        or self.MapRegion
        or self.SeparatorRegion
        or self.MathsRegion
        or self.ChemRegion
        or self.MusicRegion
        or self.AdvertRegion
        or self.NoiseRegion
        or self.UnknownRegion
        or self.CustomRegion
    )
def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PageType', pretty_print=True):
    """Write this object as an XML element to ``outfile``.

    The opening tag is written with its namespace definition and
    attributes (via ``exportAttributes``).  When ``hasContent_()`` is
    true the children follow (via ``exportChildren``) and a closing tag
    is written; otherwise the element is emitted self-closed.

    A namespace definition registered for 'PageType' in
    ``GenerateDSNamespaceDefs_`` replaces ``namespacedef_``.
    ``self.original_tagname_`` replaces the default tag name, and
    ``self.ns_prefix_`` replaces the prefix when ``UseCapturedNS_`` is on.
    """
    registered_ns_def = GenerateDSNamespaceDefs_.get('PageType')
    if registered_ns_def is not None:
        namespacedef_ = registered_ns_def
    eol_ = '\n' if pretty_print else ''
    if self.original_tagname_ is not None and name_ == 'PageType':
        name_ = self.original_tagname_
    if UseCapturedNS_ and self.ns_prefix_:
        namespaceprefix_ = self.ns_prefix_ + ':'
    showIndent(outfile, level, pretty_print)
    ns_declaration = ' ' + namespacedef_ if namespacedef_ else ''
    outfile.write('<%s%s%s' % (namespaceprefix_, name_, ns_declaration, ))
    already_processed = set()
    self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PageType')
    if not self.hasContent_():
        # Nothing nested: emit a self-closing element and stop.
        outfile.write('/>%s' % (eol_, ))
        return
    outfile.write('>%s' % (eol_, ))
    self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='PageType', pretty_print=pretty_print)
    showIndent(outfile, level, pretty_print)
    outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PageType'):
    """Write the XML attributes of this object to ``outfile``.

    An attribute is written only when its value is not None and its key
    is not yet in ``already_processed``; the key is added to that set
    before writing.  The XML attribute ``type`` is read from
    ``self.type_`` and tracked under the key ``'type_'``.
    ``level``, ``namespaceprefix_`` and ``name_`` are not used here.
    """
    def _as_string(raw, xml_name):
        # String values are quoted, formatted and encoded; the result is
        # written as-is after the '=' sign.
        return self.gds_encode(self.gds_format_string(quote_attrib(raw), input_name=xml_name))

    def _as_integer(raw, xml_name):
        return '"%s"' % self.gds_format_integer(raw, input_name=xml_name)

    def _as_float(raw, xml_name):
        return '"%s"' % self.gds_format_float(raw, input_name=xml_name)

    # (member name and already_processed key, XML attribute name, renderer)
    layout = (
        ('imageFilename', 'imageFilename', _as_string),
        ('imageWidth', 'imageWidth', _as_integer),
        ('imageHeight', 'imageHeight', _as_integer),
        ('imageXResolution', 'imageXResolution', _as_float),
        ('imageYResolution', 'imageYResolution', _as_float),
        ('imageResolutionUnit', 'imageResolutionUnit', _as_string),
        ('custom', 'custom', _as_string),
        ('orientation', 'orientation', _as_float),
        ('type_', 'type', _as_string),
        ('primaryLanguage', 'primaryLanguage', _as_string),
        ('secondaryLanguage', 'secondaryLanguage', _as_string),
        ('primaryScript', 'primaryScript', _as_string),
        ('secondaryScript', 'secondaryScript', _as_string),
        ('readingDirection', 'readingDirection', _as_string),
        ('textLineOrder', 'textLineOrder', _as_string),
        ('conf', 'conf', _as_float),
    )
    for member, xml_name, render in layout:
        current = getattr(self, member)
        if current is None or member in already_processed:
            continue
        already_processed.add(member)
        outfile.write(' %s=%s' % (xml_name, render(current, xml_name)))
def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PageType', fromsubclass_=False, pretty_print=True):
    """Export every child object of this instance to ``outfile``.

    Children are emitted in a fixed order by calling each child's own
    ``export`` with an empty ``namespacedef_`` and the child's tag name.
    The prefix passed to a child is ``self.<Tag>_nsprefix_ + ':'`` when
    ``UseCapturedNS_`` is on and that prefix is set, else an empty string;
    the incoming ``namespaceprefix_`` is not forwarded.
    """
    def _prefix_for(tag):
        # The captured prefix is consulted only when UseCapturedNS_ is on.
        if UseCapturedNS_ and getattr(self, tag + '_nsprefix_'):
            return getattr(self, tag + '_nsprefix_') + ':'
        return ''

    # (tag name, True when the member is a collection of children)
    layout = (
        ('AlternativeImage', True),
        ('Border', False),
        ('PrintSpace', False),
        ('ReadingOrder', False),
        ('Layers', False),
        ('Relations', False),
        ('TextStyle', False),
        ('UserDefined', False),
        ('Labels', True),
        ('TextRegion', True),
        ('ImageRegion', True),
        ('LineDrawingRegion', True),
        ('GraphicRegion', True),
        ('TableRegion', True),
        ('ChartRegion', True),
        ('MapRegion', True),
        ('SeparatorRegion', True),
        ('MathsRegion', True),
        ('ChemRegion', True),
        ('MusicRegion', True),
        ('AdvertRegion', True),
        ('NoiseRegion', True),
        ('UnknownRegion', True),
        ('CustomRegion', True),
    )
    for tag, repeated in layout:
        member = getattr(self, tag)
        if repeated:
            children = member
        elif member is not None:
            children = (member,)
        else:
            children = ()
        for child in children:
            namespaceprefix_ = _prefix_for(tag)
            child.export(outfile, level, namespaceprefix_, namespacedef_='', name_=tag, pretty_print=pretty_print)
def to_etree(self, parent_element=None, name_='PageType', mapping_=None, nsmap_=None):
    """Build an ElementTree element representing this object.

    The element is created in the PAGE 2019-07-15 namespace, either
    stand-alone or as a sub-element of ``parent_element``.  Every
    attribute whose value is not None is set on it, and every child
    object is appended through its own ``to_etree``.  When ``mapping_``
    is given, the new element is recorded under ``id(self)``.

    Returns:
        The newly created element.
    """
    qualified_tag = '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_
    if parent_element is None:
        element = etree_.Element(qualified_tag, nsmap=nsmap_)
    else:
        element = etree_.SubElement(parent_element, qualified_tag, nsmap=nsmap_)

    # (member name, XML attribute name, name of the formatting method)
    attribute_layout = (
        ('imageFilename', 'imageFilename', 'gds_format_string'),
        ('imageWidth', 'imageWidth', 'gds_format_integer'),
        ('imageHeight', 'imageHeight', 'gds_format_integer'),
        ('imageXResolution', 'imageXResolution', 'gds_format_float'),
        ('imageYResolution', 'imageYResolution', 'gds_format_float'),
        ('imageResolutionUnit', 'imageResolutionUnit', 'gds_format_string'),
        ('custom', 'custom', 'gds_format_string'),
        ('orientation', 'orientation', 'gds_format_float'),
        ('type_', 'type', 'gds_format_string'),
        ('primaryLanguage', 'primaryLanguage', 'gds_format_string'),
        ('secondaryLanguage', 'secondaryLanguage', 'gds_format_string'),
        ('primaryScript', 'primaryScript', 'gds_format_string'),
        ('secondaryScript', 'secondaryScript', 'gds_format_string'),
        ('readingDirection', 'readingDirection', 'gds_format_string'),
        ('textLineOrder', 'textLineOrder', 'gds_format_string'),
        ('conf', 'conf', 'gds_format_float'),
    )
    for member, xml_name, formatter_name in attribute_layout:
        current = getattr(self, member)
        if current is not None:
            element.set(xml_name, getattr(self, formatter_name)(current))

    # (tag name, True when the member is a collection of children)
    child_layout = (
        ('AlternativeImage', True),
        ('Border', False),
        ('PrintSpace', False),
        ('ReadingOrder', False),
        ('Layers', False),
        ('Relations', False),
        ('TextStyle', False),
        ('UserDefined', False),
        ('Labels', True),
        ('TextRegion', True),
        ('ImageRegion', True),
        ('LineDrawingRegion', True),
        ('GraphicRegion', True),
        ('TableRegion', True),
        ('ChartRegion', True),
        ('MapRegion', True),
        ('SeparatorRegion', True),
        ('MathsRegion', True),
        ('ChemRegion', True),
        ('MusicRegion', True),
        ('AdvertRegion', True),
        ('NoiseRegion', True),
        ('UnknownRegion', True),
        ('CustomRegion', True),
    )
    for tag, repeated in child_layout:
        member = getattr(self, tag)
        if repeated:
            children = member
        elif member is not None:
            children = (member,)
        else:
            children = ()
        for child in children:
            child.to_etree(element, name_=tag, mapping_=mapping_, nsmap_=nsmap_)

    if mapping_ is not None:
        mapping_[id(self)] = element
    return element
def build(self, node, gds_collector_=None):
    """Populate this object from the element ``node``.

    Stores the collector, optionally keeps a reference to ``node`` (when
    the module-level ``SaveElementTreeNode`` flag is on), records the
    node's namespace prefix, then reads the attributes through
    ``buildAttributes`` and each child through ``buildChildren``.

    Returns:
        This instance, so calls can be chained.
    """
    self.gds_collector_ = gds_collector_
    if SaveElementTreeNode:
        self.gds_elementtree_node_ = node
    self.ns_prefix_ = node.prefix
    seen_attributes = set()
    self.buildAttributes(node, node.attrib, seen_attributes)
    for child in node:
        # The last group matched by Tag_pattern_ is used as the node name.
        child_name = Tag_pattern_.match(child.tag).groups()[-1]
        self.buildChildren(child, node, child_name, gds_collector_=gds_collector_)
    return self
def buildAttributes(self, node, attrs, already_processed):
    """Read the XML attributes of ``node`` into this object.

    Each attribute is looked up with ``find_attr_value_``.  It is taken
    only when it is present and its name is not yet in
    ``already_processed``, and the name is then added to that set.
    Integer and float attributes are parsed with ``gds_parse_integer`` and
    ``gds_parse_float``.  Attributes with a simple-type restriction are
    passed to the matching ``validate_*`` method after being stored.
    ``attrs`` is not used here.
    """
    def _claim(xml_name):
        # Return the raw attribute text, or None when it is absent or was
        # already consumed.
        raw = find_attr_value_(xml_name, node)
        if raw is None or xml_name in already_processed:
            return None
        already_processed.add(xml_name)
        return raw

    raw = _claim('imageFilename')
    if raw is not None:
        self.imageFilename = raw

    raw = _claim('imageWidth')
    if raw is not None:
        self.imageWidth = self.gds_parse_integer(raw, node, 'imageWidth')

    raw = _claim('imageHeight')
    if raw is not None:
        self.imageHeight = self.gds_parse_integer(raw, node, 'imageHeight')

    raw = _claim('imageXResolution')
    if raw is not None:
        self.imageXResolution = self.gds_parse_float(raw, node, 'imageXResolution')

    raw = _claim('imageYResolution')
    if raw is not None:
        self.imageYResolution = self.gds_parse_float(raw, node, 'imageYResolution')

    raw = _claim('imageResolutionUnit')
    if raw is not None:
        self.imageResolutionUnit = raw

    raw = _claim('custom')
    if raw is not None:
        self.custom = raw

    raw = _claim('orientation')
    if raw is not None:
        self.orientation = self.gds_parse_float(raw, node, 'orientation')

    raw = _claim('type')
    if raw is not None:
        self.type_ = raw
        self.validate_PageTypeSimpleType(self.type_)    # validate type PageTypeSimpleType

    raw = _claim('primaryLanguage')
    if raw is not None:
        self.primaryLanguage = raw
        self.validate_LanguageSimpleType(self.primaryLanguage)    # validate type LanguageSimpleType

    raw = _claim('secondaryLanguage')
    if raw is not None:
        self.secondaryLanguage = raw
        self.validate_LanguageSimpleType(self.secondaryLanguage)    # validate type LanguageSimpleType

    raw = _claim('primaryScript')
    if raw is not None:
        self.primaryScript = raw
        self.validate_ScriptSimpleType(self.primaryScript)    # validate type ScriptSimpleType

    raw = _claim('secondaryScript')
    if raw is not None:
        self.secondaryScript = raw
        self.validate_ScriptSimpleType(self.secondaryScript)    # validate type ScriptSimpleType

    raw = _claim('readingDirection')
    if raw is not None:
        self.readingDirection = raw
        self.validate_ReadingDirectionSimpleType(self.readingDirection)    # validate type ReadingDirectionSimpleType

    raw = _claim('textLineOrder')
    if raw is not None:
        self.textLineOrder = raw
        self.validate_TextLineOrderSimpleType(self.textLineOrder)    # validate type TextLineOrderSimpleType

    raw = _claim('conf')
    if raw is not None:
        self.conf = self.gds_parse_float(raw, node, 'conf')
        self.validate_ConfSimpleType(self.conf)    # validate type ConfSimpleType
def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
    """Build one child element and attach it to this page.

    The child class is chosen by `nodeName_`. Single-valued children
    replace the attribute of the same name, repeatable children are
    appended to the list attribute of the same name. Unknown node names
    are ignored.

    Arguments:
        child_: the child element to build from.
        node: the parent element (not used by this method).
        nodeName_ (str): tag name of `child_` without namespace.
        fromsubclass_: not used by this method.
        gds_collector_: message collector passed on to the child's ``build``.
    """
    # children that occur at most once: stored by plain attribute assignment
    single_valued = {
        'Border': BorderType,
        'PrintSpace': PrintSpaceType,
        'ReadingOrder': ReadingOrderType,
        'Layers': LayersType,
        'Relations': RelationsType,
        'TextStyle': TextStyleType,
        'UserDefined': UserDefinedType,
    }
    # children that may repeat: appended to the list of the same name
    repeatable = {
        'AlternativeImage': AlternativeImageType,
        'Labels': LabelsType,
        'TextRegion': TextRegionType,
        'ImageRegion': ImageRegionType,
        'LineDrawingRegion': LineDrawingRegionType,
        'GraphicRegion': GraphicRegionType,
        'TableRegion': TableRegionType,
        'ChartRegion': ChartRegionType,
        'MapRegion': MapRegionType,
        'SeparatorRegion': SeparatorRegionType,
        'MathsRegion': MathsRegionType,
        'ChemRegion': ChemRegionType,
        'MusicRegion': MusicRegionType,
        'AdvertRegion': AdvertRegionType,
        'NoiseRegion': NoiseRegionType,
        'UnknownRegion': UnknownRegionType,
        'CustomRegion': CustomRegionType,
    }
    if nodeName_ in single_valued:
        obj_ = single_valued[nodeName_].factory(parent_object_=self)
        obj_.build(child_, gds_collector_=gds_collector_)
        # direct assignment on purpose: set_Border() etc. must not be triggered
        setattr(self, nodeName_, obj_)
    elif nodeName_ in repeatable:
        obj_ = repeatable[nodeName_].factory(parent_object_=self)
        obj_.build(child_, gds_collector_=gds_collector_)
        getattr(self, nodeName_).append(obj_)
    else:
        return
    obj_.original_tagname_ = nodeName_
def __hash__(self):
    """Hash this object by its ``id``."""
    return hash(self.id)

@property
def id(self):
    """Return ``pcGtsId`` when the object has that attribute, else ``imageFilename``."""
    return self.pcGtsId if hasattr(self, 'pcGtsId') else self.imageFilename

# pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
def _region_class(self, x): # pylint: disable=unused-argument
    """Return the class name of `x` with ``RegionType`` removed."""
    return type(x).__name__.replace('RegionType', '')

def _get_recursive_regions(self, regions, level, classes=None):
    """Collect `regions` and their sub-regions, parents before children.

    Arguments:
        regions (list): segments to start from.
        level (int): remaining levels; ``1`` stops the recursion and applies
            the class filter, ``0`` recurses until no sub-regions are left.
        classes (list): region class names to keep, as produced by
            ``_region_class``; falsy keeps everything.
    """
    from .constants import PAGE_REGION_TYPES # pylint: disable=relative-beyond-top-level,import-outside-toplevel
    if level == 1:
        # stop recursion, filter classes
        if classes:
            return [r for r in regions if self._region_class(r) in classes]
        leading_page = bool(regions) and regions[0].__class__.__name__ == 'PageType'
        # a leading PageType is not a region and is dropped from the result
        return regions[1:] if leading_page else regions
    # find more regions recursively
    children_per_region = []
    for parent in regions:
        children = []
        for kind in PAGE_REGION_TYPES:
            # 'Map' is not recursive in 2019 schema
            if kind != 'Map' or isinstance(parent, PageType): # pylint: disable=undefined-variable
                children += getattr(parent, 'get_{}Region'.format(kind))()
        children_per_region.append(children)
    if not any(children_per_region):
        return self._get_recursive_regions(regions, 1, classes)
    next_level = level - 1 if level else 0
    flattened = []
    for parent, children in zip(regions, children_per_region):
        flattened.append(parent)
        flattened += self._get_recursive_regions(children, next_level, classes)
    return self._get_recursive_regions(flattened, 1, classes)

def _get_recursive_reading_order(self, rogroup):
    """Return the ``regionRef`` values of `rogroup` and of all nested groups.

    Each member contributes its own ``regionRef``; members that are not
    plain region references are then descended into.
    """
    # NOTE: if `rogroup` matches neither test below, `members` stays unbound
    if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
        members = rogroup.get_AllIndexed()
    if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
        members = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
    refs = []
    for member in members:
        refs.append(member.get_regionRef())
        is_plain_ref = isinstance(member, (RegionRefType, RegionRefIndexedType)) # pylint: disable=undefined-variable
        if not is_plain_ref:
            refs.extend(self._get_recursive_reading_order(member))
    return refs
def get_AllRegions(self, classes=None, order='document', depth=0):
    """
    Get all the ``*Region`` elements, or only those provided by `classes`.
    Return in document order, unless `order` is ``reading-order``.

    Arguments:
        classes (list): Classes of regions that shall be returned,
            e.g. ``['Text', 'Image']``
        order ("document"|"reading-order"|"reading-order-only"): Whether to
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``)
        depth (int): Recursive depth to look for regions at, set to `0` for
            all regions at any depth. Default: 0

    Returns:
        a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
            :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
            :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
            :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
            :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
            :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
            :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
            and/or :py:class:`CustomRegionType`

    Raises:
        ValueError: if `order` is not one of the supported values, or if
            `depth` is negative.

    For example, to get all text anywhere on the page in reading order, use:
    ::
        '\\n'.join(line.get_TextEquiv()[0].Unicode
                  for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                  for line in region.get_TextLine())
    """
    # ValueError is a subclass of Exception, so existing handlers keep working
    if order not in ['document', 'reading-order', 'reading-order-only']:
        raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
    if depth < 0:
        raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
    # level 0 means unlimited recursion, level n+1 means n levels below the page
    ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
    if not order.startswith('reading-order'):
        return ret
    reading_order = self.get_ReadingOrder()
    if reading_order:
        reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
    if reading_order:
        # flatten the group tree into a list of region ids
        reading_order = self._get_recursive_reading_order(reading_order)
    if reading_order:
        id2region = {region.id: region for region in ret}
        # ids that do not resolve to a collected region are skipped
        in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
        if order == 'reading-order-only':
            ret = in_reading_order
        else:
            ret = in_reading_order + [r for r in ret if r not in in_reading_order]
    return ret
def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, glyph=True):
    """
    Get all the ``pc:AlternativeImage`` in a document

    Only regions returned by ``get_AllRegions(['Text'])`` are visited, so
    region-level images are collected from text regions only. Every level
    is traversed regardless of the flags; a flag only controls whether the
    images of that level are added to the result.

    Arguments:
        page (boolean): Get images on ``pc:Page`` level
        region (boolean): Get images on ``pc:*Region`` level
        line (boolean): Get images on ``pc:TextLine`` level
        word (boolean): Get images on ``pc:Word`` level
        glyph (boolean): Get images on ``pc:Glyph`` level

    Returns:
        a list of :py:class:`AlternativeImageType`
    """
    images = []
    if page:
        images.extend(self.get_AlternativeImage())
    for text_region in self.get_AllRegions(['Text']):
        if region:
            images.extend(text_region.get_AlternativeImage())
        for text_line in text_region.get_TextLine():
            if line:
                images.extend(text_line.get_AlternativeImage())
            for text_word in text_line.get_Word():
                if word:
                    images.extend(text_word.get_AlternativeImage())
                if not glyph:
                    # still fetch the glyph list, as every level is traversed
                    text_word.get_Glyph()
                    continue
                for text_glyph in text_word.get_Glyph():
                    images.extend(text_glyph.get_AlternativeImage())
    return images
def invalidate_AlternativeImage(self, feature_selector=None):
    """
    Remove derived images from this segment (due to changed coordinates).

    If `feature_selector` is not none, remove only images with
    matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``.

    Arguments:
        feature_selector (str): comma-separated feature names; an image is
            removed when its comma-separated ``@comments`` contain at least
            one of them. Empty or ``None`` removes all images.

    Each removal is reported through ``self.gds_collector_`` when a
    collector is set; without a collector the removal is silent.
    """
    existing_images = self.AlternativeImage or []
    if feature_selector:
        # parse the selector once instead of once per image
        wanted = [feature for feature in feature_selector.split(',') if feature]
        removed_images = []
        kept_images = []
        for image in existing_images:
            present = (image.get_comments() or '').split(',')
            if any(feature in present for feature in wanted):
                removed_images.append(image)
            else:
                kept_images.append(image)
        self.AlternativeImage = kept_images
    else:
        removed_images = existing_images
        self.AlternativeImage = []
    # The collector is None on objects that were constructed rather than
    # parsed; logging must not make the invalidation itself fail.
    collector = self.gds_collector_
    if collector is None or not removed_images:
        return
    if hasattr(self, 'id'):
        name = self.id
    elif hasattr(self, 'parent_object_') and hasattr(self.parent_object_, 'pcGtsId'):
        name = self.parent_object_.pcGtsId
    else:
        name = ''
    for image in removed_images:
        collector.add_message('Removing AlternativeImage %s from "%s"' % (
            image.get_comments() or '', name))
def set_Border(self, Border):
    """
    Set coordinate polygon by given :py:class:`BorderType` object.

    Before the new value is stored, self's ``pc:AlternativeImage``s
    carrying the ``cropped`` feature are invalidated (because they will
    have been cropped with a bbox of the previous polygon).
    """
    stale_feature = 'cropped'
    self.invalidate_AlternativeImage(feature_selector=stale_feature)
    self.Border = Border
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Return all the TextLine in the document

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Whether to
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``)
        respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    forward_orders = ('top-to-bottom', 'left-to-right')
    collected = []
    for text_region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = text_region.get_TextLine()
        if respect_textline_order:
            # the region's own setting wins over the page's, default is top-to-bottom
            line_order = text_region.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom'
            if line_order not in forward_orders:
                region_lines = list(reversed(region_lines))
        collected += region_lines
    return collected
def set_orientation(self, orientation):
    """
    Set deskewing angle to given `orientation` number.

    Before the new value is stored, self's ``pc:AlternativeImage``s
    carrying the ``deskewed`` feature are invalidated (because they will
    have been rotated and enlarged with the angle of the previous value).
    Objects without ``invalidate_AlternativeImage`` only get the new value.
    """
    can_invalidate = hasattr(self, 'invalidate_AlternativeImage')
    if can_invalidate:
        # PageType, RegionType:
        self.invalidate_AlternativeImage(feature_selector='deskewed')
    self.orientation = orientation
# end class PageType
class CoordsType(GeneratedsSuper):
    """Polygon outline of the element as a path of points.

    No points may lie outside the outline of its parent,
    which in the case of Border is the bounding rectangle
    of the root image. Paths are closed by convention,
    i.e. the last point logically connects with the first
    (and at least 3 points are required to span an area).
    Paths must be planar (i.e. must not self-intersect).

    Attributes:
        points: the path, a ``pc:PointsType`` string (required)
        conf: Confidence value (between 0 and 1) (optional)
    """
    member_data_items_ = [
        MemberSpec_('points', 'pc:PointsType', 0, 0, {'use': 'required'}),
        MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}),
    ]
    subclass = None
    superclass = None

    def __init__(self, points=None, conf=None, gds_collector_=None, **kwargs_):
        """Create a coordinate polygon; ``parent_object_`` may be passed as keyword."""
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = None
        self.points = _cast(None, points)
        self.points_nsprefix_ = "pc"
        self.conf = _cast(float, conf)
        self.conf_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate the registered subclass of CoordsType, or CoordsType itself."""
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, CoordsType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if CoordsType.subclass:
            return CoordsType.subclass(*args_, **kwargs_)
        return CoordsType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix recorded for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix used when exporting this element."""
        self.ns_prefix_ = ns_prefix

    def get_points(self):
        """Return the ``points`` attribute."""
        return self.points

    def get_conf(self):
        """Return the ``conf`` attribute."""
        return self.conf

    def set_conf(self, conf):
        """Set the ``conf`` attribute."""
        self.conf = conf

    def validate_PointsType(self, value):
        """Validate `value` against ``pc:PointsType``, a restriction on string.

        Problems are reported to ``self.gds_collector_``. Nothing is checked
        when `value` is None, validation is disabled or no collector is set.

        Returns:
            ``False`` if a problem was reported, ``True`` otherwise.
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, str):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
                return False
            if not self.gds_validate_simple_patterns(
                    self.validate_PointsType_patterns_, value):
                self.gds_collector_.add_message('Value "%s" does not match xsd pattern restrictions: %s' % (encode_str_2_3(value), self.validate_PointsType_patterns_, ))
                result = False
        return result
    validate_PointsType_patterns_ = [['^(([0-9]+,[0-9]+ )+([0-9]+,[0-9]+))$']]

    def validate_ConfSimpleType(self, value):
        """Validate `value` against ``pc:ConfSimpleType``, a float within [0, 1].

        Problems are reported to ``self.gds_collector_``. Nothing is checked
        when `value` is None, validation is disabled or no collector is set.

        Returns:
            ``False`` if a problem was reported, ``True`` otherwise.
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, float):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (float)' % {"value": value, "lineno": lineno, })
                return False
            if value < 0:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd minInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} )
                result = False
            if value > 1:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} )
                result = False
        return result

    def hasContent_(self):
        """Return False: this element has attributes only, no child content."""
        return False

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CoordsType', pretty_print=True):
        """Write this element as XML text to `outfile`.

        Arguments:
            outfile: object with a ``write`` method.
            level (int): indentation level handed to ``showIndent``.
            namespaceprefix_ (str): prefix put before the tag name.
            namespacedef_ (str): namespace declaration written into the tag.
            name_ (str): tag name; replaced by ``original_tagname_`` when
                that is set and `name_` is the default.
            pretty_print (bool): whether to emit indentation and newlines.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('CoordsType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'CoordsType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        ns_decl = ' ' + namespacedef_ if namespacedef_ else ''
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ns_decl, ))
        already_processed = set()
        self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CoordsType')
        if self.hasContent_():
            outfile.write('>%s' % (eol_, ))
            self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='CoordsType', pretty_print=pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            outfile.write('/>%s' % (eol_, ))

    def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='CoordsType'):
        """Write the ``points`` and ``conf`` attributes that are set and not yet processed."""
        if self.points is not None and 'points' not in already_processed:
            already_processed.add('points')
            outfile.write(' points=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.points), input_name='points')), ))
        if self.conf is not None and 'conf' not in already_processed:
            already_processed.add('conf')
            outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf'))

    def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CoordsType', fromsubclass_=False, pretty_print=True):
        """Do nothing: this element has no children to export."""
        pass

    def to_etree(self, parent_element=None, name_='CoordsType', mapping_=None, nsmap_=None):
        """Create and return an etree element for this object.

        The element is appended to `parent_element` when one is given. When
        `mapping_` is given, the element is recorded there under ``id(self)``.
        """
        tag = '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_
        if parent_element is None:
            element = etree_.Element(tag, nsmap=nsmap_)
        else:
            element = etree_.SubElement(parent_element, tag, nsmap=nsmap_)
        if self.points is not None:
            element.set('points', self.gds_format_string(self.points))
        if self.conf is not None:
            element.set('conf', self.gds_format_float(self.conf))
        if mapping_ is not None:
            mapping_[id(self)] = element
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from an ElementTree node and return ``self``."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self.buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def buildAttributes(self, node, attrs, already_processed):
        """Read and validate the ``points`` and ``conf`` attributes of `node`."""
        value = find_attr_value_('points', node)
        if value is not None and 'points' not in already_processed:
            already_processed.add('points')
            # direct assignment: parsing must not invalidate the parent's images
            self.points = value
            self.validate_PointsType(self.points)    # validate type PointsType
        value = find_attr_value_('conf', node)
        if value is not None and 'conf' not in already_processed:
            already_processed.add('conf')
            value = self.gds_parse_float(value, node, 'conf')
            self.conf = value
            self.validate_ConfSimpleType(self.conf)    # validate type ConfSimpleType

    def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Do nothing: this element has no children to build."""
        pass

    def __hash__(self):
        """Hash by ``id`` when the object has one, otherwise by ``(points, conf)``.

        This class sets no ``id`` member itself, so hashing ``self.id``
        alone raised AttributeError.
        """
        if hasattr(self, 'id'):
            return hash(self.id)
        # NOTE(review): assumes equal objects have equal points/conf -- confirm
        # against GeneratedsSuper.__eq__, which is not visible here
        return hash((self.points, self.conf))

    def set_points(self, points):
        """
        Set coordinate polygon by given string.
        Moreover, invalidate the parent's ``pc:AlternativeImage``s
        (because they will have been cropped with a bbox
         of the previous polygon).
        """
        if hasattr(self, 'parent_object_'):
            parent = self.parent_object_
            if hasattr(parent, 'invalidate_AlternativeImage'):
                # RegionType, TextLineType, WordType, GlyphType:
                parent.invalidate_AlternativeImage()
            elif hasattr(parent, 'parent_object_') and hasattr(parent.parent_object_, 'invalidate_AlternativeImage'):
                # BorderType:
                parent.parent_object_.invalidate_AlternativeImage(feature_selector='cropped')
        self.points = points
# end class CoordsType
class TextLineType(GeneratedsSuper):
    """Overrides primaryLanguage attribute of parent text region
    The primary script used in the text line
    The secondary script used in the text line
    The direction in which text within the line
    should be read (order of words and characters).
    Overrides the production attribute of the parent
    text region
    For generic use
    Position (order number) of this text line within the
    parent text region."""
    __hash__ = GeneratedsSuper.__hash__
    member_data_items_ = [
        MemberSpec_('id', 'string', 0, 0, {'use': 'required'}),
        MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}),
        MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}),
        MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}),
        MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}),
        MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional'}),
        MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}),
        MemberSpec_('index', 'int', 0, 1, {'use': 'optional'}),
        MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None),
        MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None),
        MemberSpec_('Baseline', 'BaselineType', 0, 1, {'minOccurs': '0', 'name': 'Baseline', 'type': 'BaselineType'}, None),
        MemberSpec_('Word', 'WordType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Word', 'type': 'WordType'}, None),
        MemberSpec_('TextEquiv', 'TextEquivType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'TextEquiv', 'type': 'TextEquivType'}, None),
        MemberSpec_('TextStyle', 'TextStyleType', 0, 1, {'minOccurs': '0', 'name': 'TextStyle', 'type': 'TextStyleType'}, None),
        MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None),
        MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None),
    ]
    subclass = None
    superclass = None

    def __init__(self, id=None, primaryLanguage=None, primaryScript=None, secondaryScript=None, readingDirection=None, production=None, custom=None, comments=None, index=None, AlternativeImage=None, Coords=None, Baseline=None, Word=None, TextEquiv=None, TextStyle=None, UserDefined=None, Labels=None, gds_collector_=None, **kwargs_):
        """Create a text line.

        Attribute values are passed through ``_cast``; repeatable children
        (``AlternativeImage``, ``Word``, ``TextEquiv``, ``Labels``) default
        to a fresh empty list when ``None`` is given. ``parent_object_``
        may be passed as keyword.
        """
        # bookkeeping
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = None
        # XML attributes
        self.id = _cast(None, id)
        self.id_nsprefix_ = "pc"
        self.primaryLanguage = _cast(None, primaryLanguage)
        self.primaryLanguage_nsprefix_ = "pc"
        self.primaryScript = _cast(None, primaryScript)
        self.primaryScript_nsprefix_ = "pc"
        self.secondaryScript = _cast(None, secondaryScript)
        self.secondaryScript_nsprefix_ = "pc"
        self.readingDirection = _cast(None, readingDirection)
        self.readingDirection_nsprefix_ = "pc"
        self.production = _cast(None, production)
        self.production_nsprefix_ = "pc"
        self.custom = _cast(None, custom)
        self.custom_nsprefix_ = "pc"
        self.comments = _cast(None, comments)
        self.comments_nsprefix_ = "pc"
        self.index = _cast(int, index)
        self.index_nsprefix_ = "pc"
        # child elements
        self.AlternativeImage = [] if AlternativeImage is None else AlternativeImage
        self.AlternativeImage_nsprefix_ = "pc"
        self.Coords = Coords
        self.Coords_nsprefix_ = "pc"
        self.Baseline = Baseline
        self.Baseline_nsprefix_ = "pc"
        self.Word = [] if Word is None else Word
        self.Word_nsprefix_ = "pc"
        self.TextEquiv = [] if TextEquiv is None else TextEquiv
        self.TextEquiv_nsprefix_ = "pc"
        self.TextStyle = TextStyle
        self.TextStyle_nsprefix_ = "pc"
        self.UserDefined = UserDefined
        self.UserDefined_nsprefix_ = "pc"
        self.Labels = [] if Labels is None else Labels
        self.Labels_nsprefix_ = "pc"
@staticmethod
def factory(*args_, **kwargs_):
    """Instantiate the class that currently stands in for TextLineType.

    A subclass found through ``CurrentSubclassModule_`` takes precedence,
    then ``TextLineType.subclass`` if set, then TextLineType itself. All
    arguments are forwarded to the chosen constructor.
    """
    if CurrentSubclassModule_ is not None:
        override = getSubclassFromModule_(
            CurrentSubclassModule_, TextLineType)
        if override is not None:
            return override(*args_, **kwargs_)
    if not TextLineType.subclass:
        return TextLineType(*args_, **kwargs_)
    return TextLineType.subclass(*args_, **kwargs_)
def get_ns_prefix_(self):
    """Return the namespace prefix stored in ``self.ns_prefix_``."""
    return self.ns_prefix_
def set_ns_prefix_(self, ns_prefix):
    """Store `ns_prefix` in ``self.ns_prefix_``."""
    self.ns_prefix_ = ns_prefix
def get_AlternativeImage(self):
    """Return ``self.AlternativeImage`` (the stored object itself, not a copy)."""
    return self.AlternativeImage
def set_AlternativeImage(self, AlternativeImage):
    """Replace ``self.AlternativeImage`` with the given value."""
    self.AlternativeImage = AlternativeImage
def add_AlternativeImage(self, value):
    """Append `value` to ``self.AlternativeImage``."""
    self.AlternativeImage.append(value)
def insert_AlternativeImage_at(self, index, value):
    """Insert `value` into ``self.AlternativeImage`` before position `index`."""
    self.AlternativeImage.insert(index, value)
def replace_AlternativeImage_at(self, index, value):
    """Overwrite the entry of ``self.AlternativeImage`` at position `index` with `value`."""
    self.AlternativeImage[index] = value
def get_Coords(self):
    """Return ``self.Coords``."""
    return self.Coords
def set_Coords(self, Coords):
    """Store `Coords` in ``self.Coords``."""
    self.Coords = Coords
def get_Baseline(self):
    """Return ``self.Baseline``."""
    return self.Baseline
def set_Baseline(self, Baseline):
    """Store `Baseline` in ``self.Baseline``."""
    self.Baseline = Baseline
def get_Word(self):
    """Return ``self.Word`` (the stored object itself, not a copy)."""
    return self.Word
def set_Word(self, Word):
    """Replace ``self.Word`` with the given value."""
    self.Word = Word
def add_Word(self, value):
    """Append `value` to ``self.Word``."""
    self.Word.append(value)
def insert_Word_at(self, index, value):
    """Insert `value` into ``self.Word`` before position `index`."""
    self.Word.insert(index, value)
def replace_Word_at(self, index, value):
    """Overwrite the entry of ``self.Word`` at position `index` with `value`."""
    self.Word[index] = value
def get_TextEquiv(self):
    """Return ``self.TextEquiv`` (the stored object itself, not a copy)."""
    return self.TextEquiv
def set_TextEquiv(self, TextEquiv):
    """Replace ``self.TextEquiv`` with the given value."""
    self.TextEquiv = TextEquiv
def add_TextEquiv(self, value):
    """Append `value` to ``self.TextEquiv``."""
    self.TextEquiv.append(value)
def insert_TextEquiv_at(self, index, value):
    """Insert `value` into ``self.TextEquiv`` before position `index`."""
    self.TextEquiv.insert(index, value)
def replace_TextEquiv_at(self, index, value):
    """Overwrite the entry of ``self.TextEquiv`` at position `index` with `value`."""
    self.TextEquiv[index] = value
def get_TextStyle(self):
    """Return ``self.TextStyle``."""
    return self.TextStyle
def set_TextStyle(self, TextStyle):
    """Store `TextStyle` in ``self.TextStyle``."""
    self.TextStyle = TextStyle
def get_UserDefined(self):
    """Return ``self.UserDefined``."""
    return self.UserDefined
def set_UserDefined(self, UserDefined):
    """Store `UserDefined` in ``self.UserDefined``."""
    self.UserDefined = UserDefined
def get_Labels(self):
    """Return ``self.Labels`` (the stored object itself, not a copy)."""
    return self.Labels
def set_Labels(self, Labels):
    """Replace ``self.Labels`` with the given value."""
    self.Labels = Labels
def add_Labels(self, value):
    """Append `value` to ``self.Labels``."""
    self.Labels.append(value)
def insert_Labels_at(self, index, value):
    """Insert `value` into ``self.Labels`` before position `index`."""
    self.Labels.insert(index, value)
def replace_Labels_at(self, index, value):
    """Overwrite the entry of ``self.Labels`` at position `index` with `value`."""
    self.Labels[index] = value
def get_id(self):
    """Return the ``id`` attribute."""
    return self.id
def set_id(self, id):
    """Set the ``id`` attribute."""
    self.id = id
def get_primaryLanguage(self):
    """Return the ``primaryLanguage`` attribute."""
    return self.primaryLanguage
def set_primaryLanguage(self, primaryLanguage):
    """Set the ``primaryLanguage`` attribute (stored as given, not validated here)."""
    self.primaryLanguage = primaryLanguage
def get_primaryScript(self):
    """Return the ``primaryScript`` attribute."""
    return self.primaryScript
def set_primaryScript(self, primaryScript):
    """Set the ``primaryScript`` attribute (stored as given, not validated here)."""
    self.primaryScript = primaryScript
def get_secondaryScript(self):
    """Return the ``secondaryScript`` attribute."""
    return self.secondaryScript
def set_secondaryScript(self, secondaryScript):
    """Set the ``secondaryScript`` attribute (stored as given, not validated here)."""
    self.secondaryScript = secondaryScript
def get_readingDirection(self):
    """Return the ``readingDirection`` attribute."""
    return self.readingDirection
def set_readingDirection(self, readingDirection):
    """Set the ``readingDirection`` attribute (stored as given, not validated here)."""
    self.readingDirection = readingDirection
def get_production(self):
    """Return the ``production`` attribute."""
    return self.production
def set_production(self, production):
    """Set the ``production`` attribute (stored as given, not validated here)."""
    self.production = production
def get_custom(self):
    """Return the ``custom`` attribute."""
    return self.custom
def set_custom(self, custom):
    """Set the ``custom`` attribute."""
    self.custom = custom
def get_comments(self):
    """Return the ``comments`` attribute."""
    return self.comments
def set_comments(self, comments):
    """Set the ``comments`` attribute."""
    self.comments = comments
def get_index(self):
    """Return the ``index`` attribute."""
    return self.index
def set_index(self, index):
    """Set the ``index`` attribute (stored as given, no conversion here)."""
    self.index = index
def validate_LanguageSimpleType(self, value):
    """Validate `value` against ``pc:LanguageSimpleType``, a restriction on string.

    Problems are reported to ``self.gds_collector_``. Nothing is checked
    when `value` is None, validation is disabled or no collector is set.

    Returns:
        ``False`` if a problem was reported (wrong base type, or not one of
        the enumerated language names), ``True`` otherwise.
    """
    result = True
    if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['Abkhaz', 'Afar', 'Afrikaans', 'Akan', 'Albanian', 'Amharic', 'Arabic', 'Aragonese', 'Armenian', 'Assamese', 'Avaric', 'Avestan', 'Aymara', 'Azerbaijani', 'Bambara', 'Bashkir', 'Basque', 'Belarusian', 'Bengali', 'Bihari', 'Bislama', 'Bosnian', 'Breton', 'Bulgarian', 'Burmese', 'Cambodian', 'Cantonese', 'Catalan', 'Chamorro', 'Chechen', 'Chichewa', 'Chinese', 'Chuvash', 'Cornish', 'Corsican', 'Cree', 'Croatian', 'Czech', 'Danish', 'Divehi', 'Dutch', 'Dzongkha', 'English', 'Esperanto', 'Estonian', 'Ewe', 'Faroese', 'Fijian', 'Finnish', 'French', 'Fula', 'Gaelic', 'Galician', 'Ganda', 'Georgian', 'German', 'Greek', 'Guaraní', 'Gujarati', 'Haitian', 'Hausa', 'Hebrew', 'Herero', 'Hindi', 'Hiri Motu', 'Hungarian', 'Icelandic', 'Ido', 'Igbo', 'Indonesian', 'Interlingua', 'Interlingue', 'Inuktitut', 'Inupiaq', 'Irish', 'Italian', 'Japanese', 'Javanese', 'Kalaallisut', 'Kannada', 'Kanuri', 'Kashmiri', 'Kazakh', 'Khmer', 'Kikuyu', 'Kinyarwanda', 'Kirundi', 'Komi', 'Kongo', 'Korean', 'Kurdish', 'Kwanyama', 'Kyrgyz', 'Lao', 'Latin', 'Latvian', 'Limburgish', 'Lingala', 'Lithuanian', 'Luba-Katanga', 'Luxembourgish', 'Macedonian', 'Malagasy', 'Malay', 'Malayalam', 'Maltese', 'Manx', 'Māori', 'Marathi', 'Marshallese', 'Mongolian', 'Nauru', 'Navajo', 'Ndonga', 'Nepali', 'North Ndebele', 'Northern Sami', 'Norwegian', 'Norwegian Bokmål', 'Norwegian Nynorsk', 'Nuosu', 'Occitan', 'Ojibwe', 'Old Church Slavonic', 'Oriya', 'Oromo', 'Ossetian', 'Pāli', 'Panjabi', 'Pashto', 'Persian', 'Polish', 'Portuguese', 'Punjabi', 'Quechua', 'Romanian', 'Romansh', 'Russian', 'Samoan', 'Sango', 'Sanskrit', 'Sardinian', 'Serbian', 'Shona', 'Sindhi', 'Sinhala', 'Slovak', 'Slovene', 'Somali', 'South Ndebele', 'Southern Sotho', 'Spanish', 'Sundanese', 'Swahili', 'Swati', 'Swedish', 'Tagalog', 'Tahitian', 'Tajik', 'Tamil', 'Tatar', 'Telugu', 'Thai', 'Tibetan', 'Tigrinya', 'Tonga', 'Tsonga', 'Tswana', 'Turkish', 'Turkmen', 'Twi', 'Uighur', 'Ukrainian', 'Urdu', 'Uzbek', 'Venda', 'Vietnamese', 'Volapük', 'Walloon', 'Welsh', 'Western Frisian', 'Wolof', 'Xhosa', 'Yiddish', 'Yoruba', 'Zhuang', 'Zulu', 'other']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on LanguageSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            result = False
    # previously the failure flag was assigned but never returned
    return result
def validate_ScriptSimpleType(self, value):
    """Validate ``value`` against type pc:ScriptSimpleType (a restriction on string).

    Validation is skipped (and reported as successful) when ``value`` is
    None, when ``Validate_simpletypes_`` is false, or when no
    ``gds_collector_`` is attached to this object.

    Args:
        value: the attribute value to check.

    Returns:
        bool: False if ``value`` is not a ``str`` or is not one of the
        enumerated script designations (a message is then added to
        ``self.gds_collector_``), True otherwise.
    """
    if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
        return True
    if not isinstance(value, str):
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
        return False
    enumerations = [
        'Adlm - Adlam',
        'Afak - Afaka',
        'Aghb - Caucasian Albanian',
        'Ahom - Ahom, Tai Ahom',
        'Arab - Arabic',
        'Aran - Arabic (Nastaliq variant)',
        'Armi - Imperial Aramaic',
        'Armn - Armenian',
        'Avst - Avestan',
        'Bali - Balinese',
        'Bamu - Bamum',
        'Bass - Bassa Vah',
        'Batk - Batak',
        'Beng - Bengali',
        'Bhks - Bhaiksuki',
        'Blis - Blissymbols',
        'Bopo - Bopomofo',
        'Brah - Brahmi',
        'Brai - Braille',
        'Bugi - Buginese',
        'Buhd - Buhid',
        'Cakm - Chakma',
        'Cans - Unified Canadian Aboriginal Syllabics',
        'Cari - Carian',
        'Cham - Cham',
        'Cher - Cherokee',
        'Cirt - Cirth',
        'Copt - Coptic',
        'Cprt - Cypriot',
        'Cyrl - Cyrillic',
        'Cyrs - Cyrillic (Old Church Slavonic variant)',
        'Deva - Devanagari (Nagari)',
        'Dsrt - Deseret (Mormon)',
        'Dupl - Duployan shorthand, Duployan stenography',
        'Egyd - Egyptian demotic',
        'Egyh - Egyptian hieratic',
        'Egyp - Egyptian hieroglyphs',
        'Elba - Elbasan',
        'Ethi - Ethiopic',
        'Geok - Khutsuri (Asomtavruli and Nuskhuri)',
        'Geor - Georgian (Mkhedruli)',
        'Glag - Glagolitic',
        'Goth - Gothic',
        'Gran - Grantha',
        'Grek - Greek',
        'Gujr - Gujarati',
        'Guru - Gurmukhi',
        'Hanb - Han with Bopomofo',
        'Hang - Hangul',
        'Hani - Han (Hanzi, Kanji, Hanja)',
        'Hano - Hanunoo (Hanunóo)',
        'Hans - Han (Simplified variant)',
        'Hant - Han (Traditional variant)',
        'Hatr - Hatran',
        'Hebr - Hebrew',
        'Hira - Hiragana',
        'Hluw - Anatolian Hieroglyphs',
        'Hmng - Pahawh Hmong',
        'Hrkt - Japanese syllabaries',
        'Hung - Old Hungarian (Hungarian Runic)',
        'Inds - Indus (Harappan)',
        'Ital - Old Italic (Etruscan, Oscan etc.)',
        'Jamo - Jamo',
        'Java - Javanese',
        'Jpan - Japanese',
        'Jurc - Jurchen',
        'Kali - Kayah Li',
        'Kana - Katakana',
        'Khar - Kharoshthi',
        'Khmr - Khmer',
        'Khoj - Khojki',
        'Kitl - Khitan large script',
        'Kits - Khitan small script',
        'Knda - Kannada',
        'Kore - Korean (alias for Hangul + Han)',
        'Kpel - Kpelle',
        'Kthi - Kaithi',
        'Lana - Tai Tham (Lanna)',
        'Laoo - Lao',
        'Latf - Latin (Fraktur variant)',
        'Latg - Latin (Gaelic variant)',
        'Latn - Latin',
        'Leke - Leke',
        'Lepc - Lepcha (Róng)',
        'Limb - Limbu',
        'Lina - Linear A',
        'Linb - Linear B',
        'Lisu - Lisu (Fraser)',
        'Loma - Loma',
        'Lyci - Lycian',
        'Lydi - Lydian',
        'Mahj - Mahajani',
        'Mand - Mandaic, Mandaean',
        'Mani - Manichaean',
        'Marc - Marchen',
        'Maya - Mayan hieroglyphs',
        'Mend - Mende Kikakui',
        'Merc - Meroitic Cursive',
        'Mero - Meroitic Hieroglyphs',
        'Mlym - Malayalam',
        'Modi - Modi, Moḍī',
        'Mong - Mongolian',
        'Moon - Moon (Moon code, Moon script, Moon type)',
        'Mroo - Mro, Mru',
        'Mtei - Meitei Mayek (Meithei, Meetei)',
        'Mult - Multani',
        'Mymr - Myanmar (Burmese)',
        'Narb - Old North Arabian (Ancient North Arabian)',
        'Nbat - Nabataean',
        'Newa - Newa, Newar, Newari',
        'Nkgb - Nakhi Geba',
        'Nkoo - N’Ko',
        'Nshu - Nüshu',
        'Ogam - Ogham',
        'Olck - Ol Chiki (Ol Cemet’, Ol, Santali)',
        'Orkh - Old Turkic, Orkhon Runic',
        'Orya - Oriya',
        'Osge - Osage',
        'Osma - Osmanya',
        'Palm - Palmyrene',
        'Pauc - Pau Cin Hau',
        'Perm - Old Permic',
        'Phag - Phags-pa',
        'Phli - Inscriptional Pahlavi',
        'Phlp - Psalter Pahlavi',
        'Phlv - Book Pahlavi',
        'Phnx - Phoenician',
        'Piqd - Klingon (KLI pIqaD)',
        'Plrd - Miao (Pollard)',
        'Prti - Inscriptional Parthian',
        'Rjng - Rejang (Redjang, Kaganga)',
        'Roro - Rongorongo',
        'Runr - Runic',
        'Samr - Samaritan',
        'Sara - Sarati',
        'Sarb - Old South Arabian',
        'Saur - Saurashtra',
        'Sgnw - SignWriting',
        'Shaw - Shavian (Shaw)',
        'Shrd - Sharada, Śāradā',
        'Sidd - Siddham',
        'Sind - Khudawadi, Sindhi',
        'Sinh - Sinhala',
        'Sora - Sora Sompeng',
        'Sund - Sundanese',
        'Sylo - Syloti Nagri',
        'Syrc - Syriac',
        'Syre - Syriac (Estrangelo variant)',
        'Syrj - Syriac (Western variant)',
        'Syrn - Syriac (Eastern variant)',
        'Tagb - Tagbanwa',
        'Takr - Takri',
        'Tale - Tai Le',
        'Talu - New Tai Lue',
        'Taml - Tamil',
        'Tang - Tangut',
        'Tavt - Tai Viet',
        'Telu - Telugu',
        'Teng - Tengwar',
        'Tfng - Tifinagh (Berber)',
        'Tglg - Tagalog (Baybayin, Alibata)',
        'Thaa - Thaana',
        'Thai - Thai',
        'Tibt - Tibetan',
        'Tirh - Tirhuta',
        'Ugar - Ugaritic',
        'Vaii - Vai',
        'Visp - Visible Speech',
        'Wara - Warang Citi (Varang Kshiti)',
        'Wole - Woleai',
        'Xpeo - Old Persian',
        'Xsux - Cuneiform, Sumero-Akkadian',
        'Yiii - Yi',
        'Zinh - Code for inherited script',
        'Zmth - Mathematical notation',
        'Zsye - Symbols (Emoji variant)',
        'Zsym - Symbols',
        'Zxxx - Code for unwritten documents',
        'Zyyy - Code for undetermined script',
        'Zzzz - Code for uncoded script',
        'other',
    ]
    if value not in enumerations:
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ScriptSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
        # Previously the failure was only stored in an unused local, so the
        # caller could not tell an enumeration mismatch from success.
        return False
    return True
def validate_ReadingDirectionSimpleType(self, value):
    """Validate ``value`` against type pc:ReadingDirectionSimpleType (a restriction on string).

    Validation is skipped (and reported as successful) when ``value`` is
    None, when ``Validate_simpletypes_`` is false, or when no
    ``gds_collector_`` is attached to this object.

    Args:
        value: the attribute value to check.

    Returns:
        bool: False if ``value`` is not a ``str`` or is not one of the
        enumerated reading directions (a message is then added to
        ``self.gds_collector_``), True otherwise.
    """
    if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
        return True
    if not isinstance(value, str):
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
        return False
    enumerations = ['left-to-right', 'right-to-left', 'top-to-bottom', 'bottom-to-top']
    if value not in enumerations:
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ReadingDirectionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
        # Previously the failure was only stored in an unused local, so the
        # caller could not tell an enumeration mismatch from success.
        return False
    return True
def validate_ProductionSimpleType(self, value):
    """Validate ``value`` against type pc:ProductionSimpleType (a restriction on string).

    Validation is skipped (and reported as successful) when ``value`` is
    None, when ``Validate_simpletypes_`` is false, or when no
    ``gds_collector_`` is attached to this object.

    Args:
        value: the attribute value to check.

    Returns:
        bool: False if ``value`` is not a ``str`` or is not one of the
        enumerated production kinds (a message is then added to
        ``self.gds_collector_``), True otherwise.
    """
    if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
        return True
    if not isinstance(value, str):
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
        return False
    enumerations = ['printed', 'typewritten', 'handwritten-cursive', 'handwritten-printscript', 'medieval-manuscript', 'other']
    if value not in enumerations:
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ProductionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
        # Previously the failure was only stored in an unused local, so the
        # caller could not tell an enumeration mismatch from success.
        return False
    return True
def hasContent_(self):
    """Report whether this object has any child element set.

    Repeatable children count when their list is non-empty; single
    children count when they are not None.
    """
    repeated_ = (self.AlternativeImage, self.Word, self.TextEquiv, self.Labels)
    single_ = (self.Coords, self.Baseline, self.TextStyle, self.UserDefined)
    return any(repeated_) or any(item_ is not None for item_ in single_)
def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextLineType', pretty_print=True):
    """Write this object as an XML element to ``outfile``.

    Args:
        outfile: object with a ``write`` method receiving the XML text.
        level: indentation level handed to ``showIndent``.
        namespaceprefix_: prefix written before the tag name; replaced by
            ``self.ns_prefix_ + ':'`` when ``UseCapturedNS_`` is set and a
            prefix was captured.
        namespacedef_: namespace declaration written on the opening tag;
            replaced by the ``GenerateDSNamespaceDefs_`` entry for
            ``'TextLineType'`` when there is one.
        name_: tag name; replaced by ``self.original_tagname_`` when that
            is set and ``name_`` is the default ``'TextLineType'``.
        pretty_print: when true, indent and terminate lines with newlines.
    """
    override_ns_def_ = GenerateDSNamespaceDefs_.get('TextLineType')
    if override_ns_def_ is not None:
        namespacedef_ = override_ns_def_
    eol_ = '\n' if pretty_print else ''
    if self.original_tagname_ is not None and name_ == 'TextLineType':
        name_ = self.original_tagname_
    if UseCapturedNS_ and self.ns_prefix_:
        namespaceprefix_ = self.ns_prefix_ + ':'
    showIndent(outfile, level, pretty_print)
    ns_decl_ = ' ' + namespacedef_ if namespacedef_ else ''
    outfile.write('<%s%s%s' % (namespaceprefix_, name_, ns_decl_, ))
    already_processed = set()
    self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextLineType')
    if not self.hasContent_():
        # No children: emit a self-closing element.
        outfile.write('/>%s' % (eol_, ))
        return
    outfile.write('>%s' % (eol_, ))
    self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextLineType', pretty_print=pretty_print)
    showIndent(outfile, level, pretty_print)
    outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextLineType'):
    """Write this object's XML attributes to ``outfile``.

    Every attribute that is not None and whose name is not yet in
    ``already_processed`` is written as `` name=value`` and its name is
    added to ``already_processed``.

    Args:
        outfile: object with a ``write`` method.
        level: unused here.
        already_processed: set of attribute names already written; updated
            in place.
        namespaceprefix_: unused here.
        name_: unused here.
    """
    string_attrs_ = (
        'id',
        'primaryLanguage',
        'primaryScript',
        'secondaryScript',
        'readingDirection',
        'production',
        'custom',
        'comments',
    )
    for attr_name_ in string_attrs_:
        attr_value_ = getattr(self, attr_name_)
        if attr_value_ is None or attr_name_ in already_processed:
            continue
        already_processed.add(attr_name_)
        quoted_ = quote_attrib(attr_value_)
        formatted_ = self.gds_format_string(quoted_, input_name=attr_name_)
        outfile.write(' %s=%s' % (attr_name_, self.gds_encode(formatted_)))
    # ``index`` is an integer attribute and is quoted here rather than by
    # ``quote_attrib``.
    if self.index is not None and 'index' not in already_processed:
        already_processed.add('index')
        outfile.write(' index="%s"' % self.gds_format_integer(self.index, input_name='index'))
def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextLineType', fromsubclass_=False, pretty_print=True):
    """Export every child element of this object to ``outfile``.

    Children are written in the order AlternativeImage, Coords, Baseline,
    Word, TextEquiv, TextStyle, UserDefined, Labels by calling each
    child's own ``export`` with an empty ``namespacedef_``.

    Args:
        outfile: passed through to each child's ``export``.
        level: passed through to each child's ``export``.
        namespaceprefix_: ignored; the prefix is derived per child from
            ``self.<tag>_nsprefix_`` when ``UseCapturedNS_`` is set.
        namespacedef_: ignored; children are exported with ``''``.
        name_: unused here.
        fromsubclass_: unused here.
        pretty_print: passed through to each child's ``export``.
    """
    def _export_child(child_, tag_):
        # Use the captured prefix only when namespace capturing is enabled.
        captured_ = UseCapturedNS_ and getattr(self, tag_ + '_nsprefix_')
        prefix_ = captured_ + ':' if captured_ else ''
        child_.export(outfile, level, prefix_, namespacedef_='', name_=tag_, pretty_print=pretty_print)

    repeated_tags_ = ('AlternativeImage', 'Word', 'TextEquiv', 'Labels')
    ordered_tags_ = (
        'AlternativeImage',
        'Coords',
        'Baseline',
        'Word',
        'TextEquiv',
        'TextStyle',
        'UserDefined',
        'Labels',
    )
    for tag_ in ordered_tags_:
        member_ = getattr(self, tag_)
        if tag_ in repeated_tags_:
            for child_ in member_:
                _export_child(child_, tag_)
        elif member_ is not None:
            _export_child(member_, tag_)
def to_etree(self, parent_element=None, name_='TextLineType', mapping_=None, nsmap_=None):
    """Build an element-tree element for this object and its children.

    Args:
        parent_element: when given, the new element is created as a
            sub-element of it; otherwise a stand-alone element is created.
        name_: local tag name, placed in the PAGE 2019-07-15 namespace.
        mapping_: optional dict; receives ``id(self) -> element``.
        nsmap_: passed as ``nsmap`` when creating the element, and on to
            the children.

    Returns:
        The newly created element.
    """
    qualified_name_ = '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_
    if parent_element is None:
        element = etree_.Element(qualified_name_, nsmap=nsmap_)
    else:
        element = etree_.SubElement(parent_element, qualified_name_, nsmap=nsmap_)

    string_attrs_ = (
        'id',
        'primaryLanguage',
        'primaryScript',
        'secondaryScript',
        'readingDirection',
        'production',
        'custom',
        'comments',
    )
    for attr_name_ in string_attrs_:
        attr_value_ = getattr(self, attr_name_)
        if attr_value_ is not None:
            element.set(attr_name_, self.gds_format_string(attr_value_))
    if self.index is not None:
        element.set('index', self.gds_format_integer(self.index))

    repeated_tags_ = ('AlternativeImage', 'Word', 'TextEquiv', 'Labels')
    ordered_tags_ = (
        'AlternativeImage',
        'Coords',
        'Baseline',
        'Word',
        'TextEquiv',
        'TextStyle',
        'UserDefined',
        'Labels',
    )
    for tag_ in ordered_tags_:
        member_ = getattr(self, tag_)
        if tag_ in repeated_tags_:
            children_ = member_
        elif member_ is not None:
            children_ = (member_, )
        else:
            children_ = ()
        for child_ in children_:
            child_.to_etree(element, name_=tag_, mapping_=mapping_, nsmap_=nsmap_)

    if mapping_ is not None:
        mapping_[id(self)] = element
    return element
def build(self, node, gds_collector_=None):
    """Populate this object from the element ``node``.

    Stores the collector and (when ``SaveElementTreeNode`` is set) the
    node itself, records the node's namespace prefix, then reads the
    attributes and every child element.

    Args:
        node: the element to read from.
        gds_collector_: message collector stored on this object and handed
            on to ``buildChildren``.

    Returns:
        This object.
    """
    self.gds_collector_ = gds_collector_
    if SaveElementTreeNode:
        self.gds_elementtree_node_ = node
    self.ns_prefix_ = node.prefix
    self.buildAttributes(node, node.attrib, set())
    for child_node_ in node:
        # The last group of Tag_pattern_ is used as the child's node name.
        tag_groups_ = Tag_pattern_.match(child_node_.tag).groups()
        self.buildChildren(child_node_, node, tag_groups_[-1], gds_collector_=gds_collector_)
    return self
def buildAttributes(self, node, attrs, already_processed):
    """Read this object's XML attributes from ``node``.

    Each attribute found on ``node`` whose name is not yet in
    ``already_processed`` is stored on this object; attributes with an
    enumerated simple type are additionally passed to the matching
    ``validate_*`` method.

    Args:
        node: the element to read attributes from (via ``find_attr_value_``).
        attrs: unused here.
        already_processed: set of attribute names already handled; updated
            in place.
    """
    # (attribute name, simple type to validate against or None)
    string_attrs_ = (
        ('id', None),
        ('primaryLanguage', 'LanguageSimpleType'),
        ('primaryScript', 'ScriptSimpleType'),
        ('secondaryScript', 'ScriptSimpleType'),
        ('readingDirection', 'ReadingDirectionSimpleType'),
        ('production', 'ProductionSimpleType'),
        ('custom', None),
        ('comments', None),
    )
    for attr_name_, simple_type_ in string_attrs_:
        value = find_attr_value_(attr_name_, node)
        if value is None or attr_name_ in already_processed:
            continue
        already_processed.add(attr_name_)
        setattr(self, attr_name_, value)
        if simple_type_ is not None:
            validator_ = getattr(self, 'validate_' + simple_type_)
            validator_(getattr(self, attr_name_))    # validate type <simple_type_>
    value = find_attr_value_('index', node)
    if value is not None and 'index' not in already_processed:
        already_processed.add('index')
        self.index = self.gds_parse_integer(value, node, 'index')
def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
    """Build one child object from ``child_`` and attach it to this object.

    The child class is chosen by ``nodeName_``. Repeatable children
    (AlternativeImage, Word, TextEquiv, Labels) are appended to the
    corresponding list; the others (Coords, Baseline, TextStyle,
    UserDefined) replace the corresponding attribute. Unknown node names
    are ignored.

    Args:
        child_: the child element to build from.
        node: unused here.
        nodeName_: node name of ``child_``.
        fromsubclass_: unused here.
        gds_collector_: passed on to the child's ``build``.
    """
    if nodeName_ == 'AlternativeImage':
        factory_, repeated_ = AlternativeImageType.factory, True
    elif nodeName_ == 'Coords':
        factory_, repeated_ = CoordsType.factory, False
    elif nodeName_ == 'Baseline':
        factory_, repeated_ = BaselineType.factory, False
    elif nodeName_ == 'Word':
        factory_, repeated_ = WordType.factory, True
    elif nodeName_ == 'TextEquiv':
        factory_, repeated_ = TextEquivType.factory, True
    elif nodeName_ == 'TextStyle':
        factory_, repeated_ = TextStyleType.factory, False
    elif nodeName_ == 'UserDefined':
        factory_, repeated_ = UserDefinedType.factory, False
    elif nodeName_ == 'Labels':
        factory_, repeated_ = LabelsType.factory, True
    else:
        return

    obj_ = factory_(parent_object_=self)
    obj_.build(child_, gds_collector_=gds_collector_)
    if repeated_:
        getattr(self, nodeName_).append(obj_)
    else:
        setattr(self, nodeName_, obj_)
    obj_.original_tagname_ = nodeName_
def __hash__(self):
    """Return the hash of this object's ``id`` attribute."""
    identifier_ = self.id
    return hash(identifier_)
def invalidate_AlternativeImage(self, feature_selector=None):
    """
    Remove derived images from this segment (due to changed coordinates).

    If `feature_selector` is not none, remove only images with
    matching ``@comments``, e.g. ``feature_selector=cropped,deskewed``.

    Each removal is reported to ``self.gds_collector_`` when a collector
    is attached; without one the images are removed silently.
    """
    existing_images = self.AlternativeImage or []
    if feature_selector:
        # Split the selector once; empty entries (e.g. from a trailing
        # comma) never match anything.
        selected_features = {feature for feature in feature_selector.split(',') if feature}
        removed_images = []
        new_images = []
        for image in existing_images:
            features = (image.get_comments() or '').split(',')
            if selected_features.isdisjoint(features):
                new_images.append(image)
            else:
                removed_images.append(image)
        self.AlternativeImage = new_images
    else:
        removed_images = existing_images
        self.AlternativeImage = []
    if self.gds_collector_ is None:
        # Objects created through the constructor have no collector;
        # reporting must not make the invalidation itself fail.
        return
    if hasattr(self, 'id'):
        name = self.id
    elif hasattr(self, 'parent_object_') and hasattr(self.parent_object_, 'pcGtsId'):
        name = self.parent_object_.pcGtsId
    else:
        name = ''
    for image in removed_images:
        self.gds_collector_.add_message('Removing AlternativeImage %s from "%s"' % (
            image.get_comments() or '', name))
def set_Coords(self, Coords):
    """
    Set coordinate polygon by given :py:class:`CoordsType` object.

    Moreover, invalidate self's ``pc:AlternativeImage``s
    (because they will have been cropped with a bbox
    of the previous polygon).
    """
    missing_ = object()
    own_invalidate_ = getattr(self, 'invalidate_AlternativeImage', missing_)
    if own_invalidate_ is not missing_:
        # RegionType, TextLineType, WordType, GlyphType:
        own_invalidate_()
    else:
        parent_ = getattr(self, 'parent_object_', missing_)
        parent_invalidate_ = getattr(parent_, 'invalidate_AlternativeImage', missing_)
        if parent_invalidate_ is not missing_:
            # BorderType:
            parent_invalidate_(feature_selector='cropped')
    self.Coords = Coords
# end class TextLineType
[docs]class WordType(GeneratedsSuper): """Overrides primaryLanguage attribute of parent line and/or text region The primary script used in the word The secondary script used in the word The direction in which text within the word should be read (order of characters). Overrides the production attribute of the parent text line and/or text region. For generic use""" __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), MemberSpec_('language', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional'}), MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), MemberSpec_('Glyph', 'GlyphType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Glyph', 'type': 'GlyphType'}, None), MemberSpec_('TextEquiv', 'TextEquivType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'TextEquiv', 'type': 'TextEquivType'}, None), MemberSpec_('TextStyle', 'TextStyleType', 0, 1, {'minOccurs': '0', 'name': 'TextStyle', 'type': 'TextStyleType'}, None), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), ] subclass = None superclass = None 
def __init__(self, id=None, language=None, primaryScript=None, secondaryScript=None, readingDirection=None, production=None, custom=None, comments=None, AlternativeImage=None, Coords=None, Glyph=None, TextEquiv=None, TextStyle=None, UserDefined=None, Labels=None, gds_collector_=None, **kwargs_):
    """Initialise the attributes and child containers of this object.

    Repeatable children (AlternativeImage, Glyph, TextEquiv, Labels)
    default to a fresh empty list when not given; every member also gets
    a ``<name>_nsprefix_`` companion set to ``"pc"``. A ``parent_object_``
    keyword argument, if present, is stored as ``self.parent_object_``.
    """
    # Bookkeeping.
    self.gds_collector_ = gds_collector_
    self.gds_elementtree_node_ = None
    self.original_tagname_ = None
    self.parent_object_ = kwargs_.get('parent_object_')
    self.ns_prefix_ = None

    # XML attributes.
    self.id = _cast(None, id)
    self.language = _cast(None, language)
    self.primaryScript = _cast(None, primaryScript)
    self.secondaryScript = _cast(None, secondaryScript)
    self.readingDirection = _cast(None, readingDirection)
    self.production = _cast(None, production)
    self.custom = _cast(None, custom)
    self.comments = _cast(None, comments)

    # Child elements; each repeatable child gets its own new list.
    self.AlternativeImage = [] if AlternativeImage is None else AlternativeImage
    self.Coords = Coords
    self.Glyph = [] if Glyph is None else Glyph
    self.TextEquiv = [] if TextEquiv is None else TextEquiv
    self.TextStyle = TextStyle
    self.UserDefined = UserDefined
    self.Labels = [] if Labels is None else Labels

    # Namespace prefix recorded for every member.
    member_names_ = (
        'id', 'language', 'primaryScript', 'secondaryScript',
        'readingDirection', 'production', 'custom', 'comments',
        'AlternativeImage', 'Coords', 'Glyph', 'TextEquiv', 'TextStyle',
        'UserDefined', 'Labels',
    )
    for member_name_ in member_names_:
        setattr(self, member_name_ + '_nsprefix_', "pc")
@staticmethod
def factory(*args_, **kwargs_):
    """Create a ``WordType`` instance, honouring registered subclasses.

    A subclass found through ``getSubclassFromModule_`` (when
    ``CurrentSubclassModule_`` is set) takes precedence, then
    ``WordType.subclass``, then ``WordType`` itself. All arguments are
    forwarded to the chosen class.
    """
    if CurrentSubclassModule_ is not None:
        module_subclass_ = getSubclassFromModule_(
            CurrentSubclassModule_, WordType)
        if module_subclass_ is not None:
            return module_subclass_(*args_, **kwargs_)
    chosen_class_ = WordType.subclass or WordType
    return chosen_class_(*args_, **kwargs_)
def get_ns_prefix_(self):
    """Return the value of this object's ``ns_prefix_`` attribute."""
    return self.ns_prefix_
def set_ns_prefix_(self, ns_prefix):
    """Store ``ns_prefix`` as this object's ``ns_prefix_`` attribute."""
    self.ns_prefix_ = ns_prefix
def get_AlternativeImage(self):
    """Return this object's ``AlternativeImage`` list (not a copy)."""
    return self.AlternativeImage
def set_AlternativeImage(self, AlternativeImage):
    """Replace this object's ``AlternativeImage`` attribute with the given value."""
    self.AlternativeImage = AlternativeImage
def add_AlternativeImage(self, value):
    """Append ``value`` to this object's ``AlternativeImage`` list."""
    self.AlternativeImage.append(value)
def insert_AlternativeImage_at(self, index, value):
    """Insert ``value`` into the ``AlternativeImage`` list before position ``index``."""
    self.AlternativeImage.insert(index, value)
def replace_AlternativeImage_at(self, index, value):
    """Overwrite the ``AlternativeImage`` list entry at position ``index`` with ``value``."""
    self.AlternativeImage[index] = value
def get_Coords(self):
    """Return the value of this object's ``Coords`` attribute."""
    return self.Coords
def set_Coords(self, Coords):
    """Store ``Coords`` as this object's ``Coords`` attribute."""
    self.Coords = Coords
def get_Glyph(self):
    """Return this object's ``Glyph`` list (not a copy)."""
    return self.Glyph
def set_Glyph(self, Glyph):
    """Replace this object's ``Glyph`` attribute with the given value."""
    self.Glyph = Glyph
[docs] def add_Glyph(self, value): self.Glyph.append(