Source code for ocrd_models.xpath_functions
from ocrd_utils import xywh_from_points
pc_functions = []
def _export(func):
pc_functions.append(func)
return func
[docs]
@_export
def pc_pixelarea(nodes):
"""
Extract Coords/@points from all nodes, calculate the bounding
box, and accumulate areas.
"""
area = 0
for node in nodes:
# FIXME: find out why we need to go to the parent here
node = node.parent.value
coords = node.find(f'{node.prefix}:Coords', node.nsmap)
if coords is None:
continue
points = coords.attrib['points']
xywh = xywh_from_points(points)
area += xywh['w'] * xywh['h']
return area
[docs]
@_export
def pc_textequiv(nodes):
"""
Extract TextEquiv/Unicode from all nodes, then concatenate
(interspersed with spaces or newlines).
"""
text = ''
for node in nodes:
# FIXME: find out why we need to go to the parent here
node = node.parent.value
if text and node.tag.endswith('Region'):
text += '\n'
if text and node.tag.endswith('Line'):
text += '\n'
if text and node.tag.endswith('Word'):
text += ' '
equiv = node.find(f'{node.prefix}:TextEquiv', node.nsmap)
if equiv is None:
continue
string = equiv.find(f'{node.prefix}:Unicode', node.nsmap)
if string is None:
continue
text += str(string.text)
return text