# Source code for ocrd.processor.ocrd_page_result
from dataclasses import dataclass, field
import copy
from typing import List, Union, Optional
from ocrd_models.ocrd_page import OcrdPage
from PIL.Image import Image
from ocrd_models.ocrd_page_generateds import AlternativeImageType, PageType
[docs]
@dataclass
class OcrdPageResultImage:
    """
    One ``AlternativeImage`` reference whose image data still has to be
    written as an image file into the :py:class:`ocrd.Workspace`.
    """

    pil: Image
    """
    the image data that is going to be saved
    """

    file_id_suffix: str
    """
    suffix that gets appended to the file name on saving
    (by OCR-D conventions for PAGE-XML, something like ``.IMG``)
    """

    alternative_image: Optional[Union[AlternativeImageType, PageType]]
    """
    the ``AlternativeImage`` instance referencing this image; on saving,
    it is amended with the actual (final) ``@filename``

    may also be a ``Page`` instance instead: then its ``@imageFilename``
    is amended (i.e. the original image of the PAGE-XML gets replaced)
    """
[docs]
@dataclass
class OcrdPageResult:
    """
    Return type of :py:func:`ocrd.Processor.process_page_pcgts`.

    Bundles an :py:class:`ocrd_models.ocrd_page.OcrdPage` instance with the
    list of :py:class:`OcrdPageResultImage` entries for every image file that
    is referenced via ``AlternativeImage`` and must be persisted into the
    :py:class:`ocrd.Workspace` together with the PAGE-XML itself.
    """

    # the PAGE-XML document of this result
    pcgts: OcrdPage

    # images to persist alongside the PAGE-XML; every instance gets its own
    # fresh empty list unless one is passed in explicitly
    images: List[OcrdPageResultImage] = field(default_factory=list)
[docs]
class OcrdPageResultVariadicListWrapper():
    """
    Proxy object for :py:class:`ocrd.SingleOcrdPageResult` allowing
    list semantics (i.e. multi-valued return from
    :py:func:`ocrd.Processor.process_page_pcgts`) without changing
    the API introduced in version 3.0.

    List-style access (indexing, iteration, ``len()``, ``in``) operates on
    all wrapped results. Every other attribute read is forwarded to the
    first result, which yields the old (singular valued) semantics.
    """

    def __init__(
            self,
            pcgts: "OcrdPage",
            *args):
        """
        Wrap every given page in its own ``SingleOcrdPageResult``.

        :param pcgts: page of the first result (the one that plain
            attribute access is forwarded to)
        :param args: pages of any further results, kept in the given order
        """
        self._results = [SingleOcrdPageResult(pcgts)] + [
            SingleOcrdPageResult(arg) for arg in args]

    def __getitem__(self, key):
        """Return the wrapped result(s) at index or slice ``key``."""
        return self._results[key]

    def __contains__(self, key):
        """Tell whether ``key`` is one of the wrapped results."""
        return key in self._results

    def __len__(self):
        """Return the number of wrapped results."""
        return len(self._results)

    def __iter__(self):
        """Iterate over the wrapped results in order."""
        return iter(self._results)

    def __repr__(self):
        """Return the ``repr`` of the underlying list of results."""
        return repr(self._results)

    def __copy__(self):
        """
        Return a shallow copy: a new wrapper with its own list that holds
        the very same result objects.
        """
        # Bypass __init__ on purpose: it treats its arguments as pages and
        # would wrap the existing results in yet another result layer.
        clone = type(self).__new__(type(self))
        clone._results = copy.copy(self._results)
        return clone

    def __deepcopy__(self, memo):
        """
        Return a deep copy: a new wrapper holding deep copies of all results.

        :param memo: the memo dictionary handed in by :py:func:`copy.deepcopy`;
            it is passed on so that objects shared with the rest of the
            copied structure stay shared in the copy
        """
        # Bypass __init__ for the same reason as in __copy__.
        clone = type(self).__new__(type(self))
        # Register the clone before descending, so that a reference back to
        # this wrapper from within the results resolves to the clone.
        memo[id(self)] = clone
        clone._results = copy.deepcopy(self._results, memo)
        return clone

    def __getattr__(self, name):
        """
        Forward reads of attributes not found on the wrapper itself to the
        first result.

        :raises AttributeError: if the wrapper holds no results yet, or if
            the first result does not have the attribute either
        """
        # Python only calls __getattr__ after regular lookup failed. Read
        # _results straight from the instance dict: going through attribute
        # access would re-enter this method endlessly on an instance that
        # has not been initialised (e.g. one created via __new__).
        try:
            results = self.__dict__["_results"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(results[0], name)
# Rebind the public names: the dataclass defined above stays reachable as
# ``SingleOcrdPageResult``, while ``OcrdPageResult`` from here on refers to
# the list-capable wrapper class.
SingleOcrdPageResult = OcrdPageResult
OcrdPageResult = OcrdPageResultVariadicListWrapper