Source code for ocrd_models.ocrd_exif

"""
Technical image metadata
"""

from math import sqrt
from io import BytesIO
from subprocess import run, PIPE
from shutil import which
from ocrd_utils import getLogger


[docs]
class OcrdExif():
    """Represents technical image metadata.

    Attributes:
        width (int): pixel dimensions
        height (int): pixel dimensions
        photometricInterpretation (str): pixel type/depth, e.g. \

            * ``1`` for b/w,
            * ``L`` for 8-bit grayscale,
            * ``RGB`` for 24-bit truecolor,
            * ``I`` for 32-bit signed integer grayscale,
            * ``F`` for floating-point grayscale

          (see PIL concept **mode**)
        n_frames (int): number of frames reported by the image (1 if the
            image does not expose ``n_frames``)
        compression: value of ``img.info['compression']`` or ``None``
            (only set when ImageMagick ``identify`` is used)
        photometric_interpretation: value of
            ``img.info['photometric_interpretation']`` or ``None``
            (only set when ImageMagick ``identify`` is used)
        resolution (int): pixel density (geometric mean of x and y density)
        xResolution (int): pixel density, at least 1
        yResolution (int): pixel density, at least 1
        resolutionUnit (str): unit of measurement (either ``inches`` or ``cm``)
    """

    def __init__(self, img):
        """
        Arguments:
            img (`PIL.Image`): PIL image technical metadata is about.
        """
        self.width = img.width
        self.height = img.height
        self.photometricInterpretation = img.mode
        # Look the attribute up on the object rather than in ``__dict__``:
        # a value provided by a class-level property would otherwise be
        # missed and multi-frame images would be reported as single-frame.
        self.n_frames = getattr(img, 'n_frames', 1)
        if which('identify'):
            self.run_identify(img)
        else:
            getLogger('ocrd.exif').warning("ImageMagick 'identify' not available, Consider installing ImageMagick for more robust pixel density estimation")
            self.run_pil(img)

    def _set_resolution(self, x_res=1, y_res=1, unit='inches'):
        """Store the pixel density attributes, clamping each axis to >= 1.

        Arguments:
            x_res (number): horizontal pixel density
            y_res (number): vertical pixel density
            unit (str): ``inches`` or ``cm``
        """
        # Assignment order is kept stable because ``to_xml`` serializes
        # attributes in insertion order.
        self.xResolution = max(int(x_res), 1)
        self.yResolution = max(int(y_res), 1)
        self.resolutionUnit = unit
        self.resolution = round(sqrt(self.xResolution * self.yResolution))

    def run_identify(self, img):
        """Determine pixel density by calling ImageMagick ``identify``.

        Also copies ``compression`` and ``photometric_interpretation`` from
        ``img.info`` (``None`` when absent). If ``identify`` fails or prints
        something that cannot be parsed, the density falls back to 1 per
        inch.

        Arguments:
            img (`PIL.Image`): image to inspect; its file is passed to
                ``identify`` by name, or re-encoded and piped via stdin if
                it has no filename.
        """
        log = getLogger('ocrd.exif')
        for prop in ('compression', 'photometric_interpretation'):
            setattr(self, prop, img.info.get(prop))

        cmd = ['identify', '-format', r'%[resolution.x] %[resolution.y] %U ']
        # Images that were not opened from a file may lack ``filename``.
        filename = getattr(img, 'filename', None)
        if filename:
            ret = run(cmd + [filename], check=False, stderr=PIPE, stdout=PIPE)
        elif img.format:
            with BytesIO() as bio:
                img.save(bio, format=img.format)
                ret = run(cmd + ['/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue())
        else:
            # Neither a file nor a known encoding: nothing identify could read.
            log.warning("Image has neither a filename nor a format, cannot determine pixel density")
            self._set_resolution()
            return

        x_res, y_res, unit = 1, 1, 'inches'
        if ret.returncode:
            stderr = ret.stderr.decode('utf-8', errors='replace')
            if 'no decode delegate for this image format' in stderr:
                log.warning("ImageMagick does not support the '%s' image format. ", img.format)
            else:
                log.error("identify exited with non-zero %s: %s", ret.returncode, stderr)
        else:
            output = ret.stdout.decode('utf-8', errors='replace')
            # Expected: "<x> <y> <unit> [...]"; anything after the third
            # token (e.g. further frames) is ignored.
            tokens = output.split(' ', 3)
            try:
                x_res, y_res = int(float(tokens[0])), int(float(tokens[1]))
                if tokens[2] == 'PixelsPerCentimeter':
                    unit = 'cm'
            except (IndexError, ValueError, OverflowError):
                log.error("Cannot parse output of identify: %r", output)
                x_res, y_res, unit = 1, 1, 'inches'
        self._set_resolution(x_res, y_res, unit)

    def run_pil(self, img):
        """Determine pixel density from the metadata PIL exposes on ``img``.

        Reads ``img.info`` (``dpi``, ``jfif_density``/``jfif_unit``,
        ``aspect``) depending on ``img.format``; falls back to 1 per inch
        if none of them is available.

        Arguments:
            img (`PIL.Image`): image to inspect.
        """
        info = img.info
        x_res, y_res, unit = 1, 1, 'inches'
        if img.format in ('TIFF', 'PNG') and 'dpi' in info:
            x_res, y_res = info['dpi'][0], info['dpi'][1]
            # TIFF tag 296 is compared against 3 to detect centimeters.
            # NOTE(review): Pillow may already convert centimeter-based
            # densities to dpi when filling info['dpi'] - confirm whether
            # the unit label and the values agree here.
            if img.format == 'TIFF' and img.tag.get(296) == 3:
                unit = 'cm'
        elif img.format == 'JPEG' and 'jfif_density' in info:
            x_res, y_res = info['jfif_density'][0], info['jfif_density'][1]
            if info.get('jfif_unit') == 2:
                unit = 'cm'
        elif img.format == 'PNG' and 'aspect' in info:
            x_res, y_res = info['aspect'][0], info['aspect'][1]
        # Clamp exactly like run_identify does, so that a zero density in
        # the metadata never yields ``resolution == 0``.
        self._set_resolution(x_res, y_res, unit)

    def to_xml(self):
        """
        Serialize all properties as XML string.

        Returns:
            str: an ``<exif>`` element with one child element per instance
            attribute, in attribute insertion order; values are XML-escaped.
        """
        from xml.sax.saxutils import escape
        children = ''.join(
            f'<{k}>{escape(str(v))}</{k}>' for k, v in vars(self).items())
        return f'<exif>{children}</exif>'
Source code for ocrd_models.ocrd_exif

OCR-D/core

Navigation

Related Topics