# Source code for pyexttiff

#!/usr/bin/env python
# -*- coding: utf-8 -*
# Copyright (c) Météo France (2014-)
# This software is governed by the CeCILL-C license under French law.
# http://www.cecill.info
"""
This module aims at reading tiff with private IFDs.
It uses PIL for image reading.

This module uses code from pylibtiff (https://pypi.python.org/pypi/libtiff, https://code.google.com/p/pylibtiff or https://github.com/hmeine/pylibtiff)
"""

from __future__ import print_function, absolute_import, division, unicode_literals

import os
import numpy
import mmap
import PIL.Image
import six


class PyexttiffError(Exception):

    """Error raised by pyexttiff (unknown read method, missing tag, inconsistent file...)."""


class TiffFile(object):

    """
    This class allows the access to the entire tiff file (tags and images).

    The whole file is exposed as a flat array of bytes (read in memory or
    memory-mapped); the header and all the IFDs, including the private IFDs
    designated by *subIFDpaths*, are decoded when the object is built.
    """

    # see https://github.com/numpy/numpy/issues/2407 for the str workaround
    # It can be dropped once python2 with "from __future__ import unicode_literals" is no longer used
    _rational = numpy.dtype([(str('numer'), numpy.uint32), (str('denom'), numpy.uint32)])
    _srational = numpy.dtype([(str('numer'), numpy.int32), (str('denom'), numpy.int32)])

    # TIFF field type code -> type name
    _type2name = {1:'BYTE', 2:'ASCII', 3:'SHORT', 4:'LONG', 5:'RATIONAL',  # two longs, lsm uses it for float64
                  6:'SBYTE', 7:'UNDEFINED', 8:'SSHORT', 9:'SLONG', 10:'SRATIONAL',
                  11:'FLOAT', 12:'DOUBLE',
                  }
    # type name -> TIFF field type code (ambiguous names are read as LONG)
    _name2type = dict((v, k) for k, v in _type2name.items())
    _name2type['SHORT|LONG'] = _name2type['LONG']
    _name2type['LONG|SHORT'] = _name2type['LONG']
    # TIFF field type code -> size in bytes of one value
    _type2bytes = {1:1, 2:1, 3:2, 4:4, 5:8, 6:1, 7:1, 8:2, 9:4, 10:8, 11:4, 12:8}
    # TIFF field type code -> numpy type (byte order is applied by the *DTypes classes)
    _type2dtype = {1:numpy.uint8, 2:numpy.uint8, 3:numpy.uint16, 4:numpy.uint32, 5:_rational,
                   6:numpy.int8, 7:numpy.uint8, 8:numpy.int16, 9:numpy.int32, 10:_srational,
                   11:numpy.float32, 12:numpy.float64}

    class _LittleEndianNumpyDTypes(object):
        """numpy dtypes to use with a little-endian ('II') file."""
        uint8 = numpy.dtype('<u1')
        uint16 = numpy.dtype('<u2')
        uint32 = numpy.dtype('<u4')
        uint64 = numpy.dtype('<u8')
        int8 = numpy.dtype('<i1')
        int16 = numpy.dtype('<i2')
        int32 = numpy.dtype('<i4')
        int64 = numpy.dtype('<i8')
        float32 = numpy.dtype('<f4')
        float64 = numpy.dtype('<f8')
        complex64 = numpy.dtype('<c8')
        complex128 = numpy.dtype('<c16')

        @property
        def type2dt(self):
            """Dictionary TIFF field type code -> little-endian numpy dtype."""
            return dict((k, numpy.dtype(v).newbyteorder('<')) for k, v in TiffFile._type2dtype.items())

    class _BigEndianNumpyDTypes(object):
        """numpy dtypes to use with a big-endian ('MM') file."""
        uint8 = numpy.dtype('>u1')
        uint16 = numpy.dtype('>u2')
        uint32 = numpy.dtype('>u4')
        uint64 = numpy.dtype('>u8')
        int8 = numpy.dtype('>i1')
        int16 = numpy.dtype('>i2')
        int32 = numpy.dtype('>i4')
        int64 = numpy.dtype('>i8')
        float32 = numpy.dtype('>f4')
        float64 = numpy.dtype('>f8')
        complex64 = numpy.dtype('>c8')
        complex128 = numpy.dtype('>c16')

        @property
        def type2dt(self):
            """Dictionary TIFF field type code -> big-endian numpy dtype."""
            return dict((k, numpy.dtype(v).newbyteorder('>')) for k, v in TiffFile._type2dtype.items())

    def __init__(self, filename, subIFDpaths=(), method=1):
        """
        Opens a tiff file, reads header and IFDs.
        *filename* is the filename containing the tiff
        *subIFDpaths* is the list of tag path whose values are offset to private IFDs
            a tag path is a tuple representing the path to a given tag which must represent an IFD
            (34665,) means that tag 34665 of any given public IFD is an offset to a private IFD
            (32001, 521) means that tag 32001 of any given public IFD is an offset to a private IFD
                           and that tag 521 of any private tag referenced by a 32001 public tag is also an offset to a private IFD
            a bare integer is accepted as a shortcut for a one-element path
        *method* is the method used to read data:
            1: f=open(..., 'rb') ; numpy.frombuffer(f.read(), dtype=numpy.ubyte)
            2: f=open(..., 'rb') ; numpy.ndarray(buffer=mmap(f), dtype=numpy.ubyte)
            3: same as 2 but with modifications allowed - DANGEROUS

        Raises IOError if the file does not exist or is not a TIFF file and
        PyexttiffError if *method* is unknown or if the file content is not handled.
        """

        self._filename = filename
        self._subIFDpaths = subIFDpaths
        self._fileHandle = None
        if not os.path.exists(filename):
            raise IOError(filename + " must exists.")

        # Reading file
        if method == 1:
            with open(filename, 'rb') as f:
                self._data = numpy.frombuffer(f.read(), dtype=numpy.ubyte)
        elif method == 2:
            self._fileHandle = open(filename, 'rb')
            # 'access' is understood on every platform whereas 'prot' only exists on Unix
            mm = mmap.mmap(self._fileHandle.fileno(), 0, access=mmap.ACCESS_READ)
            self._data = numpy.ndarray(shape=(mm.size(),), buffer=mm, dtype=numpy.ubyte)
        elif method == 3:
            self._fileHandle = open(filename, 'r+b')
            mm = mmap.mmap(self._fileHandle.fileno(), 0)
            self._data = numpy.ndarray(shape=(mm.size(),), buffer=mm, dtype=numpy.ubyte)
        else:
            raise PyexttiffError("This method is unknown.")

        # The header is 8 bytes long: byte order (2), magic number (2), first IFD offset (4)
        if self._data.size < 8:
            raise IOError('file is too short to contain a TIFF header: %s' % (filename,))

        # Decoding header - byte order
        # (both markers are made of two identical bytes: native byte order can be used here)
        byteorder = self._data[0:2].view(dtype=numpy.uint16)[0]
        if byteorder == 0x4949:
            self.endian = 'little'
            self.dtypes = TiffFile._LittleEndianNumpyDTypes()
        elif byteorder == 0x4d4d:
            self.endian = 'big'
            self.dtypes = TiffFile._BigEndianNumpyDTypes()
        else:
            raise IOError('unrecognized byteorder: %s' % (hex(byteorder)))

        # Decoding header - magic number
        magic = self._get_uint16(2)
        if magic != 42:
            raise IOError('wrong magic number for TIFF file: %s' % (magic))

        # Decoding header - first IFD offset
        IFD0offset = int(self._get_uint32(4))

        # Tag paths are compared to tuples in _readIFD
        paths = [tuple(p) if isinstance(p, (tuple, list)) else (p,) for p in subIFDpaths]

        # IFD reading: public IFDs are chained, a null offset ends the chain
        self.IFDs = []
        offset = IFD0offset
        num = 0  # index of the image to ask to PIL for the current IFD
        while offset:
            ifd, offset = self._readIFD(offset, (), paths, num)
            if not ifd.has_image():
                raise PyexttiffError("Not sure about IFD that does not contain image.")
            num += 1
            self.IFDs.append(ifd)

    def _readIFD(self, offset, path, subIFDpaths, num):
        """
        Reads recursively IFDs
        *offset* is the position (in bytes) of the IFD in the file
        *path* is the tag path leading to this IFD (empty tuple for a public IFD)
        *subIFDpaths* is the list of tag paths designating private IFDs
        *num* is the index of the image to read with PIL (used for public IFDs only)

        Returns the tuple (ifd, offset of the next IFD); the offset is 0 when
        there is no next IFD.
        """
        # Plain integers avoid overflows of fixed-width numpy scalars in offset arithmetic
        offset = int(offset)
        ifd = IFD()
        n = int(self._get_uint16(offset))
        for i in range(n):
            # An entry is 12 bytes long: tag (2), type (2), count (4), value or offset (4)
            entryOffset = offset + 2 + i * 12
            entrytag = self._get_uint16(entryOffset)
            entrytype = self._get_uint16(entryOffset + 2)
            entrycount = int(self._get_uint32(entryOffset + 4))
            entrybytes = TiffFile._type2bytes.get(entrytype, 0)
            if 0 < entrybytes * entrycount <= 4:
                # The values are stored in the entry itself as soon as they fit
                # in four bytes (for instance two SHORT values)
                valueOffset = entryOffset + 8
            else:
                # File offsets are unsigned
                valueOffset = self._get_uint32(entryOffset + 8)
            entryvalue = self._get_values(valueOffset, entrytype, entrycount)
            entrypath = tuple(path) + (entrytag,)
            if any(tuple(mypath[:len(entrypath)]) == entrypath for mypath in subIFDpaths):
                # This tag is declared as a pointer to a private IFD:
                # its value must be one single offset
                if entryvalue.shape != (1, ):
                    raise PyexttiffError("Not as expected...")
                subifd, _ = self._readIFD(entryvalue[0], entrypath, subIFDpaths, None)
                ifd.append(IFDEntry(entrytag, entrytype, subifd))
            else:
                ifd.append(IFDEntry(entrytag, entrytype, entryvalue))
        if path == () and ifd.has_tag(273):
            self._read_image(ifd, num)
        nextIFD = int(self._get_uint32(offset + 2 + n * 12))
        return (ifd, nextIFD)

    @staticmethod
    def _expected_strip_count(nbRows, nbRowsPerStrip):
        """
        Returns the number of strips needed to store *nbRows* rows with
        *nbRowsPerStrip* rows per strip (the last strip can be incomplete),
        or None if *nbRowsPerStrip* is not strictly positive.
        """
        nbRows = int(nbRows)
        nbRowsPerStrip = int(nbRowsPerStrip)
        if nbRowsPerStrip <= 0:
            return None
        # Integer ceiling division; a true division would give a non-integer
        # result each time the last strip is incomplete
        return -(-nbRows // nbRowsPerStrip)

    def _read_image(self, ifd, num):
        """
        Replaces the value of tag 273 (strip offsets) of *ifd* by the list of
        the raw strips and attaches to *ifd* the image number *num* decoded by PIL.
        """
        # Raw data
        nbRows = ifd.get_value(257)
        offsetValues = numpy.atleast_1d(ifd.get_value(273))
        nbRowsPerStrip = ifd.get_value(278)
        nbBytesPerStrip = numpy.atleast_1d(ifd.get_value(279))
        if TiffFile._expected_strip_count(nbRows, nbRowsPerStrip) != len(offsetValues) or \
           len(nbBytesPerStrip) != len(offsetValues):
            raise PyexttiffError("Total number of rows, strip numbers and number of rows per strips are not consistent.")
        data = [self._get_values(stripOffset, 1, stripBytes)
                for stripOffset, stripBytes in zip(offsetValues, nbBytesPerStrip)]
        ifd.get_entry(273).set_value(data)

        # Image data
        im = self.get_PILImage()
        im.seek(num)
        data = numpy.array(im)
        if data.shape == ():
            data = numpy.array(im)  # Sometimes must be called twice to return values...
        ifd._image = data

    def get_data(self):
        """
        Returns the ndarray containing the data.
        """
        return self._data

    def get_PILImage(self):
        """
        Returns the PIL image object of the file.
        A warning is issued if the version of pillow is older than 5.4.1.
        """
        # distutils does not exist anymore since python 3.12: versions are compared as tuples of integers
        import re
        import warnings
        version = tuple(int(piece) for piece in re.findall(r'\d+', getattr(PIL, '__version__', '0'))[:3])
        if version < (5, 4, 1):
            warnings.warn("You may have issues using an old version of PIL; please update it with "
                          "'pip install --user --upgrade pillow'. This warning is issued when version "
                          "of pillow is inferior to '5.4.1' but it may work, or not, with older versions...")
        # numpy.getbuffer only exists with python2
        meth = getattr(numpy, 'getbuffer', memoryview)
        return PIL.Image.open(six.BytesIO(meth(self.get_data())))

    def _get_uint16(self, offset):
        """Returns the unsigned 16 bits integer stored at *offset*, using the file byte order."""
        offset = int(offset)
        return self.get_data()[offset:offset + 2].view(dtype=self.dtypes.uint16)[0]

    def _get_uint32(self, offset):
        """Returns the unsigned 32 bits integer stored at *offset*, using the file byte order."""
        offset = int(offset)
        return self.get_data()[offset:offset + 4].view(dtype=self.dtypes.uint32)[0]

    def _get_int32(self, offset):
        """Returns the signed 32 bits integer stored at *offset*, using the file byte order."""
        offset = int(offset)
        return self.get_data()[offset:offset + 4].view(dtype=self.dtypes.int32)[0]

    def _get_values(self, offset, typ, count):
        """
        Returns, as an array viewing the file data, the *count* values of type *typ* stored at *offset*.
        *typ* can be a numpy dtype, a numpy scalar type, a TIFF type code or a TIFF type name.
        Raises PyexttiffError if the TIFF type is unknown.
        """
        if isinstance(typ, numpy.dtype):
            dtype = typ
            size = typ.itemsize
        elif isinstance(typ, type) and issubclass(typ, numpy.generic):
            dtype = typ
            size = typ().itemsize
        else:
            if isinstance(typ, six.string_types):
                ntyp = typ
                typ = TiffFile._name2type.get(typ)
            else:
                ntyp = str(typ)
            dtype = self.dtypes.type2dt.get(typ)
            size = TiffFile._type2bytes.get(typ)
            if dtype is None or size is None:
                raise PyexttiffError('_get_values: incomplete info for type=%r [%r]: dtype=%s, bytes=%s\n' % (typ, ntyp, dtype, size))
        # offset and count can be fixed-width numpy scalars (uint16 for example)
        # whose sum would overflow: bounds are computed with plain integers
        start = int(offset)
        stop = start + size * int(count)
        return self.get_data()[start:stop].view(dtype=dtype)

    def close(self):
        """
        Closes the file.
        """
        # getattr: close is also called by __del__, even if __init__ did not complete
        handle = getattr(self, '_fileHandle', None)
        if handle is not None:
            handle.close()

    def __del__(self):
        """
        __del__ method
        """
        self.close()


class IFD(list):
    """
    This class represents an IFD: it is the list of its entries, to which
    an image can be attached.
    """

    def __init__(self):
        """Initialisation method of IFD class."""
        super(IFD, self).__init__()
        self._image = None

    def _find(self, tag):
        """
        Returns the first entry fitting the tag given, None if there is no such entry
        :param tag: tag to look for, as an integer or a name
        """
        # Tags read from a file are numpy integers: they are numbers, not names
        by_number = isinstance(tag, (int, numpy.integer))
        for entry in self:
            if (entry.get_tagValue() if by_number else entry.get_tagName()) == tag:
                return entry
        return None

    def has_tag(self, tag):
        """
        Returns True if an entry fits the tag given
        :param tag: tag to look for, as an integer or a name
        :return: True if tag exists
        """
        return self._find(tag) is not None

    def get_entry(self, tag):
        """
        Returns the entry for a tag
        :param tag: tag to look for, as an integer or a name
        :return: the entry associated to the tag
        :raises PyexttiffError: if the tag is not in this IFD
        """
        entry = self._find(tag)
        if entry is None:
            raise PyexttiffError("This tag doesn't exist in this IFD.")
        return entry

    def get_value(self, tag, human=True):
        """
        Returns the value for a tag
        :param tag: tag to look for, as an integer or a name
        :param human: if True, value is modified:
                        - value[0] is returned instead of value if array contains only one element
                        - conversion in string is achieved for arrays representing strings
        :return: the value associated to the tag
        :raises PyexttiffError: if the tag is not in this IFD
        """
        return self.get_entry(tag).get_value(human)

    def has_image(self):
        """Returns True if an image is attached to this IFD"""
        return self._image is not None

    def get_image(self):
        """
        Returns the image
        :raises PyexttiffError: if no image is attached to this IFD
        """
        if not self.has_image():
            raise PyexttiffError("This IFD doesn't contain an image.")
        return self._image

    def get_tagValues(self):
        """Returns the list of the tags as decimal values"""
        return [entry.get_tagValue() for entry in self]

    def get_tagNames(self):
        """Returns the list of the tag names"""
        return [entry.get_tagName() for entry in self]

    def as_dict(self, keys='value'):
        """
        Returns a dictionary containing all entries.
        :param keys: keys to use for the dictionary, among ('value', 'name')
        :return: the dictionary
        """
        assert keys in ('value', 'name'), "keys must be in ('value', 'name')"
        if keys == 'value':
            return {entry.get_tagValue(): entry.get_value() for entry in self}
        return {entry.get_tagName(): entry.get_value() for entry in self}

class IFDEntry(object):
    """This class represents an IFD entry: a tag, a type and a value."""

    # Description of the known tags, one tag per line:
    # <TagName> <Hex> <Type> <Number of values>
    # The indented lines describe the possible values of the preceding tag
    # (they are skipped when the lookup tables are built).
    _tag_info = '''
# standard tags:
NewSubfileType FE LONG 1
SubfileType FF SHORT 1
ImageWidth 100 SHORT|LONG 1
ImageLength 101 SHORT|LONG 1
BitsPerSample 102 SHORT SamplesPerPixel
Compression 103 SHORT 1
  Uncompressed 1
  CCITT1D 2
  Group3Fax 3
  Group4Fax 4
  LZW 5
  JPEG 6
  PackBits 32773
PhotometricInterpretation 106 SHORT 1
  WhiteIsZero 0
  BlackIsZero 1
  RGB 2
  RGBPalette 3
  TransparencyMask 4
  CMYK 5
  YCbCr 6
  CIELab 8
Threshholding 107 SHORT 1
CellWidth 108 SHORT 1
CellLength 109 SHORT 1
FillOrder 10A SHORT 1
DocumentName 10D ASCII
ImageDescription 10E ASCII
Make 10F ASCII
Model 110 ASCII
StripOffsets 111 SHORT|LONG StripsPerImage
Orientation 112 SHORT 1
  TopLeft 1
  TopRight 2
  BottomRight 3
  BottomLeft 4
  LeftTop 5
  RightTop 6
  RightBottom 7
  LeftBottom 8
SamplesPerPixel 115 SHORT 1
RowsPerStrip 116 SHORT|LONG 1
StripByteCounts 117 LONG|SHORT StripsPerImage
MinSampleValue 118 SHORT SamplesPerPixel
MaxSampleValue 119 SHORT SamplesPerPixel
XResolution 11A RATIONAL 1
YResolution 11B RATIONAL 1
PlanarConfiguration 11C SHORT 1
  Chunky 1
  Planar 2
PageName 11D ASCII
XPosition 11E DOUBLE
YPosition 11F DOUBLE
FreeOffsets 120 LONG
FreeByteCounts 121 LONG
GrayResponseUnit 122 SHORT 1
GrayResponseCurve 123 SHORT 2**BitsPerSample
T4Options 124 LONG 1
T6Options 125 LONG 1
ResolutionUnit 128 SHORT 1
PageNumber 129 SHORT 2
TransferFunction 12D SHORT (1|SamplesPerPixel)*2**BitsPerSample
Software 131 ASCII
DateTime 132 ASCII 20
Artist 13B ASCII
HostComputer 13C ASCII
Predictor 13D SHORT 1
WhitePoint 13E RATIONAL 2
PrimaryChromaticities 13F RATIONAL 6
ColorMap 140 SHORT 3*(2**BitsPerSample)
HalftoneHints 141 SHORT 2
TileWidth 142 SHORT|LONG 1
TileLength 143 SHORT|LONG 1
TileOffsets 144 LONG TilesPerImage
TileByteCounts 145 SHORT|LONG TilesPerImage
InkSet 14C SHORT 1
InkNames 14D ASCII <total number of chars in all ink name strings, including zeros>
NumberOfInks 14E SHORT 1
DotRange 150 BYTE|SHORT 2|2*NumberOfInks
TargetPrinter 151 ASCII any
ExtraSamples 152 BYTE <number of extra components per pixel>
SampleFormat 153 SHORT SamplesPerPixel
SMinSampleValue 154 Any SamplesPerPixel
SMaxSampleValue 155 Any SamplesPerPixel
TransferRange 156 SHORT 6
JPEGProc 200 SHORT 1
JPEGInterchangeFormat 201 LONG 1
JPEGInterchangeFormatLength 202 LONG 1
JPEGRestartInterval 203 SHORT 1
JPEGLosslessPredictos 205 SHORT SamplesPerPixel
JPEGPointTransforms 206 SHORT SamplesPerPixel
JPEGQTables 207 LONG SamplesPerPixel
JPEGDCTables 208 LONG SamplesPerPixel
JPEGACTables 209 LONG SamplesPerPixel
YCbCrCoefficients 211 RATIONAL 3
YCbCrSubSampling 212 SHORT 2
YCbCrPositioning 213 SHORT 1
ReferenceBlackWhite 214 LONG 2*SamplesPerPixel
Copyright 8298 ASCII Any

# non-standard tags:
CZ_LSMInfo 866C CZ_LSM

# EXIF tags, see http://www.awaresystems.be/imaging/tiff/tifftags/privateifd/exif.html
EXIF_IFDOffset 8769 SHORT 1
EXIF_ExposureTime 829a RATIONAL 1
EXIF_FNumber 829d RATIONAL 1
EXIF_ExposureProgram 8822 SHORT 1
EXIF_SpectralSensitivity 8824 ASCII
EXIF_ISOSpeedRatings 8827 SHORT 1
EXIF_OECF 8828 UNDEFINED
EXIF_ExifVersion 9000 UNDEFINED 4
EXIF_DateTimeOriginal 9003 ASCII
EXIF_DateTimeDigitized 9004 ASCII
EXIF_ComponentsConfiguration 9101 UNDEFINED 4
EXIF_CompressedBitsPerPixel 9102 RATIONAL 1
EXIF_ShutterSpeedValue 9201 SRATIONAL 1
EXIF_ApertureValue 9202 RATIONAL 1
EXIF_BrightnessValue 9203 SRATIONAL 1
EXIF_ExposureBiasValue 9204 SRATIONAL 1
EXIF_MaxApertureValue 9205 RATIONAL 1
EXIF_SubjectDistance 9206 RATIONAL 1
EXIF_MeteringMode 9207 SHORT 1
EXIF_LightSource 9208 SHORT 1
EXIF_Flash 9209 SHORT 1
EXIF_FocalLength 920a RATIONAL 1
EXIF_SubjectArea 9214 SHORT 2|3|4
EXIF_MakerNote 927c UNDEFINED
EXIF_UserComment 9286 UNDEFINED
EXIF_SubsecTime 9290 ASCII
EXIF_SubsecTimeOriginal 9291 ASCII
EXIF_SubsecTimeDigitized 9292 ASCII
EXIF_FlashpixVersion a000 UNDEFINED 4
EXIF_ColorSpace a001 SHORT 1
EXIF_PixelXDimension a002 SHORT!LONG 1
EXIF_PixelYDimension a003 SHORT!LONG 1
EXIF_RelatedSoundFile a004 ASCII 13
EXIF_FlashEnergy a20b RATIONAL 1
EXIF_SpatialFrequencyResponse a20c UNDEFINED
EXIF_FocalPlaneXResolution a20e RATIONAL 1
EXIF_FocalPlaneYResolution a20f RATIONAL 1
EXIF_FocalPlaneResolutionUnit a210 SHORT 1
EXIF_SubjectLocation a214 SHORT 2
EXIF_ExposureIndex a215 RATIONAL 1
EXIF_SensingMethod a217 SHORT 1
EXIF_FileSource a300 UNDEFINED 1
EXIF_SceneType a301 UNDEFINED 1
EXIF_CFAPattern a302 UNDEFINED
EXIF_CustomRendered a401 SHORT 1
EXIF_ExposureMode a402 SHORT 1
EXIF_WhiteBalance a403 SHORT 1
EXIF_DigitalZoomRatio a404 RATIONAL 1
EXIF_FocalLengthIn35mmFilm a405 SHORT 1
EXIF_SceneCaptureType a406 SHORT 1
EXIF_GainControl a407 SHORT 1
EXIF_Contrast a408 SHORT 1
EXIF_Saturation a409 SHORT 1
EXIF_Sharpness a40a SHORT 1
EXIF_DeviceSettingDescription a40b UNDEFINED
EXIF_SubjectDistanceRange a40c SHORT 1
EXIF_ImageUniqueID a420 ASCII 33

'''
    # Lookup tables built from _tag_info
    _tag_value2name = {}  # tag number -> tag name
    _tag_name2value = {}  # tag name -> tag number
    _tag_value2type = {}  # tag number -> type name
    for _line in _tag_info.split('\n'):
        # Empty lines, comments and (indented) descriptions of values are skipped
        if not _line or _line[0] in '# ':
            continue
        _name, _hexa, _typ = _line.split()[:3]
        _number = int(_hexa, 16)
        _tag_value2name[_number] = _name
        _tag_value2type[_number] = _typ
        _tag_name2value[_name] = _number
    # The loop variables must not remain as class attributes
    del _line, _name, _hexa, _typ, _number

    def __init__(self, tag, entrytype, value=None):
        """
        *tag* is the tag number of the entry
        *entrytype* is the type of the entry
        *value* is the value associated to the tag

        The name of the entry is the one known for *tag*, or 'TAG' followed
        by the hexadecimal tag number for an unknown tag.
        """
        self._tag = tag
        self._type = entrytype
        self._value = value
        self._name = self._tag_value2name.get(tag, 'TAG%s' % (hex(tag),))

    def is_image(self):
        """Returns True if content is an image (tag 273)"""
        return self.get_tagValue() == 273

    def is_IFD(self):
        """Returns True if content is an IFD."""
        # The raw value is tested: the 'human' value of a container holding
        # one single element is this element
        return isinstance(self._value, IFD)

    def get_tagValue(self):
        """Returns the tag"""
        return self._tag

    def get_value(self, human=True):
        """
        Returns the value
        if human=True, value is modified:
            - value[0] is returned instead of value if array contains only one element
            - conversion in string is achieved for arrays representing strings
        An IFD is always returned as is.
        """
        value = self._value
        if not human:
            return value
        if self.get_type() == 2:  # ASCII
            # Each element is viewed as a byte string (numpy drops the trailing NUL characters)
            # The conversion is done on the whole array, even when it is
            # empty or made of one single character
            chars = numpy.atleast_1d(numpy.asarray(value))
            return b''.join(chars.view('|S%s' % (chars.dtype.itemsize,))).decode('UTF8')
        if isinstance(value, numpy.ndarray):
            return value[0] if len(value) == 1 else value
        if not isinstance(value, IFD) and len(value) == 1:
            return value[0]
        return value

    def get_tagName(self):
        """Returns the tag name"""
        return self._name

    def get_type(self):
        """Returns the type of entry."""
        return self._type

    def set_value(self, value):
        """Sets the value of the entry."""
        self._value = value