#!/usr/bin/env python
# -*- coding: utf-8 -*
# Copyright (c) Météo France (2014-)
# This software is governed by the CeCILL-C license under French law.
"""
This module aims at reading TIFF files with private IFDs.
It uses PIL for image reading.

This module uses code from pylibtiff.
"""

import os
import numpy
import mmap
import PIL.Image
import io

class PyexttiffError(Exception):
    """Exception raised for every error specific to pyexttiff."""
class TiffFile(object):
    """
    This class allows the access to the entire tiff file (tags and images).
    """

    # The str() calls are a workaround for python2 used together with
    # "from __future__ import unicode_literals": field names must be native
    # strings. They can be removed once python2 is no longer supported.
    _rational = numpy.dtype([(str('numer'), numpy.uint32),
                             (str('denom'), numpy.uint32)])
    _srational = numpy.dtype([(str('numer'), numpy.int32),
                              (str('denom'), numpy.int32)])

    # TIFF field type code -> type name
    _type2name = {1: 'BYTE',
                  2: 'ASCII',
                  3: 'SHORT',
                  4: 'LONG',
                  5: 'RATIONAL',  # two longs, lsm uses it for float64
                  6: 'SBYTE',
                  7: 'UNDEFINED',
                  8: 'SSHORT',
                  9: 'SLONG',
                  10: 'SRATIONAL',
                  11: 'FLOAT',
                  12: 'DOUBLE',
                  }
    _name2type = dict((v, k) for k, v in _type2name.items())
    _name2type['SHORT|LONG'] = _name2type['LONG']
    _name2type['LONG|SHORT'] = _name2type['LONG']

    # TIFF field type code -> size in bytes of one value
    _type2bytes = {1: 1, 2: 1, 3: 2, 4: 4, 5: 8, 6: 1,
                   7: 1, 8: 2, 9: 4, 10: 8, 11: 4, 12: 8}

    # TIFF field type code -> numpy type (byte order not yet applied)
    _type2dtype = {1: numpy.uint8,
                   2: numpy.uint8,
                   3: numpy.uint16,
                   4: numpy.uint32,
                   5: _rational,
                   6: numpy.int8,
                   7: numpy.uint8,
                   8: numpy.int16,
                   9: numpy.int32,
                   10: _srational,
                   11: numpy.float32,
                   12: numpy.float64}

    class _LittleEndianNumpyDTypes(object):
        """Numpy dtypes forced to little-endian byte order."""
        uint8 = numpy.dtype('<u1')
        uint16 = numpy.dtype('<u2')
        uint32 = numpy.dtype('<u4')
        uint64 = numpy.dtype('<u8')
        int8 = numpy.dtype('<i1')
        int16 = numpy.dtype('<i2')
        int32 = numpy.dtype('<i4')
        int64 = numpy.dtype('<i8')
        float32 = numpy.dtype('<f4')
        float64 = numpy.dtype('<f8')
        complex64 = numpy.dtype('<c8')
        complex128 = numpy.dtype('<c16')

        @property
        def type2dt(self):
            """Dictionary: TIFF type code -> little-endian numpy dtype."""
            return dict((k, numpy.dtype(v).newbyteorder('<'))
                        for k, v in TiffFile._type2dtype.items())

    class _BigEndianNumpyDTypes(object):
        """Numpy dtypes forced to big-endian byte order."""
        uint8 = numpy.dtype('>u1')
        uint16 = numpy.dtype('>u2')
        uint32 = numpy.dtype('>u4')
        uint64 = numpy.dtype('>u8')
        int8 = numpy.dtype('>i1')
        int16 = numpy.dtype('>i2')
        int32 = numpy.dtype('>i4')
        int64 = numpy.dtype('>i8')
        float32 = numpy.dtype('>f4')
        float64 = numpy.dtype('>f8')
        complex64 = numpy.dtype('>c8')
        complex128 = numpy.dtype('>c16')

        @property
        def type2dt(self):
            """Dictionary: TIFF type code -> big-endian numpy dtype."""
            return dict((k, numpy.dtype(v).newbyteorder('>'))
                        for k, v in TiffFile._type2dtype.items())

    def __init__(self, filename, subIFDpaths=None, method=1):
        """
        Opens a tiff file, reads header and IFDs.

        *filename* is the filename containing the tiff

        *subIFDpaths* is the list of tag paths whose values are offsets to
        private IFDs (default: no private IFD).
        A tag path is a tuple representing the path to a given tag which
        must represent an IFD:

        - (34665,) means that tag 34665 of any given public IFD is an
          offset to a private IFD
        - (32001, 521) means that tag 32001 of any given public IFD is an
          offset to a private IFD and that tag 521 of any private IFD
          referenced by a 32001 public tag is also an offset to a
          private IFD

        *method* is the method used to read data:

        - 1: f=open(..., 'rb') ;
          numpy.frombuffer(f.read(), dtype=numpy.ubyte)
        - 2: f=open(..., 'rb') ;
          numpy.ndarray(buffer=mmap(f), dtype=numpy.ubyte)
        - 3: same as 2 but with modifications allowed - DANGEROUS

        Raises IOError if the file does not exist or if its header is not
        a TIFF header, PyexttiffError if *method* is unknown or if the
        IFDs cannot be interpreted.
        """
        self._filename = filename
        # None instead of a mutable [] default, which would be shared
        # between all instances.
        self._subIFDpaths = [] if subIFDpaths is None else subIFDpaths
        self._fileHandle = None

        if not os.path.exists(filename):
            raise IOError(filename + " must exists.")

        # Reading file
        if method == 1:
            with open(filename, 'rb') as f:
                self._data = numpy.frombuffer(f.read(), dtype=numpy.ubyte)
        elif method == 2:
            self._fileHandle = open(filename, 'rb')
            # access= is the portable way to ask for a read-only mapping
            # (prot= only exists on Unix).
            mm = mmap.mmap(self._fileHandle.fileno(), 0,
                           access=mmap.ACCESS_READ)
            self._data = numpy.ndarray(shape=(mm.size(),), buffer=mm,
                                       dtype=numpy.ubyte)
        elif method == 3:
            self._fileHandle = open(filename, 'r+b')
            mm = mmap.mmap(self._fileHandle.fileno(), 0)
            self._data = numpy.ndarray(shape=(mm.size(),), buffer=mm,
                                       dtype=numpy.ubyte)
        else:
            raise PyexttiffError("This method is unknown.")

        # The header is 8 bytes long: byte order (2), magic number (2)
        # and offset of the first IFD (4).
        if self._data.size < 8:
            raise IOError('file too short to be a TIFF file: %s' % (filename,))

        # Decoding header - byte order
        # (both markers are made of two identical bytes, hence the native
        # byte order of this view does not matter)
        byteorder = self._data[0:2].view(dtype=numpy.uint16)[0]
        if byteorder == 0x4949:
            self.endian = 'little'
            self.dtypes = TiffFile._LittleEndianNumpyDTypes()
        elif byteorder == 0x4d4d:
            self.endian = 'big'
            self.dtypes = TiffFile._BigEndianNumpyDTypes()
        else:
            raise IOError('unrecognized byteorder: %s' % (hex(byteorder)))

        # Decoding header - magic number
        magic = self._get_uint16(2)
        if magic != 42:
            raise IOError('wrong magic number for TIFF file: %s' % (magic))

        # Decoding header - first IFD offset
        IFD0offset = int(self._get_uint32(4))

        # IFD reading: public IFDs are chained, a null offset ends the chain
        self.IFDs = []
        offset = IFD0offset
        num = 0
        visited = set()
        while offset:
            if offset in visited:
                # A corrupted chain would otherwise loop forever.
                raise PyexttiffError("Loop detected in the IFD chain.")
            visited.add(offset)
            ifd, offset = self._readIFD(offset, (), self._subIFDpaths, num)
            if ifd.has_image():
                num += 1
            else:
                raise PyexttiffError("Not sure about IFD that does not contain image.")
            self.IFDs.append(ifd)

    def _readIFD(self, offset, path, subIFDpaths, num):
        """
        Reads recursively IFDs.

        *offset* is the position in file of the IFD to read

        *path* is the tag path (tuple) leading to this IFD,
        () for a public IFD

        *subIFDpaths* is the list of tag paths pointing to private IFDs

        *num* is the rank of the IFD among the public ones
        (currently unused, None for a private IFD)

        Returns the tuple (ifd, offset of the next IFD).
        """
        # Plain python ints: numpy scalars would keep offset arithmetic in
        # fixed width.
        offset = int(offset)
        ifd = IFD()
        n = int(self._get_uint16(offset))
        for i in range(n):
            # Each entry is 12 bytes long: tag (2), type (2), count (4)
            # and value or offset to the value (4).
            entryOffset = offset + 2 + i * 12
            entrytag = int(self._get_uint16(entryOffset))
            entrytype = int(self._get_uint16(entryOffset + 2))
            entrycount = int(self._get_uint32(entryOffset + 4))
            entrybytes = TiffFile._type2bytes.get(entrytype, 0)
            if entrybytes and entrybytes * entrycount <= 4:
                # The value is stored in the entry itself as soon as it
                # fits into 4 bytes, whatever the count is.
                entryvalue = self._get_values(entryOffset + 8,
                                              entrytype, entrycount)
            else:
                # Offsets are unsigned 32-bit integers. An unknown type
                # also comes here and is rejected by _get_values.
                valueOffset = int(self._get_uint32(entryOffset + 8))
                entryvalue = self._get_values(valueOffset,
                                              entrytype, entrycount)
            entrypath = tuple(path) + (entrytag,)
            depth = len(entrypath)
            if any(tuple(mypath[:depth]) == entrypath
                   for mypath in subIFDpaths):
                # The value of this entry is the offset of a private IFD.
                # In the first version of this tool, entryvalue was used as
                # an array here; entryvalue[0] is now used to suppress a
                # numpy warning. If the error below is raised, this part
                # needs more investigation.
                if entryvalue.shape != (1,):
                    raise PyexttiffError("Not as expected...")
                subifd, _ = self._readIFD(entryvalue[0], entrypath,
                                          subIFDpaths, None)
                ifd.append(IFDEntry(entrytag, entrytype, subifd))
            else:
                ifd.append(IFDEntry(entrytag, entrytype, entryvalue))

        if path == () and ifd.has_tag(273):
            # Raw data
            nbRows = int(ifd.get_value(257))
            offsetValues = numpy.atleast_1d(ifd.get_value(273))
            nbRowsPerStrip = int(ifd.get_value(278))
            nbBytesPerStrip = numpy.atleast_1d(ifd.get_value(279))
            # Expected number of strips is ceil(nbRows / nbRowsPerStrip),
            # computed with integer arithmetic only.
            if nbRowsPerStrip == 0 or \
               -(-nbRows // nbRowsPerStrip) != len(offsetValues):
                raise PyexttiffError("Total number of rows, strip numbers and number of rows per strips are not consistent.")
            data = [self._get_values(stripOffset, 1, stripBytes)
                    for stripOffset, stripBytes
                    in zip(offsetValues, nbBytesPerStrip)]
            ifd.get_entry(273).set_value(data)
            # Image data
            # NOTE(review): the image is decoded from the whole file and
            # *num* is not used to select a frame - confirm the behaviour
            # with multi-image files.
            im = self.get_PILImage()
            data = numpy.array(im)
            if data.shape == ():
                data = numpy.array(im)  # Sometimes must be called twice to return values...
            ifd._image = data

        # The offset of the next IFD follows the last entry.
        nextIFD = int(self._get_uint32(offset + 2 + n * 12))
        return (ifd, nextIFD)

    def get_data(self):
        """
        Returns the ndarray containing the data.
        """
        return self._data

    def get_PILImage(self):
        """
        Returns the PIL image object of the file.
        """
        try:
            meth = numpy.getbuffer  # only exists with python2
        except AttributeError:
            meth = memoryview
        return PIL.Image.open(io.BytesIO(meth(self.get_data())))

    def _get_uint16(self, offset):
        """Returns the unsigned 16-bit integer stored at *offset*."""
        return self.get_data()[offset:offset + 2].view(dtype=self.dtypes.uint16)[0]

    def _get_uint32(self, offset):
        """Returns the unsigned 32-bit integer stored at *offset*."""
        return self.get_data()[offset:offset + 4].view(dtype=self.dtypes.uint32)[0]

    def _get_int32(self, offset):
        """Returns the signed 32-bit integer stored at *offset*."""
        return self.get_data()[offset:offset + 4].view(dtype=self.dtypes.int32)[0]

    def _get_values(self, offset, typ, count):
        """
        Returns, as an array, the *count* values of type *typ* stored
        at *offset*.

        *typ* can be a numpy dtype, a numpy scalar type, a TIFF type code
        or a TIFF type name.

        Raises PyexttiffError if *typ* is unknown.
        """
        if isinstance(typ, numpy.dtype):
            dtype = typ
            size = typ.itemsize
        elif isinstance(typ, type) and issubclass(typ, numpy.generic):
            dtype = typ
            size = typ().itemsize
        else:
            if isinstance(typ, str):
                ntyp = typ
                typ = TiffFile._name2type.get(typ)
            else:
                ntyp = str(typ)
            dtype = self.dtypes.type2dt.get(typ)
            size = TiffFile._type2bytes.get(typ)
            if dtype is None or size is None:
                raise PyexttiffError('_get_values: incomplete info for type=%r [%r]: dtype=%s, bytes=%s\n' % (typ, ntyp, dtype, size))
        # Plain python ints: with numpy scalars the end of the slice would
        # be computed in fixed width.
        start = int(offset)
        stop = start + size * int(count)
        return self.get_data()[start:stop].view(dtype=dtype)

    def close(self):
        """
        Closes the file.
        """
        # getattr: close() is also called by __del__, possibly on an object
        # whose initialisation did not reach the attribute definition.
        handle = getattr(self, '_fileHandle', None)
        if handle is not None:
            handle.close()
            self._fileHandle = None

    def __del__(self):
        """
        __del__ method
        """
        self.close()
class IFD(list):
    """This class represent an IFD: a list of entries, plus an optional image."""

    def __init__(self):
        """Initialisation method of IFD class."""
        super(IFD, self).__init__()
        self._image = None

    @staticmethod
    def _is_numeric(tag):
        """Returns True if *tag* is a tag number (python or numpy integer)."""
        return isinstance(tag, (int, numpy.integer))

    def has_tag(self, tag):
        """
        Returns True if an entry fits the tag given

        :param tag: tag to look for, as an integer or a name
        :return: True if tag exists
        """
        if self._is_numeric(tag):
            return tag in self.get_tagValues()
        else:
            return tag in self.get_tagNames()

    def get_entry(self, tag):
        """
        Returns the entry for a tag

        :param tag: tag to look for, as an integer or a name
        :return: the first entry associated to the tag
        :raises PyexttiffError: if no entry fits the tag
        """
        numeric = self._is_numeric(tag)
        for entry in self:
            key = entry.get_tagValue() if numeric else entry.get_tagName()
            if key == tag:
                return entry
        raise PyexttiffError("This tag doesn't exist in this IFD.")

    def get_value(self, tag, human=True):
        """
        Returns the value for a tag

        :param tag: tag to look for, as an integer or a name
        :param human: if True, value is modified:

            - value[0] is returned instead of value if array contains
              only one element
            - conversion in string is achieved for arrays representing
              strings
        :return: the value associated to the tag
        :raises PyexttiffError: if no entry fits the tag
        """
        return self.get_entry(tag).get_value(human)

    def has_image(self):
        """Returns True if one tag is an image"""
        return self._image is not None

    def get_image(self):
        """
        Returns the image

        :raises PyexttiffError: if the IFD does not contain an image
        """
        if not self.has_image():
            raise PyexttiffError("This IFD doesn't contain an image.")
        return self._image

    def get_tagValues(self):
        """Returns the list of the tags as decimal values"""
        return [entry.get_tagValue() for entry in self]

    def get_tagNames(self):
        """Returns the list of the tag names"""
        return [entry.get_tagName() for entry in self]

    def as_dict(self, keys='value'):
        """
        Returns a dictionary containing all entries.

        :param keys: keys to use for the dictionary, among ('value', 'name')
        :return: the dictionary of the (human) values of the entries
        :raises ValueError: if *keys* is not one of ('value', 'name')
        """
        if keys not in ('value', 'name'):
            raise ValueError("keys must be in ('value', 'name')")
        if keys == 'value':
            return {entry.get_tagValue(): entry.get_value() for entry in self}
        return {entry.get_tagName(): entry.get_value() for entry in self}
class IFDEntry(object):
    """This class represent an IFD entry"""

    # Description of the known tags, one tag per line:
    #   <TagName> <Hex> <Type> <Number of values>
    # Lines beginning with a space give the names of the values a tag can
    # take; they are ignored when the lookup tables are built.
    _tag_info = '''
# standard tags:
NewSubfileType FE LONG 1
SubfileType FF SHORT 1
ImageWidth 100 SHORT|LONG 1
ImageLength 101 SHORT|LONG 1
BitsPerSample 102 SHORT SamplesPerPixel
Compression 103 SHORT 1
  Uncompressed 1
  CCITT1D 2
  Group3Fax 3
  Group4Fax 4
  LZW 5
  JPEG 6
  PackBits 32773
PhotometricInterpretation 106 SHORT 1
  WhiteIsZero 0
  BlackIsZero 1
  RGB 2
  RGBPalette 3
  TransparencyMask 4
  CMYK 5
  YCbCr 6
  CIELab 8
Threshholding 107 SHORT 1
CellWidth 108 SHORT 1
CellLength 109 SHORT 1
FillOrder 10A SHORT 1
DocumentName 10D ASCII
ImageDescription 10E ASCII
Make 10F ASCII
Model 110 ASCII
StripOffsets 111 SHORT|LONG StripsPerImage
Orientation 112 SHORT 1
  TopLeft 1
  TopRight 2
  BottomRight 3
  BottomLeft 4
  LeftTop 5
  RightTop 6
  RightBottom 7
  LeftBottom 8
SamplesPerPixel 115 SHORT 1
RowsPerStrip 116 SHORT|LONG 1
StripByteCounts 117 LONG|SHORT StripsPerImage
MinSampleValue 118 SHORT SamplesPerPixel
MaxSampleValue 119 SHORT SamplesPerPixel
XResolution 11A RATIONAL 1
YResolution 11B RATIONAL 1
PlanarConfiguration 11C SHORT 1
  Chunky 1
  Planar 2
PageName 11D ASCII
XPosition 11E DOUBLE
YPosition 11F DOUBLE
FreeOffsets 120 LONG
FreeByteCounts 121 LONG
GrayResponseUnit 122 SHORT 1
GrayResponseCurve 123 SHORT 2**BitsPerSample
T4Options 124 LONG 1
T6Options 125 LONG 1
ResolutionUnit 128 SHORT 1
PageNumber 129 SHORT 2
TransferFunction 12D SHORT (1|SamplesPerPixel)*2**BitsPerSample
Software 131 ASCII
DateTime 132 ASCII 20
Artist 13B ASCII
HostComputer 13C ASCII
Predictor 13D SHORT 1
WhitePoint 13E RATIONAL 2
PrimaryChromaticities 13F RATIONAL 6
ColorMap 140 SHORT 3*(2**BitsPerSample)
HalftoneHints 141 SHORT 2
TileWidth 142 SHORT|LONG 1
TileLength 143 SHORT|LONG 1
TileOffsets 144 LONG TilesPerImage
TileByteCounts 145 SHORT|LONG TilesPerImage
InkSet 14C SHORT 1
InkNames 14D ASCII <total number of chars in all ink name strings, including zeros>
NumberOfInks 14E SHORT 1
DotRange 150 BYTE|SHORT 2|2*NumberOfInks
TargetPrinter 151 ASCII any
ExtraSamples 152 BYTE <number of extra components per pixel>
SampleFormat 153 SHORT SamplesPerPixel
SMinSampleValue 154 Any SamplesPerPixel
SMaxSampleValue 155 Any SamplesPerPixel
TransferRange 156 SHORT 6
JPEGProc 200 SHORT 1
JPEGInterchangeFormat 201 LONG 1
JPEGInterchangeFormatLength 202 LONG 1
JPEGRestartInterval 203 SHORT 1
JPEGLosslessPredictos 205 SHORT SamplesPerPixel
JPEGPointTransforms 206 SHORT SamplesPerPixel
JPEGQTables 207 LONG SamplesPerPixel
JPEGDCTables 208 LONG SamplesPerPixel
JPEGACTables 209 LONG SamplesPerPixel
YCbCrCoefficients 211 RATIONAL 3
YCbCrSubSampling 212 SHORT 2
YCbCrPositioning 213 SHORT 1
ReferenceBlackWhite 214 LONG 2*SamplesPerPixel
Copyright 8298 ASCII Any

# non-standard tags:
CZ_LSMInfo 866C CZ_LSM

# EXIF tags
EXIF_IFDOffset 8769 SHORT 1
EXIF_ExposureTime 829a RATIONAL 1
EXIF_FNumber 829d RATIONAL 1
EXIF_ExposureProgram 8822 SHORT 1
EXIF_SpectralSensitivity 8824 ASCII
EXIF_ISOSpeedRatings 8827 SHORT 1
EXIF_OECF 8828 UNDEFINED
EXIF_ExifVersion 9000 UNDEFINED 4
EXIF_DateTimeOriginal 9003 ASCII
EXIF_DateTimeDigitized 9004 ASCII
EXIF_ComponentsConfiguration 9101 UNDEFINED 4
EXIF_CompressedBitsPerPixel 9102 RATIONAL 1
EXIF_ShutterSpeedValue 9201 SRATIONAL 1
EXIF_ApertureValue 9202 RATIONAL 1
EXIF_BrightnessValue 9203 SRATIONAL 1
EXIF_ExposureBiasValue 9204 SRATIONAL 1
EXIF_MaxApertureValue 9205 RATIONAL 1
EXIF_SubjectDistance 9206 RATIONAL 1
EXIF_MeteringMode 9207 SHORT 1
EXIF_LightSource 9208 SHORT 1
EXIF_Flash 9209 SHORT 1
EXIF_FocalLength 920a RATIONAL 1
EXIF_SubjectArea 9214 SHORT 2|3|4
EXIF_MakerNote 927c UNDEFINED
EXIF_UserComment 9286 UNDEFINED
EXIF_SubsecTime 9290 ASCII
EXIF_SubsecTimeOriginal 9291 ASCII
EXIF_SubsecTimeDigitized 9292 ASCII
EXIF_FlashpixVersion a000 UNDEFINED 4
EXIF_ColorSpace a001 SHORT 1
EXIF_PixelXDimension a002 SHORT|LONG 1
EXIF_PixelYDimension a003 SHORT|LONG 1
EXIF_RelatedSoundFile a004 ASCII 13
EXIF_FlashEnergy a20b RATIONAL 1
EXIF_SpatialFrequencyResponse a20c UNDEFINED
EXIF_FocalPlaneXResolution a20e RATIONAL 1
EXIF_FocalPlaneYResolution a20f RATIONAL 1
EXIF_FocalPlaneResolutionUnit a210 SHORT 1
EXIF_SubjectLocation a214 SHORT 2
EXIF_ExposureIndex a215 RATIONAL 1
EXIF_SensingMethod a217 SHORT 1
EXIF_FileSource a300 UNDEFINED 1
EXIF_SceneType a301 UNDEFINED 1
EXIF_CFAPattern a302 UNDEFINED
EXIF_CustomRendered a401 SHORT 1
EXIF_ExposureMode a402 SHORT 1
EXIF_WhiteBalance a403 SHORT 1
EXIF_DigitalZoomRatio a404 RATIONAL 1
EXIF_FocalLengthIn35mmFilm a405 SHORT 1
EXIF_SceneCaptureType a406 SHORT 1
EXIF_GainControl a407 SHORT 1
EXIF_Contrast a408 SHORT 1
EXIF_Saturation a409 SHORT 1
EXIF_Sharpness a40a SHORT 1
EXIF_DeviceSettingDescription a40b UNDEFINED
EXIF_SubjectDistanceRange a40c SHORT 1
EXIF_ImageUniqueID a420 ASCII 33
'''

    # Lookup tables built once, at class creation, from _tag_info
    _tag_value2name = {}
    _tag_name2value = {}
    _tag_value2type = {}
    for line in _tag_info.split('\n'):
        # Empty lines, comments and (indented) value names define no tag.
        if not line or line[0] in '# ':
            continue
        n, h, t = line.split()[:3]
        h = int(h, 16)  # the tag number is written in hexadecimal
        _tag_value2name[h] = n
        _tag_value2type[h] = t
        _tag_name2value[n] = h
    # Do not keep the loop variables as class attributes.
    del line, n, h, t

    def __init__(self, tag, entrytype, value=None):
        """
        *tag* is the tag number of the entry

        *entrytype* is the type of the entry

        *value* is the value associated to the tag
        """
        self._tag = tag
        self._type = entrytype
        self._value = value
        # Unknown tags are named after their hexadecimal number.
        self._name = self._tag_value2name.get(tag, 'TAG%s' % (hex(tag),))

    def is_image(self):
        """Returns True if content in an image"""
        return self.get_tagValue() == 273

    def is_IFD(self):
        """Returns True if content is an IFD."""
        # The raw value is tested: an IFD is a list and must not be
        # reduced to its first entry before the test.
        return isinstance(self._value, IFD)

    def get_tagValue(self):
        """Returns the tag"""
        return self._tag

    def get_value(self, human=True):
        """
        Returns the value

        if human=True, value is modified:

        - value[0] is returned instead of value if array contains
          only one element
        - conversion in string is achieved for arrays representing strings

        A value which is None or an IFD is always returned as is.
        """
        value = self._value
        if not human or value is None:
            return value
        if self.get_type() == 2 and isinstance(value, numpy.ndarray):
            # ASCII: the null bytes are removed before decoding.
            return value.tobytes().replace(b'\x00', b'').decode('UTF8')
        # An IFD is a list: it must be kept whole even with a single entry.
        if hasattr(value, '__len__') and len(value) == 1 \
           and not self.is_IFD():
            value = value[0]
        return value

    def get_tagName(self):
        """Returns the tag name"""
        return self._name

    def get_type(self):
        """Returns the type of entry."""
        return self._type

    def set_value(self, value):
        """Sets the value of the entry."""
        self._value = value