#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) Météo France (2014-)
# This software is governed by the CeCILL-C license under French law.
# http://www.cecill.info
"""
This module aims at reading tiff with private IFDs.
It uses PIL for image reading.
This module uses code from pylibtiff (https://pypi.python.org/pypi/libtiff, https://code.google.com/p/pylibtiff or https://github.com/hmeine/pylibtiff)
"""
from __future__ import print_function, absolute_import, division, unicode_literals
import os
import numpy
import mmap
import PIL.Image
import six
class PyexttiffError(Exception):
    """Error handling for pyexttiff.

    Raised by the classes of this module when a TIFF file, an IFD or a tag
    request cannot be handled.
    """
class TiffFile(object):
    """
    This class allows the access to the entire tiff file (tags and images).

    The whole file is exposed as a flat array of unsigned bytes (see
    :meth:`get_data`); header fields, IFD entries and strip data are decoded
    by viewing slices of that array with dtypes carrying the byte order
    announced in the file header. Public IFDs are stored, in file order, in
    the ``IFDs`` attribute.
    """

    # see https://github.com/numpy/numpy/issues/2407 for the str workaround
    # We will be able to suppress it when python2 with
    # "from __future__ import unicode_literals" will not be used
    _rational = numpy.dtype([(str('numer'), numpy.uint32), (str('denom'), numpy.uint32)])
    _srational = numpy.dtype([(str('numer'), numpy.int32), (str('denom'), numpy.int32)])

    # TIFF field type code -> type name
    _type2name = {1:'BYTE', 2:'ASCII', 3:'SHORT', 4:'LONG', 5:'RATIONAL',  # two longs, lsm uses it for float64
                  6:'SBYTE', 7:'UNDEFINED', 8:'SSHORT', 9:'SLONG', 10:'SRATIONAL',
                  11:'FLOAT', 12:'DOUBLE',
                  }
    # type name -> TIFF field type code (with aliases for ambiguous declarations)
    _name2type = dict((v, k) for k, v in _type2name.items())
    _name2type['SHORT|LONG'] = _name2type['LONG']
    _name2type['LONG|SHORT'] = _name2type['LONG']
    # TIFF field type code -> size in bytes of one value
    _type2bytes = {1:1, 2:1, 3:2, 4:4, 5:8, 6:1, 7:1, 8:2, 9:4, 10:8, 11:4, 12:8}
    # TIFF field type code -> numpy type (byte order is applied by the *NumpyDTypes classes)
    _type2dtype = {1:numpy.uint8, 2:numpy.uint8, 3:numpy.uint16, 4:numpy.uint32, 5:_rational,
                   6:numpy.int8, 7:numpy.uint8, 8:numpy.int16, 9:numpy.int32, 10:_srational,
                   11:numpy.float32, 12:numpy.float64}

    class _LittleEndianNumpyDTypes(object):
        """Little-endian numpy dtypes, used when the file header starts with 'II'."""
        uint8 = numpy.dtype('<u1')
        uint16 = numpy.dtype('<u2')
        uint32 = numpy.dtype('<u4')
        uint64 = numpy.dtype('<u8')
        int8 = numpy.dtype('<i1')
        int16 = numpy.dtype('<i2')
        int32 = numpy.dtype('<i4')
        int64 = numpy.dtype('<i8')
        float32 = numpy.dtype('<f4')
        float64 = numpy.dtype('<f8')
        complex64 = numpy.dtype('<c8')
        complex128 = numpy.dtype('<c16')

        @property
        def type2dt(self):
            """Mapping from TIFF field type code to little-endian numpy dtype."""
            return dict((k, numpy.dtype(v).newbyteorder('<')) for k, v in TiffFile._type2dtype.items())

    class _BigEndianNumpyDTypes(object):
        """Big-endian numpy dtypes, used when the file header starts with 'MM'."""
        uint8 = numpy.dtype('>u1')
        uint16 = numpy.dtype('>u2')
        uint32 = numpy.dtype('>u4')
        uint64 = numpy.dtype('>u8')
        int8 = numpy.dtype('>i1')
        int16 = numpy.dtype('>i2')
        int32 = numpy.dtype('>i4')
        int64 = numpy.dtype('>i8')
        float32 = numpy.dtype('>f4')
        float64 = numpy.dtype('>f8')
        complex64 = numpy.dtype('>c8')
        complex128 = numpy.dtype('>c16')

        @property
        def type2dt(self):
            """Mapping from TIFF field type code to big-endian numpy dtype."""
            return dict((k, numpy.dtype(v).newbyteorder('>')) for k, v in TiffFile._type2dtype.items())

    def __init__(self, filename, subIFDpaths=None, method=1):
        """
        Opens a tiff file, reads header and IFDs.

        *filename* is the filename containing the tiff
        *subIFDpaths* is the list of tag path whose values are offset to private IFDs
                      a tag path is a tuple representing the path to a given tag which must represent an IFD
                      (34665,) means that tag 34665 of any given public IFD is an offset to a private IFD
                      (32001, 521) means that tag 32001 of any given public IFD is an offset to a private IFD
                                   and that tag 521 of any private tag referenced by a 32001 public tag is also an offset to a private IFD
                      A bare integer is accepted as a one-tag path. Defaults to no private IFD.
        *method* is the method used to read data:
                 1: f=open(..., 'rb') ; numpy.frombuffer(f.read(), dtype=numpy.ubyte)
                 2: f=open(..., 'rb') ; numpy.ndarray(buffer=mmap(f), dtype=numpy.ubyte)
                 3: same as 2 but with modifications allowed - DANGEROUS

        Raises IOError if the file does not exist or its header is not a TIFF
        header, and PyexttiffError if *method* is unknown or if the IFD chain
        cannot be handled.
        """
        self._filename = filename
        if subIFDpaths is None:
            # A fresh list per instance: a mutable default would be shared between instances
            subIFDpaths = []
        self._subIFDpaths = subIFDpaths
        self._fileHandle = None
        if not os.path.exists(filename):
            raise IOError(filename + " must exists.")

        # Reading file
        if method == 1:
            with open(filename, 'rb') as f:
                self._data = numpy.frombuffer(f.read(), dtype=numpy.ubyte)
        elif method == 2:
            self._fileHandle = open(filename, 'rb')
            # access=ACCESS_READ is the portable spelling of prot=PROT_READ (which is Unix-only)
            mm = mmap.mmap(self._fileHandle.fileno(), 0, access=mmap.ACCESS_READ)
            self._data = numpy.ndarray(shape=(mm.size(),), buffer=mm, dtype=numpy.ubyte)
        elif method == 3:
            self._fileHandle = open(filename, 'r+b')
            mm = mmap.mmap(self._fileHandle.fileno(), 0)
            self._data = numpy.ndarray(shape=(mm.size(),), buffer=mm, dtype=numpy.ubyte)
        else:
            raise PyexttiffError("This method is unknown.")

        try:
            if self._data.size < 8:
                raise IOError('file is too short to hold a TIFF header: %s' % (filename,))

            # Decoding header - byte order
            # Both markers ('II' and 'MM') read the same in either byte order,
            # so a native uint16 view is enough to recognise them.
            byteorder = self._data[0:2].view(dtype=numpy.uint16)[0]
            if byteorder == 0x4949:
                self.endian = 'little'
                self.dtypes = TiffFile._LittleEndianNumpyDTypes()
            elif byteorder == 0x4d4d:
                self.endian = 'big'
                self.dtypes = TiffFile._BigEndianNumpyDTypes()
            else:
                raise IOError('unrecognized byteorder: %s' % (hex(byteorder)))

            # Decoding header - magic number
            magic = self._get_uint16(2)
            if magic != 42:
                raise IOError('wrong magic number for TIFF file: %s' % (magic))

            # Decoding header - first IFD offset
            IFD0offset = self._get_uint32(4)

            # IFD reading
            paths = self._normalize_paths(subIFDpaths)
            self.IFDs = []
            offset = int(IFD0offset)
            visited = set()  # offsets already read, to detect a chain that loops back
            num = 0
            while offset:
                if offset in visited:
                    raise PyexttiffError("The chain of IFDs loops back on itself.")
                visited.add(offset)
                ifd, offset = self._readIFD(offset, (), paths, num)
                offset = int(offset)
                if ifd.has_image():
                    num += 1
                else:
                    raise PyexttiffError("Not sure about IFD that does not contain image.")
                self.IFDs.append(ifd)
        except Exception:
            # Do not leave the file open when the object cannot be built
            self.close()
            raise

    @staticmethod
    def _normalize_paths(subIFDpaths):
        """Returns the tag paths as a list of tuples (a bare tag becomes a one-tag path)."""
        normalized = []
        for tagpath in subIFDpaths:
            try:
                normalized.append(tuple(tagpath))
            except TypeError:
                normalized.append((tagpath,))
        return normalized

    @staticmethod
    def _value_is_inline(entrybytes, entrycount):
        """
        Returns True if the values of an entry are stored in the 4-byte
        value field of the entry itself (i.e. all values fit in 4 bytes)
        instead of being stored elsewhere in the file.
        *entrybytes* is the size of one value (0 for an unknown type)
        *entrycount* is the number of values
        """
        return 0 < int(entrybytes) * int(entrycount) <= 4

    @staticmethod
    def _expected_strip_count(nbRows, nbRowsPerStrip):
        """
        Returns the number of strips needed to store *nbRows* rows with
        *nbRowsPerStrip* rows in each strip (the last strip may be shorter).
        """
        nbRows = int(nbRows)
        nbRowsPerStrip = int(nbRowsPerStrip)
        if nbRowsPerStrip <= 0:
            raise PyexttiffError("Total number of rows, strip numbers and number of rows per strips are not consistent.")
        # Integer ceiling division; '/' is a true division in this module
        return (nbRows + nbRowsPerStrip - 1) // nbRowsPerStrip

    @staticmethod
    def _version_tuple(version):
        """Returns the leading numeric components of a version string, as a tuple of int."""
        numbers = []
        for piece in str(version).split('.'):
            digits = ''
            for char in piece:
                if not char.isdigit():
                    break
                digits += char
            if not digits:
                break
            numbers.append(int(digits))
        return tuple(numbers)

    def _readIFD(self, offset, path, subIFDpaths, num):
        """
        Reads recursively IFDs

        *offset* is the position in file of the IFD to read
        *path* is the tuple of tags leading to this IFD (empty for a public IFD)
        *subIFDpaths* is the list of tag paths whose values are offsets to private IFDs
        *num* is the rank of the image associated to this IFD (unused for private IFDs)

        Returns the tuple (ifd, offset of the next IFD).
        """
        # Python int everywhere in offset arithmetic: numpy scalars may wrap
        offset = int(offset)
        ifd = IFD()
        n = int(self._get_uint16(offset))
        for i in range(n):
            # Each entry is 12 bytes: tag (2), type (2), count (4), value or offset (4)
            entryOffset = offset + 2 + i * 12
            entrytag = self._get_uint16(entryOffset)
            entrytype = self._get_uint16(entryOffset + 2)
            entrycount = int(self._get_uint32(entryOffset + 4))
            entrybytes = TiffFile._type2bytes.get(entrytype, 0)
            if self._value_is_inline(entrybytes, entrycount):
                entryvalue = self._get_values(entryOffset + 8, entrytype, entrycount)
            else:
                # File offsets are unsigned
                valueOffset = int(self._get_uint32(entryOffset + 8))
                entryvalue = self._get_values(valueOffset, entrytype, entrycount)
            entrypath = tuple(list(path) + [entrytag])
            if any(tuple(mypath[:len(entrypath)]) == entrypath for mypath in subIFDpaths):
                # The value of this entry is the offset of a private IFD:
                # exactly one value is expected.
                if entryvalue.shape != (1,):
                    raise PyexttiffError("Not as expected...")
                subifd, _ = self._readIFD(entryvalue[0], entrypath, subIFDpaths, None)
                ifd.append(IFDEntry(entrytag, entrytype, subifd))
            else:
                ifd.append(IFDEntry(entrytag, entrytype, entryvalue))

        if path == () and ifd.has_tag(273):
            # Raw data
            nbRows = ifd.get_value(257)
            offsetValues = numpy.atleast_1d(ifd.get_value(273))
            # A missing RowsPerStrip means that the whole image is in one strip
            nbRowsPerStrip = ifd.get_value(278) if ifd.has_tag(278) else nbRows
            nbBytesPerStrip = numpy.atleast_1d(ifd.get_value(279))
            if self._expected_strip_count(nbRows, nbRowsPerStrip) != len(offsetValues):
                raise PyexttiffError("Total number of rows, strip numbers and number of rows per strips are not consistent.")
            if len(nbBytesPerStrip) != len(offsetValues):
                raise PyexttiffError("Strip offsets and strip byte counts are not consistent.")
            data = [self._get_values(stripOffset, 1, stripBytes)
                    for stripOffset, stripBytes in zip(offsetValues, nbBytesPerStrip)]
            ifd.get_entry(273).set_value(data)

            # Image data
            im = self.get_PILImage()
            im.seek(num)
            data = numpy.array(im)
            if data.shape == ():
                data = numpy.array(im)  # Sometimes must be called twice to return values...
            ifd._image = data

        nextIFD = self._get_uint32(offset + 2 + n * 12)
        return (ifd, nextIFD)

    def get_data(self):
        """
        Returns the ndarray containing the data.
        """
        return self._data

    def get_PILImage(self):
        """
        Returns the PIL image object of the file.
        """
        import warnings
        try:
            meth = numpy.getbuffer
        except AttributeError:
            meth = memoryview
        # distutils.version.LooseVersion is no longer shipped with recent
        # python versions: versions are compared as tuples of int.
        if self._version_tuple(getattr(PIL, '__version__', '0')) < (5, 4, 1):
            warnings.warn("You may have issues using an old version of PIL; please update it with " + \
                          "'pip install --user --upgrade pillow'. This warning is issued when version " + \
                          "of pillow is inferior to '5.4.1' but it may work, or not, with older versions...")
        return PIL.Image.open(six.BytesIO(meth(self.get_data())))

    def _get_uint16(self, offset):
        """Returns the unsigned 16-bit integer stored at *offset*."""
        offset = int(offset)
        return self.get_data()[offset:offset + 2].view(dtype=self.dtypes.uint16)[0]

    def _get_uint32(self, offset):
        """Returns the unsigned 32-bit integer stored at *offset*."""
        offset = int(offset)
        return self.get_data()[offset:offset + 4].view(dtype=self.dtypes.uint32)[0]

    def _get_int32(self, offset):
        """Returns the signed 32-bit integer stored at *offset*."""
        offset = int(offset)
        return self.get_data()[offset:offset + 4].view(dtype=self.dtypes.int32)[0]

    def _get_values(self, offset, typ, count):
        """
        Returns, as an array, the *count* values of type *typ* stored at *offset*.

        *typ* can be a numpy dtype, a numpy scalar type, a TIFF field type
        code or a TIFF field type name. In the last two cases, the byte order
        of the file is applied.

        Raises PyexttiffError if *typ* is not a known TIFF type.
        """
        # Python int: the sum of two numpy scalars (e.g. uint16) may wrap
        offset = int(offset)
        count = int(count)
        if isinstance(typ, numpy.dtype):
            dtype = typ
            size = typ.itemsize
        elif isinstance(typ, type) and issubclass(typ, numpy.generic):
            dtype = typ
            size = numpy.dtype(typ).itemsize
        else:
            # Type codes are used as is, names are translated into codes
            code = typ if typ in TiffFile._type2bytes else TiffFile._name2type.get(typ)
            dtype = self.dtypes.type2dt.get(code)
            size = TiffFile._type2bytes.get(code)
            if dtype is None or size is None:
                if isinstance(typ, six.string_types):
                    shown, ntyp = None, typ
                else:
                    shown, ntyp = typ, str(typ)
                raise PyexttiffError('_get_values: incomplete info for type=%r [%r]: dtype=%s, bytes=%s\n' % (shown, ntyp, dtype, size))
        result = self.get_data()[offset:offset + size * count].view(dtype=dtype)
        return result

    def close(self):
        """
        Closes the file.
        """
        # getattr: the attribute does not exist if __init__ did not run
        handle = getattr(self, '_fileHandle', None)
        if handle is not None:
            handle.close()
            self._fileHandle = None

    def __del__(self):
        """
        __del__ method
        """
        self.close()
class IFD(list):
    """This class represent an IFD.

    An IFD is the list of its entries; entries are expected to provide
    ``get_tagValue()``, ``get_tagName()`` and ``get_value()``. An image can
    be attached to the IFD through the ``_image`` attribute.
    """

    def __init__(self):
        """Initialisation method of IFD class."""
        super(IFD, self).__init__()
        self._image = None

    @staticmethod
    def _is_numeric(tag):
        """Returns True if *tag* is a tag number (python or numpy integer), not a tag name."""
        return isinstance(tag, (int, numpy.integer))

    def has_tag(self, tag):
        """
        Returns True if an entry fits the tag given

        :param tag: tag to look for, as an integer or a name
        :return: True if tag exists
        """
        if self._is_numeric(tag):
            return tag in self.get_tagValues()
        return tag in self.get_tagNames()

    def get_entry(self, tag):
        """
        Returns the entry for a tag

        :param tag: tag to look for, as an integer or a name
        :return: the first entry associated to the tag
        :raises PyexttiffError: if no entry fits the tag
        """
        numeric = self._is_numeric(tag)
        for entry in self:
            key = entry.get_tagValue() if numeric else entry.get_tagName()
            if key == tag:
                return entry
        raise PyexttiffError("This tag doesn't exist in this IFD.")

    def get_value(self, tag, human=True):
        """
        Returns the value for a tag

        :param tag: tag to look for, as an integer or a name
        :param human: if True, value is modified:
                      - value[0] is returned instead of value if array contains only one element
                      - conversion in string is achieved for arrays representing strings
        :return: the value associated to the tag
        :raises PyexttiffError: if no entry fits the tag
        """
        return self.get_entry(tag).get_value(human)

    def has_image(self):
        """Returns True if one tag is an image"""
        return self._image is not None

    def get_image(self):
        """Returns the image"""
        if not self.has_image():
            raise PyexttiffError("This IFD doesn't contain an image.")
        return self._image

    def get_tagValues(self):
        """Returns the list of the tags as decimal values"""
        return [entry.get_tagValue() for entry in self]

    def get_tagNames(self):
        """Returns the list of the tag names"""
        return [entry.get_tagName() for entry in self]

    def as_dict(self, keys='value'):
        """
        Returns a dictionary containing all entries.

        :param keys: keys to use for the dictionary, among ('value', 'name')
        :return: the dictionary
        """
        assert keys in ('value', 'name'), "keys must be in ('value', 'name')"
        if keys == 'value':
            return {entry.get_tagValue(): entry.get_value() for entry in self}
        # get_tagName must be called: the method object itself is not a usable key
        return {entry.get_tagName(): entry.get_value() for entry in self}
class IFDEntry(object):
    """This class represent an IFD entry

    An entry holds a tag number, a field type code and a value. The class
    attributes ``_tag_value2name``, ``_tag_name2value`` and
    ``_tag_value2type`` are built from the ``_tag_info`` table below.
    """

    # <TagName> <Hex> <Type> <Number of values>
    _tag_info = '''
# standard tags:
NewSubfileType FE LONG 1
SubfileType FF SHORT 1
ImageWidth 100 SHORT|LONG 1
ImageLength 101 SHORT|LONG 1
BitsPerSample 102 SHORT SamplesPerPixel
Compression 103 SHORT 1
Uncompressed 1
CCITT1D 2
Group3Fax 3
Group4Fax 4
LZW 5
JPEG 6
PackBits 32773
PhotometricInterpretation 106 SHORT 1
WhiteIsZero 0
BlackIsZero 1
RGB 2
RGBPalette 3
TransparencyMask 4
CMYK 5
YCbCr 6
CIELab 8
Threshholding 107 SHORT 1
CellWidth 108 SHORT 1
CellLength 109 SHORT 1
FillOrder 10A SHORT 1
DocumentName 10D ASCII
ImageDescription 10E ASCII
Make 10F ASCII
Model 110 ASCII
StripOffsets 111 SHORT|LONG StripsPerImage
Orientation 112 SHORT 1
TopLeft 1
TopRight 2
BottomRight 3
BottomLeft 4
LeftTop 5
RightTop 6
RightBottom 7
LeftBottom 8
SamplesPerPixel 115 SHORT 1
RowsPerStrip 116 SHORT|LONG 1
StripByteCounts 117 LONG|SHORT StripsPerImage
MinSampleValue 118 SHORT SamplesPerPixel
MaxSampleValue 119 SHORT SamplesPerPixel
XResolution 11A RATIONAL 1
YResolution 11B RATIONAL 1
PlanarConfiguration 11C SHORT 1
Chunky 1
Planar 2
PageName 11D ASCII
XPosition 11E DOUBLE
YPosition 11F DOUBLE
FreeOffsets 120 LONG
FreeByteCounts 121 LONG
GrayResponseUnit 122 SHORT 1
GrayResponseCurve 123 SHORT 2**BitsPerSample
T4Options 124 LONG 1
T6Options 125 LONG 1
ResolutionUnit 128 SHORT 1
PageNumber 129 SHORT 2
TransferFunction 12D SHORT (1|SamplesPerPixel)*2**BitsPerSample
Software 131 ASCII
DateTime 132 ASCII 20
Artist 13B ASCII
HostComputer 13C ASCII
Predictor 13D SHORT 1
WhitePoint 13E RATIONAL 2
PrimaryChromaticities 13F RATIONAL 6
ColorMap 140 SHORT 3*(2**BitsPerSample)
HalftoneHints 141 SHORT 2
TileWidth 142 SHORT|LONG 1
TileLength 143 SHORT|LONG 1
TileOffsets 144 LONG TilesPerImage
TileByteCounts 145 SHORT|LONG TilesPerImage
InkSet 14C SHORT 1
InkNames 14D ASCII <total number of chars in all ink name strings, including zeros>
NumberOfInks 14E SHORT 1
DotRange 150 BYTE|SHORT 2|2*NumberOfInks
TargetPrinter 151 ASCII any
ExtraSamples 152 BYTE <number of extra components per pixel>
SampleFormat 153 SHORT SamplesPerPixel
SMinSampleValue 154 Any SamplesPerPixel
SMaxSampleValue 155 Any SamplesPerPixel
TransferRange 156 SHORT 6
JPEGProc 200 SHORT 1
JPEGInterchangeFormat 201 LONG 1
JPEGInterchangeFormatLength 202 LONG 1
JPEGRestartInterval 203 SHORT 1
JPEGLosslessPredictos 205 SHORT SamplesPerPixel
JPEGPointTransforms 206 SHORT SamplesPerPixel
JPEGQTables 207 LONG SamplesPerPixel
JPEGDCTables 208 LONG SamplesPerPixel
JPEGACTables 209 LONG SamplesPerPixel
YCbCrCoefficients 211 RATIONAL 3
YCbCrSubSampling 212 SHORT 2
YCbCrPositioning 213 SHORT 1
ReferenceBlackWhite 214 LONG 2*SamplesPerPixel
Copyright 8298 ASCII Any
# non-standard tags:
CZ_LSMInfo 866C CZ_LSM
# EXIF tags, see http://www.awaresystems.be/imaging/tiff/tifftags/privateifd/exif.html
EXIF_IFDOffset 8769 SHORT 1
EXIF_ExposureTime 829a RATIONAL 1
EXIF_FNumber 829d RATIONAL 1
EXIF_ExposureProgram 8822 SHORT 1
EXIF_SpectralSensitivity 8824 ASCII
EXIF_ISOSpeedRatings 8827 SHORT 1
EXIF_OECF 8828 UNDEFINED
EXIF_ExifVersion 9000 UNDEFINED 4
EXIF_DateTimeOriginal 9003 ASCII
EXIF_DateTimeDigitized 9004 ASCII
EXIF_ComponentsConfiguration 9101 UNDEFINED 4
EXIF_CompressedBitsPerPixel 9102 RATIONAL 1
EXIF_ShutterSpeedValue 9201 SRATIONAL 1
EXIF_ApertureValue 9202 RATIONAL 1
EXIF_BrightnessValue 9203 SRATIONAL 1
EXIF_ExposureBiasValue 9204 SRATIONAL 1
EXIF_MaxApertureValue 9205 RATIONAL 1
EXIF_SubjectDistance 9206 RATIONAL 1
EXIF_MeteringMode 9207 SHORT 1
EXIF_LightSource 9208 SHORT 1
EXIF_Flash 9209 SHORT 1
EXIF_FocalLength 920a RATIONAL 1
EXIF_SubjectArea 9214 SHORT 2|3|4
EXIF_MakerNote 927c UNDEFINED
EXIF_UserComment 9286 UNDEFINED
EXIF_SubsecTime 9290 ASCII
EXIF_SubsecTimeOriginal 9291 ASCII
EXIF_SubsecTimeDigitized 9292 ASCII
EXIF_FlashpixVersion a000 UNDEFINED 4
EXIF_ColorSpace a001 SHORT 1
EXIF_PixelXDimension a002 SHORT!LONG 1
EXIF_PixelYDimension a003 SHORT!LONG 1
EXIF_RelatedSoundFile a004 ASCII 13
EXIF_FlashEnergy a20b RATIONAL 1
EXIF_SpatialFrequencyResponse a20c UNDEFINED
EXIF_FocalPlaneXResolution a20e RATIONAL 1
EXIF_FocalPlaneYResolution a20f RATIONAL 1
EXIF_FocalPlaneResolutionUnit a210 SHORT 1
EXIF_SubjectLocation a214 SHORT 2
EXIF_ExposureIndex a215 RATIONAL 1
EXIF_SensingMethod a217 SHORT 1
EXIF_FileSource a300 UNDEFINED 1
EXIF_SceneType a301 UNDEFINED 1
EXIF_CFAPattern a302 UNDEFINED
EXIF_CustomRendered a401 SHORT 1
EXIF_ExposureMode a402 SHORT 1
EXIF_WhiteBalance a403 SHORT 1
EXIF_DigitalZoomRatio a404 RATIONAL 1
EXIF_FocalLengthIn35mmFilm a405 SHORT 1
EXIF_SceneCaptureType a406 SHORT 1
EXIF_GainControl a407 SHORT 1
EXIF_Contrast a408 SHORT 1
EXIF_Saturation a409 SHORT 1
EXIF_Sharpness a40a SHORT 1
EXIF_DeviceSettingDescription a40b UNDEFINED
EXIF_SubjectDistanceRange a40c SHORT 1
EXIF_ImageUniqueID a420 ASCII 33
'''

    def _parse_tag_info(text):
        """
        Parses a tag table and returns the tuple of dictionaries
        (tag number -> name, tag name -> number, tag number -> type name).

        Lines which are empty, start with '#' or with a blank are ignored.
        Lines with fewer than three fields, or whose second field is not an
        hexadecimal number, are ignored as well: they describe the possible
        values of the preceding tag, not a tag.
        """
        value2name = {}
        name2value = {}
        value2type = {}
        for line in text.split('\n'):
            if not line or line.startswith('#') or line[0].isspace():
                continue
            fields = line.split()
            if len(fields) < 3:
                continue
            name, hexcode, typename = fields[:3]
            try:
                code = int(hexcode, 16)
            except ValueError:
                continue
            value2name[code] = name
            value2type[code] = typename
            name2value[name] = code
        return value2name, name2value, value2type

    # Built in a function so that no loop variable is left as a class attribute
    _tag_value2name, _tag_name2value, _tag_value2type = _parse_tag_info(_tag_info)
    _parse_tag_info = staticmethod(_parse_tag_info)

    def __init__(self, tag, entrytype, value=None):
        """
        *tag* is the tag number of the entry
        *entrytype* is the type of the entry
        *value* is the value associated to the tag
        """
        self._tag = tag
        self._type = entrytype
        self._value = value
        # Tags absent from the table are named after their hexadecimal number
        self._name = self._tag_value2name.get(tag, 'TAG%s' % (hex(tag),))

    def is_image(self):
        """Returns True if content in an image"""
        return self.get_tagValue() == 273

    def is_IFD(self):
        """Returns True if content is an IFD."""
        return isinstance(self._value, IFD)

    def get_tagValue(self):
        """Returns the tag"""
        return self._tag

    def get_value(self, human=True):
        """
        Returns the value
        if human=True, value is modified:
          - value[0] is returned instead of value if array contains only one element
          - conversion in string is achieved for arrays representing strings
            (NUL bytes are dropped)
        A value which is an IFD, or which is None, is always returned as is.
        """
        value = self._value
        if not human or value is None:
            return value
        if isinstance(value, numpy.ndarray):
            if self.get_type() == 2:  # ASCII
                # Decoding the whole buffer also works for strings of zero or one byte
                return value.tobytes().replace(b'\x00', b'').decode('UTF8')
            if value.ndim > 0 and len(value) == 1:
                return value[0]
            return value
        if isinstance(value, IFD):
            # An IFD with only one entry must not be replaced by this entry
            return value
        if len(value) == 1:
            return value[0]
        return value

    def get_tagName(self):
        """Returns the tag name"""
        return self._name

    def get_type(self):
        """Returns the type of entry."""
        return self._type

    def set_value(self, value):
        """Sets the value of the entry."""
        self._value = value