From 0f81385298ebdcc6dd0e108076a062d0589bd951 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Fri, 27 Dec 2024 16:07:01 -0800 Subject: [PATCH] fix: IPTC fields have length limits (#4568) It escaped our notice before that the IPTC spec dictates length limits for many fields. Getting this wrong can confuse some software, including Photoshop, apparently. This patch enforces length limits wherever we could figure them out from the IPTC spec -- it will simply truncate those strings that are too long before writing them to a binary IPTC tag. For this reason, we also are ending the practice of automatically translating several IPTC fields to and from what we figured were the equivalent generic metadata names. This was perhaps a dubious practice to begin with, but now the enforcement of length limits for IPTC (but not the generic metadata) makes it even more fraught. There is probably nobody depending on this behavior (and maybe few OIIO users depending on IPTC support at all?), so now anybody purposely using IPTC metadata is fully responsible for setting it and dealing with any issues of whether it's "out of sync" with any other metadata that OIIO stores in or reads from a file. Fixes #4342 --------- Signed-off-by: Larry Gritz --- src/doc/stdmetadata.rst | 49 ++++++---- src/libOpenImageIO/iptc.cpp | 134 ++++++++++++++++------------ testsuite/jpeg-metadata/ref/out.txt | 2 - 3 files changed, 111 insertions(+), 74 deletions(-) diff --git a/src/doc/stdmetadata.rst b/src/doc/stdmetadata.rst index 69f17f052b..5ec22ce0c7 100644 --- a/src/doc/stdmetadata.rst +++ b/src/doc/stdmetadata.rst @@ -916,23 +916,8 @@ conventions for storing image metadata, and this standard is growing in popularity and is commonly used in photo-browsing programs to record captions and keywords. 
-The following IPTC metadata items correspond exactly to metadata in the -OpenImageIO conventions, so it is recommended that you use the standards and -that plugins supporting IPTC metadata respond likewise: - - =============== ========================================================================================================= - IPTC tag OpenImageIO metadata convention - =============== ========================================================================================================= - Caption `"ImageDescription"` - Keyword IPTC keywords should be concatenated, separated by semicolons (`;`), and stored as the `Keywords` attribute. - ExposureTime `ExposureTime` - CopyrightNotice `Copyright` - Creator `Artist` - =============== ========================================================================================================= - - -The remainder of IPTC metadata fields should use the following names, -prefixed with `IPTC:` to avoid conflicts with other plugins or standards. +IPTC metadata fields should use the following names, prefixed with `IPTC:` to +avoid conflicts with other plugins or standards. .. option:: "IPTC:ObjecTypeReference" : string @@ -958,6 +943,10 @@ prefixed with `IPTC:` to avoid conflicts with other plugins or standards. Category. +.. option:: "IPTC:Keywords" : string + + Semicolon-separated keywords describing the contents of the image. + .. option:: "IPTC:ContentLocationCode" : string Code for content location. @@ -976,6 +965,10 @@ prefixed with `IPTC:` to avoid conflicts with other plugins or standards. Expiration date and time. +.. option:: "IPTC:ExposureTime" : string + + The exposure time (in seconds) of the captured image. + .. option:: "IPTC:Instructions" : string Special instructions for handling the image. @@ -1000,6 +993,10 @@ prefixed with `IPTC:` to avoid conflicts with other plugins or standards. The version number of the creation software. +.. 
option:: "IPTC:Creator" : string + + The artist, creator, or owner of the image. + .. option:: "IPTC:AuthorsPosition" : string The job title or position of the creator of the image. @@ -1025,11 +1022,19 @@ prefixed with `IPTC:` to avoid conflicts with other plugins or standards. The source of the image. +.. option:: "IPTC:CopyrightNotice" : string + + Any copyright notice for the image. + .. option:: "IPTC:Contact" : string The contact information for the image (possibly including name, address, email, etc.). +.. option:: "IPTC:Caption" : string + + A caption for the image. + .. option:: "IPTC:CaptionWriter" : string The name of the person who wrote the caption or description of the @@ -1058,6 +1063,16 @@ prefixed with `IPTC:` to avoid conflicts with other plugins or standards. The history of the image or document. +References for more information on IPTC metadata: + +* https://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata +* https://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata#iptc-core-schema-1-5-specifications + This is the one where you can find the length limits +* ExifTool's documentation about IPTC tags (caveat: not a definitive + reference, could be outdated or incorrect): + https://exiftool.org/TagNames/IPTC.html + + SMPTE metadata ============== diff --git a/src/libOpenImageIO/iptc.cpp b/src/libOpenImageIO/iptc.cpp index 0f79bbe0fe..aa739eb950 100644 --- a/src/libOpenImageIO/iptc.cpp +++ b/src/libOpenImageIO/iptc.cpp @@ -21,68 +21,78 @@ struct IIMtag { const char* name; // Attribute name we use const char* anothername; // Optional second name bool repeatable; // May repeat + unsigned int maxlen; // Maximum length (if nonzero) }; static IIMtag iimtag[] = { - { 3, "IPTC:ObjectTypeReference", NULL, false }, - { 4, "IPTC:ObjectAttributeReference", NULL, true }, - { 5, "IPTC:ObjectName", NULL, false }, - { 7, "IPTC:EditStatus", NULL, false }, - { 10, "IPTC:Urgency", NULL, false }, // deprecated by IPTC - { 12, 
"IPTC:SubjectReference", NULL, true }, - { 15, "IPTC:Category", NULL, false }, - { 20, "IPTC:SupplementalCategories", NULL, true }, // deprecated by IPTC - { 22, "IPTC:FixtureIdentifier", NULL, false }, - { 25, "Keywords", NULL, true }, - { 26, "IPTC:ContentLocationCode", NULL, true }, - { 27, "IPTC:ContentLocationName", NULL, true }, - { 30, "IPTC:ReleaseDate", NULL, false }, - { 35, "IPTC:ReleaseTime", NULL, false }, - { 37, "IPTC:ExpirationDate", NULL, false }, - { 38, "IPTC:ExpirationTime", NULL, false }, - { 40, "IPTC:Instructions", NULL, false }, - { 45, "IPTC:ReferenceService", NULL, true }, - { 47, "IPTC:ReferenceDate", NULL, false }, - { 50, "IPTC:ReferenceNumber", NULL, true }, - { 55, "IPTC:DateCreated", NULL, false }, - { 60, "IPTC:TimeCreated", NULL, false }, - { 62, "IPTC:DigitalCreationDate", NULL, false }, - { 63, "IPTC:DigitalCreationTime", NULL, false }, - { 65, "IPTC:OriginatingProgram", "Software", false }, - { 70, "IPTC:ProgramVersion", NULL, false }, - { 80, "IPTC:Creator", "Artist", true }, // sometimes called "byline" - { 85, "IPTC:AuthorsPosition", NULL, true }, // sometimes "byline title" - { 90, "IPTC:City", NULL, false }, - { 92, "IPTC:Sublocation", NULL, false }, - { 95, "IPTC:State", NULL, false }, // sometimes "Province/State" - { 100, "IPTC:CountryCode", NULL, false }, - { 101, "IPTC:Country", NULL, false }, - { 103, "IPTC:TransmissionReference", NULL, false }, - { 105, "IPTC:Headline", NULL, false }, - { 110, "IPTC:Provider", NULL, false }, // aka Credit - { 115, "IPTC:Source", NULL, false }, - { 116, "IPTC:CopyrightNotice", "Copyright", false }, - { 118, "IPTC:Contact", NULL, false }, - { 120, "IPTC:Caption", "ImageDescription", false }, - { 121, "IPTC:LocalCaption", NULL, false }, - { 122, "IPTC:CaptionWriter", NULL, false }, // aka Writer/Editor + { 3, "IPTC:ObjectTypeReference", NULL, false, 67 }, + { 4, "IPTC:ObjectAttributeReference", NULL, true, 68 }, + { 5, "IPTC:ObjectName", NULL, false, 64 }, + { 7, "IPTC:EditStatus", 
NULL, false, 64 }, + { 10, "IPTC:Urgency", NULL, false, 1 }, // deprecated by IPTC + { 12, "IPTC:SubjectReference", NULL, true, 236 }, + { 15, "IPTC:Category", NULL, false, 3 }, + { 20, "IPTC:SupplementalCategories", NULL, true, 32 }, // deprecated by IPTC + { 22, "IPTC:FixtureIdentifier", NULL, false, 32 }, + { 25, "IPTC:Keywords", NULL, true, 64 }, + { 26, "IPTC:ContentLocationCode", NULL, true, 3 }, + { 27, "IPTC:ContentLocationName", NULL, true, 64 }, + { 30, "IPTC:ReleaseDate", NULL, false, 8 }, + { 35, "IPTC:ReleaseTime", NULL, false, 11 }, + { 37, "IPTC:ExpirationDate", NULL, false, 8 }, + { 38, "IPTC:ExpirationTime", NULL, false, 11 }, + { 40, "IPTC:Instructions", NULL, false, 256 }, + { 45, "IPTC:ReferenceService", NULL, true, 10 }, + { 47, "IPTC:ReferenceDate", NULL, false, 8 }, + { 50, "IPTC:ReferenceNumber", NULL, true, 8 }, + { 55, "IPTC:DateCreated", NULL, false, 8 }, + { 60, "IPTC:TimeCreated", NULL, false, 11 }, + { 62, "IPTC:DigitalCreationDate", NULL, false, 8 }, + { 63, "IPTC:DigitalCreationTime", NULL, false, 11 }, + { 65, "IPTC:OriginatingProgram", "Software", false, 32 }, + { 70, "IPTC:ProgramVersion", NULL, false, 10 }, + { 80, "IPTC:Creator", "Artist", true, 32 }, // sometimes called "byline" + { 85, "IPTC:AuthorsPosition", NULL, true, 32 }, // sometimes "byline title" + { 90, "IPTC:City", NULL, false, 32 }, + { 92, "IPTC:Sublocation", NULL, false, 32 }, + { 95, "IPTC:State", NULL, false, 32 }, // sometimes "Province/State" + { 100, "IPTC:CountryCode", NULL, false, 3 }, + { 101, "IPTC:Country", NULL, false, 64 }, + { 103, "IPTC:TransmissionReference", NULL, false, 32 }, + { 105, "IPTC:Headline", NULL, false, 256 }, + { 110, "IPTC:Provider", NULL, false, 32 }, // aka Credit + { 115, "IPTC:Source", NULL, false, 32 }, + { 116, "IPTC:CopyrightNotice", "Copyright", false, 128 }, + { 118, "IPTC:Contact", NULL, false, 128 }, + { 120, "IPTC:Caption", "ImageDescription", false, 2000 }, + { 121, "IPTC:LocalCaption", NULL, false, 256 }, + { 122, 
"IPTC:CaptionWriter", NULL, false, 32 }, // aka Writer/Editor // Note: 150-154 is audio sampling stuff - { 184, "IPTC:JobID", NULL, false }, - { 185, "IPTC:MasterDocumentID", NULL, false }, - { 186, "IPTC:ShortDocumentID", NULL, false }, - { 187, "IPTC:UniqueDocumentID", NULL, false }, - { 188, "IPTC:OwnerID", NULL, false }, - { 221, "IPTC:Prefs", NULL, false }, - { 225, "IPTC:ClassifyState", NULL, false }, - { 228, "IPTC:SimilarityIndex", NULL, false }, - { 230, "IPTC:DocumentNotes", NULL, false }, - { 231, "IPTC:DocumentHistory", NULL, false }, - { -1, NULL, NULL, false } + { 184, "IPTC:JobID", NULL, false, 64 }, + { 185, "IPTC:MasterDocumentID", NULL, false, 256 }, + { 186, "IPTC:ShortDocumentID", NULL, false, 64 }, + { 187, "IPTC:UniqueDocumentID", NULL, false, 128 }, + { 188, "IPTC:OwnerID", NULL, false, 128 }, + { 221, "IPTC:Prefs", NULL, false, 64 }, + { 225, "IPTC:ClassifyState", NULL, false, 64 }, + { 228, "IPTC:SimilarityIndex", NULL, false, 32 }, + { 230, "IPTC:DocumentNotes", NULL, false, 1024 }, + { 231, "IPTC:DocumentHistory", NULL, false, 256 }, + { -1, NULL, NULL, false, 0 } }; // N.B. All "Date" fields are 8 digit strings: CCYYMMDD // All "Time" fields are 11 digit strings (what format?) +// IPTC references: +// +// * https://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata +// * https://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata#iptc-core-schema-1-5-specifications +// This is the one where you can find the length limits +// * ExifTool's documentation about IPTC tags (caveat: not a definitive +// reference, could be outdated or incorrect): +// https://exiftool.org/TagNames/IPTC.html + } // anonymous namespace @@ -144,9 +154,13 @@ decode_iptc_iim(const void* iptc, int length, ImageSpec& spec) } else { spec.attribute(iimtag[i].name, s); } +#if 0 + // We are no longer confident about auto-translating IPTC + // data into allegedly equivalent metadata. 
if (iimtag[i].anothername && !spec.extra_attribs.contains(iimtag[i].anothername)) spec.attribute(iimtag[i].anothername, s); +#endif break; } } @@ -193,18 +207,28 @@ encode_iptc_iim(const ImageSpec& spec, std::vector& iptc) Strutil::split(allvals, tokens, ";"); for (auto& token : tokens) { token = Strutil::strip(token); - if (token.size()) + if (token.size()) { + if (iimtag[i].maxlen && iimtag[i].maxlen < token.size()) + token = token.substr(0, iimtag[i].maxlen); encode_iptc_iim_one_tag(iimtag[i].tag, token, iptc); + } } } else { // Regular, non-repeating - encode_iptc_iim_one_tag(iimtag[i].tag, p->get_string(0), iptc); + std::string token = p->get_string(0); + if (iimtag[i].maxlen && iimtag[i].maxlen < token.size()) + token = token.substr(0, iimtag[i].maxlen); + encode_iptc_iim_one_tag(iimtag[i].tag, token, iptc); } } +#if 0 + // We are no longer confident about auto-translating other metadata + // into allegedly equivalent IPTC. if (iimtag[i].anothername) { if ((p = spec.find_attribute(iimtag[i].anothername))) encode_iptc_iim_one_tag(iimtag[i].tag, p->get_string(0), iptc); } +#endif } return iptc.size() != 0; } diff --git a/testsuite/jpeg-metadata/ref/out.txt b/testsuite/jpeg-metadata/ref/out.txt index 8c3550e998..fe4a7b489a 100644 --- a/testsuite/jpeg-metadata/ref/out.txt +++ b/testsuite/jpeg-metadata/ref/out.txt @@ -83,7 +83,6 @@ with-attribs-and-desc.jpg : 640 x 480, 3 channel, uint8 jpeg Exif:ColorSpace: 1 Exif:ExifVersion: "0230" Exif:FlashPixVersion: "0100" - IPTC:Caption: "A photo" jpeg:subsampling: "4:2:0" oiio:ColorSpace: "sRGB" Reading src/blender-render.jpg @@ -109,6 +108,5 @@ with-colon-desc.jpg : 640 x 480, 3 channel, uint8 jpeg Exif:ColorSpace: 1 Exif:ExifVersion: "0230" Exif:FlashPixVersion: "0100" - IPTC:Caption: "Example:Text" jpeg:subsampling: "4:2:0" oiio:ColorSpace: "sRGB"