Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasw234 committed Jun 14, 2023
1 parent b2f3e89 commit 990d37c
Show file tree
Hide file tree
Showing 8 changed files with 990 additions and 2 deletions.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include README.md
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,33 @@
# merge_image_metadata
Merges image metadata (Keywords, Subject, HierarchicalSubject) as used by Adobe Bridge between images
# Merge Image Metadata
Currently only tested and working on Windows 10.

Merge metadata between images.
Useful for cases when metadata was already added and then you receive the high-res version afterwards.

Assumes that you want to merge the `Keywords`, `Subject`, and `HierarchicalSubject` fields as used by Adobe Bridge.

Currently merges metadata between two consecutive images only (i.e. will not correctly detect triplicates).

# Usage
```
Usage: merge_image_metadata.py [-v] FOLDER
Options:
-v Enable verbose (debug) output
```

# Installation
[exiftool](https://exiftool.org/) needs to be available in the PATH at runtime.

For the development version:

```
git clone https://github.com/jonasw234/merge_image_metadata
cd merge_image_metadata
python3 setup.py install
pip3 install -r dev-requirements.txt
```

For normal usage do the same but don’t include the last line, or use [pipx](https://pypi.org/project/pipx/) and install with

`pipx install git+https://github.com/jonasw234/merge_image_metadata`
5 changes: 5 additions & 0 deletions dev-requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
autoflake
black
ipdb
isort
pylint
Empty file.
243 changes: 243 additions & 0 deletions merge_image_metadata/merge_image_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#!/usr/bin/env python3
"""
Merge metadata between images.
Useful for cases when metadata was already added and then you receive the high-res
version afterwards.
Assumes that you want to merge the Keywords, Subject, and HierarchicalSubject fields as
used by Adobe Bridge.
Currently merges metadata between two consecutive images only (i.e. will not correctly
detect triplicates).
Usage: merge_image_metadata.py [-v] FOLDER
Options:
-v Enable verbose (debug) output
"""
import logging
import os
import subprocess
import sys
from typing import Callable, Tuple

import imagehash
from PIL import Image

# File name suffixes (compared in lowercase) that are treated as image files.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Module logger: prints "LEVEL:message" through a stream handler and defaults to
# INFO; main() raises the level to DEBUG when -v is given.
formatter = logging.Formatter("%(levelname)s:%(message)s")
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger(__name__)
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def create_image_hash(
    image_path: str, algorithm: Callable = imagehash.average_hash
) -> imagehash.ImageHash:
    """
    Create a perceptual hash for an image file.

    Params
    ------
    image_path : str
        Path to the image
    algorithm : Callable
        The algorithm to use to hash the image file.
        `imagehash.average_hash` is great for similar images
        (https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html)

    Returns
    -------
    imagehash.ImageHash
        The image hash for the image
    """
    # Image.open() keeps the underlying file open until the image is closed.
    # Hashing inside the context manager releases the handle right away, so
    # hashing a large folder does not accumulate open files (which would also
    # keep the files locked on Windows while exiftool tries to rewrite them).
    with Image.open(image_path) as image:
        return algorithm(image)


def compare_image_hashes(
    image1_hash: imagehash.ImageHash,
    image2_hash: imagehash.ImageHash,
    max_difference: int = 1,
) -> bool:
    """
    Decide whether two perceptual hashes belong to the same picture.

    Params
    ------
    image1_hash : imagehash.ImageHash
        Hash of the first image
    image2_hash : imagehash.ImageHash
        Hash of the second image
    max_difference : int
        Largest hash difference that still counts as "the same image"

    Returns
    -------
    bool
        True when the difference between both hashes does not exceed
        `max_difference`
    """
    # The difference is whatever the hash type's subtraction operator yields.
    distance = image1_hash - image2_hash
    return distance <= max_difference


def merge_metadata(image1_path: str, image2_path: str) -> Tuple[list, list, list]:
    """
    Read the metadata of two images and merge it into combined value lists.

    Params
    ------
    image1_path : str
        Path to the first image
    image2_path : str
        Path to the second image

    Returns
    -------
    Tuple[list, list, list]
        A tuple of the merged keywords, subjects, and hierarchical subjects.
        Each list is free of duplicates and empty entries and keeps the order in
        which the values were first seen. A list is empty if neither image
        carries the corresponding tag.

    Raises
    ------
    subprocess.CalledProcessError
        If exiftool exits with a non-zero status
    """
    base_command = [
        "exiftool",
        "-L",  # Output tag values as Windows Latin1 (cp1252) instead of UTF-8
        "-charset",
        "filename=cp1252",  # For Windows file paths
        "-Keywords",
        "-Subject",
        "-HierarchicalSubject",
    ]

    # Values collected per tag, keyed by the normalized tag label.
    collected = {"keywords": [], "subject": [], "hierarchicalsubject": []}

    for file in (image1_path, image2_path):
        raw_output = subprocess.check_output([*base_command, file])
        # Decode with the charset requested via -L; bytes that are undefined in
        # cp1252 are replaced instead of aborting the whole run.
        output = raw_output.decode("cp1252", errors="replace")
        # exiftool prints one "<label>   : <value>" line per tag that is present
        # and omits missing tags, so lines are matched by label rather than by
        # position. splitlines() copes with both "\r\n" and "\n".
        for line in output.splitlines():
            label, separator, value = line.partition(": ")
            if not separator:
                continue
            # "Hierarchical Subject" -> "hierarchicalsubject"
            tag = label.replace(" ", "").lower()
            if tag not in collected:
                continue
            # NOTE: individual values that themselves contain ", " cannot be
            # told apart from the list separator in this output format.
            collected[tag].extend(
                entry.strip() for entry in value.split(", ") if entry.strip()
            )

    # dict.fromkeys() removes duplicates while keeping a deterministic order.
    keywords_list = list(dict.fromkeys(collected["keywords"]))
    subject_list = list(dict.fromkeys(collected["subject"]))
    hierarchicalsubject_list = list(dict.fromkeys(collected["hierarchicalsubject"]))

    logger.debug(
        "Combined keywords of %s and %s: %s",
        image1_path,
        image2_path,
        ", ".join(keywords_list),
    )

    return (keywords_list, subject_list, hierarchicalsubject_list)


def apply_metadata(
    image_path: str, keywords: list, subjects: list, hierarchicalsubjects: list
) -> None:
    """
    Apply new metadata to an image.

    Values the image already carries are not duplicated, and values the image
    has in addition to the given ones are left untouched.

    Params
    ------
    image_path : str
        The path to the image to which the metadata should be applied
    keywords : list
        The keywords for the image
    subjects : list
        The subjects for the image
    hierarchicalsubjects : list
        The hierarchical subjects for the image

    Raises
    ------
    subprocess.CalledProcessError
        If exiftool exits with a non-zero status
    """
    parameters = [
        "exiftool",
        "-overwrite_original",
        "-L",  # Interpret the values given below as Windows Latin1 (cp1252)
        "-charset",
        "filename=cp1252",  # For Windows file paths
    ]
    option_count = len(parameters)

    for tag, values in (
        ("Keywords", keywords),
        ("Subject", subjects),
        ("HierarchicalSubject", hierarchicalsubjects),
    ):
        for value in values:
            if not value:
                continue  # Never write empty list entries
            # A plain "+=" appends even if the entry already exists. Removing
            # the value first and then adding it again avoids duplicates.
            parameters.extend([f"-{tag}-={value}", f"-{tag}+={value}"])

    if len(parameters) == option_count:
        # Without any tag assignment exiftool would only read the file.
        logger.debug("No metadata to apply to %s", image_path)
        return

    parameters.append(image_path)
    logger.info(
        "Adding the following keywords (and related subjects and hierarchical subjects) "
        "to %s: %s",
        image_path,
        ", ".join(keywords),
    )

    subprocess.run(
        parameters,
        check=True,
    )


def compare_all_images(folder_path: str) -> None:
    """
    Find similar images in a folder and share their metadata between them.

    Every file in the folder whose name ends with one of `IMAGE_EXTENSIONS` is
    hashed. Each pair of images with similar hashes gets its metadata merged, and
    the merged metadata is written to both images of the pair.

    Params
    ------
    folder_path : str
        Path to the folder containing the images.
    """
    logger.debug("Finding all image files in %s", folder_path)
    image_paths = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(IMAGE_EXTENSIONS):
            image_paths.append(os.path.join(folder_path, entry))

    # 1. Hash every image once and remember the hash by full path
    logger.debug("Calculating perceptual hashes for image files ...")
    hashes_by_path = {}
    for path in image_paths:
        hashes_by_path[path] = create_image_hash(path)

    # 2. Check every pair of images for similar hashes
    logger.debug("Comparing the hashes and finding duplicates ...")
    for first_path, first_hash in hashes_by_path.items():
        for second_path, second_hash in hashes_by_path.items():
            # Only look at each unordered pair once (and never at identical paths)
            if first_path >= second_path:
                continue
            if not compare_image_hashes(first_hash, second_hash):
                continue

            logger.debug(
                "%s and %s seem to be similar. Merging their metadata.",
                first_path,
                second_path,
            )
            # 3. Combine the metadata of both similar images
            keywords, subjects, hierarchicalsubject = merge_metadata(
                first_path, second_path
            )
            # 4. Write the combined metadata back to both images
            apply_metadata(first_path, keywords, subjects, hierarchicalsubject)
            apply_metadata(second_path, keywords, subjects, hierarchicalsubject)


def main() -> None:
    """
    Main function to run the script.

    Expects exactly one folder argument and an optional `-v` flag (in any order).
    Prints the usage text and exits with status 1 if the arguments do not match
    that; prints an error and exits with status 1 if the folder does not exist.
    """
    arguments = sys.argv[1:]
    verbose = "-v" in arguments
    folders = [argument for argument in arguments if argument != "-v"]

    # Reject missing folders ("-v" alone), repeated flags and multiple folders
    # instead of continuing with an empty or arbitrarily chosen path.
    if len(arguments) not in (1, 2) or len(folders) != 1:
        print(__doc__)
        sys.exit(1)

    folder = folders[0]
    if not os.path.isdir(folder):
        print(f"{folder} is not a directory", file=sys.stderr)
        sys.exit(1)

    if verbose:
        logger.setLevel(logging.DEBUG)
    compare_all_images(folder)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagehash
Pillow
32 changes: 32 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import setuptools

# The README contains non-ASCII characters; read it as UTF-8 explicitly so the
# long description is not garbled by the platform default encoding (cp1252 on
# Windows).
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="merge_image_metadata",
    version="0.0.1",
    author="Jonas A. Wendorf",
    description="Merges image metadata (Keywords, Subject, HierarchicalSubject) as used by Adobe Bridge between images",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/jonasw234/merge_image_metadata",
    packages=setuptools.find_packages(),
    install_requires=["imagehash", "Pillow"],
    include_package_data=True,
    # Classifiers must match the official trove classifier list exactly,
    # otherwise package indexes reject the upload.
    classifiers=[
        "Environment :: Console",
        "Natural Language :: English",
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Operating System :: Microsoft :: Windows",
        "Programming Language :: Python :: 3",
        "Topic :: Multimedia :: Graphics",
        "Topic :: Utilities",
    ],
    python_requires=">=3.11",
    # Installs a `merge_image_metadata` command that runs the module's main().
    entry_points={
        "console_scripts": [
            "merge_image_metadata=merge_image_metadata.merge_image_metadata:main"
        ],
    },
)

0 comments on commit 990d37c

Please sign in to comment.