Browse Source

Extend caching plugin to handle Markdown and IPTC metadata, as well as updating when a file timestamp changes

pull/443/head
David Schultz 5 years ago committed by David Schultz
parent
commit
f2451f258e
  1. 125
      sigal/gallery.py
  2. 125
      sigal/plugins/extended_caching.py
  3. 7
      sigal/utils.py
  4. 49
      tests/test_extended_caching.py

125
sigal/gallery.py

@ -51,6 +51,7 @@ from .utils import (
check_or_create_dir,
copy,
get_mime,
get_mod_date,
is_valid_html5_video,
read_markdown,
url_from_path,
@ -96,12 +97,6 @@ class Media:
self.logger = logging.getLogger(__name__)
self.file_metadata = None
self._get_metadata()
# default: title is the filename
if not self.title:
self.title = self.basename
signals.media_initialized.send(self)
def __repr__(self):
@ -190,27 +185,45 @@ class Media:
return
return url_from_path(self.thumb_name)
def _get_metadata(self):
"""Get image metadata from filename.md: title, description, meta."""
self.description = ''
@cached_property
def description(self):
"""Description extracted from the Markdown <imagename>.md file."""
return self.raw_metadata.get('description', '')
self.title = ''
"""Title extracted from the Markdown <imagename>.md file."""
@cached_property
def title(self):
"""Title extracted from the metadata, or defaults to the filename."""
title = self.raw_metadata.get('title', '')
return title if title else self.basename
self.meta = {}
@cached_property
def meta(self):
"""Other metadata extracted from the Markdown <imagename>.md file."""
return self.raw_metadata.get('meta', {})
descfile = splitext(self.src_path)[0] + '.md'
if isfile(descfile):
meta = read_markdown(descfile)
for key, val in meta.items():
setattr(self, key, val)
@cached_property
def raw_metadata(self):
"""Get metadata from filename.md: title, description, meta."""
return self._get_raw_metadata()
@property
def raw_metadata_filepath(self):
return splitext(self.src_path)[0] + '.md'
def _get_raw_metadata(self):
"""Get metadata from filename.md."""
meta = {'title': '', 'description': '', 'meta': {}}
if isfile(self.raw_metadata_filepath):
meta.update(read_markdown(self.raw_metadata_filepath))
return meta
@cached_property
def file_metadata(self):
"""Type-specific metadata"""
return {}
def _get_file_date(self):
stat = os.stat(self.src_path)
return datetime.fromtimestamp(stat.st_mtime)
return datetime.fromtimestamp(get_mod_date(self.src_path))
class Image(Media):
@ -247,21 +260,23 @@ class Image(Media):
else None
)
def _get_metadata(self):
super()._get_metadata()
self.file_metadata = get_image_metadata(self.src_path)
@cached_property
def file_metadata(self):
"""Image file metadata (Exif and IPTC)"""
return get_image_metadata(self.src_path)
def _get_raw_metadata(self):
"""Get metadata from filename.md."""
meta = super()._get_raw_metadata()
# If a title or description hasn't been obtained by other means, look
# for the information in IPTC fields
if self.title and self.description:
# Nothing to do - we already have title and description
return
if not meta['title']:
meta['title'] = self.file_metadata['iptc'].get('title', '')
if not meta['description']:
meta['description'] = self.file_metadata['iptc'].get('description', '')
iptc_data = self.file_metadata['iptc']
if not self.title and iptc_data.get('title'):
self.title = iptc_data['title']
if not self.description and iptc_data.get('description'):
self.description = iptc_data['description']
return meta
@cached_property
def raw_exif(self):
@ -358,7 +373,6 @@ class Album:
self.dst_path = join(settings['destination'], path)
self.logger = logging.getLogger(__name__)
self._get_metadata()
# optionally add index.html to the URLs
self.url_ext = self.output_file if settings['index_in_url'] else ''
@ -411,27 +425,42 @@ class Album:
def __iter__(self):
return iter(self.medias)
def _get_metadata(self):
"""Get album metadata from `description_file` (`index.md`):
-> title, thumbnail image, description
@cached_property
def description(self):
"""Description extracted from the Markdown index.md file."""
return self.raw_metadata.get('description', '')
"""
descfile = join(self.src_path, self.description_file)
self.description = ''
self.meta = {}
# default: get title from directory name
self.title = os.path.basename(self.path if self.path != '.' else self.src_path)
@cached_property
def title(self):
"""Title extracted from the Markdown index.md file."""
title = self.raw_metadata.get('title', '')
path = self.path if self.path != '.' else self.src_path
return title if title else os.path.basename(path)
if isfile(descfile):
meta = read_markdown(descfile)
for key, val in meta.items():
setattr(self, key, val)
@cached_property
def meta(self):
"""Other metadata extracted from the Markdown index.md file."""
return self.raw_metadata.get('meta', {})
@cached_property
def author(self):
"""Author extracted from the Markdown index.md file or settings."""
try:
self.author = self.meta['author'][0]
return self.meta['author'][0]
except KeyError:
self.author = self.settings.get('author')
return self.settings.get('author')
@property
def raw_metadata_filepath(self):
return join(self.src_path, self.description_file)
@cached_property
def raw_metadata(self):
"""Get metadata from filename.md: title, description, meta."""
meta = {'title': '', 'description': '', 'meta': {}}
if isfile(self.raw_metadata_filepath):
meta.update(read_markdown(self.raw_metadata_filepath))
return meta
def create_output_directories(self):
"""Create output directories for thumbnails and original images."""

125
sigal/plugins/extended_caching.py

@ -22,63 +22,130 @@
2.5s instead of 30s)
This plugin allows extended caching, which is useful for large galleries. Once
a gallery has been built it caches the exif-data of the contained images in the
gallery target folder. Before the next run it restores them so that the image
does not have to be parsed again. For large galleries this can speed up the
creation of index files dramatically.
a gallery has been built, it caches all metadata for all media (Markdown, EXIF,
IPTC) in the gallery target folder. Before the next run it restores them so
that the image and metadata files do not have to be parsed again. For large
galleries this can speed up the creation of index files dramatically.
"""
import logging
import os
import pickle
from sigal import signals
from .. import signals
from ..utils import get_mod_date
logger = logging.getLogger(__name__)
def load_exif(album):
"""Loads the exif data of all images in an album from cache"""
if not hasattr(album.gallery, "exifCache"):
def load_metadata(album):
"""Loads the metadata of all media in an album from cache"""
if not hasattr(album.gallery, "metadataCache"):
_restore_cache(album.gallery)
cache = album.gallery.exifCache
cache = album.gallery.metadataCache
# load album metadata
key = os.path.join(album.path, '_index')
if key in cache:
data = cache[key]
# check if file has changed
try:
mod_date = int(get_mod_date(album.raw_metadata_filepath))
except FileNotFoundError:
pass
else:
if data.get('mod_date', -1) >= mod_date:
# cache is good
if 'raw_metadata' in data:
album.raw_metadata = data['raw_metadata']
# load media metadata
for media in album.medias:
if media.type == "image":
key = os.path.join(media.path, media.dst_filename)
if key in cache:
media.exif = cache[key]
key = os.path.join(media.path, media.dst_filename)
if key in cache:
data = cache[key]
# check if files have changed
try:
mod_date = int(get_mod_date(media.src_path))
except FileNotFoundError:
continue
if data.get('mod_date', -1) < mod_date:
continue # file_metadata needs updating
if 'file_metadata' in data:
media.file_metadata = data['file_metadata']
if 'exif' in data:
media.exif = data['exif']
try:
mod_date = int(get_mod_date(media.raw_metadata_filepath))
except FileNotFoundError:
continue
if data.get('meta_mod_date', -1) < mod_date:
continue # raw_metadata needs updating
if 'raw_metadata' in data:
media.raw_metadata = data['raw_metadata']
def _restore_cache(gallery):
"""Restores the exif data cache from the cache file"""
cachePath = os.path.join(gallery.settings["destination"], ".exif_cache")
"""Restores the metadata cache from the cache file"""
cachePath = os.path.join(gallery.settings["destination"], ".metadata_cache")
try:
if os.path.exists(cachePath):
with open(cachePath, "rb") as cacheFile:
gallery.exifCache = pickle.load(cacheFile)
logger.debug("Loaded cache with %d entries", len(gallery.exifCache))
gallery.metadataCache = pickle.load(cacheFile)
logger.debug("Loaded cache with %d entries", len(gallery.metadataCache))
else:
gallery.exifCache = {}
gallery.metadataCache = {}
except Exception as e:
logger.warn("Could not load cache: %s", e)
gallery.exifCache = {}
gallery.metadataCache = {}
def save_cache(gallery):
"""Stores the exif data of all images in the gallery"""
if hasattr(gallery, "exifCache"):
cache = gallery.exifCache
if hasattr(gallery, "metadataCache"):
cache = gallery.metadataCache
else:
cache = gallery.exifCache = {}
cache = gallery.metadataCache = {}
for album in gallery.albums.values():
for image in album.images:
cache[os.path.join(image.path, image.dst_filename)] = image.exif
cachePath = os.path.join(gallery.settings["destination"], ".exif_cache")
try:
data = {
'mod_date': int(get_mod_date(album.raw_metadata_filepath)),
'raw_metadata': album.raw_metadata,
}
cache[os.path.join(album.path, '_index')] = data
except FileNotFoundError:
pass
for media in album.medias:
data = {}
try:
mod_date = int(get_mod_date(media.src_path))
except FileNotFoundError:
continue
else:
data['mod_date'] = mod_date
data['file_metadata'] = media.file_metadata
if hasattr(media, 'exif'):
data['exif'] = media.exif
try:
meta_mod_date = int(get_mod_date(media.raw_metadata_filepath))
except FileNotFoundError:
pass
else:
data['meta_mod_date'] = meta_mod_date
data['raw_metadata'] = media.raw_metadata
cache[os.path.join(media.path, media.dst_filename)] = data
cachePath = os.path.join(gallery.settings["destination"], ".metadata_cache")
if len(cache) == 0:
if os.path.exists(cachePath):
@ -88,7 +155,7 @@ def save_cache(gallery):
try:
with open(cachePath, "wb") as cacheFile:
pickle.dump(cache, cacheFile)
logger.debug("Stored cache with %d entries", len(gallery.exifCache))
logger.debug("Stored cache with %d entries", len(gallery.metadataCache))
except Exception as e:
logger.warn("Could not store cache: %s", e)
os.remove(cachePath)
@ -96,4 +163,4 @@ def save_cache(gallery):
def register(settings):
signals.gallery_build.connect(save_cache)
signals.album_initialized.connect(load_exif)
signals.album_initialized.connect(load_metadata)

7
sigal/utils.py

@ -18,6 +18,7 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from functools import lru_cache
import os
import shutil
from urllib.parse import quote
@ -64,6 +65,12 @@ def check_or_create_dir(path):
os.makedirs(path)
@lru_cache(maxsize=1024)
def get_mod_date(path):
"""Get modification date for a path, caching result with LRU cache."""
return os.path.getmtime(path)
def url_from_path(path):
"""Transform path to url, converting backslashes to slashes if needed."""

49
tests/test_extended_caching.py

@ -12,16 +12,40 @@ def test_save_cache(settings, tmpdir):
gal = Gallery(settings, ncpu=1)
extended_caching.save_cache(gal)
cachePath = os.path.join(settings['destination'], ".exif_cache")
cachePath = os.path.join(settings['destination'], ".metadata_cache")
assert os.path.isfile(cachePath)
with open(cachePath, "rb") as cacheFile:
cache = pickle.load(cacheFile)
assert cache["exifTest/21.jpg"] == gal.albums["exifTest"].medias[0].exif
assert cache["exifTest/22.jpg"] == gal.albums["exifTest"].medias[1].exif
assert cache["exifTest/noexif.png"] == gal.albums["exifTest"].medias[2].exif
# test exif
album = gal.albums["exifTest"]
cache_img = cache["exifTest/21.jpg"]
assert cache_img["exif"] == album.medias[0].exif
assert 'raw_metadata' not in cache_img
assert cache_img["file_metadata"] == album.medias[0].file_metadata
cache_img = cache["exifTest/22.jpg"]
assert cache_img["exif"] == album.medias[1].exif
assert 'raw_metadata' not in cache_img
assert cache_img["file_metadata"] == album.medias[1].file_metadata
cache_img = cache["exifTest/noexif.png"]
assert cache_img["exif"] == album.medias[2].exif
assert 'raw_metadata' not in cache_img
assert cache_img["file_metadata"] == album.medias[2].file_metadata
# test iptc and md
album = gal.albums["iptcTest"]
assert cache["iptcTest/_index"]["raw_metadata"] == album.raw_metadata
cache_img = cache["iptcTest/1.jpg"]
assert cache_img["file_metadata"] == album.medias[0].file_metadata
assert 'raw_metadata' not in cache_img
cache_img = cache["iptcTest/2.jpg"]
assert cache_img["raw_metadata"] == album.medias[1].raw_metadata
def test_restore_cache(settings, tmpdir):
@ -30,22 +54,29 @@ def test_restore_cache(settings, tmpdir):
gal2 = Gallery(settings, ncpu=1)
extended_caching.save_cache(gal1)
extended_caching._restore_cache(gal2)
assert gal1.exifCache == gal2.exifCache
assert gal1.metadataCache == gal2.metadataCache
def test_load_exif(settings, tmpdir):
settings['destination'] = str(tmpdir)
gal1 = Gallery(settings, ncpu=1)
gal1.albums["exifTest"].medias[2].exif = "blafoo"
gal1.exifCache = {"exifTest/21.jpg": "Foo", "exifTest/22.jpg": "Bar"}
gal1.metadataCache = {
"exifTest/21.jpg": {"exif": "Foo", "mod_date": 100000000000},
"exifTest/22.jpg": {"exif": "Bar", "mod_date": 100000000000},
}
extended_caching.load_exif(gal1.albums["exifTest"])
extended_caching.load_metadata(gal1.albums["exifTest"])
assert gal1.albums["exifTest"].medias[0].exif == "Foo"
assert gal1.albums["exifTest"].medias[1].exif == "Bar"
assert gal1.albums["exifTest"].medias[2].exif == "blafoo"
# check if setting gallery.exifCache works
# check if setting gallery.metadataCache works
gal2 = Gallery(settings, ncpu=1)
extended_caching.save_cache(gal1)
extended_caching.load_exif(gal2.albums["exifTest"])
extended_caching.load_metadata(gal2.albums["exifTest"])
assert gal2.albums["exifTest"].medias[0].exif == "Foo"
assert gal2.albums["exifTest"].medias[1].exif == "Bar"
assert gal2.albums["exifTest"].medias[2].exif == "blafoo"

Loading…
Cancel
Save