diff --git a/sigal/gallery.py b/sigal/gallery.py index 9319b82..6b8131a 100644 --- a/sigal/gallery.py +++ b/sigal/gallery.py @@ -51,6 +51,7 @@ from .utils import ( check_or_create_dir, copy, get_mime, + get_mod_date, is_valid_html5_video, read_markdown, url_from_path, @@ -96,12 +97,6 @@ class Media: self.logger = logging.getLogger(__name__) - self.file_metadata = None - self._get_metadata() - - # default: title is the filename - if not self.title: - self.title = self.basename signals.media_initialized.send(self) def __repr__(self): @@ -190,27 +185,45 @@ class Media: return return url_from_path(self.thumb_name) - def _get_metadata(self): - """Get image metadata from filename.md: title, description, meta.""" - - self.description = '' + @cached_property + def description(self): """Description extracted from the Markdown .md file.""" + return self.raw_metadata.get('description', '') - self.title = '' - """Title extracted from the Markdown .md file.""" + @cached_property + def title(self): + """Title extracted from the metadata, or defaults to the filename.""" + title = self.raw_metadata.get('title', '') + return title if title else self.basename - self.meta = {} + @cached_property + def meta(self): """Other metadata extracted from the Markdown .md file.""" + return self.raw_metadata.get('meta', {}) - descfile = splitext(self.src_path)[0] + '.md' - if isfile(descfile): - meta = read_markdown(descfile) - for key, val in meta.items(): - setattr(self, key, val) + @cached_property + def raw_metadata(self): + """Get metadata from filename.md: title, description, meta.""" + return self._get_raw_metadata() + + @property + def raw_metadata_filepath(self): + return splitext(self.src_path)[0] + '.md' + + def _get_raw_metadata(self): + """Get metadata from filename.md.""" + meta = {'title': '', 'description': '', 'meta': {}} + if isfile(self.raw_metadata_filepath): + meta.update(read_markdown(self.raw_metadata_filepath)) + return meta + + @cached_property + def 
file_metadata(self): + """Type-specific metadata""" + return {} def _get_file_date(self): - stat = os.stat(self.src_path) - return datetime.fromtimestamp(stat.st_mtime) + return datetime.fromtimestamp(get_mod_date(self.src_path)) class Image(Media): @@ -247,21 +260,23 @@ class Image(Media): else None ) - def _get_metadata(self): - super()._get_metadata() - self.file_metadata = get_image_metadata(self.src_path) + @cached_property + def file_metadata(self): + """Image file metadata (Exif and IPTC)""" + return get_image_metadata(self.src_path) + + def _get_raw_metadata(self): + """Get metadata from filename.md.""" + meta = super()._get_raw_metadata() # If a title or description hasn't been obtained by other means, look # for the information in IPTC fields - if self.title and self.description: - # Nothing to do - we already have title and description - return + if not meta['title']: + meta['title'] = self.file_metadata['iptc'].get('title', '') + if not meta['description']: + meta['description'] = self.file_metadata['iptc'].get('description', '') - iptc_data = self.file_metadata['iptc'] - if not self.title and iptc_data.get('title'): - self.title = iptc_data['title'] - if not self.description and iptc_data.get('description'): - self.description = iptc_data['description'] + return meta @cached_property def raw_exif(self): @@ -358,7 +373,6 @@ class Album: self.dst_path = join(settings['destination'], path) self.logger = logging.getLogger(__name__) - self._get_metadata() # optionally add index.html to the URLs self.url_ext = self.output_file if settings['index_in_url'] else '' @@ -411,27 +425,42 @@ class Album: def __iter__(self): return iter(self.medias) - def _get_metadata(self): - """Get album metadata from `description_file` (`index.md`): - - -> title, thumbnail image, description + @cached_property + def description(self): + """Description extracted from the Markdown index.md file.""" + return self.raw_metadata.get('description', '') - """ - descfile = 
join(self.src_path, self.description_file) - self.description = '' - self.meta = {} - # default: get title from directory name - self.title = os.path.basename(self.path if self.path != '.' else self.src_path) + @cached_property + def title(self): + """Title extracted from the Markdown index.md file, or defaults to the directory name.""" + title = self.raw_metadata.get('title', '') + path = self.path if self.path != '.' else self.src_path + return title if title else os.path.basename(path) - if isfile(descfile): - meta = read_markdown(descfile) - for key, val in meta.items(): - setattr(self, key, val) + @cached_property + def meta(self): + """Other metadata extracted from the Markdown index.md file.""" + return self.raw_metadata.get('meta', {}) + @cached_property + def author(self): + """Author extracted from the Markdown index.md file or settings.""" try: - self.author = self.meta['author'][0] + return self.meta['author'][0] except KeyError: - self.author = self.settings.get('author') + return self.settings.get('author') + + @property + def raw_metadata_filepath(self): + return join(self.src_path, self.description_file) + + @cached_property + def raw_metadata(self): + """Get metadata from the description file (index.md): title, description, meta.""" + meta = {'title': '', 'description': '', 'meta': {}} + if isfile(self.raw_metadata_filepath): + meta.update(read_markdown(self.raw_metadata_filepath)) + return meta def create_output_directories(self): """Create output directories for thumbnails and original images.""" diff --git a/sigal/plugins/extended_caching.py b/sigal/plugins/extended_caching.py index 47b0025..b07f301 100644 --- a/sigal/plugins/extended_caching.py +++ b/sigal/plugins/extended_caching.py @@ -22,63 +22,130 @@ 2.5s instead of 30s) This plugin allows extended caching, which is useful for large galleries. Once -a gallery has been built it caches the exif-data of the contained images in the -gallery target folder. Before the next run it restores them so that the image -does not have to be parsed again. 
For large galleries this can speed up the -creation of index files dramatically. - +a gallery has been built it caches all metadata for all media (markdown, exif, +iptc) in the gallery target folder. Before the next run it restores them so +that the image and metadata files do not have to be parsed again. For large +galleries this can speed up the creation of index files dramatically. """ import logging import os import pickle -from sigal import signals +from .. import signals +from ..utils import get_mod_date logger = logging.getLogger(__name__) -def load_exif(album): - """Loads the exif data of all images in an album from cache""" - if not hasattr(album.gallery, "exifCache"): +def load_metadata(album): + """Loads the metadata of all media in an album from cache""" + if not hasattr(album.gallery, "metadataCache"): _restore_cache(album.gallery) - cache = album.gallery.exifCache + cache = album.gallery.metadataCache + + # load album metadata + key = os.path.join(album.path, '_index') + if key in cache: + data = cache[key] + + # check if file has changed + try: + mod_date = int(get_mod_date(album.raw_metadata_filepath)) + except FileNotFoundError: + pass + else: + if data.get('mod_date', -1) >= mod_date: + # cache is good + if 'raw_metadata' in data: + album.raw_metadata = data['raw_metadata'] + # load media metadata for media in album.medias: - if media.type == "image": - key = os.path.join(media.path, media.dst_filename) - if key in cache: - media.exif = cache[key] + key = os.path.join(media.path, media.dst_filename) + if key in cache: + data = cache[key] + + # check if files have changed + try: + mod_date = int(get_mod_date(media.src_path)) + except FileNotFoundError: + continue + if data.get('mod_date', -1) < mod_date: + continue # file_metadata needs updating + + if 'file_metadata' in data: + media.file_metadata = data['file_metadata'] + if 'exif' in data: + media.exif = data['exif'] + + try: + mod_date = int(get_mod_date(media.raw_metadata_filepath)) + except 
FileNotFoundError: + continue + if data.get('meta_mod_date', -1) < mod_date: + continue # raw_metadata needs updating + + if 'raw_metadata' in data: + media.raw_metadata = data['raw_metadata'] def _restore_cache(gallery): - """Restores the exif data cache from the cache file""" - cachePath = os.path.join(gallery.settings["destination"], ".exif_cache") + """Restores the metadata cache from the cache file""" + cachePath = os.path.join(gallery.settings["destination"], ".metadata_cache") try: if os.path.exists(cachePath): with open(cachePath, "rb") as cacheFile: - gallery.exifCache = pickle.load(cacheFile) - logger.debug("Loaded cache with %d entries", len(gallery.exifCache)) + gallery.metadataCache = pickle.load(cacheFile) + logger.debug("Loaded cache with %d entries", len(gallery.metadataCache)) else: - gallery.exifCache = {} + gallery.metadataCache = {} except Exception as e: logger.warn("Could not load cache: %s", e) - gallery.exifCache = {} + gallery.metadataCache = {} def save_cache(gallery): """Stores the exif data of all images in the gallery""" - if hasattr(gallery, "exifCache"): - cache = gallery.exifCache + if hasattr(gallery, "metadataCache"): + cache = gallery.metadataCache else: - cache = gallery.exifCache = {} + cache = gallery.metadataCache = {} for album in gallery.albums.values(): - for image in album.images: - cache[os.path.join(image.path, image.dst_filename)] = image.exif - - cachePath = os.path.join(gallery.settings["destination"], ".exif_cache") + try: + data = { + 'mod_date': int(get_mod_date(album.raw_metadata_filepath)), + 'raw_metadata': album.raw_metadata, + } + cache[os.path.join(album.path, '_index')] = data + except FileNotFoundError: + pass + + for media in album.medias: + data = {} + try: + mod_date = int(get_mod_date(media.src_path)) + except FileNotFoundError: + continue + else: + data['mod_date'] = mod_date + data['file_metadata'] = media.file_metadata + if hasattr(media, 'exif'): + data['exif'] = media.exif + + try: + meta_mod_date 
= int(get_mod_date(media.raw_metadata_filepath)) + except FileNotFoundError: + pass + else: + data['meta_mod_date'] = meta_mod_date + data['raw_metadata'] = media.raw_metadata + + cache[os.path.join(media.path, media.dst_filename)] = data + + cachePath = os.path.join(gallery.settings["destination"], ".metadata_cache") if len(cache) == 0: if os.path.exists(cachePath): @@ -88,7 +155,7 @@ def save_cache(gallery): try: with open(cachePath, "wb") as cacheFile: pickle.dump(cache, cacheFile) - logger.debug("Stored cache with %d entries", len(gallery.exifCache)) + logger.debug("Stored cache with %d entries", len(gallery.metadataCache)) except Exception as e: logger.warn("Could not store cache: %s", e) os.remove(cachePath) @@ -96,4 +163,4 @@ def save_cache(gallery): def register(settings): signals.gallery_build.connect(save_cache) - signals.album_initialized.connect(load_exif) + signals.album_initialized.connect(load_metadata) diff --git a/sigal/utils.py b/sigal/utils.py index 1bbfa97..a374758 100644 --- a/sigal/utils.py +++ b/sigal/utils.py @@ -18,6 +18,7 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
+from functools import lru_cache import os import shutil from urllib.parse import quote @@ -64,6 +65,12 @@ def check_or_create_dir(path): os.makedirs(path) +@lru_cache(maxsize=1024) +def get_mod_date(path): + """Get modification date for a path, caching result with LRU cache.""" + return os.path.getmtime(path) + + def url_from_path(path): """Transform path to url, converting backslashes to slashes if needed.""" diff --git a/tests/test_extended_caching.py b/tests/test_extended_caching.py index 169530b..bc5fba9 100644 --- a/tests/test_extended_caching.py +++ b/tests/test_extended_caching.py @@ -12,16 +12,40 @@ def test_save_cache(settings, tmpdir): gal = Gallery(settings, ncpu=1) extended_caching.save_cache(gal) - cachePath = os.path.join(settings['destination'], ".exif_cache") + cachePath = os.path.join(settings['destination'], ".metadata_cache") assert os.path.isfile(cachePath) with open(cachePath, "rb") as cacheFile: cache = pickle.load(cacheFile) - assert cache["exifTest/21.jpg"] == gal.albums["exifTest"].medias[0].exif - assert cache["exifTest/22.jpg"] == gal.albums["exifTest"].medias[1].exif - assert cache["exifTest/noexif.png"] == gal.albums["exifTest"].medias[2].exif + # test exif + album = gal.albums["exifTest"] + cache_img = cache["exifTest/21.jpg"] + assert cache_img["exif"] == album.medias[0].exif + assert 'raw_metadata' not in cache_img + assert cache_img["file_metadata"] == album.medias[0].file_metadata + + cache_img = cache["exifTest/22.jpg"] + assert cache_img["exif"] == album.medias[1].exif + assert 'raw_metadata' not in cache_img + assert cache_img["file_metadata"] == album.medias[1].file_metadata + + cache_img = cache["exifTest/noexif.png"] + assert cache_img["exif"] == album.medias[2].exif + assert 'raw_metadata' not in cache_img + assert cache_img["file_metadata"] == album.medias[2].file_metadata + + # test iptc and md + album = gal.albums["iptcTest"] + assert cache["iptcTest/_index"]["raw_metadata"] == album.raw_metadata + + cache_img = 
cache["iptcTest/1.jpg"] + assert cache_img["file_metadata"] == album.medias[0].file_metadata + assert 'raw_metadata' not in cache_img + + cache_img = cache["iptcTest/2.jpg"] + assert cache_img["raw_metadata"] == album.medias[1].raw_metadata def test_restore_cache(settings, tmpdir): @@ -30,22 +54,29 @@ def test_restore_cache(settings, tmpdir): gal2 = Gallery(settings, ncpu=1) extended_caching.save_cache(gal1) extended_caching._restore_cache(gal2) - assert gal1.exifCache == gal2.exifCache + assert gal1.metadataCache == gal2.metadataCache def test_load_exif(settings, tmpdir): settings['destination'] = str(tmpdir) gal1 = Gallery(settings, ncpu=1) gal1.albums["exifTest"].medias[2].exif = "blafoo" - gal1.exifCache = {"exifTest/21.jpg": "Foo", "exifTest/22.jpg": "Bar"} + gal1.metadataCache = { + "exifTest/21.jpg": {"exif": "Foo", "mod_date": 100000000000}, + "exifTest/22.jpg": {"exif": "Bar", "mod_date": 100000000000}, + } - extended_caching.load_exif(gal1.albums["exifTest"]) + extended_caching.load_metadata(gal1.albums["exifTest"]) assert gal1.albums["exifTest"].medias[0].exif == "Foo" assert gal1.albums["exifTest"].medias[1].exif == "Bar" assert gal1.albums["exifTest"].medias[2].exif == "blafoo" - # check if setting gallery.exifCache works + # check if setting gallery.metadataCache works gal2 = Gallery(settings, ncpu=1) extended_caching.save_cache(gal1) - extended_caching.load_exif(gal2.albums["exifTest"]) + extended_caching.load_metadata(gal2.albums["exifTest"]) + + assert gal2.albums["exifTest"].medias[0].exif == "Foo" + assert gal2.albums["exifTest"].medias[1].exif == "Bar" + assert gal2.albums["exifTest"].medias[2].exif == "blafoo"