Source code for ocrd.workspace_backup
from datetime import datetime
from os import makedirs
from os.path import join, basename, getsize, abspath
from glob import glob
from shutil import copy
import hashlib
from ocrd_models import OcrdMets
from ocrd_utils import getLogger, atomic_write, DEFAULT_METS_BASENAME
from .constants import BACKUP_DIR
def _chksum(s):
return hashlib.sha256(s).hexdigest()
[docs]
class WorkspaceBackup():
    """
    A single backup of a METS file, as stored in one backup directory.

    Attributes:
        chksum (str): checksum part of the backup directory name
        lastmod (datetime): timestamp part of the backup directory name,
            converted with ``datetime.fromtimestamp`` (naive local time)
        size (int): size of the backed-up METS file in bytes
        mets_xml: the ``OcrdMets`` loaded from the backed-up METS file
    """

    @staticmethod
    def _parse_dirname(d):
        """
        Split the name of backup directory ``d`` into ``(chksum, lastmod)``.

        The directory name must have the form ``<chksum>.<timestamp>``, where
        ``<timestamp>`` is parseable by ``float``.

        Raises:
            ValueError: if the name contains no ``.`` or the timestamp part
                is not a number.
        """
        # abspath() normalizes the path first, so a trailing separator
        # ("backups/abc.123/") no longer yields an empty basename.
        name = basename(abspath(d))
        chksum, dot, lastmod = name.partition('.')
        if not dot:
            raise ValueError(
                "Not a backup directory name (expected '<chksum>.<timestamp>'): %r" % name)
        return chksum, float(lastmod)

    @classmethod
    def from_path(cls, d):
        """
        Create a WorkspaceBackup from the backup directory ``d``.

        Checksum and timestamp are taken from the directory name, size and
        METS content from the ``DEFAULT_METS_BASENAME`` file inside ``d``.

        Raises:
            ValueError: if the directory name is not ``<chksum>.<timestamp>``.
            OSError: if the METS file in ``d`` cannot be accessed.
        """
        chksum, lastmod = cls._parse_dirname(d)
        mets_file = join(d, DEFAULT_METS_BASENAME)
        size = getsize(mets_file)
        mets_xml = OcrdMets(filename=mets_file)
        return cls(chksum, lastmod, size, mets_xml)

    def __init__(self, chksum, lastmod, size, mets_xml):
        """
        Arguments:
            chksum (str): checksum identifying the backup
            lastmod (float): POSIX timestamp of the backup
            size (int): size of the METS file in bytes
            mets_xml: METS object; ``__str__`` reads its ``file_groups``
        """
        self.chksum = chksum
        self.lastmod = datetime.fromtimestamp(lastmod)
        self.size = size
        self.mets_xml = mets_xml

    def __str__(self):
        """
        One-line summary: abbreviated checksum (first 7 characters),
        timestamp, size in bytes and the file groups of the METS.
        """
        return '%s - %s - %8s B %s' % (
            self.chksum[0:7],
            self.lastmod.strftime('%Y-%m-%d %H:%M:%S'),
            self.size,
            self.mets_xml.file_groups
        )
[docs]
class WorkspaceBackupManager():
    """
    Manages backups of a workspace in a directory BACKUP_DIR

    Every backup is a sub-directory of ``<workspace.directory>/<BACKUP_DIR>``
    named ``<chksum>.<timestamp>`` which contains a copy of the METS file.
    """

    def __init__(self, workspace):
        """
        Arguments:
            workspace: the workspace to manage backups for. This class reads
                its ``directory``, ``mets`` and ``mets_target`` attributes
                and calls its ``reload_mets()`` method.
        """
        self.workspace = workspace
        self.backup_directory = join(workspace.directory, BACKUP_DIR)

    def restore(self, chksum, choose_first=False):
        """
        Restore mets.xml to previous state

        Arguments:
            chksum (str): checksum of the backup to restore. It is matched
                as a prefix of the backup directory names, so an abbreviated
                checksum works as well.
            choose_first (bool): if several backups match, use the first one
                (in sorted order) instead of raising.

        Raises:
            Exception: if more than one backup matches and ``choose_first``
                is false.

        If no backup matches, an error is logged and nothing is changed.
        """
        log = getLogger('ocrd.workspace_backup.restore')
        # glob() returns matches in arbitrary order; sort them so that
        # "first" (and the listing in the error message) is reproducible.
        candidates = sorted(glob(join(self.backup_directory, '%s*' % chksum)))
        if not candidates:
            log.error("No backup found: %s", chksum)
            return
        if len(candidates) > 1 and not choose_first:
            raise Exception("Not unique, could be\n%s" % '\n'.join(candidates))
        bak = candidates[0]
        # Back up the current METS before it is overwritten below.
        self.add()
        log.info("Restoring from %s/mets.xml", bak)
        src = join(bak, DEFAULT_METS_BASENAME)
        dest = self.workspace.mets_target
        log.debug('cp "%s" "%s"', src, dest)
        copy(src, dest)
        self.workspace.reload_mets()

    def add(self):
        """
        Create a backup in <self.backup_directory>

        Nothing is written if the checksum of the current METS equals the
        checksum of the most recent backup.

        Returns:
            str: checksum of the current METS
        """
        log = getLogger('ocrd.workspace_backup.add')
        # to_xml() yields bytes: they are hashed as-is and decoded for writing.
        mets_str = self.workspace.mets.to_xml()
        chksum = _chksum(mets_str)
        backups = self.list()
        if backups and backups[0].chksum == chksum:
            log.info('No changes since last backup: %s', backups[0])
        else:
            timestamp = datetime.now().timestamp()
            d = join(self.backup_directory, '%s.%s' % (chksum, timestamp))
            mets_file = join(d, DEFAULT_METS_BASENAME)
            log.info("Backing up to %s", mets_file)
            makedirs(d)
            with atomic_write(mets_file) as f:
                f.write(mets_str.decode('utf-8'))
        return chksum

    def list(self):
        """
        List all backups as WorkspaceBackup objects, sorted descending by lastmod.

        Returns an empty list if the backup directory is empty or missing.
        """
        entries = glob(join(self.backup_directory, '*'))
        return sorted(
            [WorkspaceBackup.from_path(d) for d in entries],
            key=lambda b: b.lastmod,
            reverse=True,
        )

    def undo(self):
        """
        Restore to last version

        Restores the most recent backup; only logs a message if there is none.
        """
        log = getLogger('ocrd.workspace_backup.undo')
        backups = self.list()
        if not backups:
            log.info("No backups, nothing to undo.")
            return
        # Several backups may share the newest checksum, so tell restore()
        # not to raise on multiple matches.
        self.restore(backups[0].chksum, choose_first=True)