python-tldp/tldp/outputs.py

199 lines
6.9 KiB
Python
Raw Normal View History

2016-02-11 03:22:23 +00:00
#! /usr/bin/python
2016-02-18 21:22:48 +00:00
# -*- coding: utf8 -*-
2016-02-11 03:22:23 +00:00
from __future__ import absolute_import, division, print_function
2016-02-11 17:15:22 +00:00
2016-02-11 03:22:23 +00:00
import collections
import errno
import os
import shutil

try:
    from collections.abc import MutableMapping
except ImportError:  # Python 2 keeps the ABCs in the collections module
    from collections import MutableMapping

from .utils import logger, logdir, statfiles
2016-02-11 17:15:22 +00:00
class OutputNamingConvention(object):
    '''Canonical file names for the documents of one output directory.

    Inherited by OutputDirectory so that every output document is named
    the same way, regardless of the source document type and the
    processing toolchain that produced it.

    The class attribute ``expected`` lists the name properties whose
    files must all be present before ``iscomplete`` reports True.
    '''

    expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html',
                'name_indexhtml']

    def __init__(self, dirname, stem):
        '''
        :param dirname: directory in which the output documents live
        :param stem: base name shared by the stem-derived file names
        '''
        self.stem = stem
        self.dirname = dirname

    def _stem_path(self, suffix):
        '''Return the path of <stem><suffix> inside the output directory.'''
        return os.path.join(self.dirname, self.stem + suffix)

    @property
    def name_txt(self):
        '''Path of the plain text output.'''
        return self._stem_path('.txt')

    @property
    def name_pdf(self):
        '''Path of the PDF output.'''
        return self._stem_path('.pdf')

    @property
    def name_html(self):
        '''Path of the <stem>.html output.'''
        return self._stem_path('.html')

    @property
    def name_htmls(self):
        '''Path of the <stem>-single.html output.'''
        return self._stem_path('-single.html')

    @property
    def name_epub(self):
        '''Path of the EPUB output (not part of ``expected``).'''
        return self._stem_path('.epub')

    @property
    def name_indexhtml(self):
        '''Path of index.html; the only name that does not use the stem.'''
        return os.path.join(self.dirname, 'index.html')

    @property
    def iscomplete(self):
        '''True if the output directory contains all expected documents'''
        # First resolve every expected property to a path; an entry in
        # ``expected`` that does not resolve is a programming error.
        paths = []
        for attr in self.expected:
            path = getattr(self, attr, None)
            assert path is not None
            paths.append(path)
        # Then look at the filesystem: complete means nothing is absent.
        absent = [p for p in paths if not os.path.isfile(p)]
        return not absent
2016-02-18 21:22:48 +00:00
class OutputDirectory(OutputNamingConvention):
    '''A class providing a container for each set of output documents
    for a given source document and general methods for operating on
    and preparing the output directory for a document processor.

    For example, the process of generating each document type for a single
    source (e.g. 'Unicode-HOWTO') would be managed by this object.

    An important element of the OutputDirectory is the stem, determined
    from the directory name when __init__() is called.
    '''

    def __repr__(self):
        return '<%s:%s>' % (self.__class__.__name__, self.dirname)

    def __init__(self, dirname):
        '''constructor

        :param dirname: directory name for all output documents
        :raises IOError: with errno ENOENT when the parent of dirname is
            not an existing directory

        This directory name is expected to end with the document stem name,
        for example '/path/to/the/collection/Unicode-HOWTO'. The parent
        directory (e.g. '/path/to/the/collection') must exist already. The
        output directory itself will be created, or emptied and cleared if
        the document needs to be rebuilt.
        '''
        self.dirname = os.path.abspath(dirname)
        # The stem is simply the last path component of the output dir.
        self.stem = os.path.basename(self.dirname)
        parent = os.path.dirname(self.dirname)
        if not os.path.isdir(parent):
            logger.critical("Missing output collection directory %s.", parent)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), parent)
        # NOTE(review): statfiles comes from .utils; presumably it collects
        # stat data for the files below dirname -- confirm against utils.
        self.statinfo = statfiles(self.dirname, relative=self.dirname)
        self.status = 'output'
        # Build logs live in a subdirectory of the output directory.
        self.logdir = os.path.join(self.dirname, logdir)
        super(OutputDirectory, self).__init__(self.dirname, self.stem)

    def clean(self):
        '''remove the output directory for this document

        This is done as a matter of course when the output documents must be
        regenerated. Better to start fresh.
        '''
        logger.info("%s cleaning dir %s.", self.stem, self.dirname)
        if os.path.isdir(self.dirname):
            shutil.rmtree(self.dirname)

    def prebuild_hook(self):
        '''prepare an empty output directory (and log directory) for a build

        Removes any previous output, then recreates the output directory
        and the log directory below it.
        '''
        self.clean()
        # Order matters: the log directory is created inside dirname.
        for d in (self.dirname, self.logdir):
            if not os.path.isdir(d):
                logger.info("%s creating dir %s.", self.stem, d)
                os.mkdir(d)
        # NOTE(review): copy_ancillaries is not defined on this class or on
        # OutputNamingConvention in this file; confirm where it is provided.
        self.copy_ancillaries(self.dirname)

    def build_failure_hook(self):
        '''report a failed build; the logs are kept for inspection'''
        logger.critical("%s FAILURE, see logs in %s", self.stem, self.logdir)

    def build_success_hook(self):
        '''report a successful build and remove this document's log directory'''
        logger.info("%s SUCCESS!", self.stem)
        logger.debug("%s removing logs %s", self.stem, self.logdir)
        if os.path.isdir(self.logdir):
            # Remove this document's own log directory (self.logdir), not
            # the bare 'logdir' name imported from .utils, which would be
            # resolved against the current working directory.
            shutil.rmtree(self.logdir)
2016-02-16 07:52:52 +00:00
class OutputCollection(MutableMapping):
    '''a dict-like container for OutputDirectory objects

    The key of an OutputCollection is the stem name of the document, which
    allows convenient access and guaranteed non-collision.

    Entries are kept in the instance __dict__, so every key is also
    reachable as an attribute of the collection.
    '''

    def __repr__(self):
        return '<%s:(%s docs)>' % (self.__class__.__name__, len(self))

    def __init__(self, dirname=None):
        '''construct an OutputCollection

        :param dirname: optional directory to scan for output directories
        :raises IOError: with errno ENOENT when dirname is supplied but is
            not an existing directory

        if dirname is not supplied, OutputCollection is basically, a dict().
        if dirname is supplied, then OutputCollection scans the filesystem
        for subdirectories of dirname and creates an OutputDirectory for each
        subdir.  Each subdir name is used as the stem (or key) for holding the
        OutputDirectory in the OutputCollection.

        For example, consider the following directory tree:

          en
            Latvian-HOWTO
            Scanner-HOWTO
            UUCP-HOWTO
            Wireless-HOWTO

        If called like OutputCollection("en"), the result in memory would be
        a structure resembling this:

          OutputCollection("/path/en") = {
            "Latvian-HOWTO": OutputDirectory("/path/en/Latvian-HOWTO")
            "Scanner-HOWTO": OutputDirectory("/path/en/Scanner-HOWTO")
            "UUCP-HOWTO": OutputDirectory("/path/en/UUCP-HOWTO")
            "Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO")
          }

        The use of the stem as a key works conveniently with the
        SourceCollection which uses the same strategy on SourceDocuments.
        '''
        if dirname is None:
            return
        elif not os.path.isdir(dirname):
            logger.critical("Output collection dir %s must already exist.",
                            dirname)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname)
        # Sorted so that the scan (and its log output) is deterministic.
        for fname in sorted(os.listdir(dirname)):
            name = os.path.join(dirname, fname)
            if not os.path.isdir(name):
                logger.info("Skipping non-directory %s (in %s)", name, dirname)
                continue
            logger.debug("Found directory %s (in %s)", name, dirname)
            o = OutputDirectory(name)
            # Sanity check: each stem must be seen at most once.
            assert o.stem not in self
            self[o.stem] = o

    # The mapping protocol below delegates to the instance __dict__;
    # MutableMapping derives the remaining dict-like methods from these.

    def __delitem__(self, key):
        del self.__dict__[key]

    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value

    def __iter__(self):
        return iter(self.__dict__)

    def __len__(self):
        return len(self.__dict__)
#
2016-02-11 03:22:23 +00:00
# -- end of file