python-tldp/tldp/outputs.py

#! /usr/bin/python
# -*- coding: utf8 -*-

from __future__ import absolute_import, division, print_function

import os
import errno
import shutil

import collections
from .utils import logger, logdir, statfiles


class OutputNamingConvention(object):
    '''Canonical file names for every output format of one document.

    OutputDirectory inherits from this class so that each generated file
    is named the same way regardless of the source document type and the
    processing toolchain that produced it.

    The class attribute ``expected`` lists the name_* properties whose
    files must all be present for ``iscomplete`` to report True.  Note
    that name_epub is defined but is not part of ``expected``.
    '''
    expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html',
                'name_indexhtml']

    def __init__(self, dirname, stem):
        '''
        :param dirname: directory in which all output files live
        :param stem: document stem, used as the base of most file names
        '''
        self.stem = stem
        self.dirname = dirname

    def _stemmed(self, suffix):
        '''path, inside dirname, of the file named stem + suffix'''
        filename = self.stem + suffix
        return os.path.join(self.dirname, filename)

    @property
    def name_txt(self):
        '''path of the plain text output'''
        return self._stemmed('.txt')

    @property
    def name_pdf(self):
        '''path of the PDF output'''
        return self._stemmed('.pdf')

    @property
    def name_html(self):
        '''path of the HTML output named after the stem'''
        return self._stemmed('.html')

    @property
    def name_htmls(self):
        '''path of the single-page HTML output'''
        return self._stemmed('-single.html')

    @property
    def name_epub(self):
        '''path of the EPUB output'''
        return self._stemmed('.epub')

    @property
    def name_indexhtml(self):
        '''path of index.html; the only name that does not use the stem'''
        return os.path.join(self.dirname, 'index.html')

    @property
    def iscomplete(self):
        '''True if the output directory contains all expected documents'''
        paths = [getattr(self, attr, None) for attr in self.expected]
        # every entry of ``expected`` must resolve to a name_* property
        assert None not in paths
        return all([os.path.isfile(path) for path in paths])


class OutputDirectory(OutputNamingConvention):
    '''A class providing a container for each set of output documents
    for a given source document and general methods for operating on
    and preparing the output directory for a document processor.
    For example, the process of generating each document type for a single
    source (e.g. 'Unicode-HOWTO') would be managed by this object.

    An important element of the OutputDirectory is the stem, determined
    from the directory name when __init__() is called.
    '''
    def __repr__(self):
        return '<%s:%s>' % (self.__class__.__name__, self.dirname)

    def __init__(self, dirname):
        '''constructor
        :param dirname: directory name for all output documents
        :raises IOError: (errno.ENOENT) if the parent of dirname is not an
            existing directory

        This directory name is expected to end with the document stem name,
        for example '/path/to/the/collection/Unicode-HOWTO'.  The parent
        directory (e.g. '/path/to/the/collection') must exist already.  The
        output directory itself is not touched here; prebuild_hook()
        removes and recreates it when the document needs to be rebuilt.
        '''
        self.dirname = os.path.abspath(dirname)
        # the last path component of the absolute name is the document stem
        self.stem = os.path.basename(self.dirname)
        parent = os.path.dirname(self.dirname)
        if not os.path.isdir(parent):
            logger.critical("Missing output collection directory %s.", parent)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), parent)
        # statfiles() is provided by .utils; presumably it collects stat
        # data for files under dirname, named relative to it -- TODO confirm
        self.statinfo = statfiles(self.dirname, relative=self.dirname)
        self.status = 'output'
        # logdir (from .utils) names the log location inside dirname
        self.logdir = os.path.join(self.dirname, logdir)
        super(OutputDirectory, self).__init__(self.dirname, self.stem)

    def clean(self):
        '''remove the output directory for this document

        This is done as a matter of course when the output documents must be
        regenerated.  Better to start fresh.  Nothing is removed if the
        directory does not exist.
        '''
        logger.info("%s cleaning dir   %s.", self.stem, self.dirname)
        if os.path.isdir(self.dirname):
            shutil.rmtree(self.dirname)

    def prebuild_hook(self):
        '''prepare an empty output directory (and its log directory)

        Removes any existing output directory via clean(), then creates
        the output directory and the log directory inside it.
        '''
        self.clean()
        # order matters: dirname must exist before logdir can be created
        for d in (self.dirname, self.logdir):
            if not os.path.isdir(d):
                logger.info("%s creating dir   %s.", self.stem, d)
                os.mkdir(d)
        # NOTE(review): copy_ancillaries() is not defined in this class or
        # its visible base class; presumably supplied elsewhere -- confirm
        self.copy_ancillaries(self.dirname)

    def build_failure_hook(self):
        '''report a failed build; the log directory is left in place'''
        logger.critical("%s FAILURE, see logs in %s", self.stem, self.logdir)

    def build_success_hook(self):
        '''report a successful build and remove this document's logs'''
        logger.info("%s SUCCESS!", self.stem)
        logger.debug("%s removing logs  %s", self.stem, self.logdir)
        if os.path.isdir(self.logdir):
            # remove this document's own log directory (self.logdir), not
            # the bare module-level name, which is relative to the cwd
            shutil.rmtree(self.logdir)


try:  # Python 3.3+; the alias in ``collections`` was removed in 3.10
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping


class OutputCollection(_MutableMapping):
    '''a dict-like container for OutputDirectory objects

    The key of an OutputCollection is the stem name of the document, which
    allows convenient access and guaranteed non-collision.

    Entries are stored in the instance __dict__, so the mapping methods
    below simply delegate to it.
    '''
    def __repr__(self):
        return '<%s:(%s docs)>' % (self.__class__.__name__, len(self))

    def __init__(self, dirname=None):
        '''construct an OutputCollection

        :param dirname: optional directory to scan for output directories
        :raises IOError: (errno.ENOENT) if dirname is supplied but is not
            an existing directory

        If dirname is not supplied, OutputCollection is basically a dict().
        If dirname is supplied, then OutputCollection scans the filesystem
        for subdirectories of dirname and creates an OutputDirectory for each
        subdir.  Each subdir name is used as the stem (or key) for holding the
        OutputDirectory in the OutputCollection.  Entries of dirname that
        are not directories are skipped.

        For example, consider the following directory tree:

            en
            ├── Latvian-HOWTO
            ├── Scanner-HOWTO
            ├── UUCP-HOWTO
            └── Wireless-HOWTO

        If called like OutputCollection("en"), the result in memory would be
        a structure resembling this:

            OutputCollection("/path/en") = {
              "Latvian-HOWTO":  OutputDirectory("/path/en/Latvian-HOWTO")
              "Scanner-HOWTO":  OutputDirectory("/path/en/Scanner-HOWTO")
              "UUCP-HOWTO":     OutputDirectory("/path/en/UUCP-HOWTO")
              "Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO")
              }

        The use of the stem as a key works conveniently with the
        SourceCollection which uses the same strategy on SourceDocuments.
        '''
        if dirname is None:
            return
        elif not os.path.isdir(dirname):
            logger.critical("Output collection dir %s must already exist.",
                            dirname)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname)
        # sorted() makes the scan (and its log output) deterministic
        for fname in sorted(os.listdir(dirname)):
            name = os.path.join(dirname, fname)
            if not os.path.isdir(name):
                logger.info("Skipping non-directory %s (in %s)", name, dirname)
                continue
            logger.debug("Found directory %s (in %s)", name, dirname)
            o = OutputDirectory(name)
            # directory entries are unique, so a stem can only appear once
            assert o.stem not in self
            self[o.stem] = o

    def __delitem__(self, key):
        del self.__dict__[key]

    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value

    def __iter__(self):
        return iter(self.__dict__)

    def __len__(self):
        return len(self.__dict__)

#
# -- end of file
initial commit 2016-02-11 03:22:23 +00:00			`#! /usr/bin/python`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`# -- coding: utf8 --`
initial commit 2016-02-11 03:22:23 +00:00
changing to __future__ (consistency across project) 2016-02-11 19:29:00 +00:00			`from __future__ import absolute_import, division, print_function`
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00
initial commit 2016-02-11 03:22:23 +00:00			`import os`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`import errno`
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00			`import shutil`
initial commit 2016-02-11 03:22:23 +00:00
initial test script for outputs.py 2016-02-16 07:52:52 +00:00			`import collections`
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00			`from .utils import logger, logdir, statfiles`
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00

create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`class OutputNamingConvention(object):`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`'''A base class inherited by OutputDirectory to ensure consistent`
			`naming of files across the output collection of documents,`
			`regardless of the source document type and processing toolchain`
			`choice.`

			`Sets a list of names for documents that are expected to be present`
			`in order to report that the directory iscomplete.`
			`'''`
improving cleaning, esp. index.html 2016-02-16 05:04:41 +00:00			`expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html',`
change name_index to name_indexhtml; adapt tests also minor logging adjustments for clarity and consistency 2016-02-18 17:15:47 +00:00			`'name_indexhtml']`
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00
flipping the stem/dirname arguments 2016-02-17 21:50:06 +00:00			`def __init__(self, dirname, stem):`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`self.dirname = dirname`
flipping the stem/dirname arguments 2016-02-17 21:50:06 +00:00			`self.stem = stem`
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00
			`@property`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`def name_txt(self):`
			`return os.path.join(self.dirname, self.stem + '.txt')`
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00
			`@property`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`def name_pdf(self):`
			`return os.path.join(self.dirname, self.stem + '.pdf')`
initial commit 2016-02-11 03:22:23 +00:00
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00			`@property`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`def name_html(self):`
			`return os.path.join(self.dirname, self.stem + '.html')`
initial commit 2016-02-11 03:22:23 +00:00
adding OutputDir to outputs.py 2016-02-11 17:15:22 +00:00			`@property`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`def name_htmls(self):`
			`return os.path.join(self.dirname, self.stem + '-single.html')`

simplify cleaning; add (unused) name_epub 2016-02-18 03:04:21 +00:00			`@property`
			`def name_epub(self):`
			`return os.path.join(self.dirname, self.stem + '.epub')`

improving cleaning, esp. index.html 2016-02-16 05:04:41 +00:00			`@property`
change name_index to name_indexhtml; adapt tests also minor logging adjustments for clarity and consistency 2016-02-18 17:15:47 +00:00			`def name_indexhtml(self):`
improving cleaning, esp. index.html 2016-02-16 05:04:41 +00:00			`return os.path.join(self.dirname, 'index.html')`

adding basic docstrings 2016-02-18 21:22:48 +00:00			`@property`
			`def iscomplete(self):`
			`'''True if the output directory contains all expected documents'''`
			`present = list()`
			`for prop in self.expected:`
			`name = getattr(self, prop, None)`
			`assert name is not None`
			`present.append(os.path.isfile(name))`
			`return all(present)`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00

adding basic docstrings 2016-02-18 21:22:48 +00:00			`class OutputDirectory(OutputNamingConvention):`
			`'''A class providing a container for each set of output documents`
			`for a given source document and general methods for operating on`
			`and preparing the output directory for a document processor.`
			`For example, the process of generating each document type for a single`
			`source (e.g. 'Unicode-HOWTO') would be managed by this object.`

			`An important element of the OutputDirectory is the stem, determined`
			`from the directory name when __init__() is called.`
			`'''`
initial test script for outputs.py 2016-02-16 07:52:52 +00:00			`def __repr__(self):`
			`return '<%s:%s>' % (self.__class__.__name__, self.dirname)`

create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`def __init__(self, dirname):`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`'''constructor`
			`:param dirname: directory name for all output documents`

			`This directory name is expected to end with the document stem name,`
			`for example '/path/to/the/collection/Unicode-HOWTO'. The parent`
			`directory (e.g. '/path/to/the/collection' must exist already. The`
			`output directory itself will be created, or emptied and cleared if`
			`the document needs to be rebuilt.`
			`'''`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`self.dirname = os.path.abspath(dirname)`
			`self.stem = os.path.basename(self.dirname)`
			`parent = os.path.dirname(self.dirname)`
			`if not os.path.isdir(parent):`
change name_index to name_indexhtml; adapt tests also minor logging adjustments for clarity and consistency 2016-02-18 17:15:47 +00:00			`logger.critical("Missing output collection directory %s.", parent)`
fixes found during testing should standardize on IOError for errno.ENOENT across the package adhere to standard choice of logging message make sure to skip a non-directory in OutputCollection ("continue") 2016-02-16 22:13:17 +00:00			`raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), parent)`
switch to using statfiles 2016-02-17 19:19:48 +00:00			`self.statinfo = statfiles(self.dirname, relative=self.dirname)`
adding support for documents to know their status 2016-02-17 08:17:49 +00:00			`self.status = 'output'`
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00			`self.logdir = os.path.join(self.dirname, logdir)`
flipping the stem/dirname arguments 2016-02-17 21:50:06 +00:00			`super(OutputDirectory, self).__init__(self.dirname, self.stem)`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00
improving cleaning, esp. index.html 2016-02-16 05:04:41 +00:00			`def clean(self):`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`'''remove the output directory for this document`

			`This is done as a matter of course when the output documents must be`
			`regenerated. Better to start fresh.`
			`'''`
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00			`logger.info("%s cleaning dir %s.", self.stem, self.dirname)`
simplify cleaning; add (unused) name_epub 2016-02-18 03:04:21 +00:00			`if os.path.isdir(self.dirname):`
			`shutil.rmtree(self.dirname)`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00			`def prebuild_hook(self):`
simplify cleaning; add (unused) name_epub 2016-02-18 03:04:21 +00:00			`self.clean()`
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00			`for d in (self.dirname, self.logdir):`
			`if not os.path.isdir(d):`
			`logger.info("%s creating dir %s.", self.stem, d)`
			`os.mkdir(d)`
simplify cleaning; add (unused) name_epub 2016-02-18 03:04:21 +00:00			`self.copy_ancillaries(self.dirname)`
include hooks for prebuild and postbuild states 2016-02-18 02:31:51 +00:00
			`def build_failure_hook(self):`
			`logger.critical("%s FAILURE, see logs in %s", self.stem, self.logdir)`

			`def build_success_hook(self):`
			`logger.info("%s SUCCESS!", self.stem)`
			`logger.debug("%s removing logs %s)", self.stem, self.logdir)`
			`if os.path.isdir(self.logdir):`
			`shutil.rmtree(logdir)`

create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00
initial test script for outputs.py 2016-02-16 07:52:52 +00:00			`class OutputCollection(collections.MutableMapping):`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`'''a dict-like container for OutputDirectory objects`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00
adding basic docstrings 2016-02-18 21:22:48 +00:00			`The key of an OutputCollection is the stem name of the document, which`
			`allows convenient access and guaranteed non-collision.`
			`'''`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`def __repr__(self):`
initial test script for outputs.py 2016-02-16 07:52:52 +00:00			`return '<%s:(%s docs)>' % (self.__class__.__name__, len(self))`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00
allow creation of empty OutputCollection; fixes Allow creation of an empty OutputCollection so that the object can be handed around in driver.py for higher-level document wrangling. Also, repair one inobvious statement, thank you pep8/pyflakes 2016-02-17 07:43:07 +00:00			`def __init__(self, dirname=None):`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`'''construct an OutputCollection`

			`if dirname is not supplied, OutputCollection is basically, a dict().`
			`if dirname is supplied, then OutputCollection scans the filesystem`
			`for subdirectories of dirname and creates an OutputDirectory for each`
			`subdir. Each subdir name is used as the stem (or key) for holding the`
			`OutputDirectory in the OutputCollection.`

			`For example, consider the following directory tree:`

			`en`
			`├── Latvian-HOWTO`
			`├── Scanner-HOWTO`
			`├── UUCP-HOWTO`
			`└── Wireless-HOWTO`

			`If called like OutputCollection("en"), the result in memory would be`
			`a structure resembling this:`

			`OutputCollection("/path/en") = {`
			`"Latvian-HOWTO": OutputDirectory("/path/en/Latvian-HOWTO")`
			`"Scanner-HOWTO": OutputDirectory("/path/en/Scanner-HOWTO")`
			`"UUCP-HOWTO": OutputDirectory("/path/en/UUCP-HOWTO")`
			`"Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO")`
			`}`

			`The use of the stem as a key works conveniently with the`
			`SourceCollection which uses the same strategy on SourceDocuments.`
			`'''`
allow creation of empty OutputCollection; fixes Allow creation of an empty OutputCollection so that the object can be handed around in driver.py for higher-level document wrangling. Also, repair one inobvious statement, thank you pep8/pyflakes 2016-02-17 07:43:07 +00:00			`if dirname is None:`
			`return`
			`elif not os.path.isdir(dirname):`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`logger.critical("Output collection dir %s must already exist.",`
change name_index to name_indexhtml; adapt tests also minor logging adjustments for clarity and consistency 2016-02-18 17:15:47 +00:00			`dirname)`
initial test script for outputs.py 2016-02-16 07:52:52 +00:00			`raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname)`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`for fname in sorted(os.listdir(dirname)):`
converting OutputTree to behave like a dict 2016-02-16 05:55:58 +00:00			`name = os.path.join(dirname, fname)`
create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`if not os.path.isdir(name):`
allow creation of empty OutputCollection; fixes Allow creation of an empty OutputCollection so that the object can be handed around in driver.py for higher-level document wrangling. Also, repair one inobvious statement, thank you pep8/pyflakes 2016-02-17 07:43:07 +00:00			`logger.info("Skipping non-directory %s (in %s)", name, dirname)`
fixes found during testing should standardize on IOError for errno.ENOENT across the package adhere to standard choice of logging message make sure to skip a non-directory in OutputCollection ("continue") 2016-02-16 22:13:17 +00:00			`continue`
adding basic docstrings 2016-02-18 21:22:48 +00:00			`logger.debug("Found directory %s (in %s)", name, dirname)`
improving cleaning, esp. index.html 2016-02-16 05:04:41 +00:00			`o = OutputDirectory(name)`
allow creation of empty OutputCollection; fixes Allow creation of an empty OutputCollection so that the object can be handed around in driver.py for higher-level document wrangling. Also, repair one inobvious statement, thank you pep8/pyflakes 2016-02-17 07:43:07 +00:00			`assert o.stem not in self`
converting OutputTree to behave like a dict 2016-02-16 05:55:58 +00:00			`self[o.stem] = o`

			`def __delitem__(self, key):`
			`del self.__dict__[key]`

			`def __getitem__(self, key):`
			`return self.__dict__[key]`

			`def __setitem__(self, key, value):`
			`self.__dict__[key] = value`

			`def __iter__(self):`
			`return iter(self.__dict__)`

			`def __len__(self):`
			`return len(self.__dict__)`

create OutputDirectory and OutputTree an OutputTree must exist already, contains all of the OutputDocuments an OutputDocument determines the stem from the dirname and depends on the OutputNamingConvention for choosing names for the primary files of each generated output type 2016-02-15 22:01:55 +00:00			`#`
initial commit 2016-02-11 03:22:23 +00:00			`# -- end of file`