From bf768d027757a8e7bfded33d341cb68bace4fc82 Mon Sep 17 00:00:00 2001 From: "Martin A. Brown" Date: Thu, 18 Feb 2016 13:22:48 -0800 Subject: [PATCH] adding basic docstrings --- tldp/outputs.py | 87 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/tldp/outputs.py b/tldp/outputs.py index 8214d9c..c8453b3 100644 --- a/tldp/outputs.py +++ b/tldp/outputs.py @@ -1,4 +1,5 @@ #! /usr/bin/python +# -*- coding: utf8 -*- from __future__ import absolute_import, division, print_function @@ -11,7 +12,14 @@ from .utils import logger, logdir, statfiles class OutputNamingConvention(object): + '''A base class inherited by OutputDirectory to ensure consistent + naming of files across the output collection of documents, + regardless of the source document type and processing toolchain + choice. + Sets a list of names for documents that are expected to be present + in order to report that the directory iscomplete. + ''' expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html', 'name_indexhtml'] @@ -43,13 +51,40 @@ class OutputNamingConvention(object): def name_indexhtml(self): return os.path.join(self.dirname, 'index.html') + @property + def iscomplete(self): + '''True if the output directory contains all expected documents''' + present = list() + for prop in self.expected: + name = getattr(self, prop, None) + assert name is not None + present.append(os.path.isfile(name)) + return all(present) + class OutputDirectory(OutputNamingConvention): + '''A class providing a container for each set of output documents + for a given source document and general methods for operating on + and preparing the output directory for a document processor. + For example, the process of generating each document type for a single + source (e.g. 'Unicode-HOWTO') would be managed by this object. + An important element of the OutputDirectory is the stem, determined + from the directory name when __init__() is called. 
+ ''' def __repr__(self): return '<%s:%s>' % (self.__class__.__name__, self.dirname) def __init__(self, dirname): + '''constructor + :param dirname: directory name for all output documents + + This directory name is expected to end with the document stem name, + for example '/path/to/the/collection/Unicode-HOWTO'. The parent + directory (e.g. '/path/to/the/collection') must exist already. The + output directory itself will be created, or emptied and cleared if + the document needs to be rebuilt. + ''' self.dirname = os.path.abspath(dirname) self.stem = os.path.basename(self.dirname) parent = os.path.dirname(self.dirname) @@ -61,16 +96,12 @@ class OutputDirectory(OutputNamingConvention): self.logdir = os.path.join(self.dirname, logdir) super(OutputDirectory, self).__init__(self.dirname, self.stem) - @property - def iscomplete(self): - present = list() - for prop in self.expected: - name = getattr(self, prop, None) - assert name is not None - present.append(os.path.isfile(name)) - return all(present) - def clean(self): + '''remove the output directory for this document + + This is done as a matter of course when the output documents must be + regenerated. Better to start fresh. + ''' logger.info("%s cleaning dir %s.", self.stem, self.dirname) if os.path.isdir(self.dirname): shutil.rmtree(self.dirname) @@ -94,22 +125,56 @@ class OutputDirectory(OutputNamingConvention): class OutputCollection(collections.MutableMapping): + '''a dict-like container for OutputDirectory objects + The key of an OutputCollection is the stem name of the document, which + allows convenient access and guaranteed non-collision. + ''' def __repr__(self): return '<%s:(%s docs)>' % (self.__class__.__name__, len(self)) def __init__(self, dirname=None): + '''construct an OutputCollection + + if dirname is not supplied, OutputCollection is basically a dict(). 
+ if dirname is supplied, then OutputCollection scans the filesystem + for subdirectories of dirname and creates an OutputDirectory for each + subdir. Each subdir name is used as the stem (or key) for holding the + OutputDirectory in the OutputCollection. + + For example, consider the following directory tree: + + en + ├── Latvian-HOWTO + ├── Scanner-HOWTO + ├── UUCP-HOWTO + └── Wireless-HOWTO + + If called like OutputCollection("en"), the result in memory would be + a structure resembling this: + + OutputCollection("/path/en") = { + "Latvian-HOWTO": OutputDirectory("/path/en/Latvian-HOWTO") + "Scanner-HOWTO": OutputDirectory("/path/en/Scanner-HOWTO") + "UUCP-HOWTO": OutputDirectory("/path/en/UUCP-HOWTO") + "Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO") + } + + The use of the stem as a key works conveniently with the + SourceCollection which uses the same strategy on SourceDocuments. + ''' if dirname is None: return elif not os.path.isdir(dirname): - logger.critical("Output collection dir %s must already exist.", + logger.critical("Output collection dir %s must already exist.", dirname) raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname) - for fname in os.listdir(dirname): + for fname in sorted(os.listdir(dirname)): name = os.path.join(dirname, fname) if not os.path.isdir(name): logger.info("Skipping non-directory %s (in %s)", name, dirname) continue + logger.debug("Found directory %s (in %s)", name, dirname) o = OutputDirectory(name) assert o.stem not in self self[o.stem] = o