From bf768d027757a8e7bfded33d341cb68bace4fc82 Mon Sep 17 00:00:00 2001
From: "Martin A. Brown" <martin@linux-ip.net>
Date: Thu, 18 Feb 2016 13:22:48 -0800
Subject: [PATCH] adding basic docstrings

---
 tldp/outputs.py | 87 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 76 insertions(+), 11 deletions(-)

diff --git a/tldp/outputs.py b/tldp/outputs.py
index 8214d9c..c8453b3 100644
--- a/tldp/outputs.py
+++ b/tldp/outputs.py
@@ -1,4 +1,5 @@
 #! /usr/bin/python
+# -*- coding: utf8 -*-
 
 from __future__ import absolute_import, division, print_function
 
@@ -11,7 +12,14 @@ from .utils import logger, logdir, statfiles
 
 
 class OutputNamingConvention(object):
+    '''A base class inherited by OutputDirectory to ensure consistent
+    naming of files across the output collection of documents,
+    regardless of the source document type and processing toolchain
+    choice.
 
+    Sets a list of names for documents that are expected to be present
+    in order to report that the directory is complete (see iscomplete).
+    '''
     expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html',
                 'name_indexhtml']
 
@@ -43,13 +51,40 @@ class OutputNamingConvention(object):
     def name_indexhtml(self):
         return os.path.join(self.dirname, 'index.html')
 
+    @property
+    def iscomplete(self):
+        '''True if the output directory contains all expected documents'''
+        present = list()
+        for prop in self.expected:
+            name = getattr(self, prop, None)
+            assert name is not None
+            present.append(os.path.isfile(name))
+        return all(present)
+
 
 class OutputDirectory(OutputNamingConvention):
+    '''A class providing a container for each set of output documents
+    for a given source document and general methods for operating on
+    and preparing the output directory for a document processor.
+    For example, the process of generating each document type for a single
+    source (e.g. 'Unicode-HOWTO') would be managed by this object.
 
+    An important element of the OutputDirectory is the stem, determined
+    from the directory name when __init__() is called.
+    '''
     def __repr__(self):
         return '<%s:%s>' % (self.__class__.__name__, self.dirname)
 
     def __init__(self, dirname):
+        '''constructor
+        :param dirname: directory name for all output documents
+
+        This directory name is expected to end with the document stem name,
+        for example '/path/to/the/collection/Unicode-HOWTO'.  The parent
+        directory (e.g. '/path/to/the/collection') must exist already.  The
+        output directory itself will be created, or emptied and cleared if
+        the document needs to be rebuilt.
+        '''
         self.dirname = os.path.abspath(dirname)
         self.stem = os.path.basename(self.dirname)
         parent = os.path.dirname(self.dirname)
@@ -61,16 +96,12 @@ class OutputDirectory(OutputNamingConvention):
         self.logdir = os.path.join(self.dirname, logdir)
         super(OutputDirectory, self).__init__(self.dirname, self.stem)
 
-    @property
-    def iscomplete(self):
-        present = list()
-        for prop in self.expected:
-            name = getattr(self, prop, None)
-            assert name is not None
-            present.append(os.path.isfile(name))
-        return all(present)
-
     def clean(self):
+        '''remove the output directory for this document
+
+        This is done as a matter of course when the output documents must be
+        regenerated.  Better to start fresh.
+        '''
         logger.info("%s cleaning dir   %s.", self.stem, self.dirname)
         if os.path.isdir(self.dirname):
             shutil.rmtree(self.dirname)
@@ -94,22 +125,56 @@ class OutputDirectory(OutputNamingConvention):
 
 
 class OutputCollection(collections.MutableMapping):
+    '''a dict-like container for OutputDirectory objects
 
+    The key of an OutputCollection is the stem name of the document, which
+    allows convenient access and guaranteed non-collision.
+    '''
     def __repr__(self):
         return '<%s:(%s docs)>' % (self.__class__.__name__, len(self))
 
     def __init__(self, dirname=None):
+        '''construct an OutputCollection
+
+        if dirname is not supplied, OutputCollection is basically a dict().
+        if dirname is supplied, then OutputCollection scans the filesystem
+        for subdirectories of dirname and creates an OutputDirectory for each
+        subdir.  Each subdir name is used as the stem (or key) for holding the
+        OutputDirectory in the OutputCollection.
+
+        For example, consider the following directory tree:
+
+            en
+            ├── Latvian-HOWTO
+            ├── Scanner-HOWTO
+            ├── UUCP-HOWTO
+            └── Wireless-HOWTO
+
+        If called like OutputCollection("en"), the result in memory would be
+        a structure resembling this:
+
+            OutputCollection("/path/en") = {
+              "Latvian-HOWTO":  OutputDirectory("/path/en/Latvian-HOWTO")
+              "Scanner-HOWTO":  OutputDirectory("/path/en/Scanner-HOWTO")
+              "UUCP-HOWTO":     OutputDirectory("/path/en/UUCP-HOWTO")
+              "Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO")
+              }
+
+        The use of the stem as a key works conveniently with the
+        SourceCollection which uses the same strategy on SourceDocuments.
+        '''
         if dirname is None:
             return
         elif not os.path.isdir(dirname):
-            logger.critical("Output collection dir %s must already exist.", 
+            logger.critical("Output collection dir %s must already exist.",
                             dirname)
             raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname)
-        for fname in os.listdir(dirname):
+        for fname in sorted(os.listdir(dirname)):
             name = os.path.join(dirname, fname)
             if not os.path.isdir(name):
                 logger.info("Skipping non-directory %s (in %s)", name, dirname)
                 continue
+            logger.debug("Found directory %s (in %s)", name, dirname)
             o = OutputDirectory(name)
             assert o.stem not in self
             self[o.stem] = o