diff --git a/tldp/sources.py b/tldp/sources.py index eab2e1a..6633a46 100644 --- a/tldp/sources.py +++ b/tldp/sources.py @@ -12,11 +12,47 @@ from .typeguesser import guess, knownextensions class SourceCollection(collections.MutableMapping): + '''a dict-like container for SourceDocument objects + The key in the SourceCollection is the stem name of the document, which + allows convenient access and guarantees non-collision. + + The use of the stem as a key works conveniently with the + OutputCollection which uses the same strategy on OutputDirectory. + ''' def __repr__(self): return '<%s:(%s docs)>' % (self.__class__.__name__, len(self)) def __init__(self, args=None): + '''construct a SourceCollection + + If args is not supplied, SourceCollection is basically a dict(). + If args is supplied, then SourceCollection ensures it is operating on + the absolute filesystem path for each of the source directories. + + If any of the supplied args does not exist as a directory, + SourceCollection will log the missing source directory names and then + will raise an IOError and quit. + + For each document that it finds in a source directory, it creates a + SourceDocument entry in itself using the stem name as a key. + + The rules for identifying possible SourceDocuments go as follows. + Within any source directory, a source document can consist of a single + file with an extension or a directory. + + If the candidate entry is a directory, then the stem is the full + directory name, e.g. Masquerading-Simple-HOWTO + + If the candidate entry is a file, the stem is the filename minus + extension, e.g. Encrypted-Root-Filesystem-HOWTO + + Because the SourceCollection accepts (and will scan) many source + directories, it is possible that there will be stem name collisions. + If it discovers a stem collision, SourceCollection will issue a + warning and skip the duplicated stem(s). [It also tries to process + the source directories and candidates in a stable order between runs.] 
+ ''' if args is None: return dirs = [os.path.abspath(x) for x in args] @@ -70,13 +106,39 @@ class SourceCollection(collections.MutableMapping): class SourceDocument(object): - + '''a class providing a container for each set of source documents + ''' def __repr__(self): return '<%s:%s (%s)>' % \ (self.__class__.__name__, self.filename, self.doctype) def __init__(self, filename): - # -- canonicalize the pathname we are given. + '''construct a SourceDocument + + filename is a required parameter + + The filename is the main (and sometimes sole) document representing + the source of the LDP HOWTO or Guide. It is the document that is + passed by name to be handled by any document processing toolchains + (see also tldp.doctypes). + + Each instantiation will raise an IOError if the supplied filename does + not exist, or a TypeError if it isn't a file (symlink is fine, directory + or fifo is not). + + The remainder of the instantiation will set attributes that are useful + later in the processing phase, for example, stem, status, enclosing + directory name and file extension. + + There are two important attributes. First, the document type guesser + will try to infer the doctype (from file extension and signature). + Note that it is not a fatal error if document type cannot be guessed, + but the document will not be able to be processed. Second, it is + useful during the decision-making process to know if any of the source + files are newer than the output files. Thus, the stat() information + for every file in the source document directory (or just the single + source document file) will be collected. 
+ ''' self.filename = os.path.abspath(filename) if not os.path.exists(self.filename): logger.critical("Missing source document: %s", self.filename) @@ -85,7 +147,7 @@ class SourceDocument(object): logger.critical("Source document is not a plain file: %s", self.filename) raise TypeError("Wrong type, not a plain file: " + self.filename) - self.doctype = self._doctype() + self.doctype = guess(self.filename) self.status = 'source' self.dirname, self.basename = os.path.split(self.filename) self.stem, self.ext = os.path.splitext(self.basename) @@ -96,8 +158,5 @@ class SourceDocument(object): else: self.statinfo = statfiles(self.filename, relative=self.dirname) - def _doctype(self): - return guess(self.filename) - # # -- end of file