mirror of https://github.com/tLDP/python-tldp
213 lines
7.2 KiB
Python
213 lines
7.2 KiB
Python
#! /usr/bin/python
|
|
# -*- coding: utf8 -*-
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import os
|
|
import sys
|
|
import errno
|
|
import codecs
|
|
import logging
|
|
|
|
from tldp.ldpcollection import LDPDocumentCollection
|
|
from tldp.utils import logdir, statfiles
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class OutputNamingConvention(object):
    '''Naming rules for the files in one document's output directory.

    A base class inherited by OutputDirectory to ensure consistent naming
    of files across the output collection of documents, regardless of the
    source document type and processing toolchain choice.

    Each name_* property joins self.dirname with a file name built from
    self.stem (index.html is the single fixed name).  The class attribute
    `expected` lists the property names whose files must all be present
    before the directory is reported as complete.
    '''
    expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html',
                'name_indexhtml']

    def __init__(self, dirname, stem):
        '''Remember the output directory and the document stem.

        :param dirname: directory holding the output documents
        :param stem: base name shared by the generated files
        '''
        self.dirname = dirname
        self.stem = stem

    @property
    def MD5SUMS(self):
        '''Path of the '.LDP-source-MD5SUMS' file inside dirname.'''
        return os.path.join(self.dirname, '.LDP-source-MD5SUMS')

    @property
    def name_txt(self):
        '''Path of <stem>.txt inside dirname.'''
        return os.path.join(self.dirname, self.stem + '.txt')

    @property
    def name_fo(self):
        '''Path of <stem>.fo inside dirname.'''
        return os.path.join(self.dirname, self.stem + '.fo')

    @property
    def name_pdf(self):
        '''Path of <stem>.pdf inside dirname.'''
        return os.path.join(self.dirname, self.stem + '.pdf')

    @property
    def name_html(self):
        '''Path of <stem>.html inside dirname.'''
        return os.path.join(self.dirname, self.stem + '.html')

    @property
    def name_htmls(self):
        '''Path of <stem>-single.html inside dirname.'''
        return os.path.join(self.dirname, self.stem + '-single.html')

    @property
    def name_epub(self):
        '''Path of <stem>.epub inside dirname.'''
        return os.path.join(self.dirname, self.stem + '.epub')

    @property
    def name_indexhtml(self):
        '''Path of index.html inside dirname (does not depend on stem).'''
        return os.path.join(self.dirname, 'index.html')

    @property
    def validsource(self):
        '''Path of <stem>.xml inside dirname.'''
        return os.path.join(self.dirname, self.stem + '.xml')

    @property
    def iscomplete(self):
        '''True if the output directory contains all expected documents.

        Defined as "nothing is missing", so this property and `missing`
        apply the same regular-file test and can never disagree.
        '''
        return not self.missing

    @property
    def missing(self):
        '''Return the set of expected paths that are not regular files.

        Raises AttributeError if `expected` names a property that does not
        exist; a plain attribute lookup is used rather than an assert so
        that the check also happens when Python runs with -O.
        '''
        paths = (getattr(self, prop) for prop in self.expected)
        return set(path for path in paths if not os.path.isfile(path))

    @property
    def md5sums(self):
        '''Return a dict mapping file name to hash value from MD5SUMS.

        Blank lines and lines whose first non-blank character is '#' are
        ignored.  Every other line is split at the first run of whitespace
        only, so a file name containing spaces is kept intact.  A line
        holding a single field raises ValueError.

        A missing MD5SUMS file yields an empty dict; any other IOError is
        re-raised.
        '''
        d = dict()
        try:
            with codecs.open(self.MD5SUMS, encoding='utf-8') as f:
                for line in f:
                    entry = line.strip()
                    if not entry or entry.startswith('#'):
                        continue
                    # maxsplit=1: everything after the hash is the name
                    hashval, fname = entry.split(None, 1)
                    d[fname] = hashval
        except IOError as e:
            # only "file does not exist" is an expected condition
            if e.errno != errno.ENOENT:
                raise
        return d
|
|
|
|
|
|
class OutputDirectory(OutputNamingConvention):
    '''A class providing a container for each set of output documents
    for a given source document and general methods for operating on
    and preparing the output directory for a document processor.
    For example, the process of generating each document type for a single
    source (e.g. 'Unicode-HOWTO') would be managed by this object.

    An important element of the OutputDirectory is the stem, determined
    from the directory name when __init__() is called.
    '''

    def __repr__(self):
        return '<%s:%s>' % (self.__class__.__name__, self.dirname)

    @classmethod
    def fromsource(cls, dirname, source):
        '''Alternate constructor: build the output directory for a source.

        :param dirname: the output collection directory
        :param source: object whose `stem` attribute names the
            subdirectory; it is also stored as the `source` attribute
        '''
        newname = os.path.join(dirname, source.stem)
        return cls(newname, source=source)

    def __init__(self, dirname, source=None):
        '''constructor

        :param dirname: directory name for all output documents
        :param source: optional object stored as the `source` attribute

        This directory name is expected to end with the document stem name,
        for example '/path/to/the/collection/Unicode-HOWTO'.  The parent
        directory (e.g. '/path/to/the/collection') must exist already;
        otherwise IOError with errno ENOENT is raised.  The constructor
        itself only computes paths: it does not create or modify the
        output directory.
        '''
        self.dirname = os.path.abspath(dirname)
        # the stem is the last path component of the absolute directory
        self.stem = os.path.basename(self.dirname)
        super(OutputDirectory, self).__init__(self.dirname, self.stem)
        parent = os.path.dirname(self.dirname)
        if not os.path.isdir(parent):
            logger.critical("Missing output collection directory %s.", parent)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), parent)
        self.status = 'output'
        self.source = source
        self.logdir = os.path.join(self.dirname, logdir)

    def detail(self, widths, verbose, file=None):
        '''Print a one-line summary: status, "<unknown>" and stem.

        :param widths: object whose `status`, `doctype` and `stem`
            attributes supply the column widths
        :param verbose: when true, an extra ' missing source' line follows
        :param file: stream to write to; None (the default) selects the
            current sys.stdout at call time, so a later redirection of
            sys.stdout is honoured
        '''
        if file is None:
            file = sys.stdout
        template = ' '.join(('{s.status:{w.status}}',
                             '{u:{w.doctype}}',
                             '{s.stem:{w.stem}}'))
        outstr = template.format(s=self, w=widths, u="<unknown>")
        print(outstr, file=file)
        if verbose:
            print(' missing source', file=file)
|
|
|
|
|
|
class OutputCollection(LDPDocumentCollection):
    '''Container of OutputDirectory objects, keyed by document stem.

    Each entry is stored under the stem name of its directory.  Because
    the stems are the names of sibling subdirectories, two entries cannot
    share a key.

    The use of the stem as a key works conveniently with the
    SourceCollection which uses the same strategy on SourceDocuments.
    '''

    def __init__(self, dirname=None):
        '''Populate the collection from the subdirectories of dirname.

        When dirname is None nothing is scanned and no entry is added.
        Otherwise dirname must be an existing directory (IOError with
        errno ENOENT is raised if it is not); every subdirectory found
        in it, visited in case-insensitive name order, is wrapped in an
        OutputDirectory and stored under that directory's stem.  Entries
        that are not directories are skipped.

        For example, consider the following directory tree:

          en
          ├── Latvian-HOWTO
          ├── Scanner-HOWTO
          ├── UUCP-HOWTO
          └── Wireless-HOWTO

        If called like OutputCollection("en"), the result in memory would
        be a structure resembling this:

          OutputCollection("/path/en") = {
            "Latvian-HOWTO": OutputDirectory("/path/en/Latvian-HOWTO")
            "Scanner-HOWTO": OutputDirectory("/path/en/Scanner-HOWTO")
            "UUCP-HOWTO": OutputDirectory("/path/en/UUCP-HOWTO")
            "Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO")
          }
        '''
        if dirname is None:
            return

        if not os.path.isdir(dirname):
            logger.critical("Output collection dir %s must already exist.",
                            dirname)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname)

        # case-insensitive ordering of the directory listing
        entries = os.listdir(dirname)
        entries.sort(key=lambda entry: entry.lower())

        for entry in entries:
            path = os.path.join(dirname, entry)
            if os.path.isdir(path):
                logger.debug("Found directory %s (in %s)", path, dirname)
                outputdir = OutputDirectory(path)
                assert outputdir.stem not in self
                self[outputdir.stem] = outputdir
            else:
                logger.info("Skipping non-directory %s (in %s)", path, dirname)
|
|
|
|
|
|
#
|
|
# -- end of file
|