python-tldp/tldp/outputs.py

215 lines
7.3 KiB
Python
Raw Normal View History

2016-02-11 03:22:23 +00:00
#! /usr/bin/python
2016-02-18 21:22:48 +00:00
# -*- coding: utf8 -*-
2016-04-29 15:02:02 +00:00
#
# Copyright (c) 2016 Linux Documentation Project
2016-02-11 03:22:23 +00:00
from __future__ import absolute_import, division, print_function
2016-03-15 05:18:09 +00:00
from __future__ import unicode_literals
2016-02-11 17:15:22 +00:00
2016-02-11 03:22:23 +00:00
import os
import sys
import errno
import codecs
import logging
2016-02-11 03:22:23 +00:00
2016-02-23 04:02:12 +00:00
from tldp.ldpcollection import LDPDocumentCollection
2016-04-28 15:05:05 +00:00
from tldp.utils import logdir
2016-02-11 17:15:22 +00:00
2016-03-02 06:53:07 +00:00
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
2016-02-11 17:15:22 +00:00
class OutputNamingConvention(object):
    '''A base class inherited by OutputDirectory to ensure consistent
    naming of files across the output collection of documents,
    regardless of the source document type and processing toolchain
    choice.

    Every name_* property returns a path inside self.dirname that is
    derived from self.stem (except name_indexhtml, which is always
    'index.html').

    The class attribute `expected` lists the names of the properties
    whose files must be present in order to report that the directory
    is complete (see the iscomplete and missing properties).
    '''
    expected = ['name_txt', 'name_pdf', 'name_htmls', 'name_html',
                'name_indexhtml']

    def __init__(self, dirname, stem):
        '''constructor

        :param dirname: directory which holds the output documents
        :param stem: base name shared by the output documents
        '''
        self.dirname = dirname
        self.stem = stem

    def _expected_names(self):
        '''return a list with the path of every expected document'''
        names = list()
        for prop in self.expected:
            name = getattr(self, prop, None)
            # -- every entry in self.expected must name a property of
            #    this class; anything else is a programming error
            assert name is not None
            names.append(name)
        return names

    @property
    def MD5SUMS(self):
        '''path of the '.LDP-source-MD5SUMS' file in the directory'''
        return os.path.join(self.dirname, '.LDP-source-MD5SUMS')

    @property
    def name_txt(self):
        '''path of <stem>.txt'''
        return os.path.join(self.dirname, self.stem + '.txt')

    @property
    def name_fo(self):
        '''path of <stem>.fo'''
        return os.path.join(self.dirname, self.stem + '.fo')

    @property
    def name_pdf(self):
        '''path of <stem>.pdf'''
        return os.path.join(self.dirname, self.stem + '.pdf')

    @property
    def name_html(self):
        '''path of <stem>.html'''
        return os.path.join(self.dirname, self.stem + '.html')

    @property
    def name_htmls(self):
        '''path of <stem>-single.html'''
        return os.path.join(self.dirname, self.stem + '-single.html')

    @property
    def name_epub(self):
        '''path of <stem>.epub'''
        return os.path.join(self.dirname, self.stem + '.epub')

    @property
    def name_indexhtml(self):
        '''path of index.html (does not depend on the stem)'''
        return os.path.join(self.dirname, 'index.html')

    @property
    def validsource(self):
        '''path of <stem>.xml

        NOTE(review): presumably the validated source written by a
        document processor -- confirm against the callers.
        '''
        return os.path.join(self.dirname, self.stem + '.xml')

    @property
    def iscomplete(self):
        '''True if the output directory contains all expected documents

        Uses os.path.exists(), so any kind of filesystem entry counts
        as present; the missing property is stricter (os.path.isfile()).
        '''
        return all([os.path.exists(name) for name in self._expected_names()])

    @property
    def missing(self):
        '''returns a set of missing files

        A path is reported as missing when it is not a regular file.
        '''
        return set(name for name in self._expected_names()
                   if not os.path.isfile(name))

    @property
    def md5sums(self):
        '''returns a dict of {filename: hashval} read from self.MD5SUMS

        Blank lines and lines starting with '#' are skipped. Every other
        line is split on the first run of whitespace only, so that file
        names containing spaces are kept intact.

        If the MD5SUMS file does not exist, an empty dict is returned;
        any other IOError is re-raised. A line holding a hash value but
        no file name raises ValueError.
        '''
        d = dict()
        try:
            with codecs.open(self.MD5SUMS, encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    # -- maxsplit=1: only the hash is separated, the
                    #    rest of the line is the file name
                    hashval, fname = line.split(None, 1)
                    d[fname] = hashval
        except IOError as e:
            # -- a missing MD5SUMS file simply means no known checksums
            if e.errno != errno.ENOENT:
                raise
        return d
2016-02-18 21:22:48 +00:00
class OutputDirectory(OutputNamingConvention):
    '''A class providing a container for each set of output documents
    for a given source document and general methods for operating on
    and preparing the output directory for a document processor.

    For example, the process of generating each document type for a single
    source (e.g. 'Unicode-HOWTO') would be managed by this object.

    An important element of the OutputDirectory is the stem, determined
    from the directory name when __init__() is called.
    '''

    def __repr__(self):
        return '<%s:%s>' % (self.__class__.__name__, self.dirname)

    @classmethod
    def fromsource(cls, dirname, source):
        '''alternate constructor, deriving the directory from a source

        :param dirname: the output collection directory
        :param source: an object with a `stem` attribute; the output
            directory becomes os.path.join(dirname, source.stem)
        '''
        newname = os.path.join(dirname, source.stem)
        return cls(newname, source=source)

    def __init__(self, dirname, source=None):
        '''constructor

        :param dirname: directory name for all output documents
        :param source: optional source object, stored as self.source

        This directory name is expected to end with the document stem name,
        for example '/path/to/the/collection/Unicode-HOWTO'. The parent
        directory (e.g. '/path/to/the/collection') must exist already;
        otherwise IOError (ENOENT) is raised. The constructor itself does
        not create or modify the output directory.
        '''
        dirname = os.path.abspath(dirname)
        # -- the stem is the final path component of the absolute dirname
        stem = os.path.basename(dirname)
        super(OutputDirectory, self).__init__(dirname, stem)
        parent = os.path.dirname(self.dirname)
        if not os.path.isdir(parent):
            logger.critical("Missing output collection directory %s.", parent)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), parent)
        self.status = 'output'
        self.source = source
        self.logdir = os.path.join(self.dirname, logdir)

    def detail(self, widths, verbose, file=None):
        '''print a one-line summary of this output directory

        :param widths: object whose `status`, `doctype` and `stem`
            attributes are used as the column format specs
        :param verbose: if true, print an additional explanatory line
        :param file: stream to write to; None (the default) means the
            sys.stdout in effect at call time, so that a redirected
            sys.stdout is honoured
        '''
        template = ' '.join(('{s.status:{w.status}}',
                             '{u:{w.doctype}}',
                             '{s.stem:{w.stem}}'))
        outstr = template.format(s=self, w=widths, u="<unknown>")
        # -- print() resolves file=None to the current sys.stdout
        print(outstr, file=file)
        if verbose:
            print('  missing source', file=file)
2016-02-23 04:02:12 +00:00
class OutputCollection(LDPDocumentCollection):
    '''a dict-like container for OutputDirectory objects

    Each OutputDirectory is stored under the stem name of its document,
    which gives convenient access and guarantees that keys do not collide.

    Keying on the stem mirrors the strategy the SourceCollection uses
    for its SourceDocuments.
    '''

    def __init__(self, dirname=None):
        '''construct an OutputCollection

        Without a dirname, nothing is scanned and the collection starts
        out empty.

        With a dirname, every subdirectory of dirname becomes an
        OutputDirectory, stored under its stem (the subdirectory name).
        Entries are visited in case-insensitive name order; entries which
        are not directories are skipped. If dirname is not an existing
        directory, IOError (ENOENT) is raised.

        For example, consider the following directory tree:

          en
            Latvian-HOWTO
            Scanner-HOWTO
            UUCP-HOWTO
            Wireless-HOWTO

        If called like OutputCollection("en"), the result in memory would be
        a structure resembling this:

          OutputCollection("/path/en") = {
            "Latvian-HOWTO": OutputDirectory("/path/en/Latvian-HOWTO")
            "Scanner-HOWTO": OutputDirectory("/path/en/Scanner-HOWTO")
            "UUCP-HOWTO": OutputDirectory("/path/en/UUCP-HOWTO")
            "Wireless-HOWTO": OutputDirectory("/path/en/Wireless-HOWTO")
          }
        '''
        if dirname is None:
            return

        if not os.path.isdir(dirname):
            logger.critical("Output collection dir %s must already exist.",
                            dirname)
            raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), dirname)

        entries = os.listdir(dirname)
        entries.sort(key=lambda entry: entry.lower())
        for entry in entries:
            path = os.path.join(dirname, entry)
            if os.path.isdir(path):
                logger.debug("Found directory %s (in %s)", path, dirname)
                outdir = OutputDirectory(path)
                # -- names come from a single listdir(), so stems are unique
                assert outdir.stem not in self
                self[outdir.stem] = outdir
            else:
                logger.info("Skipping non-directory %s (in %s)", path, dirname)
#
2016-02-11 03:22:23 +00:00
# -- end of file