2016-02-17 08:22:31 +00:00
|
|
|
#! /usr/bin/python
|
2016-02-18 21:25:02 +00:00
|
|
|
# -*- coding: utf8 -*-
|
2016-02-17 08:22:31 +00:00
|
|
|
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
|
|
|
2016-02-17 16:35:36 +00:00
|
|
|
import copy
|
|
|
|
|
2016-02-17 20:04:37 +00:00
|
|
|
from .utils import logger, max_mtime, mtime_gt
|
2016-02-17 08:22:31 +00:00
|
|
|
|
|
|
|
from .sources import SourceCollection
|
|
|
|
from .outputs import OutputCollection
|
|
|
|
|
|
|
|
from argparse import Namespace
|
|
|
|
|
2016-02-23 16:59:56 +00:00
|
|
|
# -- any individual document (source or output) will have a status
# from the following list of status_types
#
status_types = [
    'source',
    'output',
    'published',
    'new',
    'orphan',
    'broken',
    'stale',
]


# -- the user probably doesn't usually care (too much) about listing
# every single published document and source document, but is probably
# mostly interested in specific documents grouped by status; so the
# status_classes are just sets of status_types
#
# start with the identity mapping (each status names itself) ...
status_classes = {status: [status] for status in status_types}

# ... then layer on the user-facing aggregate (and alias) classes
status_classes.update(
    outputs=['output'],
    sources=['source'],
    problems=['orphan', 'broken', 'stale'],
    work=['new', 'orphan', 'broken', 'stale'],
    orphans=['orphan'],
    orphaned=['orphan'],
    all=['published', 'new', 'orphan', 'broken', 'stale'],
)
|
|
|
|
|
2016-02-17 08:22:31 +00:00
|
|
|
|
2016-02-17 16:35:36 +00:00
|
|
|
class Inventory(object):
    '''a container for classifying documents by their status

    Every SourceDocument has no more than one matching OutputDirectory.

    The Inventory class encodes the logic for identifying the following
    different status possibilities for an arbitrary set of SourceDocuments and
    OutputDirectorys.

    The following are possible values for status:

    - 'source': a source document before any status detection
    - 'output': an output document before any status detection
    - 'new': a source document without any matching output stem
    - 'published': a pair of source/output documents with matching stems
    - 'orphan': an output document without any matching source stem
    - 'broken': a published document with missing output files
    - 'stale': a published document with new(er) source files

    The Inventory object is intended to be used to identify work that needs to
    be done on individual source documents to produce up-to-date output
    documents.
    '''

    def __repr__(self):
        # -- show only per-status counts; the underlying collections may
        #    be large, so their contents are not included
        return '<%s: %d published, %d orphan, %d new, %d stale, %d broken>' % (
            self.__class__.__name__,
            len(self.published),
            len(self.orphan),
            len(self.new),
            len(self.stale),
            len(self.broken),
            )

    def __init__(self, pubdir, sourcedirs):
        '''construct an Inventory

        pubdir: path to the OutputCollection

        sourcedirs: a list of directories which could be passed to the
          SourceCollection object; essentially a directory containing
          SourceDocuments; for example LDP/LDP/howto/linuxdoc and
          LDP/LDP/guide/docbook
        '''
        self.output = OutputCollection(pubdir)
        self.source = SourceCollection(sourcedirs)
        # -- classify on deep copies: the passes below delete matched
        #    entries from s and o, leaving self.source/self.output intact
        s = copy.deepcopy(self.source)
        o = copy.deepcopy(self.output)
        sset = set(s.keys())
        oset = set(o.keys())

        # -- orphan identification: output stems with no matching source
        #
        self.orphan = OutputCollection()
        for doc in oset.difference(sset):
            self.orphan[doc] = o[doc]
            del o[doc]
            self.orphan[doc].status = 'orphan'
        logger.info("Identified %d orphan documents: %r.", len(self.orphan),
                    self.orphan.keys())

        # -- unpublished ('new') identification: source stems with no
        #    matching output
        #
        self.new = SourceCollection()
        for doc in sset.difference(oset):
            self.new[doc] = s[doc]
            del s[doc]
            self.new[doc].status = 'new'
        logger.info("Identified %d new documents: %r.", len(self.new),
                    self.new.keys())

        # -- published identification; source and output should be same size
        #    (orphans and new docs have been removed), so pair them by stem
        assert len(s) == len(o)
        for stem, odoc in o.items():
            sdoc = s[stem]
            # cross-link each matched source/output pair
            sdoc.output = odoc
            odoc.source = sdoc
            sdoc.status = sdoc.output.status = 'published'
        self.published = s
        logger.info("Identified %d published documents.", len(self.published))

        # -- stale identification: published documents with source files
        #    modified after the newest output file
        #
        self.stale = SourceCollection()
        for stem, sdoc in s.items():
            odoc = sdoc.output
            mtime = max_mtime(odoc.statinfo)
            fset = mtime_gt(mtime, sdoc.statinfo)
            if fset:
                # record which source files triggered staleness
                sdoc.newer = fset
                for f in fset:
                    logger.debug("%s found updated source file %s", stem, f)
                odoc.status = sdoc.status = 'stale'
                self.stale[stem] = sdoc
        logger.info("Identified %d stale documents: %r.", len(self.stale),
                    self.stale.keys())

        # -- broken identification: published documents whose output is
        #    incomplete (comment previously mislabelled this pass 'stale')
        #    NOTE: this pass runs last, so a document that is both stale
        #    and incomplete ends up with status 'broken'
        #
        self.broken = SourceCollection()
        for stem, sdoc in s.items():
            if not sdoc.output.iscomplete:
                self.broken[stem] = sdoc
                sdoc.status = sdoc.output.status = 'broken'
        logger.info("Identified %d broken documents: %r.", len(self.broken),
                    self.broken.keys())
|
2016-02-17 08:22:31 +00:00
|
|
|
|
2016-02-18 03:38:27 +00:00
|
|
|
|
2016-02-17 08:22:31 +00:00
|
|
|
def get_sources(sourcedirs):
    '''convenience wrapper: build a SourceCollection from sourcedirs'''
    collection = SourceCollection(sourcedirs)
    return collection
|
2016-02-17 08:22:31 +00:00
|
|
|
|
|
|
|
|
2016-02-17 16:35:36 +00:00
|
|
|
def get_outputs(pubdir):
    '''convenience wrapper: build an OutputCollection from pubdir'''
    collection = OutputCollection(pubdir)
    return collection
|
2016-02-17 08:22:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
def print_sources(scollection, config=None):
    '''print a one-line summary for each source document in scollection

    scollection: a mapping of stem -> SourceDocument

    config: optional argparse-style Namespace providing .sep (field
      separator) and .verbose (flag); defaults to tab-separated,
      non-verbose output

    In verbose mode each line holds stem, status, source file count,
    filename, format name and doctype; otherwise just the stem.
    '''
    if config is None:
        config = Namespace(sep='\t', verbose=0)
    for stem in scollection.keys():
        doc = scollection[stem]
        if config.verbose:
            fields = [doc.stem, doc.status]
            fields.append(str(len(doc.statinfo)) + ' source files')
            fields.append(doc.filename)
            # -- bugfix: these two used extend(), which splits a string
            #    into one field per character; append() keeps each value
            #    as a single field (matching the append() calls above)
            fields.append(doc.doctype.formatname)
            fields.append(str(doc.doctype))
            print(config.sep.join(fields))
        else:
            print(doc.stem)
|
|
|
|
|
|
|
|
|
|
|
|
def print_outputs(ocollection, config=None):
    '''print a one-line summary for each output document in ocollection

    ocollection: a mapping of stem -> output document

    config: optional argparse-style Namespace providing .sep (field
      separator) and .verbose (flag); defaults to tab-separated,
      non-verbose output
    '''
    if config is None:
        config = Namespace(sep='\t', verbose=0)
    for stem in ocollection.keys():
        doc = ocollection[stem]
        # -- quiet mode: just the stem, one per line
        if not config.verbose:
            print(doc.stem)
            continue
        # -- verbose mode: stem, status, output dir, and file count
        fields = [doc.stem, doc.status, doc.dirname,
                  '%d files' % len(doc.statinfo)]
        print(config.sep.join(fields))
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# -- end of file
|