python-tldp/tldp/inventory.py

152 lines
4.5 KiB
Python
Raw Normal View History

#! /usr/bin/python
from __future__ import absolute_import, division, print_function
2016-02-17 16:35:36 +00:00
import copy
from .utils import logger, max_mtime, mtime_gt
from .sources import SourceCollection
from .outputs import OutputCollection
from argparse import Namespace
2016-02-17 16:35:36 +00:00
class Inventory(object):
    """Cross-reference published outputs against available sources.

    Constructing an Inventory builds an OutputCollection from ``pubdir`` and
    a SourceCollection from ``sourcedirs`` and then sorts the documents,
    keyed by stem, into these attributes:

      outputs    every output document, as loaded
      sources    every source document, as loaded
      orphans    outputs whose stem has no matching source
      new        sources whose stem has no matching output
      published  sources whose stem also appears among the outputs
      stale      published sources for which mtime_gt() reports files
                 against the max_mtime() of the paired output
      broken     published sources whose paired output is not complete

    ``stale`` and ``broken`` are subsets of ``published``; entries are not
    removed from ``published`` when they land in either of them.
    """

    def __repr__(self):
        """Return a one-line summary with the size of each category."""
        return '<%s: %d published, %d orphans, %d new, %d stale, %d broken>' % (
            self.__class__.__name__,
            len(self.published),
            len(self.orphans),
            len(self.new),
            len(self.stale),
            len(self.broken),
        )

    def __init__(self, pubdir, sourcedirs):
        """Load both collections and classify every document.

        :param pubdir: passed unchanged to OutputCollection()
        :param sourcedirs: passed unchanged to SourceCollection()
        :raises AssertionError: if, after removing orphans and new
            documents, the remaining sources and outputs differ in number
        """
        self.outputs = OutputCollection(pubdir)
        self.sources = SourceCollection(sourcedirs)

        # -- classify on deep copies, so that self.sources and self.outputs
        #    keep every document even though entries are deleted below
        #
        s = copy.deepcopy(self.sources)
        o = copy.deepcopy(self.outputs)
        sset = set(s.keys())
        oset = set(o.keys())

        # -- orphan identification
        #
        self.orphans = OutputCollection()
        for doc in oset.difference(sset):
            self.orphans[doc] = o[doc]
            del o[doc]
            self.orphans[doc].status = 'orphan'
        logger.info("Identified %d orphaned documents: %r.", len(self.orphans),
                    self.orphans.keys())

        # -- unpublished ('new') identification
        #
        self.new = SourceCollection()
        for doc in sset.difference(oset):
            self.new[doc] = s[doc]
            del s[doc]
            self.new[doc].status = 'new'
        logger.info("Identified %d new documents: %r.", len(self.new),
                    self.new.keys())

        # -- published identification; sources and outputs should be same size
        #
        assert len(s) == len(o)
        for stem, odoc in o.items():
            sdoc = s[stem]
            # -- pair the two documents so each can reach the other
            sdoc.output = odoc
            odoc.source = sdoc
            odoc.status = sdoc.status = 'published'
        self.published = s
        logger.info("Identified %d published documents.", len(self.published))

        # -- stale identification
        #
        self.stale = SourceCollection()
        for stem, sdoc in s.items():
            odoc = sdoc.output
            mtime = max_mtime(odoc.statinfo)
            fset = mtime_gt(mtime, sdoc.statinfo)
            if fset:
                for f in fset:
                    logger.debug("%s found updated source file %s", stem, f)
                odoc.status = sdoc.status = 'stale'
                self.stale[stem] = sdoc
        logger.info("Identified %d stale documents: %r.", len(self.stale),
                    self.stale.keys())

        # -- broken identification; runs last, so a document that is both
        #    stale and broken ends up reporting 'broken'
        #
        self.broken = SourceCollection()
        for stem, sdoc in s.items():
            if not sdoc.output.iscomplete:
                # -- stamp the status like every other category does
                sdoc.status = sdoc.output.status = 'broken'
                self.broken[stem] = sdoc
        logger.info("Identified %d broken documents: %r.", len(self.broken),
                    self.broken.keys())
2016-02-18 03:38:27 +00:00
def get_sources(sourcedirs):
    """Construct and return a SourceCollection from ``sourcedirs``."""
    collection = SourceCollection(sourcedirs)
    return collection
2016-02-17 16:35:36 +00:00
def get_outputs(pubdir):
    """Construct and return an OutputCollection from ``pubdir``."""
    collection = OutputCollection(pubdir)
    return collection
def print_sources(scollection, config=None):
    """Print one line per source document, ordered by lower-cased stem.

    :param scollection: mapping of stem to source document
    :param config: object with ``sep`` and ``verbose`` attributes; when
        omitted, a tab separator and non-verbose output are used

    In verbose mode each line carries the stem, status, filename, doctype,
    doctype format name and a file count joined by ``config.sep``;
    otherwise only the stem is printed.
    """
    if config is None:
        config = Namespace(sep='\t', verbose=0)

    def caseless(name):
        return name.lower()

    for key in sorted(scollection.keys(), key=caseless):
        entry = scollection[key]
        if not config.verbose:
            print(entry.stem)
            continue
        columns = [
            entry.stem,
            entry.status,
            entry.filename,
            str(entry.doctype),
            entry.doctype.formatname,
            str(len(entry.statinfo)) + ' files',
        ]
        print(config.sep.join(columns))
def print_outputs(ocollection, config=None):
    """Print one line per output document, ordered by lower-cased stem.

    :param ocollection: mapping of stem to output document
    :param config: object with ``sep`` and ``verbose`` attributes; when
        omitted, a tab separator and non-verbose output are used

    In verbose mode each line carries the stem, status, dirname and a file
    count joined by ``config.sep``; otherwise only the stem is printed.
    """
    if config is None:
        config = Namespace(sep='\t', verbose=0)

    def caseless(name):
        return name.lower()

    for key in sorted(ocollection.keys(), key=caseless):
        entry = ocollection[key]
        if not config.verbose:
            print(entry.stem)
            continue
        columns = [
            entry.stem,
            entry.status,
            entry.dirname,
            str(len(entry.statinfo)) + ' files',
        ]
        print(config.sep.join(columns))
def list_sources(sourcedirs, config=None):
    """Print every source document obtained from ``sourcedirs``.

    ``config`` is handed through to print_sources() unchanged.
    """
    print_sources(get_sources(sourcedirs), config)
def list_outputs(pubdir, config=None):
    """Print every output document obtained from ``pubdir``.

    ``config`` is handed through to print_outputs() unchanged.
    """
    print_outputs(get_outputs(pubdir), config)
def list_stale(pubdir, sourcedirs, config=None):
    """Print the documents an Inventory of the given directories calls stale.

    ``config`` is handed through to print_sources() unchanged.
    """
    inventory = Inventory(pubdir, sourcedirs)
    stale_docs = inventory.stale
    print_sources(stale_docs, config)
def list_new(pubdir, sourcedirs, config=None):
    """Print the documents an Inventory of the given directories calls new.

    ``config`` is handed through to print_sources() unchanged.
    """
    inventory = Inventory(pubdir, sourcedirs)
    new_docs = inventory.new
    print_sources(new_docs, config)
def list_orphans(pubdir, sourcedirs, config=None):
    """Print the outputs an Inventory of the given directories calls orphans.

    ``config`` is handed through to print_outputs() unchanged.
    """
    inventory = Inventory(pubdir, sourcedirs)
    orphaned = inventory.orphans
    print_outputs(orphaned, config)
#
# -- end of file