From 5dcc255cc62e220a43236a2f21440ced1914671f Mon Sep 17 00:00:00 2001 From: "Martin A. Brown" Date: Sat, 2 Apr 2016 10:47:45 -0700 Subject: [PATCH] calculate stale by MD5s; swap stale/broken move the stanza that identifies the broken output directories up higher in the file; it's a simpler chunk of code adjust the detection of stale-ness by referring to an output MD5 file and compare with the available source files --- tldp/inventory.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/tldp/inventory.py b/tldp/inventory.py index 0e6fbfd..7417fe6 100644 --- a/tldp/inventory.py +++ b/tldp/inventory.py @@ -123,23 +123,7 @@ class Inventory(object): self.published = s logger.debug("Identified %d published documents.", len(self.published)) - # -- stale identification - # - self.stale = SourceCollection() - for stem, sdoc in s.items(): - odoc = sdoc.output - mtime = max_mtime(odoc.statinfo) - fset = mtime_gt(mtime, sdoc.statinfo) - if fset: - sdoc.newer = fset - for f in fset: - logger.debug("%s found updated source file %s", stem, f) - odoc.status = sdoc.status = 'stale' - self.stale[stem] = sdoc - logger.debug("Identified %d stale documents: %r.", len(self.stale), - self.stale.keys()) - - # -- stale identification + # -- broken identification # self.broken = SourceCollection() for stem, sdoc in s.items(): @@ -149,6 +133,31 @@ class Inventory(object): logger.debug("Identified %d broken documents: %r.", len(self.broken), self.broken.keys()) + # -- stale identification + # + self.stale = SourceCollection() + for stem, sdoc in s.items(): + odoc = sdoc.output + omd5, smd5 = odoc.md5sums, sdoc.md5sums + if omd5 != smd5: + logger.debug("%s differing MD5 sets %r %r", stem, smd5, omd5) + changed = set() + for gone in set(omd5.keys()).difference(smd5.keys()): + logger.debug("%s gone %s", stem, gone) + changed.add(('gone', gone)) + for new in set(smd5.keys()).difference(omd5.keys()): + changed.add(('new', new)) + for sfn in set(smd5.keys()).intersection(omd5.keys()): + if smd5[sfn] != omd5[sfn]: + changed.add(('changed', sfn)) + for why, sfn in changed: + logger.debug("%s differing source %s (%s)", stem, sfn, why) + odoc.status = sdoc.status = 'stale' + sdoc.differing = changed + self.stale[stem] = sdoc + logger.debug("Identified %d stale documents: %r.", len(self.stale), + self.stale.keys()) + def getByStatusClass(self, status_class): desired = status_classes.get(status_class, None) assert isinstance(desired, list)