mirror of https://github.com/tLDP/python-tldp
allow creation of empty SourceCollection; fixes
Allow creation of an empty SourceCollection, which can be handed around in the driver to allow for higher-level document wrangling; fix a bad, always-failing directory check (thank you, testing); clarify handling of documents living in a directory and the generation of the fileset.
This commit is contained in:
parent
f39237d307
commit
f17d164b52
|
@ -6,7 +6,7 @@ import os
|
||||||
import errno
|
import errno
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
from .utils import logger
|
from .utils import logger, getfileset
|
||||||
from .typeguesser import guess, knownextensions
|
from .typeguesser import guess, knownextensions
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,7 +16,9 @@ class SourceCollection(collections.MutableMapping):
|
||||||
return '<%s:(%s docs)>' % \
|
return '<%s:(%s docs)>' % \
|
||||||
(self.__class__.__name__, len(self))
|
(self.__class__.__name__, len(self))
|
||||||
|
|
||||||
def __init__(self, args):
|
def __init__(self, args=None):
|
||||||
|
if args is None:
|
||||||
|
return
|
||||||
dirs = [os.path.abspath(x) for x in args]
|
dirs = [os.path.abspath(x) for x in args]
|
||||||
results = [os.path.exists(x) for x in dirs]
|
results = [os.path.exists(x) for x in dirs]
|
||||||
|
|
||||||
|
@ -26,13 +28,12 @@ class SourceCollection(collections.MutableMapping):
|
||||||
raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), sdir)
|
raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), sdir)
|
||||||
|
|
||||||
for sdir in dirs:
|
for sdir in dirs:
|
||||||
docs = dict()
|
|
||||||
candidates = list()
|
|
||||||
for fname in os.listdir(sdir):
|
for fname in os.listdir(sdir):
|
||||||
|
candidates = list()
|
||||||
possible = os.path.join(sdir, fname)
|
possible = os.path.join(sdir, fname)
|
||||||
if os.path.isfile(possible):
|
if os.path.isfile(possible):
|
||||||
candidates.append(SourceDocument(possible))
|
candidates.append(SourceDocument(possible))
|
||||||
elif os.path.isdir(fname):
|
elif os.path.isdir(possible):
|
||||||
stem = os.path.basename(fname)
|
stem = os.path.basename(fname)
|
||||||
for ext in knownextensions:
|
for ext in knownextensions:
|
||||||
possible = os.path.join(sdir, fname, stem + ext)
|
possible = os.path.join(sdir, fname, stem + ext)
|
||||||
|
@ -44,12 +45,11 @@ class SourceCollection(collections.MutableMapping):
|
||||||
continue
|
continue
|
||||||
for candy in candidates:
|
for candy in candidates:
|
||||||
if candy.stem in self:
|
if candy.stem in self:
|
||||||
logger.warning("Duplicate stems: %s and %s",
|
logger.warning("Ignoring duplicate is %s", candy.filename)
|
||||||
self[candy.stem].filename, candy.filename)
|
logger.warning("Existing dup-entry is %s", self[candy.stem].filename)
|
||||||
logger.warning("Ignoring %s", candy.filename)
|
|
||||||
else:
|
else:
|
||||||
self[candy.stem] = candy
|
self[candy.stem] = candy
|
||||||
logger.info("Discovered %s documents total", len(self))
|
logger.debug("Discovered %s documents total", len(self))
|
||||||
|
|
||||||
def __delitem__(self, key):
|
def __delitem__(self, key):
|
||||||
del self.__dict__[key]
|
del self.__dict__[key]
|
||||||
|
@ -87,16 +87,12 @@ class SourceDocument(object):
|
||||||
self.doctype = self._doctype()
|
self.doctype = self._doctype()
|
||||||
self.dirname, self.basename = os.path.split(self.filename)
|
self.dirname, self.basename = os.path.split(self.filename)
|
||||||
self.stem, self.ext = os.path.splitext(self.basename)
|
self.stem, self.ext = os.path.splitext(self.basename)
|
||||||
self.stat = os.stat(self.filename)
|
|
||||||
|
|
||||||
self.resources = False # -- assume no ./images/, ./resources/
|
self.resources = False # -- assume no ./images/, ./resources/
|
||||||
self.singlefile = True # -- assume only one file
|
parentbase = os.path.basename(self.dirname)
|
||||||
parentdir = os.path.basename(self.dirname)
|
if parentbase == self.stem:
|
||||||
if parentdir == self.stem:
|
self.fileset = getfileset(self.dirname)
|
||||||
self.singlefile = False
|
else:
|
||||||
for rdir in ('resources', 'images'):
|
self.fileset = set([self.basename])
|
||||||
if os.path.exists(os.path.join(self.dirname, rdir)):
|
|
||||||
self.resources = True
|
|
||||||
|
|
||||||
def _doctype(self):
|
def _doctype(self):
|
||||||
return guess(self.filename)
|
return guess(self.filename)
|
||||||
|
|
Loading…
Reference in New Issue