mirror of https://github.com/tLDP/python-tldp
stop leaking FDs when guessing doctypes
This commit is contained in:
parent
a2daee9425
commit
26de64a2bb
|
@ -42,18 +42,16 @@ def depends(*predecessors):
|
||||||
class SignatureChecker(object):
|
class SignatureChecker(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def signatureLocation(cls, f):
|
def signatureLocation(cls, buf, fname):
|
||||||
f.seek(0)
|
|
||||||
buf = f.read(1024)
|
|
||||||
for sig in cls.signatures:
|
for sig in cls.signatures:
|
||||||
try:
|
try:
|
||||||
sindex = buf.index(sig)
|
sindex = buf.index(sig)
|
||||||
logger.debug("YES FOUND signature %r in %s at %s; doctype %s.",
|
logger.debug("YES FOUND signature %r in %s at %s; doctype %s.",
|
||||||
sig, f.name, sindex, cls)
|
sig, fname, sindex, cls)
|
||||||
return sindex
|
return sindex
|
||||||
except ValueError:
|
except ValueError:
|
||||||
logger.debug("not found signature %r in %s for type %s",
|
logger.debug("not found signature %r in %s for type %s",
|
||||||
sig, f.name, cls.__name__)
|
sig, fname, cls.__name__)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,11 +4,10 @@
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import errno
|
||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from tldp.utils import makefh
|
|
||||||
|
|
||||||
import tldp.doctypes
|
import tldp.doctypes
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -32,10 +31,10 @@ def getDoctypeClasses():
|
||||||
return getDoctypeMembers(inspect.isclass)
|
return getDoctypeMembers(inspect.isclass)
|
||||||
|
|
||||||
|
|
||||||
def guess(thing):
|
def guess(fname):
|
||||||
'''return a tldp.doctype class which is a best guess for document type
|
'''return a tldp.doctype class which is a best guess for document type
|
||||||
|
|
||||||
thing: Could be a filename or an open file.
|
:param fname: A filename.
|
||||||
|
|
||||||
The guess function will try to guess the document type (doctype) from the
|
The guess function will try to guess the document type (doctype) from the
|
||||||
file extension. If extension matching produces multiple possible doctype
|
file extension. If extension matching produces multiple possible doctype
|
||||||
|
@ -55,11 +54,10 @@ def guess(thing):
|
||||||
* It could/should use heuristics or something richer than signatures.
|
* It could/should use heuristics or something richer than signatures.
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
f = makefh(thing)
|
stem, ext = os.path.splitext(fname)
|
||||||
except TypeError:
|
except AttributeError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
stem, ext = os.path.splitext(f.name)
|
|
||||||
if not ext:
|
if not ext:
|
||||||
logger.debug("%s no file extension, skipping %s.", stem, ext)
|
logger.debug("%s no file extension, skipping %s.", stem, ext)
|
||||||
return None
|
return None
|
||||||
|
@ -77,19 +75,22 @@ def guess(thing):
|
||||||
# -- for this extension, multiple document types, probably SGML, XML
|
# -- for this extension, multiple document types, probably SGML, XML
|
||||||
#
|
#
|
||||||
logger.debug("%s multiple possible doctypes for extension %s on file %s.",
|
logger.debug("%s multiple possible doctypes for extension %s on file %s.",
|
||||||
stem, ext, f.name)
|
stem, ext, fname)
|
||||||
for doctype in possible:
|
for doctype in possible:
|
||||||
logger.debug("%s extension %s could be %s.", stem, ext, doctype)
|
logger.debug("%s extension %s could be %s.", stem, ext, doctype)
|
||||||
|
|
||||||
|
with open(fname) as f:
|
||||||
|
buf = f.read(1024)
|
||||||
|
|
||||||
guesses = list()
|
guesses = list()
|
||||||
for doctype in possible:
|
for doctype in possible:
|
||||||
sindex = doctype.signatureLocation(f)
|
sindex = doctype.signatureLocation(buf, fname)
|
||||||
if sindex is not None:
|
if sindex is not None:
|
||||||
guesses.append((sindex, doctype))
|
guesses.append((sindex, doctype))
|
||||||
|
|
||||||
if not guesses:
|
if not guesses:
|
||||||
logger.warning("%s no matching signature found for %s.",
|
logger.warning("%s no matching signature found for %s.",
|
||||||
stem, f.name)
|
stem, fname)
|
||||||
return None
|
return None
|
||||||
if len(guesses) == 1:
|
if len(guesses) == 1:
|
||||||
_, doctype = guesses.pop()
|
_, doctype = guesses.pop()
|
||||||
|
@ -100,10 +101,10 @@ def guess(thing):
|
||||||
# first signature in the file as the more likely document type.
|
# first signature in the file as the more likely document type.
|
||||||
#
|
#
|
||||||
guesses.sort()
|
guesses.sort()
|
||||||
logger.info("%s multiple doctype guesses for file %s", stem, f.name)
|
logger.info("%s multiple doctype guesses for file %s", stem, fname)
|
||||||
for sindex, doctype in guesses:
|
for sindex, doctype in guesses:
|
||||||
logger.info("%s could be %s (sig at pos %s)", stem, doctype, sindex)
|
logger.info("%s could be %s (sig at pos %s)", stem, doctype, sindex)
|
||||||
logger.info("%s going to guess %s for %s", stem, doctype, f.name)
|
logger.info("%s going to guess %s for %s", stem, doctype, fname)
|
||||||
_, doctype = guesses.pop(0)
|
_, doctype = guesses.pop(0)
|
||||||
return doctype
|
return doctype
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue