From 2afbc7a147ce2553e8300b60733b5f0e6f935fe9 Mon Sep 17 00:00:00 2001
From: "Martin A. Brown"
Date: Mon, 14 Mar 2016 21:42:21 -0700
Subject: [PATCH] switch to codecs.open and expect UTF-8 data

---
 tldp/typeguesser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tldp/typeguesser.py b/tldp/typeguesser.py
index 7ddff09..3b1a726 100644
--- a/tldp/typeguesser.py
+++ b/tldp/typeguesser.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function
 
 import os
 import errno
+import codecs
 import inspect
 import logging
 
@@ -79,7 +80,7 @@ def guess(fname):
     for doctype in possible:
         logger.debug("%s extension %s could be %s.", stem, ext, doctype)
 
-    with open(fname) as f:
+    with codecs.open(fname, encoding='utf-8') as f:
         buf = f.read(1024)
 
     guesses = list()