initial commit of cascading config

This utility resolves configuration data from multiple sources: for example, 'compiled-in' defaults, a system configuration file, a user configuration file, the process environment and, of course, command-line options.
2016-02-21 12:19:51 -08:00 · 2016-02-21 12:19:51 -08:00 · e7af014a73
parent a88451c124
commit e7af014a73
1 changed files with 462 additions and 0 deletions
--- a/tldp/cascadingconfig.py
+++ b/tldp/cascadingconfig.py
@@ -0,0 +1,462 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+from __future__ import absolute_import, division, print_function
+
+import os
+import sys
+
+from argparse import ArgumentParser, ArgumentError, Namespace
+from argparse import _UNRECOGNIZED_ARGS_ATTR
+
+import logging
+# -- configure the root logger at import time: output goes to stderr and
+#    the threshold is WARNING, so the debug() and info() calls in this
+#    module are silent unless the level is lowered
+logging.basicConfig(stream=sys.stderr, level=logging.WARNING)
+# -- getLogger() without a name returns the root logger
+logger = logging.getLogger()
+
+# -- separators used in names from each source; config-file and environment
+#    names are rewritten to use CLISEP (see cfg_to_dict and env_to_dict)
+CFGSEP = '.'
+ENVSEP = '_'
+CLISEP = '-'
+
+# -- default separator on which convert_multivalues splits a single value
+#    into a list of values
+MULTIVALUESEP = ','
+
+# -- On Python 3 the interpolating parser is simply ConfigParser; the
+#    SafeConfigParser alias was deprecated in 3.2 and removed in 3.12, so
+#    asking for it there raises ImportError and the Python 2 fallback then
+#    fails as well.  Only Python 2 (module "ConfigParser") needs the old
+#    name.
+try:
+    from configparser import ConfigParser
+except ImportError:
+    from ConfigParser import SafeConfigParser as ConfigParser
+
+
+def dict_to_argv_longform(d):
+    '''build an argparse-style argument list from a dictionary
+
+    :param: d, a dictionary of option name to value (or to a list/tuple
+            of values)
+
+    Returns: a flat list alternating '--<name>' and a value, in the
+             iteration order of the dictionary; it has the same shape as
+             sys.argv[1:] for an invocation that uses only long options.
+
+    A list or tuple value produces one '--<name>', value pair per element
+    (so an empty list or tuple produces nothing).  Values are passed
+    through untouched; nothing is converted to text here.
+    '''
+    argv = []
+    for name, value in d.items():
+        flag = "--" + name
+        # -- treat a scalar as a one-element sequence so that both cases
+        #    share the same emitting loop
+        if isinstance(value, (tuple, list)):
+            values = value
+        else:
+            values = (value,)
+        for item in values:
+            argv.append(flag)
+            argv.append(item)
+    return argv
+
+
+def convert_multivalues(d, multivaluesep=MULTIVALUESEP):
+    '''split separator-bearing values of a dictionary into lists
+
+    :param: d, a dictionary whose values are text
+    :param: multivaluesep; optional, the separator to split on
+
+    Returns: the same dictionary object that was passed in; each value
+             containing the separator has been replaced, in place, by a
+             list of its separator-delimited pieces with surrounding
+             whitespace stripped from every piece.
+
+    Values that do not contain the separator are left exactly as they
+    were.
+    '''
+    # -- walk a snapshot of the keys; only values of existing keys are
+    #    reassigned, the set of keys never changes
+    for key in list(d):
+        value = d[key]
+        if multivaluesep in value:
+            d[key] = [piece.strip() for piece in value.split(multivaluesep)]
+    return d
+
+
+def cfg_to_dict(f, base=None, cfgsep=CFGSEP, clisep=CLISEP):
+    '''read a configuration file; convert to CLI-parseable form
+
+    :param: f, a filename, a list/tuple of filenames, or an open file-like
+            object (anything with a readline() method)
+    :param: base [optional], only sections whose name starts with this
+            text are used; all others are skipped
+    :param: cfgsep [optional], separator to replace in section and field
+            names
+    :param: clisep [optional], replacement separator; also used to join
+            the section name to the field name
+
+    Returns: a dictionary where keys are section-field = value
+
+    Will read configuration into a dict.  Each section of the
+    configuration file is read and a dictionary is constructed by
+    concatenating the section name and the field name to produce the key.
+
+    It also normalizes all section names and fields (keys) to lowercase.
+    This aids in comparisons in downstream processing where interacting with
+    argparse and variables extracted from the environment.
+
+    Filenames are handed to ConfigParser.read(), which skips files it
+    cannot open; a missing file therefore contributes nothing instead of
+    raising.
+
+    Given only:
+
+      [frobnitz]
+      pubdir = /path/to/a/publication/directory
+
+    When invoked as:
+
+      cfg_to_dict(f)  # -- where f is the pathname or open filehandle
+
+    This function will return a dict that looks like this:
+
+      {'frobnitz-pubdir': '/path/to/a/publication/directory'}
+    '''
+    d = dict()
+    parser = ConfigParser()
+
+    if hasattr(f, 'readline'):
+        # -- an open file; os.path.isfile() must not be asked about it
+        #    (it raises TypeError for non-path objects).  readfp() is the
+        #    Python 2 spelling and was removed in Python 3.12, so prefer
+        #    read_file() wherever the parser offers it.
+        read_file = getattr(parser, 'read_file', None) or parser.readfp
+        read_file(f)
+    else:
+        found = parser.read(f)
+        if not found:
+            logger.debug("No configuration could be read from %s", f)
+
+    for section in parser.sections():
+        if base is not None:
+            if not section.startswith(base):
+                logger.debug("Skipping sect [%s] in %s (not prefixed with %s)",
+                             section, f, base)
+                continue
+        sectname = section.lower().replace(cfgsep, clisep)
+        for name, value in parser.items(section):
+            keyname = name.lower().replace(cfgsep, clisep)
+            d[clisep.join((sectname, keyname))] = value
+    return d
+
+
+def env_to_dict(env=os.environ, base=None, envsep=ENVSEP, clisep=CLISEP):
+    '''select entries from an environment by prefix; normalize their names
+
+    :param: env, a dictionary of names to values; os.environ if omitted
+    :param: base [optional], only names starting with base followed by
+            envsep are kept; when omitted, every entry is kept
+    :param: envsep [optional], separator used in the environment names
+    :param: clisep [optional], separator used in the returned names
+
+    Returns:  a dictionary of adjusted name to the unmodified value
+
+    Each selected name is lowercased and every envsep in it is replaced
+    with clisep, so that the result can be compared with names coming from
+    argparse and from configuration files.  The prefix itself is kept in
+    the returned name.
+
+    Given an environment:
+
+      SSH_AGENT_PID=4753
+      SSH_AUTH_SOCK=/tmp/ssh-2w3uWI19OqvG/agent.2638
+      FROBNITZ=Waffle
+
+    When invoked as:
+
+      env_to_dict(os.environ, 'SSH')
+
+    This function will return a dict that looks like this:
+
+      {'ssh-agent-pid': '4753',
+       'ssh-auth-sock': '/tmp/ssh-2w3uWI19OqvG/agent.2638'}
+
+    When invoked as:
+
+      env_to_dict(os.environ)
+
+    This function will return a dict that looks like this:
+
+      {'frobnitz': 'Waffle',
+       'ssh-agent-pid': '4753',
+       'ssh-auth-sock': '/tmp/ssh-2w3uWI19OqvG/agent.2638'}
+    '''
+    # -- an empty prefix matches every name
+    prefix = '' if base is None else base + envsep
+    return {
+        name.lower().replace(envsep, clisep): value
+        for name, value in env.items()
+        if name.startswith(prefix)
+    }
+
+
+def strip_tag_from_key(base, d, clisep=CLISEP):
+    '''remove a leading tag (plus separator) from the keys of a dictionary
+
+    :param: base, the tag to remove; if empty or None, d is returned as is
+    :param: d, a dictionary keyed by option name
+    :param: clisep [optional], the separator that follows the tag
+
+    Returns: a new dictionary in which every key starting with base plus
+             clisep has had that prefix removed; other keys are copied
+             unchanged.
+
+    If removing the prefix would produce a key that is already present in
+    the new dictionary, the problem is logged at debug level and the
+    original dictionary, d, is returned untouched.
+    '''
+    if not base:
+        return d
+    prefix = base + clisep
+    cut = len(prefix)
+    stripped = dict()
+    for key, value in d.items():
+        if not key.startswith(prefix):
+            stripped[key] = value
+            continue
+        shortened = key[cut:]
+        if shortened in stripped:
+            logger.debug("Duplicate key found when stripping %s from %s",
+                         prefix, key)
+            return d
+        stripped[shortened] = value
+    return stripped
+
+
+def argv_from_env(args, tag, **kw):
+    '''read an environment and produce argparse-compatible invocation
+
+    :param:  args, a dictionary containing the environment (os.environ)
+    :param:  tag, the prefix that selects environment variables; it is
+             matched in uppercase and removed from the resulting names
+    :kw:  envsep, clisep; optional separators, passed to env_to_dict()
+    :kw:  convert_multivalues; optional callable taking and returning a
+          dictionary, used in place of convert_multivalues(); pass None
+          to leave all values unsplit
+
+    Returns an argparse-compatible list that looks like argv from a
+    command-line.
+
+    Given an environment dict:
+
+      args = {
+        'LDPTOOL_VERBOSE': '3',
+        'LDPTOOL_SOURCEDIR': '/path/faq/docbook/,/path/howto/linuxdoc/'}
+
+    When invoked as:
+
+      argv_from_env(args, 'ldptool')
+
+    This function will return a list that looks like this:
+
+      ['--verbose', '3',
+       '--sourcedir', '/path/faq/docbook/',
+       '--sourcedir', '/path/howto/linuxdoc/']
+    '''
+    # -- 'convert_multivalues' is consumed here; env_to_dict() does not
+    #    accept it, so it must not be forwarded
+    opts = dict(kw)
+    listify_values = opts.pop('convert_multivalues', convert_multivalues)
+    clisep = opts.get('clisep', CLISEP)
+
+    d = env_to_dict(args, base=tag.upper(), **opts)
+    if listify_values is not None:
+        d = listify_values(d)
+    # -- env_to_dict() lowercases every name, so the tag has to be
+    #    lowercased (and joined with the same separator) to match
+    d = strip_tag_from_key(tag.lower(), d, clisep=clisep)
+    d = dict_to_argv_longform(d)
+    return d
+
+
+def argv_from_cfg(args, tag, **kw):
+    '''read a config file and produce argparse-compatible invocation
+
+    :param:  args, a filename, list of filenames or open file; passed to
+             cfg_to_dict()
+    :param:  tag, only sections starting with this text are used; the tag
+             is then removed from the resulting option names
+    :kw:  cfgsep, clisep; optional separators, passed to cfg_to_dict()
+    :kw:  multivaluesep; optional, passed to the multivalue converter
+    :kw:  convert_multivalues; optional callable taking and returning a
+          dictionary, used in place of convert_multivalues(); pass None
+          to leave all values unsplit
+
+    Returns an argparse-compatible list that looks like argv from a
+    command-line.
+
+    Given a config file:
+
+      [main]
+      silly = 3
+
+      [ldptool]
+      sourcedir = /home/mabrown/vcs/LDP/LDP/howto/linuxdoc/,
+                  /home/mabrown/vcs/LDP/LDP/howto/docbook/
+
+      [ldptool.linuxdoc]
+      sgml2html = /usr/bin/sgml2html
+
+      [ldptool-docbook]
+      xsltproc = /usr/bin/xsltproc
+
+    When invoked as:
+
+      argv_from_cfg(filename, 'ldptool')
+
+    This function will return a list that looks like this:
+
+      ['--sourcedir', '/home/mabrown/vcs/LDP/LDP/howto/linuxdoc/',
+       '--sourcedir', '/home/mabrown/vcs/LDP/LDP/howto/docbook/',
+       '--linuxdoc-sgml2html', '/usr/bin/sgml2html',
+       '--docbook-xsltproc', '/usr/bin/xsltproc']
+    '''
+    # -- each helper accepts a different set of keywords (and
+    #    dict_to_argv_longform() accepts none), so hand every helper only
+    #    what it understands instead of forwarding **kw wholesale
+    opts = dict(kw)
+    listify_values = opts.pop('convert_multivalues', convert_multivalues)
+    listify_opts = dict()
+    if 'multivaluesep' in opts:
+        listify_opts['multivaluesep'] = opts.pop('multivaluesep')
+    clisep = opts.get('clisep', CLISEP)
+
+    d = cfg_to_dict(args, base=tag, **opts)
+    if listify_values is not None:
+        d = listify_values(d, **listify_opts)
+    # -- cfg_to_dict() lowercases section and field names, so the tag has
+    #    to be lowercased to match
+    d = strip_tag_from_key(tag.lower(), d, clisep=clisep)
+    d = dict_to_argv_longform(d)
+    return d
+
+
+def dict_from_namespace(ns):
+    '''return the attributes of a Namespace as a dictionary
+
+    :param: ns, an argparse.Namespace (or anything accepted by vars())
+
+    Returns: the result of vars(ns); for a Namespace this is the object's
+             own attribute dictionary, not a copy.
+    '''
+    return vars(ns)
+
+
+def namespace_from_dict(d):
+    '''build an argparse.Namespace from a dictionary
+
+    :param: d, a dictionary of attribute name to value
+
+    Returns: a new Namespace with one attribute set for every key of d
+    '''
+    result = Namespace()
+    for attr in d:
+        setattr(result, attr, d[attr])
+    return result
+
+
+class DefaultFreeArgumentParser(ArgumentParser):
+    '''subclass of stock argparse.ArgumentParser; suppress default generation
+
+    The vast majority of argparse users (and usage cases) would like to
+    produce all defaults whenever parsing args/options.
+
+    In this case, we would like to take configuration data from multiple
+    sources and merge them.  It is important to omit any configured defaults
+    so that it's clear where a configuration option came from.
+
+    See the method parse_known_args_no_defaults().
+    '''
+    def parse_known_args_no_defaults(self, args=None, namespace=None):
+        '''This method is the parse_known_args() method from the stock
+        library, sans the block which sets the defaults in the Namespace().
+
+        :param: args [optional], list of argument strings; sys.argv[1:]
+                if omitted
+        :param: namespace [optional], object to populate; a new, empty
+                Namespace if omitted
+
+        Returns: a tuple (namespace, args) where namespace holds only the
+                 options actually present in the input and args is the
+                 list of unrecognized argument strings.
+
+        On an ArgumentError, self.error() is called with the message.
+
+        This method is called many times by CascadingConfig():
+
+          - when processing CLI, returns only options found in user's CLI
+          - when processing system configuration, returns only ...
+          - when processing user configuration, returns only ...
+          - when processing environment, returns only ...
+
+        See also CascadingConfig()
+        '''
+        if args is None:
+            # args default to the system args
+            args = sys.argv[1:]
+        else:
+            # make sure that args are mutable
+            args = list(args)
+
+        # start from an empty Namespace; this is where the stock method
+        # would have copied in the parser defaults
+        if namespace is None:
+            namespace = Namespace()
+
+        # _parse_known_args() is private to argparse and its signature is
+        # not stable: newer releases take an additional positional
+        # 'intermixed' parameter.  Look at the signature and pass False
+        # (what the stock parse_known_args() passes) only when it is
+        # wanted, so that both old and new releases work.
+        extra = ()
+        func = getattr(self._parse_known_args, '__func__', None)
+        code = getattr(func, '__code__', None)
+        if code is not None:
+            if 'intermixed' in code.co_varnames[:code.co_argcount]:
+                extra = (False,)
+
+        # parse the arguments and exit if there are any errors
+        try:
+            namespace, args = self._parse_known_args(args, namespace, *extra)
+            if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR):
+                args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR))
+                delattr(namespace, _UNRECOGNIZED_ARGS_ATTR)
+            return namespace, args
+        except ArgumentError as err:
+            self.error(str(err))
+
+
+class CascadingConfig(object):
+    '''container for all conf data read from environ, CLI and config files
+
+    This class delegates most of the heavy lifting and processing of option
+    processing to argparse, which is eminently suited to the rich set of
+    possibilities, including type conversion and other user-defined
+    data-dependent arbitrary checks.
+
+    The CascadingConfig gathers configuration data from the following sources:
+
+      - cli:  command-line options, supplied by the user
+      - environment:  process environment
+      - userconfig:  user-specific configuration file
+      - systemconfig:  system-wide configuration file options
+      - defaults:  defaults from parser (subclass of argparse.ArgumentParser)
+
+    Each source is stored, as a Namespace, in the attribute of the same
+    name.
+
+    The resulting configuration is derived by applying rules of precedence for
+    the configuration sources.  The order of precedence resolution is
+    configurable.  The stock CascadingConfig resolution order is as
+    follows:
+
+      - cli            (highest precedence)
+      - environment
+      - userconfig
+      - systemconfig
+      - defaults       (lowest precedence)
+
+    The order of resolution of configurations can be controlled by passing
+    a list of sources to the resolve() method.  Here's the standard resolution
+    order:
+
+    order = ['cli', 'environment', 'userconfig', 'systemconfig', 'defaults']
+    '''
+    order = ['cli', 'environment', 'userconfig', 'systemconfig', 'defaults']
+
+    def __init__(self, tag, parser, argv=None, env=os.environ,
+                 configfile='configfile', order=order):
+        '''construct a CascadingConfig
+
+        :param: tag, the config file prefix and envar prefix
+        :param: parser, a DefaultFreeArgumentParser with all args set
+
+        Optional:
+
+        :param: argv, CLI args to use instead of sys.argv[1:]
+        :param: env, environment dictionary to use instead of os.environ
+        :param: configfile, CLI name to use instead of 'configfile' to
+                find the name(s) of the system and user configuration files
+        :param: order, the precedence or resolution order of the various
+                configuration sources
+
+        The system configuration file is the parser's default value for
+        the configfile option.  The user configuration file is the value
+        of that option on the CLI or, failing that, in the environment;
+        a value equal to the system configuration file is skipped.
+
+        The parser must not merge or supply defaults when returning a
+        Namespace.  If it does that, then downstream consumers will not be
+        able to handle precedence resolution themselves.
+        '''
+        # -- a wee-bit hackish; but this is crucial to the proper functioning
+        #    of CascadingConfig
+        #
+        assert hasattr(parser, 'parse_known_args_no_defaults')
+
+        # -- look up sys.argv when called, not when the class is defined;
+        #    a default of sys.argv[1:] in the signature would be frozen at
+        #    import time
+        if argv is None:
+            argv = sys.argv[1:]
+
+        self.order = order
+        self.parser = parser.parse_known_args_no_defaults
+
+        self.defaults = parser.parse_args([])  # -- "compiled-in" defaults
+        self.cli, _ = self.parser(argv)
+        self.environment, _ = self.parser(argv_from_env(env, tag))
+
+        syscfg = getattr(self.defaults, configfile, None)
+        if syscfg is not None:
+            self.systemconfig, _ = self.parser(argv_from_cfg(syscfg, tag))
+        else:
+            self.systemconfig = Namespace()
+
+        # -- pick the first candidate that is set and differs from the
+        #    system config; usrcfg stays None when every candidate was
+        #    skipped, so the system config is never read a second time
+        usrcfg = None
+        candidates = (
+            ('cli', getattr(self.cli, configfile, None)),
+            ('env', getattr(self.environment, configfile, None)),
+        )
+        for source, candidate in candidates:
+            if candidate is None:
+                continue
+            if candidate == syscfg:
+                logger.info("Skipping systemconfig file %s in userconfig (%s)",
+                            syscfg, source)
+                continue
+            logger.debug("Using %s for user config", candidate)
+            usrcfg = candidate
+            break
+
+        if usrcfg is None:
+            self.userconfig = Namespace()
+        else:
+            logger.debug("Reading %s for user config", usrcfg)
+            self.userconfig, _ = self.parser(argv_from_cfg(usrcfg, tag))
+
+        # -- the result is discarded; running it once here means a source
+        #    name in order with no matching attribute raises
+        #    AttributeError at construction time
+        self.resolve()
+
+    def resolve(self, order=None):
+        '''merge all sources into a single Namespace
+
+        :param: order [optional], list of source names from highest to
+                lowest precedence; self.order if omitted
+
+        Returns: a new Namespace holding, for each option name found in
+                 any source, the value from the highest-precedence source
+                 that has it.
+
+        Every name in order must be an attribute of this object.
+        '''
+        if order is None:
+            order = self.order
+        sources = [(x, getattr(self, x)) for x in order]
+        # -- apply lowest precedence first so that later (higher
+        #    precedence) sources overwrite earlier ones
+        sources.reverse()
+        config = Namespace()
+        for sourcename, source in sources:
+            for name in vars(source):
+                newval = getattr(source, name)
+                logger.debug("Source %s: %s=%s", sourcename, name, newval)
+                oldval = getattr(config, name, None)
+                if oldval is not None:
+                    logger.info("Source %s: replacing %s=%s with %s=%s",
+                                sourcename, name, oldval, name, newval)
+                setattr(config, name, newval)
+        return config
+
+
+def sample(args):
+    '''demonstrate CascadingConfig; print the resolved configuration
+
+    :param: args, list of command-line argument strings (without the
+            program name)
+
+    Builds a parser with a handful of ldptool options, resolves them
+    against args, the process environment and the configuration files,
+    and pretty-prints the resulting Namespace.
+    '''
+    tag = 'ldptool'
+    parser = DefaultFreeArgumentParser()
+    parser.add_argument('--sourcedir', default=None, type=str, action='append')
+    parser.add_argument('--pubdir', default=None, type=str)
+    parser.add_argument('--verbose', default=0, type=int)
+    parser.add_argument('--linuxdoc-sgml2html', default="/usr/bin/sgml2html",
+                        type=str)
+    parser.add_argument('--docbook-xsltproc', default="/usr/bin/xsltproc",
+                        type=str)
+    parser.add_argument('--configfile', '--cfg', '--config-file', type=str,
+                        default="/home/mabrown/tmp/ldptool.cfg")
+    # -- honour the caller's argument list rather than re-reading sys.argv
+    uniconf = CascadingConfig(tag, parser, args)
+    import pprint
+    pprint.pprint(uniconf.resolve())
+
+
+# -- when executed as a script, run the demonstration against the real
+#    command-line arguments
+if __name__ == '__main__':
+    sample(sys.argv[1:])
+
+# -- end of file