From 41bcce747d05c78fdf69ee6325088c8666228fa1 Mon Sep 17 00:00:00 2001 From: "Martin A. Brown" Date: Fri, 25 Mar 2016 09:54:56 -0700 Subject: [PATCH] initial commit of some migration tools --- LDP/ref/migration-2016/guidemigration.py | 213 ++++++++++++ LDP/ref/migration-2016/howtomigration.py | 313 ++++++++++++++++++ LDP/ref/migration-2016/migration-helper.sh | 45 +++ .../migration-2016/migration-preparation.sh | 59 ++++ LDP/ref/migration-2016/old-migration.py | 138 ++++++++ 5 files changed, 768 insertions(+) create mode 100644 LDP/ref/migration-2016/guidemigration.py create mode 100644 LDP/ref/migration-2016/howtomigration.py create mode 100644 LDP/ref/migration-2016/migration-helper.sh create mode 100644 LDP/ref/migration-2016/migration-preparation.sh create mode 100644 LDP/ref/migration-2016/old-migration.py diff --git a/LDP/ref/migration-2016/guidemigration.py b/LDP/ref/migration-2016/guidemigration.py new file mode 100644 index 00000000..2dced884 --- /dev/null +++ b/LDP/ref/migration-2016/guidemigration.py @@ -0,0 +1,213 @@ +#! 
/usr/bin/python +# +# -- migrate to the new naming scheme + +from __future__ import absolute_import, division, print_function + +import os +import sys +import time +import errno +import shutil +import logging +import functools + +logformat = '%(levelname)-9s %(name)s %(filename)s#%(lineno)s ' \ + + '%(funcName)s %(message)s' +logging.basicConfig(stream=sys.stderr, format=logformat, level=logging.DEBUG) +logger = logging.getLogger(__name__) + +# -- short names +# +opa = os.path.abspath +opb = os.path.basename +opd = os.path.dirname +opj = os.path.join +opn = os.path.normpath +opr = os.path.relpath +ops = os.path.split + + +# -- Stem handling for HTML + +predictably_named_guides = '''Bash-Beginners-Guide +cpg +espk-ug +EVMSUG +GNU-Linux-Tools-Summary +LDP-Author-Guide +Linux-Dictionary +Linux-Filesystem-Hierarchy +Linux-Media-Guide +Mobile-Guide +Pocket-Linux-Guide +sag'''.split() + +stems = dict(zip(predictably_named_guides, predictably_named_guides)) + +# -- no "html" subdirectory +# +stems['lki'] = 'lki' +stems['nag2'] = 'nag2' + +# -- two kernel versions, same name (in days of yore) +# +stems['lkmpg/2.4'] = 'lkmpg-2.4' +stems['lkmpg/2.6'] = 'lkmpg-2.6' + +# -- wacky path naming +# +stems['lame/LAME/linux-admin-made-easy'] = 'lame' +stems['solrhe/Securing-Optimizing-Linux-RH-Edition-v1.3'] = 'solrhe' + +# -- name changers +# +stems['abs'] = 'abs-guide' +stems['intro-linux'] = 'Intro-Linux' + + +# -- PDF handling + +pdflist = '''Bash-Beginners-Guide/Bash-Beginners-Guide.pdf +EVMSUG/EVMSUG.pdf +GNU-Linux-Tools-Summary/GNU-Linux-Tools-Summary.pdf +LDP-Author-Guide/LDP-Author-Guide.pdf +Linux-Dictionary/Linux-Dictionary.pdf +Linux-Filesystem-Hierarchy/Linux-Filesystem-Hierarchy.pdf +Linux-Media-Guide/Linux-Media-Guide.pdf +Mobile-Guide/Mobile-Guide.pdf +Pocket-Linux-Guide/Pocket-Linux-Guide.pdf +cpg/Custom-Porting-Guide.pdf +espk-ug/espk-ug.pdf +lame/lame.pdf +lki/lki.pdf +nag2/nag2.pdf +sag/sag.pdf +solrhe/Securing-Optimizing-Linux-RH-Edition-v1.3.pdf'''.split() + 
+extrapdfs = dict() +extrapdfs['lkmpg/2.4/lkmpg.pdf'] = 'lkmpg-2.4' +extrapdfs['lkmpg/2.6/lkmpg.pdf'] = 'lkmpg-2.6' +extrapdfs['abs/abs-guide.pdf'] = 'abs-guide' +extrapdfs['intro-linux/intro-linux.pdf'] = 'Intro-Linux' + +def validate_args(argv): + if len(argv) == 4: + for d in argv[:3]: + if not os.path.isdir(d): + return False + return True + return False + + +def make_refresh(target, title, delay=0): + text = ''' + + {1}: {0} + + + +

This page has moved permanently to + {0}. + Update your bookmarks if you wish. The compatibility + redirect will remain through, at least, early 2017. +

+ + +''' + return text.format(target, title, delay) + +def swapfiles(a, b): + '''use os.rename() to make "a" become "b"''' + if not os.path.isfile(a): + raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), a) + tf = None + if os.path.exists(b): + _, tf = mkstemp(prefix='swapfile-', dir=opd(opa(a))) + logger.debug("Created tempfile %s.", tf) + logger.debug("About to rename %s to %s.", b, tf) + os.rename(b, tf) + logger.debug("About to rename %s to %s.", a, b) + os.rename(a, b) + if tf: + logger.debug("About to rename %s to %s.", tf, a) + os.rename(tf, a) + logger.debug("About to remove %s.", tf) + os.rmdir(tf) + + +def create_symlink(source, target): + assert not os.path.exists(target) + targetdir = os.path.dirname(target) + if not os.path.isdir(targetdir): + logger.debug("Creating directory %s", targetdir) + os.makedirs(targetdir) + logger.debug("Creating symlink %s, pointing to %s", target, source) + os.symlink(os.path.relpath(source, start=targetdir), target) + + +def create_refresh_meta_equiv(fname, url, stem, **kwargs): + assert not os.path.exists(fname) + targetdir = os.path.dirname(fname) + if not os.path.isdir(targetdir): + logger.debug("Creating directory %s", targetdir) + os.makedirs(targetdir) + logger.debug("Creating file %s, with redirect to %s", fname, url) + with open(fname, 'w') as f: + f.write(make_refresh(url, stem, **kwargs)) + + +def newhtmlfilename(pubdir, stem, fname): + sought = opj(pubdir, stem, fname) + if not os.path.isfile(sought): + return opj(pubdir, stem, 'index.html') + return sought + +def guides(stems, guidepath, guidecompat, pubdir, urlbase): + + for pdf in pdflist: + stem, _ = os.path.split(pdf) + oldpdf = opj(guidecompat, pdf) + newpdf = opj(pubdir, stem, stem + '.pdf') + assert os.path.exists(oldpdf) + assert os.path.exists(newpdf) + os.rename(oldpdf, oldpdf + '.' 
+ str(int(time.time()))) + create_symlink(newpdf, oldpdf) + + for pdf, stem in extrapdfs.items(): + oldpdf = opj(guidecompat, pdf) + newpdf = opj(pubdir, stem, stem + '.pdf') + assert os.path.exists(oldpdf) + assert os.path.exists(newpdf) + os.rename(oldpdf, oldpdf + '.' + str(int(time.time()))) + create_symlink(newpdf, oldpdf) + + for stem, newstem in sorted(stems.items(), key=lambda x: x[1].lower()): + htmldir = opj(guidecompat, stem, 'html') + if not os.path.isdir(htmldir): + htmldir, _ = os.path.split(htmldir) + assert os.path.exists(htmldir) + for fn in os.listdir(htmldir): + if not fn.endswith('.html'): + continue + pubpath = newhtmlfilename(pubdir, newstem, fn) + url = pubpath.replace(pubdir, urlbase) + fullname = opj(htmldir, fn) + os.rename(fullname, fullname + '.' + str(int(time.time()))) + create_refresh_meta_equiv(fullname, url, newstem, delay=2) + + +def main(fin, fout, argv): + me = os.path.basename(sys.argv[0]) + usage = "usage: %s " % (me,) + if not validate_args(argv): + return usage + guidepath, guidecompat, pubdir, urlbase = argv + guides(stems, guidepath, guidecompat, pubdir, urlbase) + return os.EX_OK + + +if __name__ == '__main__': + sys.exit(main(sys.stdin, sys.stdout, sys.argv[1:])) + +# -- end of file diff --git a/LDP/ref/migration-2016/howtomigration.py b/LDP/ref/migration-2016/howtomigration.py new file mode 100644 index 00000000..ab2a24d5 --- /dev/null +++ b/LDP/ref/migration-2016/howtomigration.py @@ -0,0 +1,313 @@ +#! 
/usr/bin/python +# +# -- migrate to the new naming scheme + +from __future__ import absolute_import, division, print_function + +import os +import sys +import errno +import shutil +import logging +import functools + +logformat = '%(levelname)-9s %(name)s %(filename)s#%(lineno)s ' \ + + '%(funcName)s %(message)s' +logging.basicConfig(stream=sys.stderr, format=logformat, level=logging.DEBUG) +logger = logging.getLogger(__name__) + +# -- short names +# +opa = os.path.abspath +opb = os.path.basename +opd = os.path.dirname +opj = os.path.join +opn = os.path.normpath +opr = os.path.relpath +ops = os.path.split + +SKIP = object() + + +def add_renamed_stems(stems): + stems['ppp-ssh'] = 'VPN-PPP-SSH-HOWTO' + stems['intro-linux'] = 'Intro-Linux' + stems['DPT-Hardware-RAID'] = 'DPT-Hardware-RAID-HOWTO' + stems['Loadlin+Win95'] = 'Loadlin+Win95-98-ME' + stems['Laptop-HOWTO'] = 'Mobile-Guide' + stems['IR-HOWTO'] = 'Infrared-HOWTO' + stems['Xnews-under-Linux-HOWTO'] = 'Windows-Newsreaders-under-Linux-HOWTO' + stems['Access-HOWTO'] = 'Accessibility-HOWTO' + stems['Adv-Bash-Scr-HOWTO'] = 'abs-guide' + stems['abs'] = 'abs-guide' + stems['Mosix-HOWTO'] = 'openMosix-HOWTO' + stems['Partition-Rescue-New'] = 'Partition-Rescue' + stems['Partition-Mass-Storage-Dummies-Linux-HOWTO'] = 'Partition-Mass-Storage-Definitions-Naming-HOWTO' + + +def add_skipped_stems(stems): + stems['index.html'] = SKIP + stems['INDEX'] = SKIP + stems['README'] = SKIP + stems['COPYRIGHT'] = SKIP + stems['.htaccess'] = SKIP + stems['GCC-HOWTO'] = SKIP + stems['Netscape+Proxy'] = SKIP + stems['Sendmail+UUCP'] = SKIP + stems['GTEK-BBS-550'] = SKIP + stems['Consultants-HOWTO'] = SKIP + stems['Acer-Laptop-HOWTO'] = SKIP + stems['Linux-From-Scratch-HOWTO'] = SKIP + stems['Distributions-HOWTO'] = SKIP + stems['MIPS-HOWTO'] = SKIP + stems['3Dfx-HOWTO'] = SKIP + stems['PostgreSQL-HOWTO'] = SKIP + stems['Term-Firewall'] = SKIP + stems['WikiText-HOWTO'] = SKIP + stems['HOWTO-INDEX'] = SKIP + stems['HOWTO-HOWTO'] = SKIP + 
stems['Security-Quickstart-Redhat-HOWTO'] = SKIP + + +def collect_published_stems(dirbase): + d = dict() + for stem in os.listdir(dirbase): + if not os.path.isdir(opj(dirbase, stem)): + continue + d[stem] = stem + add_renamed_stems(d) + add_skipped_stems(d) + return d + + +def validate_args(argv): + if len(argv) == 4: + for d in argv[:3]: + if not os.path.isdir(d): + return False + return True + return False + + +def walk_simple(stems, dirbase, root): + for name in os.listdir(dirbase): + if name.endswith('.pdf'): + stem, _ = os.path.splitext(name) + else: + stem = name + relpath = opr(opj(dirbase, name), start=root) + newstem = stems.get(stem, None) + if newstem is None: + logger.error("%s missing stem: %s", stem, relpath) + continue + elif newstem is SKIP: + logger.info("%s ignoring stem: %s", stem, relpath) + continue + yield newstem, relpath + + +def walk_html_single(stems, dirbase, root): + for name in os.listdir(dirbase): + if name == 'images': + continue + dirname = opj(dirbase, name) + if not os.path.isdir(dirname): + continue + indexhtml = opj(dirname, 'index.html') + if not os.path.isfile(indexhtml): + logger.error("%s missing index.html: %s", stem, indexhtml) + stem = name + relpath = opr(indexhtml, start=root) + newstem = stems.get(stem, None) + if newstem is None: + logger.error("%s missing stem: %s", stem, relpath) + continue + elif newstem is SKIP: + logger.info("%s ignoring stem: %s", stem, relpath) + continue + yield newstem, relpath + + +def walk_html_chunked_dirs(stems, dirbase, root): + for name in os.listdir(dirbase): + if name in ('images', 'pdf', 'text', 'html_single', 'archived'): + continue + dirname = opj(dirbase, name) + if not os.path.isdir(dirname): + continue + for subname in os.listdir(dirname): + fname = opj(dirname, subname) + if os.path.isdir(fname): + continue + stem = name + relpath = opr(fname, start=root) + newstem = stems.get(stem, None) + if newstem is None: + logger.error("%s missing stem: %s", stem, relpath) + continue + 
elif newstem is SKIP: + logger.info("%s ignoring stem: %s", stem, relpath) + continue + yield newstem, relpath + + +def walk_html_chunked_files(stems, dirbase, root): + for name in os.listdir(dirbase): + fname = opj(dirbase, name) + if not os.path.isfile(fname): + continue + stem, ext = os.path.splitext(name) + if stem == 'index' or ext != '.html': + continue + if stem not in stems: + stem = '-'.join(stem.split('-')[:-1]) + if stem not in stems: + logger.error("Could not determine stem for %s", fname) + continue + relpath = opr(fname, start=root) + newstem = stems.get(stem, None) + if newstem is None: + logger.error("%s missing stem: %s", stem, relpath) + continue + elif newstem is SKIP: + logger.info("%s ignoring stem: %s", stem, relpath) + continue + yield newstem, relpath + + +def htmlf(stem, relpath, pubdir, newtree): + pubf = opj(pubdir, stem, relpath) + newf = opj(newtree, relpath) + if os.path.exists(pubf): + return stem, relpath, newf, pubf + else: + return stem, relpath, newf, opj(pubdir, stem, 'index.html') + + +def htmld(stem, relpath, pubdir, newtree): + pubf = opj(pubdir, relpath) + newf = opj(newtree, relpath) + if os.path.exists(pubf): + return stem, relpath, newf, pubf + else: + return stem, relpath, newf, opj(pubdir, stem, 'index.html') + + +def htmls(stem, relpath, pubdir, newtree): + pubf = opj(pubdir, stem, stem + '-single.html') + newf = opj(newtree, relpath) + if os.path.exists(pubf): + return stem, relpath, newf, pubf + else: + return stem, relpath, newf, opj(pubdir, stem, 'index.html') + + +def txt(stem, relpath, pubdir, newtree): + pubf = opj(pubdir, stem, stem + '.txt') + newf = opj(newtree, relpath) + if os.path.exists(pubf): + return stem, relpath, newf, pubf + else: + return stem, relpath, newf, None + + +def pdf(stem, relpath, pubdir, newtree): + pubf = opj(pubdir, stem, stem + '.pdf') + newf = opj(newtree, relpath) + if os.path.exists(pubf): + return stem, relpath, newf, pubf + else: + return stem, relpath, newf, None + +def 
make_refresh(target, title, delay=0): + text = ''' + + {1}: {0} + + + +

This page has moved permanently to + {0}. + Update your bookmarks if you wish. The compatibility + redirect will remain through, at least, early 2017. +

+ + +''' + return text.format(target, title, delay) + + +def create_symlink(source, target): + assert not os.path.exists(target) + targetdir = os.path.dirname(target) + if not os.path.isdir(targetdir): + logger.debug("Creating directory %s", targetdir) + os.makedirs(targetdir) + logger.debug("Creating symlink %s, pointing to %s", target, source) + os.symlink(os.path.relpath(source, start=targetdir), target) + + +def create_refresh_meta_equiv(fname, url, stem, **kwargs): + assert not os.path.exists(fname) + targetdir = os.path.dirname(fname) + if not os.path.isdir(targetdir): + logger.debug("Creating directory %s", targetdir) + os.makedirs(targetdir) + logger.debug("Creating file %s, with redirect to %s", fname, url) + with open(fname, 'w') as f: + f.write(make_refresh(url, stem, **kwargs)) + + +def howtos(stems, howtopath, newtree, pubdir, urlbase): + ldptree = dict() + for s, r in walk_html_chunked_files(stems, howtopath, howtopath): + ldptree[r] = htmlf(s, r, pubdir, newtree) + # print('chunked_files', s, r) + + for s, r in walk_html_chunked_dirs(stems, howtopath, howtopath): + ldptree[r] = htmld(s, r, pubdir, newtree) + # print('chunked_dirs', s, r) + + howto_htmls = opj(howtopath, 'html_single') + for s, r in walk_html_single(stems, howto_htmls, howtopath): + ldptree[r] = htmls(s, r, pubdir, newtree) + # print('html_single', s, r) + + for s, r in walk_simple(stems, opj(howtopath, 'text'), howtopath): + ldptree[r] = txt(s, r, pubdir, newtree) + # print('text', s, r) + + for s, r in walk_simple(stems, opj(howtopath, 'pdf'), howtopath): + ldptree[r] = pdf(s, r, pubdir, newtree) + # print('pdf', s, r) + + # -- have to symlink the PDF and TXT files + # + for fname in sorted(ldptree.keys(), key=lambda x: x.lower()): + stem, relpath, newpath, pubpath = ldptree[fname] + url = pubpath.replace(pubdir, urlbase) + if fname.startswith('text/') or fname.startswith('pdf/'): + create_symlink(pubpath, newpath) + else: + url = pubpath.replace(pubdir, urlbase) + 
#! /bin/bash
#
# -- migration-helper.sh: run howtomigration.py and guidemigration.py
#    against the website content tree
#
#    CONTENTROOT may be set in the environment to point at another tree.

set -e
set -x

SELFNAME="$( readlink --canonicalize "${0}" )"
ME="${SELFNAME##*/}"  # -- basename
DIR="${SELFNAME%/*}"  # -- dirname

HOWTO_MIGRATOR="${DIR}/howtomigration.py"
GUIDE_MIGRATOR="${DIR}/guidemigration.py"

CONTENTROOT="${CONTENTROOT:-/home/mabrown/wip/tldp/website/html}"
cd "$CONTENTROOT"

# -- trailing slash, atypically included on PUBDIR, here
PUBDIR="${CONTENTROOT}/en/"
URL_PUBDIR=http://www.tldp.org/en/

HOWTOS="${CONTENTROOT}/HOWTO"
GUIDES="${CONTENTROOT}/LDP"

# -- HOWTO handling: build symlinks and HTTP META-EQUIV files
#
HOWTO_COMPAT=HOWTO.compat/
test -d "${HOWTO_COMPAT}" \
  || mkdir "${HOWTO_COMPAT}"

HOWTO_COMPAT="$( readlink --canonicalize "${HOWTO_COMPAT}" )"

python \
  "${HOWTO_MIGRATOR}" "${HOWTOS}" "${HOWTO_COMPAT}" "${PUBDIR}" "${URL_PUBDIR}"

# -- guide handling: work on a copy of the LDP tree, since the migrator
#    renames files inside the tree it is given
#
GUIDE_COMPAT=LDP.compat/
test -d "${GUIDE_COMPAT}" \
  || mkdir "${GUIDE_COMPAT}"

# -- absolute path, same as HOWTO_COMPAT above
GUIDE_COMPAT="$( readlink --canonicalize "${GUIDE_COMPAT}" )"

rsync --archive --verbose ./LDP/ "${GUIDE_COMPAT}/"

python \
  "${GUIDE_MIGRATOR}" "${GUIDES}" "${GUIDE_COMPAT}" "${PUBDIR}" "${URL_PUBDIR}"

exit 0

# -- end of file
#! /bin/bash
#
# -- migration-preparation.sh: tidy the content tree before the migration
#    runs; remove known-broken symlinks and move retired documents aside
#
#    CONTENTROOT may be set in the environment to point at another tree.

set -e
set -x

SELFNAME="$( readlink --canonicalize "${0}" )"
ME="${SELFNAME##*/}"  # -- basename
DIR="${SELFNAME%/*}"  # -- dirname

CONTENTROOT="${CONTENTROOT:-/home/mabrown/wip/tldp/website/html}"
cd "$CONTENTROOT"

# -- minor cleanup of dangling or otherwise broken symlinks:
#
# NOTE(review): these paths begin with "html/" although the script has
# already changed into a directory named html -- confirm they resolve.
for LINK in \
  html/pub/Linux/docs/HOWTO/translations/polish/.message \
  html/pub/Linux/docs/HOWTO/translations/pl/.message \
  html/LDP/LGNET/182/184 \
  ; do

  # -- an "if" keeps a non-symlink from producing a failing status
  #    under "set -e"
  if test -L "$LINK"; then
    rm -f "$LINK"
  fi

done

ARCHIVE=archive
test -d "${ARCHIVE}" \
  || mkdir "${ARCHIVE}"

# -- populate the archive with retired items
#
mv \
  --target-directory "${ARCHIVE}" \
  --verbose \
  -- \
  HOWTO/Netscape+Proxy.html \
  HOWTO/Sendmail+UUCP.html \
  HOWTO/GTEK-BBS-550.html \
  HOWTO/DPT-Hardware-RAID.html \
  HOWTO/Consultants-HOWTO.html \
  HOWTO/WikiText-HOWTO \
  HOWTO/Security-Quickstart-Redhat-HOWTO

# -- and populate the really ancient crap
#
TODELETE="todelete-$( date +%F )"
test -d "${TODELETE}" \
  || mkdir "${TODELETE}"

mv \
  --target-directory "${TODELETE}" \
  --verbose \
  -- \
  HOWTO/Acer-Laptop-HOWTO.html \
  HOWTO/Linux-From-Scratch-HOWTO.html \
  HOWTO/Distributions-HOWTO.html \
  HOWTO/MIPS-HOWTO.html \
  HOWTO/3Dfx-HOWTO.html \
  HOWTO/PostgreSQL-HOWTO.html

# -- end of file
/usr/bin/python +# +# -- migrate to the new naming scheme + +from __future__ import absolute_import, division, print_function + +import os +import sys +import errno +import logging +import functools + +logformat = '%(levelname)-9s %(name)s %(filename)s#%(lineno)s ' \ + + '%(funcName)s %(message)s' +logging.basicConfig(stream=sys.stderr, format=logformat, +level=logging.ERROR) +logger = logging.getLogger(__name__) + +# -- short names +# +opa = os.path.abspath +opb = os.path.basename +opd = os.path.dirname +opj = os.path.join +opn = os.path.normpath +opr = os.path.relpath +ops = os.path.split + +movingstems = dict() +movingstems['intro-linux'] = 'Intro-Linux' + +def namegenerator(suffix, stem, dirname): + fname = os.path.join(dirname, stem, stem + suffix) + if os.path.exists(fname): + return fname + else: + return None + +pdf = functools.partial(namegenerator, '.pdf') +txt = functools.partial(namegenerator, '.txt') +html = functools.partial(namegenerator, '.html') +htmls = functools.partial(namegenerator, '-single.html') + +def validate_args(argv): + if len(argv) == 4: + for d in argv[:3]: + if not os.path.isdir(d): + return False + return True + return False + + +def printstems(fname): + print(fname) + + +def firstdir(fdir): + if os.path.isabs(fdir): + raise ValueError("received absolute path") + desired = os.path.normpath(fdir) + while os.path.sep in desired: + desired, _ = os.path.split(desired) + return desired + + +def stem_and_ext(name): + '''return (stem, ext) for any relative or absolute filename''' + return os.path.splitext(os.path.basename(os.path.normpath(name))) + +def extract_rs_html(root, name): + found = opj(root, name) + relpath = opr(found, start=root) + stem = firstdir(relpath) + return relpath, stem + + +def extract_rs_htmls(root, name): + found = opj(root, name) + relpath = opr(found, start=opj(root, 'html_single')) + stem = firstdir(relpath) + return relpath, stem + + +def extract_relpath_and_stem(root, name): + found = opj(root, name) + stem, _ 
= stem_and_ext(found) + relpath = opr(found, start=root) + return relpath, stem + +def extract_stem_firstdir(name, base): + relpath = opr(name, start=base) + stem = firstdir(relpath) + return stem + + +def walktree(func, pubdir, oldtree, newtree, urlpath): + for root, dirs, files in os.walk(oldtree): + for x in files: + found = os.path.join(root, x) + relpath = os.path.relpath(found, start=oldtree) + _, ext = stem_and_ext(found) + if ext not in ('.pdf', '.html', '.txt'): + if not relpath.startswith('text'): + func(('skip', '', '', '', found)) + continue + if relpath.startswith('text') and ext == '': + relpath, stem = extract_relpath_and_stem(oldtree, found) + newname = txt(stem, pubdir) + func(('TEXT', stem, relpath, newname, found)) + elif relpath.startswith('pdf') and ext == '.pdf': + relpath, stem = extract_relpath_and_stem(oldtree, found) + newname = pdf(stem, pubdir) + func(('PDF', stem, relpath, newname, found)) + elif relpath.startswith(opj(oldtree, 'html_single')): + stem = extract_stem_firstdir(found, opj(oldtree, 'html_single')) + newname = htmls(stem, pubdir) + func(('HTMLS', stem, relpath, newname, found)) + elif root == oldtree: # -- plain-files at root + pass + else: + relpath, stem = extract_rs_html(oldtree, found) + newname = html(stem, pubdir) + func(('HTML', stem, relpath, newname, found)) + + +def main(fin, fout, argv): + me = os.path.basename(sys.argv[0]) + usage = "usage: %s " % (me,) + if not validate_args(argv): + return usage + pubdir, oldtree, newtree, urlpath = argv + walktree(printstems, pubdir, oldtree, newtree, urlpath) + return os.EX_OK + +if __name__ == '__main__': + sys.exit(main(sys.stdin, sys.stdout, sys.argv[1:])) + +# -- end of file