#!/bin/sh
#
# convert_to_utf_8.sh
#
# Find man pages with encoding other than us-ascii, and convert them
# to the utf-8 encoding.  Converted copies are written below the
# output directory given as the first argument, mirroring the relative
# path of each input file; the input files themselves are not modified.
#
# Example usage:
#
#    cd man-pages-x.yy
#    sh convert_to_utf_8.sh /path/to/out_dir man?/*
#
# Arguments:
#    $1      output directory (created on demand)
#    $2...   man page files to examine
#
# Exit status:
#    1 if fewer than two arguments are given, 0 otherwise.
#
######################################################################
#
# (C) Copyright 2013, Peter Schiffer
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details
# (http://www.gnu.org/licenses/gpl-2.0.html).
#

# Only POSIX test syntax ("[ ... ]") is used below: the script is run
# via /bin/sh, which on many systems does not understand "[[ ... ]]".

if [ "$#" -lt 2 ]; then
    echo "Usage: ${0} out_dir man?/*" 1>&2
    exit 1
fi

out_dir="$1"
shift

# Line written at the top of every converted page.  It is currently
# empty, so each output file starts with a blank line.
# NOTE(review): the sed filter below also removes the '\" t line;
# confirm that dropping it without re-adding it here is intended.
enc_line=""

for f in "$@"; do
    # "file -bi" prints e.g. "text/troff; charset=iso-8859-1";
    # keep only what follows the "=".
    enc=$(file -bi "$f" | cut -d = -f 2)

    # Pure ASCII pages need no conversion.
    if [ "$enc" = "us-ascii" ]; then
        continue
    fi

    dirn=$(dirname "$f")
    basen=$(basename "$f")
    new_dir="${out_dir}/${dirn}"

    if [ ! -d "$new_dir" ]; then
        if ! mkdir -p "$new_dir"; then
            echo "${0}: cannot create directory: $new_dir" 1>&2
            continue
        fi
    fi

    case "$basen" in
        armscii-8.7 | cp1251.7 | iso_8859-*.7 | koi8-?.7)
            # These pages describe a character set and are themselves
            # written in it, so the encoding is taken from the page name.
            #
            # iconv does not understand some encoding names that
            # start "iso_", but does understand the corresponding
            # forms that start with "iso-"
            from_enc="$(printf '%s\n' "$basen" | sed 's/\.7$//;s/iso_/iso-/')"
            ;;
        *)
            echo "NULL TRANSFORM: $f"
            from_enc=$enc
            ;;
    esac

    printf "Converting %-23s from %s\n" "$f" "$from_enc"

    # Start the output file with the header line, then append the
    # converted text with any old "-*- coding: ... -*-" line and any
    # line ending in '\" t' removed.
    printf '%s\n' "$enc_line" > "${new_dir}/${basen}"
    iconv -f "$from_enc" -t utf-8 "$f" \
        | sed "/.*-\*- coding:.*/d;/.\\\" t$/d" >> "${new_dir}/${basen}"
done

exit 0