From f8fc5a2301bcf0cbfaa1db15adedde386e26a081 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@gmail.com>
Date: Wed, 19 Oct 2005 07:28:43 +0000
Subject: [PATCH] Maintenance scripts

---
 scripts/README                      |   4 +
 scripts/add_parens_for_own_funcs.sh | 238 ++++++++++++++++++++++++++++
 scripts/unformat_parens.sh          | 108 +++++++++++++
 3 files changed, 350 insertions(+)
 create mode 100644 scripts/README
 create mode 100644 scripts/add_parens_for_own_funcs.sh
 create mode 100644 scripts/unformat_parens.sh

diff --git a/scripts/README b/scripts/README
new file mode 100644
index 000000000..6b29c54c1
--- /dev/null
+++ b/scripts/README
@@ -0,0 +1,4 @@
+The files in this directory are scripts for man-pages maintenance tasks.
+They may be useful for downstream man-pages package maintainers or for 
+man-pages translators. This directory does not contain any files that
+need to be installed in order to use the manual pages.
diff --git a/scripts/add_parens_for_own_funcs.sh b/scripts/add_parens_for_own_funcs.sh
new file mode 100644
index 000000000..25be72960
--- /dev/null
+++ b/scripts/add_parens_for_own_funcs.sh
@@ -0,0 +1,238 @@
+#!/bin/sh
+#
+# add_parens_for_own_funcs.sh
+#
+# This script is designed to fix inconsistencies in the use of
+# parentheses after function names in the manual pages.
+# It changes manual pages to add these parentheses.
+# The problem is how to determine what is a "function name".
+# The approach this script takes is the following:
+#
+#   For each manual page named in the command line that contains 
+#           more than one line (i.e., skip man-page link files)
+#       Create a set of names taken from the .SH section of the
+#               page and from grepping all pages for names that 
+#               have .so links to this page
+#       For each name obtained above
+#           If we can find something that looks like a prototype on 
+#                   the page, then
+#               Try to substitute instances of that name on the page.
+#                   (instances are considered to be words formatted
+#		    using ^.[BI] or \f[BI]...\f[PR] -- this script
+#		    ignores unformatted instances on function names.)
+#           fi
+#       done
+#   done
+#
+# The rationale of the above is that the most likely function names
+# that appear on a page are those that the manual page is describing.
+# It doesn't fix everything, but it catches many instances.
+# The rest will have to be done manually.
+#
+# This script is rather verbose because it provides a computer-assisted
+# solution, rather than one that is fully automated.  When running it,
+# pipe the output through
+#
+#            ...  2>&1 | less
+#
+# and take a good look at the output.  In particular, you can scan
+# the output for *possible* problems by looking for the pattern: /^%%%/
+# The script's output should be enough to help you determine if the 
+# problem is real or not.
+#
+# Suggested usage (in this case to fix pages in Section 2):
+#
+#     cd man2
+#     sh add_parens_for_own_funcs.sh *.2 2>&1 | tee changes.log | less
+#
+# Use the "-n" option for a dry run, in order to see what would be
+# done, without actually doing it.
+#
+# (And, yes, there are many ways that this script could probably be 
+# made to work faster...)
+#
+######################################################################
+#
+# 
+
+file_base="tmp.$(basename $0)"
+
+work_dst_file="$file_base.dst"
+work_src_file="$file_base.src"
+
+matches_for_all_names="$file_base.all_match"
+matches_for_this_name="$file_base.this_match"
+
+all_files="$work_dst_file $work_src_file $matches_for_all_names \
+	   $matches_for_this_name"
+
+rm -f $all_files
+
+# Command-line option processing
+
+really_do_it=1
+while getopts "n" optname; do
+    case "$optname" in
+    n)	really_do_it=0;
+    	;;
+    *)  echo "Unknown option: $OPTARG"
+        exit 1
+	;;
+    esac
+done
+
+shift $(( OPTIND - 1 ))
+
+# Only process files with > 1 line -- single-line files are link files 
+
+for page in $(wc $* 2> /dev/null | awk '$1 > 1 {print $4}'| \
+    grep -v '^total'); do
+
+    echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<"
+    echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<" 1>&2
+
+    # Extract names that follow the ".SH NAME" directive -- these will
+    # be our guesses about function names to look for
+
+    sh_nlist=$(cat $page | \
+        awk 'BEGIN { p = 0 } 
+             /^\.SH NAME/ { p = NR } 
+	     /^.SH/ && NR > p { p = 0 }	  # Stop at the next .SH directive
+	     p > 0 && NR > p {print $0}	  # These are the lines between
+					  # the two .SH directives
+	    ')
+    sh_nlist=$(echo $sh_nlist | sed -e 's/ *\\-.*//' -e 's/, */ /g')
+    echo "### .SH name list:" $sh_nlist
+
+    # Some pages like msgop.2 don't actually list the function names in 
+    # the .SH section -- but we can try using link pages to give us 
+    # another guess at the right function names to look for
+
+    so_nlist=$(grep -l "^\\.so.*/$(echo $page| \
+	     sed -e 's/\.[1-8]$//')\\." $* | \
+	     sed -e 's/\.[1-8]$//g')
+
+    echo "### .so name list:" $so_nlist
+
+    # Combine the two lists, eliminate duplicates
+    
+    nlist=$(echo $sh_nlist $so_nlist | tr ' ' '\012' | sort -u)
+
+    maybechanged=0
+    
+    cp $page $work_dst_file
+    rm -f $matches_for_all_names; # touch $matches_for_all_names
+
+    for rname in $nlist; do	# try each name from out list for this page
+
+        # A very few names in .SH sections contain regexp characters!
+
+	name=$(echo $rname | sed -e 's/\*/\\*/g' -e 's/\./\\./g' \
+		-e 's/\[/\\[/g' -e 's/\+/\\+/g')
+
+        echo "########## trying $rname ##########"
+
+	rm -f $matches_for_this_name
+	
+        grep "^.BR* $name *$" $page | \
+	    >> $matches_for_this_name
+        grep "^.BR $name [^(\"]$" $page | \
+	    >> $matches_for_this_name
+        grep '\\fB'"$name"'\\f[PR][ .,;:]' $page | \
+	    >> $matches_for_this_name
+        grep '\\fB'"$name"'\\f[PR]$' $page | \
+	    >> $matches_for_this_name
+	
+	cat $matches_for_this_name | sed -e 's/^/### MATCH: /'
+	cat $matches_for_this_name >> $matches_for_all_names
+
+	# Only process a page if we can see something that looks
+	# like a function prototype for this name in the page
+
+        if grep -q "$name *(" $page || \
+	    grep -q "$name\\\\f.[\\ ]*(" $page; then 
+
+	    # '.B name$'
+	    # '.BR name [^("]*$      
+	    # (The use of [^"] in the above eliminates lines
+	    # like: .BR func " and " func
+	    # Those lines better be done manually.)
+	    cp $work_dst_file $work_src_file
+            cat $work_src_file | \
+		sed \
+		-e "s/^.BR* $name *\$/.BR $name ()/" \
+		-e "/^.BR *$name [^(\"]*\$/s/^.BR *$name /.BR $name ()/" \
+		> $work_dst_file
+
+	    # '\fBname\fP[ .,;:]'
+	    # '\fBname\fP$'
+	    cp $work_dst_file $work_src_file
+            cat $work_src_file | \
+		sed \
+		-e 's/\\fB'$name'\\fP /\\fB'$name'\\fP() /g' \
+		-e 's/\\fB'$name'\\fP\./\\fB'$name'\\fP()./g' \
+		-e 's/\\fB'$name'\\fP,/\\fB'$name'\\fP(),/g' \
+		-e 's/\\fB'$name'\\fP;/\\fB'$name'\\fP();/g' \
+		-e 's/\\fB'$name'\\fP:/\\fB'$name'\\fP():/g' \
+		-e 's/\\fB'$name'\\fP$/\\fB'$name'\\fP()/g' \
+		> $work_dst_file
+
+	    # '\fBname\fR[ .,;:]'
+	    # '\fBname\fR$'
+	    cp $work_dst_file $work_src_file
+            cat $work_src_file | \
+		sed \
+		-e 's/\\fB'$name'\\fR /\\fB'$name'\\fR() /g' \
+		-e 's/\\fB'$name'\\fR\./\\fB'$name'\\fR()./g' \
+		-e 's/\\fB'$name'\\fR,/\\fB'$name'\\fR(),/g' \
+		-e 's/\\fB'$name'\\fR;/\\fB'$name'\\fR();/g' \
+		-e 's/\\fB'$name'\\fR:/\\fB'$name'\\fR():/g' \
+		-e 's/\\fB'$name'\\fR$/\\fB'$name'\\fR()/g' \
+		> $work_dst_file
+
+	    maybechanged=1
+        else
+            echo "%%%%%%%%%% WARNING: NO PROTOTYPE MATCHES FOR: $name"
+        fi
+    done
+
+    # If the file was changed, then:
+    # show "diff -U" output to user;
+    # and count number of changed lines and compare it with what 
+    # we expected, displaying a warning if it wasn't what was expected
+
+    if test $maybechanged -ne 0 && ! cmp -s $page $work_dst_file; then
+        diff -u $page $work_dst_file
+
+        made_matches=$(diff -U 0 $page $work_dst_file | grep '^\+[^+]' | \
+		wc -l | awk '{print $1}')
+
+	# The following line makes the changes -- comment it out if you 
+        # just want to do a dry run to see what changes would be made.
+
+	if test $really_do_it -ne 0; then
+            cat $work_dst_file > $page
+	fi
+
+    else
+        echo "### NOTHING CHANGED"
+	made_matches=0
+    fi
+
+    min_match=$(cat $matches_for_all_names | \
+	    sort -u | wc -l | awk '{print $1}')
+
+    echo "### Expected matches >= $min_match"
+    echo "### Made matches $made_matches"
+
+    if test $made_matches -lt $min_match; then
+        echo "%%%%%%%%%% WARNING: NOT ENOUGH MATCHES: " \
+	    "$made_matches < $min_match"
+    fi
+    
+done 
+
+# clean up
+
+rm -f $all_files
+exit 0
diff --git a/scripts/unformat_parens.sh b/scripts/unformat_parens.sh
new file mode 100644
index 000000000..4b68bdd57
--- /dev/null
+++ b/scripts/unformat_parens.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+#
+# unformat_parens.sh
+#
+# The manual pages before 2.10 treat format parentheses
+# inconsistently.  In some cases they are like
+#
+#       .B name()
+#
+# while in others they are like:
+#
+#       .BR name ()
+#
+# This script changes instances to the latter format.
+# It does not fix all such instances: some will have to be 
+# done manually.
+#
+# Use the "-n" option for a dry run, in order to see what would be
+# done, without actually doing it.
+#
+######################################################################
+#
+
+file_base="tmp.$(basename $0)"
+
+work_dst_file="$file_base.dst"
+work_src_file="$file_base.src"
+
+all_files="$work_dst_file $work_src_file"
+
+# Command-line option processing
+
+really_do_it=1
+while getopts "n" optname; do
+    case "$optname" in
+    n)	really_do_it=0;
+    	;;
+    *)  echo "Unknown option: $OPTARG"
+        exit 1
+	;;
+    esac
+done
+
+shift $(( OPTIND - 1 ))
+
+# Only process files with > 1 line -- single-line files are link files 
+
+for page in $(wc $* 2> /dev/null | awk '$1 > 1 {print $4}'| \
+    grep -v '^total'); do
+
+    cp $page $work_dst_file
+
+    echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<"
+
+    if false; then
+    grep '^\.I *[a-z0-9_][a-z0-9_]*()$' $page
+    grep '^\.B *[a-z0-9_][a-z0-9_]*()$' $page
+    echo '###'
+    grep '^\.[BIR][BIR] *[a-z0-9_][a-z0-9_]*()$' $page
+    echo '###'
+    grep '^\.[BIR][BIR] *[a-z0-9_][a-z0-9_]*() [^"]*$' $page
+    echo '###'
+    grep '()\\f[PR]' $page
+    echo '###'
+    fi
+
+    cp $work_dst_file $work_src_file
+    cat $work_src_file | \
+	sed \
+	-e '/^\.B *[a-z0-9_][a-z0-9_]*() *$/s/^\.B/.BR/' \
+	-e '/^\.I *[a-z0-9_][a-z0-9_]*() *$/s/^\.I/.IR/' \
+	> $work_dst_file
+
+    cp $work_dst_file $work_src_file
+    cat $work_src_file | \
+	sed \
+	-e '/^\.[BIR][BIR] *[a-z0-9_][a-z0-9_]*()$/s/()/ ()/' \
+	> $work_dst_file
+
+    cp $work_dst_file $work_src_file
+    cat $work_src_file | \
+	sed \
+	-e '/^\.[BIR][BIR] *[a-z0-9_][a-z0-9_]*() [^"]*$/s/() / ()/' \
+	> $work_dst_file
+
+    cp $work_dst_file $work_src_file
+    cat $work_src_file | \
+	sed \
+	-e '/()\\fP/s/()\\fP/\\fP()/g' \
+	-e '/()\\fR/s/()\\fR/\\fR()/g' \
+	> $work_dst_file
+
+    if ! cmp -s $page $work_dst_file; then
+        diff -u $page $work_dst_file
+
+	if test $really_do_it -ne 0; then
+            cat $work_dst_file > $page
+	fi
+
+    else
+        echo "### NOTHING CHANGED"
+    fi
+done
+
+# clean up
+
+rm -f $all_files
+exit 0