#! /usr/bin/perl
#
# man2html.pl Version 1.2
# Copyright 1997, 1998 by Richard Dawe
#
# This software is distributed under the terms of the GNU General Public
# License, which should have been distributed with this file as LICENSE. Please
# read the notice below.
#
# ---
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
#
# ---
#
# Description:
#
# This program uses the 'man' command to generate text output which is then
# converted into HTML using the PRE pre-formatted tags. Any references like
# "socket(2)" are converted into hyperlinks.
#
# The program must be run from the directory that contains the 'man' tree,
# i.e. the man pages will be in the relative paths man/man1, man/man2, etc.
# The pages will be put into a similar tree with paths like html/html1, etc.
#
# The program also generates an alphabetic index file inside the 'html'
# directory, called 'index.html' by default, which has links to all the man
# pages converted.

# Uses
use strict;
no strict 'vars';
use FileHandle;
use DirHandle;

# Copyright message - printed at start-up and embedded in the generated HTML
$COPYRIGHT_TEXT = 'man2html.pl Version 1.2, Copyright 1997, 1998 by Richard Dawe';

# Start-up banner. The licence is named in full so that it agrees with the
# notice at the top of this file.
$COPYRIGHT_MESSAGE = <<EOT;
$COPYRIGHT_TEXT

This program is distributed under the terms of the GNU General Public
License. Please read the file LICENSE that should have been distributed with
this program.

EOT

# Translation information - complete HTML page written out as the
# 'translation issues' file that the generated pages link to
$TRANSLATION_MESSAGE = <<EOT;
<HTML>

<HEAD>
<TITLE>man2html Translation Issues</TITLE>
</HEAD>

<BODY>
<H1>man2html Translation Issues</H1>

man2html has some problems translating 'man' pages to HTML. These are:

<UL>
<LI>&lt; and &gt; cannot be translated properly, and are translated to (lt)
and (gt) respectively.
</UL><P>

<HR>
<I>$COPYRIGHT_TEXT</I>
</BODY>

</HTML>
EOT

# Footer appended to generated pages. The '.html' in the link is rewritten
# later on to match the extension chosen on the command line.
$TRANSLATION_LINK = <<EOT;
<I>Please look at the <A HREF=\"../trans.html\">man2html translation issues</A>.
<BR>$COPYRIGHT_TEXT</I>
EOT

# Help message - describes every option the command line parser accepts
$HELP_MESSAGE = <<EOT;
man2html is a man page to HTML converter. It does this using a brute-force
approach - it *does not* understand the 'nroff' format of man pages. The
output is generated by filtering and search-and-replace. There are also
problems converting less-than and greater-than signs.

The program expects the man pages to be in a directory structure like
man/man1, man/man2, etc. It creates directories like html/html1 (i.e.
analogous to the man directory names) with HTML-converted man pages in. The
program should be run from the directory in which the root man directory is
situated, e.g. /usr (e.g. /usr/man as the man root, /usr/html as the
HTML-converted man root).

If the '-htm' option is used, output files will have the extension ".htm"
rather than ".html". If the '-1' option is used, then only the index file
will be created and it will contain *all* converted pages.

If the '-idx:NAME' (or '--index:NAME') option is used, the index file will be
called NAME followed by the extension, rather than "index" followed by the
extension. The '-h', '--help' and '-?' options display this message.
EOT

# Defaults - each can be overridden from the command line
$INDEX_FILE = "index";          # Index filename stem
$HTML_EXTENSION = ".html";      # Long extension by default
$OUTPUT_ALLINONE = 0;           # Output all in one file?

# Print the copyright message
print $COPYRIGHT_MESSAGE;

# Parse the command line arguments. Arguments that are not recognised are
# silently ignored.
foreach my $arg (@ARGV) {
    # Help? Show the usage text and stop before any work is done.
    if ( ($arg eq '-h') || ($arg eq '--help') || ($arg eq '-?') )
    {
        print $HELP_MESSAGE;
        exit;
    }

    # Index filename stem, e.g. '-idx:contents' or '--index:contents'. The
    # pattern is anchored with '^'; writing '$\' inside a pattern makes Perl
    # interpolate the output record separator variable rather than anchor or
    # escape anything. Everything after the first colon is the name, so a
    # stem may itself contain colons.
    if ($arg =~ m/^--?(?:idx|index):(.*)\z/s) {
        my $stem = $1;
        die "No index file name given in '$arg'\n" if ($stem eq '');
        $INDEX_FILE = $stem;
    }

    # .htm rather than .html?
    if ( ($arg eq '-htm') || ($arg eq '--htm') ) {
        $HTML_EXTENSION = ".htm";
    }

    # Everything in one (index) file?
    if (   ($arg eq '-1')
        || ($arg eq '-one') || ($arg eq '--one')
        || ($arg eq '--all-in-one')
       ) {
        $OUTPUT_ALLINONE = 1;
    }
}

# Update $TRANSLATION_LINK so that it points at the translation issues file
# with the extension that was actually chosen
$TRANSLATION_LINK =~ s/\.html/$HTML_EXTENSION/g;

# Pages to put in index - each entry is a single string of the form
# "name(section):html<section>/name<extension>", i.e. caption, colon, URL
@INDEXED = ();

# Check that the man directory exists
die "No man directory in current directory" if (! -e 'man');
die "man is not a directory" if (! -d 'man');

# Create the necessary html directories. The second argument of mkdir is the
# *requested* permissions, from which the process umask is removed
# automatically. Passing the umask itself as the mode would therefore clear
# every bit it sets and leave a directory nobody can write to.
if (! -e 'html')
{
    mkdir('html', 0777) || die "Unable to create 'html' directory: $!";
    print "Created \'html\' directory\n";
}
die "html is not a directory" if (! -d 'html');

# Valid man<x> dirs, e.g. man1 - this list and the section character class
# used by man2html() must be consistent for this program to work properly
@MANDIRS = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'o', 'n', 'l', 'p');

# Recurse into these directories
chdir('man') || die "Unable to change to 'man' directory: $!";

foreach my $section (@MANDIRS)
{
    my $mandir  = 'man' . $section;
    my $htmldir = '../html/html' . $section;

    # Go to next one if this one doesn't exist
    if (! -e $mandir)
    {
        print 'No man pages in section ' . $section . "\n";
        next;
    }

    # Create html dir if necessary. No per-section directory is needed when
    # everything goes into the index file. The mode passed to mkdir is the
    # requested permissions (the umask is applied automatically); passing the
    # umask itself would create a directory without any permissions.
    if ( (! -e $htmldir) && !$OUTPUT_ALLINONE )
    {
        mkdir($htmldir, 0777)
            || die "Unable to create 'html/html" . $section . "' directory: $!";
        print 'Created \'html/html' . $section . "\' directory\n";
    }

    # Find all the '.<section>' files so that they can be requested via man
    # & saved as HTML in the HTML directory later on
    my $man_dh = DirHandle->new($mandir);
    if (!defined($man_dh)) { die "Unable to open directory $mandir: $!"; }
    my @entries = $man_dh->read();
    $man_dh->close();

    my %seen = ();  # Page names already indexed for this section

    foreach my $entry (@entries)
    {
        # Only plain files can be man pages; this also skips '.' and '..'
        next if (! -f "$mandir/$entry");

        my $pagename = lc($entry);      # Lowercase it for links

        # Remove the section extension, plus any compression suffix after
        # it. The dot is escaped and the match is anchored at the end of the
        # name so that only a real trailing ".<section>" is removed, not the
        # first place where some character is followed by the section.
        $pagename =~ s/\.\Q$section\E(?:\.(?:gz|bz2|xz|z))?\z//;

        # Skip nameless entries and names already seen (e.g. pages that
        # differ only in case or in compression)
        next if ($pagename eq '');
        next if ($seen{$pagename}++);

        # Put entry in index
        push(@INDEXED, ($pagename."(".$section."):html".$section."/".$pagename.$HTML_EXTENSION));
    }
}

# Change back to parent directory of 'man' directory
chdir('..') || die "Unable to change back to parent of 'man' directory: $!";

# Sort names of those pages to be converted
@INDEXED = sort(@INDEXED);

# Create the index file. The handle stays open after this section so that the
# end of the index (and, in all-in-one mode, the pages) can be appended. The
# file is opened with an explicit 'w' mode so that the name taken from the
# command line is only ever treated as a file name.
$index_fh = FileHandle->new("html/$INDEX_FILE$HTML_EXTENSION", 'w');
if (!defined($index_fh)) {
    die "Unable to create index file 'html/$INDEX_FILE$HTML_EXTENSION': $!";
}

$index_fh->print(
                   "<HTML>\n<HEAD>\n<TITLE>Man Page Index</TITLE>\n</HEAD>\n"
                 . "<BODY>\n<H1><A NAME=\"toc\">Man Page Index</A></H1>\n"
                );

# Row of A-Z links to the per-letter headings generated below
$index_fh->print(
                   join(' ', map { "<A HREF=\"#$_\">$_</A>" } ('A' .. 'Z'))
                 . "<P>\n"
                );

$letter = '';       # Used to generate targets for links at top
$manhtml = '';      # Converted man page
$manhtmlfile = '';  # Converted man page file name
$manref = '';       # $manpagename($mansection) style man references
$manpagename = '';  # man page name
$mansection = '';   # man section

foreach my $entry (@INDEXED)
{
    # Get details for the page to be indexed. Entries look like
    # "name(section):html<section>/name<ext>"; they are divided at the "):"
    # ending the reference rather than at the first colon, so page names that
    # themselves contain colons are kept whole.
    ($manref, $manhtmlfile) = ($entry =~ m/^(.*\)):(.*)\z/s);
    ($manpagename, $mansection) = ($manref =~ m/^(.*)\((.*)\)\z/s);

    # Do we need a new letter from the alphabet?
    if (uc(substr($manpagename, 0, 1)) ne $letter)
    {
        $letter = uc(substr($manpagename, 0, 1));
        $index_fh->print("<H2><A NAME=\"$letter\">$letter</A></H2>\n");
    }

    # If each man page has an HTML page, generate a link to its file, else
    # link to later in the page.
    if (!$OUTPUT_ALLINONE) {
        $index_fh->print(
                           '<A HREF="' . lc($manhtmlfile)
                         . "\">$manpagename($mansection)</A><BR>\n"
                        );
    } else {
        $index_fh->print(
                           '<A HREF="#' . $manpagename . $mansection
                         . "\">$manpagename($mansection)</A><BR>\n"
                        );
    }
}

# Divide between table of contents and rest in all-in-one style
if ($OUTPUT_ALLINONE) { $index_fh->print("<BR><HR>\n"); }

foreach my $entry (@INDEXED)
{
    # Get details for the page to be converted. Entries look like
    # "name(section):html<section>/name<ext>"; they are divided at the "):"
    # ending the reference rather than at the first colon, so page names that
    # themselves contain colons are kept whole.
    ($manref, $manhtmlfile) = ($entry =~ m/^(.*\)):(.*)\z/s);
    ($manpagename, $mansection) = ($manref =~ m/^(.*)\((.*)\)\z/s);

    # --- Generate the HTML for the man page ---

    # Open the output / use $index_fh
    if (!$OUTPUT_ALLINONE) {
        $manhtml_fh = FileHandle->new("html/$manhtmlfile", 'w');
    } else {
        $manhtml_fh = $index_fh;
    }

    if (!defined($manhtml_fh)) { die "Unable to open output '$manhtmlfile': $!"; }

    # Convert the man page
    $manhtml = man2html($manpagename, $mansection,
                        $HTML_EXTENSION, $OUTPUT_ALLINONE);

    if (!$OUTPUT_ALLINONE) {
        # Stand-alone page: wrap the converted text in a complete document.
        # The HEAD element is closed before BODY starts.
        $manhtml_fh->print(
                             "<HTML>\n<HEAD>\n"
                           . "<TITLE>$manpagename($mansection)</TITLE>\n"
                           . "</HEAD>\n"
                           . "<BODY>\n" . $manhtml
                           . "<HR>\n<P>$TRANSLATION_LINK</P>"
                           . "</BODY>\n</HTML>"
                          );

        # Done now, so close it. Buffered write errors only show up here.
        $manhtml_fh->close()
            || die "Unable to finish writing 'html/$manhtmlfile': $!";
    } else {
        # All-in-one: add the page to the index file under an anchor that
        # the table of contents links to, followed by a link back to the top.
        $manhtml_fh->print(
                             '<H3><A NAME="' . $manpagename . $mansection
                           . "\">$manpagename($mansection)</A></H3>\n"
                           . $manhtml
                          );

        $manhtml_fh->print(
                             "<P ALIGN=\"CENTER\">"
                           . "<A HREF=\"#toc\">[Top]</A>"
                           . "</P>\n<HR>\n"
                          );
    }
}

# The index file sits in the html directory itself, next to the translation
# issues file, so drop the leading '../' from the link before writing the end
# of the index.
$TRANSLATION_LINK =~ s/\.\.\/trans$HTML_EXTENSION/trans$HTML_EXTENSION/g;

# In all-in-one mode every page already ends with a rule; otherwise add one
# to separate the index entries from the footer.
$index_fh->print("<BR><HR>\n") unless ($OUTPUT_ALLINONE);
$index_fh->print("\n<P>$TRANSLATION_LINK</P>\n");
$index_fh->print("</BODY>\n</HTML>\n");
$index_fh->close();

# Write out the page that the translation link points at
CreateTranslationFile($TRANSLATION_MESSAGE, $HTML_EXTENSION);

# All done
exit(0);

# ------------
# - man2html -
# ------------

# This fetches the required man page and converts it to HTML. The HTML does
# not include a header or body tags, so this can be used in the middle of an
# HTML document. The HTML is returned as a string.
#
# Parameters:
#   $manpagename - name of the page to fetch
#   $mansection  - section to fetch it from
#   $htmlext     - extension used when linking to other converted pages
#   $localref    - if 1, references like "socket(2)" become links to anchors
#                  in the current document ("#socket2"); otherwise they link
#                  to "../html<section>/<name><htmlext>"
#
# Returns the converted page wrapped in <P><PRE>...</PRE></P>. If 'man'
# cannot be run, a warning is issued and the wrapper is returned empty.

sub man2html
{
    my ($manpagename, $mansection, $htmlext, $localref) = @_;
    my ($manpage, $man_fh);
    my ($MANDIRS_REGEXP);

    # Used in link construction - must agree with the list of sections that
    # the main program scans
    $MANDIRS_REGEXP = '0-9onlp';

    # Get the 'raw' output from man. The command is run with a list-form pipe
    # open (needs Perl 5.8 or later), so the page name and section are passed
    # to man as-is instead of being parsed by a shell; names containing
    # spaces or shell metacharacters cannot alter the command.
    warn "Executing: man - $mansection $manpagename";
    $manpage = '';
    if (open($man_fh, '-|', 'man', '-', $mansection, $manpagename)) {
        local $/ = undef;       # Slurp the whole page in one read
        $manpage = <$man_fh>;
        $manpage = '' if (!defined($manpage));
        close($man_fh);
    } else {
        warn "Unable to run man for $manpagename($mansection): $!";
    }

    # Nuke weird control characters (a character followed by a backspace)
    $manpage =~ s/.\x08//g;

    # Escape characters. '&' goes first so that text from the page is never
    # read as an HTML entity by the browser.
    $manpage =~ s/&/&amp;/g;
    $manpage =~ s/</(lt)/g;
    $manpage =~ s/>/(gt)/g;

    # Generate links - if $localref == 1, then links to current document, else
    # to man pages in html/html<x>/ directory structure. The link target is
    # lowercased; the visible text keeps the page's own spelling.
    if ($localref == 1) {
        $manpage =~ s{(\w+)\(([$MANDIRS_REGEXP])\)}{<A HREF="\L#$1$2\E">$1($2)</A>}g;
    } else {
        $manpage =~ s{(\w+)\(([$MANDIRS_REGEXP])\)}{<A HREF="\L../html$2/$1$htmlext\E">$1($2)</A>}g;
    }

    # Return it
    $manpage = '<P><PRE>' . $manpage . '</PRE></P>';
    return($manpage);
}

# Placeholder: takes no action and returns nothing (an empty list, or undef
# in scalar context). No call to it is visible in this file.
sub HTMLFooter
{
    return;
}

# -------------------------
# - CreateTranslationFile -
# -------------------------

# Writes the translation issues page to "html/trans<extension>", relative to
# the current directory.
#
# Parameters:
#   $msg     - complete text of the page
#   $htmlext - extension for the file name, including the leading dot
#
# Returns 1 on success. Dies with the file name and the system error if the
# file cannot be created, written or closed.

sub CreateTranslationFile {
    my ($msg, $htmlext) = @_;
    my ($fh);
    my $path = "html/trans$htmlext";

    $fh = FileHandle->new($path, 'w');
    if (! defined($fh)) {
        die "Unable to create translation information file '$path': $!";
    }

    $fh->print($msg)
        || die "Unable to write translation information file '$path': $!";

    # Buffered output is only flushed here, so a full disk shows up at close
    $fh->close()
        || die "Unable to close translation information file '$path': $!";

    return 1;
}

