#!/usr/local/bin/perl
#
# hyper - Filter to make plain text into hypertext
#
# version 1.5
# Aug 20 1995
#
# Rob Funk  <funk+@osu.edu>
#
# Currently Translates:
#   & to &amp;
#   < to &lt;
#   > to &gt;
#
# Currently Understands and Links:
#   Email addresses (username@machine.name)
#   URLs: news, http, gopher, ftp, file (all but news need //)
#   FTP references (site.domain:/path/filename)
#
# TODO:
#   Add RFC links to http://www.cis.ohio-state.edu/htbin/rfc/rfc*.html
#     (see http://www.cis.ohio-state.edu/hypertext/information/rfc.html)
#   Translate more special characters, including those from the Mac charset
#
###########################################################################

# Wrap the whole output in <PRE> so the browser keeps the input's own
# line breaks and spacing.
print "<PRE>\n";

# Filter every input line (files named on the command line, or standard
# input when there are none) through hyper() and emit the result.
while (defined(my $text_line = <>)) {
    my $html_line = hyper($text_line);
    print $html_line;
}

print "</PRE>\n";


###########

# Convert one line of plain text to HTML.
#
# Argument: a single string (normally one input line, newline included).
# Returns:  a new string in which
#   - the characters & < > are replaced by &amp; &lt; &gt;
#   - email addresses, news:/http:/gopher:/ftp:/file: URLs and old-style
#     "site.domain:/path" FTP references are wrapped in
#     <A HREF="...">...</A> anchors.
# The argument is copied first, so the caller's variable is not modified.
#
# This is all sed-like magic -- not line noise!
sub hyper {
    my ($line) = @_;

    # We need to fix 3 characters: & < >
    # The & has to be handled first; otherwise the ampersands introduced
    # by &lt; and &gt; would themselves be escaped again.
    $line =~ s/&/&amp;/g;
    $line =~ s/</&lt;/g;
    $line =~ s/>/&gt;/g;
    # There are others that could be translated, but those are the
    # most important ones...

    # (I'm using ! to separate the parts of expressions below,
    #  since the more common / and | are used for other things.)

    # translate email addresses to mailto: links
    $line =~ s!([\w.+%-]+\@[\w-]+(\.[\w-]+)*)!<A HREF="mailto:$1">$1</A>!g;

    # translate newsgroup URLs (not plain newsgroup names) into links.
    # The dot between name components is escaped and every component may
    # be any length, so the whole group name is linked and the match
    # cannot run across a space or other punctuation.
    $line =~ s!(news:[\w-]+(\.[\w-]+)*)!<A HREF="$1">$1</A>!g;

    # I'm allowing * and ? in the file/pathnames, only because I think
    # it's better than cutting off the URL early.  It won't end up
    # being a valid URL.

    # translate other URLs into links:
    #   scheme://host[.domain...][:port][/path...]
    $line =~ s!((http|gopher|ftp|file)://[\w-]+(\.[\w-]+)*(:\d+)?(/([\w.+*?~%-])*)*)!<A HREF="$1">$1</A>!g;

    # Old-style FTP references (site.domain:/path/filename) to URL links
    # I don't completely trust this yet, but it seems to be working now.
    # $1 is the host name (at least two dot-separated parts), $3 the path
    # after the ":/"; the visible text keeps the original notation.
    $line =~ s!([\w-]+(\.[\w-]+)+):/([\w.+/*?%~-]*)!<A HREF="ftp://$1/$3">$1:/$3</A>!g;

    return $line;
}
