$title

#!/usr/bin/perl -w
eval 'exec /local/all/perl -S $0 ${1+"$@"}'
    if 0;

$version = '$Id: bib2html,v 1.68 1997/04/04 19:46:16 hull Exp hull $';

#
# Convert from bibtex to HTML.
#
# Copyright 1995, 1996 David Hull.
# David Hull / hull@cs.uiuc.edu / http://www.uiuc.edu/ph/www/dlhull
# This program is free software. You can redistribute it and/or modify
# it under the terms of the GNU General Public License. See the
# files README and COPYING for details.

# Perl 4 style libraries: getopts.pl supplies &Getopts and ctime.pl
# supplies &ctime, both of which are called later in this script.
require 'getopts.pl';
require 'ctime.pl';

# Bibliography styles.
$EMPTY = 0;
$PLAIN = 1;
$ALPHA = 2;
$NAMED = 3;
$UNSORT = 4;

# Names of the temporary files created while running.
@tmpfiles = ();

# usage()
# Print a summary of the command line syntax to STDERR and exit with
# status 1.  Sets the global $program to the basename of $0.
# Keep the option list below in sync with the option string that is
# passed to &Getopts.
sub usage {
    $program = $0;
    $program =~ s+^.*/++;    # Strip any leading directory components.
    print STDERR <<_EOF_;
usage: $program [-a] [-d delim] [-h heading] [-s style] [-c] [-r] [-t] [-v] [-3] sourcefile [htmlfile]

    -a          Write abstract to htmlfile.
    -c          Sort chronologically, by year and then by author.
    -d delim    Specify bibliography delimiter.
    -h heading  String to use instead of default title when creating a new
                htmlfile. If updating an existing htmlfile, this option
                is ignored.
    -r          Sort in reverse chronological order.
    -s style    Control style of bibliography
                (empty, plain, alpha, named, or unsort).
    -t          Write timestamp to htmlfile.
    -v          Report the version number.
    -3          Perform HTML 3.0 character entity mappings.
_EOF_
    exit(1);
}

# Return the command needed to open a (perhaps compressed) file,
# as well as the type of compression.
# openCommand(path)
# Returns the list (cmd, cmp).  cmd is a string for two-argument open():
# a pipe from "uncompress -c" for names ending in .Z, a pipe from
# "gzip -d -c" for names ending in .z or .gz, and "<path" otherwise.
# cmp is "Compressed", "Gzipped" or "" respectively.
# NOTE(review): path is interpolated unquoted into a shell command, so
# file names containing shell metacharacters are unsafe here.
sub openCommand {
    local($path) = @_;
    local($cmd);
    local($cmp);

    command: {
        ($path =~ m/\.Z$/ &&
            ($cmd = "uncompress -c $path |", $cmp = "Compressed", last command));
        ($path =~ m/\.g?z$/ &&
            ($cmd = "gzip -d -c $path |", $cmp = "Gzipped", last command));
        ($cmd = "<$path", $cmp = "", last command);
    }

    ($cmd, $cmp);
}

# Kinds of paper files that doPaperLinks looks for.  Each name is also
# the prefix of the routine ("<name>PageCount") used to count pages.
@paperTypes = ("PostScript", "PDF", "DVI");

# PostScriptPageCount(cmd)
# cmd is an open() specification as returned by openCommand.  Scans the
# input up to the "%%EndComments" line and stops at the first
# "%%Pages: N" line that has a numeric argument.  Returns N when it is
# greater than zero; otherwise (or when the open fails) returns undef.
sub PostScriptPageCount {
    local($cmd) = @_;
    local($pageCount);

    #print "in PostScriptPageCount $cmd\n";
    open(FILE, $cmd) || (warn "error opening $cmd: $!\n", return undef);
    local($_);
    local($/) = "\n";    # Read line by line whatever the caller's $/ is.
    line: while (<FILE>) {
        last line if m/^%%EndComments/;
        if (m/^%%Pages:\s*(\d+)/) {
            $pageCount = $1 if ($1 > 0);
            last line;
        }
    }
    close(FILE);

    $pageCount;
}

# Don't know how to determine page count of PDF file.
sub PDFPageCount {
    undef;
}

# DVIPageCount(cmd)
# cmd is an open() specification as returned by openCommand.  Runs
# dviselect over the file with its output discarded and looks for the
# "wrote N pages" message it prints on stderr.  Returns N, or undef when
# no such message is seen or the child process cannot be started.
sub DVIPageCount {
    local($cmd) = @_;
    local($pageCount);
    local($pid);

    #print "in DVIPageCount $cmd\n";
    if ($cmd =~ m/^</) {
        # Simple file.
        # NOTE(review): $cmd is assumed to be the "<path" form, which is
        # handed to the shell as an input redirection -- confirm against
        # the upstream bib2html source.
        $cmd = "dviselect : $cmd >/dev/null";
    } else {
        # Compressed file.
        $cmd .= "dviselect : >/dev/null";
    }

    # Look at dviselect's stderr.
    $pid = open(DVISELECT, "-|");
    if (!defined($pid)) {
        # The fork failed: we are still the parent and must not exec.
        warn "error forking for $cmd: $!\n";
        return undef;
    }
    if ($pid == 0) {
        # Child: send stderr down the pipe and become the command.
        open(STDERR, ">&STDOUT");
        exec $cmd;
        # Only reached when exec fails; do not run on as a second copy
        # of this program.
        exit(1);
    }
    local($_);
    local($/) = "\n";    # Read line by line whatever the caller's $/ is.
    line: while (<DVISELECT>) {
        if (m/[Ww]rote (\d+) pages/) {
            $pageCount = $1;
            last line;
        }
    }
    close(DVISELECT);

    $pageCount;
}

# Make an intelligent link to a paper file.
#
# Operates on the text in $_.  For each type in @paperTypes, a comment
# of the form "<!-- TYPE: file -->" is replaced by a parenthesized link
# to file.  When file does not look like a URL it is looked up relative
# to $htmldir, and its compression type, page count and size in bytes
# are added to the link text.  If the file cannot be found a message is
# printed to STDERR and the comment is left unchanged.
sub doPaperLinks {
    local($file);
    local($cstr, $pstr, $sstr);

    papertype: foreach $paper (@paperTypes) {
        $sstr = "";
        $pstr = "";
        $cstr = "";
        if (($file) = m/\<\!\-\- $paper:[\s\n]+(\S+)[\s\n]+\-\-\>/) {
            # If $file doesn't look like a URL, get more info.
            if ($file !~ m/^[^\:\/]+\:\/\//) {
                local($path) = $htmldir . $file;
                if (! -f $path) {
                    print STDERR "couldn't find $path\n";
                    next papertype;
                }
                local($opencmd);
                local($size);
                local($pageCountRoutine);
                local($pageCount) = 0;

                ($opencmd, $cstr) = &openCommand($path);

                # Get size.  "_" reuses the stat done by -f above.
                $size = -s _;
                $sstr = ", $size bytes";

                # Get page count.
                $pageCountRoutine = $paper . "PageCount";
                $pageCount = &$pageCountRoutine($opencmd);
                $pstr = ", $pageCount pages" if (defined $pageCount);

                # Get compression type.
                $cstr = "$cstr " if ($cstr ne "");
            }
            #print STDERR "found $paper $file$pstr$sstr\n";
            # NOTE(review): the opening anchor is assumed to point at
            # $file -- confirm against the upstream bib2html source.
            s/\<\!\-\- $paper:[\s\n]+\S+[\s\n]+\-\-\>/(<A HREF="$file">${cstr}$paper<\/A>$pstr$sstr)/;
        }
    }
}

# html_encode(string)
# Protect character entities in string.
# Returns a copy of string with &, <, > and " replaced by the
# corresponding HTML entity references.
sub html_encode {
    local($_) = @_;

    s/&/&amp;/g;    # Must be first.
    s/</&lt;/g;
    s/>/&gt;/g;
    s/"/&quot;/g;

    $_;
}

# Prevent "identifier used only once" warnings.
$opt_a = $opt_c = $opt_d = $opt_h = $opt_r = $opt_s = $opt_t = undef;
$opt_v = $opt_3 = undef;

# Remember the command line (HTML-safe) before Getopts consumes @ARGV.
$command_line = &html_encode(join(' ', $0, @ARGV));

&Getopts("acd:h:rs:tv3") || &usage;

print STDERR "$version\n" if (defined($opt_v));

# Exactly one source file and at most one HTML file are accepted.
&usage if (($#ARGV < 0) || ($#ARGV > 1));

# The source file's extension selects the mode; its name without the
# extension is the default bibliography delimiter.
if ($ARGV[0] =~ m/\.bib$/) {
    $bibfile = $ARGV[0];
    $bibfile =~ s/\.bib$//;
    $delimiter = $bibfile;
} elsif ($ARGV[0] =~ m/\.aux$/) {
    $citefile = $ARGV[0];
    $citefile =~ s/\.aux$//;
    $delimiter = $citefile;
} else {
    print STDERR "Unknown file extension on $ARGV[0]\n";
    &usage;
}

$htmlfile = $ARGV[1] if ($#ARGV == 1);

$delimiter = $opt_d if (defined($opt_d));

$title = (defined($opt_h) ?
$opt_h : "Bibliography generated from $ARGV[0]"); $style = $EMPTY; if (defined($opt_s)) { $_ = $opt_s; style: { m/^empty$/ && ($style = $EMPTY, last style); m/^plain$/ && ($style = $PLAIN, last style); m/^alpha$/ && ($style = $ALPHA, last style); m/^named$/ && ($style = $NAMED, last style); m/^unsort$/ && ($style = $UNSORT, last style); print STDERR "Unknown style: $_\n"; &usage; } } style: { ($style == $EMPTY) && ($bstfile = "html-n", $list_start = "UL", $list_end = "/UL", last style); ($style == $PLAIN) && ($bstfile = "html-n", $list_start = "DL COMPACT", $list_end = "/DL", last style); ($style == $ALPHA) && ($bstfile = "html-a", $list_start = "DL", $list_end = "/DL", last style); ($style == $NAMED) && ($bstfile = "html-n", $list_start = "DL", $list_end = "/DL", last style); ($style == $UNSORT) && ($bstfile = "html-u", $list_start = "DL COMPACT", $list_end = "/DL", last style); } $bstfile .= "c" if (defined ($opt_c)); $bstfile .= "r" if (defined ($opt_r)); $bstfile .= "a" if (defined ($opt_a)); # PostScript and PDF files are assumed to be in same directory # as the target HTML file. $htmldir = ""; if ($ARGV[1] =~ m+(^.*/)+) { $htmldir = $1; } umask(077); $beginstring = ""; $endstring = ""; # Create an .aux file for bibtex to read. $auxfile = "bib$$"; push(@tmpfiles, "$auxfile.aux"); open(AUXFILE, ">$auxfile" . ".aux"); print AUXFILE "\\relax\n\\bibstyle{$bstfile}\n"; if (defined($citefile)) { $citefile .= ".aux"; open(CITEFILE, "<$citefile") || die "error opening $citefile: $!\n"; while () { print AUXFILE $_ if (m/^\\(citation|bibdata){/); } close(CITEFILE); } else { print AUXFILE "\\citation{*}\n\\bibdata{$bibfile}\n"; } close(AUXFILE); # run bibtex, redirecting bibtex's output from STDOUT to STDERR. push(@tmpfiles, "$auxfile.blg"); push(@tmpfiles, "$auxfile.bbl"); # fork is not implemented on some non-Unix platforms. # fork || (open(STDOUT, ">&STDERR"), exec('bibtex', $auxfile)); # wait; system("bibtex $auxfile"); open(HTMLFILE, ($htmlfile ne "" ? 
">$htmlfile.$$" : ">&STDOUT")); if (open(OHTMLFILE, "<$htmlfile")) { $mode = (stat OHTMLFILE)[2] & 0xfff; $updating = 1; } else { $mode = 0644; $updating = 0; # An existing HTML file does not exist, so output some boilerplate. print HTMLFILE <<_EOF_; $title _EOF_ } if ($updating) { loop: while () { last loop if m/^$beginstring$/; print HTMLFILE; } loop: while () { last loop if m/^$endstring$/; } } print HTMLFILE "$beginstring\n"; print HTMLFILE < EOF # Now we make two passes over the .bbl file. In the first pass, we # just collect the {cite, label} pairs, which we will use later for # crossrefs. $t = $auxfile . ".bbl"; $/ = ""; # Make a first pass through the .bbl file, collecting citation/label pairs. open(BBLFILE, "<$t") || die "error opening $t: $!\n"; $nentry = 0; loop: while () { $nentry++; ($bcite, $blabel) = m+

\[([^\]]*)\]

+; $blabel = "$nentry" if ($style == $PLAIN || $style == $UNSORT); $bibcite{$bcite} = $blabel; } close(BBLFILE); if (defined($opt_t)) { print HTMLFILE "$nentry references, last updated " . &ctime(time) . "

\n"; } print HTMLFILE "<$list_start>\n\n"; #foreach $key (sort (keys(%bibcite))) { # print "$key : $bibcite{$key}\n"; #} open(BBLFILE, "<$t") || die "error opening $t: $!\n"; $nentry = 0; loop: while () { $nentry++; # Protect \{, \}, and \$, and then assign matching {} pairs a unique ID. s/\\\{/\002/g; s/\\\}/\003/g; s/\\\$/\004/g; { local ($c, $l, $z) = (0, 0, ()); s/([\{\}])/join("","\001",($1 eq "\{" ? $z[$l++]=$c++ : $z[--$l]),$1)/ge; } # bibtex sometimes breaks long lines by inserting "%\n". We remove # that because it might accidently break the line in the middle # of a URL. We don't need to deal with TeX comments in general # because bibtex seems to munge them up anyway, so there shouldn't # be any in the bibliography file. s/\%\n//g; # bibtex's add.period$ knows how to avoid adding extra periods # when a block already ends in a period. bib2html's modifications # of bibtex's style files break that. We fix it here. s/(\.(<\/CITE>|<\/A>|\')+)\./$1/g; # Adjust beginning of entry based on bibliography style. if ($style == $EMPTY) { s:

()\[[^\]]*\]()

$1$2:; # Attempt to fix up empty tag, which some browsers # choke on (even though it *is* legal HTML). s:([\w]+):$2<\/A>:; } elsif ($style == $PLAIN || $style == $UNSORT) { s:(

\[)[^\]]*(\]

):$1$nentry$2:; } # Attempt to fix up crossrefs. while (m/(\\(cite(label)?)(\001\d+)\{([^\001]+)\4\})/) { $old = $1; $cmd = $2; $doxref = ($3 eq ""); $bcite = $5; if ($bibcite{$bcite} eq "") { $blabel = " [" . $bcite . "]"; } elsif ($doxref) { $blabel = " [" . $bibcite{$bcite} . "]<\/A>"; } else { $blabel = " [" . $bibcite{$bcite} . "]"; } $old =~ s/(\W)/\\$1/g; s/\s*$old/$blabel/g; } # In the empty and named styles, with crossrefs we sometimes get # something like "In Doe and Roe [Doe and Row, 1995]." Change this to # "In [Doe and Roe, 1995]." to remove the redundancy. if (($style == $EMPTY) || ($style == $NAMED)) { s/In ()([^\[]+) \[(\2)/In $1\[$2/; } # Handle the latex2html commands \htmladdnormallink{text}{url} # and \htmladdnormallinkfoot{text}{url}. s/\\htmladdnormallink(foot)?(\001\d+)\{([^\001]+)\2\}(\001\d+)\{([^\001]+)\4\}/$3<\/A>/g; s/\\&/&/g; # \& -> & # Accents. s/\\i\b/i/g; # dotless i. s/\\\'(\001\d+)\{([AEIOUaeiou])\1\}/&$2acute;/g; # acute accent \'{x} s/\\\'([AEIOUaeiou])/&$1acute;/g; # acute accent \'x s/\\\`(\001\d+)\{([AEIOUaeiou])\1\}/&$2grave;/g; # grave accent \`{x} s/\\\`([AEIOUaeiou])/&$1grave;/g; # grave accent \`x s/\\\"(\001\d+)\{([AEIOUaeiouy])\1\}/&$2uml;/g; # umlaut \"{x} s/\\\"([AEIOUaeiouy])/&$1uml;/g; # umlaut \"x s/\\\~(\001\d+)\{([ANOano])\1\}/&$2tilde;/g; # tilde \~{x} s/\\\~([ANOano])/&$1tilde;/g; # tilde \~x s/\\\^(\001\d+)\{([AEIOUaeiou])\1\}/&$2circ;/g; # circumflex \^{x} s/\\\^(AEIOUaeiou])/&$1circ;/g; # circumflex \^x s/\\c(\001\d+)\{([Cc])\1\}/&$2cedil;/g; # cedilla \c{x} # The following accents have no HTML equivalent. # (This list is still not complete.) s/\\u(\001\d+)\{(.)\1\}/$2/g; # breve accent \u{x} s/\\v(\001\d+)\{(.)\1\}/$2/g; # hacek accent \v{x} s/\\([lL])\b/$1/g; # slashed l s/\\\=(\001\d+)\{(.)\1\}/$2/g; # macron \={x} s/\\\=(.)/$1/g; # macron accent \=x s/\\\.(\001\d+)\{(.)\1\}/$2/g; # dot \.{x} s/\\\.(.)/$1/g; # dot accent \.x # Other special characters. 
# Ligatures, national letters and symbols.  The replacements are written
# as ASCII entity references so that the generated HTML does not depend
# on the character encoding of this source file.
s/\\([Oo])\b\s*/&$1slash;/g;        # \[Oo] -> &[Oo]slash;
s/\\AA\b\s*/&Aring;/g;              # \AA -> &Aring;
s/\\aa\b\s*/&aring;/g;              # \aa -> &aring;
s/\\AE\b\s*/&AElig;/g;              # \AE -> &AElig;
s/\\ae\b\s*/&aelig;/g;              # \ae -> &aelig;
s/\\ss\b\s*/&szlig;/g;              # \ss -> &szlig;
s/\\S\b\s*/&#167;/g;                # \S -> &#167; (section sign)
s/\\P\b\s*/&#182;/g;                # \P -> &#182; (paragraph sign)
# A numeric character reference needs the "#"; without it "&163;" is
# not a valid reference.
s/\\pounds\b\s*/&#163;/g;           # \pounds -> &#163; (pound sign)
s/\?\`/&#191;/g;                    # ?` -> &#191; (inverted question mark)
s/\!\`/&#161;/g;                    # !` -> &#161; (inverted exclamation mark)

# Other special characters.
if (defined($opt_3)) {
    # Try to be careful to not change the dashes in HTML comments
    # (<!-- ... -->) to &ndash;s.
    s/\-\-\-/&mdash;/g;                     # --- -> &mdash;
    s/([^\!])\-\-([^\>])/$1&ndash;$2/g;     # -- -> &ndash;

    # lower case greek
    s/\\alpha\b/&agr;/g;            # \alpha -> &agr;
    s/\\beta\b/&bgr;/g;             # \beta -> &bgr;
    s/\\gamma\b/&ggr;/g;            # \gamma -> &ggr;
    s/\\delta\b/&dgr;/g;            # \delta -> &dgr;
    s/\\varepsilon\b/&egr;/g;       # \varepsilon -> &egr;
    s/\\epsilon\b/&egr;/g;          # \epsilon -> &egr;
    s/\\zeta\b/&zgr;/g;             # \zeta -> &zgr;
    s/\\eta\b/&eegr;/g;             # \eta -> &eegr;
    s/\\theta\b/&thgr;/g;           # \theta -> &thgr;
    s/\\vartheta\b/&thgr;/g;        # \vartheta -> &thgr;
    s/\\iota\b/&igr;/g;             # \iota -> &igr;
    s/\\kappa\b/&kgr;/g;            # \kappa -> &kgr;
    s/\\lambda\b/&lgr;/g;           # \lambda -> &lgr;
    s/\\mu\b/&mgr;/g;               # \mu -> &mgr;
    s/\\nu\b/&ngr;/g;               # \nu -> &ngr;
    s/\\xi\b/&xgr;/g;               # \xi -> &xgr;
    s/\\pi\b/&pgr;/g;               # \pi -> &pgr;
    s/\\varpi\b/&pgr;/g;            # \varpi -> &pgr;
    s/\\rho\b/&rgr;/g;              # \rho -> &rgr;
    s/\\varrho\b/&rgr;/g;           # \varrho -> &rgr;
    s/\\sigma\b/&sgr;/g;            # \sigma -> &sgr;
    s/\\varsigma\b/&sfgr;/g;        # \varsigma -> &sfgr; ???
s/\\tau\b/&tgr;/g; # \tau -> &tgr; s/\\upsilon\b/&ugr;/g; # \upsilon -> &ugr; s/\\phi\b/&phgr;/g; # \phi -> &phgr; s/\\varphi\b/&phgr;/g; # \varphi -> &phgr; s/\\chi\b/&khgr;/g; # \chi -> &khgr; s/\\psi\b/&psgr;/g; # \psi -> &psgr; s/\\omega\b/&ohgr;/g; # \omega -> &ohgr; # upper case greek s/\\Gamma\b/&Ggr;/g; # \Gamma -> &Ggr; s/\\Delta\b/&Dgr;/g; # \Delta -> &Dgr; s/\\Zeta\b/&Zgr;/g; # \Zeta -> &Zgr; s/\\Lambda\b/&Lgr;/g; # \Lambda -> &Lgr; s/\\Xi\b/&Xgr;/g; # \Xi -> &Xgr; s/\\Pi\b/&Pgr;/g; # \Pi -> &Pgr; s/\\Sigma\b/&Sgr;/g; # \Sigma -> &Sgr; s/\\Upsilon\b/&Ugr;/g; # \Upsilon -> &Ugr; s/\\Phi\b/&PHgr;/g; # \Phi -> &PHgr; s/\\Psi\b/&PSgr;/g; # \Psi -> &PSgr; s/\\Omega\b/&OHgr;/g; # \Omega -> &OHgr; } else { s/\b--\b/-/g; # -- -> - s/\b---\b/--/g; # --- -> -- } # Handle \char123 -> &123;. while (m/\\char([\'\"]?[0-9a-fA-F]+)/) { $o = $r = $1; if ($r =~ s/^\'//) { $r = oct($r); } elsif ($r =~ s/^\"//) { $r = hex($r); } s/\\char$o\s*/&#$r;/g; } s/{\\etalchar\001(\d+)\{(.)}\001\1\}/$2/g; # {\etalchar{x}} -> x s/\\par\b/