#!/usr/bin/perl -w # This script is a hack to scan the text version of 007 and extract # information comparable to the BNF summary annex in Fortran 90. # Usage: # extractBNF.pl [-hexopt] [files] # # hexopt hex representing the output to produce # 1 primary BNF # 2 duplicate BNF # 4 Constraints # 8 analysis of BNF placement in sections # 10 xref of non-terminal symbols # 20 xref of TERMINAL symbols # 40 xref of only those non-terminal symbols that # are defined and not referenced or referenced # and not defined # If omitted, hexopt is 48. # files files to scan for BNF and constraints # If omitted, files defaults to c*.txt # The method by which this script recognizes the beginning of a BNF # rule is quite solid, but recognizing the end of such a rule is a # hack that has been tweaked until it produced acceptable results # Line continuations within the BNF also presented a bit of a problem, # as the square bullet continuation character mapped to lower case n. # The continuation mark thus looks like the word "n". Unfortunately, # there is a non-terminal "n", so not every word "n" is a continuation # mark, not even if one looks only at words at the beginning or end of # a line. The added requirement that continuation marks should be # paired, with one at the end of the continued line and one at the # beginning of the continuation line immediately following, seems to # do a reasonably good job of differentiating between the continuation # mark "n" and the non-terminal "n", but it cannot be guaranteed to # continue to do so. # The method for recognizing the beginning of constraints is acceptably # solid. As long as constraints continue to be only 1 paragraph each, # locating the end of a constraint will also be solid. # Terminal symbols are distinguished from non-terminal symbols by case. # The long-term continuance of this convention cannot be guaranteed, # but there is no immediate reason to expect it to change. # The three metarules for syntax rules have been programmed into this # script. References to non-terminals of the form XXXX-name are # handled like except that they are treated as having been defined by # the rule for name. References to non-terminals of the form # scalar-XXXX or XXXX-list (or scalar-XXXX-list) are treated as # references to XXXX, but displayed separately. # Xref covers only the "primary" appearances of each rule, i.e. , # those in the section whose number matches the beginning of the # rule number. This definition has been slightly hacked to also # include definitions in later sections if the definition was omitted # from the proper section. my ($optbnf,$optbnd,$optcon,$optbns,$optnon,$opttrm,$opttrb); my $opt = '48'; # default print options my $rmar = 72; # target output right margin my $lmar; # target output left margin my $tlim; # limit on size of text between margins my $text; # text being accumulated my $lmtext; # text in left margin my %count; # root accumulator for refs/defs my %define; # the actual definitions my $sect; # current sections number open(STDERR,'>&STDOUT'); ## DEBUG ## $| = 1; ## DEBUG ## # procedure to empty left margin text buffer sub lmempty { $lmtext = ''; $tlim = $rmar } # procedure to pad left margin text buffer to target left margin sub lmpad { my $tabval; my $lmval = $rmar - $tlim; while ($lmval + ($tabval = 8 - (7 & $lmval)) <= $lmar) { $lmtext .= "\t"; $lmval += $tabval } if ($lmval < $lmar) { $lmtext .= ' 'x($lmar - $lmval); $lmval = $lmval } $tlim = $rmar - $lmval } # procedure to transfer text to left margin buffer sub lmxfer { $lmtext .= $text; $tlim -= length($text); $text = '' } # procedure to recognize the start of a BNF rule my $rule; # global to keep track of current rule number my $rsect; # number of section to which rule naturally belongs sub rulestart { return 0 unless s/^(R\d+)\s+(\S+)\s+is\s+//; $rule = $1; my $word = $2; $rsect = substr($rule,1,-2); $text = "$rule\t$word\n is\t"; ++$count{$word}[0]{$rule} } # record defintion # procedure to process text from RHS of BNF rule my $cont = ''; # flag to track of BNF continuations sub ruletext { my ($cont0,$next,$word,$off); chomp; s/\s*$//; s/^\s*n\b// if $cont; # clean edges do {$next = <>} while $next =~ /^\s*$/; # lookahead to next line $cont0 = $cont; $cont = $next =~ /^\s*n\b/ && s/\bn$//; # test for continuation unless (($rsect != $sect) && ($rsect > $sect || $define{$rule}[$rsect])) { while (/([a-zA-Z][a-zA-Z0-9\-]*)/g) { # find words in text $word = $1; $off = 1; # reduce to base form $off += 1 if $word =~ s/^scalar-//; $off += 2 if $word =~ s/-list$//; $count{$word}[$off]{$rule}++ }} # record reference s/^/\&/ if $cont0; # put continuations back for printing s/$/\&/ if $cont; $text .= "$_\n"; $_ = $next } # advance to next line # procedure to recognize following text not part of BNF rule sub ruleend { return 0 unless !/^\s*([\[a-z]|$)/ || /^where/; push @{$define{$rule}[$sect] ||= []},$text; return 1 if !$optbnd && ($rsect != $sect) && ($rsect > $sect || $define{$rule}[$rsect]); print $text,"\n" if $optbnf; # print rule return 1 } # procedure to recognize the start of a BNF constraint sub constart { return 0 unless s/^(Constraint:)\s+//; $lmar = 12; $lmtext = $1; $tlim = $rmar - length($lmtext); lmpad; s/^(\s*)(\S+)//; $text = $2; return 1 } # procedure to process text from the body of BNF constraint sub context { chomp; s/^\s*/ /; while (s/^(\s+)(\S+)//) { if (length($text)+length($1)+length($2) <= $tlim) { $text .= $1 . $2 } else { print $lmtext,$text,"\n" if $optcon; $text = $2; lmempty; lmpad }} $_ = <> } # procedure to recognize the end of a BNF constraint sub conend { return 0 unless /^\s*$/; print $lmtext,$text,"\n\n" if $optcon; return 1 } # procedure to return the list of keys in a hash, sorted as rule numbers sub rules { sort { length($a) <=> length($b) || $a cmp $b } keys %{$_[0]} } # procedure to add word to xref list in text my $comma; # flag whether comma needed after previous item sub xrefadd { $text .= ',' if $comma; $comma = 1; if (length($text)+length($_[0]) < $tlim) { $text .= $_[0] } else { print $lmtext,$text,"\n"; lmempty; lmpad; $text = $_[0] }} # procedure to add all rules in hash to xref list in text sub xrefaddall { my $rule; $comma = 0; foreach $rule (rules(@_)) { xrefadd($rule) }} # procedure to return effective definition of syntax symbol sub def { $count{$_[0]}[0] || ($_[0] =~ /-name$/ && $count{name}[0]) } # procedure to determine if syntax symbol is defined by a rule sub defok { def(@_) && (scalar(keys %{def(@_)}) == 1) } # procedure to determine if syntax symbol is referenced by at least one rule sub refok { $#{$count{$_[0]}} } # procedure to check for undefined or unreferenced syntax symbol sub trouble { !defok(@_) || !refok(@_) } # decode the print options if (@ARGV && $ARGV[0] =~ /^-([\dA-Za-z]+)$/) { $opt = $1; shift } ($optbnf,$optbnd,$optcon,$optbns,$optnon,$opttrm,$opttrb) = split(//,unpack('b*',pack('h*',scalar reverse($opt)))); # set the default input files, if necessary @ARGV = unless @ARGV; #@ARGV = "c04.txt" unless @ARGV; # main text scanning loop my $lastARGV = ''; $_ = <>; while ($_) { $sect = $1 if /^Section (\d+):\s+/; if (rulestart) { ruletext; until (ruleend) { $text .= (s/^\tor\t//)?" or\t":"\t"; ruletext } next } # already have next line so skip to next interation if (constart) { until (conend) { context }} $_ = <> } # analysis of BNF placement in sections if ($optbns) { my ($r,$t,$n,$m); foreach $rule (rules(\%define)) { # loop over rules $rsect = substr($rule,1,-2); $r = $define{$rule}; print "$rule not defined in section $rsect\n" unless $r->[$rsect]; $t = $r->[$rsect][0]; foreach $sect (1..$#$r) { # loop over sections next unless $r->[$sect]; $n = scalar(@{$r->[$sect]}); print "$rule is defined $n times in section $sect\n" if $n > 1; next if $sect == $rsect || !defined($t); foreach $m (0..($n-1)) { # loop over defs print "$rule inconsistently defined", " in section $sect\n" if $r->[$sect][$m] ne $t }}} print "\n" } # xref generation my ($r,$t); my $lmartrm = 16; # left margin for xref of terminal symbols my $lmardef = 8; # left margin for def part of xref of non-terminals my $lmarnon = 32; # left margin for ref part of xref of non-terminals my @reflabel = ('DEF','','scalar-*:','*-list:','scalar-*-list:'); foreach (sort keys %count) { # print requested parts of xref $r = $count{$_}; if (/^[A-Z]/) { next unless $opttrm; $lmar = $lmartrm; $lmtext = $_; $tlim = $rmar - length($lmtext); $lmar += 2 until length($lmtext) < $lmar; lmpad; $lmar = $lmartrm; $text = ''; xrefaddall($r->[1]); print $lmtext,$text,"\n" } else { next unless $optnon || ($opttrb && trouble($_)); $lmar = $lmardef; $text = "$_ ("; $lmtext = ''; $tlim = $rmar; $t = def($_); if ($t) { xrefaddall($t) } else { $comma = 0; xrefadd('') } $text .= ')'; lmxfer; $lmar = $lmarnon; $lmar += 2 until length($lmtext) < $lmar; lmpad; $lmar = $lmarnon; $comma = 0; if ($#$r) { foreach $t (1..$#$r) { next unless $r->[$t]; xrefadd($reflabel[$t]); xrefaddall($r->[$t]) }} else { xrefadd('') } print $lmtext,$text,"\n" }}