#!/usr/bin/perl -w use Getopt::Std; # Assembler for the Universal Machine # $Id: ac,v 1.13 2002/10/17 02:31:20 tomj Exp $ # As I wrote this code I kept thinking in terms of eventually # re-coding it in Universal Machine code itself, so the variables # tend to be non-local, as well as the code structure having other # primitive structural concepts. I do not apologize for this. # It's also more or less test/hack code, so it's probably chronically ugly. # SOURCE FILE: see [document] # OBJECT FILE: Paper tape oriented character stream. The idea # is brute simplicity and tape-length efficiency, with enough # flexibility for expansion. This is designed with the assumption # of a 5-bit ITA character set in mind. # A word is loaded in four characters, using # A start of object file. Origin=0 # Bcddddeeee....zzzs data record; c=count s=checksum, # dddd eeee zzzz words # Cdddds set origin dddd s=checksum # Z end of object file # nul, space, CR, LF ignored between records # But ignore all that, it outputs conveniently for # the simulator. # LISTING FILE: Human-readable listing including source # and object code. # Logical machine geometry. $WSIZE= 18; # bits per word $WMAX= (2**$WSIZE); # range of values $WMASK= ($WMAX-1); # maximum value/bit mask $KSIZE= 12; # bits per address field/K $KMASK= (2**$KSIZE-1); # address mask #$MAXMEM= 4096; # amount of memory # Simulation controls. $arch= 0; # 0=SKIP machine, else JUMP machine $memorg= 0; # 0=drum 1=random access $EMTTY= 1; # default emulate library (tty) calls # Memory geometry specifiers. 1/4096 is flat memory, eg. trackless. # Everything else assumes multiple tracks and a relay tree with a # finite change time (TRACKTIME). SSIZE is simultaneously the number # of bits in the sector value and the shift counter to position # the track value in the order word. SECTORS is the sector # count in memory and the sector mask value +1. # --track----|----sector----- # t t t t t t s s s s s s s s $SSIZE= 8; # # bits to describe sector %GEOM= ( # tracks sectors SSIZE '1/4096' => "1 4096 12", # flat memory '2/2048' => "2 2048 11", '4/1024' => "4 1024 10", '8/512' => "8 512 9", '16/256' => "16 256 8", '32/128' => "32 128 7", # LPG-30-ish '64/64' => "64 64 6", # 64 tracks is a big relay tree ); # Drum geometry defaults. $TRACKS= 64; # default tracks/heads on disk $SECTORS= 64; # default words per track $SKEWFACT= 4; # default skew factor $TRACKTIME= 20000; # default track-switch time, uS # Assembler variables. $line= ""; # source code line buffer $symbols= 0; # number of symbols in table $mem= 0; # memory locations consumed $errors= 0; $op= ""; # parsed opcode, $arg= ""; # parsed arg, $label= ""; # optional label, # Opcodes are pre-stored in the symbol table for simplicity. We # assign them flag values to indicate the requirements for each # opcode. # # We pack metajunk into the opcode value in the symbol table, # into the address field bits which get masked off anyways # during opcode assembly. The order format is: # # o order # t track # s sector # 0 unused # # 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 # | o | o | o | o | o | 0 | t | t | t | t | t | t | s | s | s | s | s | s | # a m k # |<--- OPOFF # 1 1 1 1 1 <--- OPMASK # # Assembler meta junk bits: # # k opcode requires arg 0 - K (track:sector, or constant) # m order references memory (k is address) # a architecture: 0=SKIP machine, 1=JUMP machine # $OPCODE=$WMAX; # out-of-range value flags opcode, # (stored symbols bound to 0 - $WMAX) $OPOFF= 4096; # opcode in upper 5 bits, $OPMASK= (31 * $OPOFF); # strip funny flag bits off opcode $OPARG= 1; # bit0=1 opcode takes arg 0 - K $OPMREF= 2; # bit1=1 opcode references memory $OPJUMP= 4; # JUMP vs. SKIP architecture %SYM= ( # SKIP architecture machine orders. "HALT", $OPCODE + (1 * $OPOFF) + $OPARG, # opcodes are preloaded "SWA", $OPCODE + (2 * $OPOFF), # into the symbol table "SZ", $OPCODE + (3 * $OPOFF), "SNZ", $OPCODE + (4 * $OPOFF), "SC", $OPCODE + (5 * $OPOFF), "SNC", $OPCODE + (6 * $OPOFF), "RLC", $OPCODE + (7 * $OPOFF), "SR", $OPCODE + (8 * $OPOFF) | $OPARG, "NEG", $OPCODE + (9 * $OPOFF) | $OPARG, "STZ", $OPCODE + (10 * $OPOFF), "STNZ", $OPCODE + (11 * $OPOFF), "TTO", $OPCODE + (12 * $OPOFF), "ADC", $OPCODE + (13 * $OPOFF) | $OPARG, "ADD", $OPCODE + (14 * $OPOFF) | $OPARG, "AND", $OPCODE + (15 * $OPOFF) | $OPARG, "OR", $OPCODE + (16 * $OPOFF) | $OPARG, "LD", $OPCODE + (17 * $OPOFF) | $OPARG, "JUMP", $OPCODE + (18 * $OPOFF) | $OPARG, "ADCM", $OPCODE + (19 * $OPOFF) | $OPARG | $OPMREF, "ADDM", $OPCODE + (20 * $OPOFF) | $OPARG | $OPMREF, "ANDM", $OPCODE + (21 * $OPOFF) | $OPARG | $OPMREF, "ORM", $OPCODE + (22 * $OPOFF) | $OPARG | $OPMREF, "LDM", $OPCODE + (23 * $OPOFF) | $OPARG | $OPMREF, "LDA", $OPCODE + (24 * $OPOFF) | $OPARG, "STA", $OPCODE + (25 * $OPOFF) | $OPARG | $OPMREF, "STO", $OPCODE + (26 * $OPOFF) | $OPARG | $OPMREF, # JUMP architecture machine orders. "JZ", $OPCODE + (3 * $OPOFF) | $OPJUMP, "JNZ", $OPCODE + (4 * $OPOFF) | $OPJUMP, "JC", $OPCODE + (5 * $OPOFF) | $OPJUMP, "JNC", $OPCODE + (6 * $OPOFF) | $OPJUMP, "JTZ", $OPCODE + (10 * $OPOFF) | $OPJUMP, "JTNZ", $OPCODE + (11 * $OPOFF) | $OPJUMP, ); # 64ASCII to 6-bit ITA2. bit5 encodes the FIGS/LTRS state; # bit5=0 is LTRS, bit5=1 is FIGS. $FIGS= 32; # TTY/ITA2 FIGS character and case machine state $LTRS= 0; # TTY/ITA2 LTRS character and case machine state $BOTH= 0; # case machine state #$NEITHER= 99; # initial case machine state $UU= 127; # untranslatable character # Table of TTY/ITA2 characters in ASCII order, in a crude double-character # format. Index is [2 * i] for the case indicator; [2 * i + 1] is the # TTY/ITA2 character. The first symbol defines which code set (FIGS, LTRS) # the character is in. @A2B= ( $BOTH, 0, # NULL $BOTH, $UU, # soh $BOTH, $UU, # stx $BOTH, $UU, # etx $BOTH, $UU, # eot $BOTH, $UU, # enq $BOTH, $UU, # ack $FIGS, 5, # bell $BOTH, $UU, # bs $BOTH, $UU, # ht $BOTH, 2, # LF $BOTH, $UU, # vt $BOTH, $UU, # ff $BOTH, 8, # CR $BOTH, $UU, # so $BOTH, $UU, # si $BOTH, $UU, # dle $BOTH, $UU, # dc1 $BOTH, $UU, # dc2 $BOTH, $UU, # dc3 $BOTH, $UU, # dc4 $BOTH, $UU, # nak $BOTH, $UU, # syn $BOTH, $UU, # etb $BOTH, $UU, # can $BOTH, $UU, # em $BOTH, $UU, # sub $BOTH, $FIGS, # FIGS $BOTH, $UU, # fs $BOTH, $UU, # gs $BOTH, $UU, # rs $BOTH, $LTRS, # LTRS $BOTH, 4, # SPACE $FIGS, 13, # (exclamation) $FIGS, 17, # " $FIGS, 20, # # $FIGS, 9, # (dollar) $BOTH, $UU, # (percent) $FIGS, 26, # $FIGS, 11, # (sgl quote) $FIGS, 15, # ( $FIGS, 18, # ) $BOTH, $UU, # * (asterisk) $BOTH, $UU, # + (plus) $FIGS, 12, # , $FIGS, 3, # - $FIGS, 28, # . $FIGS, 29, # / $FIGS, 22, # 0 $FIGS, 23, # 1 $FIGS, 19, # 2 $FIGS, 1, # 3 $FIGS, 10, # 4 $FIGS, 16, # 5 $FIGS, 21, # 6 $FIGS, 7, # 7 $FIGS, 6, # 8 $FIGS, 24, # 9 $FIGS, 14, # : $FIGS, 30, # ; $BOTH, $UU, # (.LT.) $BOTH, $UU, # (equal) $BOTH, $UU, # (.GT.) $FIGS, 25, # ? $BOTH, $UU, # (at) $LTRS, 3, # A $LTRS, 25, # B $LTRS, 14, # C $LTRS, 9, # D $LTRS, 1, # E $LTRS, 13, # F $LTRS, 26, # G $LTRS, 20, # H $LTRS, 6, # I $LTRS, 11, # J $LTRS, 15, # K $LTRS, 18, # L $LTRS, 28, # M $LTRS, 12, # N $LTRS, 24, # O $LTRS, 22, # P $LTRS, 23, # Q $LTRS, 10, # R $LTRS, 5, # S $LTRS, 16, # T $LTRS, 7, # U $LTRS, 30, # V $LTRS, 19, # W $LTRS, 29, # X $LTRS, 21, # Y $LTRS, 17, # Z $BOTH, $UU, # [ (L bkt) $BOTH, $UU, # \ (back sl) $BOTH, $UU, # ] (R bkt) $BOTH, $UU, # ^ (caret) $BOTH, $UU, # _ (underscore) ); # Process command line and filename junk. Given a sourcefile, # look for file.ASM, file.asm, and create the output files # file.o for object and file.p for printable listing. use vars qw/$opt_m $opt_h $opt_j $opt_c $opt_l $opt_s $opt_t $opt_k $opt_g/; getopts('mhjcls:t:k:g:'); $arch= 0; # assume SKIP machine, $arch= $opt_j if $opt_j; # 0=SKIP, 1=JUMP # Set default memory geometry, then allow command line to # override it. ($TRACKS, $SECTORS, $SSIZE)= split /\s+/, $GEOM {"64/64"}; ($TRACKS, $SECTORS, $SSIZE)= split /\s+/, $GEOM {$opt_g} if defined $opt_g and exists $GEOM {$opt_g}; $TRACKS= $opt_t if $opt_t; # maybe override, $SECTORS= $opt_s if $opt_s; $SKEWFACT= $opt_k if $opt_k; # Check for character table dump before filename. We don't need a file for this. # Note that the table organization is crude. if ($opt_c) { print "# ASCII (input) --> native\n"; for ($i= 0; $i < @A2B; $i += 2) { my $case= $A2B[$i]; $char= $A2B[$i + 1]; if ($i > 192 || $i < 64) { print sprintf (" %02d", $i >> 1); } else { print sprintf ("'%c'", $i >> 1); } print " => "; if ($char == $UU) { print "undef\n"; } else { print "FIGS, " if $state == $FIGS; print "LTRS, " if $state == $LTRS; print "$char\n"; } } exit; } $ifile= $ARGV[0]; # first arg is source file, if ($opt_h or $ifile eq "") { print "Usage:\n asm source-file (.asm implicit)\n"; print " -c dump character translation table\n"; print " -l produce listing output and file\n"; print " -j JUMP order set (vs. SKIP)\n"; print " -m parallel, random-access memory (vs. drum)\n"; print " -g specify memory geometry (default: 64/64, "; print "allowed: ", join " ", (sort alphanumerically keys %GEOM), ")\n"; print " -s specify sectors/track (default: $SECTORS)\n"; print " -t specify tracks per drum (default: $TRACKS)\n"; print " -k specify drum skew factor (default: $SKEWFACT)\n"; print " -h usage\n"; exit 2; } open (F, "<$ifile") or open (F, "<$ifile.ASM") or open (F, "<$ifile.asm") or (print "SOURCE FILE /$ifile/ NOT FOUND\n" + exit 1); # now that's Perl code. $ofile= $ifile; # make output filename, $ofile =~ s/\..*$//; # clip of extention, $pfile= "$ofile.p"; # make listing file, $ofile .= ".o"; # add extention for output, if ($opt_l) { open (P, ">$pfile") or (print "CAN'T CREATE LISTING FILE /$pfile/" + close (F) + exit 1); } open (O, ">$ofile") or (print "CAN'T CREATE OBJECT FILE /$ofile/" + close(F) + close(P) + exit 1); # The rest of the main() code is a traditional non-macro two-pass assembler. $track= 0; $sector= 0; $pass= 1; # changes subr behavior &list("PASS 1\n"); while ($line =) { # until end of file, chop $line; # clip newline, &tokenize; # decompose the line, &parse; # go parse it. } # Now that we've read the source in pass 1, we know the values # of these things. Needless to say their arguments must resolve # in pass 1. print O "SOF $0 $ifile\n"; # output object file marker print O "SOH\n"; print O "arch $arch\n"; print O "memorg $memorg\n"; print O "ssize $SSIZE\n"; print O "sectors $SECTORS\n"; # tell the simulator print O "tracks $TRACKS\n"; # some stuff print O "skew $SKEWFACT\n"; print O "tracktime $TRACKTIME\n"; print O "emtty $EMTTY\n"; print O "EOH\n"; &list("PASS 2\n"); $track= 0; $sector= 0; $pass= 2; seek F, 0, 0; # rewind the file, while ($line =) { # until end of file, chop $line; # clip newline, &tokenize; # decompose the line, &parse; # go parse it. } close F; print O "EOF\n"; close O; # Dump the symbol table. if ($opt_l) { &list("\nSYMBOLS\n"); $n= 0; foreach $s (%SYM) { # all of symbol table $v= &sym($s); # get symbol and value next if $v & $OPCODE; # ignore opcodes &list(sprintf("%8s=%06o ", $s, $v)); &list ("\n") if ++$n % 5 == 0; # N per line } } &list ("\n"); &list ("# $errors errors, $mem words, $symbols symbols\n"); close P; exit $errors; # Decompose the input line $line, producing $label, $op and $arg. sub tokenize { $_= $line; # work here, $line for errors s/#.*$//; # clip off comments, $_ =~ s/[\t ]+/ /g; # squish tabs and spaces, s/ +$//; # clip trailing spaces, tr/a-z/A-Z/; # force upper case, $label= ""; $label= $1 if /^(\w+)/; # save any label $label= $1 if /^(\w+:)/; # save any label /\s+(\S+)\s*(.*)/; # parse opcode, argument, $op= $1; # opcode, $arg= $2; # optional argument } # Given tokens from the parsed input line, produce object and listing. # This code is responsible for setting the variables # that make up the listing; $track and $sector (probably # already set); $opcode (the object output word(s)); # and $line, the raw input. # $arg is the text argument for opcodes. Each type of opcode # handles it separately. sub parse { if (($line =~ /^#/) || ($line eq "")) { # handle blank lines $opcode= 0; # and comments, &listing; # preserves listing return; # format. } if ($label =~ /:$/) { chop $label; # remove colon, &set_sym($label, &C); # label=(track, sector) $opcode= $C; # for display } if ($op eq "EQ") { # now process opcodes &set_sym($label, &sym($arg)); $opcode= $arg; # for display &listing; return; # Assembler directives. } elsif ($op eq "GEOMETRY") { my $foo= $arg; $foo =~ s/X/\//; # change "x" to / if (exists $GEOM {$foo}) { ($TRACKS, $SECTORS, $SSIZE)= split /\s+/, $GEOM {$foo}; $opcode= $SSIZE; # for display } else { &error ("BAD GEOMETRY /$arg/"); } &listing; } elsif ($op eq "MEMORG") { $memorg= &sym ($arg); $opcode= $memorg; &listing; } elsif ($op eq "ARCH") { $arch= &sym ($arg); $opcode= $arch; # for display &listing; } elsif ($op eq "SECTORS") { # sectorS not sector $SECTORS= &sym ($arg); $opcode= $SECTORS; # for display &listing; } elsif ($op eq "TRACKS") { # trackS not track $TRACKS= &sym ($arg); $opcode= $TRACKS; &listing; } elsif ($op eq "TRACKTIME") { $TRACKTIME= &sym ($arg); $opcode= $TRACKTIME; &listing; } elsif ($op eq "SECTOR") { if (&sym ($arg) < $SECTORS) { $sector= &sym ($arg); $opcode= $sector; # for display } else { &error ("SECTOR /$arg/ OUT OF RANGE"); } &listing; } elsif ($op eq "TRACK") { if (&sym ($arg) < $TRACKS) { $track= &sym($arg); $opcode= $track; # for display } else { &error ("TRACK /$arg/ OUT OF RANGE"); } &listing; } elsif ($op eq "SKEW") { $SKEWFACT= &sym($arg); $opcode= $SKEWFACT; # for display &listing; } elsif ($op eq "EMTTY") { $EMTTY= &sym($arg); $opcode= $EMTTY; &listing; # This would be so much easier with a pointer. $arg consists of mixed # quoted strings and comma-separated numbers. /TEXT/,2,"ABCDEF",6,7,8 } elsif ($op eq "TEXT") { $in_str= 0; # not in a string $qchar= ""; # quote character, $chars_in_word= 0; # packed chars, $word= 0; # where we pack 'em &listing; # print input line $line= " "; # build a line per 3 characters while ($arg ne "") { $arg =~ s/^(.)(.*)/$2/; # sample 1st char, $Achar= $1; # need ASCII for metas, if ($in_str) { # if in a quote, if ($Achar eq $qchar) { $in_str= 0; # end quote, $qchar= 0; next; } $char= &atoita($Achar); # else char to store, } else { # outside quotes if ($Achar =~ /([\/"'])/ ) { $qchar= $1; # remember quote char, $in_str= 1; # now inside quote, next; } next if $Achar =~ /[ ,]/; # skip separators, # When we find a non-separator character outside of a quote, look up # the following word and store it as a character. $arg =~ s/([09-A-Z]*)(.*)/$2/; $char= &sym($Achar.$1); # look up as symbols &error("TEXT CHARACTER /$char/ OUT OF RANGE") if $char > 63; } # Pack the character into a word; when full, output it. $word |= $char << ($chars_in_word * 6); $n= $char + 0; # change type $line .= ", " if $chars_in_word > 0; $line .= "$n"; # add symbol to listing, if (++$chars_in_word > 2) { $chars_in_word= 0; &output($word); # output every 3 chars $opcode= $word; # for listing, &listing; # list it $word= 0; # clear for next three $line= " "; # build line per 3 chars } } if ($chars_in_word == 0) { # nothing left over, $word= 0; # add a null } $char= 0; &output($word); # $opcode= $word; $n= $char + 0; # change type $line .= ", " if $chars_in_word > 0; $line .= "$n"; # add symbol to listing, &listing; } elsif ($op eq "STORAGE") { $n= &sym($arg); error("/$arg/ MUST BE DEFINED IN PASS 1") if !defined $n; $mem += $n; # no output $sector += $n; # just move . &listing; # Not a directive; look it up in the symbol table. Note that # this allows us to define orders. Symbol-undefined is ignored # in pass 1. } else { if ($pass == 2) { $opcode= &sym($op); # lookup the opcode, $opcode= 0 if $pass == 1 and ! defined ($opcode); $k= 0; # for orders w/no args, if ($opcode & $OPCODE) { # if an instruction, if ($opcode & $OPARG) { # if an arg required, if ($arg eq "") { &error("MISSING ARG"); } elsif ($k > $KMASK) { &error ("/$arg=$k/ OUT OF RANGE 0-$KMASK"); $k= $KMASK; } } $opcode &= $OPMASK; # strip off flag bits, $opcode += $k; # add in arg, } } &listing; &output($opcode); # output to obj file, } } # Output a word to the object file and check for out of bounds. sub output { my($word)= $_[0]; my($n); if ($pass == 2) { $n= ($track << $SSIZE) + $sector;# assemble address, print O "drum $n $word\n"; # good enough for Perl sim error("MAX SECTOR /$SECTORS/ EXCEEDED") if $SECTORS && ($sector >= $SECTORS); } ++$mem; ++$sector; } # Generate a listing line. sub listing { &list(sprintf("%02o:%03o %06o %s\n", $track, $sector, $opcode, $line)) if ($pass == 2) && $opt_l; } # Return the current value of C (sector and track). sub C { return ($track << $SSIZE) + $sector; } # Set the value of a symbol in the table; error if duplicate or # if the argument is undefined. sub set_sym { my($name)= $_[0]; # symbol to assign to, my($arg)= $_[1]; # value to assign to it return if $name eq ""; # return if $name =~ /^[0-9]/; # value itself if 0 - 9 &error("SYMBOL /$name/ ALREADY EXISTS") if &sym($name) && $pass == 1; $SYM{$name}= &sym($arg); # set new value &error("SYMBOL /$name/ VALUE /$arg/ OUT OF RANGE") if &sym($arg) > $WMAX; ++$symbols; # count another symbol } # Return the numeric value of the string argument. This will attempt # to convert numbers and lookup symbols, and evaluate expressions. # If not defined return undef. sub sym { my $name= shift; my($s, $sign); my($operator); # This recursively decomposes the given expression and causes # all of its components to be evaluated as numbers or values # from the symbol table. # Note that the left arg may be null, which covers the case # of signed numbers. return(undef) if ! defined ($name) or $name eq ""; if ($name =~ /([\+\-\*\/\&])/) { # if it contains an operator, $operator= $1; # remember it, # NOTE: []'s around $operator is required, I'm sure the reason is knowable # but I don't know it. Otherwise, it fouls up the regexp. my ($a1, $a2)= $name =~ /([A-Z0-9]*)[$operator]([A-Z0-9]*)/; $a1= 0 if $a1 eq ""; # avoid errors $a2= 0 if $a2 eq ""; if ($operator eq "+") { return (&sym($a1) + &sym($a2)) & $WMASK; } elsif ($operator eq "-") { # this clause handles "-1" return (&sym($a1) - &sym($a2)) & $WMASK; # as 0 - 1. } elsif ($operator eq "/") { if (&sym($a2) < 1) { # avoid div0 &error("DIVIDE BY ZERO ERROR /$3/"); return(undef); } return int(&sym($a1) / &sym($a2)) & $WMASK; } elsif ($operator eq "*") { return (&sym($a1) * &sym($a2)) & $WMASK; } elsif ($operator eq "&") { return (&sym($a1) & &sym($a2)) & $WMASK; } } return &C if $name eq "C"; # current track/sector, return (0+$name) if ($name =~ /^[0-9]/) and ($name !~ /[^0-9]/); # decimal numbers, return &atoita($1) if ($name =~ /^'(.)/);# 'A character value, return $atoo($name) if $name =~ /^=/; # octal numbers $s= $SYM{$name}; # find in symbol table, return($s) if defined($s); # symbol exists, &error("SYMBOL /$name/ NOT DEFINED") # oops no it doesn't if $pass == 2; # no complaints in pass 1 return 0; } # Error in the source code, complain. sub error { my($msg)= $_[0]; &list ("\n*** $msg\n"); ++$errors; } # Output the listing to the screen and file. sub list { my($s)= $_[0]; print $s; print P $s if $opt_l; } # Convert ASCII character to 6-bit packed ITA. sub atoita { my($c)= ord($_[0]); my($b); $c -= 32 if $c > 96; # convert to lower case $c += $c; # makes char index into table $state= $A2B[$c]; $b= $A2B[$c + 1]; # get ITA2 case and character, return $b + $state; # } # Convert a string of octal digits (0 - 7) into a number. We know a priori sub atoo { my ($s)= $_[0]; my ($n)= 0; # built-up number, my ($v)= 0; # current octade $s =~ s/^=//; # clip leading = if present while ($s ne "") { $1= ""; $s =~ /[0-7]$/; # sample last digit, last if $1 eq ""; # no more digits $n += ($s * $v); # N= N + (new digit * octade) chop $s; # remove last char, $v *= 8; # next octade, } return $n; } sub alphanumerically { $a cmp $b; }