texnicard.w

   1 % TeXnicard
   2 % version 0.1
   3
   4 % Licensed by GNU GPL v3 or later version.
   5
   6 \def\contentspagenumber{1}\pageno=3
   7 \def\title{\TeX nicard}
   8 \def\covernote{{\fiverm Batteries not included. Do not use this book as a
   9 flotation device. This is free software; see source file for details.}}
  10
  11 % Prevent \outer from getting in the way, stupid!
  12 \def\+{\tabalign}
  13
  14 @mp@-
  15 ``u{YJ"@<Predeclaration of procedures@>=
  16 qJA";
  17 J"@
  18 "@<Procedure codes@>=
  19 B" {
  20 @)
  21
  22 \long\def\IndexCharacter#1':{`\.{\char`#1}'}
  23 @mcase@-
  24 ``u "case
  25 qAqA/@!@^\IndexCharacter\
  26 Bqu'B"@>
  27 YJ"@<Nothing~@>
  28 @)
  29
  30 \iffalse
  31 @s _decl_head_ =09
  32 @s FILE int
  33 @s dvi_measure int
  34 \fi
  35
  36 \newcount\bibliocount \bibliocount=0
  37 \def\biblio#1{%
  38   \advance\bibliocount by 1 %
  39   $^{[\the\bibliocount]}$%
  40   \expandafter\def\csname biblio \the\bibliocount\endcsname{#1}%
  41 }%
  42
  43 \emergencystretch=\hsize
  44
  45 \def\strike#1{%
  46   \setbox0=\hbox{#1}%
  47   \rlap{\vrule height 3.2pt depth -2.5pt width \wd0}{\box0}%
  48 }%
  49
  50 \def\sectionnumber\PB#1{\sectionnumbernext#1}
  51 \def\sectionnumbernext$\X#1:#2\X${#1}
  52
  53 @*Introduction. This is \TeX nicard, a program designed for similar
  54 purposes of Magic Set Editor, but in a different (and better) way. It
  55 should be able to produce higher quality cards than Wizards of the Coast,
  56 and then they ought to use this program, too!
  57
  58 @^Magic Set Editor@>
  59 @^Wizards of the Coast@>
  60 @^commercial viability@>
  61
  62 @c
  63 @<Memory usage logging@>@;
  64 @<Interpreted C codes@>@;
  65 @<Include files@>@;
  66 @h
  67 @<Typedefs@>@;
  68 @<Late Typedefs@>@;
  69 @<The include file for memory managed types@>@;
  70 @<Global variables@>@;
  71 @<Predeclaration of procedures@>@;
  72 @<Procedure codes@>@;
  73
  74 @ This line below should be changed with the current version number,
  75 whenever a new version is released. (If you fork this program, you should
  76 also include some indication of forking in the \\{version\_string}.)
  77 % (it doesn't work if I use vertical bars here)
  78
  79 @^forking@>
  80
  81 @d version_string "0.1"
  82 @d version_number 1 // one major is worth ten minors
  83
  84 @ @<Typedefs@>=
  85 typedef unsigned char boolean;
  86
  87 @ You might be wondering what this section is for (especially since it
  88 appears to be unused). The reason is that some metamacros use it in order
  89 to force the compiler to know the correct line numbers (in case some lines
  90 have been added by metamacros).
  91
  92 @^nothing@>
  93 @^metamacro@>
  94
  95 @<Nothing~@>= /* ... */
  96
  97 @ There is also memory usage logging. If it is not being compiled for
  98 memory usage logging, it should just ignore these kind of commands.
  99
 100 @<Memory usage logging@>=
 101 #ifndef @!memusage_log
 102 #define @[memusage_log(_text,_arg1)@]
 103 #endif
 104
 105 @*Memory Management. This program uses a lot of similar memory management,
 106 so they will be defined in this chapter.
 107
 108 @^memory management@>
 109
 110 @d none -1 // indication that a |data_index| means nothing
 111
 112 @<Typedefs@>=
 113 typedef struct {
 114   char*data; // pointer to array of blocks (|char*| for use with |sizeof|)
 115   int used; // number of blocks used
 116   int allocated; // number of blocks allocated
 117 } managed_memory;
 118 @#typedef int data_index;
 119
 120 @ We will use an interpreted C code here, which will send output to a
 121 header file |"memory_management.h"|.
 122
 123 @<The include file for memory managed types@>=
 124 #include "memory_management.h"
 125
 126 @ We will need some variables now just to keep track of which kinds of
 127 memory managed areas are needed.
 128
 129 @<Interpreted C codes@>= @{
 130   char**memory_managed_types;
 131   int num_memory_managed_types;
 132   memory_managed_types=malloc(128*sizeof(char*));
 133   num_memory_managed_types=0;
 134 @}
 135
 136 @ From this code, the structure will be created in the header file for
 137 each type that we need a |memory_of|. This section, however, is just a
 138 ``wrapper'' code for the template.
 139
 140 @f @!memory_of _decl_head_ // category 9
 141
 142 @<Interpreted C codes@>= @{
 143   void memory_of$() {
 144     should_output=0;
 145     set_goal("bp","",@+{
 146       sendc(0200|'{'); // begin interpret mode
 147       send("send_memory_of(\"");
 148       set_goal("e","",@+{
 149         send("\");");
 150         sendc(0200|'}'); // end interpret mode
 151         should_output=0;
 152       }@+);
 153     }@+);
 154   }
 155 @}
 156
 157 @ Here is what it does in order to keep a list of the memory managed
 158 types. Note the type name was enclosed in quotation marks, so now it will
 159 be received as a string.
 160
 161 @<Interpreted C codes@>= @{
 162   void send_memory_of(char*s) {
 163     int i;
 164     s++;
 165     @<Send the proper name of the memory managed type@>;
 166     for(i=0;i<num_memory_managed_types;i++) {
 167       if(!strcmp(s,memory_managed_types[i])) return;
 168     }
 169     memory_managed_types[num_memory_managed_types++]=s;
 170   }
 171 @}
 172
 173 @ @<Send the proper name of the memory managed type@>= {
 174   send(" x__");
 175   send(s);
 176   send(" ");
 177 }
 178
 179 @ Now the code you get to in order to define the structures in the header
 180 file. We are mostly just copying the form of our |managed_memory|
 181 structure, but it will be customized to work with the specific type of the
 182 |data| components.
 183
 184 @<Interpreted C codes@>= @{
 185   void send_memory_managed_types() {
 186     int i;
 187     for(i=0;i<num_memory_managed_types;i++) {
 188       send("typedef struct {");
 189       send(memory_managed_types[i]);
 190       send("*data; int used; int allocated; } x__");
 191       send(memory_managed_types[i]);
 192       send(";");
 193     }
 194   }
 195 @}
 196
 197 @ @(memory_management.h@>= @{
 198   send_memory_managed_types();
 199 @}
 200
 201 @ These next two subroutines are used to allocate additional memory.
 202
 203 @d init_memory(_a,_size) init_memory_(&(_a),sizeof(*((_a).data)),(_size))
 204 @d new_record(_area) new_record_(&(_area),sizeof(*((_area).data)))
 205
 206 @-p void*init_memory_(void*mem,int record_size,int num_records) {
 207   managed_memory*m=mem;
 208   m->data=malloc(record_size*num_records);
 209   m->used=0;
 210   m->allocated=num_records;
 211   if(!m->data) @<Fatal error due to lack of memory@>;
 212   return m->data;
 213 }
 214
 215 @ @-p data_index new_record_(void*mem,int record_size) {
 216   managed_memory*m=mem;
 217   m->used++;
 218   if(m->used>m->allocated) {
 219     m->allocated*=2;
 220     m->data=realloc(m->data,m->allocated*record_size);
 221   }
 222   if(!m->data) @<Fatal error due to lack of memory@>;
 223   @<Zero the new record@>;
 224   return m->used-1;
 225 }
 226
 227 @ @<Fatal error due to lack of memory@>= {
 228   fprintf(stderr,"Out of memory\n");
 229 @.Out of memory@>
 230   exit(1);
 231 }
 232
 233 @ @<Zero the new record@>= {
 234   memset(m->data+(record_size*(m->used-1)),0,record_size);
 235 }
 236
 237 @ Now just one more thing. It is useful to have a |foreach| macro to
 238 iterate the areas.
 239
 240 @d foreach(_var,_area) for(_var=0;_var<_area.used;_var++)@;
 241 @f foreach while
 242
 243 @*Symbolic Names. There will be some names defined for the use of naming
 244 subroutines, symbolic constants, patterns, card areas, etc. These names
 245 are stored in a |managed_memory| called |names|.
 246
 247 It also stores references to other things (defined in later chapters). The
 248 numeric value of a name in |names.data[x]| is |x+256|.
 249
 250 @<Late Typedefs@>=
 251 typedef struct {
 252   char*name;
 253   @<More elements of |name_data|@>@;
 254 } name_data;
 255
 256 @ @<Global variables@>=
 257 memory_of(name_data) names;
 258
 259 @ @<Initialize memory@>= init_memory(names,16);
 260
 261 @ This subroutine finds a name, adding it if necessary. The number
 262 corresponding to it (as described above) will be the return value.
 263
 264 @-p int find_name(char*name) {
 265   @<Search for the |name| in |names|@>;
 266   @<Add the new name (it was not found)@>;
 267 }
 268
 269 @ @<Search for the |name| in |names|@>= {
 270   int i;
 271   foreach(i,names) {
 272     if(!strcmp(names.data[i].name,name)) return i+256;
 273   }
 274 }
 275
 276 @ @<Add the new name (it was not found)@>= {
 277   int n=new_record(names);
 278   names.data[n].name=strdup(name);
 279   return n+256;
 280 }
 281
 282 @ A macro will be useful to access the data from a number.
 283
 284 @d name_info(_num) names.data[(_num)-0x0100]
 285
 286 @ This code lists the names. It is used for a diagnostic purpose.
 287
 288 @<Display the list of names@>= {
 289   int n;
 290   foreach(n,names) {
 291     printf("%d \"%s\" ",n+256,names.data[n].name);
 292     @<Display other fields of |names.data[n]|@>;
 293     printf("\n");
 294   }
 295 }
 296
 297 @*Storage of Tokens. Tokens are stored as 16-bit numbers. Values |0x0020|
 298 to |0x00FF| represent those ASCII characters, and |0x0000| to |0x001F| are
 299 ASCII control codes. Higher numbers represent an index into the |names|
 300 array (where |0x0101| represents |names.data[0x0001]|).
 301
 302 @<Typedefs@>=
 303 @q[data type of tokens]@>
 304 typedef unsigned short token;
 305
 306 @ This section lists the ASCII control codes which can be used. Some of
 307 them have slightly different meaning from the ASCII standard.
 308
 309 @d null_char 0x00 // end of a |raw_data| string or similar things
 310 @d pre_null_char 0x01 // becomes |null_char|
 311 @d end_transmission 0x04 // marks the end of the last card in this area
 312 @d tabulation 0x09 // represents a tab in a {\TeX} alignment
 313 @d raw_data 0x10 // enter raw {\TeX} mode
 314 @d whatsit 0x1A // a token for converting into a name token
 315 @d escape_code 0x1B // represents a {\TeX} control sequence introducer
 316 @d record_separator 0x1E // marks the end of a card
 317 @d field_separator 0x1F // marks the end of a field of a card
 318 @d start_name_code 0x0100
 319
 320 @ These tokens are used in card areas, which are defined (and described)
 321 in the next chapter.
 322
 323 @*Cards. The data of the cards is stored in card areas. Each card area
 324 is a list of tokens, terminated by |record_separator|. The final card in
 325 the area is terminated by |end_transmission|.
 326
 327 @<Typedefs@>=
 328 typedef struct {
 329   token*tokens;
 330   int allocated;
 331   int used;
 332 } card_area_data;
 333
 334 @ @<More elements of |name_data|@>=
 335   boolean has_card_area;
 336   data_index card_area;
 337
 338 @ @<Global variables@>=
 339 memory_of(card_area_data) card_areas;
 340
 341 @ @<Initialize memory@>= init_memory(card_areas,1);
 342
 343 @ A new card area is created with this.
 344
 345 @-p data_index set_card_area(int num) {
 346   name_data*m=&name_info(num);
 347   @<Use the card area which is already set, if able@>;
 348   @<Otherwise, create a new card area and use the new one@>;
 349 }
 350
 351 @ @<Use the card area which is already set, if able@>= {
 352   if(m->has_card_area) return m->card_area;
 353 }
 354
 355 @ @<Otherwise, create a new card area and use the new one@>= {
 356   data_index n=new_record(card_areas);
 357   m->has_card_area=1;
 358   card_areas.data[n].allocated=0x100;
 359   card_areas.data[n].tokens=malloc(0x100*sizeof(token));
 360   card_areas.data[n].used=0;
 361   return n;
 362 }
 363
 364 @ This subroutine sends a token to a card area.
 365
 366 @-p void send_token(data_index a,token x) {
 367   if(card_areas.data[a].allocated<card_areas.data[a].used+4)
 368     @<Double the allocation of card area tokens@>;
 369   card_areas.data[a].tokens[card_areas.data[a].used++]=x;
 370 }
 371
 372 @ @<Double the allocation of card area tokens@>= {
 373   int n=(card_areas.data[a].allocated*=2)*sizeof(token);
 374   card_areas.data[a].tokens=realloc(card_areas.data[a].tokens,n);
 375 }
 376
 377 @ @<Display other fields of |names.data[n]|@>= {
 378   if(names.data[n].has_card_area)
 379     printf("C(%d) ",names.data[n].card_area);
 380 }
 381
 382 @ The code in this section is used to ensure that each card area is
 383 properly terminated with |end_transmission| marker, so that when it is
 384 time to write the output files, it will know when to stop.
 385
 386 @<Send |end_transmission| to each card area@>= {
 387   data_index a;
 388   foreach(a,card_areas) send_token(a,end_transmission);
 389 }
 390
 391 @*Patterns. For pattern matching, we store the patterns in one memory
 392 managed area. The index of the beginning of each pattern area is stored
 393 in the |names| list.
 394
 395 These constants are special codes which can occur in the |text| string
 396 of a pattern.
 397
 398 @d begin_capture 1
 399 @d end_capture 2
 400 @d match_keyword 3 // match a keyword followed by a character in a table
 401 @d match_table 4 // match a character using a table
 402 @d optional_table 5 // match a character optional using a table
 403 @d failed_match 6
 404 @d jump_table 7 // use a table to jump to a marker
 405 @d successful_match 8
 406 @d back_one_space 9
 407 @d forward_one_space 10
 408 @d match_left_side 11 // match at beginning of line
 409 @d match_right_side 12 // match at end of line
 410 @d match_eight_bit 13 // match 8-bit encodings and control characters
 411
 412 @<Typedefs@>=
 413 typedef struct {
 414   char*text;
 415   unsigned int category; // category for keywords
 416   data_index subroutine;
 417   data_index next;
 418 } pattern_data;
 419
 420 @ @<More elements of |name_data|@>=
 421   boolean has_pattern_area;
 422   data_index pattern_area;
 423
 424 @ @<Global variables@>=
 425 memory_of(pattern_data) pattern_areas;
 426
 427 @ @<Initialize memory@>= init_memory(pattern_areas,4);
 428
 429 @ @<Display other fields of |names.data[n]|@>= {
 430   if(names.data[n].has_pattern_area)
 431     printf("P(%d) ",names.data[n].pattern_area);
 432 }
 433
 434 @ A new pattern area is created with this. The patterns in an area are
 435 stored like a linked list. The last one with |next| pointing to nothing,
 436 is the terminator entry.
 437
 438 @-p data_index set_pattern_area(int num) {
 439   name_data*m=&name_info(num);
 440   @<Use the pattern area which is already set, if able@>;
 441   @<Otherwise, create a new pattern area and use the new one@>;
 442 }
 443
 444 @ @<Use the pattern area which is already set, if able@>= {
 445   if(m->has_pattern_area) return m->pattern_area;
 446 }
 447
 448 @ @<Otherwise, create a new pattern area and use the new one@>= {
 449   data_index n=new_record(pattern_areas);
 450   m->has_pattern_area=1;
 451   pattern_areas.data[n].subroutine=none;
 452   pattern_areas.data[n].next=none;
 453   return n;
 454 }
 455
 456 @ @<Display the list of patterns@>= {
 457   int i;
 458   foreach(i,pattern_areas) {
 459     if(pattern_areas.data[i].text) {
 460       printf("%d:%08X:%d:%d\n",i,pattern_areas.data[i].category
 461         ,pattern_areas.data[i].subroutine,pattern_areas.data[i].next
 462       );
 463       display_string(pattern_areas.data[i].text);
 464       printf("\n");
 465     }
 466   }
 467 }
 468
 469 @*Keywords. Keywords means words which can be placed on the card and which
 470 can have special meanings, and possibly reminder text.
 471
 472 Keywords are stored in a large list in only one keyword area. A category
 473 can be given a name, which will automatically be assigned for the next bit
 474 of the keyword category when it is entered the first time.
 475
 476 @<Typedefs@>=
 477 typedef struct {
 478   char*match; // match text (can contain pattern codes)
 479   unsigned int category; // bitfield of categories
 480   int extra1;
 481   int extra2;
 482   char*replacement; // replacement text or reminder text
 483 } keyword_data;
 484
 485 @ @<Global variables@>=
 486 unsigned int next_keyword_category=1;
 487 memory_of(keyword_data) keywords;
 488
 489 @ @<Initialize memory@>= init_memory(keywords,4);
 490
 491 @ A keyword category is found (and created, if it is not found) using the
 492 following code.
 493
 494 @-p unsigned int find_category(char*name) {
 495   int i=find_name(name);
 496   if(name_info(i).value.number) {
 497     return name_info(i).value.number;
 498   } @+else if(!name_info(i).value.is_string) {
 499     name_info(i).value.number=next_keyword_category;
 500     next_keyword_category<<=1;
 501     if(!next_keyword_category)
 502       fprintf(stderr,"Too many keyword categories: %s\n",name);
 503 @.Too many keyword categories@>
 504     return name_info(i).value.number;
 505   }
 506 }
 507
 508 @ Some stack code commands are used when dealing with reading/writing
 509 keyword info.
 510
 511 In order that you might be able to iterate them, it will exit out of the
 512 current block when trying to read nonexisting keyword info instead of
 513 displaying an error message.
 514
 515 @<Cases for system commands@>=
 516   @-case 'k': {
 517     // Read keyword info
 518     if(registers['K'].number<0 || registers['K'].number>=keywords.used)
 519       return 0;
 520     push_num(keywords.data[registers['K'].number].extra1);
 521     push_num(keywords.data[registers['K'].number].extra2);
 522     push_string(keywords.data[registers['K'].number].replacement);
 523     break;
 524   }@#
 525   @-case 'K': {
 526     // Write keyword info
 527     if(registers['K'].number<0 || registers['K'].number>=keywords.used)
 528       program_error("Out of range");
 529     free(keywords.data[registers['K'].number].replacement);
 530     keywords.data[registers['K'].number].replacement=pop_string();
 531     keywords.data[registers['K'].number].extra2=pop_num();
 532     keywords.data[registers['K'].number].extra1=pop_num();
 533     break;
 534   }@#
 535
 536 @ @<Display the list of keywords@>= {
 537   int i;
 538   foreach(i,keywords) {
 539     display_string(keywords.data[i].match);
 540     printf("  [%d:%08X:%d:%d:%d]\n",i,keywords.data[i].category
 541       ,keywords.data[i].extra1,keywords.data[i].extra2
 542       ,strlen(keywords.data[i].replacement)
 543     );
 544   }
 545 }
 546
 547 @*Card List. A sorted summary list of the cards is kept in one list,
 548 having thirty-two general-purpose numeric fields, and a pointer to the
 549 beginning of the record (usually the name in which it will be indexed by).
 550
 551 @<Typedefs@>=
 552 typedef struct {
 553   int token_ptr;
 554   int field[32];
 555   int amount_in_pack; // used in pack generation
 556 } list_entry;
 557
 558 @ @<Global variables@>=
 559 memory_of(list_entry) card_list;
 560
 561 @ @<Initialize memory@>= init_memory(card_list,16);
 562
 563 @*Deck Lists. Deck lists involve lists of cards or rules for cards that
 564 belong to a deck or pack.
 565
 566 @^booster pack@>
 567
 568 There is one macro |lflag| here just to convert letters to bit flags. For
 569 example |lflag('a')| is the least significant bit.
 570
 571 @d lflag(_ch) (1<<((_ch)-'a'))
 572
 573 @<Typedefs@>=
 574 typedef struct {
 575   int amount;
 576   unsigned int flags;
 577   char*name;
 578   data_index next;
 579 } deck_entry;
 580
 581 @ @<Global variables@>=
 582 memory_of(deck_entry) deck_lists;
 583
 584 @ @<More elements of |name_data|@>=
 585   boolean has_deck_list;
 586   data_index deck_list;
 587
 588 @ @<Initialize memory@>= init_memory(deck_lists,4);
 589
 590 @ A new deck list is created with this. The deck entries are stored like a
 591 linked list. The terminator has |next| pointing to |none|.
 592
 593 @-p data_index set_deck_list(int num) {
 594   name_data*m=&name_info(num);
 595   @<Use the deck list which is already set, if able@>;
 596   @<Otherwise, create a new deck list and use the new one@>;
 597 }
 598
 599 @ @<Use the deck list which is already set, if able@>= {
 600   if(m->has_deck_list) return m->deck_list;
 601 }
 602
 603 @ @<Otherwise, create a new deck list and use the new one@>= {
 604   data_index n=new_record(deck_lists);
 605   m->has_deck_list=1;
 606   deck_lists.data[n].next=none;
 607   return n;
 608 }
 609
 610 @ @<Display the deck list@>= {
 611   data_index i;
 612   foreach(i,deck_lists) {
 613     printf("%d ",i);
 614     if(deck_lists.data[i].name) display_string(deck_lists.data[i].name);
 615     else printf("-");
 616     printf(" [%08X:%d:%d]\n",deck_lists.data[i].flags
 617       ,deck_lists.data[i].amount,deck_lists.data[i].next);
 618   }
 619 }
 620
 621 @*Word Forms. These structures are used to store word form rules, such as
 622 plurals\biblio{Conway, Damian. ``An Algorithmic Approach to English
 623 Pluralization''. \hskip 0pt plus 1in\hbox{}
 624 \.{http://www.csse.monash.edu.au/\~damian/papers/HTML/Plurals.html}}. You
 625 can store up to four different kinds, in case of languages other than
 626 English.
 627
 628 @^Conway, Damian@>
 629 @^plurals@>
 630
 631 @<Typedefs@>=
 632 typedef struct {
 633   int level;
 634   data_index next;
 635   unsigned char orig[32];
 636   unsigned char dest[32];
 637   boolean left_boundary;
 638   boolean right_boundary;
 639 } word_form_entry;
 640
 641 @ @<Global variables@>=
 642 memory_of(word_form_entry) word_forms;
 643
 644 @ @<Initialize memory@>= {
 645   int i;
 646   init_memory(word_forms,16);
 647   word_forms.used=8;
 648   for(i=0;i<8;i+=2) {
 649     word_forms.data[i].orig[0]=word_forms.data[i].dest[0]=0;
 650     word_forms.data[i].next=i+1;
 651     word_forms.data[i].level=0x7FFFFFFF;
 652     word_forms.data[i+1].orig[0]=word_forms.data[i+1].dest[0]=0;
 653     word_forms.data[i+1].next=none;
 654     word_forms.data[i+1].level=0;
 655   }
 656 }
 657
 658 @ Word form rules are added and then inserted in the correct place in the
 659 linked list using the |next| field. Entries with a higher numbered level
 660 take higher priority, therefore will be placed before the ones with lower
 661 numbered level. Next, longer |orig| strings come before shorter strings,
 662 since they might be more specific forms of the others and will therefore
 663 override them.
 664
 665 @-p data_index add_word_form(int kind,int level,char*orig,char*dest) {
 666   data_index n=new_record(word_forms);
 667   @<Set the fields of the new word form rule@>;
 668   @<Insert the new word form rule into the linked list@>;
 669   return n;
 670 }
 671
 672 @ The |left_boundary| and |right_boundary| fields specify if they should
 673 match only at the boundary. Characters are checked using the \.W table and
 674 removed from the string to place in the list.
 675
 676 @d last_character(_str) ((_str)[strlen(_str)-1])
 677
 678 @<Set the fields of the new word form rule@>= {
 679   word_forms.data[n].level=level;
 680   strcpy(word_forms.data[n].orig,orig+(tables['W'][*orig]==2));
 681   word_forms.data[n].left_boundary=(tables['W'][*orig]==2);
 682   if((word_forms.data[n].right_boundary=
 683    (tables['W'][last_character(word_forms.data[n].orig)]==3)))
 684     last_character(word_forms.data[n].orig)=0;
 685   strcpy(word_forms.data[n].dest,dest+(tables['W'][*dest]==2));
 686   if(tables['W'][last_character(word_forms.data[n].dest)]==3)
 687     last_character(word_forms.data[n].dest)=0;
 688 }
 689
 690 @ @<Insert the new word form rule into the linked list@>= {
 691   data_index y=(kind&3)<<1; // previous item to |x|
 692   data_index x=word_forms.data[y].next; // current item
 693   int s=strlen(orig);
 694   for(;x!=none;y=x,x=word_forms.data[y].next) {
 695     if(word_forms.data[x].next==none) break;
 696     @#if(word_forms.data[x].level<level) break;
 697     if(word_forms.data[x].level>level) continue;
 698     @#if(strlen(word_forms.data[x].orig)<s) break;
 699   }
 700   word_forms.data[y].next=n;
 701   word_forms.data[n].next=x;
 702 }
 703
 704 @ Now to do computation of changing a word by word forms. This function
 705 expects only one word from input, or multiple words where the last one
 706 should be the word to be converted. Uppercase letters are converted to
 707 lowercase for conversion (but not the other way around), but if the
 708 letters are uppercase in the input, the output will also have uppercase
 709 letters on those positions. The algorithm starts from the right side of
 710 the input string.
 711
 712 The parameter |src| is the input, and |dest| should point to a buffer
 713 which is large enough to store the output string.
 714
 715 @^plurals@>
 716
 717 @-p data_index reform_word(int kind,char*src,char*dest) {
 718   char*l=src+strlen(src);
 719   data_index n=word_forms.data[(kind&3)<<1].next;
 720   strcpy(dest,src); // this is used later
 721   @<Try each word form rule, following the |next| pointers@>;
 722   return none; // in case there is nothing to do
 723 }
 724
 725 @ @<Try each word form rule, following the |next| pointers@>= {
 726   char*p;
 727   int s;
 728   while(n!=none && word_forms.data[n].next!=none) {
 729     s=strlen(word_forms.data[n].orig); @+ p=l-s;
 730     @<Check the characters matching from |p|, going backwards@>;
 731     n=word_forms.data[n].next;
 732   }
 733 }
 734
 735 @ Look ahead for the definition of |wcasecmp| (true means it matches).
 736
 737 @<Check the characters matching from |p|, going backwards@>= {
 738   for(;;) {
 739     if((!word_forms.data[n].left_boundary || p==src
 740      || tables['W'][p[-1]])
 741       && wcasecmp(word_forms.data[n].orig,p))
 742        @<A match to the word form rules has been found@>;
 743     @<Go backwards, stop if we are not allowed to continue backwards@>;
 744   }
 745 }
 746
 747 @ @<A match to the word form rules has been found@>= {
 748   char*o=dest+(p-src);
 749   sprintf(o,"%s%s",word_forms.data[n].dest,p+s);
 750   @<Change the capitalization to match the original@>;
 751   return n;
 752 }
 753
 754 @ Remember, that for example if ``cow'' becomes ``kine'', then ``Cow''
 755 will become ``Kine''. So, it will retain capitalization.
 756
 757 @^cows@>
 758
 759 @<Change the capitalization to match the original@>= {
 760   char*q=word_forms.data[n].orig;
 761   for(;*p && *q;p++,o++,q++)
 762     if(*p==tables['U'][*q] && *p!=tables['L'][*q]) *o=tables['U'][*o];
 763 }
 764
 765 @ @<Go backwards, stop if we are not allowed to continue backwards@>= {
 766   if(word_forms.data[n].right_boundary) break; // matches only on boundary
 767   if(tables['W'][p[s]]) break; // only the last word(s) can be matched
 768   if(p--==src) break; // stop at beginning
 769 }
 770
 771 @ This function is defined to compare strings in the way needed for
 772 matching word forms, including case conversion. The lowercase letters in
 773 the |shorter| string are permitted to match lowercase and uppercase
 774 letters in the |longer| string, and the |shorter| string is permitted to
 775 be shorter and still match.
 776
 777 @-p boolean wcasecmp(char*shorter,char*longer) {
 778   for(;;shorter++,longer++) {
 779     if(!*shorter) return 1;
 780     if(!*longer) return 0;
 781     if(*shorter!=*longer && *shorter!=tables['L'][*longer]) return 0;
 782   }
 783 }
 784
 785 @ Of course it is now needed a command that can access these features from
 786 within a \TeX nicard template. The |level| of the matched rule is also
 787 returned, in case your program might use that information for something.
 788
 789 @<Cases for system commands@>=
 790   @-case 'W': {
 791     // Convert a word form
 792     int k=pop_num();
 793     char*o=pop_string();
 794     char q[1500];
 795     data_index n=reform_word(k,o,q);
 796     push_string(q);
 797     if(n==none) push_num(0);
 798     else push_num(word_forms.data[n].level);
 799     free(o);
 800     break;
 801   }@#
 802
 803 @ @<Display the list of word form rules@>= {
 804   data_index i;
 805   foreach(i,word_forms) {
 806     printf("%d %c\"",i,word_forms.data[i].left_boundary?'[':' ');
 807     display_string(word_forms.data[i].orig);
 808     printf("\"%c -> \"",word_forms.data[i].right_boundary?']':' ');
 809     display_string(word_forms.data[i].dest);
 810     printf("\" %d >%d\n",word_forms.data[i].level
 811       ,word_forms.data[i].next);
 812   }
 813 }
 814
 815 @*Random Number Generation. This program uses the Xorshift algorithm,
 816 invented by George Marsaglia\biblio{Marsaglia (July 2003). ``Xorshift
 817 RNGs''. Journal of Statistical Software Vol.~8 (Issue 14). {\tt
 818 http://www.jstatsoft.org/v08/i14/paper}.}.
 819
 820 @^Marsaglia, George@>
 821 @^random numbers@>
 822
 823 @<Global variables@>=
 824 unsigned int rng_x;
 825 unsigned int rng_y;
 826 unsigned int rng_z;
 827 unsigned int rng_w;
 828
 829 @ @<Initialize the random number generator@>= {
 830 @q[initialize the random seed::]@>
 831   rng_seed((unsigned int)time(0));
 832 @q[::initialize the random seed]@>
 833 }
 834
 835 @ The seed parameters for the random number generator will be seeded using
 836 the linear congruential generator, which is a simpler generator which can
 837 be used to seed it with.
 838
 839 The parameters |lcg_a| and |lcg_c| are parameters to the linear
 840 congruential generator algorithm. The values used here are the same as
 841 those used in GNU C. In this program they will be specified explicitly so
 842 that you can get identical output on different computers.
 843
 844 @d lcg_a 1103515245
 845 @d lcg_c 12345
 846
 847 @-p void rng_seed(unsigned int x) {
 848   rng_x=x=lcg_a*x+lcg_c;
 849   rng_y=x=lcg_a*x+lcg_c;
 850   rng_z=x=lcg_a*x+lcg_c;
 851   rng_w=x=lcg_a*x+lcg_c;
 852 }
 853
 854 @ There is a command to reseed it using a constant (so that you can
 855 generate the same numbers on different computers).
 856
 857 @<Cases for system commands@>=
 858   @-case 'U': {
 859     // Reseed the random number generator
 860     if(stack_ptr->is_string) program_error("Type mismatch");
 861     rng_seed(pop_num());
 862     break;
 863   }@#
 864
 865 @ And now follows the algorithm for generating random numbers. One change
 866 has been made so that once it is modulo, all number will still be of equal
 867 probability.
 868
 869 Numbers are generated in the range from 0 up to but not including |limit|.
 870
 871 @d max_uint ((unsigned int)(-1))
 872
 873 @-p unsigned int gen_random(unsigned int limit) {
 874   unsigned int r=max_uint-(max_uint%limit); // range check
 875   for(;;) {
 876     @<Make the next number |rng_w|...@>;
 877     @<Check the range, try again if out of range, else |return|@>;
 878   }
 879 }
 880
 881 @ @<Make the next number |rng_w| by Xorshift algorithm@>= {
 882   unsigned int t = rng_x ^ (rng_x << 11);
 883   rng_x = rng_y; @+ rng_y = rng_z; @+ rng_z = rng_w;
 884   rng_w ^= (rng_w >> 19) ^ t ^ (t >> 8);
 885 }
 886
 887 @ @<Check the range, try again if out of range, else |return|@>= {
 888   if(rng_w<=r) return rng_w%limit;
 889 }
 890
 891 @ @<Cases for system commands@>=
 892   @-case 'u': {
 893     // Generate a random number
 894     if(stack_ptr->is_string) program_error("Type mismatch");
 895     stack_ptr->number=gen_random(stack_ptr->number);
 896     break;
 897   }@#
 898
 899 @*Stack Programming Language. Now we get to the part where the user can
 900 enter a program, in order to control the features of this program. The
 901 programming language used is like \.{dc}, but different.
 902
 903 @.dc@>
 904
 905 Subroutines are simply stored as strings in the |names| area, since they
 906 are the same as registers.
 907
 908 @ Now we have the storage of registers. Registers 0 to 255 are stored in
 909 this separate list, while other register values are just stored in the
 910 |names| list. There is also a stack, which has storage of the same values
 911 as registers can contain.
 912
 913 @d max_stack 0x1000
 914
 915 @<Typedefs@>=
 916 typedef struct {
 917   boolean is_string;
 918   union @+{
 919     int number;
 920     unsigned char*text;
 921   }@+;
 922 } register_value;
 923
 924 @ @<More elements of |name_data|@>=
 925   register_value value;
 926
 927 @ @<Global variables@>=
 928 register_value registers[256];
 929 register_value stack[max_stack];
 930 register_value*stack_ptr=stack-1; // current top of stack element
 931
 932 @ Here are some codes for pushing and popping the stack.
 933
 934 @d pop_num() ((stack_ptr--)->number)
 935
 936 @-p inline void push_string(char*s) {
 937   ++stack_ptr;
 938   stack_ptr->is_string=1;
 939   stack_ptr->text=strdup(s);
 940 }
 941
 942 @ @-p inline void push_num(int n) {
 943   ++stack_ptr;
 944   stack_ptr->is_string=0;
 945   stack_ptr->number=n;
 946 }
 947
 948 @ @-p inline void stack_dup(void) {
 949   if((stack_ptr[1].is_string=stack_ptr->is_string)) {
 950     stack_ptr[1].text=strdup(stack_ptr->text);
 951   } @+else {
 952     stack_ptr[1].number=stack_ptr->number;
 953   }
 954   stack_ptr++;
 955 }
 956
 957 @ @-p inline void stack_drop(void) {
 958   if(stack_ptr->is_string) free(stack_ptr->text);
 959   --stack_ptr;
 960 }
 961
 962 @ @-p inline char*pop_string(void) {
 963   char*p=stack_ptr->text;
 964   stack_ptr->is_string=0; stack_ptr->text=0;
 965   --stack_ptr;
 966   return p;
 967 }
 968
 969 @ Also, some subroutines are needed here in order to deal with registers.
 970
 971 For |fetch_code|, the string |"0[]+"| is returned if it is not a string,
 972 generating a ``Type mismatch'' error when you try to run it.
 973
 974 @-p inline char*fetch_code(int r) {
 975   if(!(r&~0xFF)) {
 976     if(!registers[r].is_string) return "0[]+";
 977     return registers[r].text;
 978   } @+else {
 979     if(!name_info(r).value.is_string) return "0[]+";
 980     return name_info(r).value.text;
 981   }
 982 }
 983
 984 @ @-p inline void fetch(int r) {
 985   register_value*v;
 986   if(!(r&~0xFF)) v=&(registers[r]);
 987   else v=&(name_info(r).value);
 988   (++stack_ptr)->is_string=v->is_string;
 989   if(v->is_string) {
 990     stack_ptr->text=strdup(v->text);
 991   } @+else {
 992     stack_ptr->number=v->number;
 993   }
 994 }
 995
 996 @ @-p inline void store(int r) {
 997   register_value*v;
 998   if(!(r&~0xFF)) v=&(registers[r]);
 999   else v=&(name_info(r).value);
1000   if(v->is_string) free(v->text);
1001   v->is_string=stack_ptr->is_string;
1002   if(v->is_string) {
1003     v->text=stack_ptr->text;
1004   } @+else {
1005     v->number=stack_ptr->number;
1006   }
1007   --stack_ptr;
1008 }
1009
1010 @ There is also a save stack. This save stack stores the saved values of
1011 the registers |'0'| to |'9'|, so that you can have local variables in a
1012 subroutine.
1013
1014 @<Global variables@>=
1015 register_value save_stack[520];
1016 register_value*save_stack_ptr=save_stack;
1017
1018 @ These codes deal with the save stack. Strings will be copied when
1019 saving. When loading, strings that were previously in the registers will
1020 be freed.
1021
1022 @<Save local registers to the save stack@>= {
1023   int i;
1024   for(i='0';i<='9';i++) {
1025     *save_stack_ptr=registers[i];
1026     if(registers[i].is_string)
1027       save_stack_ptr->text=strdup(save_stack_ptr->text);
1028     save_stack_ptr++;
1029   }
1030 }
1031
1032 @ @<Load local registers from the save stack@>= {
1033   int i;
1034   for(i='9';i>='0';i--) {
1035     if(registers[i].is_string) free(registers[i].text);
1036     registers[i]=*--save_stack_ptr;
1037   }
1038 }
1039
1040 @*Commands for Stack Programming Language. Finally, is the code where it
1041 can be executed. The return value of this function indicates how many
1042 levels should be exit when it is called.
1043
1044 @-p int execute_program(unsigned char*prog) {
1045   unsigned char*ptr=prog;
1046 reset_execute_program:
1047   for(;*ptr;ptr++) {
1048     switch(*ptr) {
1049       @<Cases for literal data commands@>@;
1050       @<Cases for stack manipulation commands@>@;
1051       @<Cases for arithmetic commands@>@;
1052       @<Cases for flow-control commands@>@;
1053       @<Cases for register/table operation commands@>@;
1054       @<Cases for string commands@>@;
1055       @<Cases for condition/compare commands@>@;
1056       @<Cases for local registers commands@>@;
1057       @<Cases for system commands@>@;
1058       @-case '?': @<Do a diagnostics command@>@;@+break;
1059       @-case '=': @<Do a typesetting command@>@;@+break;
1060       default:
1061         if(*ptr>='0' && *ptr<='9') {
1062           @<Read a literal number and push to stack@>;
1063         } @+else if(0x80&*ptr) {
1064           @<Execute a subroutine code from the current character@>;
1065         }
1066         break;
1067     }
1068     if(stack_ptr<stack-1) program_error("Stack underflow");
1069     if(stack_ptr>stack+max_stack) program_error("Stack overflow");
1070   }
1071   return 0;
1072 }
1073
1074 @ @<Cases for literal data commands@>=
1075   @-case '`': {
1076     // Literal ASCII character
1077     push_num(*++ptr);
1078     break;
1079   }@#
1080   @-case '[': {
1081     // Literal string
1082     @<Read a literal string and push to stack@>;
1083     break;
1084   }@#
1085   @-case '(': {
1086     // Literal name
1087     @<Read a literal name and push its number to the stack@>;
1088     break;
1089   }@#
1090
1091 @ @<Read a literal number and push to stack@>= {
1092   int n=0;
1093   while(*ptr>='0' && *ptr<='9') n=10*n+(*ptr++)-'0';
1094   --ptr;
1095   push_num(n);
1096 }
1097
1098 @ @<Read a literal string and push to stack@>= {
1099   char*p=++ptr;
1100   int n=1;
1101   while(n && *ptr) {
1102     if(*ptr=='[') ++n;
1103     if(*ptr==']') --n;
1104     if(n) ptr++;
1105   }
1106   if(!*ptr) program_error("Unterminated string literal");
1107   *ptr=0;
1108   push_string(p);
1109   *ptr=']';
1110 }
1111
1112 @ @<Read a literal name and push its number to the stack@>= {
1113   char*p=++ptr;
1114   while(*ptr && *ptr!=')') ptr++;
1115   if(!*ptr) program_error("Unterminated string literal");
1116   *ptr=0;
1117   push_num(find_name(p));
1118   *ptr=')';
1119 }
1120
1121 @ @<Cases for stack manipulation commands@>=
1122   @-case 'D': {
1123     // Drop top item of stack
1124     stack_drop();
1125     break;
1126   }@#
1127   @-case 'c': {
1128     // Clears the stack, rendering it empty
1129     while(stack_ptr>=stack) stack_drop();
1130     break;
1131   }@#
1132   @-case 'd': {
1133     // Duplicates the value on top of the stack.
1134     stack_dup();
1135     break;
1136   }@#
1137   @-case 'r': {
1138     // Swaps the top two values on the stack
1139     stack_ptr[1]=stack_ptr[0];
1140     stack_ptr[0]=stack_ptr[-1];
1141     stack_ptr[-1]=stack_ptr[1];
1142     break;
1143   }@#
1144
1145 @ @<Cases for arithmetic commands@>=
1146   @-case '+': {
1147     // Add two numbers, or concatenate two strings
1148     if(stack_ptr->is_string) {
1149       @<Concatenate strings on the stack@>;
1150     }@+ else {
1151       int n=pop_num();
1152       if(stack_ptr->is_string)
1153         program_error("Type mismatch");
1154       stack_ptr->number+=n;
1155     }
1156     break;
1157   }@#
1158   @-case '-': {
1159     // Subtract two numbers, or compare two strings
1160     if(stack_ptr->is_string) {
1161       @<Compare strings on the stack@>;
1162     }@+ else {
1163       int n=pop_num();
1164       if(stack_ptr->is_string)
1165         program_error("Type mismatch");
1166       stack_ptr->number-=n;
1167     }
1168     break;
1169   }@#
1170   @-case '*': {
1171     // Multiply two numbers
1172     int n=pop_num();
1173     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1174       program_error("Number expected");
1175     stack_ptr->number*=n;
1176     break;
1177   }@#
1178   @-case '/': {
1179     // Divide two numbers
1180     int n=pop_num();
1181     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1182       program_error("Number expected");
1183     if(n==0) program_error("Division by zero");
1184     stack_ptr->number/=n;
1185     break;
1186   }@#
1187   @-case '%': {
1188     // Modulo of two numbers
1189     int n=pop_num();
1190     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1191       program_error("Number expected");
1192     if(n==0) program_error("Division by zero");
1193     stack_ptr->number%=n;
1194     break;
1195   }@#
1196
1197 @ @<Concatenate strings on the stack@>= {
1198   char*s=pop_string();
1199   char*q;
1200   if(!stack_ptr->is_string) program_error("Type mismatch");
1201   q=malloc(strlen(s)+strlen(stack_ptr->text)+1);
1202   strcpy(q,stack_ptr->text);
1203   strcpy(q+strlen(q),s);
1204   stack_drop();
1205   push_string(q);
1206   free(q);
1207   free(s);
1208 }
1209
1210 @ @<Compare strings on the stack@>= {
1211   char*s=pop_string();
1212   char*q=pop_string();
1213   push_num(strcmp(q,s));
1214   free(q);
1215   free(s);
1216 }
1217
1218 @ @<Cases for flow-control commands@>=
1219   @-case 'Q': {
1220     // Exit from multiple levels
1221     int q=pop_num();
1222     if(q>0) return q-1;
1223     break;
1224   }@#
1225   @-case 'Y': {
1226     // Go back to beginning
1227     ptr=prog-1;
1228     break;
1229   }@#
1230   @-case 'q': {
1231     // Exit from two levels
1232     return 1;
1233     break;
1234   }@#
1235   @-case 'x': {
1236     // Execute code from top of stack
1237     @<Execute a string or subroutine code from top of stack@>;
1238     break;
1239   }@#
1240
1241 @ Note here, it is a recursive function call.
1242 @^recursive@>
1243
1244 @<Execute a string or subroutine code from top of stack@>= {
1245   if(stack_ptr->is_string) {
1246     char*p=pop_string();
1247     int q=execute_program(p);
1248     free(p);
1249     if(q) return q-1;
1250   } @+else {
1251     char*p=fetch_code(pop_num());
1252     int q=execute_program(p);
1253     if(q) return q-1;
1254   }
1255 }
1256
1257 @ Since the extended characters (|0x80| to |0xFF|) do not correspond to
1258 any commands, here we can use them to execute a subroutine code, allowing
1259 many things related to self-modifying code (and other stuff) to be done
1260 that would be difficult otherwise.
1261
1262 @<Execute a subroutine code from the current character@>= {
1263   char*p=fetch_code(*ptr);
1264   int q=execute_program(p);
1265   if(q) return q-1;
1266 }
1267
1268 @ @<Cases for register/table operation commands@>=
1269   @-case ':': {
1270     // Store value to table
1271     int n;
1272     if(stack_ptr->is_string) program_error("Number expected");
1273     n=pop_num();
1274     tables[0x7F&*++ptr][n]=pop_num();
1275     break;
1276   }@#
1277   @-case ';': {
1278     // Load value from table
1279     stack_ptr->number=tables[0x7F&*++ptr][stack_ptr->number];
1280     break;
1281   }@#
1282   @-case 'L': {
1283     // Load value from register named by stack
1284     if(stack_ptr->is_string) program_error("Number expected");
1285     fetch(pop_num());
1286     break;
1287   }@#
1288   @-case 'S': {
1289     // Store value in register named by stack
1290     if(stack_ptr->is_string) program_error("Number expected");
1291     store(pop_num());
1292     break;
1293   }@#
1294   @-case 'l': {
1295     // Load value from register
1296     fetch(*++ptr);
1297     break;
1298   }@#
1299   @-case 's': {
1300     // Store value in register
1301     store(*++ptr);
1302     break;
1303   }@#
1304
1305 @ @<Cases for string commands@>=
1306   @-case 'B': {
1307     // Put brackets around a string, or convert number to text
1308     if(stack_ptr->is_string) {
1309       @<Put brackets around string at top of stack@>;
1310     } @+else {
1311       @<Convert top of stack to string representation of a number@>;
1312     }
1313     break;
1314   }@#
1315   @-case 'Z': {
1316     // Calculate number of characters in a string
1317     char*s=pop_string();
1318     push_num(strlen(s));
1319     free(s);
1320     break;
1321   }@#
1322   @-case 'a': {
1323     // ``ASCIIfy'' a number
1324     if(stack_ptr->is_string) {
1325       if(stack_ptr->text[0]) stack_ptr->text[1]=0;
1326     } @+else {
1327       int n=stack_ptr->number;
1328       stack_ptr->is_string=1;
1329       stack_ptr->text=malloc(2);
1330       stack_ptr->text[0]=n;
1331       stack_ptr->text[1]=0;
1332     }
1333     break;
1334   }@#
1335   @-case 'A': {
1336     // Take the first character from the string
1337     char*s=stack_ptr->text;
1338     if(!stack_ptr->is_string || !*s) return 0;
1339     push_num(*s);
1340     stack_ptr[-1].text=strdup(s+1);
1341     free(s);
1342     break;
1343   }@#
1344   @-case 'N': {
1345     // Convert a register number to its name
1346     int n=stack_ptr->number;
1347     if(stack_ptr->is_string) program_error("Type mismatch");
1348     if(n<256 || n>=names.used+256) program_error("Out of range");
1349     stack_drop();
1350     push_string(names.data[n-256].name);
1351     break;
1352   }@#
1353
1354 @ @<Put brackets around string at top of stack@>= {
1355   char*buf=malloc(strlen(stack_ptr->text)+3);
1356   sprintf(buf,"[%s]",stack_ptr->text);
1357   free(stack_ptr->text);
1358   stack_ptr->text=buf;
1359 }
1360
1361 @ @<Convert top of stack to string representation of a number@>= {
1362   char buf[32];
1363   sprintf(buf,"%d",stack_ptr->number);
1364   stack_drop();
1365   push_string(buf);
1366 }
1367
1368 @ Here is how the ``Arithmetic IF'' command works: On the stack you have
1369 any three values at the top, and a number underneath it. Those are all
1370 removed, except one of the three values which is selected based on the
1371 sign of the number (the condition value).
1372
1373 @<Cases for condition/compare commands@>=
1374   @-case 'i': {
1375     // Arithmetic IF
1376     @<Do the ``Arithmetic IF''@>;
1377     break;
1378   }@#
1379   @-case '&': {
1380     // Bitwise AND
1381     int n=pop_num();
1382     if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1383       program_error("Number expected");
1384     stack_ptr->number&=n;
1385     break;
1386   }@#
1387
1388 @ Do you like this algorithm? Is this a real question?
1389
1390 @^strange codes@>
1391
1392 @<Do the ``Arithmetic IF''@>= {
1393   register_value v=stack_ptr[-3];
1394   int n=v.number;
1395   n=-(n<0?2:!n);
1396   stack_ptr[-3]=stack_ptr[n];
1397   stack_ptr[n]=v;
1398   stack_drop();@+stack_drop();@+stack_drop();
1399 }
1400
1401 @ @<Cases for local registers commands@>=
1402   @-case '<': {
1403     // Save locals
1404     @<Save local registers to the save stack@>;
1405     break;
1406   }@#
1407   @-case '>': {
1408     // Restore locals
1409     @<Load local registers from the save stack@>;
1410     break;
1411   }@#
1412
1413 @ When there is a program error (such as stack underflow), the following
1414 subroutine is used to handle it.
1415
1416 @d program_error(_text) program_error_(prog,ptr,_text)
1417
1418 @-p void program_error_(char*prog,char*ptr,char*msg) {
1419   fprintf(stderr,"Error in %s on line %d",current_filename,current_line);
1420   fprintf(stderr,"\n! %s\ns%dS%dp%d near \"",msg,stack_ptr-stack,
1421    save_stack_ptr-save_stack,ptr-prog);
1422   @<Display the codes near the part that caused the error@>;
1423   fprintf(stderr,"\"\n");
1424   exit(1);
1425 }
1426
1427 @ @<Display the codes near the part that caused the error@>= {
1428   char buf[32];
1429   char*p=ptr-5;
1430   int i;
1431   if(p<prog || p>ptr) p=prog;
1432   for(i=0;p+i<=ptr && p[i];i++) buf[i]=p[i];
1433   buf[i]=0;
1434   fprintf(stderr,"%s",buf);
1435 }
1436
1437 @*Tables and Registers. The tables must be stored here. There are 128
1438 tables with 256 entries each, each of which can store one byte of data.
1439 These tables are used for converting uppercase/lowercase, for deciding
1440 which characters need to be escaped in \TeX, and so on.
1441
1442 The purposes of the built-in registers are also described in this chapter.
1443 The tables and registers named by uppercase letters are for system use.
1444 The tables and registers named by lowercase can be used by the user.
1445
1446 @<Global variables@>=
1447 unsigned char tables[128][256];
1448
1449 @ Here are the uses of the built-in tables and registers:
1450 @^built-in registers@>
1451 @^built-in tables@>
1452
1453 Register \.A: The current position in the current cards area.
1454
1455 Register \.C: The current cards area.
1456
1457 Register \.D: Dots per inch, multiplied by 100.
1458
1459 Register \.E: The escape character for \TeX. If this is a string, the
1460 entire string is the prefix; otherwise, it is a ASCII number of the
1461 character to be used.
1462
1463 Register \.K: Index number for last keyword entry added. Also used when
1464 dealing with keyword operation commands, and when a keyword is matched in
1465 a pattern.
1466
1467 Register \.P: The current pattern area.
1468
1469 Register \.Q: The parameters for the ImageMagick command-line, separated
1470 by spaces.
1471
1472 Register \.T: Alignment tab character for \TeX. Same considerations apply
1473 as the \.E register.
1474
1475 Register \.U: A code to execute for a deck specification enrty with \.x
1476 flag set.
1477
1478 Register \.V: The version number of this program.
1479
1480 Register \.W: A code which pushes the whatsit replacements onto the stack.
1481 It is initialized to a blank string before each line in a card area. It
1482 should push the replacements in the reverse order of the whatsits, so you
1483 could use a code like this, for example: \.{[(Abracadabra)]lW+sW}
1484
1485 Register \.X: Horizontal coordinate across the page (in pixels).
1486
1487 Register \.Y: Vertical coordinate across the page (in pixels).
1488
1489 Register \.Z: Should be set to a code to execute after doing everything
1490 else (but before writing output files).
1491
1492 Table \.E: Indicates which characters need escaped for \TeX. Also used for
1493 category codes in internal typesetting mode (a discussion of the category
1494 codes will be deferred to a later part of this book).
1495
1496 Table \.F: Space factor codes for internal typesetting, where 40 is normal
1497 (multiplying these values by 25 results in the corresponding \.{\\sfcode}
1498 values in \TeX). Zero means no change.
1499
1500 Table \.G: Table containing information for sorting and grouping.
1501
1502 Table \.J: Left margin protrusions for internal typesetting. A value of
1503 128 is normal. Each one unit less or greater than 128 represents a
1504 distance of 0.005 em, where number less than 128 for negative kerns and
1505 greater than 128 for positive kerns. (Note that you will use {\sl negative
1506 negative} kerns to protrude into the margin, both for the left protrusions
1507 and for the right protrusions!)
1508
1509 Table \.K: Right margin protrusions for internal typesetting.
1510
1511 Table \.L: Conversion to lowercase.
1512
1513 Table \.S: Information for natural sorting.
1514
1515 Table \.U: Conversion to uppercase.
1516
1517 Table \.W: Table for word form rules. Zero means a letter, one means a
1518 word separator, two means use to mark beginning of a word, three means use
1519 to mark the end of a word. In this program, it is advantageous to use the
1520 fact that zero means word characters (such as letters), and nonzero means
1521 nonword characters.
1522
1523 @d init_register(_reg,_val) do@+{
1524   registers[_reg].is_string=0;
1525   registers[_reg].number=(_val);
1526 }@+while(0)@;
1527 @#
1528 @d init_register_str(_reg,_val) do@+{
1529   registers[_reg].is_string=1;
1530   registers[_reg].text=strdup(_val);
1531 }@+while(0)@;
1532
1533 @<Initialize the tables and registers@>= {
1534   int i;
1535   for(i=0;i<256;i++) init_register(i,0);
1536   init_register('E','\\');
1537   init_register('V',version_number);
1538   @<Initialize table of alphabetical case conversion@>;
1539   @<Initialize tables for internal typesetting@>;
1540 }
1541
1542 @ @<Initialize table of alphabetical case conversion@>= {
1543   for(i=0;i<256;i++) tables['L'][i]=tables['U'][i]=i;
1544   for(i='A';i<='Z';i++) {
1545     tables['L'][i]=i+'a'-'A';
1546     tables['U'][i+'a'-'A']=i;
1547   }
1548 }
1549
1550 @ @<Display the contents of table |*++ptr|@>= {
1551   int t=*++ptr;
1552   int i;
1553   for(i=0;i<256;i++) {
1554     printf("%c%c",tables[t][i]?'+':'.',@|
1555       (tables[t][i]<0x7F && tables[t][i]>=' ')?tables[t][i]:'.'
1556     );
1557     if((i&0x0F)==0x0F) printf("\n");
1558   }
1559   for(i=' ';i<0x7F;i++) if(tables[t][i]) printf("%c",i);
1560 }
1561
1562 @*Diagnostics. Here is diagnostics commands. These are used to display the
1563 internal information on standard output, so that you can check how these
1564 things are working. (You can also use \.{gdb} for debugging purposes.) A
1565 diagnostics command always starts with a question mark, and is then
1566 followed by one more character indicating the type of diagnostics
1567 requestsed. (Some are followed by an additional character after that.)
1568
1569 @<Do a diagnostics command@>= {
1570   switch(*++ptr) {
1571     case 'c': @<Display the sorted card list@>; @+break;
1572     case 'd': @<Display the deck list@>; @+break;
1573     case 'f': @<Display font information@>; @+break;
1574     case 'k': @<Display the list of keywords@>; @+break;
1575     case 'n': @<Display the list of names@>; @+break;
1576     case 'p': @<Display the list of patterns@>; @+break;
1577     case 's': @<Display the contents of the stack@>; @+break;
1578     case 't': @<Display the contents of table |*++ptr|@>; @+break;
1579     case 'w': @<Display the list of word form rules@>; @+break;
1580     case 'x': @<Display the list of typeset nodes@>; @+break;
1581     case 'y': @<Display typesetting diagnostics@>; @+break;
1582     default: program_error("Unknown type of diagnostics");
1583   }
1584 }
1585
1586 @ One subroutine is used here for displaying strings with escaped, so that
1587 it will display on a terminal without messing it up or omitting the
1588 display of some characters.
1589
1590 @-p void display_string(char*s) {
1591   for(;*s;s++) {
1592     if(*s<' ' || *s==0x7F) {
1593       printf("^%c",0x40^*s);
1594     } @+else {
1595       printf("%c",*s);
1596     }
1597   }
1598 }
1599
1600 @ @<Display the contents of the stack@>= {
1601   register_value*p;
1602   for(p=stack;p<=stack_ptr;p++) {
1603     if(p->is_string) {
1604       printf("[");
1605       display_string(p->text);
1606       printf("]\n");
1607     } @+else {
1608       printf("%d\n",p->number);
1609     }
1610   }
1611 }
1612
1613 @ More of the diagnostics functions are included in the chapters for the
1614 data structures which it is displaying.
1615
1616 @*Pattern Matching. Now, finally, after the chapter about patterns, and
1617 going through many other things in between, comes to the chapter in which
1618 patterns are actually being matched.
1619
1620 One structure is used here for the information about how to match it, and
1621 what has been matched from it. The parameter |num_capture| is how many
1622 captured parts there are, and the |start| and |end| arrays store the index
1623 into the |src| string of where the matches are. The entire matched part is
1624 indicated by |start[0]| and |end[0]| (note always |start[0]==0|).
1625
1626 @<Typedefs@>=
1627 typedef struct {
1628   char*src;
1629   char*truesrc; // used for checking true beginning of the line
1630   char*pattern;
1631   unsigned int category;
1632   int start[16];
1633   int end[16];
1634   int num_capture;
1635 } match_info;
1636
1637 @ This first one just matches one pattern against a string to see if it
1638 matches. It returns true if it does match. (It is somewhat inefficient.)
1639
1640 @-p boolean match_pattern(match_info*mat) {
1641   char*src; // current start of source string
1642   char*ptr; // pointer into source string |src|
1643   char*pptr; // pointer into pattern string
1644   src=mat->src; @+ mat->num_capture=0; @+ pptr=mat->pattern; @+ ptr=src;
1645   @<Execute the pattern on the string |src|@>;
1646   mismatch: return 0;
1647 }
1648
1649 @ This loop executes each command in the pattern in attempt to match each
1650 character. In case of mismatch, it will break out of this loop, and
1651 continue with the next iteration of the loop in the previous section.
1652
1653 @d not_a_marker !(pptr[-1]&0x80)
1654
1655 @<Execute the pattern on the string |src|@>= {
1656   while(*pptr) {
1657     switch(*pptr++) {
1658       case begin_capture:
1659         mat->start[++mat->num_capture]=ptr-mat->src; @+break;
1660       case end_capture: mat->end[mat->num_capture]=ptr-mat->src; @+break;
1661       case match_keyword: @<Do |match_keyword|@>; @+break;
1662       case match_table:
1663         if(!tables[*pptr++][*ptr++]) goto mismatch; @+break;
1664       case optional_table: ptr+=!!tables[*pptr++][*ptr]; @+break;
1665       case failed_match: goto mismatch;
1666       case jump_table:
1667         if(!(pptr=strchr(mat->pattern,0x80|tables[*pptr++][*ptr++])))
1668           goto mismatch;
1669         @+break;
1670       case successful_match: @<Do |successful_match|@>;
1671       case back_one_space: if(ptr--==mat->src) goto mismatch; @+break;
1672       case forward_one_space: if(!*ptr++) goto mismatch; @+break;
1673       case match_left_side: if(ptr!=mat->truesrc) goto mismatch; @+break;
1674       case match_right_side: if(*ptr>=' ') goto mismatch; @+break;
1675       default: if(not_a_marker && pptr[-1]!=*ptr++) goto mismatch;
1676     }
1677   }
1678 }
1679
1680 @ @<Do |successful_match|@>= {
1681   mat->start[0]=0;
1682   mat->end[0]=ptr-mat->src;
1683   return 1;
1684 }
1685
1686 @ And now, the next part matches from an area and changes the string in
1687 place, possibly by reallocating it. The |src| pointer passed to this
1688 function should be one that can be freed!
1689
1690 @-p char*do_patterns(char*src,int area) {
1691   pattern_data*pat;
1692   match_info mat;
1693   int index=0; // index into |src| string
1694   @<Cancel if there isn't a pattern area@>;
1695 continue_matching:
1696   if(index>=strlen(src)) return src;
1697   pat=pattern_areas.data+name_info(area).pattern_area;
1698   for(;;) {
1699     @<Fill up the |mat| structure for testing the current pattern@>;
1700     if(mat.pattern && match_pattern(&mat)) {
1701       @<Push the captured strings to the stack@>;
1702       @<Call the subroutine associated with this pattern@>;
1703       if(stack_ptr->is_string) {
1704         @<Replace the matched part from the stack and fix the |index|@>;
1705       } @+else {
1706         index+=mat.end[0];
1707       }
1708       stack_drop();
1709       goto continue_matching;
1710     }
1711     @<Select the next pattern in this area or |break|@>;
1712   }
1713   index++; @+ goto continue_matching;
1714 }
1715
1716 @ @<Cancel if there isn't a pattern area@>= {
1717   if(area<256) return src;
1718   if(!name_info(area).has_pattern_area) return src;
1719 }
1720
1721 @ @<Fill up the |mat| structure for testing the current pattern@>= {
1722   mat.src=src+index;
1723   mat.truesrc=src;
1724   mat.pattern=pat->text;
1725   mat.category=pat->category;
1726 }
1727
1728 @ @<Push the captured strings to the stack@>= {
1729   int i;
1730   for(i=mat.num_capture;i;i--) {
1731     push_string(src+index+mat.start[i]);
1732     stack_ptr->text[mat.end[i]-mat.start[i]]=0;
1733   }
1734 }
1735
1736 @ @<Call the subroutine associated with this pattern@>= {
1737   execute_program(names.data[pat->subroutine].value.text);
1738 }
1739
1740 @ The memory allocated is probably more than is needed, but this way is
1741 simpler. It is always sufficient amount, though. Think about it.
1742
1743 @^thought@>
1744
1745 @<Replace the matched part from the stack and fix the |index|@>= {
1746   char*q=malloc(strlen(src)+strlen(stack_ptr->text)+1);
1747   strcpy(q,src);
1748   sprintf(q+index,"%s%s",stack_ptr->text,src+index+mat.end[0]);
1749   free(src);
1750   src=q;
1751   index+=strlen(stack_ptr->text);
1752 }
1753
1754 @ @<Select the next pattern in this area or |break|@>= {
1755   if(pat->next==none) break;
1756   pat=pattern_areas.data+pat->next;
1757 }
1758
1759 @ Finally, there is a command |'M'| to do a pattern matching and
1760 replacement with a string, inside of a stack subroutine code.
1761
1762 @<Cases for system commands@>=
1763   @-case 'M': {
1764     // do pattern matching and replacement
1765     int n=pop_num();
1766     if(!stack_ptr->is_string) program_error("Type mismatch");
1767     stack_ptr->text=do_patterns(stack_ptr->text,n);
1768     break;
1769   }@#
1770
1771 @*Matching Keywords. Codes for matching keywords have been placed in
1772 another chapter, instead of making the previous chapter longer.
1773
1774 So now we can see how it is matched keywords in a pattern code.
1775
1776 @<Do |match_keyword|@>= {
1777   match_info m;
1778   char mstr[512];
1779   char t=*pptr++; // indicate which table to use
1780   data_index best=none;
1781   int best_length=-1;
1782   @<Try matching each keyword belonging to the category@>;
1783   if(best==none) goto mismatch;
1784   @<Adjust the \.K register for this keyword match@>;
1785   ptr+=m.end[0];
1786 }
1787
1788 @ @<Adjust the \.K register for this keyword match@>= {
1789   if(registers['K'].is_string) free(registers['K'].text);
1790   registers['K'].is_string=0;
1791   registers['K'].number=best;
1792 }
1793
1794 @ When matching keywords, all of them will be tried, in case there are
1795 better candidates for the search (bigger is better (so, for example,
1796 |"Power of One"| will override |"Power"|); failing that, later ones are
1797 better than earlier ones (so that user files can override keywords in
1798 template files)).
1799
1800 @^Courtenay, Bryce@>
1801 @^Houghton, Israel@>
1802 @^Luce, Ron@>
1803
1804 @<Try matching each keyword belonging to the category@>= {
1805   data_index i;
1806   foreach(i,keywords) {
1807     if(keywords.data[i].category&mat->category &&
1808      strlen(keywords.data[i].match)>=best_length) {
1809       @<Set up the |match_info| structure called |m|@>;
1810       @<Attempt applying this keyword match@>;
1811     }
1812   }
1813 }
1814
1815 @ @<Set up the |match_info| structure called |m|@>= {
1816   sprintf(mstr,"%s%c%c%c",
1817     keywords.data[i].match,match_table,t,successful_match);
1818   m.src=m.truesrc=ptr;
1819   m.pattern=mstr;
1820 }
1821
1822 @ @<Attempt applying this keyword match@>= {
1823   if(match_pattern(&m)) {
1824     best=i;
1825     best_length=strlen(keywords.data[i].match);
1826   }
1827 }
1828
1829 @*Sorting and Grouping. The card lists can be sorted/grouped using these
1830 commands, which are generally used by macros that create the records for
1831 the cards in the card areas.
1832
1833 @<Cases for system commands@>=
1834   @-case 'n': {
1835     // Add a new list entry
1836     data_index n=new_record(card_list);
1837     card_list.data[n].token_ptr=
1838       card_areas.data[set_card_area(registers['C'].number)].used
1839     ;
1840     break;
1841   }@#
1842   @-case 'f': {
1843     // Set a field value of the list entry
1844     data_index n=card_list.used-1;
1845     int x=pop_num();
1846     int y=pop_num();
1847     if(n==none) program_error("No card list is available");
1848     card_list.data[n].field[x&31]=y;
1849     break;
1850   }@#
1851
1852 @ Other than the commands to make the list entries above, there must be,
1853 of course, the actual sorting and grouping being done!
1854
1855 Sorting and grouping are controlled by the \.G table. Starting from a
1856 given offset (added), you use thirty-two entries for the thirty-two
1857 fields.
1858
1859 @<Cases for system commands@>=
1860   @-case 'G': {
1861     // Sort the list
1862     sorting_table_offset=pop_num();
1863     qsort(card_list.data,card_list.used,sizeof(list_entry),list_compare);
1864     @<Mark positions in the sorted list@>;
1865     break;
1866   }@#
1867
1868 @ @<Global variables@>=
1869 int sorting_table_offset;
1870
1871 @ This is the compare function for the list sorting. It is also worth to
1872 notice here what values belong in the \.G table. (There are also some
1873 other values, which are described a bit later.)
1874
1875 @d no_sort 0
1876 @d primary_ascending 'A'
1877 @d primary_descending 'Z'
1878 @d primary_name 'N'
1879 @d secondary_ascending 'a'
1880 @d secondary_descending 'z'
1881 @d secondary_name 'n'
1882 @d record_sorted_position 'R'
1883 @d reset_high_bits 'q'
1884 @#
1885 @d G_table(_field) (tables['G'][((sorting_table_offset+(_field))&0xFF)])
1886 @d p1s ((list_entry*)p1)
1887 @d p2s ((list_entry*)p2)
1888
1889 @-p int list_compare(const void*p1,const void*p2) {
1890   @<Compare using fields indicated by \.G table@>;
1891   @<Compare using the card's name and the \.S table@>;
1892   @<Compare using the order in which the cards are typed in@>;
1893   return 0; // This can't, but will, happen.
1894 }
1895
1896 @ @<Compare using fields indicated by \.G table@>= {
1897   int i;
1898   for(i=0;i<32;i++) if(p1s->field[i]!=p2s->field[i]) {
1899     if(G_table(i)==primary_ascending || (G_table(i)&0x80)) {
1900       return (p1s->field[i]>p2s->field[i])?1:-1;
1901     } @+else if(G_table(i)==primary_descending) {
1902       return (p1s->field[i]<p2s->field[i])?1:-1;
1903     } @+else if(G_table(i)==primary_name) {
1904       return name_compare(p1s->field[i],p2s->field[i]);
1905     }
1906   }
1907   for(i=0;i<32;i++) if(p1s->field[i]!=p2s->field[i]) {
1908     if(G_table(i)==secondary_ascending) {
1909       return (p1s->field[i]>p2s->field[i])?1:-1;
1910     } @+else if(G_table(i)==secondary_descending) {
1911       return (p1s->field[i]<p2s->field[i])?1:-1;
1912     } @+else if(G_table(i)==secondary_name) {
1913       return name_compare(p1s->field[i],p2s->field[i]);
1914     }
1915   }
1916 }
1917
1918 @ When all else fails, \strike{play dead} use the order in which the cards
1919 have been typed in. This is how it is made stable, and that you can get
1920 the same results on any computer.
1921
1922 @^Smith, Steve@>
1923
1924 @<Compare using the order in which the cards...@>= {
1925   if(p1s->token_ptr>p2s->token_ptr) return 1;
1926   if(p1s->token_ptr<p2s->token_ptr) return -1;
1927 }
1928
1929 @ The last thing to do after sorting, is mark positions in the list if it
1930 is requested to do so.
1931
1932 In addition, it shall also optionally mark high bits (30 to 27) of some
1933 fields, based on when other fields change. This helps with doing multi-%
1934 dimensional statistics. The fields that it is based on will automatically
1935 be primary sorted since such sorting is required for the marking algorithm
1936 to work properly.
1937
1938 @<Mark positions in the sorted list@>= {
1939   data_index i;
1940   int j;
1941   for(j=0;j<32;j++) {
1942     if(G_table(j)==record_sorted_position) {
1943       foreach(i,card_list) card_list.data[i].field[j]=i;
1944     } @+else if(G_table(j)&0x80) {
1945       @<Mark high bits of fields to prepare for...@>;
1946     } @+else if(G_table(j)==reset_high_bits) {
1947       foreach(i,card_list) card_list.data[i].field[j]&=0x0FFFFFFF;
1948     }
1949   }
1950 }
1951
1952 @ The rule is that whenever the current field's value changes, the bit in
1953 the corresponding grouping field will be flipped. Since the statistics
1954 grouping always treats consecutive equal values in the grouping field as
1955 belonging to the same group, this is a way to insert ``group breaks'' into
1956 the list.
1957
1958 @<Mark high bits of fields to prepare for complex statistics@>= {
1959   int f=G_table(j)&0x1F; // other field number
1960   int v=card_list.data[0].field[j]; // previous value
1961   int k=1<<(27+((G_table(j)&0x60)>>5)); // bit flip value
1962   int b=0; // current bit value
1963   foreach(i,card_list) {
1964     if(v!=card_list.data[i].field[j]) b^=k;
1965     card_list.data[i].field[f]&=~k;
1966     card_list.data[i].field[f]|=b;
1967     v=card_list.data[i].field[j];
1968   }
1969 }
1970
1971 @ @<Display the sorted card list@>= {
1972   data_index i;
1973   int j;
1974   foreach(i,card_list) {
1975     printf("%d=[ ",card_list.data[i].token_ptr);
1976     for(j=0;j<32;j++) printf("%d ",card_list.data[i].field[j]);
1977     printf("]\n");
1978   }
1979 }
1980
1981 @*Natural Sorting. A natural compare algorithm is used here. It is a
1982 generalization of Martin Pool's algorithm\biblio{Pool, Martin. ``Natural
1983 Order String Comparison''. {\tt
1984 http://sourcefrog.net/projects/natsort/}.}.
1985
1986 The \.S table maps from character tokens to the sorting specifications.
1987 Name tokens are converted to |whatsit| when looking up in this table.
1988
1989 Tokens are grouped into digits, letters, and priority letters. There are
1990 also some extras, such as spaces and radix point. A string of consecutive
1991 digits is treated as numeric, so a number with more digits comes after a
1992 number with less digits.
1993
1994 Priority letters are used mainly for sorting roman numerals. Two or more
1995 consecutive priority letters are considered as a group, otherwise they are
1996 treated in the same way as ordinary letters. A group is ranked with the
1997 letters latest in the alphabet, so for example, if |'I'| and |'X'| are
1998 priority, then |"IX"| is placed between |"W"| and |"X"|. This way, all
1999 roman numerals from I to XXXIX will be sorted correctly.
2000
2001 @^natural compare@>
2002 @^Pool, Martin@>
2003
2004 @d nat_end_low 0
2005 @d nat_end_high 1
2006 @d nat_space 2
2007 @d nat_ignore 3
2008 @d nat_radix_point 4
2009 @#
2010 @d nat_digit_zero 64 // digits go up to 127
2011 @d nat_first_letter 128 // letters go up to 191
2012 @d nat_first_priority_letter 192 // priority letters go up to 255
2013 @d nat_high_value 256
2014
2015 @<Compare using the card's name and the \.S table@>= {
2016   token*pa=card_areas.data[set_card_area(registers['C'].number)].tokens
2017    +p1s->token_ptr;
2018   token*pb=card_areas.data[set_card_area(registers['C'].number)].tokens
2019    +p2s->token_ptr;
2020   boolean fractional=0; // Are we reading digits after a radix point?
2021   int a,b,c;
2022   for(;;pa++,pb++) {
2023     begin_natural_compare_loop: @/
2024     a=tables['S'][*pa>=256?whatsit:*pa];
2025     @+ b=tables['S'][*pb>=256?whatsit:*pb];
2026     @<Skip over leading spaces and/or zeros@>;
2027     @<Process a run of digits@>;
2028     @<Check if the end of either string is reached@>;
2029     @<Check for a radix point@>;
2030     @<Process priority letters@>;
2031     @<Check if the current positions of each string sufficiently differ@>;
2032   }
2033 }
2034
2035 @ @<Skip over leading spaces and/or zeros@>= {
2036   while(a==nat_space||a==nat_ignore||(!fractional&&a==nat_digit_zero)) {
2037     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2038     if(a!=nat_ignore) fractional=0;
2039     if(!fractional && a==nat_digit_zero
2040      && aa>=nat_digit_zero && aa<nat_first_letter) break;
2041     pa++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2042   }
2043   while(b==nat_space||b==nat_ignore||(!fractional&&b==nat_digit_zero)) {
2044     int bb=tables['S'][pa[1]>=256?whatsit:pa[1]];
2045     if(b!=nat_ignore) fractional=0;
2046     if(!fractional && b==nat_digit_zero
2047      && bb>=nat_digit_zero && bb<nat_first_letter) break;
2048     pb++; @+ b=tables['S'][*pb>=256?whatsit:*pb];
2049   }
2050 }
2051
2052 @ @<Process a run of digits@>= {
2053   if(a>=nat_digit_zero&&a<nat_first_letter&&
2054    b>=nat_digit_zero&&b<nat_first_letter) {
2055     if((c=(fractional?compare_left:compare_right)(pa,pb))) return c;
2056     @<Skip the run of digits, since they are the same@>;
2057     fractional=0;
2058   }
2059 }
2060 @^strange codes@>
2061
2062 @ Compare two left-aligned numbers: the first to have a different value
2063 wins. This function and |compare_right| are basically equivalent, there
2064 are only a few differences (this one is the simpler one).
2065
2066 @-p int compare_left(token*pa,token*pb) {
2067   int a,b;
2068   for(;;pa++,pb++) {
2069     a=tables['S'][*pa>=256?whatsit:*pa];
2070     @+ b=tables['S'][*pb>=256?whatsit:*pb];
2071     @<Skip over ignored characters@>;
2072     @<If neither |a| nor |b| is digit, |break|@>;
2073     @<If one is a digit and the other isn't, the longest run wins@>;
2074     @<If both are different digits, the greater one wins@>;
2075   }
2076   return 0;
2077 }
2078
2079 @ The longest run of digits wins. That aside, the greatest value wins, but
2080 we can't know that it will until we've scanned both numbers to know they
2081 have the same magnitude, so we remember it in |bias|.
2082
2083 @-p int compare_right(token*pa,token*pb) {
2084   int a,b;
2085   int bias=0;
2086   for(;;pa++,pb++) {
2087     a=tables['S'][*pa>=256?whatsit:*pa];
2088     @+ b=tables['S'][*pb>=256?whatsit:*pb];
2089     @<Skip over ignored characters@>;
2090     @<If neither |a| nor |b| is digit, |break|@>;
2091     @<If one is a digit and the other isn't, the longest run wins@>;
2092     @<If both are digits, set the |bias|@>;
2093   }
2094   return bias;
2095 }
2096
2097 @ Ignored characters might be commas for grouping digits into thousands.
2098
2099 @<Skip over ignored characters@>= {
2100   while(a==nat_ignore) {
2101     pa++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2102   }
2103   while(b==nat_ignore) {
2104     pb++; @+ b=tables['S'][*pb>=256?whatsit:*pb];
2105   }
2106 }
2107
2108 @ @<If neither |a| nor |b| is digit, |break|@>= {
2109   if(!(a>=nat_digit_zero&&a<nat_first_letter)&&
2110    !(b>=nat_digit_zero&&b<nat_first_letter)) break;
2111 }
2112
2113 @ @<If one is a digit and the other isn't, the longest run wins@>= {
2114   if(!(a>=nat_digit_zero&&a<nat_first_letter)) return -1;
2115   if(!(b>=nat_digit_zero&&b<nat_first_letter)) return 1;
2116 }
2117
2118 @ @<If both are different digits, the greater one wins@>= {
2119   if(a!=b) return a-b;
2120 }
2121
2122 @ @<If both are digits, set the |bias|@>= {
2123   if(a!=b && !bias) bias=(a<b)?-1:1;
2124 }
2125
2126 @ @<Skip the run of digits, since they are the same@>= {
2127   while(a>=nat_digit_zero&&a<nat_first_letter) {
2128     pa++; @+ pb++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2129   }
2130   b=tables['S'][*pb>=256?whatsit:*pb];
2131 }
2132
2133 @ @<Check if the end of either string is reached@>= {
2134   if(a==nat_end_low && b>nat_end_high) return -1;
2135   if(b==nat_end_low && a>nat_end_high) return 1;
2136   if(a==nat_end_high && b>nat_end_high) return 1;
2137   if(b==nat_end_high && a>nat_end_high) return -1;
2138   if(a<=nat_end_high && b<=nat_end_high) break; // tied
2139 }
2140
2141 @ A radix point must be followed by a digit, otherwise it is considered to
2142 be punctuation (and ignored). Radix points come before digits in the
2143 sorting order (|".5"| comes before |"5"|).
2144
2145 @<Check for a radix point@>= {
2146   if(a==nat_radix_point && b==nat_radix_point) {
2147     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2148     int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2149     if(aa>=nat_digit_zero&&aa<nat_first_letter
2150      &&bb>=nat_digit_zero&&bb<nat_first_letter) fractional=1;
2151   } @+else if(a==nat_radix_point) {
2152     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2153     if(!(aa>=nat_digit_zero&&aa<nat_first_letter)) {
2154       pa++; goto begin_natural_compare_loop;
2155     }
2156   } @+else if(b==nat_radix_point) {
2157     int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2158     if(!(bb>=nat_digit_zero&&bb<nat_first_letter)) {
2159       pb++; goto begin_natural_compare_loop;
2160     }
2161   }
2162 }
2163
2164 @ This is used so that |"IX"| can be sorted between |"VIII"| and |"X"|. In
2165 normal alphabetical order, |"IX"| sorts before |"V"|. This algorithm makes
2166 it so that doesn't happen. For example: |a| is |'I'| and |aa| (the
2167 character after |a| in the text) is |'X'| (the check |aa>a| ensures that
2168 it too is priority, in addition to checking that |a| represents a negative
2169 part of a roman number), and |b| is |'V'|. Now, since |'V'| comes between
2170 |'I'| and |'X'| in the alphabetical order, the condition is checked to be
2171 valid and it overrides the later check.
2172
2173 @<Process priority letters@>= {
2174   if(a>=nat_first_priority_letter) {
2175     int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2176     if(aa>a && b>=nat_first_letter && (b&63)>(a&63) && (b&63)<(aa&63))
2177       return 1;
2178   }
2179   if(b>=nat_first_priority_letter) {
2180     int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2181     if(bb>b && a>=nat_first_letter && (a&63)>(b&63) && (a&63)<(bb&63))
2182       return -1;
2183   }
2184 }
2185
2186 @ At this point, |a| and |b| will both be |@[@]>=nat_radix_point|. Numbers
2187 always come after letters (this rule is designed so that when a radix
2188 point is found after a number, it will make a larger number; otherwise it
2189 will be followed by a letter and therefore the one followed by the letter
2190 is lesser since it has no fractional part to make it greater).
2191
2192 @<Check if the current positions of each string suffic...@>= {
2193   if(a>=nat_first_priority_letter) a-=64;
2194   if(b>=nat_first_priority_letter) b-=64;
2195   if(a<nat_first_letter) a+=128;
2196   if(b<nat_first_letter) b+=128;
2197   if(a!=b) return (a<b)?-1:1;
2198 }
2199
2200 @*Name Sorting. This kind of sorting is used when items are grouped
2201 together by some extra field in the statistics, such as creature types in
2202 Magic: the Gathering.
2203
2204 It works in a similar way to the natural sorting algorithm, but this time
2205 it is simpler and not as many things need to be checked. Digits and
2206 priority letters are treated as normal letters, and the types |nat_space|,
2207 |nat_ignore|, and |nat_radix_point| are all ignored. In addition, a null
2208 terminator is always treated as |nat_end_low|.
2209
2210 If both names compare the same, their number is used instead, in order to
2211 force sorting stability.
2212
2213 @-p int name_compare(int n1,int n2) {
2214   char*s1=name_info(n1).name;
2215   char*s2=name_info(n2).name;
2216   int a,b;
2217   for(;*s1 || *s2;s1++,s2++) {
2218     a=(*s1)?tables['S'][*s1]:nat_end_low;
2219     b=(*s2)?tables['S'][*s2]:nat_end_low;
2220     @<Skip over spaces and ignored characters@>;
2221     @<Check if the end of either string is reached@>;
2222     @<Check if the current positions of...@>;
2223   }
2224   return (n1<n2)?-1:1;
2225 }
2226
2227 @ @<Skip over spaces and ignored characters@>= {
2228   while(a<nat_digit_zero) {
2229     s1++; @+ a=(*s1)?tables['S'][*s1]:nat_end_low;
2230   }
2231   while(b<nat_digit_zero) {
2232     s2++; @+ b=(*s2)?tables['S'][*s2]:nat_end_low;
2233   }
2234 }
2235
2236 @*Statistics. After the card lists are created and sorted and grouped, it
2237 can make statistics from them. It can be just a plain list, or it can be
2238 in summary of groups, measuring count, minimum, maximum, mean, median, and
2239 so on.
2240
2241 First we do the simple iteration.
2242
2243 @^mean@>
2244 @^median@>
2245 @^groups@>
2246 @^minimum@>
2247 @^maximum@>
2248
2249 @<Cases for system commands@>=
2250   @-case 'V': {
2251     // Iterate the card list
2252     data_index i;
2253     char*q=pop_string();
2254     if(!stack_ptr[1].is_string) program_error("Type mismatch");
2255     foreach(i,card_list) {
2256       push_num(card_list.data[i].token_ptr);
2257       store('A');
2258       execute_program(q);
2259     }
2260     free(q);
2261     break;
2262   }@#
2263   @-case 'v': {
2264     // Read a field from the card list
2265     int x=pop_num()&31;
2266     int y=0;
2267     data_index i;
2268     foreach(i,card_list) {
2269       if(registers['A'].number==card_list.data[i].token_ptr)
2270         y=card_list.data[i].field[x];
2271     }
2272     push_num(y);
2273     break;
2274   }@#
2275
2276 @ That was simple, see? Now to do gathering statistics of summary of
2277 groups, which is a bit more complicated. The list is expected to be sorted
2278 by the group field primary, and the statistics field ascending as
2279 secondary, in order to make the correct calculation of the fields.
2280
2281 However, it will not do the sorting automatically, since there are some
2282 reasons why you might want it to work differently. One thing you can do is
2283 to sort the group field {\sl secondary} and some other more major group as
2284 primary, in order to do two-dimensional statistics, and this will work as
2285 long as you do not require the minimum, maximum, or median.
2286
2287 @<Cases for system commands@>=
2288   @-case 'g': {
2289     // Gather statistics of groups
2290     data_index i,si=0;
2291     int x=pop_num()&31; // field for grouping
2292     int y=pop_num()&31; // field to measure statistics with
2293     int sum1,sum2; // running totals of $s_1$ and $s_2$
2294     sum1=sum2=0;
2295     char*q=pop_string(); // code to execute for each group
2296     if(!stack_ptr[1].is_string) program_error("Type mismatch");
2297     foreach(i,card_list) {
2298       if(card_list.data[i].field[x]!=card_list.data[si].field[x]) {
2299         @<Send the results of the current group@>;
2300         sum1=sum2=0; @+ si=i;
2301       }
2302       @<Add to the running totals@>;
2303     }
2304     @<Send the results of the current group@>;
2305     free(q);
2306     break;
2307   }@#
2308
2309 @ Running totals are kept for two quantities called $s_1$ and $s_2$. There
2310 is also $s_0$, but that can be calculated easily using subtraction, so
2311 there is no need to keep a running total. If the sample values are denoted
2312 $x_k$, the following equation represents the running totals:
2313 $$s_j=\sum_{k=1}^N{x_k^j}$$ (note that $s_0=N$.)
2314
2315 @^mathematics@>
2316
2317 @<Add to the running totals@>= {
2318   sum1+=card_list.data[i].field[y];
2319   sum2+=card_list.data[i].field[y]*card_list.data[i].field[y];
2320 }
2321
2322 @ Now we will send the results and call |q|. The results are sent to the
2323 stack in the following order: $s_0$, $s_1$, $s_2$, $Q_0$, $2Q_2$, $Q_4$
2324 (where $Q_0$ is the minimum, $Q_2$ the median, and $Q_4$ the maximum).
2325
2326 From these results, it is then possible to calculate the standard
2327 deviation: $$\sigma={1\over s_0}\sqrt{s_0s_2-s_1^2}$$ and
2328 $$s=\sqrt{s_0s_2-s_1^2\over s_0(s_0-1)}.$$
2329
2330 @^mathematics@>
2331
2332 @<Send the results of the current group@>= {
2333   push_num(i-si); // $s_0$
2334   push_num(sum1); // $s_1$
2335   push_num(sum2); // $s_2$
2336   push_num(card_list.data[si].field[y]); // $Q_0$
2337   push_num(
2338     card_list.data[(si+i)/2].field[y]+card_list.data[(si+i+1)/2].field[y]
2339   ); // $2Q_2$
2340   push_num(card_list.data[i-1].field[y]); // $Q_4$
2341   @# push_num(card_list.data[si].token_ptr); @+ store('A');
2342   execute_program(q);
2343 }
2344
2345 @*Random Pack Generation. Now the codes so that it can create random packs
2346 (such as booster packs) by using the card lists and deck lists.
2347
2348 A command |'P'| is used for evaluation of a deck list. It expects the deck
2349 list number and the code to execute for each card on the list.
2350
2351 @^booster pack@>
2352
2353 @<Cases for system commands@>=
2354   @-case 'P': {
2355     // Generate a random pack or deck
2356     data_index s=set_deck_list(pop_num());
2357     data_index n; // current deck list entry
2358     if(stack_ptr[1].is_string) program_error("Number expected");
2359     @<Figure out what cards belong in the pack@>;
2360     @<Execute the code on the stack for each card in the pack@>;
2361     break;
2362   }@#
2363
2364 @ @<Figure out what cards belong in the pack@>= {
2365   deck_entry*e;
2366   int tries=1000; // How many times can you retry if it fails?
2367   figure_out_again:
2368   if(!--tries) program_error("No cards matched the deck criteria");
2369   n=s;
2370   @<Reset |amount_in_pack| of each card to zero@>;
2371   while(n!=none && (n=(e=deck_lists.data+n)->next)!=none)
2372     @<Process this deck entry@>;
2373 }
2374
2375 @ @<Reset |amount_in_pack| of each card to zero@>= {
2376   data_index i;
2377   foreach(i,card_list) card_list.data[i].amount_in_pack=0;
2378 }
2379
2380 @ The deck entry must be processed according to the flags. Here is a list
2381 of flags:
2382
2383 \.a: Use all cards that meet the criteria, instead of only one. If this is
2384 the case, it is possible to use negative weights to remove cards from the
2385 pack. Also, it cannot fail.
2386 [Combine with \.{x}]
2387
2388 \.k: Select without replacement. It is fail if the total weight is not
2389 enough. There are two ways in which this differs from \.u (below). One is
2390 that the previous lines in the deck list are not used. The other one is
2391 that if the weight is more than one, there will be more than one ball for
2392 that card, therefore the same card can be picked up multiple times.
2393 [Combine with \.{sux}]
2394
2395 \.n: Use the |amount| as a probability. If |amount<=100| then the
2396 probability is |amount/100| otherwise it is |100/amount|. This is a
2397 probability of using the |name| to select another deck list instead of
2398 this one.
2399 [Combine with nothing]
2400
2401 \.s: Skip the next line if this line does not fail. (Normally, if one line
2402 fails, everything does, and you have to try again.)
2403 [Combine with \.{kux}]
2404
2405 \.u: Require unique selection. It is fail if the card is already in this
2406 pack.
2407 [Combine with \.{ksx}]
2408
2409 \.x: Pass the |name| as a string to the code in the \.U register, and then
2410 use the resulting code as the code to determine weights instead of using
2411 the code in the register named by |name| directly. Now you can type things
2412 such as |"12x Forest"| into your deck list.
2413 [Combine with \.{aksu}]
2414
2415 @<Process this deck entry@>= {
2416   if(e->flags&lflag('n')) {
2417     @<Determine whether or not to skip to another deck list@>;
2418   } @+else {
2419     char*c; // code for weights of each card
2420     int total; // total weight of cards
2421     data_index*bag=malloc(sizeof(data_index));
2422     @<Get the code |c| for the weights of each card@>;
2423     @<Calculate the weights of each card@>;
2424     if(!(e->flags&lflag('a')))
2425       @<Select some of the cards at random and add them to the pack@>;
2426     if(e->flags&lflag('x')) free(c);
2427     free(bag);
2428   }
2429 }
2430
2431 @ @<Determine whether or not to skip to another deck list@>= {
2432   boolean q;
2433   if(e->amount<=100) {
2434     q=(gen_random(100)<e->amount);
2435   } @+else {
2436     q=(100<gen_random(e->amount));
2437   }
2438   if(q) n=set_deck_list(find_name(e->name));
2439 }
2440
2441 @ @<Get the code |c| for the weights of each card@>= {
2442   if(e->flags&lflag('x')) {
2443     execute_program(registers['U'].text);
2444     if(stack_ptr->is_string) {
2445       c=pop_string();
2446     } @+else {
2447       program_error("Type mismatch");
2448     }
2449   } @+else {
2450     int n=find_name(e->name);
2451     if(name_info(n).value.is_string) {
2452       c=name_info(n).value.text;
2453     } @+else {
2454       program_error("Type mismatch");
2455     }
2456   }
2457 }
2458
2459 @ @<Calculate the weights of each card@>= {
2460   data_index i;
2461   foreach(i,card_list) {
2462     registers['A'].number=card_list.data[i].token_ptr;
2463     execute_program(c);
2464     if(stack_ptr->number) {
2465       if(e->flags&lflag('a')) {
2466         card_list.data[i].amount_in_pack+=e->amount*stack_ptr->number;
2467       } @+else if(stack_ptr->number>0) {
2468         @<Add the cards to the |bag|@>;
2469       }
2470     }
2471     stack_drop();
2472   }
2473 }
2474
2475 @ The |bag| is like, you put the balls in the bag so that you can mix it
2476 and take one out, whatever number is on the ball is the card you put into
2477 the pack. Except, that there is no balls and no bag.
2478
2479 There is one ball per point of weight.
2480
2481 @^balls@>
2482
2483 @<Add the cards to the |bag|@>= {
2484   int j=stack_ptr->number;
2485   bag=realloc(bag,(total+j)*sizeof(data_index));
2486   while(j--) bag[total+j]=i;
2487   total+=stack_ptr->number;
2488 }
2489
2490 @ If it is not a line which adds all possibilities at once, then the cards
2491 must be selected from the |bag| at random to bag them. In some cases it
2492 will fail.
2493
2494 @<Select some of the cards at random and add them to the pack@>= {
2495   data_index r;
2496   int amount=e->amount;
2497   bag_next:
2498   if(!total) @<Deal with bag failure@>;
2499   r=gen_random(total);
2500   if((e->flags&lflag('u')) && card_list.data[bag[r]].amount_in_pack) {
2501     bag[r]=bag[--total];
2502     goto bag_next;
2503   }
2504   card_list.data[bag[r]].amount_in_pack++;
2505   if(e->flags&lflag('k')) bag[r]=bag[--total];
2506   if(amount--) goto bag_next;
2507   @#if(e->flags&lflag('s')) n=deck_lists.data[n].next;
2508   bag_done: ;
2509 }
2510
2511 @ @<Deal with bag failure@>= {
2512   if(e->flags&lflag('s')) goto bag_done;
2513   else goto figure_out_again;
2514 }
2515
2516 @ Now it must do stuff using the list which is generated. The quantity for
2517 how many of that card is pushed on the stack, and this is done even for
2518 cards with negative quantity (but not for zero quantity).
2519
2520 @<Execute the code on the stack for each card in the pack@>= {
2521   data_index i;
2522   char*q=pop_string();
2523   if(!stack_ptr[1].is_string) program_error("Type mismatch");
2524   foreach(i,card_list) {
2525     if(card_list.data[i].amount_in_pack) {
2526       push_num(card_list.data[i].amount_in_pack);
2527       execute_program(q);
2528     }
2529   }
2530   free(q);
2531 }
2532
2533 @*Reading Input Files. Now it is time for the part of the program where
2534 input files are read and processed. The areas of the file (and other
2535 special commands) are indicated using \.@@ signs.
2536
2537 At first we have state information. Each state is labeled by uppercase
2538 letters, or by digits 1 to 9. The high bit is set for the heading state,
2539 meaning the first line that contains the name and/or other heading
2540 information.
2541
2542 @d null_state 0
2543 @d card_state 'C'
2544 @d deck_state 'D'
2545 @d execute_state 'E'
2546 @d file_state 'F'
2547 @d include_state 'I'
2548 @d keyword_state 'K'
2549 @d image_state 'M'
2550 @d pattern_state 'P'
2551 @d subroutine_state 'S'
2552 @d font_state 'T'
2553 @d encoding_state 'U'
2554 @d wordforms_state 'W'
2555 @d heading 0x80
2556
2557 @<Global variables@>=
2558 int cur_state;
2559 data_index cur_name;
2560 data_index cur_data;
2561 boolean omit_line_break;
2562
2563 @ The next thing that must be kept track of for input files is the stack
2564 of open input files.
2565
2566 @d max_pathname_length 128
2567 @d max_filename_length 128
2568 @d max_input_stack 128
2569 @d max_line_length 256
2570
2571 @<Typedefs@>=
2572 typedef struct {
2573   FILE*fp; // zero for terminal input
2574   char name[max_filename_length+1];
2575   int line;
2576 } input_file_data;
2577
2578 @ @<Global variables@>=
2579 input_file_data input_files[max_input_stack];
2580 input_file_data*current_input_file=input_files;
2581 char input_buffer[max_line_length];
2582
2583 @ Some macros are useful to access the current file data.
2584
2585 @d current_line (current_input_file->line)
2586 @d current_filename (current_input_file->name)
2587 @d current_fp (current_input_file->fp)
2588 @#
2589 @d parsing_error(_text) fprintf(stderr,"%s on line %d in %s\n",
2590  _text,current_line,current_filename)@;
2591
2592 @ There is also conditional processing directives, which uses a single
2593 variable to keep track of the level. If it is greater than zero, the
2594 condition is false, and it is increased for nesting conditions (the
2595 nested conditions have no truth to them).
2596
2597 @<Global variables@>=
2598 int condition_level=0;
2599
2600 @ This subroutine inputs the next line. True is returned if there is a
2601 line, or false if it is finished.
2602
2603 It is necessary to check for end of file and if so, close that file and
2604 try the one it was included from; and if it is terminal input, display the
2605 current state when prompting input from the user.
2606
2607 @-p boolean input_line(void) {
2608   input_line_again: if(current_fp) {
2609     @<Get a line of input from the file@>;
2610   } @+else {
2611     @<Get a line of terminal input@>;
2612   }
2613   @<Remove trailing |'\n'|, |'\r'|, and spaces@>;
2614   ++current_line;
2615   return 1;
2616 }
2617
2618 @ @<Get a line of input from the file@>= {
2619   if(!fgets(input_buffer,max_line_length,current_fp)) {
2620     memusage_log("Closing input file",current_input_file-input_files)@;
2621     fclose(current_fp);
2622     if(current_input_file>input_files) {
2623       --current_input_file;
2624       goto input_line_again;
2625     } @+else {
2626       return 0;
2627     }
2628   }
2629 }
2630
2631 @ @<Get a line of terminal input@>= {
2632   printf("\n%c> ",cur_state?cur_state:'>');
2633   fflush(stdout);
2634   if(!fgets(input_buffer,max_line_length,stdin)) return 0;
2635 }
2636
2637 @ This function is used to open the main input file.
2638
2639 @-p void open_input(char*name) {
2640   if(++current_input_file>input_files+max_input_stack) {
2641     fprintf(stderr,"Too many simultaneous input files\n");
2642 @.Too many simultaneous...@>
2643     exit(1);
2644   }
2645   memusage_log("Opening input file",current_input_file-input_files)@;
2646   strcpy(current_filename,name);
2647   current_line=0;
2648   current_fp=fopen(name,"r");
2649   if(!current_fp) {
2650     fprintf(stderr,"Cannot open input file: %s\n",name);
2651 @.Cannot open input file@>
2652     exit(1);
2653   }
2654 }
2655
2656 @ Trailing newlines and spaces are removed. On some computers, there will
2657 be a carriage return before the line feed, it should be removed, so that
2658 the same file will work on other computers, too.
2659
2660 @d last_character_input input_buffer[strlen(input_buffer)-1]
2661
2662 @<Remove trailing |'\n'|, |'\r'|, and spaces@>= {
2663   if(last_character_input=='\n') last_character_input=0;
2664   if(last_character_input=='\r') last_character_input=0;
2665   while(last_character_input==' ') last_character_input=0;
2666 }
2667
2668 @ The input states start at these values.
2669
2670 @<Initialize the input states@>= {
2671   cur_state=execute_state;
2672   cur_name=cur_data=0;
2673 }
2674
2675 @ Now it is the time to do the actual processing according to the contents
2676 of the lines of the file. A line starting with \.@@ sign will indicate a
2677 special command (to operate in all modes) or a mode switch command.
2678
2679 @d delete_chars(_buf,_c) memmove((_buf),(_buf)+(_c),strlen((_buf)+(_c))+1)
2680
2681 @<Process the input files@>= {
2682   char*buf;
2683   while(input_line()) {
2684     buf=input_buffer;
2685     if(condition_level) {
2686       buf+=strspn(buf," ");
2687       condition_level+=!strcmp(buf,"@@<");
2688       condition_level-=!strcmp(buf,"@@>");
2689     } @+else {
2690       omit_line_break=1;
2691       @<Convert \.@@ commands in the |input_buffer|@>;
2692       omit_line_break=0;
2693       process_line(buf);
2694     }
2695   }
2696 }
2697
2698 @ @<Convert \.@@ commands in the |input_buffer|@>= {
2699   char*ptr=input_buffer;
2700   while(*ptr) {
2701     if(*ptr=='@@') {
2702       @<Convert the current \.@@ command@>;
2703     } @+else {
2704       ptr++;
2705     }
2706   }
2707 }
2708
2709 @ @<Convert the current \.@@ command@>= {
2710   switch(*++ptr) {
2711     case '@@': @/
2712       delete_chars(ptr,1);
2713       break;
2714     case '.': @<Process \.{@@.} command@>;@+break;
2715     case '&': @<Process \.{@@\&} command@>;@+break;
2716     case '^': @<Process \.{@@\^} command@>;@+break;
2717     case '(': @<Process \.{@@(} command@>;@+break;
2718     case '<': @<Process \.{@@<} command@>;@+break;
2719     case '>': @<Remove this command from the input@>;@+break;
2720     default: @/
2721       if((*ptr>='A' && *ptr<='Z') || (*ptr>='0' && *ptr<='9')) {
2722         @<Enter a |heading| state@>;
2723       } @+else {
2724         parsing_error("Unknown @@ command");
2725       }
2726   }
2727 }
2728
2729 @ @<Remove this command from the input@>= {
2730   ptr--;
2731   delete_chars(ptr,2);
2732 }
2733
2734 @ Heading states are used for the first line of a section in the file.
2735 After that line is processed, it becomes the corresponding non-heading
2736 state |(cur_state&~heading)|.
2737
2738 Note: The state |'0'| is deliberately unused; you might use it for
2739 documentation areas, for example.
2740
2741 @^documentation areas@>
2742
2743 @<Enter a |heading| state@>= {
2744   cur_state=heading|*ptr--;
2745   delete_chars(ptr,2);
2746   while(*ptr==' ' || *ptr=='\t') delete_chars(ptr,1);
2747 }
2748
2749 @ @-p void process_line(char*buf) {
2750   int q=cur_state;
2751   cur_state&=~heading;
2752   switch(q) {
2753     case card_state: @<Process card state@>;@+break;
2754     case deck_state: @<Process deck state@>;@+break;
2755     case execute_state: @<Process execute state@>;@+break;
2756     case file_state: @<Process file state@>;@+break;
2757     case keyword_state: @<Process keyword state@>;@+break;
2758     case pattern_state: @<Process pattern state@>;@+break;
2759     case subroutine_state: @<Process subroutine state@>;@+break;
2760     case wordforms_state: @<Process word forms state@>;@+break;
2761     case card_state|heading: @<Process card heading@>;@+break;
2762     case deck_state|heading: @<Process deck heading@>;@+break;
2763     case file_state|heading: @<Process file heading@>;@+break;
2764     case include_state|heading: @<Process include heading@>;@+break;
2765     case keyword_state|heading: @<Process keyword heading@>;@+break;
2766     case pattern_state|heading: @<Process pattern heading@>;@+break;
2767     case subroutine_state|heading: @<Process subroutine heading@>;@+break;
2768     default: ; // nothing happens
2769   }
2770 }
2771
2772 @ Sometimes you might want a macro which can send a line programmatically.
2773 So, here is the way that it is done.
2774
2775 @<Cases for system commands@>=
2776   @-case 'X': {
2777     // Process a line by programming
2778     int n;
2779     if(stack_ptr->is_string) program_error("Type mismatch");
2780     n=pop_num();
2781     if(n) cur_state=n|heading;
2782     if(!stack_ptr->is_string) program_error("Type mismatch");
2783     omit_line_break=1;
2784     process_line(stack_ptr->text);
2785     stack_drop();
2786     break;
2787   }@#
2788
2789 @*Inner Commands. These are commands other than the section headings.
2790
2791 @ The first command to deal with is simple--it is a comment. The rest of
2792 the current line is simply discarded.
2793
2794 @<Process \.{@@.} command@>= {
2795   ptr[-1]=0;
2796 }
2797
2798 @ This command is a pattern split. It means it will process the part of
2799 the line before this command and then process the stuff after it. The
2800 variable |omit_line_break| is 1 if this command is used; because it means
2801 there will not be a line break. (Otherwise, patterns and so on are split
2802 at line breaks.)
2803
2804 @<Process \.{@@\&} command@>= {
2805   ptr[-1]=0;
2806   process_line(buf);
2807   buf=++ptr;
2808 }
2809
2810 @ This allows control characters to be inserted into the input. This code
2811 takes advantage of the way the ASCII code works, in which stripping all
2812 but the five low bits can convert a letter (uppercase or lowercase) to its
2813 corresponding control character.
2814
2815 @^control character@>
2816
2817 @<Process \.{@@\^} command@>= {
2818   ptr[1]&=0x1F;
2819   --ptr;
2820   delete_chars(ptr,2);
2821 }
2822
2823 @ The following command is used to execute a code in a different state and
2824 then include the results in this area.
2825
2826 @<Process \.{@@(} command@>= {
2827   char*p;
2828   char*q;
2829   @<Skip over the name and save the rest of the line@>;
2830   @<Execute the code for the named subroutine@>;
2831   @<Insert the returned string and fix the line buffer@>;
2832 }
2833
2834 @ @<Skip over the name and save the rest of the line@>= {
2835   p=ptr+1;
2836   while(*ptr && *ptr!=')') ptr++;
2837   q=strdup(ptr+!!*ptr);
2838   *ptr=0;
2839 }
2840
2841 @ @<Execute the code for the named subroutine@>= {
2842   int n=find_name(p);
2843   execute_program(fetch_code(n));
2844 }
2845
2846 @ @<Insert the returned string and fix the line buffer@>= {
2847   char*s=pop_string();
2848   sprintf(p-2,"%s%s",s,q);
2849   ptr=p+strlen(s)-2;
2850   free(s);
2851   free(q);
2852 }
2853
2854 @ This command is used for conditional processing. The condition value
2855 comes from the stack. Zero is false, everything else is true.
2856
2857 @<Process \.{@@<} command@>= {
2858   --ptr;
2859   delete_chars(ptr,2);
2860   condition_level=!stack_ptr->number;
2861   stack_drop();
2862 }
2863
2864 @*Card State. Cards are added to the card areas by using the card state.
2865 The \.C register tells which is the current card area, and \.P register is
2866 used to select the current pattern area. The pattern area is used to match
2867 patterns after reading a line. Please note that it won't work to change
2868 the value of the \.C register during the card state.
2869
2870 @<Process card heading@>= {
2871   int n=find_name(buf);
2872   cur_data=set_card_area(n);
2873   cur_name=n-256;
2874   push_num(n);@+store('C');
2875 }
2876
2877 @ @<Process card state@>= {
2878   char*b;
2879   if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
2880   @<Initialize the \.W register@>;
2881   b=do_patterns(strdup(buf),registers['P'].number);
2882   if(registers['W'].is_string) execute_program(registers['W'].text);
2883   @<Send the tokens of |b| and replace whatsits@>;
2884   free(b);
2885 }
2886
2887 @ @<Initialize the \.W register@>= {
2888   if(registers['W'].is_string) free(registers['W'].text);
2889   registers['W'].is_string=1;
2890   registers['W'].text=strdup("");
2891 }
2892
2893 @ @<Send the tokens of |b| and replace whatsits@>= {
2894   char*p;
2895   for(p=b;*p;p++) {
2896     if(*p==whatsit) {
2897       send_token(cur_data,pop_num());
2898     } @+else {
2899       send_token(cur_data,(*p==1)?0:*p);
2900     }
2901   }
2902 }
2903
2904 @ There is one command you might want to send tokens in any other time.
2905
2906 @<Cases for system commands@>=
2907   @-case 'T': {
2908     // Add tokens to the card area
2909     if(stack_ptr->is_string) {
2910       @<Send tokens from the string on the stack@>;
2911     } @+else {
2912       send_token(set_card_area(registers['C'].number),stack_ptr->number);
2913     }
2914     stack_drop();
2915     break;
2916   }@#
2917
2918 @ @<Send tokens from the string on the stack@>= {
2919   char*p;
2920   data_index q=set_card_area(registers['C'].number);
2921   for(p=stack_ptr->text;*p;p++) send_token(q,*p);
2922 }
2923
2924 @*Deck State. Deck state is used for creating deck lists and random packs.
2925
2926 @<Process deck heading@>= {
2927   cur_name=find_name(buf)-256;
2928   cur_data=set_deck_list(cur_name+256);
2929   @<Skip to the end of the deck list@>;
2930 }
2931
2932 @ @<Skip to the end of the deck list@>= {
2933   while(deck_lists.data[cur_data].next!=none)
2934     cur_data=deck_lists.data[cur_data].next;
2935 }
2936
2937 @ Now to parse each line in turn. Each line consists of a number, the
2938 flags, and a text.
2939
2940 @<Process deck state@>= {
2941   int n=strtol(buf,&buf,10);
2942   unsigned int f=0;
2943   if(n) {
2944     buf+=strspn(buf,"\x20\t");
2945     @<Read the flags for the deck list@>;
2946     buf+=strspn(buf,"\x20\t"); // Now we are at the point of the text
2947     @<Send this line to the deck list@>;
2948     @<Create and advance to the new terminator of the deck list@>;
2949   }
2950 }
2951
2952 @ @<Read the flags for the deck list@>= {
2953   while(*buf>='a' && *buf<='z') f |=lflag(*buf++);
2954   buf++; // Skip terminator of flags
2955 }
2956
2957 @ If the \.x flag is set, it will be determined what to do with the text
2958 by the user-defined code. Otherwise, it is always a name, so we can save
2959 memory by pointing to the name buffer (since name buffers never vary).
2960
2961 @<Send this line to the deck list@>= {
2962   if(f&lflag('x')) {
2963     deck_lists.data[cur_data].name=strdup(buf);
2964   } @+else {
2965     deck_lists.data[cur_data].name=name_info(find_name(buf)).name;
2966   }
2967 }
2968
2969 @ @<Create and advance to the new terminator of the deck list@>= {
2970   data_index i=new_record(deck_lists);
2971   deck_lists.data[cur_data].next=i;
2972   deck_lists.data[cur_data=i].next=none;
2973 }
2974
2975 @*Execute State. This state is simple, just execute stack codes. It is the
2976 initial state; you can use it with terminal input, too.
2977
2978 @<Process execute state@>= {
2979   execute_program(buf);
2980 }
2981
2982 @*File State. This state is used to make list of output files. Each one is
2983 stored as a string, like subroutine state. The difference is that newlines
2984 will not be discarded. The other difference is that there is a flag in the
2985 |name_data| record for it that tells it that it is a file that should be
2986 sent to output.
2987
2988 @<More elements of |name_data|@>=
2989   boolean is_output_file;
2990
2991 @ @<Process file heading@>= {
2992   cur_name=find_name(buf)-256;
2993   if(!names.data[cur_name].value.is_string) {
2994     names.data[cur_name].value.is_string=1;
2995     names.data[cur_name].value.text=strdup("");
2996     names.data[cur_name].is_output_file=1;
2997   }
2998 }
2999
3000 @ @<Process file state@>= {
3001   int z=strlen(names.data[cur_name].value.text);
3002   if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
3003   names.data[cur_name].value.text=realloc(names.data[cur_name].value.text,
3004     z+strlen(buf)+1);
3005   strcpy(names.data[cur_name].value.text+z,buf);
3006 }
3007
3008 @*Include State. The include state causes inclusion of another source file
3009 from this one.
3010
3011 @<Process include heading@>= {
3012   cur_state=execute_state;
3013   @<Push the include file onto the input stack@>;
3014   @<Attempt to open the include file...@>;
3015 }
3016
3017 @ @<Push the include file onto the input stack@>= {
3018   ++current_input_file;
3019   memusage_log("Opening input file",current_input_file-input_files)@;
3020   strcpy(current_filename,buf);
3021   current_line=0;
3022   current_fp=0;
3023 }
3024
3025 @ Include files are searched using the search path specified in the
3026 environment variable called \.{TEXNICARDPATH}, which is a list of paths
3027 delimited by colons on UNIX systems (including Cygwin), but semicolons on
3028 Windows (colons are used in Windows for drive letters). A forward slash is
3029 the path separator. Please note that if you want to use include files in
3030 the current directory, you must include |"."| as the first entry in the
3031 search path!!
3032
3033 @^search path@>
3034 @.TEXNICARDPATH@>
3035 @^Windows@>
3036
3037 @<Set |includepath_separator| depending on operating system@>=
3038 #ifdef WIN32
3039 #define @!includepath_separator ';'
3040 #else
3041 #define includepath_separator ':'
3042 #endif
3043
3044 @ @<Attempt to open the include file by finding it in the search path@>= {
3045   current_fp=open_file(current_filename,"r");
3046   @<It is a fatal error if no such file was found@>;
3047 }
3048
3049 @ Since this part of the code is activated in many parts of the program,
3050 we will make it a subroutine that can open files in different modes.
3051
3052 @-p FILE*open_file(char*filename,char*mode) {
3053   char searchpath[max_pathname_length+max_filename_length+1];
3054   char*cpath;
3055   char*npath=getenv("TEXNICARDPATH");
3056   FILE*fp=0;
3057   strcpy(searchpath,npath?npath:".");
3058   npath=cpath=searchpath;
3059   @<Set |includepath_separator| depending on operating system@>;
3060  @<Attempt to open the file from each each directory in the search path@>;
3061   return fp;
3062 }
3063
3064 @ @<Attempt to open the file from each each directory...@>= {
3065   while(!fp) {
3066     char f[max_pathname_length+max_filename_length+1];
3067     @<Select the next path name into |cpath| and |npath|@>;
3068     sprintf(f,"%s/%s",cpath,filename);
3069     fp=fopen(f,mode);
3070   }
3071 }
3072
3073 @ @<Select the next path name into |cpath| and |npath|@>= {
3074   if(!(cpath=npath)) break;
3075   if((npath=strchr(npath,includepath_separator))) *npath++=0;
3076 }
3077
3078 @ @<It is a fatal error if no such file was found@>= {
3079   if(!current_fp) {
3080     fprintf(stderr,"%s not found in search path.\n",current_filename);
3081 @.not found in search path@>
3082     exit(1);
3083   }
3084 }
3085
3086 @*Keyword State. You can add keywords to the keyword area by using this.
3087 Each keyword heading is one entry in the list.
3088
3089 @<Process keyword heading@>= {
3090   cur_data=new_record(keywords);
3091   keywords.data[cur_data].match=strdup(buf);
3092   keywords.data[cur_data].replacement=strdup("");
3093 }
3094
3095 @ @<Process keyword state@>= {
3096   keyword_data*k=&keywords.data[cur_data];
3097   if(*buf=='+') {
3098     k->category|=find_category(buf+1);
3099   } @+else {
3100     if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
3101     @<Append buffer to keyword text@>;
3102   }
3103 }
3104
3105 @ @<Append buffer to keyword text@>= {
3106   if(*buf) {
3107     int z=strlen(k->replacement);
3108     k->replacement=realloc(k->replacement,z+strlen(buf)+1);
3109     strcpy(k->replacement+z,buf);
3110   }
3111 }
3112
3113 @*Pattern State. This state compiles patterns into a pattern area. It
3114 uses its own syntax, and then is converted into the proper control codes
3115 for the |text| of a pattern.
3116
3117 @<Process pattern heading@>= {
3118   cur_name=find_name(buf)-256;
3119   cur_data=set_pattern_area(cur_name+256);
3120 }
3121
3122 @ The stuff inside the pattern state has its own commands.
3123
3124 @<Process pattern state@>= {
3125   char add_buf[1024]; // buffer of text to add to the current pattern
3126   pattern_data*pat=&pattern_areas.data[cur_data];
3127   *add_buf=0;
3128   switch(*buf) {
3129     case '<': @<Create a new pattern with top priority@>;@+break;
3130     case '>': @<Create a new pattern with bottom priority@>;@+break;
3131     case ':': @<Make a pattern text with a marker@>;@+break;
3132     case '+': @<Add a keyword category to this pattern@>;@+break;
3133     default: ; // do nothing
3134   }
3135   @<Append contents of |add_buf| to the pattern, if needed@>;
3136 }
3137
3138 @ @<Create a new pattern with top priority@>= {
3139   cur_data=new_record(pattern_areas);
3140   pattern_areas.data[cur_data].text=strdup("");
3141   pattern_areas.data[cur_data].subroutine=find_name(buf+1)-256;
3142   pattern_areas.data[cur_data].next=names.data[cur_name].pattern_area;
3143   names.data[cur_name].pattern_area=cur_data;
3144 }
3145
3146 @ @<Create a new pattern with bottom priority@>= {
3147   data_index n;
3148   cur_data=new_record(pattern_areas);
3149   pattern_areas.data[cur_data].text=strdup("");
3150   pattern_areas.data[cur_data].subroutine=find_name(buf+1)-256;
3151   pattern_areas.data[cur_data].next=none;
3152   @<Find the bottom pattern and store its index in |n|@>;
3153   pattern_areas.data[n].next=cur_data;
3154 }
3155
3156 @ @<Find the bottom pattern and...@>= {
3157   n=names.data[cur_name].pattern_area;
3158   while(pattern_areas.data[n].next!=none && pattern_areas.data[n].text &&
3159    pattern_areas.data[pattern_areas.data[n].next].next!=none)
3160     n=pattern_areas.data[n].next;
3161 }
3162
3163 @ Actually, the name of this \strike{cake} chunk is a lie, because it does
3164 not always add a marker.
3165
3166 @<Make a pattern text with a marker@>= {
3167   char*p;
3168   char*b=add_buf;
3169   @<Add the pattern marker if applicable@>;
3170   for(p=buf+2;p[-1] && *p;p++) {
3171     switch(*p) {
3172       case '\\': *b++=*++p; @+break;
3173       case '(': *b++=begin_capture; @+break;
3174       case ')': *b++=end_capture; @+break;
3175       case '%': *b++=match_keyword; @+*b++=*++p; @+break;
3176       case '!': *b++=match_table; @+*b++=*++p; @+break;
3177       case '?': *b++=optional_table; @+*b++=*++p; @+break;
3178       case '#': *b++=failed_match; @+break;
3179       case '&': *b++=jump_table; @+*b++=*++p; @+break;
3180       case ';': *b++=successful_match; @+break;
3181       case '<': *b++=back_one_space; @+break;
3182       case '>': *b++=forward_one_space; @+break;
3183       case '[': *b++=match_left_side; @+break;
3184       case ']': *b++=match_right_side; @+break;
3185       default: *b++=*p; @+break;
3186     }
3187   }
3188   *b=0;
3189 }
3190
3191 @ @<Add the pattern marker if applicable@>= {
3192   if(buf[1]>' ') *b++=buf[1]|0x80;
3193 }
3194
3195 @ @<Add a keyword category to this pattern@>= {
3196   pattern_areas.data[cur_data].category=find_category(buf+1);
3197 }
3198
3199 @ @<Append contents of |add_buf| to the pattern...@>= {
3200   if(*add_buf) {
3201     int z=strlen(pat->text);
3202     pat->text=realloc(pat->text,z+strlen(add_buf)+1);
3203     strcpy(pat->text+z,add_buf);
3204   }
3205 }
3206
3207 @*Subroutine State. This state is used to add a named subroutine.
3208
3209 @<Process subroutine heading@>= {
3210   cur_name=find_name(buf)-256;
3211   if(!names.data[cur_name].value.is_string) {
3212     names.data[cur_name].value.is_string=1;
3213     names.data[cur_name].value.text=strdup("");
3214   }
3215 }
3216
3217 @ @<Process subroutine state@>= {
3218   int z=strlen(names.data[cur_name].value.text);
3219   names.data[cur_name].value.text=realloc(names.data[cur_name].value.text,
3220     z+strlen(buf)+1);
3221   strcpy(names.data[cur_name].value.text+z,buf);
3222 }
3223
3224 @*Word Forms State. You can use the word forms state to enter rules and
3225 exceptions for word forms, such as plurals.
3226
3227 @<Global variables@>=
3228 char wordform_code[256]; // code to execute at \.= line
3229 char wordform_kind; // which kind of word forms is being made now?
3230
3231 @ @<Process word forms state@>= {
3232   switch(*buf) {
3233     case '>': @<Process \.> line in word forms state@>; @+break;
3234     case '=': @<Process \.= line in word forms state@>; @+break;
3235     case '#': wordform_kind=buf[1]; @+break;
3236     default: if(*buf>='0' && *buf<='9')
3237       @<Process numeric line in word forms state@>;
3238   }
3239 }
3240
3241 @ The commands are \.>, \.=, and numbers. The command \.> sets a code for
3242 processing \.= commands, and then add to the list.
3243
3244 @<Process \.> line in word forms state@>= {
3245   strcpy(wordform_code,buf+1);
3246 }
3247
3248 @ @<Process \.= line in word forms state@>= {
3249   int level,kind;
3250   char*orig;
3251   char*dest;
3252   push_string(buf+1);
3253   execute_program(wordform_code);
3254   kind=pop_num(); @+ level=pop_num();
3255   dest=pop_string(); @+ orig=pop_string();
3256   add_word_form(kind,level,orig,dest);
3257   free(orig); @+ free(dest);
3258 }
3259
3260 @ Now the command for numeric forms. You put ``level\.:orig\.:dest'' in
3261 that order, please.
3262
3263 @<Process numeric line in word forms state@>= {
3264   int level=strtol(buf,&buf,0);
3265   char*p;
3266   if(*buf==':') buf++;
3267   p=strchr(buf,':');
3268   if(p) *p=0;
3269   add_word_form(wordform_kind,level,buf,p+1);
3270 }
3271
3272 @*Writing Output Files. Finally, it will be time to send any output
3273 generated into the files (if there is any, which there usually is).
3274
3275 @^output@>
3276
3277 @d ctrl(_letter) (0x1F&(_letter))
3278 @#
3279 @d call_final_subroutine ctrl('C')
3280 @d copy_field ctrl('F')
3281 @d newline ctrl('J')
3282 @d loop_point ctrl('L')
3283 @d next_record ctrl('N')
3284 @d skip_one_character ctrl('S')
3285
3286 @<Write the output files@>= {
3287   data_index n;
3288   foreach(n,names) {
3289     if(names.data[n].is_output_file && names.data[n].value.is_string)
3290       @<Write this output file@>;
3291   }
3292 }
3293
3294 @ @<Write this output file@>= {
3295   FILE*fout=fopen(names.data[n].name,"w");
3296   char*ptr=names.data[n].value.text;
3297   char*loopptr=ptr; // loop point
3298   if(!fout) @<Error about unable to open output file@>;
3299   while(*ptr) @<Write the character and advance to the next one@>;
3300   fclose(fout);
3301 }
3302
3303 @ @<Error about unable to open output file@>= {
3304   fprintf(stderr,"Unable to open output file: %s\n",names.data[n].name);
3305 @.Unable to open output file@>
3306   exit(1);
3307 }
3308
3309 @ @<Write the character and advance to the next one@>= {
3310   switch(*ptr) {
3311     case call_final_subroutine: @<Do |call_final_subroutine|@>; @+break;
3312     case copy_field: @<Do |copy_field|@>; @+break;
3313     case loop_point: loopptr=++ptr; @+break;
3314     case next_record: @<Do |next_record|@>; @+break;
3315     case skip_one_character: ptr+=2; @+break;
3316     default: fputc(*ptr++,fout);
3317   }
3318   done_writing_one_character: ;
3319 }
3320
3321 @ @<Do |call_final_subroutine|@>= {
3322   register_value*v;
3323   if(*++ptr=='(') {
3324     char*p=strchr(ptr,')');
3325     *p=0;
3326     v=&name_info(find_name(ptr+1)).value;
3327     *p=')';
3328     ptr=p+1;
3329   } @+else {
3330     v=&registers[*ptr++];
3331   }
3332   if(v->is_string) {
3333     execute_program(v->text);
3334     @<Write or loop based on result of subroutine call@>;
3335     stack_drop();
3336   }
3337 }
3338
3339 @ @<Write or loop based on result of subroutine call@>= {
3340   if(stack_ptr->is_string) {
3341     fprintf(fout,"%s",stack_ptr->text);
3342   } @+else if(stack_ptr->number) {
3343     ptr=loopptr;
3344   }
3345 }
3346
3347 @ This command is used to copy the next field.
3348
3349 Look at the definition for the |send_reg_char_or_text| macro. It is
3350 strange, but it should work wherever a statement is expected. Please note
3351 that a ternary condition operator should have both choices of the same
3352 type.
3353
3354 @^strange codes@>
3355
3356 @d tok_idx (registers['A'].number)
3357 @d tok_area
3358   (card_areas.data[name_info(registers['C'].number).value.number].tokens)
3359
3360 @d send_reg_char_or_text(_reg)
3361   if(!registers[_reg].is_string || *registers[_reg].text)
3362     fprintf(fout, "%c%s",
3363       registers[_reg].is_string?
3364         *registers[_reg].text:registers[_reg].number,
3365       registers[_reg].is_string?
3366         registers[_reg].text+1:(unsigned char*)""
3367     )@;
3368
3369 @<Do |copy_field|@>= {
3370   ++ptr;
3371   for(;;) {
3372     switch(tok_area[tok_idx++]) {
3373       case null_char: @<Unexpected |null_char|@>;
3374       case end_transmission: tok_idx=0; @+goto done_writing_one_character;
3375       case tabulation: send_reg_char_or_text('T'); @+break;
3376       case raw_data: @<Do |raw_data|@>; @+break;
3377       case escape_code: send_reg_char_or_text('E'); @+break;
3378       case record_separator: tok_idx--; @+goto done_writing_one_character;
3379       case field_separator: @+goto done_writing_one_character;
3380       default: @/
3381         if(tok_area[--tok_idx]&~0xFF)
3382           @<Deal with name code@>@;
3383         else
3384           @<Deal with normal character@>;
3385         tok_idx++;
3386     }
3387   }
3388 }
3389
3390 @ @<Unexpected |null_char|@>= {
3391   fprintf(stderr,"Unexpected null character found in a card area\n");
3392 @.Unexpected null character...@>
3393   exit(1);
3394 }
3395
3396 @ @<Do |raw_data|@>= {
3397   while(tok_area[tok_idx]) fputc(tok_area[tok_idx++],fout);
3398   tok_idx++;
3399 }
3400
3401 @ A name code found here is a code to tell it to call the subroutine code
3402 when it is time to write it out to the file. It should return a string on
3403 the stack (if it is a number, it will be ignored).
3404
3405 @<Deal with name code@>= {
3406   if(name_info(tok_area[tok_idx]).value.is_string)
3407     execute_program(name_info(tok_area[tok_idx]).value.text);
3408   if(stack_ptr->is_string) fprintf(fout,"%s",stack_ptr->text);
3409   stack_drop();
3410 }
3411
3412 @ In case of a normal character, normally just write it out. But some
3413 characters need escaped for \TeX.
3414
3415 @<Deal with normal character@>= {
3416   if(tables['E'][tok_area[tok_idx]]) send_reg_char_or_text('E');
3417   fputc(tok_area[tok_idx],fout);
3418 }
3419
3420 @ This one moves to the next record, looping if a next record is in fact
3421 available. Otherwise, just continue. Note that a |record_separator|
3422 immediately followed by a |end_transmission| is assumed to mean there is
3423 no next record, and that there is allowed to be a optional
3424 |record_separator| just before the |end_transmission|.
3425
3426 @<Do |next_record|@>= {
3427   ++ptr;
3428   while(tok_area[tok_idx]!=record_separator &&
3429    tok_area[tok_idx]!=end_transmission) tok_idx++;
3430   if(tok_area[tok_idx]!=end_transmission &&
3431    tok_area[tok_idx+1]!=end_transmission) ptr=loopptr;
3432 }
3433
3434 @*Functions Common to DVI and GF. Numbers for \.{GF} and \.{DVI} files use
3435 the |dvi_number| data type. (Change this in the change file if the current
3436 setting is inappropriate for your system.)
3437
3438 There is also the |dvi_measure| type, which is twice as long and is used
3439 to compute numbers that can be fractional (with thirty-two fractional bits
3440 and thirty-two normal bits).
3441
3442 @<Typedefs@>=
3443 @q[Type of DVI numbers::]@>
3444 typedef signed int dvi_number;
3445 typedef signed long long int dvi_measure;
3446 @q[::Type of DVI numbers]@>
3447
3448 @ There is one subroutine here to read a |dvi_number| from a file. They
3449 come in different sizes and some are signed and some are unsigned.
3450
3451 @^endianness@>
3452 @^byte order@>
3453
3454 @-p dvi_number get_dvi_number(FILE*fp,boolean is_signed,int size) {
3455   dvi_number r=0;
3456   if(size) r=fgetc(fp);
3457   if((r&0x80) && is_signed) r|=0xFFFFFF00;
3458   while(--size) r=(r<<8)|fgetc(fp);
3459   return r;
3460 }
3461
3462 @ Some macros are defined here in order to deal with |dvi_measure| values.
3463
3464 @^fractions@>
3465
3466 @d to_measure(_value) (((dvi_measure)(_value))<<32)
3467 @d floor(_value) ((dvi_number)((_value)>>32))
3468 @d round(_value) ((dvi_number)(((_value)+0x8000)>>32))
3469 @d ceiling(_value) ((dvi_number)(((_value)+0xFFFF)>>32))
3470
3471 @ Here division must be done in a careful way, to ensure that none of the
3472 intermediate results exceed sixty-four bits.
3473
3474 @d fraction_one to_measure(1)
3475
3476 @-p dvi_measure make_fraction(dvi_measure p,dvi_measure q) {
3477   dvi_measure f,n;
3478   boolean negative=(p<0)^(q<0);
3479   if(p<0) p=-p;
3480   if(q<0) q=-q;
3481   n=p/q; @+ p=p%q;
3482   n=(n-1)*fraction_one;
3483   @<Compute $f=\lfloor2^{32}(1+p/q)+{1\over2}\rfloor$@>;
3484   return (f+n)*(negative?-1:1);
3485 }
3486
3487 @ Notice that the computation specifies $(p-q)+p$ instead of $(p+p)-q$,
3488 because the latter could overflow.
3489
3490 @<Compute $f=...@>= {
3491   register dvi_measure b;
3492   f=1;
3493   while(f<fraction_one) {
3494     b=p-q; @+ p+=b;
3495     if(p>=0) {
3496       f+=f+1;
3497     } @+else {
3498       f<<=1;
3499       p+=q;
3500     }
3501   }
3502 }
3503
3504 @ And a few miscellaneous macros.
3505
3506 @d upto4(_var,_cmd) (_var>=_cmd && _var<_cmd+4)
3507
3508 @*DVI Reading. The device-independent file format is a format invented by
3509 David R.~Fuchs in 1979. The file format need not be explained here, since
3510 there are other books which explain it\biblio{Knuth, Donald. ``\TeX: The
3511 Program''. Computers {\char`\&} Typesetting. ISBN 0-201-13437-3.}\biblio{%
3512 Knuth, Donald. ``\TeX ware''. Stanford Computer Science Report 1097.}.
3513
3514 \edef\TeXwareBiblio{\the\bibliocount}
3515 @^Fuchs, David@>
3516 @.DVI@>
3517 @^device independent@>
3518
3519 At first, names will be given for the commands in a \.{DVI} file.
3520
3521 @d set_char_0 0 // Set a character and move [up to 127]
3522 @d set1 128 // Take one parameter to set character [up to 131]
3523 @d set_rule 132 // Set a rule and move down, two parameters
3524 @d put1 133 // As |set1| but no move [up to 136]
3525 @d put_rule 137 // As |set_rule| but no move
3526 @d nop 138 // No operation
3527 @d bop 139 // Beginning of a page
3528 @d eop 140 // End of a page
3529 @d push 141 // Push $(h,v,w,x,y,z)$ to the stack
3530 @d pop 142 // Pop $(h,v,w,x,y,z)$ from the stack
3531 @d right1 143 // Take one parameter, move right [up to 146]
3532 @d w0 147 // Move right $w$ units
3533 @d w1 148 // Set $w$ and move right [up to 151]
3534 @d x0 152 // Move right $x$ units
3535 @d x1 153 // Set $x$ and move right [up to 156]
3536 @d down1 157 // Take one parameter, move down [up to 160]
3537 @d y0 161 // Move down $y$ units
3538 @d y1 162 // Set $y$ and move down [up to 165]
3539 @d z0 166 // Move down $z$ units
3540 @d z1 167 // Set $z$ and move down [up to 170]
3541 @d fnt_num_0 171 // Select font 0 [up to 234]
3542 @d fnt1 235 // Take parameter to select font [up to 238]
3543 @d xxx1 239 // Specials [up to 242]
3544 @d fnt_def1 243 // Font definitions [up to 246]
3545 @d pre 247 // Preamble
3546 @d post 248 // Postamble
3547 @d post_post 249 // Postpostamble
3548
3549 @ We should now start reading the \.{DVI} file. Filenames of fonts being
3550 used will be sent to standard output.
3551
3552 @-p boolean read_dvi_file(char*filename) {
3553   boolean fonts_okay=1;
3554   FILE*fp=fopen(filename,"rb");
3555   if(!fp) dvi_error(fp,"Unable to open file");
3556   @#@<Skip the preamble of the \.{DVI} file@>;
3557   @<Skip to the next page@>;
3558   @<Read the metapage heading@>;
3559   @<Compute the conversion factor@>;
3560   read_dvi_page(fp);
3561   @<Skip to and read the postamble@>;
3562   @<Read the font definitions and load the fonts@>;
3563   if(fonts_okay) @<Read the pages for each card@>;
3564   @#fclose(fp);
3565   return fonts_okay;
3566 }
3567
3568 @ @-p void dvi_error(FILE*fp,char*text) {
3569   fprintf(stderr,"DVI error");
3570 @.DVI error@>
3571   if(fp) fprintf(stderr," at %08X",ftell(fp));
3572   fprintf(stderr,": %s\n",text);
3573   exit(1);
3574 }
3575
3576 @ Please note the version number of the \.{DVI} file must be 2.
3577
3578 @<Skip the preamble of the \.{DVI} file@>= {
3579   if(fgetc(fp)!=pre) dvi_error(fp,"Bad preamble");
3580   if(fgetc(fp)!=2) dvi_error(fp,"Wrong DVI version");
3581   @<Read the measurement parameters@>;
3582   @<Skip the DVI comment@>;
3583 }
3584
3585 @ @<Read the measurement parameters@>= {
3586   unit_num=get_dvi_number(fp,0,4);
3587   unit_den=get_dvi_number(fp,0,4);
3588   unit_mag=get_dvi_number(fp,0,4);
3589 }
3590
3591 @ @<Skip the DVI comment@>= {
3592   int n=fgetc(fp);
3593   fseek(fp,n,SEEK_CUR);
3594 }
3595
3596 @ From the postamble we can read the pointer for the last page.
3597
3598 @<Global variables@>=
3599 dvi_number last_page_ptr;
3600
3601 @ @<Skip to and read the postamble@>= {
3602   fseek(fp,-4,SEEK_END);
3603   while(fgetc(fp)==223) fseek(fp,-2,SEEK_CUR);
3604   fseek(fp,-5,SEEK_CUR);
3605   fseek(fp,get_dvi_number(fp,0,4)+1,SEEK_SET);
3606   last_page_ptr=get_dvi_number(fp,0,4);
3607   fseek(fp,20,SEEK_CUR); // Skipped parameters of |post|
3608   dvi_stack=malloc(get_dvi_number(fp,0,2)*sizeof(dvi_stack_entry));
3609   fseek(fp,2,SEEK_CUR);
3610 }
3611
3612 @ Between the preamble and the first page can be |nop| commands and font
3613 definitions, so these will be skipped. The same things can occur between
3614 the end of one page and the beginning of the next page.
3615
3616 @<Skip to the next page@>= {
3617   int c;
3618   for(;;) {
3619     c=fgetc(fp);
3620     if(c==bop) break;
3621     if(c>=fnt_def1 && c<fnt_def1+4) {
3622       @<Skip a font definition@>;
3623     } @+else if(c!=nop) {
3624       dvi_error(fp,"Bad command between pages");
3625     }
3626   }
3627 }
3628
3629 @ @<Skip a font definition@>= {
3630   int a,l;
3631   fseek(fp,c+13-fnt_def1,SEEK_CUR);
3632   a=fgetc(fp);
3633   l=fgetc(fp);
3634   fseek(fp,a+l,SEEK_CUR);
3635 }
3636
3637 @ The metapage includes the resolution and other things which must be set,
3638 such as subroutine codes. The resolution must be read before fonts can be
3639 read. Please note that no characters can be typeset on the metapage, since
3640 fonts have not been loaded yet. You can still place empty boxes. The DPI
3641 setting (resolution) is read from the \.{\\count1} register.
3642
3643 @<Read the metapage heading@>= {
3644   dvi_number n=get_dvi_number(fp,0,4);
3645   if(n) {
3646     fprintf(stderr,"Metapage must be numbered zero (found %d).\n",n);
3647 @.Metapage must be...@>
3648     exit(1);
3649   }
3650   push_num(get_dvi_number(fp,0,4)); @+ store('D');
3651   fseek(fp,9*4,SEEK_CUR); // Skip other parameters
3652   layer_width=layer_height=0;
3653 }
3654
3655 @ A stack is kept of the page registers, for use with the |push| and |pop|
3656 commands of a \.{DVI} file. This stack is used by the |read_dvi_page|
3657 subroutine and stores the |quan| registers (described in the next
3658 chapter).
3659
3660 @<Typedefs@>=
3661 typedef struct {
3662   dvi_number h;
3663   dvi_number v;
3664   dvi_number w;
3665   dvi_number x;
3666   dvi_number y;
3667   dvi_number z;
3668   dvi_number hh;
3669   dvi_number vv;
3670 } dvi_stack_entry;
3671
3672 @ @<Global variables@>=
3673 dvi_stack_entry*dvi_stack;
3674 dvi_stack_entry*dvi_stack_ptr;
3675
3676 @ Here is the subroutine to read commands from a DVI page. The file
3677 position should be at the beginning of the page after the |bop| command.
3678
3679 @^pages@>
3680
3681 @-p void read_dvi_page(FILE*fp) {
3682   memusage_log("Beginning of page",fseek(fp));
3683   @<Reset the page registers and stack@>;
3684   typeset_new_page();
3685   @<Read the commands of this page@>;
3686   if(layer_width && layer_height) @<Render this page@>;
3687 }
3688
3689 @ @<Reset the page registers and stack@>= {
3690   quan('A')=quan('B')=quan('H')=quan('I')=quan('J')=quan('L')=quan('V')=
3691   quan('W')=quan('X')=quan('Y')=quan('Z')=0;
3692   dvi_stack_ptr=dvi_stack;
3693 }
3694
3695 @ @<Read the commands of this page@>= {
3696   int c,a;
3697   boolean moveaftertyping;
3698   for(;;) {
3699     c=fgetc(fp);
3700     if(c<set1) {
3701       moveaftertyping=1;
3702       @<Typeset character |c| on the current layer@>;
3703     } @+else if(upto4(c,set1)) {
3704       moveaftertyping=1;
3705       c=get_dvi_number(fp,0,c+1-set1);
3706       @<Typeset character |c| on the current layer@>;
3707     } @+else if(c==set_rule || c==put_rule) {
3708       moveaftertyping=(c==set_rule);
3709       c=get_dvi_number(fp,1,4);
3710       a=get_dvi_number(fp,1,4);
3711       @<Typeset |a| by |c| rule on the current layer@>;
3712     } @+else if(upto4(c,put1)) {
3713       moveaftertyping=0;
3714       c=get_dvi_number(fp,0,c+1-put1);
3715       @<Typeset character |c| on the current layer@>;
3716     } @+else if(c==eop) {
3717       break;
3718     } @+else if(c==push) {
3719       if(dvi_stack) @<Push DVI registers to stack@>;
3720     } @+else if(c==pop) {
3721       if(dvi_stack) @<Pop DVI registers from stack@>;
3722     } @+else if(upto4(c,right1)) {
3723       c=get_dvi_number(fp,1,c+1-right1);
3724       horizontal_movement(c);
3725     } @+else if(c==w0) {
3726       horizontal_movement(quan('W'));
3727     } @+else if(upto4(c,w1)) {
3728       c=get_dvi_number(fp,1,c+1-w1);
3729       horizontal_movement(quan('W')=c);
3730     } @+else if(c==x0) {
3731       horizontal_movement(quan('X'));
3732     } @+else if(upto4(c,x1)) {
3733       c=get_dvi_number(fp,1,c+1-x1);
3734       horizontal_movement(quan('X')=c);
3735     } @+else if(upto4(c,down1)) {
3736       c=get_dvi_number(fp,1,c+1-down1);
3737       vertical_movement(c);
3738     } @+else if(c==y0) {
3739       vertical_movement(quan('Y'));
3740     } @+else if(upto4(c,y1)) {
3741       c=get_dvi_number(fp,1,c+1-y1);
3742       vertical_movement(quan('Y')=c);
3743     } @+else if(c==z0) {
3744       vertical_movement(quan('Z'));
3745     } @+else if(upto4(c,z1)) {
3746       c=get_dvi_number(fp,1,c+1-z1);
3747       vertical_movement(quan('Z')=c);
3748     } @+else if(c>=fnt_num_0 && c<fnt1) {
3749       quan('F')=c-fnt_num_0;
3750     } @+else if(upto4(c,fnt1)) {
3751       quan('F')=get_dvi_number(fp,0,c+1-fnt1);
3752     } @+else if(upto4(c,xxx1)) {
3753       c=get_dvi_number(fp,0,c+1-xxx1);
3754       @<Read a special of length |c|@>;
3755     } @+else if(upto4(c,fnt_def1)) {
3756       @<Skip a font definition@>;
3757     } @+else if(c!=nop) {
3758       dvi_error(fp,"Unknown DVI command");
3759     }
3760   }
3761 }
3762
3763 @ @<Push DVI registers to stack@>= {
3764   dvi_stack_ptr->h=quan('H');
3765   dvi_stack_ptr->v=quan('V');
3766   dvi_stack_ptr->w=quan('W');
3767   dvi_stack_ptr->x=quan('X');
3768   dvi_stack_ptr->y=quan('Y');
3769   dvi_stack_ptr->z=quan('Z');
3770   dvi_stack_ptr->hh=quan('I');
3771   dvi_stack_ptr->vv=quan('J');
3772   ++dvi_stack_ptr;
3773 }
3774
3775 @ @<Pop DVI registers from stack@>= {
3776   --dvi_stack_ptr;
3777   quan('H')=dvi_stack_ptr->h;
3778   quan('V')=dvi_stack_ptr->v;
3779   quan('W')=dvi_stack_ptr->w;
3780   quan('X')=dvi_stack_ptr->x;
3781   quan('Y')=dvi_stack_ptr->y;
3782   quan('Z')=dvi_stack_ptr->z;
3783   quan('I')=dvi_stack_ptr->hh;
3784   quan('J')=dvi_stack_ptr->vv;
3785 }
3786
3787 @ A special in \TeX nicard is used to execute a special code while reading
3788 the DVI file. Uses might be additional calculations, changes of registers,
3789 special effects, layer selection, etc. All of these possible commands are
3790 dealt with elsewhere in this program. All we do here is to read it and to
3791 send it to the |execute_program| subroutine.
3792
3793 @^specials@>
3794
3795 @<Read a special of length |c|@>= {
3796   char*buf=malloc(c+1);
3797   fread(buf,1,c,fp);
3798   buf[c]=0;
3799   @<Set \.X and \.Y registers to prepare for the special@>;
3800   execute_program(buf);
3801   free(buf);
3802 }
3803
3804 @ @<Set \.X and \.Y registers to prepare for the special@>= {
3805   registers['X'].is_string=registers['Y'].is_string=0;
3806   registers['X'].number=quan('I');
3807   registers['Y'].number=quan('J');
3808 }
3809
3810 @ In order to read all the pages for each card, we can skip backwards by
3811 using the back pointers. Either we will print all cards (in reverse
3812 order), or we will print cards listed on the command-line, or we will
3813 print cards listed in a file (this last way might be used to print decks
3814 or booster packs).
3815
3816 Card numbers should be one-based, and should not be negative. Any pages
3817 with negative page numbers will be ignored when it is in the mode for
3818 printing all cards.
3819
3820 @d printing_all_cards 0
3821 @d printing_list 1
3822 @d printing_list_from_file 2
3823
3824 @<Global variables@>=
3825 unsigned char printing_mode;
3826 char*printlisttext;
3827 FILE*printlistfile;
3828
3829 @ @<Read the pages for each card@>= {
3830   dvi_number page_ptr=last_page_ptr;
3831   dvi_number e=0,n; // page numbers
3832   boolean pagenotfound=0;
3833   for(;;) {
3834     @<Read the next entry from the list of pages (if applicable)@>;
3835     try_next_page:
3836     @<Seek the next page to print@>;
3837     @<Read the heading for this page@>;
3838     @<If this page shouldn't be printed now, |goto try_next_page|@>;
3839     pagenotfound=0;
3840     read_dvi_page(fp);
3841   }
3842   @#done_printing:;
3843 }
3844
3845 @ @<Read the next entry from the list of pages (if applicable)@>= {
3846   if(printing_mode==printing_list) {
3847     if(!*printlisttext) goto done_printing;
3848     e=strtol(printlisttext,&printlisttext,10);
3849     if(!e) goto done_printing;
3850     if(*printlisttext) printlisttext++;
3851   } @+else if(printing_mode==printing_list_from_file) {
3852     char buf[256];
3853     if(!printlistfile || feof(printlistfile)) goto done_printing;
3854     if(!fgets(buf,255,printlistfile)) goto done_printing;
3855     e=strtol(buf,0,10);
3856   }
3857 }
3858
3859 @ @<Seek the next page to print@>= {
3860   if(page_ptr==-1) {
3861     if(pagenotfound) {
3862       fprintf(stderr,"No page found: %d\n",e);
3863 @.No page found...@>
3864       exit(1);
3865     }
3866     page_ptr=last_page_ptr;
3867     if(printing_mode==printing_all_cards) goto done_printing;
3868     pagenotfound=1;
3869   }
3870   fseek(fp,page_ptr+1,SEEK_SET);
3871 }
3872
3873 @ @<Read the heading for this page@>= {
3874   n=quan('P')=get_dvi_number(fp,1,4);
3875   fseek(fp,4,SEEK_CUR);
3876   layer_width=get_dvi_number(fp,1,4);
3877   layer_height=get_dvi_number(fp,1,4);
3878   fseek(fp,4*6,SEEK_CUR);
3879   page_ptr=get_dvi_number(fp,1,4);
3880 }
3881
3882 @ @<If this page shouldn't be printed now, |goto try_next_page|@>= {
3883   if(n<=0 && printing_mode==printing_all_cards) goto try_next_page;
3884   if(n!=e && printing_mode!=printing_all_cards) goto try_next_page;
3885 }
3886
3887 @*DVI Font Metrics. Here, the fonts are loaded. It is assumed all fonts
3888 are in the current directory, and the ``area'' of the font name is
3889 ignored. The checksum will also be ignored (it can be checked with
3890 external programs if necessary).
3891
3892 @^area@>
3893 @^font loading@>
3894
3895 @<Read the font definitions and load the fonts@>= {
3896   int c;
3897   for(;;) {
3898     c=fgetc(fp);
3899     if(c==post_post) break;
3900     if(c>=fnt_def1 && c<fnt_def1+4) {
3901       int k=get_dvi_number(fp,0,c+1-fnt_def1);
3902       if(k&~0xFF) dvi_error(fp,"Too many fonts");
3903       memusage_log("Loading font",k);
3904       @<Read the definition for font |k| and load it@>;
3905     } @+else if(c!=nop) {
3906       dvi_error(fp,"Bad command in postamble");
3907     }
3908   }
3909   memusage_log("End of postamble",c);
3910 }
3911
3912 @ When reading fonts, it will be necessary to keep a list of the fonts
3913 and their character indices. Only 256 fonts are permitted in one job.
3914
3915 @<Global variables@>=
3916 data_index fontindex[256];
3917
3918 @ @<Read the definition for font |k| and load it@>= {
3919   dvi_number c=get_dvi_number(fp,0,4); // checksum (unused)
3920   dvi_number s=get_dvi_number(fp,0,4); // scale factor
3921   dvi_number d=get_dvi_number(fp,0,4); // design size
3922   int a=get_dvi_number(fp,0,1); // length of area
3923   int l=get_dvi_number(fp,0,1); // length of name
3924   char n[257];
3925   fseek(fp,a,SEEK_CUR);
3926   fread(n,1,l,fp);
3927   n[l]=0;
3928   if((fontindex[k]=read_gf_file(n,s,d))==none) fonts_okay=0;
3929 }
3930
3931 @ An important part of reading the font metrics is the width computation,
3932 which involves multiplying the relative widths in the \.{TFM} file (or
3933 \.{GF} file) by the scaling factor in the \.{DVI} file. This
3934 multiplication must be done in precisely the same way by all \.{DVI}
3935 reading programs, in order to validate the assumptions made by \.{DVI}-%
3936 writing programs such as \TeX.
3937
3938 % (The following paragraph is taken directly from "dvitype.web")
3939 Let us therefore summarize what needs to be done. Each width in a \.{TFM}
3940 file appears as a four-byte quantity called a |fix_word|.  A |fix_word|
3941 whose respective bytes are $(a,b,c,d)$ represents the number
3942 $$x=\left\{\vcenter{\halign{$#$,\hfil\qquad&if $#$\hfil\cr
3943 b\cdot2^{-4}+c\cdot2^{-12}+d\cdot2^{-20}&a=0;\cr
3944 -16+b\cdot2^{-4}+c\cdot2^{-12}+d\cdot2^{-20}&a=255.\cr}}\right.$$
3945 (No other choices of $a$ are allowed, since the magnitude of a \.{TFM}
3946 dimension must be less than 16.)  We want to multiply this quantity by the
3947 integer~|z|, which is known to be less than $2^{27}$.
3948 If $|z|<2^{23}$, the individual multiplications $b\cdot z$, $c\cdot z$,
3949 $d\cdot z$ cannot overflow; otherwise we will divide |z| by 2, 4, 8, or
3950 16, to obtain a multiplier less than $2^{23}$, and we can compensate for
3951 this later. If |z| has thereby been replaced by $|z|^\prime=|z|/2^e$, let
3952 $\beta=2^{4-e}$; we shall compute
3953 $$\lfloor(b+c\cdot2^{-8}+d\cdot2^{-16})\,z^\prime/\beta\rfloor$$ if $a=0$,
3954 or the same quantity minus $\alpha=2^{4+e}z^\prime$ if $a=255$.
3955 This calculation must be
3956 done exactly, for the reasons stated above; the following program does the
3957 job in a system-independent way, assuming that arithmetic is exact on
3958 numbers less than $2^{31}$ in magnitude.
3959
3960 \def\zprime{z'}
3961
3962 @f alpha TeX
3963 @f beta TeX
3964 @f zprime TeX
3965
3966 @<Compute |zprime|, |alpha|, and |beta|@>= {
3967   zprime=s; @+ alpha=16;
3968   while(zprime>=040000000) {
3969     zprime>>=1; @+ alpha<<=1;
3970   }
3971   beta=256/alpha; @+ alpha*=zprime;
3972 }
3973
3974 @ @<Compute the character width |w|@>= {
3975   w=(((((b3*zprime)>>8)+(b2*zprime))>>8)+(b1*zprime))/beta;
3976   if(b0) w-=alpha;
3977 }
3978
3979 @*GF Reading. The \.{GF} format is a ``generic font'' format. It has a lot
3980 in common with \.{DVI} format.
3981
3982 At first, names will be given for the commands in a \.{GF} file. Many
3983 commands have the same numbers as they do in a \.{DVI} file (described in
3984 the previous chapter), which makes it very convenient\biblio{This is
3985 probably on purpose for the this very reason, so that a WEB or CWEB
3986 program can use one set of named constants for reading both files.}.
3987
3988 @d paint_0 0 // Paint $d$ pixels black or white [up to 63]
3989 @d paint1 64 // Take parameter, paint pixels [up to 66]
3990 @d boc 67 // Beginning of a character picture
3991 @d boc1 68 // Short form of |boc|
3992 @d eoc 69 // End of a character picture
3993 @d skip0 70 // Skip some rows
3994 @d skip1 71 // Skip some rows [up to 73]
3995 @d new_row_0 74 // Start a new row and move right [up to 238]
3996 @d yyy 243 // Numeric specials
3997 @d no_op 244 // No operation
3998 @d char_loc 245 // Character locator
3999 @d char_loc0 246 // Short form of |char_loc|
4000
4001 @ The |font_struct| structure stores the information for each character in
4002 a font. The |raster| field points to a bitmap with eight pixels per octet,
4003 most significant bit for the leftmost pixel, each row always padded to a
4004 multiple of eight pixels.
4005
4006 While it is reading the postamble, it will fill in this structure with the
4007 |ptr| field set. After the postamble is read, it will fill in the other
4008 fields belonging to its union.
4009
4010 @<Typedefs@>=
4011 typedef struct {
4012   dvi_number dx; // character escapement in pixels
4013   dvi_number w; // width in DVI units
4014   union {
4015     struct {
4016       dvi_number min_n,max_n,min_m,max_m; // bounding box (in pixels)
4017       unsigned short n; // character code number
4018       unsigned char*raster;
4019       unsigned char flag; // bitfield of flags for this character
4020     }@+;
4021     dvi_number ptr;
4022   }@+;
4023   data_index next;
4024 } font_struct;
4025
4026 @ List of flags follows. Some of these flags might be used in order to
4027 allow$\mathord{}>256$ characters per font, since {\TeX} does not have a
4028 command to enter characters with codes more than one byte long. These
4029 flags are specified using numeric specials.
4030
4031 @d ff_select 0x01 // set high octet all characters
4032 @d ff_prefix 0x02 // set high octet for codes 128-255
4033 @d ff_roundafter 0x04 // round $\it hh$ after sending character
4034 @d ff_roundbefore 0x08 // round $\it hh$ before sending character
4035 @d ff_reset 0x10 // reset high octet
4036 @d ff_strip 0x20 // strip highest bit of prefix
4037 @d ff_space 0x40 // do not save the raster (space only)
4038 @d ff_reserved 0x80 // {\bf DO NOT USE}
4039
4040 @ @<Global variables@>=
4041 memory_of(font_struct) font_data;
4042
4043 @ @<Initialize memory@>= init_memory(font_data,4);
4044
4045 @ When loading a \.{GF} font, the filename will contain the resolution
4046 in dots per inch.
4047
4048 @^font loading@>
4049
4050 @-p data_index read_gf_file(char*fontname,dvi_measure s,dvi_measure d) {
4051   unsigned int dpi=(resolution*unit_mag*s+500*d)/(100000*d);
4052   FILE*fp;
4053   data_index index=none;
4054   data_index first_index=none;
4055   data_index last_index=none;
4056   dvi_number zprime,alpha,beta; // used for width computation
4057   @<Compute |zprime|, |alpha|, and |beta|@>;
4058  @<Figure out the filename and open the file, |return none| if it can't@>;
4059   @<Skip to the postamble of the \.{GF} file@>;
4060   @<Read the character locators@>;
4061   @<Read the character rasters and flags@>;
4062   fclose(fp);
4063   return last_index;
4064 }
4065
4066 @ When figuring out the filename, it will send it to standard output so
4067 that a list can be made of the required fonts.
4068
4069 @<Figure out the filename and open the file, ...@>= {
4070   char n[295];
4071   sprintf(n,"%s.%dgf",fontname,dpi);
4072   printf("%s\n",n);
4073   fp=open_file(n,"rb");
4074   if(!fp) return none;
4075 }
4076
4077 @ @<Skip to the postamble of the \.{GF} file@>= {
4078   int c;
4079   fseek(fp,-4,SEEK_END);
4080   while(fgetc(fp)==223) fseek(fp,-2,SEEK_CUR);
4081   fseek(fp,-5,SEEK_CUR);
4082   fseek(fp,get_dvi_number(fp,0,4)+37,SEEK_SET); // nothing matters anymore
4083 }
4084
4085 @ @<Read the character locators@>= {
4086   int c,b0,b1,b2,b3;
4087   dvi_number dx,w,p;
4088   for(;;) {
4089     c=fgetc(fp);
4090     if(c==post_post) break;
4091     p=-1;
4092     if(c==char_loc) {
4093       @<Read a long character locator@>;
4094     } @+else if(c==char_loc0) {
4095       @<Read a short character locator@>;
4096     } @+else if(c!=no_op) {
4097       fprintf(stderr,"Bad command in GF postamble.\n");
4098 @.Bad command in GF postamble@>
4099       fprintf(stderr,"(Command %d, address %08X)\n",c,ftell(fp)-1);
4100       exit(1);
4101     }
4102     if(p!=-1) @<Defer this character locator into |font_data|@>;
4103   }
4104   last_index=index;
4105 }
4106
4107 @ There are some parameters we do not care about. First is $c$, which is
4108 the character code residue (modulo 256). This is not important since it
4109 is duplicated in the |boc| heading for each character. The second
4110 parameter which we do not care about is the $\it dy$ parameter, since it
4111 should be zero for \.{DVI} files.
4112
4113 @<Read a long character locator@>= {
4114   fseek(fp,1,SEEK_CUR);
4115   dx=get_dvi_number(fp,1,4)>>16;
4116   fseek(fp,4,SEEK_CUR);
4117   @<Read four bytes@>;
4118   p=get_dvi_number(fp,1,4);
4119 }
4120
4121 @ @<Read a short character locator@>= {
4122   fseek(fp,1,SEEK_CUR);
4123   dx=get_dvi_number(fp,0,1);
4124   @<Read four bytes@>;
4125   p=get_dvi_number(fp,1,4);
4126 }
4127
4128 @ @<Read four bytes@>= {
4129   b0=fgetc(fp);@+b1=fgetc(fp);@+b2=fgetc(fp);@+b3=fgetc(fp);
4130 }
4131
4132 @ This processing is deferred, and the rest of the parameters will be
4133 filled in later (and the |ptr| field will be overwritten since it will
4134 no longer be needed at that time).
4135
4136 @<Defer this character locator into |font_data|@>= {
4137   data_index n=new_record(font_data);
4138   @<Compute the character width |w|@>;
4139   font_data.data[n].next=index;
4140   font_data.data[n].dx=dx;
4141   font_data.data[n].w=w;
4142   font_data.data[n].ptr=p;
4143   if(index==none) first_index=n;
4144   index=n;
4145 }
4146
4147 @ Now is time to go through the list we made up and this time actually
4148 fill in the parameters and pictures.
4149
4150 @<Read the character rasters and flags@>= {
4151   while(index!=none) {
4152     fseek(fp,font_data.data[index].ptr,SEEK_SET);
4153     font_data.data[index].flag=0;
4154     font_data.data[index].raster=0;
4155     @<Read commands for this character@>;
4156     @#index=font_data.data[index].next;
4157   }
4158 }
4159
4160 @ Painting the picture uses the value of |paint_switch| to determine
4161 to draw or skip. The current position in the array |raster| is also
4162 pointed by the |pic| pointer. Note that |black| and |white| are not
4163 necessary black and white (but they are on normal paper).
4164
4165 Note the value of $n$ is not needed since the |pic| pointer automatically
4166 keeps track of this kinds of stuff. However, |m| is needed because of
4167 commands that can skip rows, to know how many columns must be skipped to
4168 reach the next row. There is also |b|, which keeps track of the bit
4169 position in the current byte.
4170
4171 @d white 0
4172 @d black 1
4173 @#
4174 @d reset_m
4175   m=(font_data.data[index].max_m-font_data.data[index].min_m)/8+1@;
4176
4177 @<Read commands for this character@>= {
4178   unsigned int c,m,b;
4179   unsigned char*pic;
4180   boolean paint_switch;
4181   for(;;) {
4182     c=fgetc(fp);
4183     if(c<paint1) {
4184       @<Paint |c| pixels |black| or |white|@>;
4185     } @+else if(c>=paint1 && c<paint1+3) {
4186       c=get_dvi_number(fp,0,c+1-paint1);
4187       @<Paint |c| pixels |black| or |white|@>;
4188     } @+else if(c==boc) {
4189       @<Initialize parameters and picture (long form)@>;
4190     } @+else if(c==boc1) {
4191       @<Initialize parameters and picture (short form)@>;
4192     } @+else if(c==eoc) {
4193       break; // Well Done!
4194     } @+else if(upto4(c,skip0)) {
4195       if(c==skip0) c=0;
4196       else c=get_dvi_number(fp,0,c+1-skip1);
4197       @<Finish a row and skip |c| rows@>;
4198     } @+else if(c>=new_row_0 && c<=new_row_0+164) {
4199       c-=new_row_0;
4200       @<Finish a row and skip |c| columns@>;
4201     } @+else if(c==yyy) {
4202       font_data.data[index].flag|=get_dvi_number(fp,0,4)>>16;
4203     } @+else if(c!=no_op) {
4204       fprintf(stderr,"Unknown GF command!\n");
4205 @.Unknown GF command@>
4206       fprintf(stderr,"(Command %d, address %08X)\n",c,ftell(fp)-1);
4207     }
4208   }
4209 }
4210
4211 @ Actually |m| is something a bit different than the standard, because |m|
4212 now tells how many bytes are remaining in the current row.
4213
4214 @d pic_rows (1+font_data.data[index].max_n-font_data.data[index].min_n)
4215
4216 @<Initialize parameters and picture (long form)@>= {
4217   font_data.data[index].n=get_dvi_number(fp,0,4);
4218   @<Deal with $p$ (pointer to previous character with same metrics)@>;
4219   font_data.data[index].min_m=get_dvi_number(fp,1,4);
4220   font_data.data[index].max_m=get_dvi_number(fp,1,4);
4221   font_data.data[index].min_n=get_dvi_number(fp,1,4);
4222   font_data.data[index].max_n=get_dvi_number(fp,1,4);
4223   @<Initialize picture@>;
4224 }
4225
4226 @ @<Initialize picture@>= {
4227   if(font_data.data[index].flag&ff_space) break;
4228   paint_switch=white;
4229   reset_m;
4230   b=0;
4231   pic=font_data.data[index].raster=malloc(m*pic_rows+1);
4232   memset(pic,0,m*pic_rows);
4233 }
4234
4235 @ @<Initialize parameters and picture (short form)@>= {
4236   int d;
4237   font_data.data[index].n=get_dvi_number(fp,0,1);
4238   d=get_dvi_number(fp,0,1);
4239   font_data.data[index].max_m=get_dvi_number(fp,0,1);
4240   font_data.data[index].min_m=font_data.data[index].max_m-d;
4241   d=get_dvi_number(fp,0,1);
4242   font_data.data[index].max_n=get_dvi_number(fp,0,1);
4243   font_data.data[index].min_n=font_data.data[index].max_n-d;
4244   @<Initialize picture@>;
4245 }
4246
4247 @ The pointers to other characters will also be deferred in the same way
4248 as the character locators, but this time from the other end. Now, once it
4249 is finished all the characters, it will {\sl automatically} know to read
4250 the next one properly! (Now you can see what the purpose of the
4251 |@!first_index| variable is.)
4252
4253 @<Deal with $p$ (pointer to previous character with same metrics)@>= {
4254   dvi_number p=get_dvi_number(fp,1,4);
4255   if(p!=-1) {
4256     data_index i=new_record(font_data);
4257     font_data.data[i].next=none;
4258     font_data.data[i].dx=font_data.data[index].dx;
4259     font_data.data[i].w=font_data.data[index].w;
4260     font_data.data[i].ptr=p;
4261     font_data.data[first_index].next=i;
4262     first_index=i;
4263   }
4264 }
4265
4266 @ Now we get to the actual painting. We can assume the value of |m| is
4267 never negative and that everything else is also okay.
4268
4269 @<Paint |c| pixels |black| or |white|@>= {
4270   if(paint_switch) {
4271     if(b+c<=8) {
4272       @<Paint a small block of pixels in the current byte@>;
4273     } @+else {
4274       @<Paint the rest of the pixels in the current byte@>;
4275       @<Fill up the bytes in the middle@>;
4276       @<Clear the pixels needed clearing at the end@>;
4277     }
4278   }
4279   @<Update |paint_switch|, |pic|, |b|, and |m|@>;
4280 }
4281
4282 @ @<Update |paint_switch|, |pic|, |b|, and |m|@>= {
4283   paint_switch^=1;
4284   b+=c;
4285   pic+=b>>3;
4286   m-=b>>3;
4287   b&=7;
4288 }
4289
4290 @ @<Paint a small block of pixels in the current byte@>= {
4291   *pic|=(0xFF>>b)&~(0xFF>>(b+c));
4292 }
4293
4294 @ @<Paint the rest of the pixels in the current byte@>= {
4295   *pic|=0xFF>>b;
4296 }
4297
4298 @ @<Fill up the bytes in the middle@>= {
4299   memset(pic+1,0xFF,(c+b)>>3);
4300 }
4301
4302 @ @<Clear the pixels needed clearing at the end@>= {
4303   pic[(c+b)>>3]&=~(0xFF>>((c+b)&7));
4304 }
4305
4306 @ @<Finish a row and skip |c| rows@>= {
4307   pic+=m;
4308   b=0;
4309   reset_m;
4310   pic+=m*c;
4311   paint_switch=white;
4312 }
4313
4314 @ @<Finish a row and skip |c| columns@>= {
4315   pic+=m;
4316   reset_m;
4317   m-=c>>3;
4318   pic+=c>>3;
4319   b=c&7;
4320   paint_switch=black;
4321 }
4322
4323 @ @<Display font information@>= {
4324   data_index i;
4325   foreach(i,font_data) {
4326     printf("[%d] box=(%d,%d,%d,%d) dx=%d w=%d n=%d flag=%d [%d]\n"
4327       ,i,font_data.data[i].min_n,font_data.data[i].max_n
4328         ,font_data.data[i].min_m,font_data.data[i].max_m
4329       ,font_data.data[i].dx,font_data.data[i].w,font_data.data[i].n
4330       ,font_data.data[i].flag,font_data.data[i].next
4331     );
4332   }
4333 }
4334
4335 @*Layer Computation. Now is the chapter for actually deciding rendering on
4336 the page, where everything should go, etc.$^{[\TeXwareBiblio]}$
4337
4338 @<Global variables@>=
4339 dvi_measure unit_num; // Numerator for units of measurement
4340 dvi_measure unit_den; // Denominator for units of measurement
4341 dvi_measure unit_mag; // Magnification for measurement
4342 dvi_measure unit_conv; // Conversion factor
4343
4344 @ There are also a number of ``internal typesetting quantities''. These
4345 are parameters stored in a separate array, and are used to keep track of
4346 the current state of the typesetting. They are labeled with letters from
4347 \.A to \.Z. They can be modified inside of specials, although some of them
4348 probably shouldn't be modified in this way. Here is the list of them:
4349
4350 \.A, \.B: Horizontal and vertical offset added to \.I and \.J.
4351
4352 \.C: Character code prefix. If bit eight is not set, it only affects
4353 character codes with bit seven set.
4354
4355 \.D: Maximum horizontal drift (in pixels), meaning how far away the \.I
4356 and \.J parameters are allowed to be from the correctly rounded values.
4357
4358 \.E: Maximum vertical drift.
4359
4360 \.F: The current font.
4361
4362 \.H: The horizontal position on the page, in DVI units.
4363
4364 \.I: The horizontal position on the page, in pixels.
4365
4366 \.J: The vertical position on the page, in pixels.
4367
4368 \.L: The current layer number. If this is zero, nothing is placed on the
4369 page, although the positions can still be changed and specials can still
4370 be used.
4371
4372 \.P: Page number. This is used to determine the filename of output.
4373
4374 \.R, \.S: The limits for when horizontal motion should add the number of
4375 pixels or when it should recalculate the pixels entirely.
4376
4377 \.T, \.U: Like \.R and \.S, but for vertical motions.
4378
4379 \.V: The vertical position on the page, in DVI units.
4380
4381 \.W, \.X, \.Y, \.Z: The current spacing amounts, in DVI units.
4382
4383 @d quan(_name) (type_quan[(_name)&0x1F])
4384
4385 @<Global variables@>=
4386 dvi_number type_quan[32];
4387
4388 @ @<Cases for system commands@>=
4389   @-case 'm': {
4390     // Modify an internal typesetting quantity
4391     if(stack_ptr->is_string) program_error("Type mismatch");
4392     quan(*++ptr)=pop_num();
4393     break;
4394   }@#
4395
4396 @ The conversion factor |unit_conv| is figured as follows: There are
4397 exactly |unit_num/unit_den| decimicrons per DVI unit, and 254000
4398 decimicrons per inch, and |resolution/100| pixels per inch. Then we have
4399 to adjust this by the magnification |unit_mag|.
4400
4401 Division must be done slightly carefully to avoid overflow.
4402
4403 @d resolution (registers['D'].number)
4404
4405 @<Compute the conversion factor@>= {
4406   unit_conv=make_fraction(unit_num*resolution*unit_mag,unit_den*100000);
4407   unit_conv/=254000;
4408 }
4409
4410 @ Here are the codes to compute movements. The definition of \.{DVI} files
4411 refers to six registers which hold integer values in DVI units. However,
4412 we also have two more registers, for horizontal and vertical pixel units.
4413
4414 A sequence of characters or rules might cause the pixel values to drift
4415 from their correctly rounded values, since they are not usually an exact
4416 integer number of pixels.
4417
4418 @d to_pixels(_val) round((_val)*unit_conv)
4419
4420 @-p void horizontal_movement(dvi_number x) {
4421   quan('H')+=x;
4422   if(x>quan('S') || x<quan('R')) {
4423     quan('I')=to_pixels(quan('H'));
4424   } @+else {
4425     quan('I')+=to_pixels(x);
4426     if(to_pixels(quan('H'))-quan('I')>quan('D'))
4427       quan('I')=to_pixels(quan('H'))+quan('D');
4428     if(to_pixels(quan('H'))-quan('I')<-quan('D'))
4429       quan('I')=to_pixels(quan('H'))-quan('D');
4430   }
4431 }
4432
4433 @ @-p void vertical_movement(dvi_number x) {
4434   quan('V')+=x;
4435   if(x>quan('U') || x<quan('T')) {
4436     quan('J')=to_pixels(quan('V'));
4437   } @+else {
4438     quan('J')+=to_pixels(x);
4439     if(to_pixels(quan('V'))-quan('J')>quan('E'))
4440       quan('J')=to_pixels(quan('V'))+quan('E');
4441     if(to_pixels(quan('V'))-quan('J')<-quan('E'))
4442       quan('J')=to_pixels(quan('V'))-quan('E');
4443   }
4444 }
4445
4446 @ This is now the part that does actual sending. When many characters
4447 come next to each other, the rounding will be done such that the number
4448 of pixels between two letters will always be the same whenever those two
4449 letters occur next to each other.
4450
4451 @<Typeset character |c| on the current layer@>= {
4452   data_index n=fontindex[quan('F')&0xFF];
4453   if((quan('C')&0x100) || (c&0x80)) c|=quan('C')<<8;
4454   while(n!=none && c!=font_data.data[n].n)
4455     n=font_data.data[n].next;
4456   if(n==none) dvi_error(fp,"Character not in font");
4457   @<Typeset the character and update the current position@>;
4458   @<Update the character code prefix@>;
4459 }
4460
4461 @ @<Typeset the character and update the current position@>= {
4462   if(font_data.data[n].flag&ff_roundbefore)
4463     quan('I')=to_pixels(quan('H'));
4464   if(quan('L') && font_data.data[n].raster) typeset_char_here(n);
4465   if(moveaftertyping) {
4466     quan('H')+=font_data.data[n].w;
4467     quan('I')+=font_data.data[n].dx;
4468     if(font_data.data[n].flag&ff_roundafter)
4469       quan('I')=to_pixels(quan('H'));
4470     else horizontal_movement(0);
4471   }
4472 }
4473
4474 @ If you have a typesetting program that can ship out characters with
4475 codes more than eight bits long, you won't need this. It is provided for
4476 use with normal {\TeX} system.
4477
4478 @<Update the character code prefix@>= {
4479   if(font_data.data[n].flag&ff_strip) c&=0x7F; else c&=0xFF;
4480   if(font_data.data[n].flag&ff_select) quan('C')=c|0x100;
4481   if(font_data.data[n].flag&ff_prefix) quan('C')=c;
4482   if(font_data.data[n].flag&ff_reset) quan('C')=0;
4483 }
4484
4485 @ The number of pixels in the height or width of a rule will always be
4486 rounded up. However, unlike DVItype, this program has no floating point
4487 rounding errors.
4488
4489 @d to_rule_pixels(_val) ceiling((_val)*unit_conv)
4490
4491 @<Typeset |a| by |c| rule on the current layer@>= {
4492   dvi_number x=to_rule_pixels(a);
4493   dvi_number y=to_rule_pixels(c);
4494   if(quan('L') && a>0 && c>0) typeset_rule_here(x,y);
4495   if(moveaftertyping) {
4496     quan('I')+=x;
4497     horizontal_movement(0);
4498   }
4499 }
4500
4501 @ Sometimes you might want DVI units converted to pixels inside of a user
4502 program contained in a DVI file. Here is how it is done.
4503
4504 @<Cases for system commands@>=
4505   @-case 'C': {
4506     // Convert DVI units to pixels
4507     if(stack_ptr->is_string) program_error("Type mismatch");
4508     stack_ptr->number=to_pixels(stack_ptr->number);
4509     break;
4510   }@#
4511
4512 @*Layer Rendering. Please note, these numbers are |short|, which means
4513 that you cannot have more than 65536 pixels in width or in height. This
4514 should not be a problem, because even if you have 3000 dots per inch, and
4515 each card is 10 inches long, that is still only 30000 which is less than
4516 half of the available width. (All units here are in pixels.)
4517
4518 In order to save memory, all typeset nodes are stored in one list at
4519 first, and then rendered to a pixel buffer as each layer is being written
4520 out to the \.{PBM} file, and then the buffer can be freed (or reset to
4521 zero) afterwards to save memory.
4522
4523 @<Typedefs@>=
4524 typedef struct {
4525   unsigned short x; // X position on page
4526   unsigned short y; // Y position on page
4527   union {
4528     struct {
4529       unsigned short w; // Width of rule
4530       unsigned short h; // Height of rule
4531     }@+;
4532     data_index c; // Character index in |font_data|
4533   }@+;
4534   unsigned char l; // Layer (high bit set for rules)
4535 } typeset_node;
4536
4537 @ @<Global variables@>=
4538 memory_of(typeset_node) typeset_nodes;
4539
4540 @ @<Initialize memory@>= init_memory(typeset_nodes,8);
4541
4542 @ We also have variables for the layer size (loaded from \.{\\count2}
4543 and \.{\\count3} registers for the current page). If they are both zero,
4544 then nothing will be rendered.
4545
4546 @<Global variables@>=
4547 unsigned short layer_width;
4548 unsigned short layer_height;
4549
4550 @ Here are the subroutines which typeset characters and rules onto the
4551 page buffer. They are not rendered into a picture yet.
4552
4553 @d typeset_new_page() (typeset_nodes.used=0)
4554 @d typeset_rule_here(_w,_h) typeset_rule(quan('I'),quan('J'),(_w),(_h));
4555 @d typeset_char_here(_ch) typeset_char(quan('I'),quan('J'),(_ch));
4556
4557 @-p void typeset_rule(int x,int y,int w,int h) {
4558   data_index n=new_record(typeset_nodes);
4559   @<Ensure |w| and |h| are not too large to fit on the page@>;
4560   typeset_nodes.data[n].x=x;
4561   typeset_nodes.data[n].y=y;
4562   typeset_nodes.data[n].w=w;
4563   typeset_nodes.data[n].h=h;
4564   typeset_nodes.data[n].l=quan('L')|0x80;
4565 }
4566
4567 @ @<Ensure |w| and |h| are not too large to fit on the page@>= {
4568   if(x+w>layer_width) w=layer_width-x;
4569   if(y+h>layer_height) h=layer_height-y;
4570 }
4571
4572 @ @-p void typeset_char(int x,int y,data_index c) {
4573   data_index n=new_record(typeset_nodes);
4574   typeset_nodes.data[n].x=x;
4575   typeset_nodes.data[n].y=y;
4576   typeset_nodes.data[n].c=c;
4577   typeset_nodes.data[n].l=quan('L');
4578 }
4579
4580 @ Here is a variable |image|. This is a pointer to the buffer for the
4581 picture of the current layer, in \.{PBM} format. The internal quantity
4582 \.L should be set now to the largest layer number in use, at the end of
4583 the page, because it is used to determine how many layers must be sent to
4584 the output.
4585
4586 @d image_max (image+layer_size)
4587
4588 @<Global variables@>=
4589 unsigned char*image;
4590
4591 @ @<Render this page@>= {
4592   unsigned int row_size=((layer_width+7)>>3);
4593   unsigned int layer_size=row_size*layer_height;
4594   image=malloc(layer_size+1);
4595   while(quan('L')) {
4596     memset(image,0,layer_size);
4597     @<Read the |typeset_nodes| list and render any applicable nodes@>;
4598     @<Send the current layer to a file@>;
4599     --quan('L');
4600   }
4601   free(image);
4602 }
4603
4604 @ @<Read the |typeset_nodes| list and render any applicable nodes@>= {
4605   data_index i;
4606   foreach(i,typeset_nodes) {
4607     if((typeset_nodes.data[i].l&0x7F)==quan('L')) {
4608       if(typeset_nodes.data[i].l&0x80) {
4609         @<Render a rule node@>;
4610       } @+else {
4611         @<Render a character node@>;
4612       }
4613     }
4614   }
4615 }
4616
4617 @ In order to render a rule node (which is a filled |black| rectangle), it
4618 is split into rows, and each row is split into three parts: the left end,
4619 the filling, and the right end. However, if the width is sufficiently
4620 small, it will fit in one byte and will not have to be split in this way.
4621
4622 There are also some checks to ensure that the entire rectangle will be
4623 within the bounds of the image.
4624
4625 @<Render a rule node@>= {
4626   int y=1+typeset_nodes.data[i].y-typeset_nodes.data[i].h;
4627   int x=typeset_nodes.data[i].x;
4628   int w=typeset_nodes.data[i].w;
4629   if(y<0) y=0;
4630   if(typeset_nodes.data[i].y>=layer_height)
4631     typeset_nodes.data[i].y=layer_height-1;
4632   if((x&7)+w<=8) {
4633     @<Render a small rule node@>;
4634   } @+else {
4635     @<Render a large rule node@>;
4636   }
4637 }
4638
4639 @ @<Render a small rule node@>= {
4640   for(;y<=typeset_nodes.data[i].y;y++) {
4641     image[y*row_size+(x>>3)]|=(0xFF>>(x&7))&~(0xFF>>((x&7)+w));
4642   }
4643 }
4644
4645 @ @<Render a large rule node@>= {
4646   for(;y<=typeset_nodes.data[i].y;y++) {
4647     unsigned char*p=image+(y*row_size+(x>>3));
4648     *p++|=0xFF>>(x&7); // left
4649     memset(p,0xFF,((x&7)+w)>>3); // filling
4650     p[((x&7)+w)>>3]|=~(0xFF>>((x+w)&7)); // right
4651   }
4652 }
4653
4654 @ Character nodes are a bit different. The pictures are already stored,
4655 now we have to paste them into the layer picture. Since they will not
4656 always be aligned to a multiple to eight columns (one byte), it will have
4657 to shift out and shift in.
4658
4659 Again, it is necessary to ensure it doesn't go out of bounds. It has to be
4660 a bit more careful for characters than it does for rules. Also note that
4661 the \.{GF} format does not require that |min_m| and so on are the tightest
4662 bounds possible.
4663
4664 @<Render a character node@>= {
4665   unsigned int ch=typeset_nodes.data[i].c;
4666   unsigned int x=typeset_nodes.data[i].x+font_data.data[ch].min_m;
4667   unsigned int y=typeset_nodes.data[i].y-font_data.data[ch].max_n;
4668   unsigned int z=typeset_nodes.data[i].y-font_data.data[ch].min_n;
4669   unsigned int w=(font_data.data[ch].max_m-font_data.data[ch].min_m)/8+1;
4670   register unsigned char sh=x&7; // shifting amount for right shift
4671   register unsigned char lsh=8-sh; // shifting amount for left shift
4672   unsigned char*p=image+(y*row_size+(x>>3));
4673   unsigned char*q=font_data.data[ch].raster;
4674   @<Cut off the part of character above the top of the layer image@>;
4675   while(y<=z && p+w<image_max) {
4676     @<Render the current row of the character raster@>;
4677     @<Advance to the next row of the character@>;
4678   }
4679 }
4680
4681 @ @<Cut off the part of character above the top of the layer image@>= {
4682   if(y<0) {
4683     p-=row_size*y;
4684     q-=w*y;
4685     y=0;
4686   }
4687   if(p<image) p=image;
4688 }
4689
4690 @ @<Render the current row of the character raster@>= {
4691   int j;
4692   for(j=0;j<w;j++) {
4693     p[j]|=q[j]>>sh;
4694     p[j+1]|=q[j]<<lsh;
4695   }
4696 }
4697
4698 @ @<Advance to the next row of the character@>= {
4699   y++;
4700   q+=w;
4701   p+=row_size;
4702 }
4703
4704 @ Layer files are output in \.{PBM} format, which is very similar to the
4705 format which this program uses internally. ImageMagick is capable of
4706 reading this format.
4707
4708 @.PBM@>
4709 @^Portable Bitmap@>
4710 @^ImageMagick@>
4711 @^output@>
4712
4713 @<Send the current layer to a file@>= {
4714   FILE*fp;
4715   char filename[256];
4716   sprintf(filename,"P%dL%d.pbm",quan('P'),quan('L'));
4717   fp=fopen(filename,"wb");
4718   fprintf(fp,"P4%d %d ",layer_width,layer_height);
4719   fwrite(image,1,layer_size,fp);
4720   fclose(fp);
4721 }
4722
4723 @ @<Display the list of typeset nodes@>= {
4724   data_index i;
4725   foreach(i,typeset_nodes) {
4726     if(typeset_nodes.data[i].l&0x80) {
4727       printf("[%d] %dx%d%+d%+d\n",typeset_nodes.data[i].l&0x7F
4728         ,typeset_nodes.data[i].w,typeset_nodes.data[i].h
4729         ,typeset_nodes.data[i].x,typeset_nodes.data[i].y
4730       );
4731     } @+else {
4732       printf("[%d] %d(%d) %+d%+d\n",typeset_nodes.data[i].l
4733         ,typeset_nodes.data[i].c,font_data.data[typeset_nodes.data[i].c].n
4734         ,typeset_nodes.data[i].x,typeset_nodes.data[i].y
4735       );
4736     }
4737   }
4738 }
4739
4740 @ @<Display typesetting diagnostics@>= {
4741   int i;
4742   for(i=0;i<32;i++) {
4743     if(type_quan[i]) printf("%c=%d\n",i+'@@',type_quan[i]);
4744   }
4745   printf("unit_conv: %lld [%d]\n",unit_conv,round(unit_conv));
4746   printf("nodes: %d/%d\n",typeset_nodes.used,typeset_nodes.allocated);
4747   printf("fonts: %d/%d\n",font_data.used,font_data.allocated);
4748   if(dvi_stack) printf("stack: %d\n",dvi_stack_ptr-dvi_stack);
4749 }
4750
4751 @*Process of ImageMagick. The filename of ImageMagick \.{convert} is found
4752 by using the \.{IMCONVERT} environment variable. The entire command-line
4753 is stored in the \.Q register, with arguments separated by spaces, and it
4754 might be very long.
4755
4756 @^ImageMagick@>
4757 @.IMCONVERT@>
4758
4759 @d add_magick_arg(_val) magick_args.data[new_record(magick_args)]=_val
4760
4761 @<Typedefs@>=
4762 typedef char*char_ptr;
4763
4764 @ @<Global variables@>=
4765 memory_of(char_ptr) magick_args;
4766
4767 @ @<Switch to ImageMagick@>= {
4768   init_memory(magick_args,4);
4769   add_magick_arg("convert"); // |argv[0]| (program name)
4770   @<Add arguments from \.Q register@>;
4771   add_magick_arg(0); // (terminator)
4772   @<Call the ImageMagick executable file@>;
4773 }
4774
4775 @ The \.Q register will be clobbered here. But that is OK since it will no
4776 longer be used within \TeX nicard.
4777
4778 @<Add arguments from \.Q register@>= {
4779   char*q=registers['Q'].text;
4780   char*p;
4781   while(q && *q) {
4782     p=q;
4783     if(q=strchr(q,' ')) *q++=0;
4784     if(*p) add_magick_arg(p);
4785   }
4786 }
4787
4788 @ @<Call the ImageMagick executable file@>= {
4789   char*e=getenv("IMCONVERT");
4790   if(!e) @<Display the arguments and quit@>;
4791   execv(e,magick_args.data);
4792   fprintf(stderr,"Unable to run ImageMagick\n");
4793 @.Unable to run ImageMagick@>
4794   return 1;
4795 }
4796
4797 @ @<Display the arguments and quit@>= {
4798   data_index i;
4799   char*p;
4800   foreach(i,magick_args) if(p=magick_args.data[i]) printf("%s\n",p);
4801   return 0;
4802 }
4803
4804 @*Internal Typesetting. Until now, we only had the codes for doing
4805 external typesetting and image manipulation (which was the original plan
4806 for this program). Now, we are adding internal typesetting and image
4807 manipulation as well, to avoid external dependencies.
4808
4809 Some of the algorithms of \TeX\ will be used here, with some changes. For
4810 example, there are no leaders, marks, footnotes, alignments, mathematical
4811 formulas, or hyphenation. Ligature nodes are not needed either, because
4812 there is no hyphenation, so we can just use normal character nodes for
4813 ligatures.
4814
4815 There is also no page breaking, although you can still do vertical
4816 splitting if you want multiple columns of text on a card, or for the text
4817 to be interrupted in the middle.
4818
4819 @ Here is a list of the category codes used for internal typesetting, and
4820 the code to initialize that table and the other tables. There are also
4821 category codes from 32 to 255, which mean that it is a register number
4822 containing a code to execute (we set up |tabulation| and |escape_code| to
4823 call registers \.t and \.e, although it is unlikely to use these tokens).
4824
4825 @d cat_ignore 0 // Ignore this token
4826 @d cat_norm 1 // Add a character from the current font
4827 @d cat_space 2 // Add a glue node with the current space factor
4828 @d cat_exit 3 // Exit the current block
4829 @d cat_accent 4 // Add an accent to the next character
4830 @d cat_xaccent 5 // As above, but XOR 128
4831
4832 @<Initialize tables for internal typesetting@>= {
4833   for(i=0;i<256;i++) {
4834     tables['E'][i]=1;
4835     tables['F'][i]=40;
4836     tables['J'][i]=tables['K'][i]=128;
4837   }@#
4838   tables['E'][null_char]=cat_ignore;
4839   tables['E'][end_transmission]=cat_exit; // Not actually used
4840   tables['E'][tabulation]='t';
4841   tables['E'][escape_code]='e';
4842   tables['E'][record_separator]=cat_exit;
4843   tables['E'][field_separator]=cat_exit;
4844   tables['E'][' ']=cat_space;
4845 }
4846
4847 @ All dimensions are stored in units of scaled points (where there are
4848 65536 scaled points in one point, and $72.27$ points in one inch).
4849
4850 There will also be a type for glue ratios, which is used to multiply by
4851 glue stretch and shrink inside of a box, where a value of |0x100000000|
4852 means 100\char`\%\relax\space stretch or shrink, or 1pt per fil unit.
4853
4854 @<Typedefs@>=
4855 typedef signed int scaled;
4856 typedef signed long long int glue_ratio;
4857
4858 @*Data Structures for Boxes. Typesetting is done first by storing
4859 horizontal and vertical boxes of nodes. These boxes may then be included
4860 in other boxes, or shipped out to the next part of the program, which is
4861 image manipulation.
4862
4863 Here we list the possible kind of nodes. These are four-bit numbers, with
4864 bit 3 set for a breakable\slash discardable node. The four high bits are
4865 used as a small parameter for the node.
4866
4867 There are structures for many kinds of nodes, but only one pointer type
4868 will be used. Unions are used to allow many kinds of nodes at once.
4869
4870 @d chars_node 00 // One word of text (including kerns, ligatures, accents)
4871 @d hlist_node 01 // Horizontal box
4872 @d vlist_node 02 // Vertical box
4873 @d rule_node 03 // Filled rectangle
4874 @d adjust_node 04 // Add material before or after current line
4875 @d special_node 05 // Execute commands when this node is found
4876 @d layer_node 06 // Like |special_node| but with only one purpose
4877 @d kern_node 010 // Fixed movement
4878 @d glue_node 011 // Variable movement
4879 @d penalty_node 012 // Tell how bad it is to break a line/page here
4880 @#
4881 @d type_of(_node) ((_node)->type_and_subtype&0x0F)
4882 @d subtype_of(_node) ((_node)->type_and_subtype>>4)
4883 @s box_node int
4884 @d calc_size(_members) (sizeof(struct{
4885   struct box_node*y;unsigned char z;struct{_members}@+;
4886 }))
4887
4888 @<Typedefs@>=
4889 typedef struct box_node {
4890   struct box_node*next; // next node, or 0
4891   unsigned char type_and_subtype;
4892   union @+{
4893     @<Structure of a |chars_node|@>;
4894     @<Structure of a |hlist_node|, |vlist_node|, or |rule_node|@>;
4895     @<Structure of a |adjust_node|@>;
4896     @<Structure of a |special_node|@>;
4897     @<Structure of a |layer_node|@>;
4898     @<Structure of a |kern_node|@>;
4899     @<Structure of a |glue_node|@>;
4900     @<Structure of a |penalty_node|@>;
4901   }@+;
4902 } box_node;
4903 @#
4904
4905 @ In a |chars_node|, there is a font number (0 to 255), and then sixteen
4906 bits for each character, accent, or kern. Data |0x0000| to |0x7FFF| adds a
4907 character (so only 32768 characters are available, while \TeX\ supports
4908 only 256 characters, so it is still more than \TeX), data |0x8000| to
4909 |0xBFFF| specifies an accent for the next character (so only characters
4910 numbered 0 to 16383 can be used as accents), |0xC000| to |0xFFFE| are
4911 implicit kerns (allowing only 16383 possible kerns, although most fonts
4912 use only ten or so, certainly not as many as sixteen thousand), and data
4913 |0xFFFF| is a terminator. All characters are from the same font.
4914
4915 If an accent is specified, it is added to the immediately next character
4916 in this list.
4917
4918 @d sizeof_chars_node calc_size(unsigned char a;unsigned short b[0];)
4919
4920 @<Structure of a |chars_node|@>=
4921 struct {
4922   unsigned char font;
4923   unsigned short chars[0];
4924 }
4925
4926 @ An |hlist_node|, |vlist_node|, and |rule_node| are all similar to each
4927 other, except that a |rule_node| does not have a |list| or |glue_set|, and
4928 a |hlist_node| has an additional |tracking| parameter.
4929
4930 Tracking is 128 for normal width of each letter. They can be adjusted to a
4931 lesser number to make the letters closer together, or greater to make
4932 farther apart leters, for example 64 means half of normal width.
4933
4934 The |subtype_of| a |hlist_node| or |vlist_node| is the glue set order,
4935 setting the high bit for shrinking (otherwise it is stretching).
4936
4937 @d sizeof_hlist_node calc_size(
4938   scaled a;scaled b;scaled c;scaled d;
4939   struct box_node*e;glue_ratio f;unsigned char g;
4940 )
4941 @d sizeof_vlist_node calc_size(
4942   scaled a;scaled b;scaled c;scaled d;
4943   struct box_node*e;glue_ratio f;
4944 )
4945 @d sizeof_rule_node calc_size(scaled a;scaled b;scaled c;scaled d;)
4946
4947 @<Structure of a |hlist_node|...@>=
4948 struct {
4949   scaled width;
4950   scaled height;
4951   scaled depth;
4952   scaled shift_amount; // shift this box by the specified amount
4953   @#
4954   struct box_node*list; // pointer to first child node
4955   glue_ratio glue_set;
4956   @#
4957   unsigned char tracking; // adjust letter spacing
4958 }
4959
4960 @ An |adjust_node| has only a pointer to the sublist, and the |subtype_of|
4961 should be zero to append the vertical material after this line of the
4962 paragraph, or one to put it before this line of the paragraph.
4963
4964 @d sizeof_adjust_node calc_size(struct box_node*a;)
4965
4966 @<Structure of a |adjust_node|@>=
4967 struct {
4968   struct box_node*sublist; // pointer to first child node
4969 }
4970
4971 @ A |special_node| contains a null-terminated C string. The |subtype_of|
4972 specifies how it is used; they are listed below.
4973
4974 @d spec_measure 1 // Measuring the length of a line in a paragraph
4975 @d spec_break 2 // Breaking a paragraph
4976 @d spec_pack 3 // Packaging a box
4977 @d spec_vbreak 4 // Breaking a vertical box
4978 @d spec_render 5 // Shipping out the nodes to the page
4979 @#
4980 @d sizeof_special_node calc_size(char a[0];)
4981
4982 @<Structure of a |special_node|@>=
4983 struct {
4984   char program[0];
4985 }
4986
4987 @ A |layer_node| acts like a |special_node| with subtype |spec_render| and
4988 the |program| set to |"3mL"| if the |layer| parameter is 3. It is probably
4989 a more common kind of special.
4990
4991 For example, it might be used to specify typing in different colors.
4992
4993 @d sizeof_layer_node calc_size(unsigned char a;)
4994
4995 @<Structure of a |layer_node|@>=
4996 struct {
4997   unsigned char layer;
4998 }
4999
5000 @ A |kern_node| represents a horizontal or vertical movement, such as
5001 where some amount of space is skipped.
5002
5003 @d sizeof_kern_node calc_size(scaled a;)
5004
5005 @<Structure of a |kern_node|@>=
5006 struct {
5007   scaled distance;
5008 }
5009
5010 @ A |glue_node| is similar to a |kern_node| although there are some
5011 differences. One difference is that it can stretch and shrink. The
5012 |subtype_of| parameter has the stretch order in the low two bits and the
5013 shrink order in the high two bits.
5014
5015 @d finite 0
5016 @d fil 1
5017 @d fill 2
5018 @d filll 3
5019 @#
5020 @d sizeof_glue_node calc_size(scaled a;scaled b;scaled c;)
5021
5022 @<Structure of a |glue_node|@>=
5023 struct {
5024   scaled natural;
5025   scaled stretch;
5026   scaled shrink;
5027 }
5028
5029 @ A |penalty_node| specifies a valid breakpoint in a paragraph, and in
5030 addition, specifies how bad it is to break here. A penalty value 10000001
5031 is bad enough that it will not break here, and $-10000001$ is good enough
5032 that it will definitely break here.
5033
5034 @d sizeof_penalty_node calc_size(signed int a;)
5035
5036 @<Structure of a |penalty_node|@>=
5037 struct {
5038   signed int penalty;
5039 }
5040
5041 @ Here are functions for manipulation of box nodes, including creation,
5042 destruction, and so on.
5043
5044 First is simple creation of a node. It sets nothing other than type and
5045 subtype.
5046
5047 @-p box_node*create_node(int type,int subtype,int size) {
5048   box_node*ptr=malloc(size);
5049   ptr->next=0;
5050   ptr->type_and_subtype=(type&0x0F)|(subtype<<4);
5051   return ptr;
5052 }
5053
5054 @ Now is destruction. It is recursive because some nodes are boxes that
5055 point to other lists too.
5056
5057 @-p void trash_nodes(box_node*this) {
5058   box_node*next;
5059   while(this) {
5060     next=this->next;
5061     @<Recurse if there is a sublist to trash@>;
5062     free(this);
5063     this=next;
5064   }
5065 }
5066
5067 @ @<Recurse if there is a sublist to trash@>= {
5068   switch(type_of(this)) {
5069     case hlist_node: case vlist_node: @/
5070       trash_nodes(this->list); @+break;
5071     case adjust_node: @/
5072       trash_nodes(this->sublist); @+break;
5073     default: ; // Do nothing
5074   }
5075 }
5076
5077 @ You might realize there are no reference counts. They aren't needed,
5078 because each node is used exactly once. (Later on in the semantic nest, it
5079 is seen that this is not quite true; the box nest also includes a
5080 reference, which is in addition to the |next| pointers of each node, but
5081 this is OK since those are nodes are never isolated or destroyed when
5082 picked off of that list.)
5083
5084 @*Font Metric Data. In order to do internal typesetting, it is necessary
5085 to load the font metric data from a \.{TFM} file. The data in a \.{TFM}
5086 file consists of 32-bit words in big-endian order.
5087
5088 However, the first 6 words are twelve 16-bit integers instead, giving
5089 lengths of various parts of the file.
5090
5091 @s fix_word int
5092 @ The most important data type used here is a |fix_word|, which is a
5093 32-bit signed number, with 12 integer bits and 20 fractional bits. Most of
5094 the |fix_word| values in a \.{TFM} file range from $-16$ to $+16$.
5095
5096 @<Typedefs@>=
5097 typedef signed int fix_word;
5098
5099 @ The twelve lengths are according to the following:
5100
5101 \hbox to\hsize{\hfil\vbox{\smallskip\halign{\hfil$\it#={}$&#\hfil\cr
5102   lf&length of the entire file, in words\cr
5103   lh&number of words of header data\cr
5104   bc&smallest character code in this font\cr
5105   ec&largest character code in this font\cr
5106   nw&number of words in the width table\cr
5107   nh&number of words in the height table\cr
5108   nd&number of words in the depth table\cr
5109   ni&number of words in the italic correction table\cr
5110   nl&number of words in the ligature/kern program\cr
5111   nk&number of words in the kern table\cr
5112   ne&number of words in the extensible character table\cr
5113   np&number of font parameter words\cr
5114 }\smallskip}\hfil}
5115
5116 \noindent The parts of the file are in the order listed above. Some of the
5117 sections of the file are not used by this program (the extensible
5118 characters and the header words), but they still must be skipped over when
5119 reading the \.{TFM} file. Also, the $\it lf$ parameter is only for
5120 verification, and this program does not attempt to verify it.
5121
5122 @ Here is data structures for storing information about font metrics. It
5123 is a managed memory. Some elements will be shared by multiple fonts that
5124 use the same \.{TFM} file, such as |design_size|, |fontname|, and the
5125 ligature/kerning programs.
5126
5127 @<Late Typedefs@>=
5128 typedef struct {
5129   scaled parameters[16]; // Font parameters (up to sixteen)
5130   scaled at_size; // At size, for figuring out \.{GF} filename
5131   scaled design_size; // Design size, for figuring out \.{GF} filename
5132   char*fontname; // Name of font, without extension or area
5133   scaled*width_base;
5134   scaled*height_base;
5135   scaled*depth_base;
5136   scaled*italic_base;
5137   scaled*kern_base;
5138   unsigned char min_char; // Smallest valid character code
5139   unsigned char max_char; // Largest valid character code
5140   int right_boundary; // If this is |none| then there is no right boundary
5141   unsigned char lig_limit; // Code |x| ligatures if |x<256*lig_limit|
5142   @<More elements of |font_metric_data|@>@;
5143 } font_metric_data;
5144
5145 @ @<Global variables@>=
5146 memory_of(font_metric_data) metrics;
5147
5148 @ @<Initialize memory@>= init_memory(metrics,4);
5149
5150 @ Now the ligature/kerning program. The purpose of these fields is
5151 explained later.
5152
5153 @<Typedefs@>=
5154 typedef struct {
5155   unsigned char skip;
5156   unsigned char next;
5157   unsigned char op;
5158   unsigned char remainder;
5159 } lig_kern_command;
5160
5161 @ Some fonts will have a fake ``left boundary character'', which is
5162 implied at the beginning of each word. This points to the command which
5163 should become active at the beginning of a word. If it is null, then no
5164 ligature/kerning program will be active.
5165
5166 @<More elements of |font_metric_data|@>=
5167   lig_kern_command*left_boundary; // Program for left boundary character
5168
5169 @ Another thing is the character info. These are the same data for
5170 different sizes of the same font, since they are index into the other
5171 arrays, which are different for each font.
5172
5173 @<Typedefs@>=
5174 typedef struct {
5175   unsigned char width; // Index into |width_base|
5176   unsigned char height; // Index into |height_base|
5177   unsigned char depth; // Index into |depth_base|
5178   unsigned char italic; // Index into |italic_base|
5179   lig_kern_command*program; // Program for this character (null if none)
5180 } char_info_data;
5181
5182 @ @<More elements of |font_metric_data|@>=
5183   char_info_data*info; // |info[c]| is info for character code |c|
5184
5185 @ So let's get started, please.
5186
5187 The parameter |fontnum| shall be the font number of the first size of this
5188 font set up. The |fontname| is the name of the font, without extension.
5189 The |at_size| parameter points to the beginning of a zero-terminated list
5190 of at-sizes to load the font at (much of the data is the same for
5191 different at-sizes so that we can save memory in this way). However, the
5192 |at_size| values are |scaled|, while the \.{TFM} expects |fix_word|. This
5193 is easy to correct by right-shifting four spaces.
5194
5195 The |fix_word| values are in the same format as numbers in a \.{DVI} file,
5196 so the same code can be used. A macro is set here to make convenience.
5197
5198 @d get_fix_word(_fp) ((fix_word)get_dvi_number((_fp),1,4))
5199
5200 @-p void load_tfm(unsigned char fontnum,char*fontname,scaled*at_size) {
5201   char filename[max_filename_length+1];
5202   short lengths[12]; // The data described above, now numbered 0 to 11
5203   lig_kern_command*program; // Beginning of ligature/kerning program
5204   font_metric_data common_data; // Data common to all sizes of a font
5205   data_index metrics_index=metrics.used; // Index into |metrics|
5206   int num_sizes=0; // How many fonts we are loading at once
5207   int w_offset; // Offset of width table
5208   FILE*fp;
5209   @<Set up the filename of the \.{TFM} file and try to open the file@>;
5210   @<Load the |lengths| data@>;
5211   @<Set up |common_data| and |program|@>;
5212   @<Skip the header words@>;
5213   @<Load the character info@>;
5214   @<Set |w_offset|, and skip to the ligature/kerning program@>;
5215   @<Load the ligature/kerning program@>;
5216   @<Correct the pointers into the ligature/kerning program@>;
5217   @<Calculate |num_sizes| and allocate font metric structures@>;
5218   @<Load the dimension values for each size of this font@>;
5219   fclose(fp);
5220 }
5221
5222 @ @<Set up the filename of the \.{TFM} file and try to open the file@>= {
5223   sprintf(filename,"%s.tfm",fontname);
5224   fp=open_file(filename,"rb");
5225   if(!fp) {
5226     fprintf(stderr,"Cannot open font %s\n",filename);
5227 @.Cannot open font...@>
5228     exit(1);
5229   }
5230 }
5231
5232 @ @<Load the |lengths| data@>= {
5233   int i;
5234   for(i=0;i<12;i++) {
5235     int x=fgetc(fp);
5236     int y=fgetc(fp);
5237     lengths[i]=(x<<8)|y;
5238   }
5239 }
5240
5241 @ @<Set up |common_data| and |program|@>= {
5242   common_data.fontname=strdup(fontname);
5243   common_data.min_char=lengths[2]; // Hopefully should be zero
5244   common_data.max_char=lengths[3];
5245   common_data.right_boundary=none;
5246   common_data.lig_limit=255;
5247   common_data.info=malloc((lengths[3]+1)*sizeof(char_info_data));
5248   program=malloc(lengths[8]*sizeof(lig_kern_command));
5249 }
5250
5251 @ @<Skip the header words@>= {
5252   fseek(fp,4,SEEK_CUR); // Skip checksum
5253   common_data.design_size=get_fix_word(fp)>>4;
5254   fseek(fp,4*(lengths[1]-2),SEEK_CUR); // Skip everything else
5255 }
5256
5257 @ The character info is stored in a packed format. This is then unpacked
5258 and loaded into the |common_data.info| array, which has already been
5259 allocated.
5260
5261 @<Load the character info@>= {
5262   char_info_data*info=common_data.info+common_data.min_char;
5263   int i,c;
5264   for(i=common_data.min_char;i<=common_data.max_char;i++) {
5265     info->width=fgetc(fp);
5266     c=fgetc(fp);
5267     info->height=c>>4;
5268     info->depth=c&0xF;
5269     c=fgetc(fp);
5270     info->italic=c>>2;
5271     if((c&0x3)==0x1) {
5272       info->program=program+fgetc(fp);
5273     } @+else {
5274       info->program=0;
5275       fgetc(fp); // Lists and extensible recipes are not used
5276     }
5277     info++;
5278   }
5279 }
5280
5281 @ The ligature/kerning program will be read before the dimensions specific
5282 to the font size, so that the |common_data| can be set up first. And then
5283 we can skip back to |w_offset|, multiple times, once for each size that is
5284 being loaded.
5285
5286 @<Set |w_offset|, and skip to the ligature/kerning program@>= {
5287   w_offset=ftell(fp);
5288   fseek(fp,4*(lengths[4]+lengths[5]+lengths[6]+lengths[7]),SEEK_CUR);
5289 }
5290
5291 @ @<Load the ligature/kerning program@>= {
5292   int i;
5293   for(i=0;i<lengths[8];i++) {
5294     program[i].skip=fgetc(fp);
5295     program[i].next=fgetc(fp);
5296     program[i].op=fgetc(fp);
5297     program[i].remainder=fgetc(fp);
5298   }
5299 }
5300
5301 @ Sometimes you might need large ligature/kerning programs for many
5302 characters, so you can start at addresses other than 0 to 255. This is the
5303 way that specifies how that is done.
5304
5305 @<Correct the pointers into the ligature/kerning program@>= {
5306   int i;
5307   for(i=common_data.min_char;i<=common_data.max_char;i++)
5308     if(common_data.info[i].program &&
5309      common_data.info[i].program->skip>128)
5310       common_data.info[i].program=program+
5311        (common_data.info[i].program->op<<8)+
5312        common_data.info[i].program->remainder;
5313 }
5314
5315 @ @<Calculate |num_sizes| and allocate font metric structures@>= {
5316   scaled*p=at_size;
5317   data_index n;
5318   while(*p) {
5319     n=new_record(metrics);
5320     memcpy(&(metrics.data[n]),&common_data,sizeof(font_metric_data));
5321     metrics.data[n].at_size=*p;
5322     num_sizes++;
5323     p++;
5324   }
5325 }
5326
5327 @ Now to load the widths, heights, depths, italic corrections, and kerning
5328 distances. This is what |w_offset| is for, so that we can skip back to it.
5329 One allocated memory object is used for all dimension values of one size,
5330 and then the points are moved into the fields of the |font_metric_data|.
5331
5332 @d total_font_dimen
5333   (lengths[4]+lengths[5]+lengths[6]+lengths[7]+lengths[9])
5334 @d cur_metrics (metrics.data[metrics_index])
5335
5336 @<Load the dimension values for each size of this font@>= {
5337   scaled*p;
5338   scaled s,z,zprime,alpha,beta;
5339   for(p=at_size;*p;p++,metrics_index++) {
5340     scaled*d=malloc(sizeof(scaled)*total_font_dimen);
5341     int c;
5342     @<Ensure |d| is valid@>;
5343     @<Set the dimension base pointers for this font@>;
5344     z=*p; // The at size is now called |z|
5345     @<Compute |zprime|, |alpha|, and |beta|@>;
5346     fseek(fp,w_offset,SEEK_SET);
5347     c=lengths[4]+lengths[5]+lengths[6]+lengths[7];
5348     @<Load |c| scaled dimension values from |fp| into |d|@>;
5349     fseek(fp,4*lengths[8],SEEK_CUR);
5350     c=lengths[9];
5351     @<Load |c| scaled dimension values from |fp| into |d|@>;
5352     @<Load the font parameters@>;
5353   }
5354 }
5355
5356 @ @<Ensure |d| is valid@>= {
5357   if(!d) {
5358     fprintf(stderr,"Out of font memory\n");
5359     exit(1);
5360   }
5361 }
5362
5363 @ @<Set the dimension base pointers for this font@>= {
5364   cur_metrics.width_base=d;
5365   cur_metrics.height_base=cur_metrics.width_base+lengths[4];
5366   cur_metrics.depth_base=cur_metrics.height_base+lengths[5];
5367   cur_metrics.italic_base=cur_metrics.depth_base+lengths[6];
5368   cur_metrics.kern_base=cur_metrics.italic_base+lengths[7];
5369 }
5370
5371 @ @<Load |c| scaled dimension values from |fp| into |d|@>= {
5372   while(c--) {
5373     scaled b3,b2,b1,b0;
5374     b0=fgetc(fp); @+ b1=fgetc(fp); @+ b2=fgetc(fp); @+ b3=fgetc(fp);
5375     *d++=(((((b3*zprime)>>8)+(b2*zprime))>>8)+(b1*zprime))/beta
5376      -(b0?alpha:0);
5377   }
5378 }
5379
5380 @ Now there are font parameters. There are up to sixteen font parameters,
5381 but numbered starting at 1. This is the code that makes it to do this.
5382
5383 @<Load the font parameters@>= {
5384   c=lengths[11]-1;
5385   if(c>14) c=14;
5386   if(c<0) c=0;
5387   cur_metrics.parameters[0]=cur_metrics.parameters[1]=0;
5388   if(lengths[11]) cur_metrics.parameters[1]=get_fix_word(fp)>>4;
5389   d=cur_metrics.parameters+2;
5390   @<Load |c| scaled dimension values from |fp| into |d|@>;
5391 }
5392
5393 @*Semantic Nest. We might be building many boxes at once, nested inside of
5394 each other. So, we need to keep the stack of what kind of boxes are
5395 currently in use, and the associated parameters, such as space factors,
5396 and the previous depth of the box.
5397
5398 There is two kinds, horizontal and vertical. The outer mode is considered
5399 horizontal so that it does not add leading between boxes, although it is
5400 not for making a box of the outer mode.
5401
5402 The currently active modes are stored both forwards and backwards, so that
5403 we can use them as a stack of box nodes. There is a null pointer to mark
5404 the end of the list.
5405
5406 @<Typedefs@>=
5407 typedef box_node*box_node_ptr;
5408
5409 @ @<Global variables@>=
5410 memory_of(box_node_ptr) box_nest;
5411
5412 @ @<Initialize memory@>=
5413 init_memory(box_nest,2);
5414
5415 @ We also have the semantic list with local variables to the current
5416 group. The purpose of the |data| fields depends on whether this state is
5417 in horizontal or vertical mode, and that is why it is a union so that we
5418 can access then by names in that case, although they can also be accessed
5419 by numbers as well.
5420
5421 @<Typedefs@>=
5422 typedef struct nest_state {
5423   struct nest_state*link; // Link to state this one is inside of
5424   boolean is_vertical; // 0 for horizontal, 1 for vertical
5425   data_index box_nest_index; // Index into |box_nest|
5426   union {
5427     scaled data[16];
5428     @<Nest state variables for horizontal mode@>;
5429     @<Nest state variables for vertical mode@>;
5430   }@+;
5431 } nest_state;
5432
5433 @ @<Global variables@>=
5434 nest_state*cur_nest;
5435
5436 @ @<Initialize memory@>= {
5437   cur_nest=malloc(sizeof(nest_state));
5438   cur_nest->link=0; // Means this is the outer level
5439   cur_nest->is_vertical=0; // Horizontal mode, no leading
5440   cur_nest->box_nest_index=new_record(box_nest);
5441   box_nest.data[cur_nest->box_nest_index]=0;
5442   cur_nest->space_factor=40; // Normal spacing
5443 }
5444
5445 @ @<Nest state variables for horizontal mode@>=
5446 struct {
5447   scaled space_factor; // Really just a number, but I don't care
5448 }
5449
5450 @ @<Nest state variables for vertical mode@>=
5451 struct {
5452   scaled prev_depth;
5453 }
5454
5455 @ Here are codes to enter a nest.
5456
5457 @-p void enter_nest(boolean is_vertical) {
5458   nest_state*link=cur_nest;
5459   cur_nest=malloc(sizeof(nest_state));
5460   cur_nest->link=link;
5461   cur_nest->is_vertical=is_vertical;
5462   cur_nest->box_nest_index=new_record(box_nest);
5463   box_nest.data[cur_nest->box_nest_index]=0;
5464   if(is_vertical) cur_nest->prev_depth=0;
5465   else cur_nest->space_factor=40;
5466 }
5467
5468 @ And we also need codes to leave a nest. This function returns the
5469 pointer to the first node in the box that was being created, and then the
5470 packaging programs can use that to make a box and iterate over the |next|
5471 pointers to read the entire list.
5472
5473 @-p box_node*leave_nest(void) {
5474   nest_state*link=cur_nest->link;
5475   box_node*node;
5476   @<Ensure it is not nest underflow@>;
5477   @<Set |node| to the node at the beginning of the current list@>;
5478   @<Rewind |box_nest| to the end of the parent list@>;
5479   free(cur_nest);
5480   cur_nest=link;
5481   return node;
5482 }
5483
5484 @ The outer nest should never be left or packaged; it is only used as a
5485 general-purpose stack and a container for other nests. (Unlike \TeX, the
5486 outer nest is never split into pages in \TeX nicard.)
5487
5488 @<Ensure it is not nest underflow@>= {
5489   if(!link) {
5490     fprintf(stderr,"\nNest underflow\n");
5491     exit(1);
5492   }
5493 }
5494
5495 @ Note: Sometimes |node| will be a null pointer if the current list is
5496 making an empty box (i.e. no nodes have been pushed).
5497
5498 @<Set |node| to the node at the beginning of the current list@>= {
5499   if(box_nest.used==cur_nest->box_nest_index+1) {
5500     node=0;
5501   } @+else {
5502     node=box_nest.data[cur_nest->box_nest_index+1];
5503   }
5504 }
5505
5506 @ @<Rewind |box_nest| to the end of the parent list@>= {
5507   box_nest.used=cur_nest->box_nest_index;
5508 }
5509
5510 @ And finally we have codes to push and pop nodes in the current list.
5511 These are simple codes since there isn't much to do.
5512
5513 @d top_of_nodelist (box_nest.data[box_nest.used-1])
5514
5515 @-p inline void push_node(box_node*ptr) {
5516   top_of_nodelist->next=ptr;
5517   box_nest.data[new_record(box_nest)]=ptr;
5518 }
5519
5520 @ @-p box_node*pop_node(void) {
5521   box_node*ptr=top_of_nodelist;
5522   if(ptr) {
5523     box_nest.used--;
5524     top_of_nodelist->next=0;
5525   }
5526   return ptr;
5527 }
5528
5529 @*Box Calculation. Here are codes to calculate various things about the
5530 boxes, including badness, width\slash height\slash depth of a string of
5531 characters, and so on.
5532
5533 This function is used to compute the ``badness'' of a glue setting, when a
5534 total $t$ is supposed to be made from amounts that sum to $s$. In this
5535 program, the badness is $1000(t/s)^3$ (ten times as much as \TeX). It does
5536 not have to be extremely accurate, although it is sufficiently accurate to
5537 do line breaking and so on. Ten million occurs when you stretch more than
5538 21 times as much as it should; this should never happen so it is given the
5539 maximum possible badness that can be computed using this. The badness
5540 squared should never exceed sixty-three bits (which it won't).
5541
5542 @!@^badness@>
5543
5544 @d very_bad 10000000
5545 @d too_bad  10000001
5546
5547 @-p int calc_badness(scaled t,scaled s) {
5548   long long int r; // Apprximately $\root3\of{1000\cdot2^{32}}(t/s)$
5549   if(t==0) return 0;
5550   if(s<=0) return very_bad;
5551   r=(16255LL*t)/s;
5552   if(r>2097152LL) return very_bad;
5553   r=(r*r*r+(1LL<<31))>>32;
5554   if(r>very_bad) r=very_bad;
5555   return r;
5556 }
5557
5558 @ Next we calculate the width, height, and depth of a string of
5559 characters in one font, possibly including accents, kerns, and tracking.
5560 Ligatures will have already been dealt with before this code is reached,
5561 and kerns will already have been added in.
5562
5563 @-p void calc_chars(box_node*b,scaled*w,scaled*h,scaled*d,short t) {
5564   font_metric_data*m=&(metrics.data[b->font]);
5565   unsigned short*c; // Pointer to current character code
5566   scaled junk; // Ensures no segmentation faults are occuring
5567   if(!w) w=&junk;
5568   if(!h) h=&junk;
5569   if(!d) d=&junk;
5570   *w=*h=*d=0;
5571   for(c=b->chars;*c!=0xFFFF;c++) {
5572     if(*c&0x8000) {
5573       if(*c&0x4000) {
5574         @<Process an implicit kern in |calc_chars|@>;
5575       } @+else {
5576         @<Process an accent in |calc_chars|@>;
5577       }
5578     } @+else {
5579       @<Process a normal character in |calc_chars|@>;
5580     }
5581   }
5582 }
5583
5584 @ @<Process a normal character in |calc_chars|@>= {
5585   scaled width=m->width_base[m->info[*c&0xFF].width];
5586   scaled height=m->height_base[m->info[*c&0xFF].height];
5587   scaled depth=m->depth_base[m->info[*c&0xFF].depth];
5588   if(*h<height) *h=height;
5589   if(*d<depth) *d=depth;
5590   *w+=(t*width)>>7;
5591 }
5592
5593 @ @<Process an implicit kern in |calc_chars|@>= {
5594   scaled width=m->kern_base[*c&0x3FFF];
5595   *w+=(t*width)>>7;
5596 }
5597
5598 @ Now to do accents. This requires looking ahead to see the height for the
5599 next character. If the accent has positive height and zero depth, then it
5600 should be adjusted higher in case the letter is taller than an `x' (for
5601 example uppercase letters such as `\'E'). However, if the accent has
5602 positive depth and zero height, then it is an accent that should not be
5603 adjusted for the height of the character (for example `\c C'), although it
5604 might be adjusted for the depth.
5605
5606 It should never happen that the next item is not a normal character (if it
5607 does, then I am not considered responsible for your bad luck).
5608
5609 @<Process an accent in |calc_chars|@>= {
5610   scaled height=m->height_base[m->info[*c&0xFF].height];
5611   scaled depth=m->depth_base[m->info[*c&0xFF].depth];
5612   scaled c_height=m->height_base[m->info[c[1]&0xFF].height];
5613   scaled c_depth=m->height_base[m->info[c[1]&0xFF].depth];
5614   if(height<=0 && depth>0) {
5615     depth+=c_depth;
5616   } @+else {
5617     height+=c_height-m->parameters[5];
5618   }
5619   if(*h<height) *h=height;
5620   if(*d<depth) *d=depth;
5621 }
5622
5623 @*Packaging. This is how the nest lists are packaged into boxes and the
5624 width, height, and depth are calculated from them. They are separate for
5625 horizontal and vertical packing, although there are similarities.
5626
5627 The packing code is also used to compute the glue set of the box, and its
5628 badness. Here is the global variable to store the badness.
5629
5630 @<Global variables@>=
5631 int last_badness=too_bad;
5632
5633 @ There are two such subroutines, |hpackage| and |vpackage|, depending on
5634 what kind of box is wanted. Each one also takes three parameters: |first|,
5635 the first node in the box; |at_size|, the intended size, and |factor|, the
5636 amount to multiply the natural size by before adding |at_size|.
5637
5638 @d common_package box_node*first,scaled at_size,signed char factor
5639
5640 @ Horizontal packaging must compute height, width, and depth of characters
5641 and other boxes it contains, as well as compute glue settings, specials,
5642 adjustments, and so on.
5643
5644 For horizontal packaging, there is also a |tracking| parameter for spacing
5645 the letters in the box.
5646
5647 @-p box_node*hpackage(common_package,unsigned char tracking) {
5648   box_node*box=create_node(hlist_node,0,sizeof_hlist_node);
5649   scaled stretchability[4]; // Total stretch of all glue
5650   scaled shrinkability[4]; // Total shrink of all glue
5651   scaled natural=0; // Total width
5652   box_node*this; // Current node
5653   @<Initialize variables for |hpackage|@>;
5654   @<Read all nodes in a horizontal list to package them@>;
5655 #define @!actual @, box->width
5656   actual=(factor*natural)/8+at_size;
5657   @<Compute glue set and badness@>;
5658 #undef actual
5659   return box;
5660 }
5661
5662 @ @<Initialize variables for |hpackage|@>= {
5663   int o;
5664   box->list=first;
5665   box->tracking=tracking;
5666   box->height=box->depth=box->shift_amount=0;
5667   box->glue_set=0;
5668   for(o=0;o<4;o++) stretchability[o]=shrinkability[o]=0;
5669 }
5670
5671 @ @<Read all nodes in a horizontal list to package them@>= {
5672   for(this=first;this;this=this->next) {
5673     switch(type_of(this)) {
5674       case chars_node: @<Add word to box size@>; @+break;
5675       case hlist_node: case vlist_node: case rule_node:
5676         @<Apply the size of a box to a horizontal list@>; @+break;
5677       case kern_node: natural+=this->distance; @+break;
5678       case glue_node: @<Add glue to box size@>; @+break;
5679       case special_node:
5680         if(subtype_of(this)==spec_pack) @<Pack a special node@>;
5681         @+break;
5682       default: break; // All other nodes are ignored
5683     }
5684   }
5685 }
5686
5687 @ @<Add word to box size@>= {
5688   scaled w,h,d;
5689   calc_chars(this,&w,&h,&d,tracking<<1);
5690   natural+=w;
5691   if(h>box->height) box->height=h;
5692   if(d>box->depth) box->depth=d;
5693 }
5694
5695 @ @<Apply the size of a box to a horizontal list@>= {
5696   natural+=this->width;
5697   if(this->height+this->shift_amount>box->height)
5698     box->height=this->height+this->shift_amount;
5699   if(this->depth-this->shift_amount>box->depth)
5700     box->depth=this->depth-this->shift_amount;
5701 }
5702
5703 @ @<Add glue to box size@>= {
5704   natural+=this->natural;
5705   stretchability[subtype_of(this)&3]+=this->stretch;
5706   shrinkability[subtype_of(this)>>2]+=this->shrink;
5707 }
5708
5709 @ When packing a special node that has a code to run during packing, it
5710 can read and affect the current width and the intended width; it could
5711 also do other things, such as accumulating boxes for adjustments and so
5712 on.
5713
5714 @<Pack a special node@>= {
5715   push_num(at_size);
5716   push_num(natural);
5717   @# execute_program(this->program); @#
5718   natural=pop_num();
5719   at_size=pop_num();
5720 }
5721
5722 @ A macro named |actual| is defined above so that this code can be used
5723 for both horizontal and for vertical packaging.
5724
5725 We also have a macro here to decide setting the glue.
5726
5727 @d set_glue(_order,_flag,_diff,_glue)
5728   (box->type_and_subtype|=((_order)<<4)|((_flag)<<7)),
5729   (box->glue_set=make_fraction(_glue,_diff))
5730
5731 @<Compute glue set and badness@>= {
5732   if(actual>natural) {
5733     @<Glue is stretching@>;
5734   } @+else if(actual<natural) {
5735     @<Glue is shrinking@>;
5736   } @+else {
5737     last_badness=0; // Perfect!
5738   }
5739 }
5740
5741 @ @<Glue is stretching@>= {
5742   if(stretching[filll]!=0) {
5743     set_glue(filll,0,actual-natural,stretching[filll]);
5744     last_badness=0;
5745   } @+else if(stretching[fill]!=0) {
5746     set_glue(fill,0,actual-natural,stretching[fill]);
5747     last_badness=0;
5748   } @+else if(stretching[fil]!=0) {
5749     set_glue(fil,0,actual-natural,stretching[fil]);
5750     last_badness=0;
5751   } @+else if(stretching[finite]!=0) {
5752     set_glue(finite,0,actual-natural,stretching[finite]);
5753     last_badness=calc_badness(actual-natural,stretching[finite]);
5754   } @+else {
5755     last_badness=too_bad;
5756   }
5757 }
5758
5759 @ @<Glue is shrinking@>= {
5760   if(shrinking[filll]!=0) {
5761     set_glue(filll,1,natural-actual,shrinking[filll]);
5762     last_badness=0;
5763   } @+else if(shrinking[fill]!=0) {
5764     set_glue(fill,1,natural-actual,shrinking[fill]);
5765     last_badness=0;
5766   } @+else if(shrinking[fil]!=0) {
5767     set_glue(fil,1,natural-actual,shrinking[fil]);
5768     last_badness=0;
5769   } @+else if(shrinking[finite]>=natural-actual) {
5770     set_glue(finite,1,natural-actual,shrinking[finite]);
5771     last_badness=calc_badness(natural-actual,shrinking[finite]);
5772   } @+else {
5773     set_glue(finite,1,1,1); // Shrink as much as possible
5774     last_badness=too_bad;
5775   }
5776 }
5777
5778 @ Now vertical.
5779
5780 For vertical packaging, the two extra parameters are |max_dp|, the maximum
5781 depth; and |align_top|, which should be set true if it is wanted to align
5782 at the top instead of at the bottom.
5783
5784 @-p box_node*vpackage(common_package,scaled max_dp,boolean align_top) {
5785   box_node*box=create_node(vlist_node,0,sizeof_vlist_node);
5786   scaled stretchability[4]; // Total stretch of all glue
5787   scaled shrinkability[4]; // Total shrink of all glue
5788   scaled natural=0; // Total height plus depth
5789   scaled bonnet=0; // Height of first item
5790   scaled boot=0; // Depth of last item
5791   box_node*this; // Current node
5792   @<Initialize variables for |vpackage|@>;
5793   @<Read all nodes in a vertical list to package them@>;
5794   box->height=bonnet; @+ box->depth=boot;
5795 #define @!actual @, (*(align_top?&(box->depth):&(box->height)))
5796   natural-=align_top?bonnet:boot;
5797   actual=(factor*natural)/8+at_size;
5798   @<Compute glue set and badness@>;
5799 #undef actual
5800   @<Move the reference point to match the maximum depth, if applicable@>;
5801   return box;
5802 }
5803
5804 @ @<Initialize variables for |vpackage|@>= {
5805   int o;
5806   box->list=first;
5807   box->width=box->shift_amount=0;
5808   box->glue_set=0;
5809   for(o=0;o<4;o++) stretchability[o]=shrinkability[o]=0;
5810 }
5811
5812 @ @<Read all nodes in a vertical list to package them@>= {
5813   for(this=first;this;this=this->next) {
5814     switch(type_of(this)) {
5815       case hlist_node: case vlist_node: case rule_node:
5816         @<Apply the size of a box to a vertical list@>; @+break;
5817       case kern_node: natural+=this->distance; @+boot=0; @+break;
5818       case glue_node: @<Add glue to box size@>; @+break;
5819       case special_node:
5820         if(subtype_of(this)==spec_pack) @<Pack a special node@>;
5821         @+break;
5822       default: break; // All other nodes are ignored
5823     }
5824     if(this==first) bonnet=natural-boot;
5825   }
5826 }
5827
5828 @ @<Apply the size of a box to a vertical list@>= {
5829   natural+=this->height+(boot=this->depth);
5830   if(this->width+this->shift_amount>box->width)
5831     box->width=this->width+this->width;
5832 }
5833
5834 @ @<Move the reference point to match the maximum depth, if applicable@>= {
5835   if(box->depth>max_dp) {
5836     box->height+=box->depth-max_dp;
5837     box->depth=max_dp;
5838   }
5839 }
5840
5841 @*Typesetting Commands. There are various commands available in \TeX
5842 nicard for dealing with typesetting. Another thing it does is to allow you
5843 to enter distances using units.
5844
5845 @<Do a typesetting command@>= {
5846   int c=*++ptr;
5847   if((c>='0' && c<='9') || c=='.') @<Read a distance and units@>@;
5848   else switch(c) {
5849     @<Typesetting commands@>@;
5850   }
5851 }
5852
5853 @ The following units are supported:
5854
5855 \halign{\quad\tt#\space\hss&(#)\hss\cr
5856   pt&Point\cr
5857   bp&Desktop publishing point\cr
5858   in&Inch\cr
5859   cm&Centimetre\cr
5860   mm&Millimetre\cr
5861   P0&Pica\cr
5862   p0&Desktop publishing pica\cr
5863   q&Quarter\cr
5864   em&Em width -- font specific\cr
5865   ex&Ex height -- font specific\cr
5866 }
5867
5868 @<Read a distance and units@>= {
5869   char dig[18]; // Digits entered
5870   char digc=0; // Number of digits
5871   int num,den; // Numerator and denominator to scale the units
5872 }
5873
5874 @*Internal Image Rendering.
5875
5876 @*Main Program. This is where the program starts and ends. Everything else
5877 in the other chapters is started from here.
5878
5879 @<Include files@>=
5880 #include <signal.h>
5881 #include <stdio.h>
5882 #include <stdlib.h>
5883 #include <string.h>
5884 #include <time.h>
5885 #include <unistd.h>
5886
5887 @ @-p int main(int argc,char**argv) {
5888   boolean dvi_mode=0;
5889   @<Set up signal handler@>;
5890   @<Initialize memory@>;
5891   @<Display the banner message@>;
5892   @<Decide whether in DVI reading mode@>;
5893   if(!dvi_mode) @<Open the main input file@>;
5894   @<Initialize the input states@>;
5895   @<Initialize the tables and registers@>;
5896   @<Initialize the random number generator@>;
5897   @<Set registers according to command-line parameters@>;
5898   if(!dvi_mode) @<Process the input files@>;
5899   if(dvi_mode) dvi_mode=read_dvi_file(argv[1]);
5900   @<Call program in \.Z register if necessary@>;
5901   if(!dvi_mode) @<Send |end_transmission| to each card area@>;
5902   @<Write the output files@>;
5903   if(registers['Q'].is_string && dvi_mode &&
5904     (argv[0][0]!='-' || argv[0][1]!='z'))  @<Switch to ImageMagick@>;
5905   return 0;
5906 }
5907
5908 @ @<Display the banner message@>= {
5909   fprintf(stderr,"TeXnicard version %s\n",version_string);
5910   fprintf(stderr,
5911     "This program is free software and comes with NO WARRANTY.\n");
5912   fflush(stderr);
5913 }
5914
5915 @ @<Set registers according to command-line parameters@>= {
5916   int i;
5917   for(i=2;i<argc;i++) {
5918     registers[i+('0'-2)].is_string=1;
5919     registers[i+('0'-2)].text=strdup(argv[i]);
5920   }
5921 }
5922
5923 @ The main input file will be either the terminal, or another file if the
5924 command-line argument is given.
5925
5926 @<Open the main input file@>= {
5927   if(argc>1 && strcmp(argv[1],"-")!=0) {
5928     --current_input_file;
5929     open_input(argv[1]);
5930   } @+else {
5931     current_fp=0;
5932     strcpy(current_filename,"<Teletype>");
5933   }
5934 }
5935
5936 @ @<Call program in \.Z register if necessary@>= {
5937   if(registers['Z'].is_string) execute_program(registers['Z'].text);
5938 }
5939
5940 @ The alternative mode to run this program is DVI mode. DVI mode is
5941 specified by a command-line switch.
5942
5943 @.DVI@>
5944
5945 @<Decide whether in DVI reading mode@>= {
5946   if(argc>1 && argv[1][0]=='-' && argv[1][1]) {
5947     dvi_mode=1;
5948     argv++; @+ argc--;
5949     if(argv[0][1]=='a') {
5950       printing_mode=printing_all_cards;
5951     } @+else if(argv[0][1]=='f') {
5952       printing_mode=printing_list_from_file;
5953       printlistfile=fopen(argv[1],"r");
5954       argv++; @+ argc--;
5955     } @+else if(argv[0][1]=='n') {
5956       printing_mode=printing_list;
5957       printlisttext=argv[1];
5958       argv++; @+ argc--;
5959     } @+else if(argv[0][1]=='z') {
5960       printing_mode=printing_list;
5961       printlisttext="";
5962     }
5963   }
5964 }
5965
5966 @*Signal Handlers. The |SIGSEGV| signal should be handled in case
5967 something goes wrong in the program and it causes a segmentation fault, it
5968 should attempt to recover what you have before terminating, in order to be
5969 better at diagnosing the error.
5970
5971 @<Set up signal handler@>= {
5972   signal(SIGSEGV,handle_crash);
5973 }
5974
5975 @ Some things will be more careful here to ensure not to cause the error
5976 again (if it does, it will just quit, though).
5977
5978 @-p void handle_crash(int sig) {
5979   signal(SIGSEGV,SIG_DFL);
5980   @#fprintf(stderr,"\nFatal signal error (%d)\n",sig);
5981 @.Fatal signal error...@>
5982   fprintf(stderr,"cur_state=%d\ncur_name=%d\ncur_data=%d\n",
5983    cur_state,cur_name,cur_data);
5984   if(current_input_file>=input_files && current_input_file<input_files
5985    +max_input_stack) @<Display input stack after a crash@>;
5986   fprintf(stderr,"Program stack level: %d\n",stack_ptr-stack);
5987   fprintf(stderr,"Save stack level: %d\n",save_stack_ptr-save_stack);
5988   @#exit(3);
5989 }
5990
5991 @ @<Display input stack after a crash@>= {
5992   for(;;) {
5993     fprintf(stderr,"File %s line %d\n",current_filename,current_line);
5994     if(current_input_file--==input_files) break;
5995   }
5996 }
5997
5998 @*The Future. Here are some ideas for future versions of this program:
5999
6000 $\bullet$ A customizable Inform7-like parser, that would compile into a C
6001 code, so that you can play the cards on rule-enforcing computer programs.
6002 @^Inform@>
6003
6004 $\bullet$ A database to keep track of how many copies of a card have been
6005 sold, for inventory purposes.
6006 @^commercial viability@>
6007
6008 $\bullet$ Full text search, for things such as the Oracle text search.
6009 @^Oracle@>
6010
6011 $\bullet$ Allow more than 256 fonts in one card set.
6012
6013 $\bullet$ Unicode input (UTF-8).
6014
6015 $\bullet$ Built-in typesetting (using some of the algorithms of \TeX) and
6016 image manipulation, so that there is no dependence on external programs,
6017 and everything can be done in one pass.
6018
6019 @*Bibliography.
6020
6021 \count255=0 %
6022 \long\def\Par{\csname par\endcsname}%
6023 \loop\ifnum\count255<\bibliocount%
6024   \advance\count255 by 1
6025   \Par$^{[\the\count255]}$\csname biblio \the\count255\endcsname\Par%
6026 \repeat%
6027
6028 @*Index. Here you can find references to the definition and use of all the
6029 variables, subroutines, etc.\ used in this program, as well as a few other
6030 things of interest. Underlined entries indicate where it is defined.
6031
6032 {\bf Important note:} All the numbers in this index are section numbers,
6033 not page numbers.
6034
6035 % End of file "texnicard.w"