Horizontal and vertical packaging.
[TeXnicard.git] / texnicard.w
blob4bea20bdd6356e5cb0590681b9d4cc8a13a07906
1 % TeXnicard
2 % version 0.1
4 % Licensed by GNU GPL v3 or later version.
6 \def\contentspagenumber{1}\pageno=3
7 \def\title{\TeX nicard}
8 \def\covernote{{\fiverm Batteries not included. Do not use this book as a
9 flotation device. This is free software; see source file for details.}}
11 % Prevent \outer from getting in the way, stupid!
12 \def\+{\tabalign}
14 @mp@-
15 ``u{YJ"@<Predeclaration of procedures@>=
16 qJA";
17 J"@
18 "@<Procedure codes@>=
19 B" {
22 \long\def\IndexCharacter#1':{`\.{\char`#1}'}
23 @mcase@-
24 ``u "case
25 qAqA/@!@^\IndexCharacter\
26 Bqu'B"@>
27 YJ"@<Nothing~@>
30 \iffalse
31 @s _decl_head_ =09
32 @s FILE int
33 @s dvi_measure int
34 \fi
36 \newcount\bibliocount \bibliocount=0
37 \def\biblio#1{%
38 \advance\bibliocount by 1 %
39 $^{[\the\bibliocount]}$%
40 \expandafter\def\csname biblio \the\bibliocount\endcsname{#1}%
43 \emergencystretch=\hsize
45 \def\strike#1{%
46 \setbox0=\hbox{#1}%
47 \rlap{\vrule height 3.2pt depth -2.5pt width \wd0}{\box0}%
50 \def\sectionnumber\PB#1{\sectionnumbernext#1}
51 \def\sectionnumbernext$\X#1:#2\X${#1}
53 @*Introduction. This is \TeX nicard, a program designed for similar
54 purposes of Magic Set Editor, but in a different (and better) way. It
55 should be able to produce higher quality cards than Wizards of the Coast,
56 and then they ought to use this program, too!
58 @^Magic Set Editor@>
59 @^Wizards of the Coast@>
60 @^commercial viability@>
63 @<Memory usage logging@>@;
64 @<Interpreted C codes@>@;
65 @<Include files@>@;
67 @<Typedefs@>@;
68 @<Late Typedefs@>@;
69 @<The include file for memory managed types@>@;
70 @<Global variables@>@;
71 @<Predeclaration of procedures@>@;
72 @<Procedure codes@>@;
74 @ This line below should be changed with the current version number,
75 whenever a new version is released. (If you fork this program, you should
76 also include some indication of forking in the \\{version\_string}.)
77 % (it doesn't work if I use vertical bars here)
79 @^forking@>
81 @d version_string "0.1"
82 @d version_number 1 // one major is worth ten minors
84 @ @<Typedefs@>=
85 typedef unsigned char boolean;
87 @ You might be wondering what this section is for (especially since it
88 appears to be unused). The reason is that some metamacros use it in order
89 to force the compiler to know the correct line numbers (in case some lines
90 have been added by metamacros).
92 @^nothing@>
93 @^metamacro@>
95 @<Nothing~@>= /* ... */
97 @ There is also memory usage logging. If it is not being compiled for
98 memory usage logging, it should just ignore these kind of commands.
100 @<Memory usage logging@>=
101 #ifndef @!memusage_log
102 #define @[memusage_log(_text,_arg1)@]
103 #endif
105 @*Memory Management. This program uses a lot of similar memory management,
106 so they will be defined in this chapter.
108 @^memory management@>
110 @d none -1 // indication that a |data_index| means nothing
112 @<Typedefs@>=
113 typedef struct {
114 char*data; // pointer to array of blocks (|char*| for use with |sizeof|)
115 int used; // number of blocks used
116 int allocated; // number of blocks allocated
117 } managed_memory;
118 @#typedef int data_index;
120 @ We will use an interpreted C code here, which will send output to a
121 header file |"memory_management.h"|.
123 @<The include file for memory managed types@>=
124 #include "memory_management.h"
126 @ We will need some variables now just to keep track of which kinds of
127 memory managed areas are needed.
129 @<Interpreted C codes@>= @{
130 char**memory_managed_types;
131 int num_memory_managed_types;
132 memory_managed_types=malloc(128*sizeof(char*));
133 num_memory_managed_types=0;
136 @ From this code, the structure will be created in the header file for
137 each type that we need a |memory_of|. This section, however, is just a
138 ``wrapper'' code for the template.
140 @f @!memory_of _decl_head_ // category 9
142 @<Interpreted C codes@>= @{
143 void memory_of$() {
144 should_output=0;
145 set_goal("bp","",@+{
146 sendc(0200|'{'); // begin interpret mode
147 send("send_memory_of(\"");
148 set_goal("e","",@+{
149 send("\");");
150 sendc(0200|'}'); // end interpret mode
151 should_output=0;
152 }@+);
153 }@+);
157 @ Here is what it does in order to keep a list of the memory managed
158 types. Note the type name was enclosed in quotation marks, so now it will
159 be received as a string.
161 @<Interpreted C codes@>= @{
162 void send_memory_of(char*s) {
163 int i;
164 s++;
165 @<Send the proper name of the memory managed type@>;
166 for(i=0;i<num_memory_managed_types;i++) {
167 if(!strcmp(s,memory_managed_types[i])) return;
169 memory_managed_types[num_memory_managed_types++]=s;
173 @ @<Send the proper name of the memory managed type@>= {
174 send(" x__");
175 send(s);
176 send(" ");
179 @ Now the code you get to in order to define the structures in the header
180 file. We are mostly just copying the form of our |managed_memory|
181 structure, but it will be customized to work with the specific type of the
182 |data| components.
184 @<Interpreted C codes@>= @{
185 void send_memory_managed_types() {
186 int i;
187 for(i=0;i<num_memory_managed_types;i++) {
188 send("typedef struct {");
189 send(memory_managed_types[i]);
190 send("*data; int used; int allocated; } x__");
191 send(memory_managed_types[i]);
192 send(";");
197 @ @(memory_management.h@>= @{
198 send_memory_managed_types();
201 @ These next two subroutines are used to allocate additional memory.
203 @d init_memory(_a,_size) init_memory_(&(_a),sizeof(*((_a).data)),(_size))
204 @d new_record(_area) new_record_(&(_area),sizeof(*((_area).data)))
206 @-p void*init_memory_(void*mem,int record_size,int num_records) {
207 managed_memory*m=mem;
208 m->data=malloc(record_size*num_records);
209 m->used=0;
210 m->allocated=num_records;
211 if(!m->data) @<Fatal error due to lack of memory@>;
212 return m->data;
215 @ @-p data_index new_record_(void*mem,int record_size) {
216 managed_memory*m=mem;
217 m->used++;
218 if(m->used>m->allocated) {
219 m->allocated*=2;
220 m->data=realloc(m->data,m->allocated*record_size);
222 if(!m->data) @<Fatal error due to lack of memory@>;
223 @<Zero the new record@>;
224 return m->used-1;
227 @ @<Fatal error due to lack of memory@>= {
228 fprintf(stderr,"Out of memory\n");
229 @.Out of memory@>
230 exit(1);
233 @ @<Zero the new record@>= {
234 memset(m->data+(record_size*(m->used-1)),0,record_size);
237 @ Now just one more thing. It is useful to have a |foreach| macro to
238 iterate the areas.
240 @d foreach(_var,_area) for(_var=0;_var<_area.used;_var++)@;
241 @f foreach while
243 @*Symbolic Names. There will be some names defined for the use of naming
244 subroutines, symbolic constants, patterns, card areas, etc. These names
245 are stored in a |managed_memory| called |names|.
247 It also stores references to other things (defined in later chapters). The
248 numeric value of a name in |names.data[x]| is |x+256|.
250 @<Late Typedefs@>=
251 typedef struct {
252 char*name;
253 @<More elements of |name_data|@>@;
254 } name_data;
256 @ @<Global variables@>=
257 memory_of(name_data) names;
259 @ @<Initialize memory@>= init_memory(names,16);
261 @ This subroutine finds a name, adding it if necessary. The number
262 corresponding to it (as described above) will be the return value.
264 @-p int find_name(char*name) {
265 @<Search for the |name| in |names|@>;
266 @<Add the new name (it was not found)@>;
269 @ @<Search for the |name| in |names|@>= {
270 int i;
271 foreach(i,names) {
272 if(!strcmp(names.data[i].name,name)) return i+256;
276 @ @<Add the new name (it was not found)@>= {
277 int n=new_record(names);
278 names.data[n].name=strdup(name);
279 return n+256;
282 @ A macro will be useful to access the data from a number.
284 @d name_info(_num) names.data[(_num)-0x0100]
286 @ This code lists the names. It is used for a diagnostic purpose.
288 @<Display the list of names@>= {
289 int n;
290 foreach(n,names) {
291 printf("%d \"%s\" ",n+256,names.data[n].name);
292 @<Display other fields of |names.data[n]|@>;
293 printf("\n");
297 @*Storage of Tokens. Tokens are stored as 16-bit numbers. Values |0x0020|
298 to |0x00FF| represent those ASCII characters, and |0x0000| to |0x001F| are
299 ASCII control codes. Higher numbers represent an index into the |names|
300 array (where |0x0101| represents |names.data[0x0001]|).
302 @<Typedefs@>=
303 @q[data type of tokens]@>
304 typedef unsigned short token;
306 @ This section lists the ASCII control codes which can be used. Some of
307 them have slightly different meaning from the ASCII standard.
309 @d null_char 0x00 // end of a |raw_data| string or similar things
310 @d pre_null_char 0x01 // becomes |null_char|
311 @d end_transmission 0x04 // marks the end of the last card in this area
312 @d tabulation 0x09 // represents a tab in a {\TeX} alignment
313 @d raw_data 0x10 // enter raw {\TeX} mode
314 @d whatsit 0x1A // a token for converting into a name token
315 @d escape_code 0x1B // represents a {\TeX} control sequence introducer
316 @d record_separator 0x1E // marks the end of a card
317 @d field_separator 0x1F // marks the end of a field of a card
318 @d start_name_code 0x0100
320 @ These tokens are used in card areas, which are defined (and described)
321 in the next chapter.
323 @*Cards. The data of the cards is stored in card areas. Each card area
324 is a list of tokens, terminated by |record_separator|. The final card in
325 the area is terminated by |end_transmission|.
327 @<Typedefs@>=
328 typedef struct {
329 token*tokens;
330 int allocated;
331 int used;
332 } card_area_data;
334 @ @<More elements of |name_data|@>=
335 boolean has_card_area;
336 data_index card_area;
338 @ @<Global variables@>=
339 memory_of(card_area_data) card_areas;
341 @ @<Initialize memory@>= init_memory(card_areas,1);
343 @ A new card area is created with this.
345 @-p data_index set_card_area(int num) {
346 name_data*m=&name_info(num);
347 @<Use the card area which is already set, if able@>;
348 @<Otherwise, create a new card area and use the new one@>;
351 @ @<Use the card area which is already set, if able@>= {
352 if(m->has_card_area) return m->card_area;
355 @ @<Otherwise, create a new card area and use the new one@>= {
356 data_index n=new_record(card_areas);
357 m->has_card_area=1;
358 card_areas.data[n].allocated=0x100;
359 card_areas.data[n].tokens=malloc(0x100*sizeof(token));
360 card_areas.data[n].used=0;
361 return n;
364 @ This subroutine sends a token to a card area.
366 @-p void send_token(data_index a,token x) {
367 if(card_areas.data[a].allocated<card_areas.data[a].used+4)
368 @<Double the allocation of card area tokens@>;
369 card_areas.data[a].tokens[card_areas.data[a].used++]=x;
372 @ @<Double the allocation of card area tokens@>= {
373 int n=(card_areas.data[a].allocated*=2)*sizeof(token);
374 card_areas.data[a].tokens=realloc(card_areas.data[a].tokens,n);
377 @ @<Display other fields of |names.data[n]|@>= {
378 if(names.data[n].has_card_area)
379 printf("C(%d) ",names.data[n].card_area);
382 @ The code in this section is used to ensure that each card area is
383 properly terminated with |end_transmission| marker, so that when it is
384 time to write the output files, it will know when to stop.
386 @<Send |end_transmission| to each card area@>= {
387 data_index a;
388 foreach(a,card_areas) send_token(a,end_transmission);
391 @*Patterns. For pattern matching, we store the patterns in one memory
392 managed area. The index of the beginning of each pattern area is stored
393 in the |names| list.
395 These constants are special codes which can occur in the |text| string
396 of a pattern.
398 @d begin_capture 1
399 @d end_capture 2
400 @d match_keyword 3 // match a keyword followed by a character in a table
401 @d match_table 4 // match a character using a table
402 @d optional_table 5 // match a character optional using a table
403 @d failed_match 6
404 @d jump_table 7 // use a table to jump to a marker
405 @d successful_match 8
406 @d back_one_space 9
407 @d forward_one_space 10
408 @d match_left_side 11 // match at beginning of line
409 @d match_right_side 12 // match at end of line
410 @d match_eight_bit 13 // match 8-bit encodings and control characters
412 @<Typedefs@>=
413 typedef struct {
414 char*text;
415 unsigned int category; // category for keywords
416 data_index subroutine;
417 data_index next;
418 } pattern_data;
420 @ @<More elements of |name_data|@>=
421 boolean has_pattern_area;
422 data_index pattern_area;
424 @ @<Global variables@>=
425 memory_of(pattern_data) pattern_areas;
427 @ @<Initialize memory@>= init_memory(pattern_areas,4);
429 @ @<Display other fields of |names.data[n]|@>= {
430 if(names.data[n].has_pattern_area)
431 printf("P(%d) ",names.data[n].pattern_area);
434 @ A new pattern area is created with this. The patterns in an area are
435 stored like a linked list. The last one with |next| pointing to nothing,
436 is the terminator entry.
438 @-p data_index set_pattern_area(int num) {
439 name_data*m=&name_info(num);
440 @<Use the pattern area which is already set, if able@>;
441 @<Otherwise, create a new pattern area and use the new one@>;
444 @ @<Use the pattern area which is already set, if able@>= {
445 if(m->has_pattern_area) return m->pattern_area;
448 @ @<Otherwise, create a new pattern area and use the new one@>= {
449 data_index n=new_record(pattern_areas);
450 m->has_pattern_area=1;
451 pattern_areas.data[n].subroutine=none;
452 pattern_areas.data[n].next=none;
453 return n;
456 @ @<Display the list of patterns@>= {
457 int i;
458 foreach(i,pattern_areas) {
459 if(pattern_areas.data[i].text) {
460 printf("%d:%08X:%d:%d\n",i,pattern_areas.data[i].category
461 ,pattern_areas.data[i].subroutine,pattern_areas.data[i].next
463 display_string(pattern_areas.data[i].text);
464 printf("\n");
469 @*Keywords. Keywords means words which can be placed on the card and which
470 can have special meanings, and possibly reminder text.
472 Keywords are stored in a large list in only one keyword area. A category
473 can be given a name, which will automatically be assigned for the next bit
474 of the keyword category when it is entered the first time.
476 @<Typedefs@>=
477 typedef struct {
478 char*match; // match text (can contain pattern codes)
479 unsigned int category; // bitfield of categories
480 int extra1;
481 int extra2;
482 char*replacement; // replacement text or reminder text
483 } keyword_data;
485 @ @<Global variables@>=
486 unsigned int next_keyword_category=1;
487 memory_of(keyword_data) keywords;
489 @ @<Initialize memory@>= init_memory(keywords,4);
491 @ A keyword category is found (and created, if it is not found) using the
492 following code.
494 @-p unsigned int find_category(char*name) {
495 int i=find_name(name);
496 if(name_info(i).value.number) {
497 return name_info(i).value.number;
498 } @+else if(!name_info(i).value.is_string) {
499 name_info(i).value.number=next_keyword_category;
500 next_keyword_category<<=1;
501 if(!next_keyword_category)
502 fprintf(stderr,"Too many keyword categories: %s\n",name);
503 @.Too many keyword categories@>
504 return name_info(i).value.number;
508 @ Some stack code commands are used when dealing with reading/writing
509 keyword info.
511 In order that you might be able to iterate them, it will exit out of the
512 current block when trying to read nonexisting keyword info instead of
513 displaying an error message.
515 @<Cases for system commands@>=
516 @-case 'k': {
517 // Read keyword info
518 if(registers['K'].number<0 || registers['K'].number>=keywords.used)
519 return 0;
520 push_num(keywords.data[registers['K'].number].extra1);
521 push_num(keywords.data[registers['K'].number].extra2);
522 push_string(keywords.data[registers['K'].number].replacement);
523 break;
525 @-case 'K': {
526 // Write keyword info
527 if(registers['K'].number<0 || registers['K'].number>=keywords.used)
528 program_error("Out of range");
529 free(keywords.data[registers['K'].number].replacement);
530 keywords.data[registers['K'].number].replacement=pop_string();
531 keywords.data[registers['K'].number].extra2=pop_num();
532 keywords.data[registers['K'].number].extra1=pop_num();
533 break;
536 @ @<Display the list of keywords@>= {
537 int i;
538 foreach(i,keywords) {
539 display_string(keywords.data[i].match);
540 printf(" [%d:%08X:%d:%d:%d]\n",i,keywords.data[i].category
541 ,keywords.data[i].extra1,keywords.data[i].extra2
542 ,strlen(keywords.data[i].replacement)
547 @*Card List. A sorted summary list of the cards is kept in one list,
548 having thirty-two general-purpose numeric fields, and a pointer to the
549 beginning of the record (usually the name in which it will be indexed by).
551 @<Typedefs@>=
552 typedef struct {
553 int token_ptr;
554 int field[32];
555 int amount_in_pack; // used in pack generation
556 } list_entry;
558 @ @<Global variables@>=
559 memory_of(list_entry) card_list;
561 @ @<Initialize memory@>= init_memory(card_list,16);
563 @*Deck Lists. Deck lists involve lists of cards or rules for cards that
564 belong to a deck or pack.
566 @^booster pack@>
568 There is one macro |lflag| here just to convert letters to bit flags. For
569 example |lflag('a')| is the least significant bit.
571 @d lflag(_ch) (1<<((_ch)-'a'))
573 @<Typedefs@>=
574 typedef struct {
575 int amount;
576 unsigned int flags;
577 char*name;
578 data_index next;
579 } deck_entry;
581 @ @<Global variables@>=
582 memory_of(deck_entry) deck_lists;
584 @ @<More elements of |name_data|@>=
585 boolean has_deck_list;
586 data_index deck_list;
588 @ @<Initialize memory@>= init_memory(deck_lists,4);
590 @ A new deck list is created with this. The deck entries are stored like a
591 linked list. The terminator has |next| pointing to |none|.
593 @-p data_index set_deck_list(int num) {
594 name_data*m=&name_info(num);
595 @<Use the deck list which is already set, if able@>;
596 @<Otherwise, create a new deck list and use the new one@>;
599 @ @<Use the deck list which is already set, if able@>= {
600 if(m->has_deck_list) return m->deck_list;
603 @ @<Otherwise, create a new deck list and use the new one@>= {
604 data_index n=new_record(deck_lists);
605 m->has_deck_list=1;
606 deck_lists.data[n].next=none;
607 return n;
610 @ @<Display the deck list@>= {
611 data_index i;
612 foreach(i,deck_lists) {
613 printf("%d ",i);
614 if(deck_lists.data[i].name) display_string(deck_lists.data[i].name);
615 else printf("-");
616 printf(" [%08X:%d:%d]\n",deck_lists.data[i].flags
617 ,deck_lists.data[i].amount,deck_lists.data[i].next);
621 @*Word Forms. These structures are used to store word form rules, such as
622 plurals\biblio{Conway, Damian. ``An Algorithmic Approach to English
623 Pluralization''. \hskip 0pt plus 1in\hbox{}
624 \.{http://www.csse.monash.edu.au/\~damian/papers/HTML/Plurals.html}}. You
625 can store up to four different kinds, in case of languages other than
626 English.
628 @^Conway, Damian@>
629 @^plurals@>
631 @<Typedefs@>=
632 typedef struct {
633 int level;
634 data_index next;
635 unsigned char orig[32];
636 unsigned char dest[32];
637 boolean left_boundary;
638 boolean right_boundary;
639 } word_form_entry;
641 @ @<Global variables@>=
642 memory_of(word_form_entry) word_forms;
644 @ @<Initialize memory@>= {
645 int i;
646 init_memory(word_forms,16);
647 word_forms.used=8;
648 for(i=0;i<8;i+=2) {
649 word_forms.data[i].orig[0]=word_forms.data[i].dest[0]=0;
650 word_forms.data[i].next=i+1;
651 word_forms.data[i].level=0x7FFFFFFF;
652 word_forms.data[i+1].orig[0]=word_forms.data[i+1].dest[0]=0;
653 word_forms.data[i+1].next=none;
654 word_forms.data[i+1].level=0;
658 @ Word form rules are added and then inserted in the correct place in the
659 linked list using the |next| field. Entries with a higher numbered level
660 take higher priority, therefore will be placed before the ones with lower
661 numbered level. Next, longer |orig| strings come before shorter strings,
662 since they might be more specific forms of the others and will therefore
663 override them.
665 @-p data_index add_word_form(int kind,int level,char*orig,char*dest) {
666 data_index n=new_record(word_forms);
667 @<Set the fields of the new word form rule@>;
668 @<Insert the new word form rule into the linked list@>;
669 return n;
672 @ The |left_boundary| and |right_boundary| fields specify if they should
673 match only at the boundary. Characters are checked using the \.W table and
674 removed from the string to place in the list.
676 @d last_character(_str) ((_str)[strlen(_str)-1])
678 @<Set the fields of the new word form rule@>= {
679 word_forms.data[n].level=level;
680 strcpy(word_forms.data[n].orig,orig+(tables['W'][*orig]==2));
681 word_forms.data[n].left_boundary=(tables['W'][*orig]==2);
682 if((word_forms.data[n].right_boundary=
683 (tables['W'][last_character(word_forms.data[n].orig)]==3)))
684 last_character(word_forms.data[n].orig)=0;
685 strcpy(word_forms.data[n].dest,dest+(tables['W'][*dest]==2));
686 if(tables['W'][last_character(word_forms.data[n].dest)]==3)
687 last_character(word_forms.data[n].dest)=0;
690 @ @<Insert the new word form rule into the linked list@>= {
691 data_index y=(kind&3)<<1; // previous item to |x|
692 data_index x=word_forms.data[y].next; // current item
693 int s=strlen(orig);
694 for(;x!=none;y=x,x=word_forms.data[y].next) {
695 if(word_forms.data[x].next==none) break;
696 @#if(word_forms.data[x].level<level) break;
697 if(word_forms.data[x].level>level) continue;
698 @#if(strlen(word_forms.data[x].orig)<s) break;
700 word_forms.data[y].next=n;
701 word_forms.data[n].next=x;
704 @ Now to do computation of changing a word by word forms. This function
705 expects only one word from input, or multiple words where the last one
706 should be the word to be converted. Uppercase letters are converted to
707 lowercase for conversion (but not the other way around), but if the
708 letters are uppercase in the input, the output will also have uppercase
709 letters on those positions. The algorithm starts from the right side of
710 the input string.
712 The parameter |src| is the input, and |dest| should point to a buffer
713 which is large enough to store the output string.
715 @^plurals@>
717 @-p data_index reform_word(int kind,char*src,char*dest) {
718 char*l=src+strlen(src);
719 data_index n=word_forms.data[(kind&3)<<1].next;
720 strcpy(dest,src); // this is used later
721 @<Try each word form rule, following the |next| pointers@>;
722 return none; // in case there is nothing to do
725 @ @<Try each word form rule, following the |next| pointers@>= {
726 char*p;
727 int s;
728 while(n!=none && word_forms.data[n].next!=none) {
729 s=strlen(word_forms.data[n].orig); @+ p=l-s;
730 @<Check the characters matching from |p|, going backwards@>;
731 n=word_forms.data[n].next;
735 @ Look ahead for the definition of |wcasecmp| (true means it matches).
737 @<Check the characters matching from |p|, going backwards@>= {
738 for(;;) {
739 if((!word_forms.data[n].left_boundary || p==src
740 || tables['W'][p[-1]])
741 && wcasecmp(word_forms.data[n].orig,p))
742 @<A match to the word form rules has been found@>;
743 @<Go backwards, stop if we are not allowed to continue backwards@>;
747 @ @<A match to the word form rules has been found@>= {
748 char*o=dest+(p-src);
749 sprintf(o,"%s%s",word_forms.data[n].dest,p+s);
750 @<Change the capitalization to match the original@>;
751 return n;
754 @ Remember, that for example if ``cow'' becomes ``kine'', then ``Cow''
755 will become ``Kine''. So, it will retain capitalization.
757 @^cows@>
759 @<Change the capitalization to match the original@>= {
760 char*q=word_forms.data[n].orig;
761 for(;*p && *q;p++,o++,q++)
762 if(*p==tables['U'][*q] && *p!=tables['L'][*q]) *o=tables['U'][*o];
765 @ @<Go backwards, stop if we are not allowed to continue backwards@>= {
766 if(word_forms.data[n].right_boundary) break; // matches only on boundary
767 if(tables['W'][p[s]]) break; // only the last word(s) can be matched
768 if(p--==src) break; // stop at beginning
771 @ This function is defined to compare strings in the way needed for
772 matching word forms, including case conversion. The lowercase letters in
773 the |shorter| string are permitted to match lowercase and uppercase
774 letters in the |longer| string, and the |shorter| string is permitted to
775 be shorter and still match.
777 @-p boolean wcasecmp(char*shorter,char*longer) {
778 for(;;shorter++,longer++) {
779 if(!*shorter) return 1;
780 if(!*longer) return 0;
781 if(*shorter!=*longer && *shorter!=tables['L'][*longer]) return 0;
785 @ Of course it is now needed a command that can access these features from
786 within a \TeX nicard template. The |level| of the matched rule is also
787 returned, in case your program might use that information for something.
789 @<Cases for system commands@>=
790 @-case 'W': {
791 // Convert a word form
792 int k=pop_num();
793 char*o=pop_string();
794 char q[1500];
795 data_index n=reform_word(k,o,q);
796 push_string(q);
797 if(n==none) push_num(0);
798 else push_num(word_forms.data[n].level);
799 free(o);
800 break;
803 @ @<Display the list of word form rules@>= {
804 data_index i;
805 foreach(i,word_forms) {
806 printf("%d %c\"",i,word_forms.data[i].left_boundary?'[':' ');
807 display_string(word_forms.data[i].orig);
808 printf("\"%c -> \"",word_forms.data[i].right_boundary?']':' ');
809 display_string(word_forms.data[i].dest);
810 printf("\" %d >%d\n",word_forms.data[i].level
811 ,word_forms.data[i].next);
815 @*Random Number Generation. This program uses the Xorshift algorithm,
816 invented by George Marsaglia\biblio{Marsaglia (July 2003). ``Xorshift
817 RNGs''. Journal of Statistical Software Vol.~8 (Issue 14). {\tt
818 http://www.jstatsoft.org/v08/i14/paper}.}.
820 @^Marsaglia, George@>
821 @^random numbers@>
823 @<Global variables@>=
824 unsigned int rng_x;
825 unsigned int rng_y;
826 unsigned int rng_z;
827 unsigned int rng_w;
829 @ @<Initialize the random number generator@>= {
830 @q[initialize the random seed::]@>
831 rng_seed((unsigned int)time(0));
832 @q[::initialize the random seed]@>
835 @ The seed parameters for the random number generator will be seeded using
836 the linear congruential generator, which is a simpler generator which can
837 be used to seed it with.
839 The parameters |lcg_a| and |lcg_c| are parameters to the linear
840 congruential generator algorithm. The values used here are the same as
841 those used in GNU C. In this program they will be specified explicitly so
842 that you can get identical output on different computers.
844 @d lcg_a 1103515245
845 @d lcg_c 12345
847 @-p void rng_seed(unsigned int x) {
848 rng_x=x=lcg_a*x+lcg_c;
849 rng_y=x=lcg_a*x+lcg_c;
850 rng_z=x=lcg_a*x+lcg_c;
851 rng_w=x=lcg_a*x+lcg_c;
854 @ There is a command to reseed it using a constant (so that you can
855 generate the same numbers on different computers).
857 @<Cases for system commands@>=
858 @-case 'U': {
859 // Reseed the random number generator
860 if(stack_ptr->is_string) program_error("Type mismatch");
861 rng_seed(pop_num());
862 break;
865 @ And now follows the algorithm for generating random numbers. One change
866 has been made so that once it is modulo, all number will still be of equal
867 probability.
869 Numbers are generated in the range from 0 up to but not including |limit|.
871 @d max_uint ((unsigned int)(-1))
873 @-p unsigned int gen_random(unsigned int limit) {
874 unsigned int r=max_uint-(max_uint%limit); // range check
875 for(;;) {
876 @<Make the next number |rng_w|...@>;
877 @<Check the range, try again if out of range, else |return|@>;
881 @ @<Make the next number |rng_w| by Xorshift algorithm@>= {
882 unsigned int t = rng_x ^ (rng_x << 11);
883 rng_x = rng_y; @+ rng_y = rng_z; @+ rng_z = rng_w;
884 rng_w ^= (rng_w >> 19) ^ t ^ (t >> 8);
887 @ @<Check the range, try again if out of range, else |return|@>= {
888 if(rng_w<=r) return rng_w%limit;
891 @ @<Cases for system commands@>=
892 @-case 'u': {
893 // Generate a random number
894 if(stack_ptr->is_string) program_error("Type mismatch");
895 stack_ptr->number=gen_random(stack_ptr->number);
896 break;
899 @*Stack Programming Language. Now we get to the part where the user can
900 enter a program, in order to control the features of this program. The
901 programming language used is like \.{dc}, but different.
903 @.dc@>
905 Subroutines are simply stored as strings in the |names| area, since they
906 are the same as registers.
908 @ Now we have the storage of registers. Registers 0 to 255 are stored in
909 this separate list, while other register values are just stored in the
910 |names| list. There is also a stack, which has storage of the same values
911 as registers can contain.
913 @d max_stack 0x1000
915 @<Typedefs@>=
916 typedef struct {
917 boolean is_string;
918 union @+{
919 int number;
920 unsigned char*text;
921 }@+;
922 } register_value;
924 @ @<More elements of |name_data|@>=
925 register_value value;
927 @ @<Global variables@>=
928 register_value registers[256];
929 register_value stack[max_stack];
930 register_value*stack_ptr=stack-1; // current top of stack element
932 @ Here are some codes for pushing and popping the stack.
934 @d pop_num() ((stack_ptr--)->number)
936 @-p inline void push_string(char*s) {
937 ++stack_ptr;
938 stack_ptr->is_string=1;
939 stack_ptr->text=strdup(s);
942 @ @-p inline void push_num(int n) {
943 ++stack_ptr;
944 stack_ptr->is_string=0;
945 stack_ptr->number=n;
948 @ @-p inline void stack_dup(void) {
949 if((stack_ptr[1].is_string=stack_ptr->is_string)) {
950 stack_ptr[1].text=strdup(stack_ptr->text);
951 } @+else {
952 stack_ptr[1].number=stack_ptr->number;
954 stack_ptr++;
957 @ @-p inline void stack_drop(void) {
958 if(stack_ptr->is_string) free(stack_ptr->text);
959 --stack_ptr;
962 @ @-p inline char*pop_string(void) {
963 char*p=stack_ptr->text;
964 stack_ptr->is_string=0; stack_ptr->text=0;
965 --stack_ptr;
966 return p;
969 @ Also, some subroutines are needed here in order to deal with registers.
971 For |fetch_code|, the string |"0[]+"| is returned if it is not a string,
972 generating a ``Type mismatch'' error when you try to run it.
974 @-p inline char*fetch_code(int r) {
975 if(!(r&~0xFF)) {
976 if(!registers[r].is_string) return "0[]+";
977 return registers[r].text;
978 } @+else {
979 if(!name_info(r).value.is_string) return "0[]+";
980 return name_info(r).value.text;
984 @ @-p inline void fetch(int r) {
985 register_value*v;
986 if(!(r&~0xFF)) v=&(registers[r]);
987 else v=&(name_info(r).value);
988 (++stack_ptr)->is_string=v->is_string;
989 if(v->is_string) {
990 stack_ptr->text=strdup(v->text);
991 } @+else {
992 stack_ptr->number=v->number;
996 @ @-p inline void store(int r) {
997 register_value*v;
998 if(!(r&~0xFF)) v=&(registers[r]);
999 else v=&(name_info(r).value);
1000 if(v->is_string) free(v->text);
1001 v->is_string=stack_ptr->is_string;
1002 if(v->is_string) {
1003 v->text=stack_ptr->text;
1004 } @+else {
1005 v->number=stack_ptr->number;
1007 --stack_ptr;
1010 @ There is also a save stack. This save stack stores the saved values of
1011 the registers |'0'| to |'9'|, so that you can have local variables in a
1012 subroutine.
1014 @<Global variables@>=
1015 register_value save_stack[520];
1016 register_value*save_stack_ptr=save_stack;
1018 @ These codes deal with the save stack. Strings will be copied when
1019 saving. When loading, strings that were previously in the registers will
1020 be freed.
1022 @<Save local registers to the save stack@>= {
1023 int i;
1024 for(i='0';i<='9';i++) {
1025 *save_stack_ptr=registers[i];
1026 if(registers[i].is_string)
1027 save_stack_ptr->text=strdup(save_stack_ptr->text);
1028 save_stack_ptr++;
1032 @ @<Load local registers from the save stack@>= {
1033 int i;
1034 for(i='9';i>='0';i--) {
1035 if(registers[i].is_string) free(registers[i].text);
1036 registers[i]=*--save_stack_ptr;
1040 @*Commands for Stack Programming Language. Finally, is the code where it
1041 can be executed. The return value of this function indicates how many
1042 levels should be exit when it is called.
1044 @-p int execute_program(unsigned char*prog) {
1045 unsigned char*ptr=prog;
1046 reset_execute_program:
1047 for(;*ptr;ptr++) {
1048 switch(*ptr) {
1049 @<Cases for literal data commands@>@;
1050 @<Cases for stack manipulation commands@>@;
1051 @<Cases for arithmetic commands@>@;
1052 @<Cases for flow-control commands@>@;
1053 @<Cases for register/table operation commands@>@;
1054 @<Cases for string commands@>@;
1055 @<Cases for condition/compare commands@>@;
1056 @<Cases for local registers commands@>@;
1057 @<Cases for system commands@>@;
1058 @-case '?': @<Do a diagnostics command@>@;@+break;
1059 @-case '=': @<Do a typesetting command@>@;@+break;
1060 default:
1061 if(*ptr>='0' && *ptr<='9') {
1062 @<Read a literal number and push to stack@>;
1063 } @+else if(0x80&*ptr) {
1064 @<Execute a subroutine code from the current character@>;
1066 break;
1068 if(stack_ptr<stack-1) program_error("Stack underflow");
1069 if(stack_ptr>stack+max_stack) program_error("Stack overflow");
1071 return 0;
1074 @ @<Cases for literal data commands@>=
1075 @-case '`': {
1076 // Literal ASCII character
1077 push_num(*++ptr);
1078 break;
1080 @-case '[': {
1081 // Literal string
1082 @<Read a literal string and push to stack@>;
1083 break;
1085 @-case '(': {
1086 // Literal name
1087 @<Read a literal name and push its number to the stack@>;
1088 break;
1091 @ @<Read a literal number and push to stack@>= {
1092 int n=0;
1093 while(*ptr>='0' && *ptr<='9') n=10*n+(*ptr++)-'0';
1094 --ptr;
1095 push_num(n);
1098 @ @<Read a literal string and push to stack@>= {
1099 char*p=++ptr;
1100 int n=1;
1101 while(n && *ptr) {
1102 if(*ptr=='[') ++n;
1103 if(*ptr==']') --n;
1104 if(n) ptr++;
1106 if(!*ptr) program_error("Unterminated string literal");
1107 *ptr=0;
1108 push_string(p);
1109 *ptr=']';
1112 @ @<Read a literal name and push its number to the stack@>= {
1113 char*p=++ptr;
1114 while(*ptr && *ptr!=')') ptr++;
1115 if(!*ptr) program_error("Unterminated string literal");
1116 *ptr=0;
1117 push_num(find_name(p));
1118 *ptr=')';
1121 @ @<Cases for stack manipulation commands@>=
1122 @-case 'D': {
1123 // Drop top item of stack
1124 stack_drop();
1125 break;
1127 @-case 'c': {
1128 // Clears the stack, rendering it empty
1129 while(stack_ptr>=stack) stack_drop();
1130 break;
1132 @-case 'd': {
1133 // Duplicates the value on top of the stack.
1134 stack_dup();
1135 break;
1137 @-case 'r': {
1138 // Swaps the top two values on the stack
1139 stack_ptr[1]=stack_ptr[0];
1140 stack_ptr[0]=stack_ptr[-1];
1141 stack_ptr[-1]=stack_ptr[1];
1142 break;
1145 @ @<Cases for arithmetic commands@>=
1146 @-case '+': {
1147 // Add two numbers, or concatenate two strings
1148 if(stack_ptr->is_string) {
1149 @<Concatenate strings on the stack@>;
1150 }@+ else {
1151 int n=pop_num();
1152 if(stack_ptr->is_string)
1153 program_error("Type mismatch");
1154 stack_ptr->number+=n;
1156 break;
1158 @-case '-': {
1159 // Subtract two numbers, or compare two strings
1160 if(stack_ptr->is_string) {
1161 @<Compare strings on the stack@>;
1162 }@+ else {
1163 int n=pop_num();
1164 if(stack_ptr->is_string)
1165 program_error("Type mismatch");
1166 stack_ptr->number-=n;
1168 break;
1170 @-case '*': {
1171 // Multiply two numbers
1172 int n=pop_num();
1173 if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1174 program_error("Number expected");
1175 stack_ptr->number*=n;
1176 break;
1178 @-case '/': {
1179 // Divide two numbers
1180 int n=pop_num();
1181 if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1182 program_error("Number expected");
1183 if(n==0) program_error("Division by zero");
1184 stack_ptr->number/=n;
1185 break;
1187 @-case '%': {
1188 // Modulo of two numbers
1189 int n=pop_num();
1190 if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1191 program_error("Number expected");
1192 if(n==0) program_error("Division by zero");
1193 stack_ptr->number%=n;
1194 break;
1197 @ @<Concatenate strings on the stack@>= {
1198 char*s=pop_string();
1199 char*q;
1200 if(!stack_ptr->is_string) program_error("Type mismatch");
1201 q=malloc(strlen(s)+strlen(stack_ptr->text)+1);
1202 strcpy(q,stack_ptr->text);
1203 strcpy(q+strlen(q),s);
1204 stack_drop();
1205 push_string(q);
1206 free(q);
1207 free(s);
1210 @ @<Compare strings on the stack@>= {
1211 char*s=pop_string();
1212 char*q=pop_string();
1213 push_num(strcmp(q,s));
1214 free(q);
1215 free(s);
1218 @ @<Cases for flow-control commands@>=
1219 @-case 'Q': {
1220 // Exit from multiple levels
1221 int q=pop_num();
1222 if(q>0) return q-1;
1223 break;
1225 @-case 'Y': {
1226 // Go back to beginning
1227 ptr=prog-1;
1228 break;
1230 @-case 'q': {
1231 // Exit from two levels
1232 return 1;
1233 break;
1235 @-case 'x': {
1236 // Execute code from top of stack
1237 @<Execute a string or subroutine code from top of stack@>;
1238 break;
1241 @ Note here, it is a recursive function call.
1242 @^recursive@>
1244 @<Execute a string or subroutine code from top of stack@>= {
1245 if(stack_ptr->is_string) {
1246 char*p=pop_string();
1247 int q=execute_program(p);
1248 free(p);
1249 if(q) return q-1;
1250 } @+else {
1251 char*p=fetch_code(pop_num());
1252 int q=execute_program(p);
1253 if(q) return q-1;
1257 @ Since the extended characters (|0x80| to |0xFF|) do not correspond to
1258 any commands, here we can use them to execute a subroutine code, allowing
1259 many things related to self-modifying code (and other stuff) to be done
1260 that would be difficult otherwise.
1262 @<Execute a subroutine code from the current character@>= {
1263 char*p=fetch_code(*ptr);
1264 int q=execute_program(p);
1265 if(q) return q-1;
1268 @ @<Cases for register/table operation commands@>=
1269 @-case ':': {
1270 // Store value to table
1271 int n;
1272 if(stack_ptr->is_string) program_error("Number expected");
1273 n=pop_num();
1274 tables[0x7F&*++ptr][n]=pop_num();
1275 break;
1277 @-case ';': {
1278 // Load value from table
1279 stack_ptr->number=tables[0x7F&*++ptr][stack_ptr->number];
1280 break;
1282 @-case 'L': {
1283 // Load value from register named by stack
1284 if(stack_ptr->is_string) program_error("Number expected");
1285 fetch(pop_num());
1286 break;
1288 @-case 'S': {
1289 // Store value in register named by stack
1290 if(stack_ptr->is_string) program_error("Number expected");
1291 store(pop_num());
1292 break;
1294 @-case 'l': {
1295 // Load value from register
1296 fetch(*++ptr);
1297 break;
1299 @-case 's': {
1300 // Store value in register
1301 store(*++ptr);
1302 break;
1305 @ @<Cases for string commands@>=
1306 @-case 'B': {
1307 // Put brackets around a string, or convert number to text
1308 if(stack_ptr->is_string) {
1309 @<Put brackets around string at top of stack@>;
1310 } @+else {
1311 @<Convert top of stack to string representation of a number@>;
1313 break;
1315 @-case 'Z': {
1316 // Calculate number of characters in a string
1317 char*s=pop_string();
1318 push_num(strlen(s));
1319 free(s);
1320 break;
1322 @-case 'a': {
1323 // ``ASCIIfy'' a number
1324 if(stack_ptr->is_string) {
1325 if(stack_ptr->text[0]) stack_ptr->text[1]=0;
1326 } @+else {
1327 int n=stack_ptr->number;
1328 stack_ptr->is_string=1;
1329 stack_ptr->text=malloc(2);
1330 stack_ptr->text[0]=n;
1331 stack_ptr->text[1]=0;
1333 break;
1335 @-case 'A': {
1336 // Take the first character from the string
1337 char*s=stack_ptr->text;
1338 if(!stack_ptr->is_string || !*s) return 0;
1339 push_num(*s);
1340 stack_ptr[-1].text=strdup(s+1);
1341 free(s);
1342 break;
1344 @-case 'N': {
1345 // Convert a register number to its name
1346 int n=stack_ptr->number;
1347 if(stack_ptr->is_string) program_error("Type mismatch");
1348 if(n<256 || n>=names.used+256) program_error("Out of range");
1349 stack_drop();
1350 push_string(names.data[n-256].name);
1351 break;
1354 @ @<Put brackets around string at top of stack@>= {
1355 char*buf=malloc(strlen(stack_ptr->text)+3);
1356 sprintf(buf,"[%s]",stack_ptr->text);
1357 free(stack_ptr->text);
1358 stack_ptr->text=buf;
1361 @ @<Convert top of stack to string representation of a number@>= {
1362 char buf[32];
1363 sprintf(buf,"%d",stack_ptr->number);
1364 stack_drop();
1365 push_string(buf);
1368 @ Here is how the ``Arithmetic IF'' command works: On the stack you have
1369 any three values at the top, and a number underneath it. Those are all
1370 removed, except one of the three values which is selected based on the
1371 sign of the number (the condition value).
1373 @<Cases for condition/compare commands@>=
1374 @-case 'i': {
1375 // Arithmetic IF
1376 @<Do the ``Arithmetic IF''@>;
1377 break;
1379 @-case '&': {
1380 // Bitwise AND
1381 int n=pop_num();
1382 if(stack_ptr[0].is_string || stack_ptr[1].is_string)
1383 program_error("Number expected");
1384 stack_ptr->number&=n;
1385 break;
1388 @ Do you like this algorithm? Is this a real question?
1390 @^strange codes@>
1392 @<Do the ``Arithmetic IF''@>= {
1393 register_value v=stack_ptr[-3];
1394 int n=v.number;
1395 n=-(n<0?2:!n);
1396 stack_ptr[-3]=stack_ptr[n];
1397 stack_ptr[n]=v;
1398 stack_drop();@+stack_drop();@+stack_drop();
1401 @ @<Cases for local registers commands@>=
1402 @-case '<': {
1403 // Save locals
1404 @<Save local registers to the save stack@>;
1405 break;
1407 @-case '>': {
1408 // Restore locals
1409 @<Load local registers from the save stack@>;
1410 break;
1413 @ When there is a program error (such as stack underflow), the following
1414 subroutine is used to handle it.
1416 @d program_error(_text) program_error_(prog,ptr,_text)
1418 @-p void program_error_(char*prog,char*ptr,char*msg) {
1419 fprintf(stderr,"Error in %s on line %d",current_filename,current_line);
1420 fprintf(stderr,"\n! %s\ns%dS%dp%d near \"",msg,stack_ptr-stack,
1421 save_stack_ptr-save_stack,ptr-prog);
1422 @<Display the codes near the part that caused the error@>;
1423 fprintf(stderr,"\"\n");
1424 exit(1);
1427 @ @<Display the codes near the part that caused the error@>= {
1428 char buf[32];
1429 char*p=ptr-5;
1430 int i;
1431 if(p<prog || p>ptr) p=prog;
1432 for(i=0;p+i<=ptr && p[i];i++) buf[i]=p[i];
1433 buf[i]=0;
1434 fprintf(stderr,"%s",buf);
1437 @*Tables and Registers. The tables must be stored here. There are 128
1438 tables with 256 entries each, each of which can store one byte of data.
1439 These tables are used for converting uppercase/lowercase, for deciding
1440 which characters need to be escaped in \TeX, and so on.
1442 The purposes of the built-in registers are also described in this chapter.
1443 The tables and registers named by uppercase letters are for system use.
1444 The tables and registers named by lowercase can be used by the user.
1446 @<Global variables@>=
1447 unsigned char tables[128][256];
1449 @ Here are the uses of the built-in tables and registers:
1450 @^built-in registers@>
1451 @^built-in tables@>
1453 Register \.A: The current position in the current cards area.
1455 Register \.C: The current cards area.
1457 Register \.D: Dots per inch, multiplied by 100.
1459 Register \.E: The escape character for \TeX. If this is a string, the
1460 entire string is the prefix; otherwise, it is a ASCII number of the
1461 character to be used.
1463 Register \.K: Index number for last keyword entry added. Also used when
1464 dealing with keyword operation commands, and when a keyword is matched in
1465 a pattern.
1467 Register \.P: The current pattern area.
1469 Register \.Q: The parameters for the ImageMagick command-line, separated
1470 by spaces.
1472 Register \.T: Alignment tab character for \TeX. Same considerations apply
1473 as the \.E register.
1475 Register \.U: A code to execute for a deck specification enrty with \.x
1476 flag set.
1478 Register \.V: The version number of this program.
1480 Register \.W: A code which pushes the whatsit replacements onto the stack.
1481 It is initialized to a blank string before each line in a card area. It
1482 should push the replacements in the reverse order of the whatsits, so you
1483 could use a code like this, for example: \.{[(Abracadabra)]lW+sW}
1485 Register \.X: Horizontal coordinate across the page (in pixels).
1487 Register \.Y: Vertical coordinate across the page (in pixels).
1489 Register \.Z: Should be set to a code to execute after doing everything
1490 else (but before writing output files).
1492 Table \.E: Indicates which characters need escaped for \TeX. Also used for
1493 category codes in internal typesetting mode (a discussion of the category
1494 codes will be deferred to a later part of this book).
1496 Table \.F: Space factor codes for internal typesetting, where 40 is normal
1497 (multiplying these values by 25 results in the corresponding \.{\\sfcode}
1498 values in \TeX). Zero means no change.
1500 Table \.G: Table containing information for sorting and grouping.
1502 Table \.J: Left margin protrusions for internal typesetting. A value of
1503 128 is normal. Each one unit less or greater than 128 represents a
1504 distance of 0.005 em, where number less than 128 for negative kerns and
1505 greater than 128 for positive kerns. (Note that you will use {\sl negative
1506 negative} kerns to protrude into the margin, both for the left protrusions
1507 and for the right protrusions!)
1509 Table \.K: Right margin protrusions for internal typesetting.
1511 Table \.L: Conversion to lowercase.
1513 Table \.S: Information for natural sorting.
1515 Table \.U: Conversion to uppercase.
1517 Table \.W: Table for word form rules. Zero means a letter, one means a
1518 word separator, two means use to mark beginning of a word, three means use
1519 to mark the end of a word. In this program, it is advantageous to use the
1520 fact that zero means word characters (such as letters), and nonzero means
1521 nonword characters.
1523 @d init_register(_reg,_val) do@+{
1524 registers[_reg].is_string=0;
1525 registers[_reg].number=(_val);
1526 }@+while(0)@;
1528 @d init_register_str(_reg,_val) do@+{
1529 registers[_reg].is_string=1;
1530 registers[_reg].text=strdup(_val);
1531 }@+while(0)@;
1533 @<Initialize the tables and registers@>= {
1534 int i;
1535 for(i=0;i<256;i++) init_register(i,0);
1536 init_register('E','\\');
1537 init_register('V',version_number);
1538 @<Initialize table of alphabetical case conversion@>;
1539 @<Initialize tables for internal typesetting@>;
1542 @ @<Initialize table of alphabetical case conversion@>= {
1543 for(i=0;i<256;i++) tables['L'][i]=tables['U'][i]=i;
1544 for(i='A';i<='Z';i++) {
1545 tables['L'][i]=i+'a'-'A';
1546 tables['U'][i+'a'-'A']=i;
1550 @ @<Display the contents of table |*++ptr|@>= {
1551 int t=*++ptr;
1552 int i;
1553 for(i=0;i<256;i++) {
1554 printf("%c%c",tables[t][i]?'+':'.',@|
1555 (tables[t][i]<0x7F && tables[t][i]>=' ')?tables[t][i]:'.'
1557 if((i&0x0F)==0x0F) printf("\n");
1559 for(i=' ';i<0x7F;i++) if(tables[t][i]) printf("%c",i);
1562 @*Diagnostics. Here is diagnostics commands. These are used to display the
1563 internal information on standard output, so that you can check how these
1564 things are working. (You can also use \.{gdb} for debugging purposes.) A
1565 diagnostics command always starts with a question mark, and is then
1566 followed by one more character indicating the type of diagnostics
1567 requestsed. (Some are followed by an additional character after that.)
1569 @<Do a diagnostics command@>= {
1570 switch(*++ptr) {
1571 case 'c': @<Display the sorted card list@>; @+break;
1572 case 'd': @<Display the deck list@>; @+break;
1573 case 'f': @<Display font information@>; @+break;
1574 case 'k': @<Display the list of keywords@>; @+break;
1575 case 'n': @<Display the list of names@>; @+break;
1576 case 'p': @<Display the list of patterns@>; @+break;
1577 case 's': @<Display the contents of the stack@>; @+break;
1578 case 't': @<Display the contents of table |*++ptr|@>; @+break;
1579 case 'w': @<Display the list of word form rules@>; @+break;
1580 case 'x': @<Display the list of typeset nodes@>; @+break;
1581 case 'y': @<Display typesetting diagnostics@>; @+break;
1582 default: program_error("Unknown type of diagnostics");
1586 @ One subroutine is used here for displaying strings with escaped, so that
1587 it will display on a terminal without messing it up or omitting the
1588 display of some characters.
1590 @-p void display_string(char*s) {
1591 for(;*s;s++) {
1592 if(*s<' ' || *s==0x7F) {
1593 printf("^%c",0x40^*s);
1594 } @+else {
1595 printf("%c",*s);
1600 @ @<Display the contents of the stack@>= {
1601 register_value*p;
1602 for(p=stack;p<=stack_ptr;p++) {
1603 if(p->is_string) {
1604 printf("[");
1605 display_string(p->text);
1606 printf("]\n");
1607 } @+else {
1608 printf("%d\n",p->number);
1613 @ More of the diagnostics functions are included in the chapters for the
1614 data structures which it is displaying.
1616 @*Pattern Matching. Now, finally, after the chapter about patterns, and
1617 going through many other things in between, comes to the chapter in which
1618 patterns are actually being matched.
1620 One structure is used here for the information about how to match it, and
1621 what has been matched from it. The parameter |num_capture| is how many
1622 captured parts there are, and the |start| and |end| arrays store the index
1623 into the |src| string of where the matches are. The entire matched part is
1624 indicated by |start[0]| and |end[0]| (note always |start[0]==0|).
1626 @<Typedefs@>=
1627 typedef struct {
1628 char*src;
1629 char*truesrc; // used for checking true beginning of the line
1630 char*pattern;
1631 unsigned int category;
1632 int start[16];
1633 int end[16];
1634 int num_capture;
1635 } match_info;
1637 @ This first one just matches one pattern against a string to see if it
1638 matches. It returns true if it does match. (It is somewhat inefficient.)
1640 @-p boolean match_pattern(match_info*mat) {
1641 char*src; // current start of source string
1642 char*ptr; // pointer into source string |src|
1643 char*pptr; // pointer into pattern string
1644 src=mat->src; @+ mat->num_capture=0; @+ pptr=mat->pattern; @+ ptr=src;
1645 @<Execute the pattern on the string |src|@>;
1646 mismatch: return 0;
1649 @ This loop executes each command in the pattern in attempt to match each
1650 character. In case of mismatch, it will break out of this loop, and
1651 continue with the next iteration of the loop in the previous section.
1653 @d not_a_marker !(pptr[-1]&0x80)
1655 @<Execute the pattern on the string |src|@>= {
1656 while(*pptr) {
1657 switch(*pptr++) {
1658 case begin_capture:
1659 mat->start[++mat->num_capture]=ptr-mat->src; @+break;
1660 case end_capture: mat->end[mat->num_capture]=ptr-mat->src; @+break;
1661 case match_keyword: @<Do |match_keyword|@>; @+break;
1662 case match_table:
1663 if(!tables[*pptr++][*ptr++]) goto mismatch; @+break;
1664 case optional_table: ptr+=!!tables[*pptr++][*ptr]; @+break;
1665 case failed_match: goto mismatch;
1666 case jump_table:
1667 if(!(pptr=strchr(mat->pattern,0x80|tables[*pptr++][*ptr++])))
1668 goto mismatch;
1669 @+break;
1670 case successful_match: @<Do |successful_match|@>;
1671 case back_one_space: if(ptr--==mat->src) goto mismatch; @+break;
1672 case forward_one_space: if(!*ptr++) goto mismatch; @+break;
1673 case match_left_side: if(ptr!=mat->truesrc) goto mismatch; @+break;
1674 case match_right_side: if(*ptr>=' ') goto mismatch; @+break;
1675 default: if(not_a_marker && pptr[-1]!=*ptr++) goto mismatch;
1680 @ @<Do |successful_match|@>= {
1681 mat->start[0]=0;
1682 mat->end[0]=ptr-mat->src;
1683 return 1;
1686 @ And now, the next part matches from an area and changes the string in
1687 place, possibly by reallocating it. The |src| pointer passed to this
1688 function should be one that can be freed!
1690 @-p char*do_patterns(char*src,int area) {
1691 pattern_data*pat;
1692 match_info mat;
1693 int index=0; // index into |src| string
1694 @<Cancel if there isn't a pattern area@>;
1695 continue_matching:
1696 if(index>=strlen(src)) return src;
1697 pat=pattern_areas.data+name_info(area).pattern_area;
1698 for(;;) {
1699 @<Fill up the |mat| structure for testing the current pattern@>;
1700 if(mat.pattern && match_pattern(&mat)) {
1701 @<Push the captured strings to the stack@>;
1702 @<Call the subroutine associated with this pattern@>;
1703 if(stack_ptr->is_string) {
1704 @<Replace the matched part from the stack and fix the |index|@>;
1705 } @+else {
1706 index+=mat.end[0];
1708 stack_drop();
1709 goto continue_matching;
1711 @<Select the next pattern in this area or |break|@>;
1713 index++; @+ goto continue_matching;
1716 @ @<Cancel if there isn't a pattern area@>= {
1717 if(area<256) return src;
1718 if(!name_info(area).has_pattern_area) return src;
1721 @ @<Fill up the |mat| structure for testing the current pattern@>= {
1722 mat.src=src+index;
1723 mat.truesrc=src;
1724 mat.pattern=pat->text;
1725 mat.category=pat->category;
1728 @ @<Push the captured strings to the stack@>= {
1729 int i;
1730 for(i=mat.num_capture;i;i--) {
1731 push_string(src+index+mat.start[i]);
1732 stack_ptr->text[mat.end[i]-mat.start[i]]=0;
1736 @ @<Call the subroutine associated with this pattern@>= {
1737 execute_program(names.data[pat->subroutine].value.text);
1740 @ The memory allocated is probably more than is needed, but this way is
1741 simpler. It is always sufficient amount, though. Think about it.
1743 @^thought@>
1745 @<Replace the matched part from the stack and fix the |index|@>= {
1746 char*q=malloc(strlen(src)+strlen(stack_ptr->text)+1);
1747 strcpy(q,src);
1748 sprintf(q+index,"%s%s",stack_ptr->text,src+index+mat.end[0]);
1749 free(src);
1750 src=q;
1751 index+=strlen(stack_ptr->text);
1754 @ @<Select the next pattern in this area or |break|@>= {
1755 if(pat->next==none) break;
1756 pat=pattern_areas.data+pat->next;
1759 @ Finally, there is a command |'M'| to do a pattern matching and
1760 replacement with a string, inside of a stack subroutine code.
1762 @<Cases for system commands@>=
1763 @-case 'M': {
1764 // do pattern matching and replacement
1765 int n=pop_num();
1766 if(!stack_ptr->is_string) program_error("Type mismatch");
1767 stack_ptr->text=do_patterns(stack_ptr->text,n);
1768 break;
1771 @*Matching Keywords. Codes for matching keywords have been placed in
1772 another chapter, instead of making the previous chapter longer.
1774 So now we can see how it is matched keywords in a pattern code.
1776 @<Do |match_keyword|@>= {
1777 match_info m;
1778 char mstr[512];
1779 char t=*pptr++; // indicate which table to use
1780 data_index best=none;
1781 int best_length=-1;
1782 @<Try matching each keyword belonging to the category@>;
1783 if(best==none) goto mismatch;
1784 @<Adjust the \.K register for this keyword match@>;
1785 ptr+=m.end[0];
1788 @ @<Adjust the \.K register for this keyword match@>= {
1789 if(registers['K'].is_string) free(registers['K'].text);
1790 registers['K'].is_string=0;
1791 registers['K'].number=best;
1794 @ When matching keywords, all of them will be tried, in case there are
1795 better candidates for the search (bigger is better (so, for example,
1796 |"Power of One"| will override |"Power"|); failing that, later ones are
1797 better than earlier ones (so that user files can override keywords in
1798 template files)).
1800 @^Courtenay, Bryce@>
1801 @^Houghton, Israel@>
1802 @^Luce, Ron@>
1804 @<Try matching each keyword belonging to the category@>= {
1805 data_index i;
1806 foreach(i,keywords) {
1807 if(keywords.data[i].category&mat->category &&
1808 strlen(keywords.data[i].match)>=best_length) {
1809 @<Set up the |match_info| structure called |m|@>;
1810 @<Attempt applying this keyword match@>;
1815 @ @<Set up the |match_info| structure called |m|@>= {
1816 sprintf(mstr,"%s%c%c%c",
1817 keywords.data[i].match,match_table,t,successful_match);
1818 m.src=m.truesrc=ptr;
1819 m.pattern=mstr;
1822 @ @<Attempt applying this keyword match@>= {
1823 if(match_pattern(&m)) {
1824 best=i;
1825 best_length=strlen(keywords.data[i].match);
1829 @*Sorting and Grouping. The card lists can be sorted/grouped using these
1830 commands, which are generally used by macros that create the records for
1831 the cards in the card areas.
1833 @<Cases for system commands@>=
1834 @-case 'n': {
1835 // Add a new list entry
1836 data_index n=new_record(card_list);
1837 card_list.data[n].token_ptr=
1838 card_areas.data[set_card_area(registers['C'].number)].used
1840 break;
1842 @-case 'f': {
1843 // Set a field value of the list entry
1844 data_index n=card_list.used-1;
1845 int x=pop_num();
1846 int y=pop_num();
1847 if(n==none) program_error("No card list is available");
1848 card_list.data[n].field[x&31]=y;
1849 break;
1852 @ Other than the commands to make the list entries above, there must be,
1853 of course, the actual sorting and grouping being done!
1855 Sorting and grouping are controlled by the \.G table. Starting from a
1856 given offset (added), you use thirty-two entries for the thirty-two
1857 fields.
1859 @<Cases for system commands@>=
1860 @-case 'G': {
1861 // Sort the list
1862 sorting_table_offset=pop_num();
1863 qsort(card_list.data,card_list.used,sizeof(list_entry),list_compare);
1864 @<Mark positions in the sorted list@>;
1865 break;
1868 @ @<Global variables@>=
1869 int sorting_table_offset;
1871 @ This is the compare function for the list sorting. It is also worth to
1872 notice here what values belong in the \.G table. (There are also some
1873 other values, which are described a bit later.)
1875 @d no_sort 0
1876 @d primary_ascending 'A'
1877 @d primary_descending 'Z'
1878 @d primary_name 'N'
1879 @d secondary_ascending 'a'
1880 @d secondary_descending 'z'
1881 @d secondary_name 'n'
1882 @d record_sorted_position 'R'
1883 @d reset_high_bits 'q'
1885 @d G_table(_field) (tables['G'][((sorting_table_offset+(_field))&0xFF)])
1886 @d p1s ((list_entry*)p1)
1887 @d p2s ((list_entry*)p2)
1889 @-p int list_compare(const void*p1,const void*p2) {
1890 @<Compare using fields indicated by \.G table@>;
1891 @<Compare using the card's name and the \.S table@>;
1892 @<Compare using the order in which the cards are typed in@>;
1893 return 0; // This can't, but will, happen.
1896 @ @<Compare using fields indicated by \.G table@>= {
1897 int i;
1898 for(i=0;i<32;i++) if(p1s->field[i]!=p2s->field[i]) {
1899 if(G_table(i)==primary_ascending || (G_table(i)&0x80)) {
1900 return (p1s->field[i]>p2s->field[i])?1:-1;
1901 } @+else if(G_table(i)==primary_descending) {
1902 return (p1s->field[i]<p2s->field[i])?1:-1;
1903 } @+else if(G_table(i)==primary_name) {
1904 return name_compare(p1s->field[i],p2s->field[i]);
1907 for(i=0;i<32;i++) if(p1s->field[i]!=p2s->field[i]) {
1908 if(G_table(i)==secondary_ascending) {
1909 return (p1s->field[i]>p2s->field[i])?1:-1;
1910 } @+else if(G_table(i)==secondary_descending) {
1911 return (p1s->field[i]<p2s->field[i])?1:-1;
1912 } @+else if(G_table(i)==secondary_name) {
1913 return name_compare(p1s->field[i],p2s->field[i]);
1918 @ When all else fails, \strike{play dead} use the order in which the cards
1919 have been typed in. This is how it is made stable, and that you can get
1920 the same results on any computer.
1922 @^Smith, Steve@>
1924 @<Compare using the order in which the cards...@>= {
1925 if(p1s->token_ptr>p2s->token_ptr) return 1;
1926 if(p1s->token_ptr<p2s->token_ptr) return -1;
1929 @ The last thing to do after sorting, is mark positions in the list if it
1930 is requested to do so.
1932 In addition, it shall also optionally mark high bits (30 to 27) of some
1933 fields, based on when other fields change. This helps with doing multi-%
1934 dimensional statistics. The fields that it is based on will automatically
1935 be primary sorted since such sorting is required for the marking algorithm
1936 to work properly.
1938 @<Mark positions in the sorted list@>= {
1939 data_index i;
1940 int j;
1941 for(j=0;j<32;j++) {
1942 if(G_table(j)==record_sorted_position) {
1943 foreach(i,card_list) card_list.data[i].field[j]=i;
1944 } @+else if(G_table(j)&0x80) {
1945 @<Mark high bits of fields to prepare for...@>;
1946 } @+else if(G_table(j)==reset_high_bits) {
1947 foreach(i,card_list) card_list.data[i].field[j]&=0x0FFFFFFF;
1952 @ The rule is that whenever the current field's value changes, the bit in
1953 the corresponding grouping field will be flipped. Since the statistics
1954 grouping always treats consecutive equal values in the grouping field as
1955 belonging to the same group, this is a way to insert ``group breaks'' into
1956 the list.
1958 @<Mark high bits of fields to prepare for complex statistics@>= {
1959 int f=G_table(j)&0x1F; // other field number
1960 int v=card_list.data[0].field[j]; // previous value
1961 int k=1<<(27+((G_table(j)&0x60)>>5)); // bit flip value
1962 int b=0; // current bit value
1963 foreach(i,card_list) {
1964 if(v!=card_list.data[i].field[j]) b^=k;
1965 card_list.data[i].field[f]&=~k;
1966 card_list.data[i].field[f]|=b;
1967 v=card_list.data[i].field[j];
1971 @ @<Display the sorted card list@>= {
1972 data_index i;
1973 int j;
1974 foreach(i,card_list) {
1975 printf("%d=[ ",card_list.data[i].token_ptr);
1976 for(j=0;j<32;j++) printf("%d ",card_list.data[i].field[j]);
1977 printf("]\n");
1981 @*Natural Sorting. A natural compare algorithm is used here. It is a
1982 generalization of Martin Pool's algorithm\biblio{Pool, Martin. ``Natural
1983 Order String Comparison''. {\tt
1984 http://sourcefrog.net/projects/natsort/}.}.
1986 The \.S table maps from character tokens to the sorting specifications.
1987 Name tokens are converted to |whatsit| when looking up in this table.
1989 Tokens are grouped into digits, letters, and priority letters. There are
1990 also some extras, such as spaces and radix point. A string of consecutive
1991 digits is treated as numeric, so a number with more digits comes after a
1992 number with less digits.
1994 Priority letters are used mainly for sorting roman numerals. Two or more
1995 consecutive priority letters are considered as a group, otherwise they are
1996 treated in the same way as ordinary letters. A group is ranked with the
1997 letters latest in the alphabet, so for example, if |'I'| and |'X'| are
1998 priority, then |"IX"| is placed between |"W"| and |"X"|. This way, all
1999 roman numerals from I to XXXIX will be sorted correctly.
2001 @^natural compare@>
2002 @^Pool, Martin@>
2004 @d nat_end_low 0
2005 @d nat_end_high 1
2006 @d nat_space 2
2007 @d nat_ignore 3
2008 @d nat_radix_point 4
2010 @d nat_digit_zero 64 // digits go up to 127
2011 @d nat_first_letter 128 // letters go up to 191
2012 @d nat_first_priority_letter 192 // priority letters go up to 255
2013 @d nat_high_value 256
2015 @<Compare using the card's name and the \.S table@>= {
2016 token*pa=card_areas.data[set_card_area(registers['C'].number)].tokens
2017 +p1s->token_ptr;
2018 token*pb=card_areas.data[set_card_area(registers['C'].number)].tokens
2019 +p2s->token_ptr;
2020 boolean fractional=0; // Are we reading digits after a radix point?
2021 int a,b,c;
2022 for(;;pa++,pb++) {
2023 begin_natural_compare_loop: @/
2024 a=tables['S'][*pa>=256?whatsit:*pa];
2025 @+ b=tables['S'][*pb>=256?whatsit:*pb];
2026 @<Skip over leading spaces and/or zeros@>;
2027 @<Process a run of digits@>;
2028 @<Check if the end of either string is reached@>;
2029 @<Check for a radix point@>;
2030 @<Process priority letters@>;
2031 @<Check if the current positions of each string sufficiently differ@>;
2035 @ @<Skip over leading spaces and/or zeros@>= {
2036 while(a==nat_space||a==nat_ignore||(!fractional&&a==nat_digit_zero)) {
2037 int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2038 if(a!=nat_ignore) fractional=0;
2039 if(!fractional && a==nat_digit_zero
2040 && aa>=nat_digit_zero && aa<nat_first_letter) break;
2041 pa++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2043 while(b==nat_space||b==nat_ignore||(!fractional&&b==nat_digit_zero)) {
2044 int bb=tables['S'][pa[1]>=256?whatsit:pa[1]];
2045 if(b!=nat_ignore) fractional=0;
2046 if(!fractional && b==nat_digit_zero
2047 && bb>=nat_digit_zero && bb<nat_first_letter) break;
2048 pb++; @+ b=tables['S'][*pb>=256?whatsit:*pb];
2052 @ @<Process a run of digits@>= {
2053 if(a>=nat_digit_zero&&a<nat_first_letter&&
2054 b>=nat_digit_zero&&b<nat_first_letter) {
2055 if((c=(fractional?compare_left:compare_right)(pa,pb))) return c;
2056 @<Skip the run of digits, since they are the same@>;
2057 fractional=0;
2060 @^strange codes@>
2062 @ Compare two left-aligned numbers: the first to have a different value
2063 wins. This function and |compare_right| are basically equivalent, there
2064 are only a few differences (this one is the simpler one).
2066 @-p int compare_left(token*pa,token*pb) {
2067 int a,b;
2068 for(;;pa++,pb++) {
2069 a=tables['S'][*pa>=256?whatsit:*pa];
2070 @+ b=tables['S'][*pb>=256?whatsit:*pb];
2071 @<Skip over ignored characters@>;
2072 @<If neither |a| nor |b| is digit, |break|@>;
2073 @<If one is a digit and the other isn't, the longest run wins@>;
2074 @<If both are different digits, the greater one wins@>;
2076 return 0;
2079 @ The longest run of digits wins. That aside, the greatest value wins, but
2080 we can't know that it will until we've scanned both numbers to know they
2081 have the same magnitude, so we remember it in |bias|.
2083 @-p int compare_right(token*pa,token*pb) {
2084 int a,b;
2085 int bias=0;
2086 for(;;pa++,pb++) {
2087 a=tables['S'][*pa>=256?whatsit:*pa];
2088 @+ b=tables['S'][*pb>=256?whatsit:*pb];
2089 @<Skip over ignored characters@>;
2090 @<If neither |a| nor |b| is digit, |break|@>;
2091 @<If one is a digit and the other isn't, the longest run wins@>;
2092 @<If both are digits, set the |bias|@>;
2094 return bias;
2097 @ Ignored characters might be commas for grouping digits into thousands.
2099 @<Skip over ignored characters@>= {
2100 while(a==nat_ignore) {
2101 pa++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2103 while(b==nat_ignore) {
2104 pb++; @+ b=tables['S'][*pb>=256?whatsit:*pb];
2108 @ @<If neither |a| nor |b| is digit, |break|@>= {
2109 if(!(a>=nat_digit_zero&&a<nat_first_letter)&&
2110 !(b>=nat_digit_zero&&b<nat_first_letter)) break;
2113 @ @<If one is a digit and the other isn't, the longest run wins@>= {
2114 if(!(a>=nat_digit_zero&&a<nat_first_letter)) return -1;
2115 if(!(b>=nat_digit_zero&&b<nat_first_letter)) return 1;
2118 @ @<If both are different digits, the greater one wins@>= {
2119 if(a!=b) return a-b;
2122 @ @<If both are digits, set the |bias|@>= {
2123 if(a!=b && !bias) bias=(a<b)?-1:1;
2126 @ @<Skip the run of digits, since they are the same@>= {
2127 while(a>=nat_digit_zero&&a<nat_first_letter) {
2128 pa++; @+ pb++; @+ a=tables['S'][*pa>=256?whatsit:*pa];
2130 b=tables['S'][*pb>=256?whatsit:*pb];
2133 @ @<Check if the end of either string is reached@>= {
2134 if(a==nat_end_low && b>nat_end_high) return -1;
2135 if(b==nat_end_low && a>nat_end_high) return 1;
2136 if(a==nat_end_high && b>nat_end_high) return 1;
2137 if(b==nat_end_high && a>nat_end_high) return -1;
2138 if(a<=nat_end_high && b<=nat_end_high) break; // tied
2141 @ A radix point must be followed by a digit, otherwise it is considered to
2142 be punctuation (and ignored). Radix points come before digits in the
2143 sorting order (|".5"| comes before |"5"|).
2145 @<Check for a radix point@>= {
2146 if(a==nat_radix_point && b==nat_radix_point) {
2147 int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2148 int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2149 if(aa>=nat_digit_zero&&aa<nat_first_letter
2150 &&bb>=nat_digit_zero&&bb<nat_first_letter) fractional=1;
2151 } @+else if(a==nat_radix_point) {
2152 int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2153 if(!(aa>=nat_digit_zero&&aa<nat_first_letter)) {
2154 pa++; goto begin_natural_compare_loop;
2156 } @+else if(b==nat_radix_point) {
2157 int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2158 if(!(bb>=nat_digit_zero&&bb<nat_first_letter)) {
2159 pb++; goto begin_natural_compare_loop;
2164 @ This is used so that |"IX"| can be sorted between |"VIII"| and |"X"|. In
2165 normal alphabetical order, |"IX"| sorts before |"V"|. This algorithm makes
2166 it so that doesn't happen. For example: |a| is |'I'| and |aa| (the
2167 character after |a| in the text) is |'X'| (the check |aa>a| ensures that
2168 it too is priority, in addition to checking that |a| represents a negative
2169 part of a roman number), and |b| is |'V'|. Now, since |'V'| comes between
2170 |'I'| and |'X'| in the alphabetical order, the condition is checked to be
2171 valid and it overrides the later check.
2173 @<Process priority letters@>= {
2174 if(a>=nat_first_priority_letter) {
2175 int aa=tables['S'][pa[1]>=256?whatsit:pa[1]];
2176 if(aa>a && b>=nat_first_letter && (b&63)>(a&63) && (b&63)<(aa&63))
2177 return 1;
2179 if(b>=nat_first_priority_letter) {
2180 int bb=tables['S'][pb[1]>=256?whatsit:pb[1]];
2181 if(bb>b && a>=nat_first_letter && (a&63)>(b&63) && (a&63)<(bb&63))
2182 return -1;
2186 @ At this point, |a| and |b| will both be |@[@]>=nat_radix_point|. Numbers
2187 always come after letters (this rule is designed so that when a radix
2188 point is found after a number, it will make a larger number; otherwise it
2189 will be followed by a letter and therefore the one followed by the letter
2190 is lesser since it has no fractional part to make it greater).
2192 @<Check if the current positions of each string suffic...@>= {
2193 if(a>=nat_first_priority_letter) a-=64;
2194 if(b>=nat_first_priority_letter) b-=64;
2195 if(a<nat_first_letter) a+=128;
2196 if(b<nat_first_letter) b+=128;
2197 if(a!=b) return (a<b)?-1:1;
2200 @*Name Sorting. This kind of sorting is used when items are grouped
2201 together by some extra field in the statistics, such as creature types in
2202 Magic: the Gathering.
2204 It works in a similar way to the natural sorting algorithm, but this time
2205 it is simpler and not as many things need to be checked. Digits and
2206 priority letters are treated as normal letters, and the types |nat_space|,
2207 |nat_ignore|, and |nat_radix_point| are all ignored. In addition, a null
2208 terminator is always treated as |nat_end_low|.
2210 If both names compare the same, their number is used instead, in order to
2211 force sorting stability.
2213 @-p int name_compare(int n1,int n2) {
2214 char*s1=name_info(n1).name;
2215 char*s2=name_info(n2).name;
2216 int a,b;
2217 for(;*s1 || *s2;s1++,s2++) {
2218 a=(*s1)?tables['S'][*s1]:nat_end_low;
2219 b=(*s2)?tables['S'][*s2]:nat_end_low;
2220 @<Skip over spaces and ignored characters@>;
2221 @<Check if the end of either string is reached@>;
2222 @<Check if the current positions of...@>;
2224 return (n1<n2)?-1:1;
2227 @ @<Skip over spaces and ignored characters@>= {
2228 while(a<nat_digit_zero) {
2229 s1++; @+ a=(*s1)?tables['S'][*s1]:nat_end_low;
2231 while(b<nat_digit_zero) {
2232 s2++; @+ b=(*s2)?tables['S'][*s2]:nat_end_low;
2236 @*Statistics. After the card lists are created and sorted and grouped, it
2237 can make statistics from them. It can be just a plain list, or it can be
2238 in summary of groups, measuring count, minimum, maximum, mean, median, and
2239 so on.
2241 First we do the simple iteration.
2243 @^mean@>
2244 @^median@>
2245 @^groups@>
2246 @^minimum@>
2247 @^maximum@>
2249 @<Cases for system commands@>=
2250 @-case 'V': {
2251 // Iterate the card list
2252 data_index i;
2253 char*q=pop_string();
2254 if(!stack_ptr[1].is_string) program_error("Type mismatch");
2255 foreach(i,card_list) {
2256 push_num(card_list.data[i].token_ptr);
2257 store('A');
2258 execute_program(q);
2260 free(q);
2261 break;
2263 @-case 'v': {
2264 // Read a field from the card list
2265 int x=pop_num()&31;
2266 int y=0;
2267 data_index i;
2268 foreach(i,card_list) {
2269 if(registers['A'].number==card_list.data[i].token_ptr)
2270 y=card_list.data[i].field[x];
2272 push_num(y);
2273 break;
2276 @ That was simple, see? Now to do gathering statistics of summary of
2277 groups, which is a bit more complicated. The list is expected to be sorted
2278 by the group field primary, and the statistics field ascending as
2279 secondary, in order to make the correct calculation of the fields.
2281 However, it will not do the sorting automatically, since there are some
2282 reasons why you might want it to work differently. One thing you can do is
2283 to sort the group field {\sl secondary} and some other more major group as
2284 primary, in order to do two-dimensional statistics, and this will work as
2285 long as you do not require the minimum, maximum, or median.
2287 @<Cases for system commands@>=
2288 @-case 'g': {
2289 // Gather statistics of groups
2290 data_index i,si=0;
2291 int x=pop_num()&31; // field for grouping
2292 int y=pop_num()&31; // field to measure statistics with
2293 int sum1,sum2; // running totals of $s_1$ and $s_2$
2294 sum1=sum2=0;
2295 char*q=pop_string(); // code to execute for each group
2296 if(!stack_ptr[1].is_string) program_error("Type mismatch");
2297 foreach(i,card_list) {
2298 if(card_list.data[i].field[x]!=card_list.data[si].field[x]) {
2299 @<Send the results of the current group@>;
2300 sum1=sum2=0; @+ si=i;
2302 @<Add to the running totals@>;
2304 @<Send the results of the current group@>;
2305 free(q);
2306 break;
2309 @ Running totals are kept for two quantities called $s_1$ and $s_2$. There
2310 is also $s_0$, but that can be calculated easily using subtraction, so
2311 there is no need to keep a running total. If the sample values are denoted
2312 $x_k$, the following equation represents the running totals:
2313 $$s_j=\sum_{k=1}^N{x_k^j}$$ (note that $s_0=N$.)
2315 @^mathematics@>
2317 @<Add to the running totals@>= {
2318 sum1+=card_list.data[i].field[y];
2319 sum2+=card_list.data[i].field[y]*card_list.data[i].field[y];
2322 @ Now we will send the results and call |q|. The results are sent to the
2323 stack in the following order: $s_0$, $s_1$, $s_2$, $Q_0$, $2Q_2$, $Q_4$
2324 (where $Q_0$ is the minimum, $Q_2$ the median, and $Q_4$ the maximum).
2326 From these results, it is then possible to calculate the standard
2327 deviation: $$\sigma={1\over s_0}\sqrt{s_0s_2-s_1^2}$$ and
2328 $$s=\sqrt{s_0s_2-s_1^2\over s_0(s_0-1)}.$$
2330 @^mathematics@>
2332 @<Send the results of the current group@>= {
2333 push_num(i-si); // $s_0$
2334 push_num(sum1); // $s_1$
2335 push_num(sum2); // $s_2$
2336 push_num(card_list.data[si].field[y]); // $Q_0$
2337 push_num(
2338 card_list.data[(si+i)/2].field[y]+card_list.data[(si+i+1)/2].field[y]
2339 ); // $2Q_2$
2340 push_num(card_list.data[i-1].field[y]); // $Q_4$
2341 @# push_num(card_list.data[si].token_ptr); @+ store('A');
2342 execute_program(q);
2345 @*Random Pack Generation. Now the codes so that it can create random packs
2346 (such as booster packs) by using the card lists and deck lists.
2348 A command |'P'| is used for evaluation of a deck list. It expects the deck
2349 list number and the code to execute for each card on the list.
2351 @^booster pack@>
2353 @<Cases for system commands@>=
2354 @-case 'P': {
2355 // Generate a random pack or deck
2356 data_index s=set_deck_list(pop_num());
2357 data_index n; // current deck list entry
2358 if(stack_ptr[1].is_string) program_error("Number expected");
2359 @<Figure out what cards belong in the pack@>;
2360 @<Execute the code on the stack for each card in the pack@>;
2361 break;
2364 @ @<Figure out what cards belong in the pack@>= {
2365 deck_entry*e;
2366 int tries=1000; // How many times can you retry if it fails?
2367 figure_out_again:
2368 if(!--tries) program_error("No cards matched the deck criteria");
2369 n=s;
2370 @<Reset |amount_in_pack| of each card to zero@>;
2371 while(n!=none && (n=(e=deck_lists.data+n)->next)!=none)
2372 @<Process this deck entry@>;
2375 @ @<Reset |amount_in_pack| of each card to zero@>= {
2376 data_index i;
2377 foreach(i,card_list) card_list.data[i].amount_in_pack=0;
2380 @ The deck entry must be processed according to the flags. Here is a list
2381 of flags:
2383 \.a: Use all cards that meet the criteria, instead of only one. If this is
2384 the case, it is possible to use negative weights to remove cards from the
2385 pack. Also, it cannot fail.
2386 [Combine with \.{x}]
2388 \.k: Select without replacement. It is fail if the total weight is not
2389 enough. There are two ways in which this differs from \.u (below). One is
2390 that the previous lines in the deck list are not used. The other one is
2391 that if the weight is more than one, there will be more than one ball for
2392 that card, therefore the same card can be picked up multiple times.
2393 [Combine with \.{sux}]
2395 \.n: Use the |amount| as a probability. If |amount<=100| then the
2396 probability is |amount/100| otherwise it is |100/amount|. This is a
2397 probability of using the |name| to select another deck list instead of
2398 this one.
2399 [Combine with nothing]
2401 \.s: Skip the next line if this line does not fail. (Normally, if one line
2402 fails, everything does, and you have to try again.)
2403 [Combine with \.{kux}]
2405 \.u: Require unique selection. It is fail if the card is already in this
2406 pack.
2407 [Combine with \.{ksx}]
2409 \.x: Pass the |name| as a string to the code in the \.U register, and then
2410 use the resulting code as the code to determine weights instead of using
2411 the code in the register named by |name| directly. Now you can type things
2412 such as |"12x Forest"| into your deck list.
2413 [Combine with \.{aksu}]
2415 @<Process this deck entry@>= {
2416 if(e->flags&lflag('n')) {
2417 @<Determine whether or not to skip to another deck list@>;
2418 } @+else {
2419 char*c; // code for weights of each card
2420 int total; // total weight of cards
2421 data_index*bag=malloc(sizeof(data_index));
2422 @<Get the code |c| for the weights of each card@>;
2423 @<Calculate the weights of each card@>;
2424 if(!(e->flags&lflag('a')))
2425 @<Select some of the cards at random and add them to the pack@>;
2426 if(e->flags&lflag('x')) free(c);
2427 free(bag);
2431 @ @<Determine whether or not to skip to another deck list@>= {
2432 boolean q;
2433 if(e->amount<=100) {
2434 q=(gen_random(100)<e->amount);
2435 } @+else {
2436 q=(100<gen_random(e->amount));
2438 if(q) n=set_deck_list(find_name(e->name));
2441 @ @<Get the code |c| for the weights of each card@>= {
2442 if(e->flags&lflag('x')) {
2443 execute_program(registers['U'].text);
2444 if(stack_ptr->is_string) {
2445 c=pop_string();
2446 } @+else {
2447 program_error("Type mismatch");
2449 } @+else {
2450 int n=find_name(e->name);
2451 if(name_info(n).value.is_string) {
2452 c=name_info(n).value.text;
2453 } @+else {
2454 program_error("Type mismatch");
2459 @ @<Calculate the weights of each card@>= {
2460 data_index i;
2461 foreach(i,card_list) {
2462 registers['A'].number=card_list.data[i].token_ptr;
2463 execute_program(c);
2464 if(stack_ptr->number) {
2465 if(e->flags&lflag('a')) {
2466 card_list.data[i].amount_in_pack+=e->amount*stack_ptr->number;
2467 } @+else if(stack_ptr->number>0) {
2468 @<Add the cards to the |bag|@>;
2471 stack_drop();
2475 @ The |bag| is like, you put the balls in the bag so that you can mix it
2476 and take one out, whatever number is on the ball is the card you put into
2477 the pack. Except, that there is no balls and no bag.
2479 There is one ball per point of weight.
2481 @^balls@>
2483 @<Add the cards to the |bag|@>= {
2484 int j=stack_ptr->number;
2485 bag=realloc(bag,(total+j)*sizeof(data_index));
2486 while(j--) bag[total+j]=i;
2487 total+=stack_ptr->number;
2490 @ If it is not a line which adds all possibilities at once, then the cards
2491 must be selected from the |bag| at random to bag them. In some cases it
2492 will fail.
2494 @<Select some of the cards at random and add them to the pack@>= {
2495 data_index r;
2496 int amount=e->amount;
2497 bag_next:
2498 if(!total) @<Deal with bag failure@>;
2499 r=gen_random(total);
2500 if((e->flags&lflag('u')) && card_list.data[bag[r]].amount_in_pack) {
2501 bag[r]=bag[--total];
2502 goto bag_next;
2504 card_list.data[bag[r]].amount_in_pack++;
2505 if(e->flags&lflag('k')) bag[r]=bag[--total];
2506 if(amount--) goto bag_next;
2507 @#if(e->flags&lflag('s')) n=deck_lists.data[n].next;
2508 bag_done: ;
2511 @ @<Deal with bag failure@>= {
2512 if(e->flags&lflag('s')) goto bag_done;
2513 else goto figure_out_again;
2516 @ Now it must do stuff using the list which is generated. The quantity for
2517 how many of that card is pushed on the stack, and this is done even for
2518 cards with negative quantity (but not for zero quantity).
2520 @<Execute the code on the stack for each card in the pack@>= {
2521 data_index i;
2522 char*q=pop_string();
2523 if(!stack_ptr[1].is_string) program_error("Type mismatch");
2524 foreach(i,card_list) {
2525 if(card_list.data[i].amount_in_pack) {
2526 push_num(card_list.data[i].amount_in_pack);
2527 execute_program(q);
2530 free(q);
2533 @*Reading Input Files. Now it is time for the part of the program where
2534 input files are read and processed. The areas of the file (and other
2535 special commands) are indicated using \.@@ signs.
2537 At first we have state information. Each state is labeled by uppercase
2538 letters, or by digits 1 to 9. The high bit is set for the heading state,
2539 meaning the first line that contains the name and/or other heading
2540 information.
2542 @d null_state 0
2543 @d card_state 'C'
2544 @d deck_state 'D'
2545 @d execute_state 'E'
2546 @d file_state 'F'
2547 @d include_state 'I'
2548 @d keyword_state 'K'
2549 @d image_state 'M'
2550 @d pattern_state 'P'
2551 @d subroutine_state 'S'
2552 @d font_state 'T'
2553 @d encoding_state 'U'
2554 @d wordforms_state 'W'
2555 @d heading 0x80
2557 @<Global variables@>=
2558 int cur_state;
2559 data_index cur_name;
2560 data_index cur_data;
2561 boolean omit_line_break;
2563 @ The next thing that must be kept track of for input files is the stack
2564 of open input files.
2566 @d max_pathname_length 128
2567 @d max_filename_length 128
2568 @d max_input_stack 128
2569 @d max_line_length 256
2571 @<Typedefs@>=
2572 typedef struct {
2573 FILE*fp; // zero for terminal input
2574 char name[max_filename_length+1];
2575 int line;
2576 } input_file_data;
2578 @ @<Global variables@>=
2579 input_file_data input_files[max_input_stack];
2580 input_file_data*current_input_file=input_files;
2581 char input_buffer[max_line_length];
2583 @ Some macros are useful to access the current file data.
2585 @d current_line (current_input_file->line)
2586 @d current_filename (current_input_file->name)
2587 @d current_fp (current_input_file->fp)
2589 @d parsing_error(_text) fprintf(stderr,"%s on line %d in %s\n",
2590 _text,current_line,current_filename)@;
2592 @ There is also conditional processing directives, which uses a single
2593 variable to keep track of the level. If it is greater than zero, the
2594 condition is false, and it is increased for nesting conditions (the
2595 nested conditions have no truth to them).
2597 @<Global variables@>=
2598 int condition_level=0;
2600 @ This subroutine inputs the next line. True is returned if there is a
2601 line, or false if it is finished.
2603 It is necessary to check for end of file and if so, close that file and
2604 try the one it was included from; and if it is terminal input, display the
2605 current state when prompting input from the user.
2607 @-p boolean input_line(void) {
2608 input_line_again: if(current_fp) {
2609 @<Get a line of input from the file@>;
2610 } @+else {
2611 @<Get a line of terminal input@>;
2613 @<Remove trailing |'\n'|, |'\r'|, and spaces@>;
2614 ++current_line;
2615 return 1;
2618 @ @<Get a line of input from the file@>= {
2619 if(!fgets(input_buffer,max_line_length,current_fp)) {
2620 memusage_log("Closing input file",current_input_file-input_files)@;
2621 fclose(current_fp);
2622 if(current_input_file>input_files) {
2623 --current_input_file;
2624 goto input_line_again;
2625 } @+else {
2626 return 0;
2631 @ @<Get a line of terminal input@>= {
2632 printf("\n%c> ",cur_state?cur_state:'>');
2633 fflush(stdout);
2634 if(!fgets(input_buffer,max_line_length,stdin)) return 0;
2637 @ This function is used to open the main input file.
2639 @-p void open_input(char*name) {
2640 if(++current_input_file>input_files+max_input_stack) {
2641 fprintf(stderr,"Too many simultaneous input files\n");
2642 @.Too many simultaneous...@>
2643 exit(1);
2645 memusage_log("Opening input file",current_input_file-input_files)@;
2646 strcpy(current_filename,name);
2647 current_line=0;
2648 current_fp=fopen(name,"r");
2649 if(!current_fp) {
2650 fprintf(stderr,"Cannot open input file: %s\n",name);
2651 @.Cannot open input file@>
2652 exit(1);
2656 @ Trailing newlines and spaces are removed. On some computers, there will
2657 be a carriage return before the line feed, it should be removed, so that
2658 the same file will work on other computers, too.
2660 @d last_character_input input_buffer[strlen(input_buffer)-1]
2662 @<Remove trailing |'\n'|, |'\r'|, and spaces@>= {
2663 if(last_character_input=='\n') last_character_input=0;
2664 if(last_character_input=='\r') last_character_input=0;
2665 while(last_character_input==' ') last_character_input=0;
2668 @ The input states start at these values.
2670 @<Initialize the input states@>= {
2671 cur_state=execute_state;
2672 cur_name=cur_data=0;
2675 @ Now it is the time to do the actual processing according to the contents
2676 of the lines of the file. A line starting with \.@@ sign will indicate a
2677 special command (to operate in all modes) or a mode switch command.
2679 @d delete_chars(_buf,_c) memmove((_buf),(_buf)+(_c),strlen((_buf)+(_c))+1)
2681 @<Process the input files@>= {
2682 char*buf;
2683 while(input_line()) {
2684 buf=input_buffer;
2685 if(condition_level) {
2686 buf+=strspn(buf," ");
2687 condition_level+=!strcmp(buf,"@@<");
2688 condition_level-=!strcmp(buf,"@@>");
2689 } @+else {
2690 omit_line_break=1;
2691 @<Convert \.@@ commands in the |input_buffer|@>;
2692 omit_line_break=0;
2693 process_line(buf);
2698 @ @<Convert \.@@ commands in the |input_buffer|@>= {
2699 char*ptr=input_buffer;
2700 while(*ptr) {
2701 if(*ptr=='@@') {
2702 @<Convert the current \.@@ command@>;
2703 } @+else {
2704 ptr++;
2709 @ @<Convert the current \.@@ command@>= {
2710 switch(*++ptr) {
2711 case '@@': @/
2712 delete_chars(ptr,1);
2713 break;
2714 case '.': @<Process \.{@@.} command@>;@+break;
2715 case '&': @<Process \.{@@\&} command@>;@+break;
2716 case '^': @<Process \.{@@\^} command@>;@+break;
2717 case '(': @<Process \.{@@(} command@>;@+break;
2718 case '<': @<Process \.{@@<} command@>;@+break;
2719 case '>': @<Remove this command from the input@>;@+break;
2720 default: @/
2721 if((*ptr>='A' && *ptr<='Z') || (*ptr>='0' && *ptr<='9')) {
2722 @<Enter a |heading| state@>;
2723 } @+else {
2724 parsing_error("Unknown @@ command");
2729 @ @<Remove this command from the input@>= {
2730 ptr--;
2731 delete_chars(ptr,2);
2734 @ Heading states are used for the first line of a section in the file.
2735 After that line is processed, it becomes the corresponding non-heading
2736 state |(cur_state&~heading)|.
2738 Note: The state |'0'| is deliberately unused; you might use it for
2739 documentation areas, for example.
2741 @^documentation areas@>
2743 @<Enter a |heading| state@>= {
2744 cur_state=heading|*ptr--;
2745 delete_chars(ptr,2);
2746 while(*ptr==' ' || *ptr=='\t') delete_chars(ptr,1);
2749 @ @-p void process_line(char*buf) {
2750 int q=cur_state;
2751 cur_state&=~heading;
2752 switch(q) {
2753 case card_state: @<Process card state@>;@+break;
2754 case deck_state: @<Process deck state@>;@+break;
2755 case execute_state: @<Process execute state@>;@+break;
2756 case file_state: @<Process file state@>;@+break;
2757 case keyword_state: @<Process keyword state@>;@+break;
2758 case pattern_state: @<Process pattern state@>;@+break;
2759 case subroutine_state: @<Process subroutine state@>;@+break;
2760 case wordforms_state: @<Process word forms state@>;@+break;
2761 case card_state|heading: @<Process card heading@>;@+break;
2762 case deck_state|heading: @<Process deck heading@>;@+break;
2763 case file_state|heading: @<Process file heading@>;@+break;
2764 case include_state|heading: @<Process include heading@>;@+break;
2765 case keyword_state|heading: @<Process keyword heading@>;@+break;
2766 case pattern_state|heading: @<Process pattern heading@>;@+break;
2767 case subroutine_state|heading: @<Process subroutine heading@>;@+break;
2768 default: ; // nothing happens
2772 @ Sometimes you might want a macro which can send a line programmatically.
2773 So, here is the way that it is done.
2775 @<Cases for system commands@>=
2776 @-case 'X': {
2777 // Process a line by programming
2778 int n;
2779 if(stack_ptr->is_string) program_error("Type mismatch");
2780 n=pop_num();
2781 if(n) cur_state=n|heading;
2782 if(!stack_ptr->is_string) program_error("Type mismatch");
2783 omit_line_break=1;
2784 process_line(stack_ptr->text);
2785 stack_drop();
2786 break;
2789 @*Inner Commands. These are commands other than the section headings.
2791 @ The first command to deal with is simple--it is a comment. The rest of
2792 the current line is simply discarded.
2794 @<Process \.{@@.} command@>= {
2795 ptr[-1]=0;
2798 @ This command is a pattern split. It means it will process the part of
2799 the line before this command and then process the stuff after it. The
2800 variable |omit_line_break| is 1 if this command is used; because it means
2801 there will not be a line break. (Otherwise, patterns and so on are split
2802 at line breaks.)
2804 @<Process \.{@@\&} command@>= {
2805 ptr[-1]=0;
2806 process_line(buf);
2807 buf=++ptr;
2810 @ This allows control characters to be inserted into the input. This code
2811 takes advantage of the way the ASCII code works, in which stripping all
2812 but the five low bits can convert a letter (uppercase or lowercase) to its
2813 corresponding control character.
2815 @^control character@>
2817 @<Process \.{@@\^} command@>= {
2818 ptr[1]&=0x1F;
2819 --ptr;
2820 delete_chars(ptr,2);
2823 @ The following command is used to execute a code in a different state and
2824 then include the results in this area.
2826 @<Process \.{@@(} command@>= {
2827 char*p;
2828 char*q;
2829 @<Skip over the name and save the rest of the line@>;
2830 @<Execute the code for the named subroutine@>;
2831 @<Insert the returned string and fix the line buffer@>;
2834 @ @<Skip over the name and save the rest of the line@>= {
2835 p=ptr+1;
2836 while(*ptr && *ptr!=')') ptr++;
2837 q=strdup(ptr+!!*ptr);
2838 *ptr=0;
2841 @ @<Execute the code for the named subroutine@>= {
2842 int n=find_name(p);
2843 execute_program(fetch_code(n));
2846 @ @<Insert the returned string and fix the line buffer@>= {
2847 char*s=pop_string();
2848 sprintf(p-2,"%s%s",s,q);
2849 ptr=p+strlen(s)-2;
2850 free(s);
2851 free(q);
2854 @ This command is used for conditional processing. The condition value
2855 comes from the stack. Zero is false, everything else is true.
2857 @<Process \.{@@<} command@>= {
2858 --ptr;
2859 delete_chars(ptr,2);
2860 condition_level=!stack_ptr->number;
2861 stack_drop();
2864 @*Card State. Cards are added to the card areas by using the card state.
2865 The \.C register tells which is the current card area, and \.P register is
2866 used to select the current pattern area. The pattern area is used to match
2867 patterns after reading a line. Please note that it won't work to change
2868 the value of the \.C register during the card state.
2870 @<Process card heading@>= {
2871 int n=find_name(buf);
2872 cur_data=set_card_area(n);
2873 cur_name=n-256;
2874 push_num(n);@+store('C');
2877 @ @<Process card state@>= {
2878 char*b;
2879 if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
2880 @<Initialize the \.W register@>;
2881 b=do_patterns(strdup(buf),registers['P'].number);
2882 if(registers['W'].is_string) execute_program(registers['W'].text);
2883 @<Send the tokens of |b| and replace whatsits@>;
2884 free(b);
2887 @ @<Initialize the \.W register@>= {
2888 if(registers['W'].is_string) free(registers['W'].text);
2889 registers['W'].is_string=1;
2890 registers['W'].text=strdup("");
2893 @ @<Send the tokens of |b| and replace whatsits@>= {
2894 char*p;
2895 for(p=b;*p;p++) {
2896 if(*p==whatsit) {
2897 send_token(cur_data,pop_num());
2898 } @+else {
2899 send_token(cur_data,(*p==1)?0:*p);
2904 @ There is one command you might want to send tokens in any other time.
2906 @<Cases for system commands@>=
2907 @-case 'T': {
2908 // Add tokens to the card area
2909 if(stack_ptr->is_string) {
2910 @<Send tokens from the string on the stack@>;
2911 } @+else {
2912 send_token(set_card_area(registers['C'].number),stack_ptr->number);
2914 stack_drop();
2915 break;
2918 @ @<Send tokens from the string on the stack@>= {
2919 char*p;
2920 data_index q=set_card_area(registers['C'].number);
2921 for(p=stack_ptr->text;*p;p++) send_token(q,*p);
2924 @*Deck State. Deck state is used for creating deck lists and random packs.
2926 @<Process deck heading@>= {
2927 cur_name=find_name(buf)-256;
2928 cur_data=set_deck_list(cur_name+256);
2929 @<Skip to the end of the deck list@>;
2932 @ @<Skip to the end of the deck list@>= {
2933 while(deck_lists.data[cur_data].next!=none)
2934 cur_data=deck_lists.data[cur_data].next;
2937 @ Now to parse each line in turn. Each line consists of a number, the
2938 flags, and a text.
2940 @<Process deck state@>= {
2941 int n=strtol(buf,&buf,10);
2942 unsigned int f=0;
2943 if(n) {
2944 buf+=strspn(buf,"\x20\t");
2945 @<Read the flags for the deck list@>;
2946 buf+=strspn(buf,"\x20\t"); // Now we are at the point of the text
2947 @<Send this line to the deck list@>;
2948 @<Create and advance to the new terminator of the deck list@>;
2952 @ @<Read the flags for the deck list@>= {
2953 while(*buf>='a' && *buf<='z') f |=lflag(*buf++);
2954 buf++; // Skip terminator of flags
2957 @ If the \.x flag is set, it will be determined what to do with the text
2958 by the user-defined code. Otherwise, it is always a name, so we can save
2959 memory by pointing to the name buffer (since name buffers never vary).
2961 @<Send this line to the deck list@>= {
2962 if(f&lflag('x')) {
2963 deck_lists.data[cur_data].name=strdup(buf);
2964 } @+else {
2965 deck_lists.data[cur_data].name=name_info(find_name(buf)).name;
2969 @ @<Create and advance to the new terminator of the deck list@>= {
2970 data_index i=new_record(deck_lists);
2971 deck_lists.data[cur_data].next=i;
2972 deck_lists.data[cur_data=i].next=none;
2975 @*Execute State. This state is simple, just execute stack codes. It is the
2976 initial state; you can use it with terminal input, too.
2978 @<Process execute state@>= {
2979 execute_program(buf);
2982 @*File State. This state is used to make list of output files. Each one is
2983 stored as a string, like subroutine state. The difference is that newlines
2984 will not be discarded. The other difference is that there is a flag in the
2985 |name_data| record for it that tells it that it is a file that should be
2986 sent to output.
2988 @<More elements of |name_data|@>=
2989 boolean is_output_file;
2991 @ @<Process file heading@>= {
2992 cur_name=find_name(buf)-256;
2993 if(!names.data[cur_name].value.is_string) {
2994 names.data[cur_name].value.is_string=1;
2995 names.data[cur_name].value.text=strdup("");
2996 names.data[cur_name].is_output_file=1;
3000 @ @<Process file state@>= {
3001 int z=strlen(names.data[cur_name].value.text);
3002 if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
3003 names.data[cur_name].value.text=realloc(names.data[cur_name].value.text,
3004 z+strlen(buf)+1);
3005 strcpy(names.data[cur_name].value.text+z,buf);
3008 @*Include State. The include state causes inclusion of another source file
3009 from this one.
3011 @<Process include heading@>= {
3012 cur_state=execute_state;
3013 @<Push the include file onto the input stack@>;
3014 @<Attempt to open the include file...@>;
3017 @ @<Push the include file onto the input stack@>= {
3018 ++current_input_file;
3019 memusage_log("Opening input file",current_input_file-input_files)@;
3020 strcpy(current_filename,buf);
3021 current_line=0;
3022 current_fp=0;
3025 @ Include files are searched using the search path specified in the
3026 environment variable called \.{TEXNICARDPATH}, which is a list of paths
3027 delimited by colons on UNIX systems (including Cygwin), but semicolons on
3028 Windows (colons are used in Windows for drive letters). A forward slash is
3029 the path separator. Please note that if you want to use include files in
3030 the current directory, you must include |"."| as the first entry in the
3031 search path!!
3033 @^search path@>
3034 @.TEXNICARDPATH@>
3035 @^Windows@>
3037 @<Set |includepath_separator| depending on operating system@>=
3038 #ifdef WIN32
3039 #define @!includepath_separator ';'
3040 #else
3041 #define includepath_separator ':'
3042 #endif
3044 @ @<Attempt to open the include file by finding it in the search path@>= {
3045 current_fp=open_file(current_filename,"r");
3046 @<It is a fatal error if no such file was found@>;
3049 @ Since this part of the code is activated in many parts of the program,
3050 we will make it a subroutine that can open files in different modes.
3052 @-p FILE*open_file(char*filename,char*mode) {
3053 char searchpath[max_pathname_length+max_filename_length+1];
3054 char*cpath;
3055 char*npath=getenv("TEXNICARDPATH");
3056 FILE*fp=0;
3057 strcpy(searchpath,npath?npath:".");
3058 npath=cpath=searchpath;
3059 @<Set |includepath_separator| depending on operating system@>;
3060 @<Attempt to open the file from each each directory in the search path@>;
3061 return fp;
3064 @ @<Attempt to open the file from each each directory...@>= {
3065 while(!fp) {
3066 char f[max_pathname_length+max_filename_length+1];
3067 @<Select the next path name into |cpath| and |npath|@>;
3068 sprintf(f,"%s/%s",cpath,filename);
3069 fp=fopen(f,mode);
3073 @ @<Select the next path name into |cpath| and |npath|@>= {
3074 if(!(cpath=npath)) break;
3075 if((npath=strchr(npath,includepath_separator))) *npath++=0;
3078 @ @<It is a fatal error if no such file was found@>= {
3079 if(!current_fp) {
3080 fprintf(stderr,"%s not found in search path.\n",current_filename);
3081 @.not found in search path@>
3082 exit(1);
3086 @*Keyword State. You can add keywords to the keyword area by using this.
3087 Each keyword heading is one entry in the list.
3089 @<Process keyword heading@>= {
3090 cur_data=new_record(keywords);
3091 keywords.data[cur_data].match=strdup(buf);
3092 keywords.data[cur_data].replacement=strdup("");
3095 @ @<Process keyword state@>= {
3096 keyword_data*k=&keywords.data[cur_data];
3097 if(*buf=='+') {
3098 k->category|=find_category(buf+1);
3099 } @+else {
3100 if(!omit_line_break) strcpy(buf+strlen(buf),"\n");
3101 @<Append buffer to keyword text@>;
3105 @ @<Append buffer to keyword text@>= {
3106 if(*buf) {
3107 int z=strlen(k->replacement);
3108 k->replacement=realloc(k->replacement,z+strlen(buf)+1);
3109 strcpy(k->replacement+z,buf);
3113 @*Pattern State. This state compiles patterns into a pattern area. It
3114 uses its own syntax, and then is converted into the proper control codes
3115 for the |text| of a pattern.
3117 @<Process pattern heading@>= {
3118 cur_name=find_name(buf)-256;
3119 cur_data=set_pattern_area(cur_name+256);
3122 @ The stuff inside the pattern state has its own commands.
3124 @<Process pattern state@>= {
3125 char add_buf[1024]; // buffer of text to add to the current pattern
3126 pattern_data*pat=&pattern_areas.data[cur_data];
3127 *add_buf=0;
3128 switch(*buf) {
3129 case '<': @<Create a new pattern with top priority@>;@+break;
3130 case '>': @<Create a new pattern with bottom priority@>;@+break;
3131 case ':': @<Make a pattern text with a marker@>;@+break;
3132 case '+': @<Add a keyword category to this pattern@>;@+break;
3133 default: ; // do nothing
3135 @<Append contents of |add_buf| to the pattern, if needed@>;
3138 @ @<Create a new pattern with top priority@>= {
3139 cur_data=new_record(pattern_areas);
3140 pattern_areas.data[cur_data].text=strdup("");
3141 pattern_areas.data[cur_data].subroutine=find_name(buf+1)-256;
3142 pattern_areas.data[cur_data].next=names.data[cur_name].pattern_area;
3143 names.data[cur_name].pattern_area=cur_data;
3146 @ @<Create a new pattern with bottom priority@>= {
3147 data_index n;
3148 cur_data=new_record(pattern_areas);
3149 pattern_areas.data[cur_data].text=strdup("");
3150 pattern_areas.data[cur_data].subroutine=find_name(buf+1)-256;
3151 pattern_areas.data[cur_data].next=none;
3152 @<Find the bottom pattern and store its index in |n|@>;
3153 pattern_areas.data[n].next=cur_data;
3156 @ @<Find the bottom pattern and...@>= {
3157 n=names.data[cur_name].pattern_area;
3158 while(pattern_areas.data[n].next!=none && pattern_areas.data[n].text &&
3159 pattern_areas.data[pattern_areas.data[n].next].next!=none)
3160 n=pattern_areas.data[n].next;
3163 @ Actually, the name of this \strike{cake} chunk is a lie, because it does
3164 not always add a marker.
3166 @<Make a pattern text with a marker@>= {
3167 char*p;
3168 char*b=add_buf;
3169 @<Add the pattern marker if applicable@>;
3170 for(p=buf+2;p[-1] && *p;p++) {
3171 switch(*p) {
3172 case '\\': *b++=*++p; @+break;
3173 case '(': *b++=begin_capture; @+break;
3174 case ')': *b++=end_capture; @+break;
3175 case '%': *b++=match_keyword; @+*b++=*++p; @+break;
3176 case '!': *b++=match_table; @+*b++=*++p; @+break;
3177 case '?': *b++=optional_table; @+*b++=*++p; @+break;
3178 case '#': *b++=failed_match; @+break;
3179 case '&': *b++=jump_table; @+*b++=*++p; @+break;
3180 case ';': *b++=successful_match; @+break;
3181 case '<': *b++=back_one_space; @+break;
3182 case '>': *b++=forward_one_space; @+break;
3183 case '[': *b++=match_left_side; @+break;
3184 case ']': *b++=match_right_side; @+break;
3185 default: *b++=*p; @+break;
3188 *b=0;
3191 @ @<Add the pattern marker if applicable@>= {
3192 if(buf[1]>' ') *b++=buf[1]|0x80;
3195 @ @<Add a keyword category to this pattern@>= {
3196 pattern_areas.data[cur_data].category=find_category(buf+1);
3199 @ @<Append contents of |add_buf| to the pattern...@>= {
3200 if(*add_buf) {
3201 int z=strlen(pat->text);
3202 pat->text=realloc(pat->text,z+strlen(add_buf)+1);
3203 strcpy(pat->text+z,add_buf);
3207 @*Subroutine State. This state is used to add a named subroutine.
3209 @<Process subroutine heading@>= {
3210 cur_name=find_name(buf)-256;
3211 if(!names.data[cur_name].value.is_string) {
3212 names.data[cur_name].value.is_string=1;
3213 names.data[cur_name].value.text=strdup("");
3217 @ @<Process subroutine state@>= {
3218 int z=strlen(names.data[cur_name].value.text);
3219 names.data[cur_name].value.text=realloc(names.data[cur_name].value.text,
3220 z+strlen(buf)+1);
3221 strcpy(names.data[cur_name].value.text+z,buf);
3224 @*Word Forms State. You can use the word forms state to enter rules and
3225 exceptions for word forms, such as plurals.
3227 @<Global variables@>=
3228 char wordform_code[256]; // code to execute at \.= line
3229 char wordform_kind; // which kind of word forms is being made now?
3231 @ @<Process word forms state@>= {
3232 switch(*buf) {
3233 case '>': @<Process \.> line in word forms state@>; @+break;
3234 case '=': @<Process \.= line in word forms state@>; @+break;
3235 case '#': wordform_kind=buf[1]; @+break;
3236 default: if(*buf>='0' && *buf<='9')
3237 @<Process numeric line in word forms state@>;
3241 @ The commands are \.>, \.=, and numbers. The command \.> sets a code for
3242 processing \.= commands, and then add to the list.
3244 @<Process \.> line in word forms state@>= {
3245 strcpy(wordform_code,buf+1);
3248 @ @<Process \.= line in word forms state@>= {
3249 int level,kind;
3250 char*orig;
3251 char*dest;
3252 push_string(buf+1);
3253 execute_program(wordform_code);
3254 kind=pop_num(); @+ level=pop_num();
3255 dest=pop_string(); @+ orig=pop_string();
3256 add_word_form(kind,level,orig,dest);
3257 free(orig); @+ free(dest);
3260 @ Now the command for numeric forms. You put ``level\.:orig\.:dest'' in
3261 that order, please.
3263 @<Process numeric line in word forms state@>= {
3264 int level=strtol(buf,&buf,0);
3265 char*p;
3266 if(*buf==':') buf++;
3267 p=strchr(buf,':');
3268 if(p) *p=0;
3269 add_word_form(wordform_kind,level,buf,p+1);
3272 @*Writing Output Files. Finally, it will be time to send any output
3273 generated into the files (if there is any, which there usually is).
3275 @^output@>
3277 @d ctrl(_letter) (0x1F&(_letter))
3279 @d call_final_subroutine ctrl('C')
3280 @d copy_field ctrl('F')
3281 @d newline ctrl('J')
3282 @d loop_point ctrl('L')
3283 @d next_record ctrl('N')
3284 @d skip_one_character ctrl('S')
3286 @<Write the output files@>= {
3287 data_index n;
3288 foreach(n,names) {
3289 if(names.data[n].is_output_file && names.data[n].value.is_string)
3290 @<Write this output file@>;
3294 @ @<Write this output file@>= {
3295 FILE*fout=fopen(names.data[n].name,"w");
3296 char*ptr=names.data[n].value.text;
3297 char*loopptr=ptr; // loop point
3298 if(!fout) @<Error about unable to open output file@>;
3299 while(*ptr) @<Write the character and advance to the next one@>;
3300 fclose(fout);
3303 @ @<Error about unable to open output file@>= {
3304 fprintf(stderr,"Unable to open output file: %s\n",names.data[n].name);
3305 @.Unable to open output file@>
3306 exit(1);
3309 @ @<Write the character and advance to the next one@>= {
3310 switch(*ptr) {
3311 case call_final_subroutine: @<Do |call_final_subroutine|@>; @+break;
3312 case copy_field: @<Do |copy_field|@>; @+break;
3313 case loop_point: loopptr=++ptr; @+break;
3314 case next_record: @<Do |next_record|@>; @+break;
3315 case skip_one_character: ptr+=2; @+break;
3316 default: fputc(*ptr++,fout);
3318 done_writing_one_character: ;
3321 @ @<Do |call_final_subroutine|@>= {
3322 register_value*v;
3323 if(*++ptr=='(') {
3324 char*p=strchr(ptr,')');
3325 *p=0;
3326 v=&name_info(find_name(ptr+1)).value;
3327 *p=')';
3328 ptr=p+1;
3329 } @+else {
3330 v=&registers[*ptr++];
3332 if(v->is_string) {
3333 execute_program(v->text);
3334 @<Write or loop based on result of subroutine call@>;
3335 stack_drop();
3339 @ @<Write or loop based on result of subroutine call@>= {
3340 if(stack_ptr->is_string) {
3341 fprintf(fout,"%s",stack_ptr->text);
3342 } @+else if(stack_ptr->number) {
3343 ptr=loopptr;
3347 @ This command is used to copy the next field.
3349 Look at the definition for the |send_reg_char_or_text| macro. It is
3350 strange, but it should work wherever a statement is expected. Please note
3351 that a ternary condition operator should have both choices of the same
3352 type.
3354 @^strange codes@>
3356 @d tok_idx (registers['A'].number)
3357 @d tok_area
3358 (card_areas.data[name_info(registers['C'].number).value.number].tokens)
3360 @d send_reg_char_or_text(_reg)
3361 if(!registers[_reg].is_string || *registers[_reg].text)
3362 fprintf(fout, "%c%s",
3363 registers[_reg].is_string?
3364 *registers[_reg].text:registers[_reg].number,
3365 registers[_reg].is_string?
3366 registers[_reg].text+1:(unsigned char*)""
3369 @<Do |copy_field|@>= {
3370 ++ptr;
3371 for(;;) {
3372 switch(tok_area[tok_idx++]) {
3373 case null_char: @<Unexpected |null_char|@>;
3374 case end_transmission: tok_idx=0; @+goto done_writing_one_character;
3375 case tabulation: send_reg_char_or_text('T'); @+break;
3376 case raw_data: @<Do |raw_data|@>; @+break;
3377 case escape_code: send_reg_char_or_text('E'); @+break;
3378 case record_separator: tok_idx--; @+goto done_writing_one_character;
3379 case field_separator: @+goto done_writing_one_character;
3380 default: @/
3381 if(tok_area[--tok_idx]&~0xFF)
3382 @<Deal with name code@>@;
3383 else
3384 @<Deal with normal character@>;
3385 tok_idx++;
3390 @ @<Unexpected |null_char|@>= {
3391 fprintf(stderr,"Unexpected null character found in a card area\n");
3392 @.Unexpected null character...@>
3393 exit(1);
3396 @ @<Do |raw_data|@>= {
3397 while(tok_area[tok_idx]) fputc(tok_area[tok_idx++],fout);
3398 tok_idx++;
3401 @ A name code found here is a code to tell it to call the subroutine code
3402 when it is time to write it out to the file. It should return a string on
3403 the stack (if it is a number, it will be ignored).
3405 @<Deal with name code@>= {
3406 if(name_info(tok_area[tok_idx]).value.is_string)
3407 execute_program(name_info(tok_area[tok_idx]).value.text);
3408 if(stack_ptr->is_string) fprintf(fout,"%s",stack_ptr->text);
3409 stack_drop();
3412 @ In case of a normal character, normally just write it out. But some
3413 characters need escaped for \TeX.
3415 @<Deal with normal character@>= {
3416 if(tables['E'][tok_area[tok_idx]]) send_reg_char_or_text('E');
3417 fputc(tok_area[tok_idx],fout);
3420 @ This one moves to the next record, looping if a next record is in fact
3421 available. Otherwise, just continue. Note that a |record_separator|
3422 immediately followed by a |end_transmission| is assumed to mean there is
3423 no next record, and that there is allowed to be a optional
3424 |record_separator| just before the |end_transmission|.
3426 @<Do |next_record|@>= {
3427 ++ptr;
3428 while(tok_area[tok_idx]!=record_separator &&
3429 tok_area[tok_idx]!=end_transmission) tok_idx++;
3430 if(tok_area[tok_idx]!=end_transmission &&
3431 tok_area[tok_idx+1]!=end_transmission) ptr=loopptr;
3434 @*Functions Common to DVI and GF. Numbers for \.{GF} and \.{DVI} files use
3435 the |dvi_number| data type. (Change this in the change file if the current
3436 setting is inappropriate for your system.)
3438 There is also the |dvi_measure| type, which is twice as long and is used
3439 to compute numbers that can be fractional (with thirty-two fractional bits
3440 and thirty-two normal bits).
3442 @<Typedefs@>=
3443 @q[Type of DVI numbers::]@>
3444 typedef signed int dvi_number;
3445 typedef signed long long int dvi_measure;
3446 @q[::Type of DVI numbers]@>
3448 @ There is one subroutine here to read a |dvi_number| from a file. They
3449 come in different sizes and some are signed and some are unsigned.
3451 @^endianness@>
3452 @^byte order@>
3454 @-p dvi_number get_dvi_number(FILE*fp,boolean is_signed,int size) {
3455 dvi_number r=0;
3456 if(size) r=fgetc(fp);
3457 if((r&0x80) && is_signed) r|=0xFFFFFF00;
3458 while(--size) r=(r<<8)|fgetc(fp);
3459 return r;
3462 @ Some macros are defined here in order to deal with |dvi_measure| values.
3464 @^fractions@>
3466 @d to_measure(_value) (((dvi_measure)(_value))<<32)
3467 @d floor(_value) ((dvi_number)((_value)>>32))
3468 @d round(_value) ((dvi_number)(((_value)+0x8000)>>32))
3469 @d ceiling(_value) ((dvi_number)(((_value)+0xFFFF)>>32))
3471 @ Here division must be done in a careful way, to ensure that none of the
3472 intermediate results exceed sixty-four bits.
3474 @d fraction_one to_measure(1)
3476 @-p dvi_measure make_fraction(dvi_measure p,dvi_measure q) {
3477 dvi_measure f,n;
3478 boolean negative=(p<0)^(q<0);
3479 if(p<0) p=-p;
3480 if(q<0) q=-q;
3481 n=p/q; @+ p=p%q;
3482 n=(n-1)*fraction_one;
3483 @<Compute $f=\lfloor2^{32}(1+p/q)+{1\over2}\rfloor$@>;
3484 return (f+n)*(negative?-1:1);
3487 @ Notice that the computation specifies $(p-q)+p$ instead of $(p+p)-q$,
3488 because the latter could overflow.
3490 @<Compute $f=...@>= {
3491 register dvi_measure b;
3492 f=1;
3493 while(f<fraction_one) {
3494 b=p-q; @+ p+=b;
3495 if(p>=0) {
3496 f+=f+1;
3497 } @+else {
3498 f<<=1;
3499 p+=q;
3504 @ And a few miscellaneous macros.
3506 @d upto4(_var,_cmd) (_var>=_cmd && _var<_cmd+4)
3508 @*DVI Reading. The device-independent file format is a format invented by
3509 David R.~Fuchs in 1979. The file format need not be explained here, since
3510 there are other books which explain it\biblio{Knuth, Donald. ``\TeX: The
3511 Program''. Computers {\char`\&} Typesetting. ISBN 0-201-13437-3.}\biblio{%
3512 Knuth, Donald. ``\TeX ware''. Stanford Computer Science Report 1097.}.
3514 \edef\TeXwareBiblio{\the\bibliocount}
3515 @^Fuchs, David@>
3516 @.DVI@>
3517 @^device independent@>
3519 At first, names will be given for the commands in a \.{DVI} file.
3521 @d set_char_0 0 // Set a character and move [up to 127]
3522 @d set1 128 // Take one parameter to set character [up to 131]
3523 @d set_rule 132 // Set a rule and move down, two parameters
3524 @d put1 133 // As |set1| but no move [up to 136]
3525 @d put_rule 137 // As |set_rule| but no move
3526 @d nop 138 // No operation
3527 @d bop 139 // Beginning of a page
3528 @d eop 140 // End of a page
3529 @d push 141 // Push $(h,v,w,x,y,z)$ to the stack
3530 @d pop 142 // Pop $(h,v,w,x,y,z)$ from the stack
3531 @d right1 143 // Take one parameter, move right [up to 146]
3532 @d w0 147 // Move right $w$ units
3533 @d w1 148 // Set $w$ and move right [up to 151]
3534 @d x0 152 // Move right $x$ units
3535 @d x1 153 // Set $x$ and move right [up to 156]
3536 @d down1 157 // Take one parameter, move down [up to 160]
3537 @d y0 161 // Move down $y$ units
3538 @d y1 162 // Set $y$ and move down [up to 165]
3539 @d z0 166 // Move down $z$ units
3540 @d z1 167 // Set $z$ and move down [up to 170]
3541 @d fnt_num_0 171 // Select font 0 [up to 234]
3542 @d fnt1 235 // Take parameter to select font [up to 238]
3543 @d xxx1 239 // Specials [up to 242]
3544 @d fnt_def1 243 // Font definitions [up to 246]
3545 @d pre 247 // Preamble
3546 @d post 248 // Postamble
3547 @d post_post 249 // Postpostamble
3549 @ We should now start reading the \.{DVI} file. Filenames of fonts being
3550 used will be sent to standard output.
3552 @-p boolean read_dvi_file(char*filename) {
3553 boolean fonts_okay=1;
3554 FILE*fp=fopen(filename,"rb");
3555 if(!fp) dvi_error(fp,"Unable to open file");
3556 @#@<Skip the preamble of the \.{DVI} file@>;
3557 @<Skip to the next page@>;
3558 @<Read the metapage heading@>;
3559 @<Compute the conversion factor@>;
3560 read_dvi_page(fp);
3561 @<Skip to and read the postamble@>;
3562 @<Read the font definitions and load the fonts@>;
3563 if(fonts_okay) @<Read the pages for each card@>;
3564 @#fclose(fp);
3565 return fonts_okay;
3568 @ @-p void dvi_error(FILE*fp,char*text) {
3569 fprintf(stderr,"DVI error");
3570 @.DVI error@>
3571 if(fp) fprintf(stderr," at %08X",ftell(fp));
3572 fprintf(stderr,": %s\n",text);
3573 exit(1);
3576 @ Please note the version number of the \.{DVI} file must be 2.
3578 @<Skip the preamble of the \.{DVI} file@>= {
3579 if(fgetc(fp)!=pre) dvi_error(fp,"Bad preamble");
3580 if(fgetc(fp)!=2) dvi_error(fp,"Wrong DVI version");
3581 @<Read the measurement parameters@>;
3582 @<Skip the DVI comment@>;
3585 @ @<Read the measurement parameters@>= {
3586 unit_num=get_dvi_number(fp,0,4);
3587 unit_den=get_dvi_number(fp,0,4);
3588 unit_mag=get_dvi_number(fp,0,4);
3591 @ @<Skip the DVI comment@>= {
3592 int n=fgetc(fp);
3593 fseek(fp,n,SEEK_CUR);
3596 @ From the postamble we can read the pointer for the last page.
3598 @<Global variables@>=
3599 dvi_number last_page_ptr;
3601 @ @<Skip to and read the postamble@>= {
3602 fseek(fp,-4,SEEK_END);
3603 while(fgetc(fp)==223) fseek(fp,-2,SEEK_CUR);
3604 fseek(fp,-5,SEEK_CUR);
3605 fseek(fp,get_dvi_number(fp,0,4)+1,SEEK_SET);
3606 last_page_ptr=get_dvi_number(fp,0,4);
3607 fseek(fp,20,SEEK_CUR); // Skipped parameters of |post|
3608 dvi_stack=malloc(get_dvi_number(fp,0,2)*sizeof(dvi_stack_entry));
3609 fseek(fp,2,SEEK_CUR);
3612 @ Between the preamble and the first page can be |nop| commands and font
3613 definitions, so these will be skipped. The same things can occur between
3614 the end of one page and the beginning of the next page.
3616 @<Skip to the next page@>= {
3617 int c;
3618 for(;;) {
3619 c=fgetc(fp);
3620 if(c==bop) break;
3621 if(c>=fnt_def1 && c<fnt_def1+4) {
3622 @<Skip a font definition@>;
3623 } @+else if(c!=nop) {
3624 dvi_error(fp,"Bad command between pages");
3629 @ @<Skip a font definition@>= {
3630 int a,l;
3631 fseek(fp,c+13-fnt_def1,SEEK_CUR);
3632 a=fgetc(fp);
3633 l=fgetc(fp);
3634 fseek(fp,a+l,SEEK_CUR);
3637 @ The metapage includes the resolution and other things which must be set,
3638 such as subroutine codes. The resolution must be read before fonts can be
3639 read. Please note that no characters can be typeset on the metapage, since
3640 fonts have not been loaded yet. You can still place empty boxes. The DPI
3641 setting (resolution) is read from the \.{\\count1} register.
3643 @<Read the metapage heading@>= {
3644 dvi_number n=get_dvi_number(fp,0,4);
3645 if(n) {
3646 fprintf(stderr,"Metapage must be numbered zero (found %d).\n",n);
3647 @.Metapage must be...@>
3648 exit(1);
3650 push_num(get_dvi_number(fp,0,4)); @+ store('D');
3651 fseek(fp,9*4,SEEK_CUR); // Skip other parameters
3652 layer_width=layer_height=0;
3655 @ A stack is kept of the page registers, for use with the |push| and |pop|
3656 commands of a \.{DVI} file. This stack is used by the |read_dvi_page|
3657 subroutine and stores the |quan| registers (described in the next
3658 chapter).
3660 @<Typedefs@>=
3661 typedef struct {
3662 dvi_number h;
3663 dvi_number v;
3664 dvi_number w;
3665 dvi_number x;
3666 dvi_number y;
3667 dvi_number z;
3668 dvi_number hh;
3669 dvi_number vv;
3670 } dvi_stack_entry;
3672 @ @<Global variables@>=
3673 dvi_stack_entry*dvi_stack;
3674 dvi_stack_entry*dvi_stack_ptr;
3676 @ Here is the subroutine to read commands from a DVI page. The file
3677 position should be at the beginning of the page after the |bop| command.
3679 @^pages@>
3681 @-p void read_dvi_page(FILE*fp) {
3682 memusage_log("Beginning of page",fseek(fp));
3683 @<Reset the page registers and stack@>;
3684 typeset_new_page();
3685 @<Read the commands of this page@>;
3686 if(layer_width && layer_height) @<Render this page@>;
3689 @ @<Reset the page registers and stack@>= {
3690 quan('A')=quan('B')=quan('H')=quan('I')=quan('J')=quan('L')=quan('V')=
3691 quan('W')=quan('X')=quan('Y')=quan('Z')=0;
3692 dvi_stack_ptr=dvi_stack;
3695 @ @<Read the commands of this page@>= {
3696 int c,a;
3697 boolean moveaftertyping;
3698 for(;;) {
3699 c=fgetc(fp);
3700 if(c<set1) {
3701 moveaftertyping=1;
3702 @<Typeset character |c| on the current layer@>;
3703 } @+else if(upto4(c,set1)) {
3704 moveaftertyping=1;
3705 c=get_dvi_number(fp,0,c+1-set1);
3706 @<Typeset character |c| on the current layer@>;
3707 } @+else if(c==set_rule || c==put_rule) {
3708 moveaftertyping=(c==set_rule);
3709 c=get_dvi_number(fp,1,4);
3710 a=get_dvi_number(fp,1,4);
3711 @<Typeset |a| by |c| rule on the current layer@>;
3712 } @+else if(upto4(c,put1)) {
3713 moveaftertyping=0;
3714 c=get_dvi_number(fp,0,c+1-put1);
3715 @<Typeset character |c| on the current layer@>;
3716 } @+else if(c==eop) {
3717 break;
3718 } @+else if(c==push) {
3719 if(dvi_stack) @<Push DVI registers to stack@>;
3720 } @+else if(c==pop) {
3721 if(dvi_stack) @<Pop DVI registers from stack@>;
3722 } @+else if(upto4(c,right1)) {
3723 c=get_dvi_number(fp,1,c+1-right1);
3724 horizontal_movement(c);
3725 } @+else if(c==w0) {
3726 horizontal_movement(quan('W'));
3727 } @+else if(upto4(c,w1)) {
3728 c=get_dvi_number(fp,1,c+1-w1);
3729 horizontal_movement(quan('W')=c);
3730 } @+else if(c==x0) {
3731 horizontal_movement(quan('X'));
3732 } @+else if(upto4(c,x1)) {
3733 c=get_dvi_number(fp,1,c+1-x1);
3734 horizontal_movement(quan('X')=c);
3735 } @+else if(upto4(c,down1)) {
3736 c=get_dvi_number(fp,1,c+1-down1);
3737 vertical_movement(c);
3738 } @+else if(c==y0) {
3739 vertical_movement(quan('Y'));
3740 } @+else if(upto4(c,y1)) {
3741 c=get_dvi_number(fp,1,c+1-y1);
3742 vertical_movement(quan('Y')=c);
3743 } @+else if(c==z0) {
3744 vertical_movement(quan('Z'));
3745 } @+else if(upto4(c,z1)) {
3746 c=get_dvi_number(fp,1,c+1-z1);
3747 vertical_movement(quan('Z')=c);
3748 } @+else if(c>=fnt_num_0 && c<fnt1) {
3749 quan('F')=c-fnt_num_0;
3750 } @+else if(upto4(c,fnt1)) {
3751 quan('F')=get_dvi_number(fp,0,c+1-fnt1);
3752 } @+else if(upto4(c,xxx1)) {
3753 c=get_dvi_number(fp,0,c+1-xxx1);
3754 @<Read a special of length |c|@>;
3755 } @+else if(upto4(c,fnt_def1)) {
3756 @<Skip a font definition@>;
3757 } @+else if(c!=nop) {
3758 dvi_error(fp,"Unknown DVI command");
3763 @ @<Push DVI registers to stack@>= {
3764 dvi_stack_ptr->h=quan('H');
3765 dvi_stack_ptr->v=quan('V');
3766 dvi_stack_ptr->w=quan('W');
3767 dvi_stack_ptr->x=quan('X');
3768 dvi_stack_ptr->y=quan('Y');
3769 dvi_stack_ptr->z=quan('Z');
3770 dvi_stack_ptr->hh=quan('I');
3771 dvi_stack_ptr->vv=quan('J');
3772 ++dvi_stack_ptr;
3775 @ @<Pop DVI registers from stack@>= {
3776 --dvi_stack_ptr;
3777 quan('H')=dvi_stack_ptr->h;
3778 quan('V')=dvi_stack_ptr->v;
3779 quan('W')=dvi_stack_ptr->w;
3780 quan('X')=dvi_stack_ptr->x;
3781 quan('Y')=dvi_stack_ptr->y;
3782 quan('Z')=dvi_stack_ptr->z;
3783 quan('I')=dvi_stack_ptr->hh;
3784 quan('J')=dvi_stack_ptr->vv;
3787 @ A special in \TeX nicard is used to execute a special code while reading
3788 the DVI file. Uses might be additional calculations, changes of registers,
3789 special effects, layer selection, etc. All of these possible commands are
3790 dealt with elsewhere in this program. All we do here is to read it and to
3791 send it to the |execute_program| subroutine.
3793 @^specials@>
3795 @<Read a special of length |c|@>= {
3796 char*buf=malloc(c+1);
3797 fread(buf,1,c,fp);
3798 buf[c]=0;
3799 @<Set \.X and \.Y registers to prepare for the special@>;
3800 execute_program(buf);
3801 free(buf);
3804 @ @<Set \.X and \.Y registers to prepare for the special@>= {
3805 registers['X'].is_string=registers['Y'].is_string=0;
3806 registers['X'].number=quan('I');
3807 registers['Y'].number=quan('J');
3810 @ In order to read all the pages for each card, we can skip backwards by
3811 using the back pointers. Either we will print all cards (in reverse
3812 order), or we will print cards listed on the command-line, or we will
3813 print cards listed in a file (this last way might be used to print decks
3814 or booster packs).
3816 Card numbers should be one-based, and should not be negative. Any pages
3817 with negative page numbers will be ignored when it is in the mode for
3818 printing all cards.
3820 @d printing_all_cards 0
3821 @d printing_list 1
3822 @d printing_list_from_file 2
3824 @<Global variables@>=
3825 unsigned char printing_mode;
3826 char*printlisttext;
3827 FILE*printlistfile;
3829 @ @<Read the pages for each card@>= {
3830 dvi_number page_ptr=last_page_ptr;
3831 dvi_number e=0,n; // page numbers
3832 boolean pagenotfound=0;
3833 for(;;) {
3834 @<Read the next entry from the list of pages (if applicable)@>;
3835 try_next_page:
3836 @<Seek the next page to print@>;
3837 @<Read the heading for this page@>;
3838 @<If this page shouldn't be printed now, |goto try_next_page|@>;
3839 pagenotfound=0;
3840 read_dvi_page(fp);
3842 @#done_printing:;
3845 @ @<Read the next entry from the list of pages (if applicable)@>= {
3846 if(printing_mode==printing_list) {
3847 if(!*printlisttext) goto done_printing;
3848 e=strtol(printlisttext,&printlisttext,10);
3849 if(!e) goto done_printing;
3850 if(*printlisttext) printlisttext++;
3851 } @+else if(printing_mode==printing_list_from_file) {
3852 char buf[256];
3853 if(!printlistfile || feof(printlistfile)) goto done_printing;
3854 if(!fgets(buf,255,printlistfile)) goto done_printing;
3855 e=strtol(buf,0,10);
3859 @ @<Seek the next page to print@>= {
3860 if(page_ptr==-1) {
3861 if(pagenotfound) {
3862 fprintf(stderr,"No page found: %d\n",e);
3863 @.No page found...@>
3864 exit(1);
3866 page_ptr=last_page_ptr;
3867 if(printing_mode==printing_all_cards) goto done_printing;
3868 pagenotfound=1;
3870 fseek(fp,page_ptr+1,SEEK_SET);
3873 @ @<Read the heading for this page@>= {
3874 n=quan('P')=get_dvi_number(fp,1,4);
3875 fseek(fp,4,SEEK_CUR);
3876 layer_width=get_dvi_number(fp,1,4);
3877 layer_height=get_dvi_number(fp,1,4);
3878 fseek(fp,4*6,SEEK_CUR);
3879 page_ptr=get_dvi_number(fp,1,4);
3882 @ @<If this page shouldn't be printed now, |goto try_next_page|@>= {
3883 if(n<=0 && printing_mode==printing_all_cards) goto try_next_page;
3884 if(n!=e && printing_mode!=printing_all_cards) goto try_next_page;
3887 @*DVI Font Metrics. Here, the fonts are loaded. It is assumed all fonts
3888 are in the current directory, and the ``area'' of the font name is
3889 ignored. The checksum will also be ignored (it can be checked with
3890 external programs if necessary).
3892 @^area@>
3893 @^font loading@>
3895 @<Read the font definitions and load the fonts@>= {
3896 int c;
3897 for(;;) {
3898 c=fgetc(fp);
3899 if(c==post_post) break;
3900 if(c>=fnt_def1 && c<fnt_def1+4) {
3901 int k=get_dvi_number(fp,0,c+1-fnt_def1);
3902 if(k&~0xFF) dvi_error(fp,"Too many fonts");
3903 memusage_log("Loading font",k);
3904 @<Read the definition for font |k| and load it@>;
3905 } @+else if(c!=nop) {
3906 dvi_error(fp,"Bad command in postamble");
3909 memusage_log("End of postamble",c);
3912 @ When reading fonts, it will be necessary to keep a list of the fonts
3913 and their character indices. Only 256 fonts are permitted in one job.
3915 @<Global variables@>=
3916 data_index fontindex[256];
3918 @ @<Read the definition for font |k| and load it@>= {
3919 dvi_number c=get_dvi_number(fp,0,4); // checksum (unused)
3920 dvi_number s=get_dvi_number(fp,0,4); // scale factor
3921 dvi_number d=get_dvi_number(fp,0,4); // design size
3922 int a=get_dvi_number(fp,0,1); // length of area
3923 int l=get_dvi_number(fp,0,1); // length of name
3924 char n[257];
3925 fseek(fp,a,SEEK_CUR);
3926 fread(n,1,l,fp);
3927 n[l]=0;
3928 if((fontindex[k]=read_gf_file(n,s,d))==none) fonts_okay=0;
3931 @ An important part of reading the font metrics is the width computation,
3932 which involves multiplying the relative widths in the \.{TFM} file (or
3933 \.{GF} file) by the scaling factor in the \.{DVI} file. This
3934 multiplication must be done in precisely the same way by all \.{DVI}
3935 reading programs, in order to validate the assumptions made by \.{DVI}-%
3936 writing programs such as \TeX.
3938 % (The following paragraph is taken directly from "dvitype.web")
3939 Let us therefore summarize what needs to be done. Each width in a \.{TFM}
3940 file appears as a four-byte quantity called a |fix_word|. A |fix_word|
3941 whose respective bytes are $(a,b,c,d)$ represents the number
3942 $$x=\left\{\vcenter{\halign{$#$,\hfil\qquad&if $#$\hfil\cr
3943 b\cdot2^{-4}+c\cdot2^{-12}+d\cdot2^{-20}&a=0;\cr
3944 -16+b\cdot2^{-4}+c\cdot2^{-12}+d\cdot2^{-20}&a=255.\cr}}\right.$$
3945 (No other choices of $a$ are allowed, since the magnitude of a \.{TFM}
3946 dimension must be less than 16.) We want to multiply this quantity by the
3947 integer~|z|, which is known to be less than $2^{27}$.
3948 If $|z|<2^{23}$, the individual multiplications $b\cdot z$, $c\cdot z$,
3949 $d\cdot z$ cannot overflow; otherwise we will divide |z| by 2, 4, 8, or
3950 16, to obtain a multiplier less than $2^{23}$, and we can compensate for
3951 this later. If |z| has thereby been replaced by $|z|^\prime=|z|/2^e$, let
3952 $\beta=2^{4-e}$; we shall compute
3953 $$\lfloor(b+c\cdot2^{-8}+d\cdot2^{-16})\,z^\prime/\beta\rfloor$$ if $a=0$,
3954 or the same quantity minus $\alpha=2^{4+e}z^\prime$ if $a=255$.
3955 This calculation must be
3956 done exactly, for the reasons stated above; the following program does the
3957 job in a system-independent way, assuming that arithmetic is exact on
3958 numbers less than $2^{31}$ in magnitude.
3960 \def\zprime{z'}
3962 @f alpha TeX
3963 @f beta TeX
3964 @f zprime TeX
3966 @<Compute |zprime|, |alpha|, and |beta|@>= {
3967 zprime=s; @+ alpha=16;
3968 while(zprime>=040000000) {
3969 zprime>>=1; @+ alpha<<=1;
3971 beta=256/alpha; @+ alpha*=zprime;
3974 @ @<Compute the character width |w|@>= {
3975 w=(((((b3*zprime)>>8)+(b2*zprime))>>8)+(b1*zprime))/beta;
3976 if(b0) w-=alpha;
3979 @*GF Reading. The \.{GF} format is a ``generic font'' format. It has a lot
3980 in common with \.{DVI} format.
3982 At first, names will be given for the commands in a \.{GF} file. Many
3983 commands have the same numbers as they do in a \.{DVI} file (described in
3984 the previous chapter), which makes it very convenient\biblio{This is
3985 probably on purpose for the this very reason, so that a WEB or CWEB
3986 program can use one set of named constants for reading both files.}.
3988 @d paint_0 0 // Paint $d$ pixels black or white [up to 63]
3989 @d paint1 64 // Take parameter, paint pixels [up to 66]
3990 @d boc 67 // Beginning of a character picture
3991 @d boc1 68 // Short form of |boc|
3992 @d eoc 69 // End of a character picture
3993 @d skip0 70 // Skip some rows
3994 @d skip1 71 // Skip some rows [up to 73]
3995 @d new_row_0 74 // Start a new row and move right [up to 238]
3996 @d yyy 243 // Numeric specials
3997 @d no_op 244 // No operation
3998 @d char_loc 245 // Character locator
3999 @d char_loc0 246 // Short form of |char_loc|
4001 @ The |font_struct| structure stores the information for each character in
4002 a font. The |raster| field points to a bitmap with eight pixels per octet,
4003 most significant bit for the leftmost pixel, each row always padded to a
4004 multiple of eight pixels.
4006 While it is reading the postamble, it will fill in this structure with the
4007 |ptr| field set. After the postamble is read, it will fill in the other
4008 fields belonging to its union.
4010 @<Typedefs@>=
4011 typedef struct {
4012 dvi_number dx; // character escapement in pixels
4013 dvi_number w; // width in DVI units
4014 union {
4015 struct {
4016 dvi_number min_n,max_n,min_m,max_m; // bounding box (in pixels)
4017 unsigned short n; // character code number
4018 unsigned char*raster;
4019 unsigned char flag; // bitfield of flags for this character
4020 }@+;
4021 dvi_number ptr;
4022 }@+;
4023 data_index next;
4024 } font_struct;
4026 @ List of flags follows. Some of these flags might be used in order to
4027 allow$\mathord{}>256$ characters per font, since {\TeX} does not have a
4028 command to enter characters with codes more than one byte long. These
4029 flags are specified using numeric specials.
4031 @d ff_select 0x01 // set high octet all characters
4032 @d ff_prefix 0x02 // set high octet for codes 128-255
4033 @d ff_roundafter 0x04 // round $\it hh$ after sending character
4034 @d ff_roundbefore 0x08 // round $\it hh$ before sending character
4035 @d ff_reset 0x10 // reset high octet
4036 @d ff_strip 0x20 // strip highest bit of prefix
4037 @d ff_space 0x40 // do not save the raster (space only)
4038 @d ff_reserved 0x80 // {\bf DO NOT USE}
4040 @ @<Global variables@>=
4041 memory_of(font_struct) font_data;
4043 @ @<Initialize memory@>= init_memory(font_data,4);
4045 @ When loading a \.{GF} font, the filename will contain the resolution
4046 in dots per inch.
4048 @^font loading@>
4050 @-p data_index read_gf_file(char*fontname,dvi_measure s,dvi_measure d) {
4051 unsigned int dpi=(resolution*unit_mag*s+500*d)/(100000*d);
4052 FILE*fp;
4053 data_index index=none;
4054 data_index first_index=none;
4055 data_index last_index=none;
4056 dvi_number zprime,alpha,beta; // used for width computation
4057 @<Compute |zprime|, |alpha|, and |beta|@>;
4058 @<Figure out the filename and open the file, |return none| if it can't@>;
4059 @<Skip to the postamble of the \.{GF} file@>;
4060 @<Read the character locators@>;
4061 @<Read the character rasters and flags@>;
4062 fclose(fp);
4063 return last_index;
4066 @ When figuring out the filename, it will send it to standard output so
4067 that a list can be made of the required fonts.
4069 @<Figure out the filename and open the file, ...@>= {
4070 char n[295];
4071 sprintf(n,"%s.%dgf",fontname,dpi);
4072 printf("%s\n",n);
4073 fp=open_file(n,"rb");
4074 if(!fp) return none;
4077 @ @<Skip to the postamble of the \.{GF} file@>= {
4078 int c;
4079 fseek(fp,-4,SEEK_END);
4080 while(fgetc(fp)==223) fseek(fp,-2,SEEK_CUR);
4081 fseek(fp,-5,SEEK_CUR);
4082 fseek(fp,get_dvi_number(fp,0,4)+37,SEEK_SET); // nothing matters anymore
4085 @ @<Read the character locators@>= {
4086 int c,b0,b1,b2,b3;
4087 dvi_number dx,w,p;
4088 for(;;) {
4089 c=fgetc(fp);
4090 if(c==post_post) break;
4091 p=-1;
4092 if(c==char_loc) {
4093 @<Read a long character locator@>;
4094 } @+else if(c==char_loc0) {
4095 @<Read a short character locator@>;
4096 } @+else if(c!=no_op) {
4097 fprintf(stderr,"Bad command in GF postamble.\n");
4098 @.Bad command in GF postamble@>
4099 fprintf(stderr,"(Command %d, address %08X)\n",c,ftell(fp)-1);
4100 exit(1);
4102 if(p!=-1) @<Defer this character locator into |font_data|@>;
4104 last_index=index;
4107 @ There are some parameters we do not care about. First is $c$, which is
4108 the character code residue (modulo 256). This is not important since it
4109 is duplicated in the |boc| heading for each character. The second
4110 parameter which we do not care about is the $\it dy$ parameter, since it
4111 should be zero for \.{DVI} files.
4113 @<Read a long character locator@>= {
4114 fseek(fp,1,SEEK_CUR);
4115 dx=get_dvi_number(fp,1,4)>>16;
4116 fseek(fp,4,SEEK_CUR);
4117 @<Read four bytes@>;
4118 p=get_dvi_number(fp,1,4);
4121 @ @<Read a short character locator@>= {
4122 fseek(fp,1,SEEK_CUR);
4123 dx=get_dvi_number(fp,0,1);
4124 @<Read four bytes@>;
4125 p=get_dvi_number(fp,1,4);
4128 @ @<Read four bytes@>= {
4129 b0=fgetc(fp);@+b1=fgetc(fp);@+b2=fgetc(fp);@+b3=fgetc(fp);
4132 @ This processing is deferred, and the rest of the parameters will be
4133 filled in later (and the |ptr| field will be overwritten since it will
4134 no longer be needed at that time).
4136 @<Defer this character locator into |font_data|@>= {
4137 data_index n=new_record(font_data);
4138 @<Compute the character width |w|@>;
4139 font_data.data[n].next=index;
4140 font_data.data[n].dx=dx;
4141 font_data.data[n].w=w;
4142 font_data.data[n].ptr=p;
4143 if(index==none) first_index=n;
4144 index=n;
4147 @ Now is time to go through the list we made up and this time actually
4148 fill in the parameters and pictures.
4150 @<Read the character rasters and flags@>= {
4151 while(index!=none) {
4152 fseek(fp,font_data.data[index].ptr,SEEK_SET);
4153 font_data.data[index].flag=0;
4154 font_data.data[index].raster=0;
4155 @<Read commands for this character@>;
4156 @#index=font_data.data[index].next;
4160 @ Painting the picture uses the value of |paint_switch| to determine
4161 to draw or skip. The current position in the array |raster| is also
4162 pointed by the |pic| pointer. Note that |black| and |white| are not
4163 necessary black and white (but they are on normal paper).
4165 Note the value of $n$ is not needed since the |pic| pointer automatically
4166 keeps track of this kinds of stuff. However, |m| is needed because of
4167 commands that can skip rows, to know how many columns must be skipped to
4168 reach the next row. There is also |b|, which keeps track of the bit
4169 position in the current byte.
4171 @d white 0
4172 @d black 1
4174 @d reset_m
4175 m=(font_data.data[index].max_m-font_data.data[index].min_m)/8+1@;
4177 @<Read commands for this character@>= {
4178 unsigned int c,m,b;
4179 unsigned char*pic;
4180 boolean paint_switch;
4181 for(;;) {
4182 c=fgetc(fp);
4183 if(c<paint1) {
4184 @<Paint |c| pixels |black| or |white|@>;
4185 } @+else if(c>=paint1 && c<paint1+3) {
4186 c=get_dvi_number(fp,0,c+1-paint1);
4187 @<Paint |c| pixels |black| or |white|@>;
4188 } @+else if(c==boc) {
4189 @<Initialize parameters and picture (long form)@>;
4190 } @+else if(c==boc1) {
4191 @<Initialize parameters and picture (short form)@>;
4192 } @+else if(c==eoc) {
4193 break; // Well Done!
4194 } @+else if(upto4(c,skip0)) {
4195 if(c==skip0) c=0;
4196 else c=get_dvi_number(fp,0,c+1-skip1);
4197 @<Finish a row and skip |c| rows@>;
4198 } @+else if(c>=new_row_0 && c<=new_row_0+164) {
4199 c-=new_row_0;
4200 @<Finish a row and skip |c| columns@>;
4201 } @+else if(c==yyy) {
4202 font_data.data[index].flag|=get_dvi_number(fp,0,4)>>16;
4203 } @+else if(c!=no_op) {
4204 fprintf(stderr,"Unknown GF command!\n");
4205 @.Unknown GF command@>
4206 fprintf(stderr,"(Command %d, address %08X)\n",c,ftell(fp)-1);
4211 @ Actually |m| is something a bit different than the standard, because |m|
4212 now tells how many bytes are remaining in the current row.
4214 @d pic_rows (1+font_data.data[index].max_n-font_data.data[index].min_n)
4216 @<Initialize parameters and picture (long form)@>= {
4217 font_data.data[index].n=get_dvi_number(fp,0,4);
4218 @<Deal with $p$ (pointer to previous character with same metrics)@>;
4219 font_data.data[index].min_m=get_dvi_number(fp,1,4);
4220 font_data.data[index].max_m=get_dvi_number(fp,1,4);
4221 font_data.data[index].min_n=get_dvi_number(fp,1,4);
4222 font_data.data[index].max_n=get_dvi_number(fp,1,4);
4223 @<Initialize picture@>;
4226 @ @<Initialize picture@>= {
4227 if(font_data.data[index].flag&ff_space) break;
4228 paint_switch=white;
4229 reset_m;
4230 b=0;
4231 pic=font_data.data[index].raster=malloc(m*pic_rows+1);
4232 memset(pic,0,m*pic_rows);
4235 @ @<Initialize parameters and picture (short form)@>= {
4236 int d;
4237 font_data.data[index].n=get_dvi_number(fp,0,1);
4238 d=get_dvi_number(fp,0,1);
4239 font_data.data[index].max_m=get_dvi_number(fp,0,1);
4240 font_data.data[index].min_m=font_data.data[index].max_m-d;
4241 d=get_dvi_number(fp,0,1);
4242 font_data.data[index].max_n=get_dvi_number(fp,0,1);
4243 font_data.data[index].min_n=font_data.data[index].max_n-d;
4244 @<Initialize picture@>;
4247 @ The pointers to other characters will also be deferred in the same way
4248 as the character locators, but this time from the other end. Now, once it
4249 is finished all the characters, it will {\sl automatically} know to read
4250 the next one properly! (Now you can see what the purpose of the
4251 |@!first_index| variable is.)
4253 @<Deal with $p$ (pointer to previous character with same metrics)@>= {
4254 dvi_number p=get_dvi_number(fp,1,4);
4255 if(p!=-1) {
4256 data_index i=new_record(font_data);
4257 font_data.data[i].next=none;
4258 font_data.data[i].dx=font_data.data[index].dx;
4259 font_data.data[i].w=font_data.data[index].w;
4260 font_data.data[i].ptr=p;
4261 font_data.data[first_index].next=i;
4262 first_index=i;
4266 @ Now we get to the actual painting. We can assume the value of |m| is
4267 never negative and that everything else is also okay.
4269 @<Paint |c| pixels |black| or |white|@>= {
4270 if(paint_switch) {
4271 if(b+c<=8) {
4272 @<Paint a small block of pixels in the current byte@>;
4273 } @+else {
4274 @<Paint the rest of the pixels in the current byte@>;
4275 @<Fill up the bytes in the middle@>;
4276 @<Clear the pixels needed clearing at the end@>;
4279 @<Update |paint_switch|, |pic|, |b|, and |m|@>;
4282 @ @<Update |paint_switch|, |pic|, |b|, and |m|@>= {
4283 paint_switch^=1;
4284 b+=c;
4285 pic+=b>>3;
4286 m-=b>>3;
4287 b&=7;
4290 @ @<Paint a small block of pixels in the current byte@>= {
4291 *pic|=(0xFF>>b)&~(0xFF>>(b+c));
4294 @ @<Paint the rest of the pixels in the current byte@>= {
4295 *pic|=0xFF>>b;
4298 @ @<Fill up the bytes in the middle@>= {
4299 memset(pic+1,0xFF,(c+b)>>3);
4302 @ @<Clear the pixels needed clearing at the end@>= {
4303 pic[(c+b)>>3]&=~(0xFF>>((c+b)&7));
4306 @ @<Finish a row and skip |c| rows@>= {
4307 pic+=m;
4308 b=0;
4309 reset_m;
4310 pic+=m*c;
4311 paint_switch=white;
4314 @ @<Finish a row and skip |c| columns@>= {
4315 pic+=m;
4316 reset_m;
4317 m-=c>>3;
4318 pic+=c>>3;
4319 b=c&7;
4320 paint_switch=black;
4323 @ @<Display font information@>= {
4324 data_index i;
4325 foreach(i,font_data) {
4326 printf("[%d] box=(%d,%d,%d,%d) dx=%d w=%d n=%d flag=%d [%d]\n"
4327 ,i,font_data.data[i].min_n,font_data.data[i].max_n
4328 ,font_data.data[i].min_m,font_data.data[i].max_m
4329 ,font_data.data[i].dx,font_data.data[i].w,font_data.data[i].n
4330 ,font_data.data[i].flag,font_data.data[i].next
4335 @*Layer Computation. Now is the chapter for actually deciding rendering on
4336 the page, where everything should go, etc.$^{[\TeXwareBiblio]}$
4338 @<Global variables@>=
4339 dvi_measure unit_num; // Numerator for units of measurement
4340 dvi_measure unit_den; // Denominator for units of measurement
4341 dvi_measure unit_mag; // Magnification for measurement
4342 dvi_measure unit_conv; // Conversion factor
4344 @ There are also a number of ``internal typesetting quantities''. These
4345 are parameters stored in a separate array, and are used to keep track of
4346 the current state of the typesetting. They are labeled with letters from
4347 \.A to \.Z. They can be modified inside of specials, although some of them
4348 probably shouldn't be modified in this way. Here is the list of them:
4350 \.A, \.B: Horizontal and vertical offset added to \.I and \.J.
4352 \.C: Character code prefix. If bit eight is not set, it only affects
4353 character codes with bit seven set.
4355 \.D: Maximum horizontal drift (in pixels), meaning how far away the \.I
4356 and \.J parameters are allowed to be from the correctly rounded values.
4358 \.E: Maximum vertical drift.
4360 \.F: The current font.
4362 \.H: The horizontal position on the page, in DVI units.
4364 \.I: The horizontal position on the page, in pixels.
4366 \.J: The vertical position on the page, in pixels.
4368 \.L: The current layer number. If this is zero, nothing is placed on the
4369 page, although the positions can still be changed and specials can still
4370 be used.
4372 \.P: Page number. This is used to determine the filename of output.
4374 \.R, \.S: The limits for when horizontal motion should add the number of
4375 pixels or when it should recalculate the pixels entirely.
4377 \.T, \.U: Like \.R and \.S, but for vertical motions.
4379 \.V: The vertical position on the page, in DVI units.
4381 \.W, \.X, \.Y, \.Z: The current spacing amounts, in DVI units.
4383 @d quan(_name) (type_quan[(_name)&0x1F])
4385 @<Global variables@>=
4386 dvi_number type_quan[32];
4388 @ @<Cases for system commands@>=
4389 @-case 'm': {
4390 // Modify an internal typesetting quantity
4391 if(stack_ptr->is_string) program_error("Type mismatch");
4392 quan(*++ptr)=pop_num();
4393 break;
4396 @ The conversion factor |unit_conv| is figured as follows: There are
4397 exactly |unit_num/unit_den| decimicrons per DVI unit, and 254000
4398 decimicrons per inch, and |resolution/100| pixels per inch. Then we have
4399 to adjust this by the magnification |unit_mag|.
4401 Division must be done slightly carefully to avoid overflow.
4403 @d resolution (registers['D'].number)
4405 @<Compute the conversion factor@>= {
4406 unit_conv=make_fraction(unit_num*resolution*unit_mag,unit_den*100000);
4407 unit_conv/=254000;
4410 @ Here are the codes to compute movements. The definition of \.{DVI} files
4411 refers to six registers which hold integer values in DVI units. However,
4412 we also have two more registers, for horizontal and vertical pixel units.
4414 A sequence of characters or rules might cause the pixel values to drift
4415 from their correctly rounded values, since they are not usually an exact
4416 integer number of pixels.
4418 @d to_pixels(_val) round((_val)*unit_conv)
4420 @-p void horizontal_movement(dvi_number x) {
4421 quan('H')+=x;
4422 if(x>quan('S') || x<quan('R')) {
4423 quan('I')=to_pixels(quan('H'));
4424 } @+else {
4425 quan('I')+=to_pixels(x);
4426 if(to_pixels(quan('H'))-quan('I')>quan('D'))
4427 quan('I')=to_pixels(quan('H'))+quan('D');
4428 if(to_pixels(quan('H'))-quan('I')<-quan('D'))
4429 quan('I')=to_pixels(quan('H'))-quan('D');
4433 @ @-p void vertical_movement(dvi_number x) {
4434 quan('V')+=x;
4435 if(x>quan('U') || x<quan('T')) {
4436 quan('J')=to_pixels(quan('V'));
4437 } @+else {
4438 quan('J')+=to_pixels(x);
4439 if(to_pixels(quan('V'))-quan('J')>quan('E'))
4440 quan('J')=to_pixels(quan('V'))+quan('E');
4441 if(to_pixels(quan('V'))-quan('J')<-quan('E'))
4442 quan('J')=to_pixels(quan('V'))-quan('E');
4446 @ This is now the part that does actual sending. When many characters
4447 come next to each other, the rounding will be done such that the number
4448 of pixels between two letters will always be the same whenever those two
4449 letters occur next to each other.
4451 @<Typeset character |c| on the current layer@>= {
4452 data_index n=fontindex[quan('F')&0xFF];
4453 if((quan('C')&0x100) || (c&0x80)) c|=quan('C')<<8;
4454 while(n!=none && c!=font_data.data[n].n)
4455 n=font_data.data[n].next;
4456 if(n==none) dvi_error(fp,"Character not in font");
4457 @<Typeset the character and update the current position@>;
4458 @<Update the character code prefix@>;
4461 @ @<Typeset the character and update the current position@>= {
4462 if(font_data.data[n].flag&ff_roundbefore)
4463 quan('I')=to_pixels(quan('H'));
4464 if(quan('L') && font_data.data[n].raster) typeset_char_here(n);
4465 if(moveaftertyping) {
4466 quan('H')+=font_data.data[n].w;
4467 quan('I')+=font_data.data[n].dx;
4468 if(font_data.data[n].flag&ff_roundafter)
4469 quan('I')=to_pixels(quan('H'));
4470 else horizontal_movement(0);
4474 @ If you have a typesetting program that can ship out characters with
4475 codes more than eight bits long, you won't need this. It is provided for
4476 use with normal {\TeX} system.
4478 @<Update the character code prefix@>= {
4479 if(font_data.data[n].flag&ff_strip) c&=0x7F; else c&=0xFF;
4480 if(font_data.data[n].flag&ff_select) quan('C')=c|0x100;
4481 if(font_data.data[n].flag&ff_prefix) quan('C')=c;
4482 if(font_data.data[n].flag&ff_reset) quan('C')=0;
4485 @ The number of pixels in the height or width of a rule will always be
4486 rounded up. However, unlike DVItype, this program has no floating point
4487 rounding errors.
4489 @d to_rule_pixels(_val) ceiling((_val)*unit_conv)
4491 @<Typeset |a| by |c| rule on the current layer@>= {
4492 dvi_number x=to_rule_pixels(a);
4493 dvi_number y=to_rule_pixels(c);
4494 if(quan('L') && a>0 && c>0) typeset_rule_here(x,y);
4495 if(moveaftertyping) {
4496 quan('I')+=x;
4497 horizontal_movement(0);
4501 @ Sometimes you might want DVI units converted to pixels inside of a user
4502 program contained in a DVI file. Here is how it is done.
4504 @<Cases for system commands@>=
4505 @-case 'C': {
4506 // Convert DVI units to pixels
4507 if(stack_ptr->is_string) program_error("Type mismatch");
4508 stack_ptr->number=to_pixels(stack_ptr->number);
4509 break;
4512 @*Layer Rendering. Please note, these numbers are |short|, which means
4513 that you cannot have more than 65536 pixels in width or in height. This
4514 should not be a problem, because even if you have 3000 dots per inch, and
4515 each card is 10 inches long, that is still only 30000 which is less than
4516 half of the available width. (All units here are in pixels.)
4518 In order to save memory, all typeset nodes are stored in one list at
4519 first, and then rendered to a pixel buffer as each layer is being written
4520 out to the \.{PBM} file, and then the buffer can be freed (or reset to
4521 zero) afterwards to save memory.
4523 @<Typedefs@>=
4524 typedef struct {
4525 unsigned short x; // X position on page
4526 unsigned short y; // Y position on page
4527 union {
4528 struct {
4529 unsigned short w; // Width of rule
4530 unsigned short h; // Height of rule
4531 }@+;
4532 data_index c; // Character index in |font_data|
4533 }@+;
4534 unsigned char l; // Layer (high bit set for rules)
4535 } typeset_node;
4537 @ @<Global variables@>=
4538 memory_of(typeset_node) typeset_nodes;
4540 @ @<Initialize memory@>= init_memory(typeset_nodes,8);
4542 @ We also have variables for the layer size (loaded from \.{\\count2}
4543 and \.{\\count3} registers for the current page). If they are both zero,
4544 then nothing will be rendered.
4546 @<Global variables@>=
4547 unsigned short layer_width;
4548 unsigned short layer_height;
4550 @ Here are the subroutines which typeset characters and rules onto the
4551 page buffer. They are not rendered into a picture yet.
4553 @d typeset_new_page() (typeset_nodes.used=0)
4554 @d typeset_rule_here(_w,_h) typeset_rule(quan('I'),quan('J'),(_w),(_h));
4555 @d typeset_char_here(_ch) typeset_char(quan('I'),quan('J'),(_ch));
4557 @-p void typeset_rule(int x,int y,int w,int h) {
4558 data_index n=new_record(typeset_nodes);
4559 @<Ensure |w| and |h| are not too large to fit on the page@>;
4560 typeset_nodes.data[n].x=x;
4561 typeset_nodes.data[n].y=y;
4562 typeset_nodes.data[n].w=w;
4563 typeset_nodes.data[n].h=h;
4564 typeset_nodes.data[n].l=quan('L')|0x80;
4567 @ @<Ensure |w| and |h| are not too large to fit on the page@>= {
4568 if(x+w>layer_width) w=layer_width-x;
4569 if(y+h>layer_height) h=layer_height-y;
4572 @ @-p void typeset_char(int x,int y,data_index c) {
4573 data_index n=new_record(typeset_nodes);
4574 typeset_nodes.data[n].x=x;
4575 typeset_nodes.data[n].y=y;
4576 typeset_nodes.data[n].c=c;
4577 typeset_nodes.data[n].l=quan('L');
4580 @ Here is a variable |image|. This is a pointer to the buffer for the
4581 picture of the current layer, in \.{PBM} format. The internal quantity
4582 \.L should be set now to the largest layer number in use, at the end of
4583 the page, because it is used to determine how many layers must be sent to
4584 the output.
4586 @d image_max (image+layer_size)
4588 @<Global variables@>=
4589 unsigned char*image;
4591 @ @<Render this page@>= {
4592 unsigned int row_size=((layer_width+7)>>3);
4593 unsigned int layer_size=row_size*layer_height;
4594 image=malloc(layer_size+1);
4595 while(quan('L')) {
4596 memset(image,0,layer_size);
4597 @<Read the |typeset_nodes| list and render any applicable nodes@>;
4598 @<Send the current layer to a file@>;
4599 --quan('L');
4601 free(image);
4604 @ @<Read the |typeset_nodes| list and render any applicable nodes@>= {
4605 data_index i;
4606 foreach(i,typeset_nodes) {
4607 if((typeset_nodes.data[i].l&0x7F)==quan('L')) {
4608 if(typeset_nodes.data[i].l&0x80) {
4609 @<Render a rule node@>;
4610 } @+else {
4611 @<Render a character node@>;
4617 @ In order to render a rule node (which is a filled |black| rectangle), it
4618 is split into rows, and each row is split into three parts: the left end,
4619 the filling, and the right end. However, if the width is sufficiently
4620 small, it will fit in one byte and will not have to be split in this way.
4622 There are also some checks to ensure that the entire rectangle will be
4623 within the bounds of the image.
4625 @<Render a rule node@>= {
4626 int y=1+typeset_nodes.data[i].y-typeset_nodes.data[i].h;
4627 int x=typeset_nodes.data[i].x;
4628 int w=typeset_nodes.data[i].w;
4629 if(y<0) y=0;
4630 if(typeset_nodes.data[i].y>=layer_height)
4631 typeset_nodes.data[i].y=layer_height-1;
4632 if((x&7)+w<=8) {
4633 @<Render a small rule node@>;
4634 } @+else {
4635 @<Render a large rule node@>;
4639 @ @<Render a small rule node@>= {
4640 for(;y<=typeset_nodes.data[i].y;y++) {
4641 image[y*row_size+(x>>3)]|=(0xFF>>(x&7))&~(0xFF>>((x&7)+w));
4645 @ @<Render a large rule node@>= {
4646 for(;y<=typeset_nodes.data[i].y;y++) {
4647 unsigned char*p=image+(y*row_size+(x>>3));
4648 *p++|=0xFF>>(x&7); // left
4649 memset(p,0xFF,((x&7)+w)>>3); // filling
4650 p[((x&7)+w)>>3]|=~(0xFF>>((x+w)&7)); // right
4654 @ Character nodes are a bit different. The pictures are already stored,
4655 now we have to paste them into the layer picture. Since they will not
4656 always be aligned to a multiple to eight columns (one byte), it will have
4657 to shift out and shift in.
4659 Again, it is necessary to ensure it doesn't go out of bounds. It has to be
4660 a bit more careful for characters than it does for rules. Also note that
4661 the \.{GF} format does not require that |min_m| and so on are the tightest
4662 bounds possible.
4664 @<Render a character node@>= {
4665 unsigned int ch=typeset_nodes.data[i].c;
4666 unsigned int x=typeset_nodes.data[i].x+font_data.data[ch].min_m;
4667 unsigned int y=typeset_nodes.data[i].y-font_data.data[ch].max_n;
4668 unsigned int z=typeset_nodes.data[i].y-font_data.data[ch].min_n;
4669 unsigned int w=(font_data.data[ch].max_m-font_data.data[ch].min_m)/8+1;
4670 register unsigned char sh=x&7; // shifting amount for right shift
4671 register unsigned char lsh=8-sh; // shifting amount for left shift
4672 unsigned char*p=image+(y*row_size+(x>>3));
4673 unsigned char*q=font_data.data[ch].raster;
4674 @<Cut off the part of character above the top of the layer image@>;
4675 while(y<=z && p+w<image_max) {
4676 @<Render the current row of the character raster@>;
4677 @<Advance to the next row of the character@>;
4681 @ @<Cut off the part of character above the top of the layer image@>= {
4682 if(y<0) {
4683 p-=row_size*y;
4684 q-=w*y;
4685 y=0;
4687 if(p<image) p=image;
4690 @ @<Render the current row of the character raster@>= {
4691 int j;
4692 for(j=0;j<w;j++) {
4693 p[j]|=q[j]>>sh;
4694 p[j+1]|=q[j]<<lsh;
4698 @ @<Advance to the next row of the character@>= {
4699 y++;
4700 q+=w;
4701 p+=row_size;
4704 @ Layer files are output in \.{PBM} format, which is very similar to the
4705 format which this program uses internally. ImageMagick is capable of
4706 reading this format.
4708 @.PBM@>
4709 @^Portable Bitmap@>
4710 @^ImageMagick@>
4711 @^output@>
4713 @<Send the current layer to a file@>= {
4714 FILE*fp;
4715 char filename[256];
4716 sprintf(filename,"P%dL%d.pbm",quan('P'),quan('L'));
4717 fp=fopen(filename,"wb");
4718 fprintf(fp,"P4%d %d ",layer_width,layer_height);
4719 fwrite(image,1,layer_size,fp);
4720 fclose(fp);
4723 @ @<Display the list of typeset nodes@>= {
4724 data_index i;
4725 foreach(i,typeset_nodes) {
4726 if(typeset_nodes.data[i].l&0x80) {
4727 printf("[%d] %dx%d%+d%+d\n",typeset_nodes.data[i].l&0x7F
4728 ,typeset_nodes.data[i].w,typeset_nodes.data[i].h
4729 ,typeset_nodes.data[i].x,typeset_nodes.data[i].y
4731 } @+else {
4732 printf("[%d] %d(%d) %+d%+d\n",typeset_nodes.data[i].l
4733 ,typeset_nodes.data[i].c,font_data.data[typeset_nodes.data[i].c].n
4734 ,typeset_nodes.data[i].x,typeset_nodes.data[i].y
4740 @ @<Display typesetting diagnostics@>= {
4741 int i;
4742 for(i=0;i<32;i++) {
4743 if(type_quan[i]) printf("%c=%d\n",i+'@@',type_quan[i]);
4745 printf("unit_conv: %lld [%d]\n",unit_conv,round(unit_conv));
4746 printf("nodes: %d/%d\n",typeset_nodes.used,typeset_nodes.allocated);
4747 printf("fonts: %d/%d\n",font_data.used,font_data.allocated);
4748 if(dvi_stack) printf("stack: %d\n",dvi_stack_ptr-dvi_stack);
4751 @*Process of ImageMagick. The filename of ImageMagick \.{convert} is found
4752 by using the \.{IMCONVERT} environment variable. The entire command-line
4753 is stored in the \.Q register, with arguments separated by spaces, and it
4754 might be very long.
4756 @^ImageMagick@>
4757 @.IMCONVERT@>
4759 @d add_magick_arg(_val) magick_args.data[new_record(magick_args)]=_val
4761 @<Typedefs@>=
4762 typedef char*char_ptr;
4764 @ @<Global variables@>=
4765 memory_of(char_ptr) magick_args;
4767 @ @<Switch to ImageMagick@>= {
4768 init_memory(magick_args,4);
4769 add_magick_arg("convert"); // |argv[0]| (program name)
4770 @<Add arguments from \.Q register@>;
4771 add_magick_arg(0); // (terminator)
4772 @<Call the ImageMagick executable file@>;
4775 @ The \.Q register will be clobbered here. But that is OK since it will no
4776 longer be used within \TeX nicard.
4778 @<Add arguments from \.Q register@>= {
4779 char*q=registers['Q'].text;
4780 char*p;
4781 while(q && *q) {
4782 p=q;
4783 if(q=strchr(q,' ')) *q++=0;
4784 if(*p) add_magick_arg(p);
4788 @ @<Call the ImageMagick executable file@>= {
4789 char*e=getenv("IMCONVERT");
4790 if(!e) @<Display the arguments and quit@>;
4791 execv(e,magick_args.data);
4792 fprintf(stderr,"Unable to run ImageMagick\n");
4793 @.Unable to run ImageMagick@>
4794 return 1;
4797 @ @<Display the arguments and quit@>= {
4798 data_index i;
4799 char*p;
4800 foreach(i,magick_args) if(p=magick_args.data[i]) printf("%s\n",p);
4801 return 0;
4804 @*Internal Typesetting. Until now, we only had the codes for doing
4805 external typesetting and image manipulation (which was the original plan
4806 for this program). Now, we are adding internal typesetting and image
4807 manipulation as well, to avoid external dependencies.
4809 Some of the algorithms of \TeX\ will be used here, with some changes. For
4810 example, there are no leaders, marks, footnotes, alignments, mathematical
4811 formulas, or hyphenation. Ligature nodes are not needed either, because
4812 there is no hyphenation, so we can just use normal character nodes for
4813 ligatures.
4815 There is also no page breaking, although you can still do vertical
4816 splitting if you want multiple columns of text on a card, or for the text
4817 to be interrupted in the middle.
4819 @ Here is a list of the category codes used for internal typesetting, and
4820 the code to initialize that table and the other tables. There are also
4821 category codes from 32 to 255, which mean that it is a register number
4822 containing a code to execute (we set up |tabulation| and |escape_code| to
4823 call registers \.t and \.e, although it is unlikely to use these tokens).
4825 @d cat_ignore 0 // Ignore this token
4826 @d cat_norm 1 // Add a character from the current font
4827 @d cat_space 2 // Add a glue node with the current space factor
4828 @d cat_exit 3 // Exit the current block
4829 @d cat_accent 4 // Add an accent to the next character
4830 @d cat_xaccent 5 // As above, but XOR 128
4832 @<Initialize tables for internal typesetting@>= {
4833 for(i=0;i<256;i++) {
4834 tables['E'][i]=1;
4835 tables['F'][i]=40;
4836 tables['J'][i]=tables['K'][i]=128;
4838 tables['E'][null_char]=cat_ignore;
4839 tables['E'][end_transmission]=cat_exit; // Not actually used
4840 tables['E'][tabulation]='t';
4841 tables['E'][escape_code]='e';
4842 tables['E'][record_separator]=cat_exit;
4843 tables['E'][field_separator]=cat_exit;
4844 tables['E'][' ']=cat_space;
4847 @ All dimensions are stored in units of scaled points (where there are
4848 65536 scaled points in one point, and $72.27$ points in one inch).
4850 There will also be a type for glue ratios, which is used to multiply by
4851 glue stretch and shrink inside of a box, where a value of |0x100000000|
4852 means 100\char`\%\relax\space stretch or shrink, or 1pt per fil unit.
4854 @<Typedefs@>=
4855 typedef signed int scaled;
4856 typedef signed long long int glue_ratio;
4858 @*Data Structures for Boxes. Typesetting is done first by storing
4859 horizontal and vertical boxes of nodes. These boxes may then be included
4860 in other boxes, or shipped out to the next part of the program, which is
4861 image manipulation.
4863 Here we list the possible kind of nodes. These are four-bit numbers, with
4864 bit 3 set for a breakable\slash discardable node. The four high bits are
4865 used as a small parameter for the node.
4867 There are structures for many kinds of nodes, but only one pointer type
4868 will be used. Unions are used to allow many kinds of nodes at once.
4870 @d chars_node 00 // One word of text (including kerns, ligatures, accents)
4871 @d hlist_node 01 // Horizontal box
4872 @d vlist_node 02 // Vertical box
4873 @d rule_node 03 // Filled rectangle
4874 @d adjust_node 04 // Add material before or after current line
4875 @d special_node 05 // Execute commands when this node is found
4876 @d layer_node 06 // Like |special_node| but with only one purpose
4877 @d kern_node 010 // Fixed movement
4878 @d glue_node 011 // Variable movement
4879 @d penalty_node 012 // Tell how bad it is to break a line/page here
4881 @d type_of(_node) ((_node)->type_and_subtype&0x0F)
4882 @d subtype_of(_node) ((_node)->type_and_subtype>>4)
4883 @s box_node int
4884 @d calc_size(_members) (sizeof(struct{
4885 struct box_node*y;unsigned char z;struct{_members}@+;
4888 @<Typedefs@>=
4889 typedef struct box_node {
4890 struct box_node*next; // next node, or 0
4891 unsigned char type_and_subtype;
4892 union @+{
4893 @<Structure of a |chars_node|@>;
4894 @<Structure of a |hlist_node|, |vlist_node|, or |rule_node|@>;
4895 @<Structure of a |adjust_node|@>;
4896 @<Structure of a |special_node|@>;
4897 @<Structure of a |layer_node|@>;
4898 @<Structure of a |kern_node|@>;
4899 @<Structure of a |glue_node|@>;
4900 @<Structure of a |penalty_node|@>;
4901 }@+;
4902 } box_node;
4905 @ In a |chars_node|, there is a font number (0 to 255), and then sixteen
4906 bits for each character, accent, or kern. Data |0x0000| to |0x7FFF| adds a
4907 character (so only 32768 characters are available, while \TeX\ supports
4908 only 256 characters, so it is still more than \TeX), data |0x8000| to
4909 |0xBFFF| specifies an accent for the next character (so only characters
4910 numbered 0 to 16383 can be used as accents), |0xC000| to |0xFFFE| are
4911 implicit kerns (allowing only 16383 possible kerns, although most fonts
4912 use only ten or so, certainly not as many as sixteen thousand), and data
4913 |0xFFFF| is a terminator. All characters are from the same font.
4915 If an accent is specified, it is added to the immediately next character
4916 in this list.
4918 @d sizeof_chars_node calc_size(unsigned char a;unsigned short b[0];)
4920 @<Structure of a |chars_node|@>=
4921 struct {
4922 unsigned char font;
4923 unsigned short chars[0];
4926 @ An |hlist_node|, |vlist_node|, and |rule_node| are all similar to each
4927 other, except that a |rule_node| does not have a |list| or |glue_set|, and
4928 a |hlist_node| has an additional |tracking| parameter.
4930 Tracking is 128 for normal width of each letter. They can be adjusted to a
4931 lesser number to make the letters closer together, or greater to make
4932 farther apart leters, for example 64 means half of normal width.
4934 The |subtype_of| a |hlist_node| or |vlist_node| is the glue set order,
4935 setting the high bit for shrinking (otherwise it is stretching).
4937 @d sizeof_hlist_node calc_size(
4938 scaled a;scaled b;scaled c;scaled d;
4939 struct box_node*e;glue_ratio f;unsigned char g;
4941 @d sizeof_vlist_node calc_size(
4942 scaled a;scaled b;scaled c;scaled d;
4943 struct box_node*e;glue_ratio f;
4945 @d sizeof_rule_node calc_size(scaled a;scaled b;scaled c;scaled d;)
4947 @<Structure of a |hlist_node|...@>=
4948 struct {
4949 scaled width;
4950 scaled height;
4951 scaled depth;
4952 scaled shift_amount; // shift this box by the specified amount
4954 struct box_node*list; // pointer to first child node
4955 glue_ratio glue_set;
4957 unsigned char tracking; // adjust letter spacing
4960 @ An |adjust_node| has only a pointer to the sublist, and the |subtype_of|
4961 should be zero to append the vertical material after this line of the
4962 paragraph, or one to put it before this line of the paragraph.
4964 @d sizeof_adjust_node calc_size(struct box_node*a;)
4966 @<Structure of a |adjust_node|@>=
4967 struct {
4968 struct box_node*sublist; // pointer to first child node
4971 @ A |special_node| contains a null-terminated C string. The |subtype_of|
4972 specifies how it is used; they are listed below.
4974 @d spec_measure 1 // Measuring the length of a line in a paragraph
4975 @d spec_break 2 // Breaking a paragraph
4976 @d spec_pack 3 // Packaging a box
4977 @d spec_vbreak 4 // Breaking a vertical box
4978 @d spec_render 5 // Shipping out the nodes to the page
4980 @d sizeof_special_node calc_size(char a[0];)
4982 @<Structure of a |special_node|@>=
4983 struct {
4984 char program[0];
4987 @ A |layer_node| acts like a |special_node| with subtype |spec_render| and
4988 the |program| set to |"3mL"| if the |layer| parameter is 3. It is probably
4989 a more common kind of special.
4991 For example, it might be used to specify typing in different colors.
4993 @d sizeof_layer_node calc_size(unsigned char a;)
4995 @<Structure of a |layer_node|@>=
4996 struct {
4997 unsigned char layer;
5000 @ A |kern_node| represents a horizontal or vertical movement, such as
5001 where some amount of space is skipped.
5003 @d sizeof_kern_node calc_size(scaled a;)
5005 @<Structure of a |kern_node|@>=
5006 struct {
5007 scaled distance;
5010 @ A |glue_node| is similar to a |kern_node| although there are some
5011 differences. One difference is that it can stretch and shrink. The
5012 |subtype_of| parameter has the stretch order in the low two bits and the
5013 shrink order in the high two bits.
5015 @d finite 0
5016 @d fil 1
5017 @d fill 2
5018 @d filll 3
5020 @d sizeof_glue_node calc_size(scaled a;scaled b;scaled c;)
5022 @<Structure of a |glue_node|@>=
5023 struct {
5024 scaled natural;
5025 scaled stretch;
5026 scaled shrink;
5029 @ A |penalty_node| specifies a valid breakpoint in a paragraph, and in
5030 addition, specifies how bad it is to break here. A penalty value 10000001
5031 is bad enough that it will not break here, and $-10000001$ is good enough
5032 that it will definitely break here.
5034 @d sizeof_penalty_node calc_size(signed int a;)
5036 @<Structure of a |penalty_node|@>=
5037 struct {
5038 signed int penalty;
5041 @ Here are functions for manipulation of box nodes, including creation,
5042 destruction, and so on.
5044 First is simple creation of a node. It sets nothing other than type and
5045 subtype.
5047 @-p box_node*create_node(int type,int subtype,int size) {
5048 box_node*ptr=malloc(size);
5049 ptr->next=0;
5050 ptr->type_and_subtype=(type&0x0F)|(subtype<<4);
5051 return ptr;
5054 @ Now is destruction. It is recursive because some nodes are boxes that
5055 point to other lists too.
5057 @-p void trash_nodes(box_node*this) {
5058 box_node*next;
5059 while(this) {
5060 next=this->next;
5061 @<Recurse if there is a sublist to trash@>;
5062 free(this);
5063 this=next;
5067 @ @<Recurse if there is a sublist to trash@>= {
5068 switch(type_of(this)) {
5069 case hlist_node: case vlist_node: @/
5070 trash_nodes(this->list); @+break;
5071 case adjust_node: @/
5072 trash_nodes(this->sublist); @+break;
5073 default: ; // Do nothing
5077 @ You might realize there are no reference counts. They aren't needed,
5078 because each node is used exactly once. (Later on in the semantic nest, it
5079 is seen that this is not quite true; the box nest also includes a
5080 reference, which is in addition to the |next| pointers of each node, but
5081 this is OK since those are nodes are never isolated or destroyed when
5082 picked off of that list.)
5084 @*Font Metric Data. In order to do internal typesetting, it is necessary
5085 to load the font metric data from a \.{TFM} file. The data in a \.{TFM}
5086 file consists of 32-bit words in big-endian order.
5088 However, the first 6 words are twelve 16-bit integers instead, giving
5089 lengths of various parts of the file.
5091 @s fix_word int
5092 @ The most important data type used here is a |fix_word|, which is a
5093 32-bit signed number, with 12 integer bits and 20 fractional bits. Most of
5094 the |fix_word| values in a \.{TFM} file range from $-16$ to $+16$.
5096 @<Typedefs@>=
5097 typedef signed int fix_word;
5099 @ The twelve lengths are according to the following:
5101 \hbox to\hsize{\hfil\vbox{\smallskip\halign{\hfil$\it#={}$&#\hfil\cr
5102 lf&length of the entire file, in words\cr
5103 lh&number of words of header data\cr
5104 bc&smallest character code in this font\cr
5105 ec&largest character code in this font\cr
5106 nw&number of words in the width table\cr
5107 nh&number of words in the height table\cr
5108 nd&number of words in the depth table\cr
5109 ni&number of words in the italic correction table\cr
5110 nl&number of words in the ligature/kern program\cr
5111 nk&number of words in the kern table\cr
5112 ne&number of words in the extensible character table\cr
5113 np&number of font parameter words\cr
5114 }\smallskip}\hfil}
5116 \noindent The parts of the file are in the order listed above. Some of the
5117 sections of the file are not used by this program (the extensible
5118 characters and the header words), but they still must be skipped over when
5119 reading the \.{TFM} file. Also, the $\it lf$ parameter is only for
5120 verification, and this program does not attempt to verify it.
5122 @ Here is data structures for storing information about font metrics. It
5123 is a managed memory. Some elements will be shared by multiple fonts that
5124 use the same \.{TFM} file, such as |design_size|, |fontname|, and the
5125 ligature/kerning programs.
5127 @<Late Typedefs@>=
5128 typedef struct {
5129 scaled parameters[16]; // Font parameters (up to sixteen)
5130 scaled at_size; // At size, for figuring out \.{GF} filename
5131 scaled design_size; // Design size, for figuring out \.{GF} filename
5132 char*fontname; // Name of font, without extension or area
5133 scaled*width_base;
5134 scaled*height_base;
5135 scaled*depth_base;
5136 scaled*italic_base;
5137 scaled*kern_base;
5138 unsigned char min_char; // Smallest valid character code
5139 unsigned char max_char; // Largest valid character code
5140 int right_boundary; // If this is |none| then there is no right boundary
5141 unsigned char lig_limit; // Code |x| ligatures if |x<256*lig_limit|
5142 @<More elements of |font_metric_data|@>@;
5143 } font_metric_data;
5145 @ @<Global variables@>=
5146 memory_of(font_metric_data) metrics;
5148 @ @<Initialize memory@>= init_memory(metrics,4);
5150 @ Now the ligature/kerning program. The purpose of these fields is
5151 explained later.
5153 @<Typedefs@>=
5154 typedef struct {
5155 unsigned char skip;
5156 unsigned char next;
5157 unsigned char op;
5158 unsigned char remainder;
5159 } lig_kern_command;
5161 @ Some fonts will have a fake ``left boundary character'', which is
5162 implied at the beginning of each word. This points to the command which
5163 should become active at the beginning of a word. If it is null, then no
5164 ligature/kerning program will be active.
5166 @<More elements of |font_metric_data|@>=
5167 lig_kern_command*left_boundary; // Program for left boundary character
5169 @ Another thing is the character info. These are the same data for
5170 different sizes of the same font, since they are index into the other
5171 arrays, which are different for each font.
5173 @<Typedefs@>=
5174 typedef struct {
5175 unsigned char width; // Index into |width_base|
5176 unsigned char height; // Index into |height_base|
5177 unsigned char depth; // Index into |depth_base|
5178 unsigned char italic; // Index into |italic_base|
5179 lig_kern_command*program; // Program for this character (null if none)
5180 } char_info_data;
5182 @ @<More elements of |font_metric_data|@>=
5183 char_info_data*info; // |info[c]| is info for character code |c|
5185 @ So let's get started, please.
5187 The parameter |fontnum| shall be the font number of the first size of this
5188 font set up. The |fontname| is the name of the font, without extension.
5189 The |at_size| parameter points to the beginning of a zero-terminated list
5190 of at-sizes to load the font at (much of the data is the same for
5191 different at-sizes so that we can save memory in this way). However, the
5192 |at_size| values are |scaled|, while the \.{TFM} expects |fix_word|. This
5193 is easy to correct by right-shifting four spaces.
5195 The |fix_word| values are in the same format as numbers in a \.{DVI} file,
5196 so the same code can be used. A macro is set here to make convenience.
5198 @d get_fix_word(_fp) ((fix_word)get_dvi_number((_fp),1,4))
5200 @-p void load_tfm(unsigned char fontnum,char*fontname,scaled*at_size) {
5201 char filename[max_filename_length+1];
5202 short lengths[12]; // The data described above, now numbered 0 to 11
5203 lig_kern_command*program; // Beginning of ligature/kerning program
5204 font_metric_data common_data; // Data common to all sizes of a font
5205 data_index metrics_index=metrics.used; // Index into |metrics|
5206 int num_sizes=0; // How many fonts we are loading at once
5207 int w_offset; // Offset of width table
5208 FILE*fp;
5209 @<Set up the filename of the \.{TFM} file and try to open the file@>;
5210 @<Load the |lengths| data@>;
5211 @<Set up |common_data| and |program|@>;
5212 @<Skip the header words@>;
5213 @<Load the character info@>;
5214 @<Set |w_offset|, and skip to the ligature/kerning program@>;
5215 @<Load the ligature/kerning program@>;
5216 @<Correct the pointers into the ligature/kerning program@>;
5217 @<Calculate |num_sizes| and allocate font metric structures@>;
5218 @<Load the dimension values for each size of this font@>;
5219 fclose(fp);
5222 @ @<Set up the filename of the \.{TFM} file and try to open the file@>= {
5223 sprintf(filename,"%s.tfm",fontname);
5224 fp=open_file(filename,"rb");
5225 if(!fp) {
5226 fprintf(stderr,"Cannot open font %s\n",filename);
5227 @.Cannot open font...@>
5228 exit(1);
5232 @ @<Load the |lengths| data@>= {
5233 int i;
5234 for(i=0;i<12;i++) {
5235 int x=fgetc(fp);
5236 int y=fgetc(fp);
5237 lengths[i]=(x<<8)|y;
5241 @ @<Set up |common_data| and |program|@>= {
5242 common_data.fontname=strdup(fontname);
5243 common_data.min_char=lengths[2]; // Hopefully should be zero
5244 common_data.max_char=lengths[3];
5245 common_data.right_boundary=none;
5246 common_data.lig_limit=255;
5247 common_data.info=malloc((lengths[3]+1)*sizeof(char_info_data));
5248 program=malloc(lengths[8]*sizeof(lig_kern_command));
5251 @ @<Skip the header words@>= {
5252 fseek(fp,4,SEEK_CUR); // Skip checksum
5253 common_data.design_size=get_fix_word(fp)>>4;
5254 fseek(fp,4*(lengths[1]-2),SEEK_CUR); // Skip everything else
5257 @ The character info is stored in a packed format. This is then unpacked
5258 and loaded into the |common_data.info| array, which has already been
5259 allocated.
5261 @<Load the character info@>= {
5262 char_info_data*info=common_data.info+common_data.min_char;
5263 int i,c;
5264 for(i=common_data.min_char;i<=common_data.max_char;i++) {
5265 info->width=fgetc(fp);
5266 c=fgetc(fp);
5267 info->height=c>>4;
5268 info->depth=c&0xF;
5269 c=fgetc(fp);
5270 info->italic=c>>2;
5271 if((c&0x3)==0x1) {
5272 info->program=program+fgetc(fp);
5273 } @+else {
5274 info->program=0;
5275 fgetc(fp); // Lists and extensible recipes are not used
5277 info++;
5281 @ The ligature/kerning program will be read before the dimensions specific
5282 to the font size, so that the |common_data| can be set up first. And then
5283 we can skip back to |w_offset|, multiple times, once for each size that is
5284 being loaded.
5286 @<Set |w_offset|, and skip to the ligature/kerning program@>= {
5287 w_offset=ftell(fp);
5288 fseek(fp,4*(lengths[4]+lengths[5]+lengths[6]+lengths[7]),SEEK_CUR);
5291 @ @<Load the ligature/kerning program@>= {
5292 int i;
5293 for(i=0;i<lengths[8];i++) {
5294 program[i].skip=fgetc(fp);
5295 program[i].next=fgetc(fp);
5296 program[i].op=fgetc(fp);
5297 program[i].remainder=fgetc(fp);
5301 @ Sometimes you might need large ligature/kerning programs for many
5302 characters, so you can start at addresses other than 0 to 255. This is the
5303 way that specifies how that is done.
5305 @<Correct the pointers into the ligature/kerning program@>= {
5306 int i;
5307 for(i=common_data.min_char;i<=common_data.max_char;i++)
5308 if(common_data.info[i].program &&
5309 common_data.info[i].program->skip>128)
5310 common_data.info[i].program=program+
5311 (common_data.info[i].program->op<<8)+
5312 common_data.info[i].program->remainder;
5315 @ @<Calculate |num_sizes| and allocate font metric structures@>= {
5316 scaled*p=at_size;
5317 data_index n;
5318 while(*p) {
5319 n=new_record(metrics);
5320 memcpy(&(metrics.data[n]),&common_data,sizeof(font_metric_data));
5321 metrics.data[n].at_size=*p;
5322 num_sizes++;
5323 p++;
5327 @ Now to load the widths, heights, depths, italic corrections, and kerning
5328 distances. This is what |w_offset| is for, so that we can skip back to it.
5329 One allocated memory object is used for all dimension values of one size,
5330 and then the points are moved into the fields of the |font_metric_data|.
5332 @d total_font_dimen
5333 (lengths[4]+lengths[5]+lengths[6]+lengths[7]+lengths[9])
5334 @d cur_metrics (metrics.data[metrics_index])
5336 @<Load the dimension values for each size of this font@>= {
5337 scaled*p;
5338 scaled s,z,zprime,alpha,beta;
5339 for(p=at_size;*p;p++,metrics_index++) {
5340 scaled*d=malloc(sizeof(scaled)*total_font_dimen);
5341 int c;
5342 @<Ensure |d| is valid@>;
5343 @<Set the dimension base pointers for this font@>;
5344 z=*p; // The at size is now called |z|
5345 @<Compute |zprime|, |alpha|, and |beta|@>;
5346 fseek(fp,w_offset,SEEK_SET);
5347 c=lengths[4]+lengths[5]+lengths[6]+lengths[7];
5348 @<Load |c| scaled dimension values from |fp| into |d|@>;
5349 fseek(fp,4*lengths[8],SEEK_CUR);
5350 c=lengths[9];
5351 @<Load |c| scaled dimension values from |fp| into |d|@>;
5352 @<Load the font parameters@>;
5356 @ @<Ensure |d| is valid@>= {
5357 if(!d) {
5358 fprintf(stderr,"Out of font memory\n");
5359 exit(1);
5363 @ @<Set the dimension base pointers for this font@>= {
5364 cur_metrics.width_base=d;
5365 cur_metrics.height_base=cur_metrics.width_base+lengths[4];
5366 cur_metrics.depth_base=cur_metrics.height_base+lengths[5];
5367 cur_metrics.italic_base=cur_metrics.depth_base+lengths[6];
5368 cur_metrics.kern_base=cur_metrics.italic_base+lengths[7];
5371 @ @<Load |c| scaled dimension values from |fp| into |d|@>= {
5372 while(c--) {
5373 scaled b3,b2,b1,b0;
5374 b0=fgetc(fp); @+ b1=fgetc(fp); @+ b2=fgetc(fp); @+ b3=fgetc(fp);
5375 *d++=(((((b3*zprime)>>8)+(b2*zprime))>>8)+(b1*zprime))/beta
5376 -(b0?alpha:0);
5380 @ Now there are font parameters. There are up to sixteen font parameters,
5381 but numbered starting at 1. This is the code that makes it to do this.
5383 @<Load the font parameters@>= {
5384 c=lengths[11]-1;
5385 if(c>14) c=14;
5386 if(c<0) c=0;
5387 cur_metrics.parameters[0]=cur_metrics.parameters[1]=0;
5388 if(lengths[11]) cur_metrics.parameters[1]=get_fix_word(fp)>>4;
5389 d=cur_metrics.parameters+2;
5390 @<Load |c| scaled dimension values from |fp| into |d|@>;
5393 @*Semantic Nest. We might be building many boxes at once, nested inside of
5394 each other. So, we need to keep the stack of what kind of boxes are
5395 currently in use, and the associated parameters, such as space factors,
5396 and the previous depth of the box.
5398 There is two kinds, horizontal and vertical. The outer mode is considered
5399 horizontal so that it does not add leading between boxes, although it is
5400 not for making a box of the outer mode.
5402 The currently active modes are stored both forwards and backwards, so that
5403 we can use them as a stack of box nodes. There is a null pointer to mark
5404 the end of the list.
5406 @<Typedefs@>=
5407 typedef box_node*box_node_ptr;
5409 @ @<Global variables@>=
5410 memory_of(box_node_ptr) box_nest;
5412 @ @<Initialize memory@>=
5413 init_memory(box_nest,2);
5415 @ We also have the semantic list with local variables to the current
5416 group. The purpose of the |data| fields depends on whether this state is
5417 in horizontal or vertical mode, and that is why it is a union so that we
5418 can access then by names in that case, although they can also be accessed
5419 by numbers as well.
5421 @<Typedefs@>=
5422 typedef struct nest_state {
5423 struct nest_state*link; // Link to state this one is inside of
5424 boolean is_vertical; // 0 for horizontal, 1 for vertical
5425 data_index box_nest_index; // Index into |box_nest|
5426 union {
5427 scaled data[16];
5428 @<Nest state variables for horizontal mode@>;
5429 @<Nest state variables for vertical mode@>;
5430 }@+;
5431 } nest_state;
5433 @ @<Global variables@>=
5434 nest_state*cur_nest;
5436 @ @<Initialize memory@>= {
5437 cur_nest=malloc(sizeof(nest_state));
5438 cur_nest->link=0; // Means this is the outer level
5439 cur_nest->is_vertical=0; // Horizontal mode, no leading
5440 cur_nest->box_nest_index=new_record(box_nest);
5441 box_nest.data[cur_nest->box_nest_index]=0;
5442 cur_nest->space_factor=40; // Normal spacing
5445 @ @<Nest state variables for horizontal mode@>=
5446 struct {
5447 scaled space_factor; // Really just a number, but I don't care
5450 @ @<Nest state variables for vertical mode@>=
5451 struct {
5452 scaled prev_depth;
5455 @ Here are codes to enter a nest.
5457 @-p void enter_nest(boolean is_vertical) {
5458 nest_state*link=cur_nest;
5459 cur_nest=malloc(sizeof(nest_state));
5460 cur_nest->link=link;
5461 cur_nest->is_vertical=is_vertical;
5462 cur_nest->box_nest_index=new_record(box_nest);
5463 box_nest.data[cur_nest->box_nest_index]=0;
5464 if(is_vertical) cur_nest->prev_depth=0;
5465 else cur_nest->space_factor=40;
5468 @ And we also need codes to leave a nest. This function returns the
5469 pointer to the first node in the box that was being created, and then the
5470 packaging programs can use that to make a box and iterate over the |next|
5471 pointers to read the entire list.
5473 @-p box_node*leave_nest(void) {
5474 nest_state*link=cur_nest->link;
5475 box_node*node;
5476 @<Ensure it is not nest underflow@>;
5477 @<Set |node| to the node at the beginning of the current list@>;
5478 @<Rewind |box_nest| to the end of the parent list@>;
5479 free(cur_nest);
5480 cur_nest=link;
5481 return node;
5484 @ The outer nest should never be left or packaged; it is only used as a
5485 general-purpose stack and a container for other nests. (Unlike \TeX, the
5486 outer nest is never split into pages in \TeX nicard.)
5488 @<Ensure it is not nest underflow@>= {
5489 if(!link) {
5490 fprintf(stderr,"\nNest underflow\n");
5491 exit(1);
5495 @ Note: Sometimes |node| will be a null pointer if the current list is
5496 making an empty box (i.e. no nodes have been pushed).
5498 @<Set |node| to the node at the beginning of the current list@>= {
5499 if(box_nest.used==cur_nest->box_nest_index+1) {
5500 node=0;
5501 } @+else {
5502 node=box_nest.data[cur_nest->box_nest_index+1];
5506 @ @<Rewind |box_nest| to the end of the parent list@>= {
5507 box_nest.used=cur_nest->box_nest_index;
5510 @ And finally we have codes to push and pop nodes in the current list.
5511 These are simple codes since there isn't much to do.
5513 @d top_of_nodelist (box_nest.data[box_nest.used-1])
5515 @-p inline void push_node(box_node*ptr) {
5516 top_of_nodelist->next=ptr;
5517 box_nest.data[new_record(box_nest)]=ptr;
5520 @ @-p box_node*pop_node(void) {
5521 box_node*ptr=top_of_nodelist;
5522 if(ptr) {
5523 box_nest.used--;
5524 top_of_nodelist->next=0;
5526 return ptr;
5529 @*Box Calculation. Here are codes to calculate various things about the
5530 boxes, including badness, width\slash height\slash depth of a string of
5531 characters, and so on.
5533 This function is used to compute the ``badness'' of a glue setting, when a
5534 total $t$ is supposed to be made from amounts that sum to $s$. In this
5535 program, the badness is $1000(t/s)^3$ (ten times as much as \TeX). It does
5536 not have to be extremely accurate, although it is sufficiently accurate to
5537 do line breaking and so on. Ten million occurs when you stretch more than
5538 21 times as much as it should; this should never happen so it is given the
5539 maximum possible badness that can be computed using this. The badness
5540 squared should never exceed sixty-three bits (which it won't).
5542 @!@^badness@>
5544 @d very_bad 10000000
5545 @d too_bad 10000001
5547 @-p int calc_badness(scaled t,scaled s) {
5548 long long int r; // Apprximately $\root3\of{1000\cdot2^{32}}(t/s)$
5549 if(t==0) return 0;
5550 if(s<=0) return very_bad;
5551 r=(16255LL*t)/s;
5552 if(r>2097152LL) return very_bad;
5553 r=(r*r*r+(1LL<<31))>>32;
5554 if(r>very_bad) r=very_bad;
5555 return r;
5558 @ Next we calculate the width, height, and depth of a string of
5559 characters in one font, possibly including accents, kerns, and tracking.
5560 Ligatures will have already been dealt with before this code is reached,
5561 and kerns will already have been added in.
5563 @-p void calc_chars(box_node*b,scaled*w,scaled*h,scaled*d,short t) {
5564 font_metric_data*m=&(metrics.data[b->font]);
5565 unsigned short*c; // Pointer to current character code
5566 scaled junk; // Ensures no segmentation faults are occuring
5567 if(!w) w=&junk;
5568 if(!h) h=&junk;
5569 if(!d) d=&junk;
5570 *w=*h=*d=0;
5571 for(c=b->chars;*c!=0xFFFF;c++) {
5572 if(*c&0x8000) {
5573 if(*c&0x4000) {
5574 @<Process an implicit kern in |calc_chars|@>;
5575 } @+else {
5576 @<Process an accent in |calc_chars|@>;
5578 } @+else {
5579 @<Process a normal character in |calc_chars|@>;
5584 @ @<Process a normal character in |calc_chars|@>= {
5585 scaled width=m->width_base[m->info[*c&0xFF].width];
5586 scaled height=m->height_base[m->info[*c&0xFF].height];
5587 scaled depth=m->depth_base[m->info[*c&0xFF].depth];
5588 if(*h<height) *h=height;
5589 if(*d<depth) *d=depth;
5590 *w+=(t*width)>>7;
5593 @ @<Process an implicit kern in |calc_chars|@>= {
5594 scaled width=m->kern_base[*c&0x3FFF];
5595 *w+=(t*width)>>7;
5598 @ Now to do accents. This requires looking ahead to see the height for the
5599 next character. If the accent has positive height and zero depth, then it
5600 should be adjusted higher in case the letter is taller than an `x' (for
5601 example uppercase letters such as `\'E'). However, if the accent has
5602 positive depth and zero height, then it is an accent that should not be
5603 adjusted for the height of the character (for example `\c C'), although it
5604 might be adjusted for the depth.
5606 It should never happen that the next item is not a normal character (if it
5607 does, then I am not considered responsible for your bad luck).
5609 @<Process an accent in |calc_chars|@>= {
5610 scaled height=m->height_base[m->info[*c&0xFF].height];
5611 scaled depth=m->depth_base[m->info[*c&0xFF].depth];
5612 scaled c_height=m->height_base[m->info[c[1]&0xFF].height];
5613 scaled c_depth=m->height_base[m->info[c[1]&0xFF].depth];
5614 if(height<=0 && depth>0) {
5615 depth+=c_depth;
5616 } @+else {
5617 height+=c_height-m->parameters[5];
5619 if(*h<height) *h=height;
5620 if(*d<depth) *d=depth;
5623 @*Packaging. This is how the nest lists are packaged into boxes and the
5624 width, height, and depth are calculated from them. They are separate for
5625 horizontal and vertical packing, although there are similarities.
5627 The packing code is also used to compute the glue set of the box, and its
5628 badness. Here is the global variable to store the badness.
5630 @<Global variables@>=
5631 int last_badness=too_bad;
5633 @ There are two such subroutines, |hpackage| and |vpackage|, depending on
5634 what kind of box is wanted. Each one also takes three parameters: |first|,
5635 the first node in the box; |at_size|, the intended size, and |factor|, the
5636 amount to multiply the natural size by before adding |at_size|.
5638 @d common_package box_node*first,scaled at_size,signed char factor
5640 @ Horizontal packaging must compute height, width, and depth of characters
5641 and other boxes it contains, as well as compute glue settings, specials,
5642 adjustments, and so on.
5644 For horizontal packaging, there is also a |tracking| parameter for spacing
5645 the letters in the box.
5647 @-p box_node*hpackage(common_package,unsigned char tracking) {
5648 box_node*box=create_node(hlist_node,0,sizeof_hlist_node);
5649 scaled stretchability[4]; // Total stretch of all glue
5650 scaled shrinkability[4]; // Total shrink of all glue
5651 scaled natural=0; // Total width
5652 box_node*this; // Current node
5653 @<Initialize variables for |hpackage|@>;
5654 @<Read all nodes in a horizontal list to package them@>;
5655 #define @!actual @, box->width
5656 actual=(factor*natural)/8+at_size;
5657 @<Compute glue set and badness@>;
5658 #undef actual
5659 return box;
5662 @ @<Initialize variables for |hpackage|@>= {
5663 int o;
5664 box->list=first;
5665 box->tracking=tracking;
5666 box->height=box->depth=box->shift_amount=0;
5667 box->glue_set=0;
5668 for(o=0;o<4;o++) stretchability[o]=shrinkability[o]=0;
5671 @ @<Read all nodes in a horizontal list to package them@>= {
5672 for(this=first;this;this=this->next) {
5673 switch(type_of(this)) {
5674 case chars_node: @<Add word to box size@>; @+break;
5675 case hlist_node: case vlist_node: case rule_node:
5676 @<Apply the size of a box to a horizontal list@>; @+break;
5677 case kern_node: natural+=this->distance; @+break;
5678 case glue_node: @<Add glue to box size@>; @+break;
5679 case special_node:
5680 if(subtype_of(this)==spec_pack) @<Pack a special node@>;
5681 @+break;
5682 default: break; // All other nodes are ignored
5687 @ @<Add word to box size@>= {
5688 scaled w,h,d;
5689 calc_chars(this,&w,&h,&d,tracking<<1);
5690 natural+=w;
5691 if(h>box->height) box->height=h;
5692 if(d>box->depth) box->depth=d;
5695 @ @<Apply the size of a box to a horizontal list@>= {
5696 natural+=this->width;
5697 if(this->height+this->shift_amount>box->height)
5698 box->height=this->height+this->shift_amount;
5699 if(this->depth-this->shift_amount>box->depth)
5700 box->depth=this->depth-this->shift_amount;
5703 @ @<Add glue to box size@>= {
5704 natural+=this->natural;
5705 stretchability[subtype_of(this)&3]+=this->stretch;
5706 shrinkability[subtype_of(this)>>2]+=this->shrink;
5709 @ When packing a special node that has a code to run during packing, it
5710 can read and affect the current width and the intended width; it could
5711 also do other things, such as accumulating boxes for adjustments and so
5714 @<Pack a special node@>= {
5715 push_num(at_size);
5716 push_num(natural);
5717 @# execute_program(this->program); @#
5718 natural=pop_num();
5719 at_size=pop_num();
5722 @ A macro named |actual| is defined above so that this code can be used
5723 for both horizontal and for vertical packaging.
5725 We also have a macro here to decide setting the glue.
5727 @d set_glue(_order,_flag,_diff,_glue)
5728 (box->type_and_subtype|=((_order)<<4)|((_flag)<<7)),
5729 (box->glue_set=make_fraction(_glue,_diff))
5731 @<Compute glue set and badness@>= {
5732 if(actual>natural) {
5733 @<Glue is stretching@>;
5734 } @+else if(actual<natural) {
5735 @<Glue is shrinking@>;
5736 } @+else {
5737 last_badness=0; // Perfect!
5741 @ @<Glue is stretching@>= {
5742 if(stretching[filll]!=0) {
5743 set_glue(filll,0,actual-natural,stretching[filll]);
5744 last_badness=0;
5745 } @+else if(stretching[fill]!=0) {
5746 set_glue(fill,0,actual-natural,stretching[fill]);
5747 last_badness=0;
5748 } @+else if(stretching[fil]!=0) {
5749 set_glue(fil,0,actual-natural,stretching[fil]);
5750 last_badness=0;
5751 } @+else if(stretching[finite]!=0) {
5752 set_glue(finite,0,actual-natural,stretching[finite]);
5753 last_badness=calc_badness(actual-natural,stretching[finite]);
5754 } @+else {
5755 last_badness=too_bad;
5759 @ @<Glue is shrinking@>= {
5760 if(shrinking[filll]!=0) {
5761 set_glue(filll,1,natural-actual,shrinking[filll]);
5762 last_badness=0;
5763 } @+else if(shrinking[fill]!=0) {
5764 set_glue(fill,1,natural-actual,shrinking[fill]);
5765 last_badness=0;
5766 } @+else if(shrinking[fil]!=0) {
5767 set_glue(fil,1,natural-actual,shrinking[fil]);
5768 last_badness=0;
5769 } @+else if(shrinking[finite]>=natural-actual) {
5770 set_glue(finite,1,natural-actual,shrinking[finite]);
5771 last_badness=calc_badness(natural-actual,shrinking[finite]);
5772 } @+else {
5773 set_glue(finite,1,1,1); // Shrink as much as possible
5774 last_badness=too_bad;
5778 @ Now vertical.
5780 For vertical packaging, the two extra parameters are |max_dp|, the maximum
5781 depth; and |align_top|, which should be set true if it is wanted to align
5782 at the top instead of at the bottom.
5784 @-p box_node*vpackage(common_package,scaled max_dp,boolean align_top) {
5785 box_node*box=create_node(vlist_node,0,sizeof_vlist_node);
5786 scaled stretchability[4]; // Total stretch of all glue
5787 scaled shrinkability[4]; // Total shrink of all glue
5788 scaled natural=0; // Total height plus depth
5789 scaled bonnet=0; // Height of first item
5790 scaled boot=0; // Depth of last item
5791 box_node*this; // Current node
5792 @<Initialize variables for |vpackage|@>;
5793 @<Read all nodes in a vertical list to package them@>;
5794 box->height=bonnet; @+ box->depth=boot;
5795 #define @!actual @, (*(align_top?&(box->depth):&(box->height)))
5796 natural-=align_top?bonnet:boot;
5797 actual=(factor*natural)/8+at_size;
5798 @<Compute glue set and badness@>;
5799 #undef actual
5800 @<Move the reference point to match the maximum depth, if applicable@>;
5801 return box;
5804 @ @<Initialize variables for |vpackage|@>= {
5805 int o;
5806 box->list=first;
5807 box->width=box->shift_amount=0;
5808 box->glue_set=0;
5809 for(o=0;o<4;o++) stretchability[o]=shrinkability[o]=0;
5812 @ @<Read all nodes in a vertical list to package them@>= {
5813 for(this=first;this;this=this->next) {
5814 switch(type_of(this)) {
5815 case hlist_node: case vlist_node: case rule_node:
5816 @<Apply the size of a box to a vertical list@>; @+break;
5817 case kern_node: natural+=this->distance; @+boot=0; @+break;
5818 case glue_node: @<Add glue to box size@>; @+break;
5819 case special_node:
5820 if(subtype_of(this)==spec_pack) @<Pack a special node@>;
5821 @+break;
5822 default: break; // All other nodes are ignored
5824 if(this==first) bonnet=natural-boot;
5828 @ @<Apply the size of a box to a vertical list@>= {
5829 natural+=this->height+(boot=this->depth);
5830 if(this->width+this->shift_amount>box->width)
5831 box->width=this->width+this->width;
5834 @ @<Move the reference point to match the maximum depth, if applicable@>= {
5835 if(box->depth>max_dp) {
5836 box->height+=box->depth-max_dp;
5837 box->depth=max_dp;
5841 @*Typesetting Commands. There are various commands available in \TeX
5842 nicard for dealing with typesetting. Another thing it does is to allow you
5843 to enter distances using units.
5845 @<Do a typesetting command@>= {
5846 int c=*++ptr;
5847 if((c>='0' && c<='9') || c=='.') @<Read a distance and units@>@;
5848 else switch(c) {
5849 @<Typesetting commands@>@;
5853 @ The following units are supported:
5855 \halign{\quad\tt#\space\hss&(#)\hss\cr
5856 pt&Point\cr
5857 bp&Desktop publishing point\cr
5858 in&Inch\cr
5859 cm&Centimetre\cr
5860 mm&Millimetre\cr
5861 P0&Pica\cr
5862 p0&Desktop publishing pica\cr
5863 q&Quarter\cr
5864 em&Em width -- font specific\cr
5865 ex&Ex height -- font specific\cr
5868 @<Read a distance and units@>= {
5869 char dig[18]; // Digits entered
5870 char digc=0; // Number of digits
5871 int num,den; // Numerator and denominator to scale the units
5874 @*Internal Image Rendering.
5876 @*Main Program. This is where the program starts and ends. Everything else
5877 in the other chapters is started from here.
5879 @<Include files@>=
5880 #include <signal.h>
5881 #include <stdio.h>
5882 #include <stdlib.h>
5883 #include <string.h>
5884 #include <time.h>
5885 #include <unistd.h>
5887 @ @-p int main(int argc,char**argv) {
5888 boolean dvi_mode=0;
5889 @<Set up signal handler@>;
5890 @<Initialize memory@>;
5891 @<Display the banner message@>;
5892 @<Decide whether in DVI reading mode@>;
5893 if(!dvi_mode) @<Open the main input file@>;
5894 @<Initialize the input states@>;
5895 @<Initialize the tables and registers@>;
5896 @<Initialize the random number generator@>;
5897 @<Set registers according to command-line parameters@>;
5898 if(!dvi_mode) @<Process the input files@>;
5899 if(dvi_mode) dvi_mode=read_dvi_file(argv[1]);
5900 @<Call program in \.Z register if necessary@>;
5901 if(!dvi_mode) @<Send |end_transmission| to each card area@>;
5902 @<Write the output files@>;
5903 if(registers['Q'].is_string && dvi_mode &&
5904 (argv[0][0]!='-' || argv[0][1]!='z')) @<Switch to ImageMagick@>;
5905 return 0;
5908 @ @<Display the banner message@>= {
5909 fprintf(stderr,"TeXnicard version %s\n",version_string);
5910 fprintf(stderr,
5911 "This program is free software and comes with NO WARRANTY.\n");
5912 fflush(stderr);
5915 @ @<Set registers according to command-line parameters@>= {
5916 int i;
5917 for(i=2;i<argc;i++) {
5918 registers[i+('0'-2)].is_string=1;
5919 registers[i+('0'-2)].text=strdup(argv[i]);
5923 @ The main input file will be either the terminal, or another file if the
5924 command-line argument is given.
5926 @<Open the main input file@>= {
5927 if(argc>1 && strcmp(argv[1],"-")!=0) {
5928 --current_input_file;
5929 open_input(argv[1]);
5930 } @+else {
5931 current_fp=0;
5932 strcpy(current_filename,"<Teletype>");
5936 @ @<Call program in \.Z register if necessary@>= {
5937 if(registers['Z'].is_string) execute_program(registers['Z'].text);
5940 @ The alternative mode to run this program is DVI mode. DVI mode is
5941 specified by a command-line switch.
5943 @.DVI@>
5945 @<Decide whether in DVI reading mode@>= {
5946 if(argc>1 && argv[1][0]=='-' && argv[1][1]) {
5947 dvi_mode=1;
5948 argv++; @+ argc--;
5949 if(argv[0][1]=='a') {
5950 printing_mode=printing_all_cards;
5951 } @+else if(argv[0][1]=='f') {
5952 printing_mode=printing_list_from_file;
5953 printlistfile=fopen(argv[1],"r");
5954 argv++; @+ argc--;
5955 } @+else if(argv[0][1]=='n') {
5956 printing_mode=printing_list;
5957 printlisttext=argv[1];
5958 argv++; @+ argc--;
5959 } @+else if(argv[0][1]=='z') {
5960 printing_mode=printing_list;
5961 printlisttext="";
5966 @*Signal Handlers. The |SIGSEGV| signal should be handled in case
5967 something goes wrong in the program and it causes a segmentation fault, it
5968 should attempt to recover what you have before terminating, in order to be
5969 better at diagnosing the error.
5971 @<Set up signal handler@>= {
5972 signal(SIGSEGV,handle_crash);
5975 @ Some things will be more careful here to ensure not to cause the error
5976 again (if it does, it will just quit, though).
5978 @-p void handle_crash(int sig) {
5979 signal(SIGSEGV,SIG_DFL);
5980 @#fprintf(stderr,"\nFatal signal error (%d)\n",sig);
5981 @.Fatal signal error...@>
5982 fprintf(stderr,"cur_state=%d\ncur_name=%d\ncur_data=%d\n",
5983 cur_state,cur_name,cur_data);
5984 if(current_input_file>=input_files && current_input_file<input_files
5985 +max_input_stack) @<Display input stack after a crash@>;
5986 fprintf(stderr,"Program stack level: %d\n",stack_ptr-stack);
5987 fprintf(stderr,"Save stack level: %d\n",save_stack_ptr-save_stack);
5988 @#exit(3);
5991 @ @<Display input stack after a crash@>= {
5992 for(;;) {
5993 fprintf(stderr,"File %s line %d\n",current_filename,current_line);
5994 if(current_input_file--==input_files) break;
5998 @*The Future. Here are some ideas for future versions of this program:
6000 $\bullet$ A customizable Inform7-like parser, that would compile into a C
6001 code, so that you can play the cards on rule-enforcing computer programs.
6002 @^Inform@>
6004 $\bullet$ A database to keep track of how many copies of a card have been
6005 sold, for inventory purposes.
6006 @^commercial viability@>
6008 $\bullet$ Full text search, for things such as the Oracle text search.
6009 @^Oracle@>
6011 $\bullet$ Allow more than 256 fonts in one card set.
6013 $\bullet$ Unicode input (UTF-8).
6015 $\bullet$ Built-in typesetting (using some of the algorithms of \TeX) and
6016 image manipulation, so that there is no dependence on external programs,
6017 and everything can be done in one pass.
6019 @*Bibliography.
6021 \count255=0 %
6022 \long\def\Par{\csname par\endcsname}%
6023 \loop\ifnum\count255<\bibliocount%
6024 \advance\count255 by 1
6025 \Par$^{[\the\count255]}$\csname biblio \the\count255\endcsname\Par%
6026 \repeat%
6028 @*Index. Here you can find references to the definition and use of all the
6029 variables, subroutines, etc.\ used in this program, as well as a few other
6030 things of interest. Underlined entries indicate where it is defined.
6032 {\bf Important note:} All the numbers in this index are section numbers,
6033 not page numbers.
6035 % End of file "texnicard.w"