diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8e0b548 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +c-manual.tgz: c.texi cpp.texi fp.texi fdl.texi + mkdir c-manual + ln Makefile c.texi cpp.texi fp.texi fdl.texi c-manual + tar czf c-manual.tgz c-manual + rm -rf c-manual + +c.info: c.texi cpp.texi fp.texi fdl.texi + makeinfo c.texi + +c.pdf: c.dvi + dvipdf c.dvi + +c.dvi: c.texi cpp.texi fp.texi fdl.texi + tex c.texi + +c.doc: + makeinfo --docbook c.texi +c.html: + makeinfo --html c.texi +c.txt: + makeinfo --plaintext c.texi + +clean: + rm -f c-manual.tgz c.dvi c.pdf c c-1 c-2 c.doc c.html c.txt + rm -f c.aux c.cp c.fn c.ky c.log c.pg c.tmp c.toc c.tp c.vr diff --git a/c.texi b/c.texi new file mode 100644 index 0000000..3ead1d2 --- /dev/null +++ b/c.texi @@ -0,0 +1,13008 @@ +\input texinfo + +@c Copyright @copyright{} 2022 Richard Stallman and Free Software Foundation, Inc. + +(The work of Trevis Rothwell and Nelson Beebe has been assigned or +licensed to the FSF.) + +@c move alignment later? + +@setfilename ./c +@settitle GNU C Language Manual +@documentencoding UTF-8 + +@smallbook +@synindex vr fn + +@copying +Copyright @copyright{} 2022 Richard Stallman and Free Software Foundation, Inc. + +(The work of Trevis Rothwell and Nelson Beebe has been assigned or +licensed to the FSF.) + +@quotation +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with the +Invariant Sections being ``GNU General Public License,'' with the +Front-Cover Texts being ``A GNU Manual,'' and with the Back-Cover +Texts as in (a) below. A copy of the license is included in the +section entitled ``GNU Free Documentation License.'' + +(a) The FSF's Back-Cover Text is: ``You have the freedom to copy and +modify this GNU manual. 
Buying copies from the FSF supports it in +developing GNU and promoting software freedom.'' +@end quotation +@end copying + +@dircategory Programming +@direntry +* C: (c). GNU C Language Intro and Reference Manual +@end direntry + +@documentencoding UTF-8 + +@titlepage +@sp 6 +@center @titlefont{GNU C Language Intro and Reference Manual} +@sp 4 +@c @center @value{EDITION} Edition +@sp 5 +@center Richard Stallman +@center and +@center Trevis Rothwell +@center plus Nelson Beebe +@center on floating point +@page +@vskip 0pt plus 1filll + +@insertcopying + +@sp 2 +WILL BE Published by the Free Software Foundation @* +51 Franklin Street, Fifth Floor @* +Boston, MA 02110-1301 USA @* +ISBN ?-??????-??-? + +@ignore +@sp 1 +Cover art by J. Random Artist +@end ignore + +@end titlepage + +@summarycontents +@contents + + +@node Top +@ifnottex +@top GNU C Manual +@end ifnottex +@iftex +@top Preface +@end iftex + +This manual explains the C language for use with the GNU Compiler +Collection (GCC) on the GNU/Linux system and other systems. We refer +to this dialect as GNU C. If you already know C, you can use this as +a reference manual. + +If you understand basic concepts of programming but know nothing about +C, you can read this manual sequentially from the beginning to learn +the C language. + +If you are a beginner to programming, we recommend you first learn a +language with automatic garbage collection and no explicit pointers, +rather than starting with C@. Good choices include Lisp, Scheme, +Python and Java. C's explicit pointers mean that programmers must be +careful to avoid certain kinds of errors. + +C is a venerable language; it was first used in 1973. The GNU C +Compiler, which was subsequently extended into the GNU Compiler +Collection, was first released in 1987. Other important languages +were designed based on C: once you know C, it gives you a useful base +for learning C@t{++}, C#, Java, Scala, D, Go, and more. 
+ +The special advantage of C is that it is fairly simple while allowing +close access to the computer's hardware, which previously required +writing in assembler language to describe the individual machine +instructions. Some have called C a ``high-level assembler language'' +because of its explicit pointers and lack of automatic management of +storage. As one wag put it, ``C combines the power of assembler +language with the convenience of assembler language.'' However, C is +far more portable, and much easier to read and write, than assembler +language. + +This manual focuses on the GNU C language supported by the GNU +Compiler Collection, version ???. When a construct may be absent or +work differently in other C compilers, we say so. When it is not part +of ISO standard C, we say it is a ``GNU C extension,'' because it is +useful to know that; however, other dialects and standards are not the +focus of this manual. We keep those notes short, unless it is vital +to say more. For the same reason, we hardly mention C@t{++} or other +languages that the GNU Compiler Collection supports. + +Some aspects of the meaning of C programs depend on the target +platform: which computer, and which operating system, the compiled +code will run on. Where this is the case, we say so. + +The C language provides no built-in facilities for performing such +common operations as input/output, memory management, string +manipulation, and the like. Instead, these facilities are defined in +a standard library, which is automatically available in every C +program. @xref{Top, The GNU C Library, , libc, The GNU C Library +Reference Manual}. + +This manual incorporates the former GNU C Preprocessor Manual, which +was among the earliest GNU Manuals. It also uses some text from the +earlier GNU C Manual that was written by Trevis Rothwell and James +Youngman. + +GNU C has many obscure features, each one either for historical +compatibility or meant for very special situations. 
We have left them +to a companion manual, the GNU C Obscurities Manual, which will be +published digitally later. + +@menu +* The First Example:: Getting started with basic C code. +* Complete Program:: A whole example program + that can be compiled and run. +* Storage:: Basic layout of storage; bytes. +* Beyond Integers:: Exploring different numeric types. +* Lexical Syntax:: The various lexical components of C programs. +* Arithmetic:: Numeric computations. +* Assignment Expressions:: Storing values in variables. +* Execution Control Expressions:: Expressions combining values in various ways. +* Binary Operator Grammar:: An overview of operator precedence. +* Order of Execution:: The order of program execution. +* Primitive Types:: More details about primitive data types. +* Constants:: Explicit constant values: + details and examples. +* Type Size:: The memory space occupied by a type. +* Pointers:: Creating and manipulating memory pointers. +* Structures:: Compound data types built + by grouping other types. +* Arrays:: Creating and manipulating arrays. +* Enumeration Types:: Sets of integers with named values. +* Defining Typedef Names:: Using @code{typedef} to define type names. +* Statements:: Controlling program flow. +* Variables:: Details about declaring, initializing, + and using variables. +* Type Qualifiers:: Mark variables for certain intended uses. +* Functions:: Declaring, defining, and calling functions. +* Compatible Types:: How to tell if two types are compatible + with each other. +* Type Conversions:: Converting between types. +* Scope:: Different categories of identifier scope. +* Preprocessing:: Using the GNU C preprocessor. +* Integers in Depth:: How integer numbers are represented. +* Floating Point in Depth:: How floating-point numbers are represented. +* Compilation:: How to compile multi-file programs. +* Directing Compilation:: Operations that affect compilation + but don't change the program.
+ +Appendices + +* Type Alignment:: Where in memory a type can validly start. +* Aliasing:: Accessing the same data in two types. +* Digraphs:: Two-character aliases for some characters. +* Attributes:: Specifying additional information + in a declaration. +* Signals:: Fatal errors triggered in various scenarios. +* GNU Free Documentation License:: The license for this manual. +* Symbol Index:: Keyword and symbol index. +* Concept Index:: Detailed topical index. + +@detailmenu +--- The Detailed Node Listing --- + +* Recursive Fibonacci:: Writing a simple function recursively. +* Stack:: Each function call uses space in the stack. +* Iterative Fibonacci:: Writing the same function iteratively. +* Complete Example:: Turn the simple function into a full program. +* Complete Explanation:: Explanation of each part of the example. +* Complete Line-by-Line:: Explaining each line of the example. +* Compile Example:: Using GCC to compile the example. +* Float Example:: A function that uses floating-point numbers. +* Array Example:: A function that works with arrays. +* Array Example Call:: How to call that function. +* Array Example Variations:: Different ways to write the call example. + +Lexical Syntax + +* English:: Write programs in English! +* Characters:: The characters allowed in C programs. +* Whitespace:: The particulars of whitespace characters. +* Comments:: How to include comments in C code. +* Identifiers:: How to form identifiers (names). +* Operators/Punctuation:: Characters used as operators or punctuation. +* Line Continuation:: Splitting one line into multiple lines. +* Digraphs:: Two-character substitutes for some characters. + +Arithmetic + +* Basic Arithmetic:: Addition, subtraction, multiplication, + and division. +* Integer Arithmetic:: How C performs arithmetic with integer values. +* Integer Overflow:: When an integer value exceeds the range + of its type. +* Mixed Mode:: Calculating with both integer values + and floating-point values. 
+* Division and Remainder:: How integer division works. +* Numeric Comparisons:: Comparing numeric values for + equality or order. +* Shift Operations:: Shift integer bits left or right. +* Bitwise Operations:: Bitwise conjunction, disjunction, negation. + +Assignment Expressions + +* Simple Assignment:: The basics of storing a value. +* Lvalues:: Expressions into which a value can be stored. +* Modifying Assignment:: Shorthand for changing an lvalue's contents. +* Increment/Decrement:: Shorthand for incrementing and decrementing + an lvalue's contents. +* Postincrement/Postdecrement:: Accessing then incrementing or decrementing. +* Assignment in Subexpressions:: How to avoid ambiguity. +* Write Assignments Separately:: Write assignments as separate statements. + +Execution Control Expressions + +* Logical Operators:: Logical conjunction, disjunction, negation. +* Logicals and Comparison:: Logical operators with comparison operators. +* Logicals and Assignments:: Assignments with logical operators. +* Conditional Expression:: An if/else construct inside expressions. +* Comma Operator:: Build a sequence of subexpressions. + +Order of Execution + +* Reordering of Operands:: Operations in C are not necessarily computed + in the order they are written. +* Associativity and Ordering:: Some associative operations are performed + in a particular order; others are not. +* Sequence Points:: Some guarantees about the order of operations. +* Postincrement and Ordering:: Ambiguous execution order with postincrement. +* Ordering of Operands:: Evaluation order of operands + and function arguments. +* Optimization and Ordering:: Compiler optimizations can reorder operations + only if it has no impact on program results. + +Primitive Data Types + +* Integer Types:: Description of integer types. +* Floating-Point Data Types:: Description of floating-point types. +* Complex Data Types:: Description of complex number types. +* The Void Type:: A type indicating no value at all.
+* Other Data Types:: A brief summary of other types. + +Constants + +* Integer Constants:: Literal integer values. +* Integer Const Type:: Types of literal integer values. +* Floating Constants:: Literal floating-point values. +* Imaginary Constants:: Literal imaginary number values. +* Invalid Numbers:: Avoiding preprocessing number misconceptions. +* Character Constants:: Literal character values. +* Unicode Character Codes:: Unicode characters represented + in either UTF-16 or UTF-32. +* Wide Character Constants:: Literal character values larger than 8 bits. +* String Constants:: Literal string values. +* UTF-8 String Constants:: Literal UTF-8 string values. +* Wide String Constants:: Literal string values made up of + 16- or 32-bit characters. + +Pointers + +* Address of Data:: Using the ``address-of'' operator. +* Pointer Types:: For each type, there is a pointer type. +* Pointer Declarations:: Declaring variables with pointer types. +* Pointer Type Designators:: Designators for pointer types. +* Pointer Dereference:: Accessing what a pointer points at. +* Null Pointers:: Pointers which do not point to any object. +* Invalid Dereference:: Dereferencing null or invalid pointers. +* Void Pointers:: Totally generic pointers, can cast to any. +* Pointer Comparison:: Comparing memory address values. +* Pointer Arithmetic:: Computing memory address values. +* Pointers and Arrays:: Using pointer syntax instead of array syntax. +* Pointer Arithmetic Low Level:: More about computing memory address values. +* Pointer Increment/Decrement:: Incrementing and decrementing pointers. +* Pointer Arithmetic Drawbacks:: A common pointer bug to watch out for. +* Pointer-Integer Conversion:: Converting pointer types to integer types. +* Printing Pointers:: Using @code{printf} for a pointer's value. + +Structures + +* Referencing Fields:: Accessing field values in a structure object. +* Dynamic Memory Allocation:: Allocating space for objects + while the program is running.
+* Field Offset:: Memory layout of fields within a structure. +* Structure Layout:: Planning the memory layout of fields. +* Packed Structures:: Packing structure fields as close as possible. +* Bit Fields:: Dividing integer fields + into fields with fewer bits. +* Bit Field Packing:: How bit fields pack together in integers. +* const Fields:: Making structure fields immutable. +* Zero Length:: Zero-length array as a variable-length object. +* Flexible Array Fields:: Another approach to variable-length objects. +* Overlaying Structures:: Casting one structure type + over an object of another structure type. +* Structure Assignment:: Assigning values to structure objects. +* Unions:: Viewing the same object in different types. +* Packing With Unions:: Using a union type to pack various types into + the same memory space. +* Cast to Union:: Casting a value of one of the union's alternative + types to the type of the union itself. +* Structure Constructors:: Building new structure objects. +* Unnamed Types as Fields:: Fields' types do not always need names. +* Incomplete Types:: Types which have not been fully defined. +* Intertwined Incomplete Types:: Defining mutually-recursive structure types. +* Type Tags:: Scope of structure and union type tags. + +Arrays + +* Accessing Array Elements:: How to access individual elements of an array. +* Declaring an Array:: How to name and reserve space for a new array. +* Strings:: A string in C is a special case of array. +* Incomplete Array Types:: Naming, but not allocating, a new array. +* Limitations of C Arrays:: Arrays are not first-class objects. +* Multidimensional Arrays:: Arrays of arrays. +* Constructing Array Values:: Assigning values to an entire array at once. +* Arrays of Variable Length:: Declaring arrays of non-constant size. + +Statements + +* Expression Statement:: Evaluate an expression, as a statement, + usually done for a side effect. +* if Statement:: Basic conditional execution.
+* if-else Statement:: Multiple branches for conditional execution. +* Blocks:: Grouping multiple statements together. +* return Statement:: Return a value from a function. +* Loop Statements:: Repeatedly executing a statement or block. +* switch Statement:: Multi-way conditional choices. +* switch Example:: A plausible example of using @code{switch}. +* Duffs Device:: A special way to use @code{switch}. +* Case Ranges:: Ranges of values for @code{switch} cases. +* Null Statement:: A statement that does nothing. +* goto Statement:: Jump to another point in the source code, + identified by a label. +* Local Labels:: Labels with limited scope. +* Labels as Values:: Getting the address of a label. +* Statement Exprs:: A series of statements used as an expression. + +Variables + +* Variable Declarations:: Name a variable and reserve space for it. +* Initializers:: Assigning initial values to variables. +* Designated Inits:: Assigning initial values to array elements + at particular array indices. +* Auto Type:: Obtaining the type of a variable. +* Local Variables:: Variables declared in function definitions. +* File-Scope Variables:: Variables declared outside of + function definitions. +* Static Local Variables:: Variables declared within functions, + but with permanent storage allocation. +* Extern Declarations:: Declaring a variable + which is allocated somewhere else. +* Allocating File-Scope:: When is space allocated + for file-scope variables? +* auto and register:: Historically used storage directions. +* Omitting Types:: The bad practice of declaring variables + with implicit type. + +Type Qualifiers + +* const:: Variables whose values don't change. +* volatile:: Variables whose values may be accessed + or changed outside of the control of + this program. +* restrict Pointers:: Restricted pointers for code optimization. +* restrict Pointer Example:: Example of how that works. + +Functions + +* Function Definitions:: Writing the body of a function.
+* Function Declarations:: Declaring the interface of a function. +* Function Calls:: Using functions. +* Function Call Semantics:: Call-by-value argument passing. +* Function Pointers:: Using references to functions. +* The main Function:: Where execution of a GNU C program begins. + +Type Conversions + +* Explicit Type Conversion:: Casting a value from one type to another. +* Assignment Type Conversions:: Automatic conversion by assignment operation. +* Argument Promotions:: Automatic conversion of function parameters. +* Operand Promotions:: Automatic conversion of arithmetic operands. +* Common Type:: When operand types differ, which one is used? + +Scope + +* Scope:: Different categories of identifier scope. + +Preprocessing + +* Preproc Overview:: Introduction to the C preprocessor. +* Directives:: The form of preprocessor directives. +* Preprocessing Tokens:: The lexical elements of preprocessing. +* Header Files:: Including one source file in another. +* Macros:: Macro expansion by the preprocessor. +* Conditionals:: Controlling whether to compile some lines + or ignore them. +* Diagnostics:: Reporting warnings and errors. +* Line Control:: Reporting source line numbers. +* Null Directive:: A preprocessing no-op. + +Integers in Depth + +* Integer Representations:: How integer values appear in memory. +* Maximum and Minimum Values:: Value ranges of integer types. + +Floating Point in Depth + +* Floating Representations:: How floating-point values appear in memory. +* Floating Type Specs:: Precise details of memory representations. +* Special Float Values:: Infinity, Not a Number, and Subnormal Numbers. +* Invalid Optimizations:: Don't mess up non-numbers and signed zeros. +* Exception Flags:: Handling certain conditions in floating point. +* Exact Floating-Point:: Not all floating calculations lose precision. +* Rounding:: When a floating result can't be represented + exactly in the floating-point type in use.
+* Rounding Issues:: Avoid magnifying rounding errors. +* Significance Loss:: Subtracting numbers that are almost equal. +* Fused Multiply-Add:: Taking advantage of a special floating-point + instruction for faster execution. +* Error Recovery:: Determining rounding errors. +* Exact Floating Constants:: Precisely specified floating-point numbers. +* Handling Infinity:: When floating calculation is out of range. +* Handling NaN:: When floating calculation is undefined. +* Signed Zeros:: Positive zero vs. negative zero. +* Scaling by the Base:: A useful exact floating-point operation. +* Rounding Control:: Specifying some rounding behaviors. +* Machine Epsilon:: The smallest number you can add to 1.0 + and get a sum which is larger than 1.0. +* Complex Arithmetic:: Details of arithmetic with complex numbers. +* Round-Trip Base Conversion:: What happens between base-2 and base-10. +* Further Reading:: References for floating-point numbers. + +Directing Compilation + +* Pragmas:: Controlling compilation of some constructs. +* Static Assertions:: Compile-time tests for conditions. + +@end detailmenu +@end menu + +@node The First Example +@chapter The First Example + +This chapter presents the source code for a very simple C program and +uses it to explain a few features of the language. If you already +know the basic points of C presented in this chapter, you can skim it +or skip it. + +@menu +* Recursive Fibonacci:: Writing a simple function recursively. +* Stack:: Each function call uses space in the stack. +* Iterative Fibonacci:: Writing the same function iteratively. +@end menu + +@node Recursive Fibonacci +@section Example: Recursive Fibonacci +@cindex recursive Fibonacci function +@cindex Fibonacci function, recursive + +To introduce the most basic features of C, let's look at code for a +simple mathematical function that does calculations on integers.
This +function calculates the @var{n}th number in the Fibonacci series, in +which each number is the sum of the previous two: 1, 1, 2, 3, 5, 8, +13, 21, 34, 55, @dots{}. + +@example +int +fib (int n) +@{ + if (n <= 2) /* @r{This avoids infinite recursion.} */ + return 1; + else + return fib (n - 1) + fib (n - 2); +@} +@end example + +This very simple program illustrates several features of C: + +@itemize @bullet +@item +A function definition, whose first two lines constitute the function +header. @xref{Function Definitions}. + +@item +A function parameter @code{n}, referred to as the variable @code{n} +inside the function body. @xref{Function Parameter Variables}. +A function definition uses parameters to refer to the argument +values provided in a call to that function. + +@item +Arithmetic. C programs add with @samp{+} and subtract with +@samp{-}. @xref{Arithmetic}. + +@item +Numeric comparisons. The operator @samp{<=} tests for ``less than or +equal.'' @xref{Numeric Comparisons}. + +@item +Integer constants written in base 10. +@xref{Integer Constants}. + +@item +A function call. The function call @code{fib (n - 1)} calls the +function @code{fib}, passing as its argument the value @code{n - 1}. +@xref{Function Calls}. + +@item +A comment, which starts with @samp{/*} and ends with @samp{*/}. The +comment has no effect on the execution of the program. Its purpose is +to provide explanations to people reading the source code. Including +comments in the code is tremendously important---they provide +background information so others can understand the code more quickly. +@xref{Comments}. + +@item +Two kinds of statements, the @code{return} statement and the +@code{if}@dots{}@code{else} statement. @xref{Statements}. + +@item +Recursion. The function @code{fib} calls itself; that is called a +@dfn{recursive call}. These are valid in C, and quite common. + +The @code{fib} function would not be useful if it didn't return. 
+Thus, recursive definitions, to be of any use, must avoid infinite +recursion. + +This function definition prevents infinite recursion by specially +handling the case where @code{n} is two or less. Thus the maximum +depth of recursive calls is less than @code{n}. +@end itemize + +@menu +* Function Header:: The function's name and how it is called. +* Function Body:: Declarations and statements that implement the function. +@end menu + +@node Function Header +@subsection Function Header +@cindex function header + +In our example, the first two lines of the function definition are the +@dfn{header}. Its purpose is to state the function's name and say how +it is called: + +@example +int +fib (int n) +@end example + +@noindent +says that the function returns an integer (type @code{int}), its name is +@code{fib}, and it takes one argument named @code{n} which is also an +integer. (Data types will be explained later, in @ref{Primitive Types}.) + +@node Function Body +@subsection Function Body +@cindex function body +@cindex recursion + +The rest of the function definition is called the @dfn{function body}. +Like every function body, this one starts with @samp{@{}, ends with +@samp{@}}, and contains zero or more @dfn{statements} and +@dfn{declarations}. Statements specify actions to take, whereas +declarations define names of variables, functions, and so on. Each +statement and each declaration ends with a semicolon (@samp{;}). + +Statements and declarations often contain @dfn{expressions}; an +expression is a construct whose execution produces a @dfn{value} of +some data type, but may also take actions through ``side effects'' +that alter subsequent execution. A statement, by contrast, does not +have a value; it affects further execution of the program only through +the actions it takes. + +This function body contains no declarations, and just one statement, +but that one is a complex statement in that it contains nested +statements. 
This function uses two kinds of statements: + +@table @code +@item return +The @code{return} statement makes the function return immediately. +It looks like this: + +@example +return @var{value}; +@end example + +Its meaning is to compute the expression @var{value} and exit the +function, making it return whatever value that expression produced. +For instance, + +@example +return 1; +@end example + +@noindent +returns the integer 1 from the function, and + +@example +return fib (n - 1) + fib (n - 2); +@end example + +@noindent +returns a value computed by performing two function calls +as specified and adding their results. + +@item @code{if}@dots{}@code{else} +The @code{if}@dots{}@code{else} statement is a @dfn{conditional}. +Each time it executes, it chooses one of its two substatements to execute +and ignores the other. It looks like this: + +@example +if (@var{condition}) + @var{if-true-statement} +else + @var{if-false-statement} +@end example + +Its meaning is to compute the expression @var{condition} and, if it's +``true,'' execute @var{if-true-statement}. Otherwise, execute +@var{if-false-statement}. @xref{if-else Statement}. + +Inside the @code{if}@dots{}@code{else} statement, @var{condition} is +simply an expression. It's considered ``true'' if its value is +nonzero. (A comparison operation, such as @code{n <= 2}, produces the +value 1 if it's ``true'' and 0 if it's ``false.'' @xref{Numeric +Comparisons}.) Thus, + +@example +if (n <= 2) + return 1; +else + return fib (n - 1) + fib (n - 2); +@end example + +@noindent +first tests whether the value of @code{n} is less than or equal to 2. +If so, the expression @code{n <= 2} has the value 1. So execution +continues with the statement + +@example +return 1; +@end example + +@noindent +Otherwise, execution continues with this statement: + +@example +return fib (n - 1) + fib (n - 2); +@end example + +Each of these statements ends the execution of the function and +provides a value for it to return. 
@xref{return Statement}. +@end table + +Calculating @code{fib} using ordinary integers in C works only for +@var{n} < 47, because the value of @code{fib (47)} is too large to fit +in type @code{int}. The addition operation that tries to add +@code{fib (46)} and @code{fib (45)} cannot deliver the correct result. +This occurrence is called @dfn{integer overflow}. + +Overflow can manifest itself in various ways, but one thing that can't +possibly happen is to produce the correct value, since that can't fit +in the space for the value. @xref{Integer Overflow}. + +@xref{Functions}, for a full explanation about functions. + +@node Stack +@section The Stack, And Stack Overflow +@cindex stack +@cindex stack frame +@cindex stack overflow +@cindex recursion, drawbacks of + +@cindex stack frame +Recursion has a drawback: there are limits to how many nested function +calls a program can make. In C, each function call allocates a block +of memory which it uses until the call returns. C allocates these +blocks consecutively within a large area of memory known as the +@dfn{stack}, so we refer to the blocks as @dfn{stack frames}. + +The size of the stack is limited; if the program tries to use too +much, that causes the program to fail because the stack is full. This +is called @dfn{stack overflow}. + +@cindex crash +@cindex segmentation fault +Stack overflow on GNU/Linux typically manifests itself as the +@dfn{signal} named @code{SIGSEGV}, also known as a ``segmentation +fault.'' By default, this signal terminates the program immediately, +rather than letting the program try to recover, or reach an expected +ending point. (We commonly say in this case that the program +``crashes.'') @xref{Signals}. + +It is inconvenient to observe a crash by passing too large +an argument to recursive Fibonacci, because the program would run a +long time before it crashes.
This algorithm is simple but +ridiculously slow: in calculating @code{fib (@var{n})}, the number of +(recursive) calls @code{fib (1)} or @code{fib (2)} that it makes equals +the final result. + +However, you can observe stack overflow very quickly if you use +this function instead: + +@example +int +fill_stack (int n) +@{ + if (n <= 1) /* @r{This limits the depth of recursion.} */ + return 1; + else + return fill_stack (n - 1); +@} +@end example + +Under gNewSense GNU/Linux on the Lemote Yeeloong, without optimization +and using the default configuration, an experiment showed there is +enough stack space to do 261906 nested calls to that function. One +more, and the stack overflows and the program crashes. On another +platform, with a different configuration, or with a different +function, the limit might be bigger or smaller. + +@node Iterative Fibonacci +@section Example: Iterative Fibonacci +@cindex iterative Fibonacci function +@cindex Fibonacci function, iterative + +Here's a much faster algorithm for computing the same Fibonacci +series. It is faster for two reasons. First, it uses @dfn{iteration} +(that is, repetition or looping) rather than recursion, so it doesn't +take time for a large number of function calls. But mainly, it is +faster because the number of repetitions is small---only @code{@var{n}}. + +@c If you change this, change the duplicate in node Example of for. 
+ +@example +int +fib (int n) +@{ + int last = 1; /* @r{Initial value is @code{fib (1)}.} */ + int prev = 0; /* @r{Initial value controls @code{fib (2)}.} */ + int i; + + for (i = 1; i < n; ++i) + /* @r{If @code{n} is 1 or less, the loop runs zero times,} */ + /* @r{since @code{i < n} is false the first time.} */ + @{ + /* @r{Now @code{last} is @code{fib (@code{i})}} + @r{and @code{prev} is @code{fib (@code{i} @minus{} 1)}.} */ + /* @r{Compute @code{fib (@code{i} + 1)}.} */ + int next = prev + last; + /* @r{Shift the values down.} */ + prev = last; + last = next; + /* @r{Now @code{last} is @code{fib (@code{i} + 1)}} + @r{and @code{prev} is @code{fib (@code{i})}.} + @r{But that won't stay true for long,} + @r{because we are about to increment @code{i}.} */ + @} + + return last; +@} +@end example + +This definition computes @code{fib (@var{n})} in a time proportional +to @code{@var{n}}. The comments in the definition explain how it works: it +advances through the series, always keeps the last two values in +@code{last} and @code{prev}, and adds them to get the next value. + +Here are the additional C features that this definition uses: + +@table @asis +@item Internal blocks +Within a function, wherever a statement is called for, you can write a +@dfn{block}. It looks like @code{@{ @r{@dots{}} @}} and contains zero or +more statements and declarations. (You can also use additional +blocks as statements in a block.) + +The function body also counts as a block, which is why it can contain +statements and declarations. + +@xref{Blocks}. + +@item Declarations of local variables +This function body contains declarations as well as statements. There +are three declarations directly in the function body, as well as a +fourth declaration in an internal block. Each starts with @code{int} +because it declares a variable whose type is integer. One declaration +can declare several variables, but each of these declarations is +simple and declares just one variable. 
+ +Variables declared inside a block (either a function body or an +internal block) are @dfn{local variables}. These variables exist only +within that block; their names are not defined outside the block, and +exiting the block deallocates their storage. This example declares +four local variables: @code{last}, @code{prev}, @code{i}, and +@code{next}. + +The most basic local variable declaration looks like this: + +@example +@var{type} @var{variablename}; +@end example + +For instance, + +@example +int i; +@end example + +@noindent +declares the local variable @code{i} as an integer. +@xref{Variable Declarations}. + +@item Initializers +When you declare a variable, you can also specify its initial value, +like this: + +@example +@var{type} @var{variablename} = @var{value}; +@end example + +For instance, + +@example +int last = 1; +@end example + +@noindent +declares the local variable @code{last} as an integer (type +@code{int}) and starts it off with the value 1. @xref{Initializers}. + +@item Assignment +Assignment: a specific kind of expression, written with the @samp{=} +operator, that stores a new value in a variable or other place. Thus, + +@example +@var{variable} = @var{value} +@end example + +@noindent +is an expression that computes @code{@var{value}} and stores the value in +@code{@var{variable}}. @xref{Assignment Expressions}. + +@item Expression statements +An expression statement is an expression followed by a semicolon. +That computes the value of the expression, then ignores the value. + +An expression statement is useful when the expression changes some +data or has other side effects---for instance, with function calls, or +with assignments as in this example. @xref{Expression Statement}. + +Using an expression with no side effects in an expression statement is +pointless except in very special cases. For instance, the expression +statement @code{x;} would examine the value of @code{x} and ignore it. +That is not useful. 
+ +@item Increment operator +The increment operator is @samp{++}. @code{++i} is an +expression that is short for @code{i = i + 1}. +@xref{Increment/Decrement}. + +@item @code{for} statements +A @code{for} statement is a clean way of executing a statement +repeatedly---a @dfn{loop} (@pxref{Loop Statements}). Specifically, + +@example +for (i = 1; i < n; ++i) + @var{body} +@end example + +@noindent +means to start by doing @code{i = 1} (set @code{i} to one) to prepare +for the loop. The loop itself consists of + +@itemize @bullet +@item +Testing @code{i < n} and exiting the loop if that's false. + +@item +Executing @var{body}. + +@item +Advancing the loop (executing @code{++i}, which increments @code{i}). +@end itemize + +The net result is to execute @var{body} with 1 in @code{i}, +then with 2 in @code{i}, and so on, stopping just before the repetition +where @code{i} would equal @code{n}. + +The body of the @code{for} statement must be one and only one +statement. You can't write two statements in a row there; if you try +to, only the first of them will be treated as part of the loop. + +The way to put multiple statements in those places is to group them +with a block, and that's what we do in this example. +@end table + +@node Complete Program +@chapter A Complete Program +@cindex complete example program +@cindex example program, complete + +It's all very well to write a Fibonacci function, but you cannot run +it by itself. It is a useful program, but it is not a complete +program. + +In this chapter we present a complete program that contains the +@code{fib} function. This example shows how to make the program +start, how to make it finish, how to do computation, and how to print +a result. + +@menu +* Complete Example:: Turn the simple function into a full program. +* Complete Explanation:: Explanation of each part of the example. +* Complete Line-by-Line:: Explaining each line of the example. +* Compile Example:: Using GCC to compile the example.
+@end menu + +@node Complete Example +@section Complete Program Example + +Here is the complete program that uses the simple, recursive version +of the @code{fib} function (@pxref{Recursive Fibonacci}): + +@example +#include <stdio.h> + +int +fib (int n) +@{ + if (n <= 2) /* @r{This avoids infinite recursion.} */ + return 1; + else + return fib (n - 1) + fib (n - 2); +@} + +int +main (void) +@{ + printf ("Fibonacci series item %d is %d\n", + 20, fib (20)); + return 0; +@} +@end example + +@noindent +This program prints a message that shows the value of @code{fib (20)}. + +Now for an explanation of what that code means. + +@node Complete Explanation +@section Complete Program Explanation + +@ifnottex +Here's the explanation of the code of the example in the +previous section. +@end ifnottex + +This sample program prints a message that shows the value of @code{fib +(20)}, and exits with code 0 (which stands for successful execution). + +Every C program is started by running the function named @code{main}. +Therefore, the example program defines a function named @code{main} to +provide a way to start it. Whatever that function does is what the +program does. @xref{The main Function}. + +The @code{main} function is the first one called when the program +runs, but it doesn't come first in the example code. The order of the +function definitions in the source code makes no difference to the +program's meaning. + +The initial call to @code{main} always passes certain arguments, but +@code{main} does not have to pay attention to them. To ignore those +arguments, define @code{main} with @code{void} as the parameter list. +(@code{void} as a function's parameter list normally means ``call with +no arguments,'' but @code{main} is a special case.) + +The function @code{main} returns 0 because that is +the conventional way for @code{main} to indicate successful execution.
+It could instead return a positive integer to indicate failure, and +some utility programs have specific conventions for the meaning of +certain numeric @dfn{failure codes}. @xref{Values from main}. + +@cindex @code{printf} +The simplest way to print text in C is by calling the @code{printf} +function, so here we explain what that does. + +@cindex standard output +The first argument to @code{printf} is a @dfn{string constant} +(@pxref{String Constants}) that is a template for output. The +function @code{printf} copies most of that string directly as output, +including the newline character at the end of the string, which is +written as @samp{\n}. The output goes to the program's @dfn{standard +output} destination, which in the usual case is the terminal. + +@samp{%} in the template introduces a code that substitutes other text +into the output. Specifically, @samp{%d} means to take the next +argument to @code{printf} and substitute it into the text as a decimal +number. (The argument for @samp{%d} must be of type @code{int}; if it +isn't, @code{printf} will malfunction.) So the output is a line that +looks like this: + +@example +Fibonacci series item 20 is 6765 +@end example + +This program does not contain a definition for @code{printf} because +it is defined by the C library, which makes it available in all C +programs. However, each program does need to @dfn{declare} +@code{printf} so it will be called correctly. The @code{#include} +line takes care of that; it includes a @dfn{header file} called +@file{stdio.h} into the program's code. That file is provided by the +operating system and it contains declarations for the many standard +input/output functions in the C library, one of which is +@code{printf}. + +Don't worry about header files for now; we'll explain them later in +@ref{Header Files}. + +The first argument of @code{printf} does not have to be a string +constant; it can be any string (@pxref{Strings}). 
However, using a +constant is the most common case. + +To learn more about @code{printf} and other facilities of the C +library, see @ref{Top, The GNU C Library, , libc, The GNU C Library +Reference Manual}. + +@node Complete Line-by-Line +@section Complete Program, Line by Line + +Here's the same example, explained line by line. +@strong{Beginners, do you find this helpful or not? +Would you prefer a different layout for the example? +Please tell rms@@gnu.org.} + +@example +#include <stdio.h> /* @r{Include declarations of usual} */ + /* @r{I/O functions such as @code{printf}.} */ + /* @r{Most programs need these.} */ + +int /* @r{This function returns an @code{int}.} */ +fib (int n) /* @r{Its name is @code{fib};} */ + /* @r{its argument is called @code{n}.} */ +@{ /* @r{Start of function body.} */ + /* @r{This stops the recursion from being infinite.} */ + if (n <= 2) /* @r{If @code{n} is 1 or 2,} */ + return 1; /* @r{make @code{fib} return 1.} */ + else /* @r{otherwise, add the two previous} */ + /* @r{Fibonacci numbers.} */ + return fib (n - 1) + fib (n - 2); +@} + +int /* @r{This function returns an @code{int}.} */ +main (void) /* @r{Start here; ignore arguments.} */ +@{ /* @r{Print message with numbers in it.} */ + printf ("Fibonacci series item %d is %d\n", + 20, fib (20)); + return 0; /* @r{Terminate program, report success.} */ +@} +@end example + +@node Compile Example +@section Compiling the Example Program +@cindex compiling +@cindex executable file + +To run a C program requires converting the source code into an +@dfn{executable file}. This is called @dfn{compiling} the program, +and the command to do that using GNU C is @command{gcc}. + +This example program consists of a single source file.
If we +call that file @file{fib1.c}, the complete command to compile it is +this: + +@example +gcc -g -O -o fib1 fib1.c +@end example + +@noindent +Here, @option{-g} says to generate debugging information, @option{-O} +says to optimize at the basic level, and @option{-o fib1} says to put +the executable program in the file @file{fib1}. + +To run the program, use its file name as a shell command. +For instance, + +@example +./fib1 +@end example + +@noindent +However, unless you are sure the program is correct, you should +expect to need to debug it. So use this command, + +@example +gdb fib1 +@end example + +@noindent +which starts the GDB debugger (@pxref{Sample Session, Sample Session, +A Sample GDB Session, gdb, Debugging with GDB}) so you can run and +debug the executable program @code{fib1}. + + +@xref{Compilation}, for an introduction to compiling more complex +programs which consist of more than one source file. + +@node Storage +@chapter Storage and Data +@cindex bytes +@cindex storage organization +@cindex memory organization + +Storage in C programs is made up of units called @dfn{bytes}. On +nearly all computers, a byte consists of 8 bits, but there are a few +peculiar computers (mostly ``embedded controllers'' for very small +systems) where a byte is longer than that. This manual does not try +to explain the peculiarity of those computers; we assume that a byte +is 8 bits. + +Every C data type is made up of a certain number of bytes; that number +is the data type's @dfn{size}. @xref{Type Size}, for details. The +types @code{signed char} and @code{unsigned char} are one byte long; +use those types to operate on data byte by byte. @xref{Signed and +Unsigned Types}. You can refer to a series of consecutive bytes as an +array of @code{char} elements; that's what an ASCII string looks like +in memory. @xref{String Constants}. + +@node Beyond Integers +@chapter Beyond Integers + +So far we've presented programs that operate on integers. 
In this +chapter we'll present examples of handling non-integral numbers and +arrays of numbers. + +@menu +* Float Example:: A function that uses floating-point numbers. +* Array Example:: A function that works with arrays. +* Array Example Call:: How to call that function. +* Array Example Variations:: Different ways to write the call example. +@end menu + +@node Float Example +@section An Example with Non-Integer Numbers +@cindex floating point example + +Here's a function that operates on and returns @dfn{floating point} +numbers that don't have to be integers. Floating point represents a +number as a fraction together with a power of 2. (For more detail, +@pxref{Floating-Point Data Types}.) This example calculates the +average of three floating point numbers that are passed to it as +arguments: + +@example +double +average_of_three (double a, double b, double c) +@{ + return (a + b + c) / 3; +@} +@end example + +The values of the parameters @code{a}, @code{b} and @code{c} do not have to be +integers, and even when they happen to be integers, most likely their +average is not an integer. + +@code{double} is the usual data type in C for calculations on +floating-point numbers. + +To print a @code{double} with @code{printf}, we must use @samp{%f} +instead of @samp{%d}: + +@example +printf ("Average is %f\n", + average_of_three (1.1, 9.8, 3.62)); +@end example + +The code that calls @code{printf} must pass a @code{double} for +printing with @samp{%f} and an @code{int} for printing with @samp{%d}. +If the argument has the wrong type, @code{printf} will produce garbage +output. + +Here's a complete program that computes the average of three +specific numbers and prints the result: + +@example +#include <stdio.h> + +double +average_of_three (double a, double b, double c) +@{ + return (a + b + c) / 3; +@} + +int +main (void) +@{ + printf ("Average is %f\n", + average_of_three (1.1, 9.8, 3.62)); + return 0; +@} +@end example + +From now on we will not present examples of calls to @code{main}.
+Instead we encourage you to write them for yourself when you want +to test executing some code. + +@node Array Example +@section An Example with Arrays +@cindex array example + +A function to take the average of three numbers is very specific and +limited. A more general function would take the average of any number +of numbers. That requires passing the numbers in an array. An array +is an object in memory that contains a series of values of the same +data type. This chapter presents the basic concepts and use of arrays +through an example; for the full explanation, see @ref{Arrays}. + +Here's a function definition to take the average of several +floating-point numbers, passed as type @code{double}. The first +parameter, @code{length}, specifies how many numbers are passed. The +second parameter, @code{input_data}, is an array that holds those +numbers. + +@example +double +avg_of_double (int length, double input_data[]) +@{ + double sum = 0; + int i; + + for (i = 0; i < length; i++) + sum = sum + input_data[i]; + + return sum / length; +@} +@end example + +This introduces the expression to refer to an element of an array: +@code{input_data[i]} means the element at index @code{i} in +@code{input_data}. The index of the element can be any expression +with an integer value; in this case, the expression is @code{i}. +@xref{Accessing Array Elements}. + +@cindex zero-origin indexing +The lowest valid index in an array is 0, @emph{not} 1, and the highest +valid index is one less than the number of elements. (This is known +as @dfn{zero-origin indexing}.) + +This example also introduces the way to declare that a function +parameter is an array. Such declarations are modeled after the syntax +for an element of the array. Just as @code{double foo} declares that +@code{foo} is of type @code{double}, @code{double input_data[]} +declares that each element of @code{input_data} is of type +@code{double}. 
Therefore, @code{input_data} itself has type ``array +of @code{double}.'' + +When declaring an array parameter, it's not necessary to say how long +the array is. In this case, the parameter @code{input_data} has no +length information. That's why the function needs another parameter, +@code{length}, for the caller to provide that information to the +function @code{avg_of_double}. + +@node Array Example Call +@section Calling the Array Example + +To call the function @code{avg_of_double} requires making an +array and then passing it as an argument. Here is an example. + +@example +@{ + /* @r{The array of values to average.} */ + double nums_to_average[5]; + /* @r{The average, once we compute it.} */ + double average; + + /* @r{Fill in elements of @code{nums_to_average}.} */ + + nums_to_average[0] = 58.7; + nums_to_average[1] = 5.1; + nums_to_average[2] = 7.7; + nums_to_average[3] = 105.2; + nums_to_average[4] = -3.14159; + + average = avg_of_double (5, nums_to_average); + + /* @r{@dots{}now make use of @code{average}@dots{}} */ +@} +@end example + +This shows an array subscripting expression again, this time +on the left side of an assignment, storing a value into an +element of an array. + +It also shows how to declare a local variable that is an array: +@code{double nums_to_average[5];}. Since this declaration allocates the +space for the array, it needs to know the array's length. You can +specify the length with any expression whose value is an integer, but +in this declaration the length is a constant, the integer 5. + +The name of the array, when used by itself as an expression, stands +for the address of the array's data, and that's what gets passed to +the function @code{avg_of_double} in @code{avg_of_double (5, +nums_to_average)}. + +We can make the code easier to maintain by avoiding the need to write +5, the array length, when calling @code{avg_of_double}. That way, if +we change the array to include more elements, we won't have to change +that call. 
One way to do this is with the @code{sizeof} operator: + +@example + average = avg_of_double ((sizeof (nums_to_average) + / sizeof (nums_to_average[0])), + nums_to_average); +@end example + +This computes the number of elements in @code{nums_to_average} by dividing +its total size by the size of one element. @xref{Type Size}, for more +details of using @code{sizeof}. + +We don't show in this example what happens after storing the result of +@code{avg_of_double} in the variable @code{average}. Presumably +more code would follow that uses that result somehow. (Why compute +the average and not use it?) But that isn't part of this topic. + +@node Array Example Variations +@section Variations for Array Example + +The code to call @code{avg_of_double} has two declarations that +start with the same data type: + +@example + /* @r{The array of values to average.} */ + double nums_to_average[5]; + /* @r{The average, once we compute it.} */ + double average; +@end example + +In C, you can combine the two, like this: + +@example + double nums_to_average[5], average; +@end example + +This declares @code{nums_to_average} so each of its elements is a +@code{double}, and @code{average} so that it simply is a +@code{double}. + +However, while you @emph{can} combine them, that doesn't mean you +@emph{should}. If it is useful to write comments about the variables, +and usually it is, then it's clearer to keep the declarations separate +so you can put a comment on each one. 
+ +We set all of the elements of the array @code{nums_to_average} with +assignments, but it is more convenient to use an initializer in the +declaration: + +@example +@{ + /* @r{The array of values to average.} */ + double nums_to_average[] + = @{ 58.7, 5.1, 7.7, 105.2, -3.14159 @}; + + /* @r{The average, once we compute it.} */ + double average = avg_of_double ((sizeof (nums_to_average) + / sizeof (nums_to_average[0])), + nums_to_average); + + /* @r{@dots{}now make use of @code{average}@dots{}} */ +@} +@end example + +The array initializer is a comma-separated list of values, delimited +by braces. @xref{Initializers}. + +Note that the declaration does not specify a size for +@code{nums_to_average}, so the size is determined from the +initializer. There are five values in the initializer, so +@code{nums_to_average} gets length 5. If we add another element to +the initializer, @code{nums_to_average} will have six elements. + +Because the code computes the number of elements from the size of +the array, using @code{sizeof}, the program will operate on all the +elements in the initializer, regardless of how many those are. + +@node Lexical Syntax +@chapter Lexical Syntax +@cindex lexical syntax +@cindex token + +To start the full description of the C language, we explain the +lexical syntax and lexical units of C code. The lexical units of a +programming language are known as @dfn{tokens}. This chapter covers +all the tokens of C except for constants, which are covered in a later +chapter (@pxref{Constants}). One vital kind of token is the +@dfn{identifier} (@pxref{Identifiers}), which is used for names of any +kind. + +@menu +* English:: Write programs in English! +* Characters:: The characters allowed in C programs. +* Whitespace:: The particulars of whitespace characters. +* Comments:: How to include comments in C code. +* Identifiers:: How to form identifiers (names). +* Operators/Punctuation:: Characters used as operators or punctuation.
+* Line Continuation:: Splitting one line into multiple lines. +@end menu + +@node English +@section Write Programs in English! + +In principle, you can write the function and variable names in a +program, and the comments, in any human language. C allows any kind +of character in comments, and you can put non-ASCII characters into +identifiers with a special prefix. However, to enable programmers in +all countries to understand and develop the program, it is best, given +today's circumstances, to write identifiers and comments in +English. + +English is the one language that programmers in all countries +generally study. If a program's names are in English, most +programmers in Bangladesh, Belgium, Bolivia, Brazil, and Bulgaria can +understand them. Most programmers in those countries can speak +English, or at least read it, but they do not read each other's +languages at all. In India, with so many languages, two programmers +may have no common language other than English. + +If you don't feel confident in writing English, do the best you can, +and follow each English comment with a version in a language you +write better; add a note asking others to translate that to English. +Someone will eventually do that. + +The program's user interface is a different matter. We don't need to +choose one language for that; it is easy to support multiple languages +and let each user choose the language to use. This requires writing +the program to support localization of its interface. (The +@code{gettext} package exists to support this; @pxref{Message +Translation, The GNU C Library, , libc, The GNU C Library Reference +Manual}.) Then a community-based translation effort can provide +support for all the languages users want to use. + +@node Characters +@section Characters +@cindex character set +@cindex Unicode + +@c ??? How to express ¶?
+ +GNU C source files are usually written in the +@url{https://en.wikipedia.org/wiki/ASCII,,ASCII} character set, which +was defined in the 1960s for English. However, they can also include +Unicode characters represented in the +@url{https://en.wikipedia.org/wiki/UTF-8,,UTF-8} multibyte encoding. +This makes it possible to represent accented letters such as @samp{á}, +as well as other scripts such as Arabic, Chinese, Cyrillic, Hebrew, +Japanese, and Korean.@footnote{On some obscure systems, GNU C uses +UTF-EBCDIC instead of UTF-8, but that is not worth describing in this +manual.} + +In C source code, non-ASCII characters are valid in comments, in wide +character constants (@pxref{Wide Character Constants}), and in string +constants (@pxref{String Constants}). + +@c ??? valid in identifiers? +Another way to specify non-ASCII characters in constants (character or +string) and identifiers is with an escape sequence starting with +backslash, specifying the intended Unicode character. (@xref{Unicode +Character Codes}.) This specifies non-ASCII characters without +putting a real non-ASCII character in the source file itself. + +C accepts two-character aliases called @dfn{digraphs} for certain +characters. @xref{Digraphs}. + +@node Whitespace +@section Whitespace +@cindex whitespace characters in source files +@cindex space character in source +@cindex tab character in source +@cindex formfeed in source +@cindex linefeed in source +@cindex newline in source +@cindex carriage return in source +@cindex vertical tab in source + +Whitespace means characters that exist in a file but appear blank in a +printed listing of a file (or traditionally did appear blank, several +decades ago). The C language requires whitespace in order to separate +two consecutive identifiers, or to separate an identifier from a +numeric constant. Other than that, and a few special situations +described later, whitespace is optional; you can put it in when you +wish, to make the code easier to read. 
+ +Space and tab in C code are treated as whitespace characters. So are +line breaks. You can represent a line break with the newline +character (also called @dfn{linefeed} or LF), CR (carriage return), or +the CRLF sequence (two characters: carriage return followed by a +newline character). + +The @dfn{formfeed} character, Control-L, was traditionally used to +divide a file into pages. It is still used this way in source code, +and the tools that generate nice printouts of source code still start +a new page after each ``formfeed'' character. Dividing code into +pages separated by formfeed characters is a good way to break it up +into comprehensible pieces and show other programmers where they start +and end. + +The @dfn{vertical tab} character, Control-K, was traditionally used to +make printing advance down to the next section of a page. We know of +no particular reason to use it in source code, but it is still +accepted as whitespace in C. + +Comments are also syntactically equivalent to whitespace. +@ifinfo +@xref{Comments}. +@end ifinfo + +@node Comments +@section Comments +@cindex comments + +A comment encapsulates text that has no effect on the program's +execution or meaning. + +The purpose of comments is to explain the code to people that read it. +Writing good comments for your code is tremendously important---they +should provide background information that helps programmers +understand the reasons why the code is written the way it is. You, +returning to the code six months from now, will need the help of these +comments to remember why you wrote it this way. + +Outdated comments that become incorrect are counterproductive, so part +of the software developer's responsibility is to update comments as +needed to correspond with changes to the program code. + +C allows two kinds of comment syntax, the traditional style and the +C@t{++} style. A traditional C comment starts with @samp{/*} and ends +with @samp{*/}. 
For instance, + +@example +/* @r{This is a comment in traditional C syntax.} */ +@end example + +A traditional comment can contain @samp{/*}, but these delimiters do +not nest as pairs. The first @samp{*/} ends the comment regardless of +whether it contains @samp{/*} sequences. + +@example +/* @r{This} /* @r{is a comment} */ But this is not! */ +@end example + +A @dfn{line comment} starts with @samp{//} and ends at the end of the line. +For instance, + +@example +// @r{This is a comment in C@t{++} style.} +@end example + +Line comments do nest, in effect, because @samp{//} inside a line +comment is part of that comment: + +@example +// @r{this whole line is} // @r{one comment} +This is code, not comment. +@end example + +It is safe to put line comments inside block comments, or vice versa. + +@example +@group +/* @r{traditional comment} + // @r{contains line comment} + @r{more traditional comment} + */ text here is not a comment + +// @r{line comment} /* @r{contains traditional comment} */ +@end group +@end example + +But beware of commenting out one end of a traditional comment with a line +comment. The delimiter @samp{/*} doesn't start a comment if it occurs +inside an already-started comment. + +@example +@group + // @r{line comment} /* @r{That would ordinarily begin a block comment.} + Oops! The line comment has ended; + this isn't a comment any more. */ +@end group +@end example + +Comments are not recognized within string constants. @t{@w{"/* blah +*/"}} is the string constant @samp{@w{/* blah */}}, not an empty +string. + +In this manual we show the text in comments in a variable-width font, +for readability, but this font distinction does not exist in source +files. + +A comment is syntactically equivalent to whitespace, so it always +separates tokens. 
Thus, + +@example +@group + int/* @r{comment} */foo; +@r{is equivalent to} + int foo; +@end group +@end example + +@noindent +but clean code always uses real whitespace to separate the comment +visually from surrounding code. + +@node Identifiers +@section Identifiers +@cindex identifiers + +An @dfn{identifier} (name) in C is a sequence of letters and digits, +as well as @samp{_}, that does not start with a digit. Most compilers +also allow @samp{$}. An identifier can be as long as you like; for +example, + +@example +int anti_dis_establishment_arian_ism; +@end example + +@cindex case of letters in identifiers +Letters in identifiers are case-sensitive in C; thus, @code{a} +and @code{A} are two different identifiers. + +@cindex keyword +@cindex reserved words +Identifiers in C are used as variable names, function names, typedef +names, enumeration constants, type tags, field names, and labels. +Certain identifiers in C are @dfn{keywords}, which means they have +specific syntactic meanings. Keywords in C are @dfn{reserved words}, +meaning you cannot use them in any other way. For instance, you can't +define a variable or function named @code{return} or @code{if}. + +You can also include other characters, even non-ASCII characters, in +identifiers by writing their Unicode character names, which start with +@samp{\u} or @samp{\U}, in the identifier name. @xref{Unicode +Character Codes}. However, it is usually a bad idea to use non-ASCII +characters in identifiers, and when they are written in English, they +never need non-ASCII characters. @xref{English}. + +Whitespace is required to separate two consecutive identifiers, or to +separate an identifier from a preceding or following numeric +constant. + +@node Operators/Punctuation +@section Operators and Punctuation +@cindex operators +@cindex punctuation + +Here we describe the lexical syntax of operators and punctuation in C. +The specific operators of C and their meanings are presented in +subsequent chapters. 
+ +Most operators in C consist of one or two characters that can't be +used in identifiers. The characters used for operators in C are +@samp{!~^&|*/%+-=<>,.?:}. + +Some operators are a single character. For instance, @samp{-} is the +operator for negation (with one operand) and the operator for +subtraction (with two operands). + +Some operators are two characters. For example, @samp{++} is the +increment operator. Recognition of multicharacter operators works by +grouping together as many consecutive characters as can constitute one +operator. + +For instance, the character sequence @samp{++} is always interpreted +as the increment operator; therefore, if we want to write two +consecutive instances of the operator @samp{+}, we must separate them +with a space so that they do not combine as one token. Applying the +same rule, @code{a+++++b} is always tokenized as @code{@w{a++ ++ + +b}}, not as @code{@w{a++ + ++b}}, even though the latter could be part +of a valid C program and the former could not (since @code{a++} +is not an lvalue and thus can't be the operand of @code{++}). + +A few C operators are keywords rather than special characters. They +include @code{sizeof} (@pxref{Type Size}) and @code{_Alignof} +(@pxref{Type Alignment}). + +The characters @samp{;@{@}[]()} are used for punctuation and grouping. +Semicolon (@samp{;}) ends a statement. Braces (@samp{@{} and +@samp{@}}) begin and end a block at the statement level +(@pxref{Blocks}), and surround the initializer (@pxref{Initializers}) +for a variable with multiple elements or components (such as arrays or +structures). + +Square brackets (@samp{[} and @samp{]}) do array indexing, as in +@code{array[5]}. + +Parentheses are used in expressions for explicit nesting of +expressions (@pxref{Basic Arithmetic}), around the parameter +declarations in a function declaration or definition, and around the +arguments in a function call, as in @code{printf ("Foo %d\n", i)} +(@pxref{Function Calls}). 
Several kinds of statements also use +parentheses as part of their syntax---for instance, @code{if} +statements, @code{for} statements, @code{while} statements, and +@code{switch} statements. @xref{if Statement}, and following +sections. + +Parentheses are also required around the operand of the operator +keywords @code{sizeof} and @code{_Alignof} when the operand is a data +type rather than a value. @xref{Type Size}. + +@node Line Continuation +@section Line Continuation +@cindex line continuation +@cindex continuation of lines + +The sequence of a backslash and a newline is ignored absolutely +anywhere in a C program. This makes it possible to split a single +source line into multiple lines in the source file. GNU C tolerates +and ignores other whitespace between the backslash and the newline. +In particular, it always ignores a CR (carriage return) character +there, in case some text editor decided to end the line with the CRLF +sequence. + +The main use of line continuation in C is for macro definitions that +would be inconveniently long for a single line (@pxref{Macros}). + +It is possible to continue a line comment onto another line with +backslash-newline. You can put backslash-newline in the middle of an +identifier, even a keyword, or an operator. You can even split +@samp{/*}, @samp{*/}, and @samp{//} onto multiple lines with +backslash-newline. Here's an ugly example: + +@example +@group +/\ +* +*/ fo\ +o +\ += 1\ +0; +@end group +@end example + +@noindent +That's equivalent to @samp{/* */ foo += 10;}. + +Don't do those things in real programs, since they make code hard to +read. + +@strong{Note:} For the sake of using certain tools on the source code, it is +wise to end every source file with a newline character which is not +preceded by a backslash, so that it really ends the last line. + +@node Arithmetic +@chapter Arithmetic +@cindex arithmetic operators +@cindex operators, arithmetic + +@c ??? Duplication with other sections -- get rid of that? 
+ +Arithmetic operators in C attempt to be as similar as possible to the +abstract arithmetic operations, but it is impossible to do this +perfectly. Numbers in a computer have a finite range of possible +values, and non-integer values have a limit on their possible +accuracy. Nonetheless, in most cases you will encounter no surprises +in using @samp{+} for addition, @samp{-} for subtraction, and @samp{*} +for multiplication. + +Each C operator has a @dfn{precedence}, which is its rank in the +grammatical order of the various operators. The operators with the +highest precedence grab adjoining operands first; these expressions +then become operands for operators of lower precedence. We give some +information about precedence of operators in this chapter where we +describe the operators; for the full explanation, see @ref{Binary +Operator Grammar}. + +The arithmetic operators always @dfn{promote} their operands before +operating on them. This means converting narrow integer data types to +a wider data type (@pxref{Operand Promotions}). If you are just +learning C, don't worry about this yet. + +Given two operands that have different types, most arithmetic +operations convert them both to their @dfn{common type}. For +instance, if one is @code{int} and the other is @code{double}, the +common type is @code{double}. (That's because @code{double} can +represent all the values that an @code{int} can hold, but not vice +versa.) For the full details, see @ref{Common Type}. + +@menu +* Basic Arithmetic:: Addition, subtraction, multiplication, + and division. +* Integer Arithmetic:: How C performs arithmetic with integer values. +* Integer Overflow:: When an integer value exceeds the range + of its type. +* Mixed Mode:: Calculating with both integer values + and floating-point values. +* Division and Remainder:: How integer division works. +* Numeric Comparisons:: Comparing numeric values for equality or order. +* Shift Operations:: Shift integer bits left or right. 
+* Bitwise Operations:: Bitwise conjunction, disjunction, negation. +@end menu + +@node Basic Arithmetic +@section Basic Arithmetic +@cindex addition operator +@cindex subtraction operator +@cindex multiplication operator +@cindex division operator +@cindex negation operator +@cindex operator, addition +@cindex operator, subtraction +@cindex operator, multiplication +@cindex operator, division +@cindex operator, negation + +Basic arithmetic in C is done with the usual binary operators of +algebra: addition (@samp{+}), subtraction (@samp{-}), multiplication +(@samp{*}) and division (@samp{/}). The unary operator @samp{-} is +used to change the sign of a number. The unary @code{+} operator also +exists; it yields its operand unaltered. + +@samp{/} is the division operator, but dividing integers may not give +the result you expect. Its value is an integer, which is not equal to +the mathematical quotient when that is a fraction. Use @samp{%} to +get the corresponding integer remainder when necessary. +@xref{Division and Remainder}. Floating point division yields a value +as close as possible to the mathematical quotient. + +These operators use algebraic syntax with the usual algebraic +precedence rule (@pxref{Binary Operator Grammar}) that multiplication +and division are done before addition and subtraction, but you can use +parentheses to explicitly specify how the operators nest. They are +left-associative (@pxref{Associativity and Ordering}). Thus, + +@example +-a + b - c + d * e / f +@end example + +@noindent +is equivalent to + +@example +(((-a) + b) - c) + ((d * e) / f) +@end example + +@node Integer Arithmetic +@section Integer Arithmetic +@cindex integer arithmetic + +Each of the basic arithmetic operations in C has two variants for +integers: @dfn{signed} and @dfn{unsigned}. The choice is determined +by the data types of their operands. + +Each integer data type in C is either @dfn{signed} or @dfn{unsigned}.
+A signed type can hold a range of positive and negative numbers, with +zero near the middle of the range. An unsigned type can hold only +nonnegative numbers; its range starts with zero and runs upward. + +The most basic integer types are @code{int}, which normally can hold +numbers from @minus{}2,147,483,648 to 2,147,483,647, and @code{unsigned +int}, which normally can hold numbers from 0 to 4,294,967,295. (This +assumes @code{int} is 32 bits wide, always true for GNU C on real +computers but not always on embedded controllers.) @xref{Integer +Types}, for full information about integer types. + +When a basic arithmetic operation is given two signed operands, it +does signed arithmetic. Given two unsigned operands, it does +unsigned arithmetic. + +If one operand is @code{unsigned int} and the other is @code{int}, the +operator treats them both as unsigned. More generally, the common +type of the operands determines whether the operation is signed or +not. @xref{Common Type}. + +Printing the results of unsigned arithmetic with @code{printf} using +@samp{%d} can produce surprising results for values far away from +zero. Even though the rules above say that the computation was done +with unsigned arithmetic, the printed result may appear to be signed! + +The explanation is that the bit pattern resulting from addition, +subtraction or multiplication is actually the same for signed and +unsigned operations. The difference is only in the data type of the +result, which affects the @emph{interpretation} of the result bit pattern, +and whether the arithmetic operation can overflow (see the next section). + +But @samp{%d} doesn't know its argument's data type. It sees only the +value's bit pattern, and it is defined to interpret that as +@code{signed int}. To print it as unsigned requires using @samp{%u} +instead of @samp{%d}. @xref{Formatted Output, The GNU C Library, , +libc, The GNU C Library Reference Manual}.
+ +Arithmetic in C never operates directly on narrow integer types (those +with fewer bits than @code{int}; @ref{Narrow Integers}). Instead it +``promotes'' them to @code{int}. @xref{Operand Promotions}. + +@node Integer Overflow +@section Integer Overflow +@cindex integer overflow +@cindex overflow, integer + +When the mathematical value of an arithmetic operation doesn't fit in +the range of the data type in use, that's called @dfn{overflow}. +When it happens in integer arithmetic, it is @dfn{integer overflow}. + +Integer overflow happens only in arithmetic operations. Type conversion +operations, by definition, do not cause overflow, not even when the +result can't fit in its new type. @xref{Integer Conversion}. + +Signed numbers use two's-complement representation, in which the most +negative number lacks a positive counterpart (@pxref{Integers in +Depth}). Thus, the unary @samp{-} operator on a signed integer can +overflow. + +@menu +* Unsigned Overflow:: Overflow in unsigned integer arithmetic. +* Signed Overflow:: Overflow in signed integer arithmetic. +@end menu + +@node Unsigned Overflow +@subsection Overflow with Unsigned Integers + +Unsigned arithmetic in C ignores overflow; it produces the true result +modulo the @var{n}th power of 2, where @var{n} is the number of bits +in the data type. We say it ``truncates'' the true result to the +lowest @var{n} bits. + +A true result that is negative, when taken modulo the @var{n}th power +of 2, yields a positive number. For instance, + +@example +unsigned int x = 1; +unsigned int y; + +y = -x; +@end example + +@noindent +causes overflow because the negative number @minus{}1 can't be stored +in an unsigned type. The actual result, which is @minus{}1 modulo the +@var{n}th power of 2, is one less than the @var{n}th power of 2. That +is the largest value that the unsigned data type can store. For a +32-bit @code{unsigned int}, the value is 4,294,967,295. @xref{Maximum +and Minimum Values}.
+ +Adding that number to itself, as here, + +@example +unsigned int z; + +z = y + y; +@end example + +@noindent +ought to yield 8,589,934,590; however, that is again too large to fit, +so overflow truncates the value to 4,294,967,294. If that were a +signed integer, it would mean @minus{}2, which (not by coincidence) +equals @minus{}1 + @minus{}1. + +@node Signed Overflow +@subsection Overflow with Signed Integers +@cindex compiler options for integer overflow +@cindex integer overflow, compiler options +@cindex overflow, compiler options + +For signed integers, the result of overflow in C is @emph{in +principle} undefined, meaning that anything whatsoever could happen. +Therefore, C compilers can do optimizations that treat the overflow +case with total unconcern. (Since the result of overflow is undefined +in principle, one cannot claim that these optimizations are +erroneous.) + +@strong{Watch out:} These optimizations can do surprising things. For +instance, + +@example +int i; +@r{@dots{}} +if (i < i + 1) + x = 5; +@end example + +@noindent +could be optimized to do the assignment unconditionally, because the +@code{if}-condition is always true if @code{i + 1} does not overflow. + +GCC offers compiler options to control handling signed integer +overflow. These options operate per module; that is, each module +behaves according to the options it was compiled with. + +These two options specify particular ways to handle signed integer +overflow, other than the default way: + +@table @option +@item -fwrapv +Make signed integer operations well-defined, like unsigned integer +operations: they produce the @var{n} low-order bits of the true +result. The highest of those @var{n} bits is the sign bit of the +result. With @option{-fwrapv}, these out-of-range operations are not +considered overflow, so (strictly speaking) integer overflow never +happens. + +The option @option{-fwrapv} enables some optimizations based on the +defined values of out-of-range results.
In GCC 8, it disables +optimizations that are based on assuming signed integer operations +will not overflow. + +@item -ftrapv +Generate a signal @code{SIGFPE} when signed integer overflow occurs. +This terminates the program unless the program handles the signal. +@xref{Signals}. +@end table + +One other option is useful for finding where overflow occurs: + +@ignore +@item -fno-strict-overflow +Disable optimizations that are based on assuming signed integer +operations will not overflow. +@end ignore + +@table @option +@item -fsanitize=signed-integer-overflow +Output a warning message at run time when signed integer overflow +occurs. This checks the @samp{+}, @samp{*}, and @samp{-} operators. +This takes priority over @option{-ftrapv}. +@end table + +@node Mixed Mode +@section Mixed-Mode Arithmetic + +Mixing integers and floating-point numbers in a basic arithmetic +operation converts the integers automatically to floating point. +In most cases, this gives exactly the desired results. +But sometimes it matters precisely where the conversion occurs. + +If @code{i} and @code{j} are integers, @code{(i + j) * 2.0} adds them +as an integer, then converts the sum to floating point for the +multiplication. If the addition gets an overflow, that is not +equivalent to converting both integers to floating point and then +adding them. You can get the latter result by explicitly converting +the integers, as in @code{((double) i + (double) j) * 2.0}. +@xref{Explicit Type Conversion}. + +@c Eggert's report +Adding or multiplying several values, including some integers and some +floating point, does the operations left to right. Thus, @code{3.0 + +i + j} converts @code{i} to floating point, then adds 3.0, then +converts @code{j} to floating point and adds that. You can specify a +different order using parentheses: @code{3.0 + (i + j)} adds @code{i} +and @code{j} first and then adds that result (converting to floating +point) to 3.0. 
In this respect, C differs from other languages, such +as Fortran. + +@node Division and Remainder +@section Division and Remainder +@cindex remainder operator +@cindex modulus +@cindex operator, remainder + +Division of integers in C rounds the result to an integer. The result +is always rounded towards zero. + +@example + 16 / 3 @result{} 5 +-16 / 3 @result{} -5 + 16 / -3 @result{} -5 +-16 / -3 @result{} 5 +@end example + +@noindent +To get the corresponding remainder, use the @samp{%} operator: + +@example + 16 % 3 @result{} 1 +-16 % 3 @result{} -1 + 16 % -3 @result{} 1 +-16 % -3 @result{} -1 +@end example + +@noindent +@samp{%} has the same operator precedence as @samp{/} and @samp{*}. + +From the rounded quotient and the remainder, you can reconstruct +the dividend, like this: + +@example +int +original_dividend (int divisor, int quotient, int remainder) +@{ + return divisor * quotient + remainder; +@} +@end example + +To do unrounded division, use floating point. If only one operand is +floating point, @samp{/} converts the other operand to floating +point. + +@example +16.0 / 3 @result{} 5.333333333333333 +16 / 3.0 @result{} 5.333333333333333 +16.0 / 3.0 @result{} 5.333333333333333 +16 / 3 @result{} 5 +@end example + +The remainder operator @samp{%} is not allowed for floating-point +operands, because it is not needed. The concept of remainder makes +sense for integers because the result of division of integers has to +be an integer. For floating point, the result of division is a +floating-point number, in other words a fraction, which will differ +from the exact result only by a very small amount. + +There are functions in the standard C library to calculate remainders +from integral-values division of floating-point numbers. +@xref{Remainder Functions, The GNU C Library, , libc, The GNU C Library +Reference Manual}. 
+ +Integer division overflows in one specific case: dividing the smallest +negative value for the data type (@pxref{Maximum and Minimum Values}) +by @minus{}1. That's because the correct result, which is the +corresponding positive number, does not fit (@pxref{Integer Overflow}) +in the same number of bits. On some computers now in use, this always +causes a signal @code{SIGFPE} (@pxref{Signals}), the same behavior +that the option @option{-ftrapv} specifies (@pxref{Signed Overflow}). + +Division by zero leads to unpredictable results---depending on the +type of computer, it might cause a signal @code{SIGFPE}, or it might +produce a numeric result. + +@cindex division by zero +@cindex zero, division by +@strong{Watch out:} Make sure the program does not divide by zero. If +you can't prove that the divisor is not zero, test whether it is zero, +and skip the division if so. + +@node Numeric Comparisons +@section Numeric Comparisons +@cindex numeric comparisons +@cindex comparisons +@cindex operators, comparison +@cindex equal operator +@cindex not-equal operator +@cindex less-than operator +@cindex greater-than operator +@cindex less-or-equal operator +@cindex greater-or-equal operator +@cindex operator, equal +@cindex operator, not-equal +@cindex operator, less-than +@cindex operator, greater-than +@cindex operator, less-or-equal +@cindex operator, greater-or-equal +@cindex truth value + +There are two kinds of comparison operators: @dfn{equality} and +@dfn{ordering}. Equality comparisons test whether two expressions +have the same value. The result is a @dfn{truth value}: a number that +is 1 for ``true'' and 0 for ``false.'' + +@example +a == b /* @r{Test for equal.} */ +a != b /* @r{Test for not equal.} */ +@end example + +The equality comparison is written @code{==} because plain @code{=} +is the assignment operator. + +Ordering comparisons test which operand is greater or less. Their +results are truth values. 
These are the ordering comparisons of C: + +@example +a < b /* @r{Test for less-than.} */ +a > b /* @r{Test for greater-than.} */ +a <= b /* @r{Test for less-than-or-equal.} */ +a >= b /* @r{Test for greater-than-or-equal.} */ +@end example + +For any integers @code{a} and @code{b}, exactly one of the comparisons +@code{a < b}, @code{a == b} and @code{a > b} is true, just as in +mathematics. However, if @code{a} and @code{b} are special floating +point values (not ordinary numbers), all three can be false. +@xref{Special Float Values}, and @ref{Invalid Optimizations}. + +@node Shift Operations +@section Shift Operations +@cindex shift operators +@cindex operators, shift +@cindex operators, shift +@cindex shift count + +@dfn{Shifting} an integer means moving the bit values to the left or +right within the bits of the data type. Shifting is defined only for +integers. Here's the way to write it: + +@example +/* @r{Left shift.} */ +5 << 2 @result{} 20 + +/* @r{Right shift.} */ +5 >> 2 @result{} 1 +@end example + +@noindent +The left operand is the value to be shifted, and the right operand +says how many bits to shift it (the @dfn{shift count}). The left +operand is promoted (@pxref{Operand Promotions}), so shifting never +operates on a narrow integer type; it's always either @code{int} or +wider. The value of the shift operator has the same type as the +promoted left operand. + +@menu +* Bits Shifted In:: How shifting makes new bits to shift in. +* Shift Caveats:: Caveats of shift operations. +* Shift Hacks:: Clever tricks with shift operations. +@end menu + +@node Bits Shifted In +@subsection Shifting Makes New Bits + +A shift operation shifts towards one end of the number and has to +generate new bits at the other end. + +Shifting left one bit must generate a new least significant bit. It +always brings in zero there. It is equivalent to multiplying by the +appropriate power of 2. 
For example, + +@example +5 << 3 @r{is equivalent to} 5 * 2*2*2 +-10 << 4 @r{is equivalent to} -10 * 2*2*2*2 +@end example + +The meaning of shifting right depends on whether the data type is +signed or unsigned (@pxref{Signed and Unsigned Types}). For a signed +data type, it performs ``arithmetic shift,'' which keeps the number's +sign unchanged by duplicating the sign bit. For an unsigned data +type, it performs ``logical shift,'' which always shifts in zeros at +the most significant bit. + +In both cases, shifting right one bit is division by two, rounding +towards negative infinity; shifting right two bits, as below, is +division by four. For example, + +@example +(unsigned) 19 >> 2 @result{} 4 +(unsigned) 20 >> 2 @result{} 5 +(unsigned) 21 >> 2 @result{} 5 +@end example + +For negative left operand @code{a}, @code{a >> 1} is not equivalent to +@code{a / 2}. They both divide by 2, but @samp{/} rounds toward +zero. + +The shift count must be zero or greater. Shifting by a negative +number of bits gives machine-dependent results. + +@node Shift Caveats +@subsection Caveats for Shift Operations + +@strong{Warning:} If the shift count is greater than or equal to the +width in bits of the first operand, the results are machine-dependent. +Logically speaking, the ``correct'' value would be either -1 (for +right shift of a negative number) or 0 (in all other cases), but what +it really generates is whatever the machine's shift instruction does in +that case. So unless you can prove that the second operand is not too +large, write code to check it at run time. + +@strong{Warning:} Never rely on how the shift operators relate in +precedence to other arithmetic binary operators. Programmers don't +remember these precedences, and won't understand the code.
Always use +parentheses to explicitly specify the nesting, like this: + +@example +a + (b << 5) /* @r{Shift first, then add.} */ +(a + b) << 5 /* @r{Add first, then shift.} */ +@end example + +Note: according to the C standard, shifting of signed values isn't +guaranteed to work properly when the value shifted is negative, or +becomes negative during the operation of shifting left. However, only +pedants have a reason to be concerned about this; only computers with +strange shift instructions could plausibly do this wrong. In GNU C, +the operation always works as expected. + +@node Shift Hacks +@subsection Shift Hacks + +You can use the shift operators for various useful hacks. For +example, given a date specified by day of the month @code{d}, month +@code{m}, and year @code{y}, you can store the entire date in a single +integer @code{date}: + +@example +unsigned int d = 12; +unsigned int m = 6; +unsigned int y = 1983; +unsigned int date = (((y << 4) + m) << 5) + d; +@end example + +@noindent +To extract the original day, month, and year out of +@code{date}, use a combination of shift and remainder. + +@example +d = date % 32; +m = (date >> 5) % 16; +y = date >> 9; +@end example + +@code{-1 << LOWBITS} is a clever way to make an integer whose +@code{LOWBITS} lowest bits are all 0 and the rest are all 1. +@code{-(1 << LOWBITS)} is equivalent to that, due to associativity of +multiplication, since negating a value is equivalent to multiplying it +by @minus{}1. + +@node Bitwise Operations +@section Bitwise Operations +@cindex bitwise operators +@cindex operators, bitwise +@cindex negation, bitwise +@cindex conjunction, bitwise +@cindex disjunction, bitwise + +Bitwise operators operate on integers, treating each bit independently. +They are not allowed for floating-point types. + +The examples in this section use binary constants, starting with +@samp{0b} (@pxref{Integer Constants}). They stand for 32-bit integers +of type @code{int}.
+ +@table @code +@item ~@code{a} +Unary operator for bitwise negation; this changes each bit of +@code{a} from 1 to 0 or from 0 to 1. + +@example +~0b10101000 @result{} 0b11111111111111111111111101010111 +~0 @result{} 0b11111111111111111111111111111111 +~0b11111111111111111111111111111111 @result{} 0 +~ (-1) @result{} 0 +@end example + +It is useful to remember that @code{~@var{x} + 1} equals +@code{-@var{x}}, for integers, and @code{~@var{x}} equals +@code{-@var{x} - 1}. The last example above shows this with @minus{}1 +as @var{x}. + +@item @code{a} & @code{b} +Binary operator for bitwise ``and'' or ``conjunction.'' Each bit in +the result is 1 if that bit is 1 in both @code{a} and @code{b}. + +@example +0b10101010 & 0b11001100 @result{} 0b10001000 +@end example + +@item @code{a} | @code{b} +Binary operator for bitwise ``or'' (``inclusive or'' or +``disjunction''). Each bit in the result is 1 if that bit is 1 in +either @code{a} or @code{b}. + +@example +0b10101010 | 0b11001100 @result{} 0b11101110 +@end example + +@item @code{a} ^ @code{b} +Binary operator for bitwise ``xor'' (``exclusive or''). Each bit in +the result is 1 if that bit is 1 in exactly one of @code{a} and @code{b}. + +@example +0b10101010 ^ 0b11001100 @result{} 0b01100110 +@end example +@end table + +To understand the effect of these operators on signed integers, keep +in mind that all modern computers use two's-complement representation +(@pxref{Integer Representations}) for negative integers. This means +that the highest bit of the number indicates the sign; it is 1 for a +negative number and 0 for a positive number. In a negative number, +the value in the other bits @emph{increases} as the number gets closer +to zero, so that @code{0b111@r{@dots{}}111} is @minus{}1 and +@code{0b100@r{@dots{}}000} is the most negative possible integer. + +@strong{Warning:} C defines a precedence ordering for the bitwise +binary operators, but you should never rely on it. 
You should +never rely on how bitwise binary operators relate in precedence to the +arithmetic and shift binary operators. Other programmers don't +remember this precedence ordering, so always use parentheses to +explicitly specify the nesting. + +For example, suppose @code{offset} is an integer that specifies +the offset within shared memory of a table, except that its bottom few +bits (@code{LOWBITS} says how many) are special flags. Here's +how to get just that offset and add it to the base address. + +@example +shared_mem_base + (offset & (-1 << LOWBITS)) +@end example + +Thanks to the outer set of parentheses, we don't need to know whether +@samp{&} has higher precedence than @samp{+}. Thanks to the inner +set, we don't need to know whether @samp{&} has higher precedence than +@samp{<<}. But we can rely on all unary operators to have higher +precedence than any binary operator, so we don't need parentheses +around the left operand of @samp{<<}. + +@node Assignment Expressions +@chapter Assignment Expressions +@cindex assignment expressions +@cindex operators, assignment + +As a general concept in programming, an @dfn{assignment} is a +construct that stores a new value into a place where values can be +stored---for instance, in a variable. Such places are called +@dfn{lvalues} (@pxref{Lvalues}) because they are locations that hold a value. + +An assignment in C is an expression because it has a value; we call +it an @dfn{assignment expression}. A simple assignment looks like + +@example +@var{lvalue} = @var{value-to-store} +@end example + +@noindent +We say it assigns the value of the expression @var{value-to-store} to +the location @var{lvalue}, or that it stores @var{value-to-store} +there. You can think of the ``l'' in ``lvalue'' as standing for +``left,'' since that's what you put on the left side of the assignment +operator. + +However, that's not the only way to use an lvalue, and not all lvalues +can be assigned to. 
To use the lvalue in the left side of an +assignment, it has to be @dfn{modifiable}. In C, that means it was +not declared with the type qualifier @code{const} (@pxref{const}). + +The value of the assignment expression is that of @var{lvalue} after +the new value is stored in it. This means you can use an assignment +inside other expressions. Assignment operators are right-associative +so that + +@example +x = y = z = 0; +@end example + +@noindent +is equivalent to + +@example +x = (y = (z = 0)); +@end example + +This is the only useful way for them to associate; +the other way, + +@example +((x = y) = z) = 0; +@end example + +@noindent +would be invalid since an assignment expression such as @code{x = y} +is not valid as an lvalue. + +@strong{Warning:} Write parentheses around an assignment if you nest +it inside another expression, unless that is a conditional expression, +or comma-separated series, or another assignment. + +@menu +* Simple Assignment:: The basics of storing a value. +* Lvalues:: Expressions into which a value can be stored. +* Modifying Assignment:: Shorthand for changing an lvalue's contents. +* Increment/Decrement:: Shorthand for incrementing and decrementing + an lvalue's contents. +* Postincrement/Postdecrement:: Accessing then incrementing or decrementing. +* Assignment in Subexpressions:: How to avoid ambiguity. +* Write Assignments Separately:: Write assignments as separate statements. +@end menu + +@node Simple Assignment +@section Simple Assignment +@cindex simple assignment +@cindex assignment, simple + +A @dfn{simple assignment expression} computes the value of the right +operand and stores it into the lvalue on the left. Here is a simple +assignment expression that stores 5 in @code{i}: + +@example +i = 5 +@end example + +@noindent +We say that this is an @dfn{assignment to} the variable @code{i} and +that it @dfn{assigns} @code{i} the value 5. It has no semicolon +because it is an expression (so it has a value). 
Adding a semicolon +at the end would make it a statement (@pxref{Expression Statement}). + +Here is another example of a simple assignment expression. Its +operands are not simple, but the kind of assignment done here is +simple assignment. + +@example +x[foo ()] = y + 6 +@end example + +A simple assignment with two different numeric data types converts the +right operand value to the lvalue's type, if possible. It can convert +any numeric type to any other numeric type. + +Simple assignment is also allowed on some non-numeric types: pointers +(@pxref{Pointers}), structures (@pxref{Structure Assignment}), and +unions (@pxref{Unions}). + +@strong{Warning:} Assignment is not allowed on arrays because +there are no array values in C; C variables can be arrays, but these +arrays cannot be manipulated as wholes. @xref{Limitations of C +Arrays}. + +@xref{Assignment Type Conversions}, for the complete rules about data +types used in assignments. + +@node Lvalues +@section Lvalues +@cindex lvalues + +An expression that identifies a memory space that holds a value is +called an @dfn{lvalue}, because it is a location that can hold a value. + +The standard kinds of lvalues are: + +@itemize @bullet +@item +A variable. + +@item +A pointer-dereference expression (@pxref{Pointer Dereference}) using +unary @samp{*}. + +@item +A structure field reference (@pxref{Structures}) using @samp{.}, if +the structure value is an lvalue. + +@item +A structure field reference using @samp{->}. This is always an lvalue +since @samp{->} implies pointer dereference. + +@item +A union alternative reference (@pxref{Unions}), on the same conditions +as for structure fields. + +@item +An array-element reference using @samp{[@r{@dots{}}]}, if the array +is an lvalue. +@end itemize + +If an expression's outermost operation is any other operator, that +expression is not an lvalue. 
Thus, the variable @code{x} is an +lvalue, but @code{x + 0} is not, even though these two expressions +compute the same value (assuming @code{x} is a number). + +An array can be an lvalue (the rules above determine whether it is +one), but using the array in an expression converts it automatically +to a pointer to the first element. The result of this conversion is +not an lvalue. Thus, if the variable @code{a} is an array, you can't +use @code{a} by itself as the left operand of an assignment. But you +can assign to an element of @code{a}, such as @code{a[0]}. That is an +lvalue since @code{a} is an lvalue. + +@node Modifying Assignment +@section Modifying Assignment +@cindex modifying assignment +@cindex assignment, modifying + +You can abbreviate the common construct + +@example +@var{lvalue} = @var{lvalue} + @var{expression} +@end example + +@noindent +as + +@example +@var{lvalue} += @var{expression} +@end example + +This is known as a @dfn{modifying assignment}. For instance, + +@example +i = i + 5; +i += 5; +@end example + +@noindent +shows two statements that are equivalent. The first uses +simple assignment; the second uses modifying assignment. + +Modifying assignment works with any binary arithmetic operator. For +instance, you can subtract something from an lvalue like this, + +@example +@var{lvalue} -= @var{expression} +@end example + +@noindent +or multiply it by a certain amount like this, + +@example +@var{lvalue} *= @var{expression} +@end example + +@noindent +or shift it by a certain amount like this. + +@example +@var{lvalue} <<= @var{expression} +@var{lvalue} >>= @var{expression} +@end example + +In most cases, this feature adds no power to the language, but it +provides substantial convenience. Also, when @var{lvalue} contains +code that has side effects, the simple assignment performs those side +effects twice, while the modifying assignment performs them once. 
For +instance, + +@example +x[foo ()] = x[foo ()] + 5; +@end example + +@noindent +calls @code{foo} twice, and it could return different values each +time. If @code{foo ()} returns 1 the first time and 3 the second +time, then the effect could be to add @code{x[3]} and 5 and store the +result in @code{x[1]}, or to add @code{x[1]} and 5 and store the +result in @code{x[3]}. We don't know which of the two it will do, +because C does not specify which call to @code{foo} is computed first. + +Such a statement is not well defined, and shouldn't be used. + +By contrast, + +@example +x[foo ()] += 5; +@end example + +@noindent +is well defined: it calls @code{foo} only once to determine which +element of @code{x} to adjust, and it adjusts that element by adding 5 +to it. + +@node Increment/Decrement +@section Increment and Decrement Operators +@cindex increment operator +@cindex decrement operator +@cindex operator, increment +@cindex operator, decrement +@cindex preincrement expression +@cindex predecrement expression + +The operators @samp{++} and @samp{--} are the @dfn{increment} and +@dfn{decrement} operators. When used on a numeric value, they add or +subtract 1. We don't consider them assignments, but they are +equivalent to assignments. + +Using @samp{++} or @samp{--} as a prefix, before an lvalue, is called +@dfn{preincrement} or @dfn{predecrement}. This adds or subtracts 1 +and the result becomes the expression's value. For instance, + +@example +#include <stdio.h> /* @r{Declares @code{printf}.} */ + +int +main (void) +@{ + int i = 5; + printf ("%d\n", i); + printf ("%d\n", ++i); + printf ("%d\n", i); + return 0; +@} +@end example + +@noindent +prints lines containing 5, 6, and 6 again. The expression @code{++i} +increments @code{i} from 5 to 6, and has the value 6, so the output +from @code{printf} on that line says @samp{6}.
+ +Using @samp{--} instead, for predecrement, + +@example +#include <stdio.h> /* @r{Declares @code{printf}.} */ + +int +main (void) +@{ + int i = 5; + printf ("%d\n", i); + printf ("%d\n", --i); + printf ("%d\n", i); + return 0; +@} +@end example + +@noindent +prints three lines that contain (respectively) @samp{5}, @samp{4}, and +again @samp{4}. + +@node Postincrement/Postdecrement +@section Postincrement and Postdecrement +@cindex postincrement expression +@cindex postdecrement expression +@cindex operator, postincrement +@cindex operator, postdecrement + +Using @samp{++} or @samp{--} @emph{after} an lvalue does something +peculiar: it gets the value directly out of the lvalue and @emph{then} +increments or decrements it. Thus, the value of @code{i++} is the same +as the value of @code{i}, but @code{i++} also increments @code{i} ``a +little later.'' This is called @dfn{postincrement} or +@dfn{postdecrement}. + +For example, + +@example +#include <stdio.h> /* @r{Declares @code{printf}.} */ + +int +main (void) +@{ + int i = 5; + printf ("%d\n", i); + printf ("%d\n", i++); + printf ("%d\n", i); + return 0; +@} +@end example + +@noindent +prints lines containing 5, again 5, and 6. The expression @code{i++} +has the value 5, which is the value of @code{i} at the time, +but it increments @code{i} from 5 to 6 just a little later. + +How much later is ``just a little later''? That is flexible. The +increment has to happen by the next @dfn{sequence point}. In simple cases, +that means by the end of the statement. @xref{Sequence Points}. + +If a unary operator precedes a postincrement or postdecrement expression, +the increment nests inside: + +@example +-a++ @r{is equivalent to} -(a++) +@end example + +That's the only order that makes sense; @code{-a} is not an lvalue, so +it can't be incremented.
+ +@node Assignment in Subexpressions +@section Pitfall: Assignment in Subexpressions +@cindex assignment in subexpressions +@cindex subexpressions, assignment in + +In C, the order of computing parts of an expression is not fixed. +Aside from a few special cases, the operations can be computed in any +order. If one part of the expression has an assignment to @code{x} +and another part of the expression uses @code{x}, the result is +unpredictable because that use might be computed before or after the +assignment. + +Here's an example of ambiguous code: + +@example +x = 20; +printf ("%d %d\n", x, x = 4); +@end example + +@noindent +If the second argument, @code{x}, is computed before the third argument, +@code{x = 4}, the second argument's value will be 20. If they are +computed in the other order, the second argument's value will be 4. + +Here's one way to make that code unambiguous: + +@example +y = 20; +printf ("%d %d\n", y, x = 4); +@end example + +Here's another way, with the other meaning: + +@example +x = 4; +printf ("%d %d\n", x, x); +@end example + +This issue applies to all kinds of assignments, and to the increment +and decrement operators, which are equivalent to assignments. +@xref{Order of Execution}, for more information about this. + +However, it can be useful to write assignments inside an +@code{if}-condition or @code{while}-test along with logical operators. +@xref{Logicals and Assignments}. + +@node Write Assignments Separately +@section Write Assignments in Separate Statements + +It is often convenient to write an assignment inside an +@code{if}-condition, but that can reduce the readability of the +program. Here's an example of what to avoid: + +@example +if (x = advance (x)) + @r{@dots{}} +@end example + +The idea here is to advance @code{x} and test if the value is nonzero. +However, readers might miss the fact that it uses @samp{=} and not +@samp{==}. 
In fact, writing @samp{=} where @samp{==} was intended +inside a condition is a common error, so GNU C can give warnings when +@samp{=} appears in a way that suggests it's an error. + +It is much clearer to write the assignment as a separate statement, like this: + +@example +x = advance (x); +if (x != 0) + @r{@dots{}} +@end example + +@noindent +This makes it unmistakably clear that @code{x} is assigned a new value. + +Another method is to use the comma operator (@pxref{Comma Operator}), +like this: + +@example +if (x = advance (x), x != 0) + @r{@dots{}} +@end example + +@noindent +However, putting the assignment in a separate statement is usually clearer +unless the assignment is very short, because it reduces nesting. + +@node Execution Control Expressions +@chapter Execution Control Expressions +@cindex execution control expressions +@cindex expressions, execution control + +This chapter describes the C operators that combine expressions to +control which of those expressions execute, or in which order. + +@menu +* Logical Operators:: Logical conjunction, disjunction, negation. +* Logicals and Comparison:: Logical operators with comparison operators. +* Logicals and Assignments:: Assignments with logical operators. +* Conditional Expression:: An if/else construct inside expressions. +* Comma Operator:: Build a sequence of subexpressions. +@end menu + +@node Logical Operators +@section Logical Operators +@cindex logical operators +@cindex operators, logical +@cindex conjunction operator +@cindex disjunction operator +@cindex negation operator, logical + +The @dfn{logical operators} combine truth values, which are normally +represented in C as numbers. Any expression with a numeric value is a +valid truth value: zero means false, and any other value means true. +A pointer type is also meaningful as a truth value; a null pointer +(which is zero) means false, and a non-null pointer means true +(@pxref{Pointer Types}). 
The value of a logical operator is always 1 +or 0 and has type @code{int} (@pxref{Integer Types}). + +The logical operators are used mainly in the condition of an @code{if} +statement, or in the end test in a @code{for} statement or +@code{while} statement (@pxref{Statements}). However, they are valid +in any context where an integer-valued expression is allowed. + +@table @samp +@item ! @var{exp} +Unary operator for logical ``not.'' The value is 1 (true) if +@var{exp} is 0 (false), and 0 (false) if @var{exp} is nonzero (true). + +@strong{Warning:} if @code{exp} is anything but an lvalue or a +function call, you should write parentheses around it. + +@item @var{left} && @var{right} +The logical ``and'' binary operator computes @var{left} and, if necessary, +@var{right}. If both of the operands are true, the @samp{&&} expression +gives the value 1 (which is true). Otherwise, the @samp{&&} expression +gives the value 0 (false). If @var{left} yields a false value, +that determines the overall result, so @var{right} is not computed. + +@item @var{left} || @var{right} +The logical ``or'' binary operator computes @var{left} and, if necessary, +@var{right}. If at least one of the operands is true, the @samp{||} expression +gives the value 1 (which is true). Otherwise, the @samp{||} expression +gives the value 0 (false). If @var{left} yields a true value, +that determines the overall result, so @var{right} is not computed. +@end table + +@strong{Warning:} never rely on the relative precedence of @samp{&&} +and @samp{||}. When you use them together, always use parentheses to +specify explicitly how they nest, as shown here: + +@example +if ((r != 0 && x % r == 0) + || + (s != 0 && x % s == 0)) +@end example + +@node Logicals and Comparison +@section Logical Operators and Comparisons + +The most common thing to use inside the logical operators is a +comparison. 
Conveniently, @samp{&&} and @samp{||} have lower +precedence than comparison operators and arithmetic operators, so we +can write expressions like this without parentheses and get the +nesting that is natural: two comparison operations that must both be +true. + +@example +if (r != 0 && x % r == 0) +@end example + +@noindent +This example also shows how it is useful that @samp{&&} guarantees to +skip the right operand if the left one turns out false. Because of +that, this code never tries to divide by zero. + +This is equivalent: + +@example +if (r && x % r == 0) +@end example + +@noindent +A truth value is simply a number, so @code{r} +as a truth value tests whether it is nonzero. +But @code{r}'s meaning is not a truth value---it is a number to divide by. +So it is better style to write the explicit @code{!= 0}. + +Here's another equivalent way to write it: + +@example +if (!(r == 0) && x % r == 0) +@end example + +@noindent +This illustrates the unary @samp{!} operator, and the need to +write parentheses around its operand. + +@node Logicals and Assignments +@section Logical Operators and Assignments + +There are cases where assignments nested inside the condition can +actually make a program @emph{easier} to read. Here is an example +using a hypothetical type @code{list} which represents a list; it +tests whether the list has at least two links, using hypothetical +functions, @code{nonempty} which is true if the argument is a nonempty +list, and @code{list_next} which advances from one list link to the +next.
We assume that a list is never a null pointer, so that the +assignment expressions are always ``true.'' + +@example +if (nonempty (list) + && (temp1 = list_next (list)) + && nonempty (temp1) + && (temp2 = list_next (temp1))) + @r{@dots{}} /* @r{use @code{temp1} and @code{temp2}} */ +@end example + +@noindent +Here we get the benefit of the @samp{&&} operator, to avoid executing +the rest of the code if a call to @code{nonempty} says ``false.'' The +only natural place to put the assignments is among those calls. + +It would be possible to rewrite this as several statements, but that +could make it much more cumbersome. On the other hand, when the test +is even more complex than this one, splitting it into multiple +statements might be necessary for clarity. + +If an empty list is a null pointer, we can dispense with calling +@code{nonempty}: + +@example +if ((temp1 = list_next (list)) + && (temp2 = list_next (temp1))) + @r{@dots{}} +@end example + +@node Conditional Expression +@section Conditional Expression +@cindex conditional expression +@cindex expression, conditional + +C has a conditional expression that selects one of two expressions +to compute and get the value from. It looks like this: + +@example +@var{condition} ? @var{iftrue} : @var{iffalse} +@end example + +@menu +* Conditional Rules:: Rules for the conditional operator. +* Conditional Branches:: About the two branches in a conditional. +@end menu + +@node Conditional Rules +@subsection Rules for Conditional Operator + +The first operand, @var{condition}, should be a value that can be +compared with zero---a number or a pointer. If it is true (nonzero), +then the conditional expression computes @var{iftrue} and its value +becomes the value of the conditional expression. Otherwise the +conditional expression computes @var{iffalse} and its value becomes +the value of the conditional expression. The conditional expression +always computes just one of @var{iftrue} and @var{iffalse}, never both +of them. 
+ +Here's an example: the absolute value of a number @code{x} +can be written as @code{(x >= 0 ? x : -x)}. + +@strong{Warning:} The conditional expression operators have rather low +syntactic precedence. Except when the conditional expression is used +as an argument in a function call, write parentheses around it. For +clarity, always write parentheses around it if it extends across more +than one line. + +Assignment operators and the comma operator (@pxref{Comma Operator}) +have lower precedence than conditional expression operators, so write +parentheses around those when they appear inside a conditional +expression. @xref{Order of Execution}. + +@node Conditional Branches +@subsection Conditional Operator Branches +@cindex branches of conditional expression + +We call @var{iftrue} and @var{iffalse} the @dfn{branches} of the +conditional. + +The two branches should normally have the same type, but a few +exceptions are allowed. If they are both numeric types, the +conditional converts both to their common type (@pxref{Common Type}). + +With pointers (@pxref{Pointers}), the two values can be pointers to +nearly compatible types (@pxref{Compatible Types}). In this case, the +result type is a similar pointer whose target type combines all the +type qualifiers (@pxref{Type Qualifiers}) of both branches. + +If one branch has type @code{void *} and the other is a pointer to an +object (not to a function), the conditional converts the @code{void *} +branch to the type of the other. + +If one branch is an integer constant with value zero and the other is +a pointer, the conditional converts zero to the pointer's type. + +In GNU C, you can omit @var{iftrue} in a conditional expression. In +that case, if @var{condition} is nonzero, its value becomes the value of +the conditional expression, after conversion to the common type. +Thus, + +@example +x ? : y +@end example + +@noindent +has the value of @code{x} if that is nonzero; otherwise, the value of +@code{y}. 
+ +@cindex side effect in ?: +@cindex ?: side effect +Omitting @var{iftrue} is useful when @var{condition} has side effects. +In that case, writing that expression twice would carry out the side +effects twice, but writing it once does them just once. For example, +if we suppose that the function @code{next_element} advances a pointer +variable to point to the next element in a list and returns the new +pointer, + +@example +next_element () ? : default_pointer +@end example + +@noindent +is a way to advance the pointer and use its new value if it isn't +null, but use @code{default_pointer} if that is null. We must not do +it this way, + +@example +next_element () ? next_element () : default_pointer +@end example + +@noindent +because it would advance the pointer a second time. + +@node Comma Operator +@section Comma Operator +@cindex comma operator +@cindex operator, comma + +The comma operator stands for sequential execution of expressions. +The value of the comma expression comes from the last expression in +the sequence; the previous expressions are computed only for their +side effects. It looks like this: + +@example +@var{exp1}, @var{exp2} @r{@dots{}} +@end example + +@noindent +You can bundle any number of expressions together this way, by putting +commas between them. + +@menu +* Uses of Comma:: When to use the comma operator. +* Clean Comma:: Clean use of the comma operator. +* Avoid Comma:: When to not use the comma operator. +@end menu + +@node Uses of Comma +@subsection The Uses of the Comma Operator + +With commas, you can put several expressions into a place that +requires just one expression---for example, in the header of a +@code{for} statement. This statement + +@example +for (i = 0, j = 10, k = 20; i < n; i++) +@end example + +@noindent +contains three assignment expressions, to initialize @code{i}, @code{j} +and @code{k}. 
The syntax of @code{for} requires just one expression +for initialization; to include three assignments, we use commas to +bundle them into a single larger expression, @code{i = 0, j = 10, k = +20}. This technique is also useful in the loop-advance expression, +the last of the three inside the @code{for} parentheses. + +In the @code{for} statement and the @code{while} statement +(@pxref{Loop Statements}), a comma provides a way to perform some side +effect before the loop-exit test. For example, + +@example +while (printf ("At the test, x = %d\n", x), x != 0) +@end example + +@node Clean Comma +@subsection Clean Use of the Comma Operator + +Always write parentheses around a series of comma operators, except +when it is at top level in an expression statement, or within the +parentheses of an @code{if}, @code{for}, @code{while}, or @code{switch} +statement (@pxref{Statements}). For instance, in + +@example +for (i = 0, j = 10, k = 20; i < n; i++) +@end example + +@noindent +the commas between the assignments are clear because they are between +a parenthesis and a semicolon. + +The arguments in a function call are also separated by commas, but that is +not an instance of the comma operator. Note the difference between + +@example +foo (4, 5, 6) +@end example + +@noindent +which passes three arguments to @code{foo} and + +@example +foo ((4, 5, 6)) +@end example + +@noindent +which uses the comma operator and passes just one argument +(with value 6). + +@strong{Warning:} don't use the comma operator around an argument +of a function unless it helps understand the code. When you do so, +don't put part of another argument on the same line. Instead, add a +line break to make the parentheses around the comma operator easier to +see, like this. 
+ +@example +foo ((mumble (x, y), frob (z)), + *p) +@end example + +@node Avoid Comma +@subsection When Not to Use the Comma Operator + +You can use a comma in any subexpression, but in most cases it only +makes the code confusing, and it is clearer to raise all but the last +of the comma-separated expressions to a higher level. Thus, instead +of this: + +@example +x = (y += 4, 8); +@end example + +@noindent +it is much clearer to write this: + +@example +y += 4, x = 8; +@end example + +@noindent +or this: + +@example +y += 4; +x = 8; +@end example + +Use commas only in the cases where there is no clearer alternative +involving multiple statements. + +By contrast, don't hesitate to use commas in the expansion in a macro +definition. The trade-offs of code clarity are different in that +case, because the @emph{use} of the macro may improve overall clarity +so much that the ugliness of the macro's @emph{definition} is a small +price to pay. @xref{Macros}. + +@node Binary Operator Grammar +@chapter Binary Operator Grammar +@cindex binary operator grammar +@cindex grammar, binary operator +@cindex operator precedence +@cindex precedence, operator +@cindex left-associative + +@dfn{Binary operators} are those that take two operands, one +on the left and one on the right. + +All the binary operators in C are syntactically left-associative. +This means that @w{@code{a @var{op} b @var{op} c}} means @w{@code{(a +@var{op} b) @var{op} c}}. However, you should only write repeated +operators without parentheses using @samp{+}, @samp{-}, @samp{*} and +@samp{/}, because those cases are clear from algebra. So it is ok to +write @code{a + b + c} or @code{a - b - c}, but never @code{a == b == +c} or @code{a % b % c}. + +Each C operator has a @dfn{precedence}, which is its rank in the +grammatical order of the various operators. The operators with the +highest precedence grab adjoining operands first; these expressions +then become operands for operators of lower precedence. 
+ +The precedence order of operators in C is fully specified, so any +combination of operations leads to a well-defined nesting. We state +only part of the full precedence ordering here because it is bad +practice for C code to depend on the other cases. For cases not +specified in this chapter, always use parentheses to make the nesting +explicit.@footnote{Personal note from Richard Stallman: I wrote GCC without +remembering anything about the C precedence order beyond what's stated +here. I studied the full precedence table to write the parser, and +promptly forgot it again. If you need to look up the full precedence order +to understand some C code, fix the code with parentheses so nobody else +needs to do that.} + +You can depend on this subsequence of the precedence ordering +(stated from highest precedence to lowest): + +@enumerate +@item +Component access (@samp{.} and @samp{->}). + +@item +Unary prefix operators. + +@item +Unary postfix operators. + +@item +Multiplication, division, and remainder (they have the same precedence). + +@item +Addition and subtraction (they have the same precedence). + +@item +Comparisons---but watch out! + +@item +Logical operators @samp{&&} and @samp{||}---but watch out! + +@item +Conditional expression with @samp{?} and @samp{:}. + +@item +Assignments. + +@item +Sequential execution (the comma operator, @samp{,}). +@end enumerate + +Two of the lines in the above list say ``but watch out!'' That means +that the line covers operators with subtly different precedence. +Never depend on the grammar of C to decide how two comparisons nest; +instead, always use parentheses to specify their nesting. + +You can let several @samp{&&} operators associate, or several +@samp{||} operators, but always use parentheses to show how @samp{&&} +and @samp{||} nest with each other. @xref{Logical Operators}. + +There is one other precedence ordering that code can depend on: + +@enumerate +@item +Unary postfix operators. 
+ +@item +Bitwise and shift operators---but watch out! + +@item +Conditional expression with @samp{?} and @samp{:}. +@end enumerate + +The caveat for bitwise and shift operators is like that for logical +operators: you can let multiple uses of one bitwise operator +associate, but always use parentheses to control nesting of dissimilar +operators. + +These lists do not specify any precedence ordering between the bitwise +and shift operators of the second list and the binary operators above +conditional expressions in the first list. When they come together, +parenthesize them. @xref{Bitwise Operations}. + +@node Order of Execution +@chapter Order of Execution +@cindex order of execution + +The order of execution of a C program is not always obvious, and not +necessarily predictable. This chapter describes what you can count on. + +@menu +* Reordering of Operands:: Operations in C are not necessarily computed + in the order they are written. +* Associativity and Ordering:: Some associative operations are performed + in a particular order; others are not. +* Sequence Points:: Some guarantees about the order of operations. +* Postincrement and Ordering:: Ambiguous execution order with postincrement. +* Ordering of Operands:: Evaluation order of operands + and function arguments. +* Optimization and Ordering:: Compiler optimizations can reorder operations + only if it has no impact on program results. +@end menu + +@node Reordering of Operands +@section Reordering of Operands +@cindex ordering of operands +@cindex reordering of operands +@cindex operand execution ordering + +The C language does not necessarily carry out operations within an +expression in the order they appear in the code. For instance, in +this expression, + +@example +foo () + bar () +@end example + +@noindent +@code{foo} might be called first or @code{bar} might be called first. +If @code{foo} updates a datum and @code{bar} uses that datum, the +results can be unpredictable.
+ +The unpredictable order of computation of subexpressions also makes a +difference when one of them contains an assignment. We already saw +this example of bad code, + +@example +x = 20; +printf ("%d %d\n", x, x = 4); +@end example + +@noindent +in which the second argument, @code{x}, has a different value +depending on whether it is computed before or after the assignment in +the third argument. + +@node Associativity and Ordering +@section Associativity and Ordering +@cindex associativity and ordering + +An associative binary operator, such as @code{+}, when used repeatedly +can combine any number of operands. The operands' values may be +computed in any order. + +If the values are integers and overflow can be ignored, they may be +combined in any order. Thus, given four functions that return +@code{unsigned int}, calling them and adding their results as here + +@example +(foo () + bar ()) + (baz () + quux ()) +@end example + +@noindent +may add up the results in any order. + +By contrast, arithmetic on signed integers, with overflow significant, +is not really associative (@pxref{Integer Overflow}). Thus, the +additions must be done in the order specified, obeying parentheses and +left-association. That means computing @code{(foo () + bar ())} and +@code{(baz () + quux ())} first (in either order), then adding the +two. + +The same applies to arithmetic on floating-point values, since that +too is not really associative. However, the GCC option +@option{-funsafe-math-optimizations} allows the compiler to change the +order of calculation when an associative operation (associative in +exact mathematics) combines several operands. The option takes effect +when compiling a module (@pxref{Compilation}). Changing the order +of association can enable the program to pipeline the floating point +operations. + +In all these cases, the four function calls can be done in any order. +There is no right or wrong about that. 
+ +@node Sequence Points +@section Sequence Points +@cindex sequence points +@cindex full expression + +There are some points in the code where C makes limited guarantees +about the order of operations. These are called @dfn{sequence +points}. Here is where they occur: + +@itemize @bullet +@item +At the end of a @dfn{full expression}; that is to say, an expression +that is not part of a larger expression. All side effects specified +by that expression are carried out before execution moves +on to subsequent code. + +@item +At the end of the first operand of certain operators: @samp{,}, +@samp{&&}, @samp{||}, and @samp{?:}. All side effects specified by +that expression are carried out before any execution of the +next operand. + +The commas that separate arguments in a function call are @emph{not} +comma operators, and they do not create sequence points. The rule +for function arguments and the rule for operands are different +(@pxref{Ordering of Operands}). + +@item +Just before calling a function. All side effects specified by the +argument expressions are carried out before calling the function. + +If the function to be called is not constant---that is, if it is +computed by an expression---all side effects in that expression are +carried out before calling the function. +@end itemize + +The ordering imposed by a sequence point applies locally to a limited +range of code, as stated above in each case. For instance, the +ordering imposed by the comma operator does not apply to code outside +that comma operator. Thus, in this code, + +@example +(x = 5, foo (x)) + x * x +@end example + +@noindent +the sequence point of the comma operator orders @code{x = 5} before +@code{foo (x)}, but @code{x * x} could be computed before or after +them. 
+ +@node Postincrement and Ordering +@section Postincrement and Ordering +@cindex postincrement and ordering +@cindex ordering and postincrement + +Ordering requirements are loose with the postincrement and +postdecrement operations (@pxref{Postincrement/Postdecrement}), which +specify side effects to happen ``a little later.'' They must happen +before the next sequence point, but that still leaves room for various +meanings. In this expression, + +@example +z = x++ - foo () +@end example + +@noindent +it's unpredictable whether @code{x} gets incremented before or after +calling the function @code{foo}. If @code{foo} refers to @code{x}, +it might see the old value or it might see the incremented value. + +In this perverse expression, + +@example +x = x++ +@end example + +@noindent +@code{x} will certainly be incremented but the incremented value may +not stick. If the incrementation of @code{x} happens after the +assignment to @code{x}, the incremented value will remain in place. +But if the incrementation happens first, the assignment will overwrite +that with the not-yet-incremented value, so the expression as a whole +will leave @code{x} unchanged. + +@node Ordering of Operands +@section Ordering of Operands +@cindex ordering of operands +@cindex operand ordering + +Operands and arguments can be computed in any order, but there are limits to +this intermixing in GNU C: + +@itemize @bullet +@item +The operands of a binary arithmetic operator can be computed in either +order, but they can't be intermixed: one of them has to come first, +followed by the other. Any side effects in the operand that's computed +first are executed before the other operand is computed. + +@item +That applies to assignment operators too, except that in simple assignment +the previous value of the left operand is unused. + +@item +The arguments in a function call can be computed in any order, but +they can't be intermixed. 
Thus, one argument is fully computed, then +another, and so on until they are all done. Any side effects in one argument +are executed before computation of another argument begins. +@end itemize + +These rules don't cover side effects caused by postincrement and +postdecrement operators---those can be deferred up to the next +sequence point. + +If you want to get pedantic, the fact is that GCC can reorder the +computations in many other ways provided that doesn't alter the result +of running the program. However, because they don't alter the result +of running the program, they are negligible, unless you are concerned +with the values in certain variables at various times as seen by other +processes. In those cases, you can use @code{volatile} to prevent +optimizations that would make them behave strangely. @xref{volatile}. + +@node Optimization and Ordering +@section Optimization and Ordering +@cindex optimization and ordering +@cindex ordering and optimization + +Sequence points limit the compiler's freedom to reorder operations +arbitrarily, but optimizations can still reorder them if the compiler +concludes that this won't alter the results. Thus, in this code, + +@example +x++; +y = z; +x++; +@end example + +@noindent +there is a sequence point after each statement, so the code is +supposed to increment @code{x} once before the assignment to @code{y} +and once after. However, incrementing @code{x} has no effect on +@code{y} or @code{z}, and setting @code{y} can't affect @code{x}, so +the code could be optimized into this: + +@example +y = z; +x += 2; +@end example + +Normally that has no effect except to make the program faster. But +there are special situations where it can cause trouble due to things +that the compiler cannot know about, such as shared memory. To limit +optimization in those places, use the @code{volatile} type qualifier +(@pxref{volatile}). 
+ +@node Primitive Types +@chapter Primitive Data Types +@cindex primitive types +@cindex types, primitive + +This chapter describes all the primitive data types of C---that is, +all the data types that aren't built up from other types. They +include the types @code{int} and @code{double} that we've already covered. + +@menu +* Integer Types:: Description of integer types. +* Floating-Point Data Types:: Description of floating-point types. +* Complex Data Types:: Description of complex number types. +* The Void Type:: A type indicating no value at all. +* Other Data Types:: A brief summary of other types. +* Type Designators:: Referring to a data type abstractly. +@end menu + +These types are all made up of bytes (@pxref{Storage}). + +@node Integer Types +@section Integer Data Types +@cindex integer types +@cindex types, integer + +Here we describe all the integer types and their basic +characteristics. @xref{Integers in Depth}, for more information about +the bit-level integer data representations and arithmetic. + +@menu +* Basic Integers:: Overview of the various kinds of integers. +* Signed and Unsigned Types:: Integers can either hold both negative and + non-negative values, or only non-negative. +* Narrow Integers:: When to use smaller integer types. +* Integer Conversion:: Casting a value from one integer type + to another. +* Boolean Type:: An integer type for boolean values. +* Integer Variations:: Sizes of integer types can vary + across platforms. +@end menu + +@node Basic Integers +@subsection Basic Integers + +@findex char +@findex int +@findex short int +@findex long int +@findex long long int + +Integer data types in C can be signed or unsigned. An unsigned type +can represent only positive numbers and zero. A signed type can +represent both positive and negative numbers, in a range spread almost +equally on both sides of zero. + +Aside from signedness, the integer data types vary in size: how many +bytes long they are. 
The size determines how many different integer +values the type can hold. + +Here's a list of the signed integer data types, with the sizes they +have on most computers. Each has a corresponding unsigned type; see +@ref{Signed and Unsigned Types}. + +@table @code +@item signed char +One byte (8 bits). This integer type is used mainly for integers that +represent characters, as part of arrays or other data structures. + +@item short +@itemx short int +Two bytes (16 bits). + +@item int +Four bytes (32 bits). + +@item long +@itemx long int +Four bytes (32 bits) or eight bytes (64 bits), depending on the +platform. Typically it is 32 bits on 32-bit computers +and 64 bits on 64-bit computers, but there are exceptions. + +@item long long +@itemx long long int +Eight bytes (64 bits). Supported in GNU C in the 1980s, and +incorporated into standard C as of ISO C99. +@end table + +You can omit @code{int} when you use @code{long} or @code{short}. +This is harmless and customary. + +@node Signed and Unsigned Types +@subsection Signed and Unsigned Types +@cindex signed types +@cindex unsigned types +@cindex types, signed +@cindex types, unsigned +@findex signed +@findex unsigned + +An unsigned integer type can represent only positive numbers and zero. +A signed type can represent both positive and negative numbers, in a +range spread almost equally on both sides of zero. For instance, +@code{unsigned char} holds numbers from 0 to 255 (on most computers), +while @code{signed char} holds numbers from @minus{}128 to 127. Each of +these types holds 256 different possible values, since they are both 8 +bits wide. + +Write @code{signed} or @code{unsigned} before the type keyword to +specify a signed or an unsigned type. However, the integer types +other than @code{char} are signed by default; with them, @code{signed} +is a no-op. + +Plain @code{char} may be signed or unsigned; this depends on the +compiler, the machine in use, and its operating system.
+ +In many programs, it makes no difference whether @code{char} is +signed. When it does matter, don't leave it to chance; write +@code{signed char} or @code{unsigned char}.@footnote{Personal note from +Richard Stallman: Eating with hackers at a fish restaurant, I ordered +Arctic Char. When my meal arrived, I noted that the chef had not +signed it. So I complained, ``This char is unsigned---I wanted a +signed char!'' Or rather, I would have said this if I had thought of +it fast enough.} + +@node Narrow Integers +@subsection Narrow Integers + +The types that are narrower than @code{int} are rarely used for +ordinary variables---we declare them @code{int} instead. This is +because C converts those narrower types to @code{int} for any +arithmetic. There is literally no reason to declare a local variable +@code{char}, for instance. + +In particular, if the value is really a character, you should declare +the variable @code{int}. Not @code{char}! Using that narrow type can +force the compiler to truncate values for conversion, which is a +waste. Furthermore, some functions return either a character value, +or @minus{}1 for ``no character.'' Using @code{int} keeps those +values distinct. + +The narrow integer types are useful as parts of other objects, such as +arrays and structures. Compare these array declarations, whose sizes +on 32-bit processors are shown: + +@example +signed char ac[1000]; /* @r{1000 bytes} */ +short as[1000]; /* @r{2000 bytes} */ +int ai[1000]; /* @r{4000 bytes} */ +long long all[1000]; /* @r{8000 bytes} */ +@end example + +In addition, character strings must be made up of @code{char}s, +because that's what all the standard library string functions expect. +Thus, array @code{ac} could be used as a character string, but the +others could not be. + +@node Integer Conversion +@subsection Conversion among Integer Types + +C converts between integer types implicitly in many situations. 
It +converts the narrow integer types, @code{char} and @code{short}, to +@code{int} whenever they are used in arithmetic. Assigning a new +value to an integer variable (or other lvalue) converts the value to +the variable's type. + +You can also convert one integer type to another explicitly with a +@dfn{cast} operator. @xref{Explicit Type Conversion}. + +The process of conversion to a wider type is straightforward: the +value is unchanged. The only exception is when converting a negative +value (in a signed type, obviously) to a wider unsigned type. In that +case, the result is a positive value with the same bits +(@pxref{Integers in Depth}). + +@cindex truncation +Converting to a narrower type, also called @dfn{truncation}, involves +discarding some of the value's bits. This is not considered overflow +(@pxref{Integer Overflow}) because loss of significant bits is a +normal consequence of truncation. Likewise for conversion between +signed and unsigned types of the same width. + +More information about conversion for assignment is in +@ref{Assignment Type Conversions}. For conversion for arithmetic, +see @ref{Argument Promotions}. + +@node Boolean Type +@subsection Boolean Type +@cindex boolean type +@cindex type, boolean +@findex bool + +The unsigned integer type @code{bool} holds truth values: its possible +values are 0 and 1. Converting any nonzero value to @code{bool} +results in 1. For example: + +@example +bool a = 0; +bool b = 1; +bool c = 4; /* @r{Stores the value 1 in @code{c}.} */ +@end example + +Unlike @code{int}, @code{bool} is not a keyword. It is defined in +the header file @file{stdbool.h}. + +@node Integer Variations +@subsection Integer Variations + +The integer types of C have standard @emph{names}, but what they +@emph{mean} varies depending on the kind of platform in use: +which kind of computer, which operating system, and which compiler. +It may even depend on the compiler options used. 
+ +Plain @code{char} may be signed or unsigned; this depends on the +platform, too. Even for GNU C, there is no general rule. + +In theory, all of the integer types' sizes can vary. @code{char} is +always considered one ``byte'' for C, but it is not necessarily an +8-bit byte; on some platforms it may be more than 8 bits. ISO C +specifies only that none of these types is narrower than the ones +above it in the list in @ref{Basic Integers}, and that @code{short} +has at least 16 bits. + +It is possible that in the future GNU C will support platforms where +@code{int} is 64 bits long. In practice, however, on today's real +computers, there is little variation; you can rely on the table +given previously (@pxref{Basic Integers}). + +To be completely sure of the size of an integer type, +use the types @code{int16_t}, @code{int32_t} and @code{int64_t}. +Their corresponding unsigned types add @samp{u} at the front. +To define these, include the header file @file{stdint.h}. + +The GNU C Compiler compiles for some embedded controllers that use two +bytes for @code{int}. On some, @code{int} is just one ``byte,'' and +so is @code{short int}---but that ``byte'' may contain 16 bits or even +32 bits. These processors can't support an ordinary operating system +(they may have their own specialized operating systems), and most C +programs do not try to support them. + +@node Floating-Point Data Types +@section Floating-Point Data Types +@cindex floating-point types +@cindex types, floating-point +@findex double +@findex float +@findex long double + +@dfn{Floating point} is the binary analogue of scientific notation: +internally it represents a number as a fraction and a binary exponent; the +value is that fraction multiplied by the specified power of 2. + +For instance, to represent 6, the fraction would be 0.75 and the +exponent would be 3; together they stand for the value @math{0.75 * 2@sup{3}}, +meaning 0.75 * 8. 
The value 1.5 would use 0.75 as the fraction and 1 +as the exponent. The value 0.75 would use 0.75 as the fraction and 0 +as the exponent. The value 0.375 would use 0.75 as the fraction and +-1 as the exponent. + +These binary exponents are used by machine instructions. You can +write a floating-point constant this way if you wish, using +hexadecimal; but normally we write floating-point numbers in decimal. +@xref{Floating Constants}. + +C has three floating-point data types: + +@table @code +@item double +``Double-precision'' floating point, which uses 64 bits. This is the +normal floating-point type, and modern computers normally do +their floating-point computations in this type, or some wider type. +Except when there is a special reason to do otherwise, this is the +type to use for floating-point values. + +@item float +``Single-precision'' floating point, which uses 32 bits. It is useful +for floating-point values stored in structures and arrays, to save +space when the full precision of @code{double} is not needed. In +addition, single-precision arithmetic is faster on some computers, and +occasionally that is useful. But not often---most programs don't use +the type @code{float}. + +C would be cleaner if @code{float} were the name of the type we +use for most floating-point values; however, for historical reasons, +that's not so. + +@item long double +``Extended-precision'' floating point is either 80-bit or 128-bit +precision, depending on the machine in use. On some machines, which +have no floating-point format wider than @code{double}, this is +equivalent to @code{double}. +@end table + +Floating-point arithmetic raises many subtle issues. @xref{Floating +Point in Depth}, for more information. 
+ +@node Complex Data Types +@section Complex Data Types +@cindex complex numbers +@cindex types, complex +@cindex @code{_Complex} keyword +@cindex @code{__complex__} keyword +@findex _Complex +@findex __complex__ + +Complex numbers can include both a real part and an imaginary part. +The numeric constants covered above have real-numbered values. An +imaginary-valued constant is an ordinary real-valued constant followed +by @samp{i}. + +To declare numeric variables as complex, use the @code{_Complex} +keyword.@footnote{For compatibility with older versions of GNU C, the +keyword @code{__complex__} is also allowed. Going forward, however, +use the new @code{_Complex} keyword as defined in ISO C11.} The +standard C complex data types are floating point, + +@example +_Complex float foo; +_Complex double bar; +_Complex long double quux; +@end example + +@noindent +but GNU C supports integer complex types as well. + +Since @code{_Complex} is a keyword just like @code{float} and +@code{double} and @code{long}, the keywords can appear in any order, +but the order shown above seems most logical. + +GNU C supports constants for complex values; for instance, @code{4.0 + +3.0i} has the value 4 + 3i as type @code{_Complex double}. +@xref{Imaginary Constants}. + +To pull the real and imaginary parts of the number back out, GNU C +provides the keywords @code{__real__} and @code{__imag__}: + +@example +_Complex double foo = 4.0 + 3.0i; + +double a = __real__ foo; /* @r{@code{a} is now 4.0.} */ +double b = __imag__ foo; /* @r{@code{b} is now 3.0.} */ +@end example + +@noindent +Standard C does not include these keywords, and instead relies on +functions defined in @code{complex.h} for accessing the real and +imaginary parts of a complex number: @code{crealf}, @code{creal}, and +@code{creall} extract the real part of a float, double, or long double +complex number, respectively; @code{cimagf}, @code{cimag}, and +@code{cimagl} extract the imaginary part. 
+ +@cindex complex conjugation +GNU C also defines @samp{~} as an operator for complex conjugation, +which means negating the imaginary part of a complex number: + +@example +_Complex double foo = 4.0 + 3.0i; +_Complex double bar = ~foo; /* @r{@code{bar} is now 4 @minus{} 3i.} */ +@end example + +@noindent +For standard C compatibility, you can use the appropriate library +function: @code{conjf}, @code{conj}, or @code{conjl}. + +@node The Void Type +@section The Void Type +@cindex void type +@cindex type, void +@findex void + +The data type @code{void} is a dummy---it allows no operations. It +really means ``no value at all.'' When a function is meant to return +no value, we write @code{void} for its return type. Then +@code{return} statements in that function should not specify a value +(@pxref{return Statement}). Here's an example: + +@example +void +print_if_positive (double x, double y) +@{ + if (x <= 0) + return; + if (y <= 0) + return; + printf ("Next point is (%f,%f)\n", x, y); +@} +@end example + +A @code{void}-returning function is comparable to what some other languages +call a ``procedure'' instead of a ``function.'' + +@c ??? Already presented +@c @samp{%f} in an output template specifies to format a @code{double} value +@c as a decimal number, using a decimal point if needed. + +@node Other Data Types +@section Other Data Types + +Beyond the primitive types, C provides several ways to construct new +data types. For instance, you can define @dfn{pointers}, values that +represent the addresses of other data (@pxref{Pointers}). You can +define @dfn{structures}, as in many other languages +(@pxref{Structures}), and @dfn{unions}, which specify multiple ways +to look at the same memory space (@pxref{Unions}). @dfn{Enumerations} +are collections of named integer codes (@pxref{Enumeration Types}). + +@dfn{Array types} in C are used for allocating space for objects, +but C does not permit operating on an array value as a whole. @xref{Arrays}.
+ +@node Type Designators +@section Type Designators +@cindex type designator + +Some C constructs require a way to designate a specific data type +independent of any particular variable or expression which has that +type. The way to do this is with a @dfn{type designator}. The +constructs that need one include casts (@pxref{Explicit Type +Conversion}) and @code{sizeof} (@pxref{Type Size}). + +We also use type designators to talk about the type of a value in C, +so you will see many type designators in this manual. When we say, +``The value has type @code{int},'' @code{int} is a type designator. + +To make the designator for any type, imagine a variable declaration +for a variable of that type and delete the variable name and the final +semicolon. + +For example, to designate the type of full-word integers, we start +with the declaration for a variable @code{foo} with that type, +which is this: + +@example +int foo; +@end example + +@noindent +Then we delete the variable name @code{foo} and the semicolon, leaving +@code{int}---exactly the keyword used in such a declaration. +Therefore, the type designator for this type is @code{int}. + +What about long unsigned integers? From the declaration + +@example +unsigned long int foo; +@end example + +@noindent +we determine that the designator is @code{unsigned long int}. + +Following this procedure, the designator for any primitive type is +simply the set of keywords which specifies that type in a declaration. +The same is true for compound types such as structures, unions, and +enumerations. + +Designators for pointer types do follow the rule of deleting the +variable name and semicolon, but the result is not so simple. +@xref{Pointer Type Designators}, as part of the chapter about +pointers. @xref{Array Type Designators}, for designators for array +types. + +To understand what type a designator stands for, imagine a variable +name inserted into the right place in the designator to make a valid +declaration.
What type would that variable be declared as? That is the +type the designator designates. + +@node Constants +@chapter Constants +@cindex constants + +A @dfn{constant} is an expression that stands for a specific value by +explicitly representing the desired value. C allows constants for +numbers, characters, and strings. We have already seen numeric and +string constants in the examples. + +@menu +* Integer Constants:: Literal integer values. +* Integer Const Type:: Types of literal integer values. +* Floating Constants:: Literal floating-point values. +* Imaginary Constants:: Literal imaginary number values. +* Invalid Numbers:: Avoiding preprocessing number misconceptions. +* Character Constants:: Literal character values. +* String Constants:: Literal string values. +* UTF-8 String Constants:: Literal UTF-8 string values. +* Unicode Character Codes:: Unicode characters represented + in either UTF-16 or UTF-32. +* Wide Character Constants:: Literal characters values larger than 8 bits. +* Wide String Constants:: Literal string values made up of + 16- or 32-bit characters. +@end menu + +@node Integer Constants +@section Integer Constants +@cindex integer constants +@cindex constants, integer + +An integer constant consists of a number to specify the value, +followed optionally by suffix letters to specify the data type. + +The simplest integer constants are numbers written in base 10 +(decimal), such as @code{5}, @code{77}, and @code{403}. A decimal +constant cannot start with the character @samp{0} (zero) because +that makes the constant octal. + +You can get the effect of a negative integer constant by putting a +minus sign at the beginning. Grammatically speaking, that is an +arithmetic expression rather than a constant, but it behaves just like +a true constant. + +Integer constants can also be written in octal (base 8), hexadecimal +(base 16), or binary (base 2). 
An octal constant starts with the +character @samp{0} (zero), followed by any number of octal digits +(@samp{0} to @samp{7}): + +@example +0 // @r{zero} +077 // @r{63} +0403 // @r{259} +@end example + +@noindent +Pedantically speaking, the constant @code{0} is an octal constant, but +we can think of it as decimal; it has the same value either way. + +A hexadecimal constant starts with @samp{0x} (upper or lower case) +followed by hex digits (@samp{0} to @samp{9}, as well as @samp{a} +through @samp{f} in upper or lower case): + +@example +0xff // @r{255} +0XA0 // @r{160} +0xffFF // @r{65535} +@end example + +@cindex binary integer constants +A binary constant starts with @samp{0b} (upper or lower case) followed +by bits (each represented by the characters @samp{0} or @samp{1}): + +@example +0b101 // @r{5} +@end example + +Binary constants are a GNU C extension, not part of the C standard. + +Sometimes a space is needed after an integer constant to avoid +lexical confusion with the following tokens. @xref{Invalid Numbers}. + +@node Integer Const Type +@section Integer Constant Data Types +@cindex integer constant data types +@cindex constant data types, integer +@cindex types of integer constants + +The type of an integer constant is normally @code{int}, if the value +fits in that type, but here are the complete rules. The type +of an integer constant is the first one in this sequence that can +properly represent the value, + +@enumerate +@item +@code{int} +@item +@code{unsigned int} +@item +@code{long int} +@item +@code{unsigned long int} +@item +@code{long long int} +@item +@code{unsigned long long int} +@end enumerate + +@noindent +and that isn't excluded by the following rules. + +If the constant has @samp{l} or @samp{L} as a suffix, that excludes the +first two types (non-@code{long}). + +If the constant has @samp{ll} or @samp{LL} as a suffix, that excludes +the first four types (non-@code{long long}).
+ +If the constant has @samp{u} or @samp{U} as a suffix, that excludes +the signed types. + +Otherwise, if the constant is decimal, that excludes the unsigned +types. +@c ### This said @code{unsigned int} is excluded. +@c ### See 17 April 2016 + +Here are some examples of the suffixes. + +@example +3000000000u // @r{three billion as @code{unsigned int}.} +0LL // @r{zero as a @code{long long int}.} +0403l // @r{259 as a @code{long int}.} +@end example + +Suffixes in integer constants are rarely used. When the precise type +is important, it is cleaner to convert explicitly (@pxref{Explicit +Type Conversion}). + +@xref{Integer Types}. + +@node Floating Constants +@section Floating-Point Constants +@cindex floating-point constants +@cindex constants, floating-point + +A floating-point constant must have either a decimal point, an +exponent-of-ten, or both; they distinguish it from an integer +constant. + +To indicate an exponent, write @samp{e} or @samp{E}. The exponent +value follows. It is always written as a decimal number; it can +optionally start with a sign. The exponent @var{n} means to multiply +the constant's value by ten to the @var{n}th power. + +Thus, @samp{1500.0}, @samp{15e2}, @samp{15e+2}, @samp{15.0e2}, +@samp{1.5e+3}, @samp{.15e4}, and @samp{15000e-1} are seven ways of +writing a floating-point number whose value is 1500. They are all +equivalent. + +Here are more examples with decimal points: + +@example +1.0 +1000. +3.14159 +.05 +.0005 +@end example + +For each of them, here are some equivalent constants written with +exponents: + +@example +1e0, 1.0000e0 +100e1, 100e+1, 100E+1, 1e3, 10000e-1 +3.14159e0 +5e-2, .0005e+2, 5E-2, .0005E2 +.05e-2 +@end example + +A floating-point constant normally has type @code{double}. You can +force it to type @code{float} by adding @samp{f} or @samp{F} +at the end.
For example, + +@example +3.14159f +3.14159e0f +1000.f +100E1F +.0005f +.05e-2f +@end example + +Likewise, @samp{l} or @samp{L} at the end forces the constant +to type @code{long double}. + +You can use exponents in hexadecimal floating constants, but since +@samp{e} would be interpreted as a hexadecimal digit, the character +@samp{p} or @samp{P} (for ``power'') indicates an exponent. + +The exponent in a hexadecimal floating constant is a possibly-signed +decimal integer that specifies a power of 2 (@emph{not} 10 or 16) to +multiply into the number. + +Here are some examples: + +@example +@group +0xAp2 // @r{40 in decimal} +0xAp-1 // @r{5 in decimal} +0x2.0Bp4 // @r{32.6875 decimal} +0xE.2p3 // @r{113 decimal} +0x123.ABCp0 // @r{291.6708984375 in decimal} +0x123.ABCp4 // @r{4666.734375 in decimal} +0x100p-8 // @r{1} +0x10p-4 // @r{1} +0x1p+4 // @r{16} +0x1p+8 // @r{256} +@end group +@end example + +@xref{Floating-Point Data Types}. + +@node Imaginary Constants +@section Imaginary Constants +@cindex imaginary constants +@cindex complex constants +@cindex constants, imaginary + +A complex number consists of a real part plus an imaginary part. +(Either or both parts may be zero.) This section explains how to +write numeric constants with imaginary values. By adding these to +ordinary real-valued numeric constants, we can make constants with +complex values. + +The simple way to write an imaginary-number constant is to attach the +suffix @samp{i} or @samp{I}, or @samp{j} or @samp{J}, to an integer or +floating-point constant. For example, @code{2.5fi} has type +@code{_Complex float} and @code{3i} has type @code{_Complex int}. +The four alternative suffix letters are all equivalent. + +@cindex _Complex_I +The other way to write an imaginary constant is to multiply a real +constant by @code{_Complex_I}, which represents the imaginary number +i. Standard C doesn't support suffixing with @samp{i} or @samp{j}, so +this clunky way is needed.
+ +To write a complex constant with a nonzero real part and a nonzero +imaginary part, write the two separately and add them, like this: + +@example +4.0 + 3.0i +@end example + +@noindent +That gives the value 4 + 3i, with type @code{_Complex double}. + +Such a sum can include multiple real constants, or none. Likewise, it +can include multiple imaginary constants, or none. For example: + +@example +_Complex double foo, bar, quux; + +foo = 2.0i + 4.0 + 3.0i; /* @r{Imaginary part is 5.0.} */ +bar = 4.0 + 12.0; /* @r{Imaginary part is 0.0.} */ +quux = 3.0i + 15.0i; /* @r{Real part is 0.0.} */ +@end example + +@xref{Complex Data Types}. + +@node Invalid Numbers +@section Invalid Numbers + +Some number-like constructs which are not really valid as numeric +constants are treated as numbers in preprocessing directives. If +these constructs appear outside of preprocessing, they are erroneous. +@xref{Preprocessing Tokens}. + +Sometimes we need to insert spaces to separate tokens so that they +won't be combined into a single number-like construct. For example, +@code{0xE+12} is a preprocessing number that is not a valid numeric +constant, so it is a syntax error. If what we want is the three +tokens @code{@w{0xE + 12}}, we have to use those spaces as separators. + +@node Character Constants +@section Character Constants +@cindex character constants +@cindex constants, character +@cindex escape sequence + +A @dfn{character constant} is written with single quotes, as in +@code{'@var{c}'}. In the simplest case, @var{c} is a single ASCII +character that the constant should represent. The constant has type +@code{int}, and its value is the character code of that character. +For instance, @code{'a'} represents the character code for the letter +@samp{a}: 97, that is. + +To put the @samp{'} character (single quote) in the character +constant, @dfn{quote} it with a backslash (@samp{\}). This character +constant looks like @code{'\''}. 
This sort of sequence, starting with +@samp{\}, is called an @dfn{escape sequence}---the backslash character +here functions as a kind of @dfn{escape character}. + +To put the @samp{\} character (backslash) in the character constant, +quote it likewise with @samp{\} (another backslash). This character +constant looks like @code{'\\'}. + +@cindex bell character +@cindex @samp{\a} +@cindex backspace +@cindex @samp{\b} +@cindex tab (ASCII character) +@cindex @samp{\t} +@cindex vertical tab +@cindex @samp{\v} +@cindex formfeed +@cindex @samp{\f} +@cindex newline +@cindex @samp{\n} +@cindex return (ASCII character) +@cindex @samp{\r} +@cindex escape (ASCII character) +@cindex @samp{\e} +Here are all the escape sequences that represent specific +characters in a character constant. The numeric values shown are +the corresponding ASCII character codes, as decimal numbers. + +@example +'\a' @result{} 7 /* @r{alarm, @kbd{CTRL-g}} */ +'\b' @result{} 8 /* @r{backspace, @key{BS}, @kbd{CTRL-h}} */ +'\t' @result{} 9 /* @r{tab, @key{TAB}, @kbd{CTRL-i}} */ +'\n' @result{} 10 /* @r{newline, @kbd{CTRL-j}} */ +'\v' @result{} 11 /* @r{vertical tab, @kbd{CTRL-k}} */ +'\f' @result{} 12 /* @r{formfeed, @kbd{CTRL-l}} */ +'\r' @result{} 13 /* @r{carriage return, @key{RET}, @kbd{CTRL-m}} */ +'\e' @result{} 27 /* @r{escape character, @key{ESC}, @kbd{CTRL-[}} */ +'\\' @result{} 92 /* @r{backslash character, @kbd{\}} */ +'\'' @result{} 39 /* @r{singlequote character, @kbd{'}} */ +'\"' @result{} 34 /* @r{doublequote character, @kbd{"}} */ +'\?' @result{} 63 /* @r{question mark, @kbd{?}} */ +@end example + +@samp{\e} is a GNU C extension; to stick to standard C, write @samp{\33}. + +You can also write octal and hex character codes as +@samp{\@var{octalcode}} or @samp{\x@var{hexcode}}. Decimal is not an +option here, so octal codes do not need to start with @samp{0}. + +The character constant's value has type @code{int}. 
However, the +character code is treated initially as a @code{char} value, which is +then converted to @code{int}. If the character code is greater than +127 (@code{0177} in octal), the resulting @code{int} may be negative +on a platform where the type @code{char} is 8 bits long and signed. + +@node String Constants +@section String Constants +@cindex string constants +@cindex constants, string + +A @dfn{string constant} represents a series of characters. It starts +with @samp{"} and ends with @samp{"}; in between are the contents of +the string. Quoting special characters such as @samp{"}, @samp{\} and +newline in the contents works in string constants as in character +constants. In a string constant, @samp{'} does not need to be quoted. + +A string constant defines an array of characters which contains the +specified characters followed by the null character (code 0). Using +the string constant is equivalent to using the name of an array with +those contents. In simple cases, the length in bytes of the string +constant is one greater than the number of characters written in it. + +As with any array in C, using the string constant in an expression +converts the array to a pointer (@pxref{Pointers}) to the array's +first element (@pxref{Accessing Array Elements}). This pointer will +have type @code{char *} because it points to an element of type +@code{char}. @code{char *} is an example of a type designator for a +pointer type (@pxref{Pointer Type Designators}). That type is used +for strings generally, not just the strings expressed as constants +in a program. + +Thus, the string constant @code{"Foo!"} is almost +equivalent to declaring an array like this + +@example +char string_array_1[] = @{'F', 'o', 'o', '!', '\0' @}; +@end example + +@noindent +and then using @code{string_array_1} in the program. There +are two differences, however: + +@itemize @bullet +@item +The string constant doesn't define a name for the array. 
+ +@item +The string constant is probably stored in a read-only area of memory. +@end itemize + +Newlines are not allowed in the text of a string constant. The motive +for this prohibition is to catch the error of omitting the closing +@samp{"}. To put a newline in a constant string, write it as +@samp{\n} in the string constant. + +A real null character in the source code inside a string constant +causes a warning. To put a null character in the middle of a string +constant, write @samp{\0} or @samp{\000}. + +Consecutive string constants are effectively concatenated. Thus, + +@example +"Fo" "o!" @r{is equivalent to} "Foo!" +@end example + +This is useful for writing a string containing multiple lines, +like this: + +@example +"This message is so long that it needs more than\n" +"a single line of text. C does not allow a newline\n" +"to represent itself in a string constant, so we have to\n" +"write \\n to put it in the string. For readability of\n" +"the source code, it is advisable to put line breaks in\n" +"the source where they occur in the contents of the\n" +"constant.\n" +@end example + +The sequence of a backslash and a newline is ignored anywhere +in a C program, and that includes inside a string constant. +Thus, you can write multi-line string constants this way: + +@example +"This is another way to put newlines in a string constant\n\ +and break the line after them in the source code." +@end example + +@noindent +However, concatenation is the recommended way to do this. + +You can also write perverse string constants like this, + +@example +"Fo\ +o!" +@end example + +@noindent +but don't do that---write it like this instead: + +@example +"Foo!" +@end example + +Be careful to avoid passing a string constant to a function that +modifies the string it receives. The memory where the string constant +is stored may be read-only, which would cause a fatal @code{SIGSEGV} +signal that normally terminates the program (@pxref{Signals}).
Even +worse, the memory may not be read-only. Then the function might +modify the string constant, thus spoiling the contents of other string +constants that are supposed to contain the same value and are unified +by the compiler. + +@node UTF-8 String Constants +@section UTF-8 String Constants +@cindex UTF-8 String Constants + +Writing @samp{u8} immediately before a string constant, with no +intervening space, means to represent that string in UTF-8 encoding as +a sequence of bytes. UTF-8 represents ASCII characters with a single +byte, and represents non-ASCII Unicode characters (codes 128 and up) +as multibyte sequences. Here is an example of a UTF-8 constant: + +@example +u8"A cónstàñt" +@end example + +This constant occupies 13 bytes plus the terminating null, +because each of the accented letters is a two-byte sequence. + +Concatenating an ordinary string with a UTF-8 string conceptually +produces another UTF-8 string. However, if the ordinary string +contains character codes 128 and up, the results cannot be relied on. + +@node Unicode Character Codes +@section Unicode Character Codes +@cindex Unicode character codes +@cindex universal character names + +You can specify Unicode characters, for individual character constants +or as part of string constants (@pxref{String Constants}), using +escape sequences. Use the @samp{\u} escape sequence with a 16-bit +hexadecimal Unicode character code. If the code value is too big for +16 bits, use the @samp{\U} escape sequence with a 32-bit hexadecimal +Unicode character code. (These codes are called @dfn{universal +character names}.) For example, + +@example +\u6C34 /* @r{16-bit code (UTF-16)} */ +\U0010ABCD /* @r{32-bit code (UTF-32)} */ +@end example + +@noindent +One way to use these is in UTF-8 string constants (@pxref{UTF-8 String +Constants}). 
For instance, + +@example +u8"fóó \u6C34 \U0010ABCD" +@end example + + You can also use them in wide character constants (@pxref{Wide +Character Constants}), like this: + +@example +u'\u6C34' /* @r{16-bit code} */ +U'\U0010ABCD' /* @r{32-bit code} */ +@end example + +@noindent +and in wide string constants (@pxref{Wide String Constants}), like +this: + +@example +u"\u6C34\u6C33" /* @r{16-bit code} */ +U"\U0010ABCD" /* @r{32-bit code} */ +@end example + +Codes in the range of @code{D800} through @code{DFFF} are not valid +in Unicode. Codes less than @code{00A0} are also forbidden, except for +@code{0024}, @code{0040}, and @code{0060}; these characters are +actually ASCII control characters, and you can specify them with other +escape sequences (@pxref{Character Constants}). + +@node Wide Character Constants +@section Wide Character Constants +@cindex wide character constants +@cindex constants, wide character + +A @dfn{wide character constant} represents characters with more than 8 +bits of character code. This is an obscure feature that we need to +document but that you probably won't ever use. If you're just +learning C, you may as well skip this section. + +The original C wide character constant looks like @samp{L} (upper +case!) followed immediately by an ordinary character constant (with no +intervening space). Its data type is @code{wchar_t}, which is an +alias defined in @file{stddef.h} for one of the standard integer +types. Depending on the platform, it could be 16 bits or 32 bits. If +it is 16 bits, these character constants use the UTF-16 form of +Unicode; if 32 bits, UTF-32. + +There are also Unicode wide character constants which explicitly +specify the width. These constants start with @samp{u} or @samp{U} +instead of @samp{L}. @samp{u} specifies a 16-bit Unicode wide +character constant, and @samp{U} a 32-bit Unicode wide character +constant. 
Their types are, respectively, @code{char16_t} and +@w{@code{char32_t}}; they are declared in the header file +@file{uchar.h}. These character constants are valid even if +@file{uchar.h} is not included, but some uses of them may be +inconvenient without including it to declare those type names. + +The character represented in a wide character constant can be an +ordinary ASCII character. @code{L'a'}, @code{u'a'} and @code{U'a'} +are all valid, and they are all equal to @code{'a'}. + +In all three kinds of wide character constants, you can write a +non-ASCII Unicode character in the constant itself; the constant's +value is the character's Unicode character code. Or you can specify +the Unicode character with an escape sequence (@pxref{Unicode +Character Codes}). + +@node Wide String Constants +@section Wide String Constants +@cindex wide string constants +@cindex constants, wide string + +A @dfn{wide string constant} stands for an array of 16-bit or 32-bit +characters. They are rarely used; if you're just +learning C, you may as well skip this section. + +There are three kinds of wide string constants, which differ in the +data type used for each character in the string. Each wide string +constant is equivalent to an array of integers, but the data type of +those integers depends on the kind of wide string. Using the constant +in an expression will convert the array to a pointer to its first +element, as usual for arrays in C (@pxref{Accessing Array Elements}). +For each kind of wide string constant, we state here what type that +pointer will be. + +@table @code +@item char16_t +This is a 16-bit Unicode wide string constant: each element is a +16-bit Unicode character code with type @code{char16_t}, so the string +has the pointer type @code{char16_t@ *}. (That is a type designator; +@pxref{Pointer Type Designators}.) The constant is written as +@samp{u} (which must be lower case) followed (with no intervening +space) by a string constant with the usual syntax. 
+ +@item char32_t +This is a 32-bit Unicode wide string constant: each element is a +32-bit Unicode character code, and the string has type @code{char32_t@ *}. +It's written as @samp{U} (which must be upper case) followed (with no +intervening space) by a string constant with the usual syntax. + +@item wchar_t +This is the original kind of wide string constant. It's written as +@samp{L} (which must be upper case) followed (with no intervening +space) by a string constant with the usual syntax, and the string has +type @code{wchar_t@ *}. + +The width of the data type @code{wchar_t} depends on the target +platform, which makes this kind of wide string somewhat less useful +than the newer kinds. +@end table + +@code{char16_t} and @code{char32_t} are declared in the header file +@file{uchar.h}. @code{wchar_t} is declared in @file{stddef.h}. + +Consecutive wide string constants of the same kind concatenate, just +like ordinary string constants. A wide string constant concatenated +with an ordinary string constant results in a wide string constant. +You can't concatenate two wide string constants of different kinds. +You also can't concatenate a wide string constant (of any kind) with a +UTF-8 string constant. + +@node Type Size +@chapter Type Size +@cindex type size +@cindex size of type +@findex sizeof + +Each data type has a @dfn{size}, which is the number of bytes +(@pxref{Storage}) that it occupies in memory. To refer to the size in +a C program, use @code{sizeof}. There are two ways to use it: + +@table @code +@item sizeof @var{expression} +This gives the size of @var{expression}, based on its data type. It +does not calculate the value of @var{expression}, only its size, so if +@var{expression} includes side effects or function calls, they do not +happen. Therefore, @code{sizeof} is always a compile-time operation +that has zero run-time cost. + +A value that is a bit field (@pxref{Bit Fields}) is not allowed as an +operand of @code{sizeof}. 
+ +For example, + +@example +double a; + +i = sizeof a + 10; +@end example + +@noindent +sets @code{i} to 18 on most computers because @code{a} occupies 8 bytes. + +Here's how to determine the number of elements in an array +@code{array}: + +@example +(sizeof array / sizeof array[0]) +@end example + +@noindent +The expression @code{sizeof array} gives the size of the array, not +the size of a pointer to an element. However, if @var{expression} is +a function parameter that was declared as an array, that +variable really has a pointer type (@pxref{Array Parm Pointer}), so +the result is the size of that pointer. + +@item sizeof (@var{type}) +This gives the size of @var{type}. +For example, + +@example +i = sizeof (double) + 10; +@end example + +@noindent +is equivalent to the previous example. + +You can't apply @code{sizeof} to an incomplete type (@pxref{Incomplete +Types}), nor @code{void}. Using it on a function type gives 1 in GNU +C, which makes adding an integer to a function pointer work as desired +(@pxref{Pointer Arithmetic}). +@end table + +@strong{Warning}: When you use @code{sizeof} with a type +instead of an expression, you must write parentheses around the type. + +@strong{Warning}: When applying @code{sizeof} to the result of a cast +(@pxref{Explicit Type Conversion}), you must write parentheses around +the cast expression to avoid an ambiguity in the grammar of C@. +Specifically, + +@example +sizeof (int) -x +@end example + +@noindent +parses as + +@example +(sizeof (int)) - x +@end example + +@noindent +If what you want is + +@example +sizeof ((int) -x) +@end example + +@noindent +you must write it that way, with parentheses. + +The data type of the value of the @code{sizeof} operator is always one +of the unsigned integer types; which one of those types depends on the +machine. The header file @code{stddef.h} defines the typedef name +@code{size_t} as an alias for this type. @xref{Defining Typedef +Names}. 
+ +@node Pointers +@chapter Pointers +@cindex pointers + +Among high-level languages, C is rather low level, close to the +machine. This is mainly because it has explicit @dfn{pointers}. A +pointer value is the numeric address of data in memory. The type of +data to be found at that address is specified by the data type of the +pointer itself. The unary operator @samp{*} gets the data that a +pointer points to---this is called @dfn{dereferencing the pointer}. + +C also allows pointers to functions, but since there are some +differences in how they work, we treat them later. @xref{Function +Pointers}. + +@menu +* Address of Data:: Using the ``address-of'' operator. +* Pointer Types:: For each type, there is a pointer type. +* Pointer Declarations:: Declaring variables with pointer types. +* Pointer Type Designators:: Designators for pointer types. +* Pointer Dereference:: Accessing what a pointer points at. +* Null Pointers:: Pointers which do not point to any object. +* Invalid Dereference:: Dereferencing null or invalid pointers. +* Void Pointers:: Totally generic pointers, can cast to any. +* Pointer Comparison:: Comparing memory address values. +* Pointer Arithmetic:: Computing memory address values. +* Pointers and Arrays:: Using pointer syntax instead of array syntax. +* Pointer Arithmetic Low Level:: More about computing memory address values. +* Pointer Increment/Decrement:: Incrementing and decrementing pointers. +* Pointer Arithmetic Drawbacks:: A common pointer bug to watch out for. +* Pointer-Integer Conversion:: Converting pointer types to integer types. +* Printing Pointers:: Using @code{printf} for a pointer's value. +@end menu + +@node Address of Data +@section Address of Data + +@cindex address-of operator +The most basic way to make a pointer is with the ``address-of'' +operator, @samp{&}. 
Let's suppose we have these variables available: + +@example +int i; +double a[5]; +@end example + +Now, @code{&i} gives the address of the variable @code{i}---a pointer +value that points to @code{i}'s location---and @code{&a[3]} gives the +address of the element 3 of @code{a}. (It is actually the fourth +element in the array, since the first element has index 0.) + +The address-of operator is unusual because it operates on a place to +store a value (an lvalue, @pxref{Lvalues}), not on the value currently +stored there. (The left argument of a simple assignment is unusual in +the same way.) You can use it on any lvalue except a bit field +(@pxref{Bit Fields}) or a constructor (@pxref{Structure +Constructors}). + + +@node Pointer Types +@section Pointer Types + +For each data type @var{t}, there is a type for pointers to type +@var{t}. For these variables, + +@example +int i; +double a[5]; +@end example + +@itemize @bullet +@item +@code{i} has type @code{int}; we say +@code{&i} is a ``pointer to @code{int}.'' + +@item +@code{a} has type @code{double[5]}; we say @code{&a} is a ``pointer to +arrays of five @code{double}s.'' + +@item +@code{a[3]} has type @code{double}; we say @code{&a[3]} is a ``pointer +to @code{double}.'' +@end itemize + +@node Pointer Declarations +@section Pointer-Variable Declarations + +The way to declare that a variable @code{foo} points to type @var{t} is + +@example +@var{t} *foo; +@end example + +To remember this syntax, think ``if you dereference @code{foo}, using +the @samp{*} operator, what you get is type @var{t}. 
Thus, @code{foo} +points to type @var{t}.'' + +Thus, we can declare variables that hold pointers to these three +types, like this: + +@example +int *ptri; /* @r{Pointer to @code{int}.} */ +double *ptrd; /* @r{Pointer to @code{double}.} */ +double (*ptrda)[5]; /* @r{Pointer to @code{double[5]}.} */ +@end example + +@samp{int *ptri;} means, ``if you dereference @code{ptri}, you get an +@code{int}.'' @samp{double (*ptrda)[5];} means, ``if you dereference +@code{ptrda}, then subscript it by an integer less than 5, you get a +@code{double}.'' The parentheses express the point that you would +dereference it first, then subscript it. + +Contrast the last one with this: + +@example +double *aptrd[5]; /* @r{Array of five pointers to @code{double}.} */ +@end example + +@noindent +Because @samp{*} has lower syntactic precedence than subscripting, +you would subscript @code{aptrd} then dereference it. Therefore, it +declares an array of pointers, not a pointer. + +@node Pointer Type Designators +@section Pointer-Type Designators + +Every type in C has a designator; you make it by deleting the variable +name and the semicolon from a declaration (@pxref{Type +Designators}). Here are the designators for the pointer +types of the example declarations in the previous section: + +@example +int * /* @r{Pointer to @code{int}.} */ +double * /* @r{Pointer to @code{double}.} */ +double (*)[5] /* @r{Pointer to @code{double[5]}.} */ +@end example + +Remember, to understand what type a designator stands for, imagine the +variable name that would be in the declaration, and figure out what +type it would declare that variable with. @code{double (*)[5]} can +only come from @code{double (*@var{variable})[5]}, so it's a pointer +which, when dereferenced, gives an array of 5 @code{double}s.
+ +@node Pointer Dereference +@section Dereferencing Pointers +@cindex dereferencing pointers +@cindex pointer dereferencing + +The main use of a pointer value is to @dfn{dereference it} (access the +data it points at) with the unary @samp{*} operator. For instance, +@code{*&i} is the value at @code{i}'s address---which is just +@code{i}. The two expressions are equivalent, provided @code{&i} is +valid. + +A pointer-dereference expression whose type is data (not a function) +is an lvalue. + +Pointers become really useful when we store them somewhere and use +them later. Here's a simple example to illustrate the practice: + +@example +@{ + int i; + int *ptr; + + ptr = &i; + + i = 5; + + @r{@dots{}} + + return *ptr; /* @r{Returns 5, fetched from @code{i}.} */ +@} +@end example + +This shows how to declare the variable @code{ptr} as type +@code{int *} (pointer to @code{int}), store a pointer value into it +(pointing at @code{i}), and use it later to get the value of the +object it points at (the value in @code{i}). + +If anyone can provide a useful example which is this basic, +I would be grateful. + +@node Null Pointers +@section Null Pointers +@cindex null pointers +@cindex pointers, null + +@c ???stdio loads sttddef + +A pointer value can be @dfn{null}, which means it does not point to +any object. The cleanest way to get a null pointer is by writing +@code{NULL}, a standard macro defined in @file{stddef.h}. You can +also do it by casting 0 to the desired pointer type, as in +@code{(char *) 0}. (The cast operator performs explicit type conversion; +@pxref{Explicit Type Conversion}.) + +You can store a null pointer in any lvalue whose data type +is a pointer type: + +@example +char *foo; +foo = NULL; +@end example + +These two, if consecutive, can be combined into a declaration with +initializer, + +@example +char *foo = NULL; +@end example + +You can also explicitly cast @code{NULL} to the specific pointer type +you want---it makes no difference.
+ +@example +char *foo; +foo = (char *) NULL; +@end example + +To test whether a pointer is null, compare it with zero or +@code{NULL}, as shown here: + +@example +if (p != NULL) + /* @r{@code{p} is not null.} */ + operate (p); +@end example + +Since testing a pointer for not being null is basic and frequent, all +but beginners in C will understand the conditional without need for +@code{!= NULL}: + +@example +if (p) + /* @r{@code{p} is not null.} */ + operate (p); +@end example + +@node Invalid Dereference +@section Dereferencing Null or Invalid Pointers + +Trying to dereference a null pointer is an error. On most platforms, +it generally causes a signal, usually @code{SIGSEGV} +(@pxref{Signals}). + +@example +char *foo = NULL; +c = *foo; /* @r{This causes a signal and terminates.} */ +@end example + +@noindent +Likewise a pointer that has the wrong alignment for the target data type +(on most types of computer), or points to a part of memory that has +not been allocated in the process's address space. + +The signal terminates the program, unless the program has arranged to +handle the signal (@pxref{Signal Handling, The GNU C Library, , libc, +The GNU C Library Reference Manual}). + +However, the signal might not happen if the dereference is optimized +away. In the example above, if you don't subsequently use the value +of @code{c}, GCC might optimize away the code for @code{*foo}. You +can prevent such optimization using the @code{volatile} qualifier, as +shown here: + +@example +volatile char *p; +volatile char c; +c = *p; +@end example + +You can use this to test whether @code{p} points to unallocated +memory. Set up a signal handler first, so the signal won't terminate +the program. + +@node Void Pointers +@section Void Pointers +@cindex void pointers +@cindex pointers, void + +The peculiar type @code{void *}, a pointer whose target type is +@code{void}, is used often in C@. It represents a pointer to +we-don't-say-what. 
Thus, + +@example +void *numbered_slot_pointer (int); +@end example + +@noindent +declares a function @code{numbered_slot_pointer} that takes an +integer parameter and returns a pointer, but we don't say what type of +data it points to. + +With type @code{void *}, you can pass the pointer around and test +whether it is null. However, dereferencing it gives a @code{void} +value that can't be used (@pxref{The Void Type}). To dereference the +pointer, first convert it to some other pointer type. + +Assignments convert @code{void *} automatically to any other pointer +type, if the left operand has a pointer type; for instance, + +@example +@{ + int *p; + /* @r{Converts return value to @code{int *}.} */ + p = numbered_slot_pointer (5); + @r{@dots{}} +@} +@end example + +Passing an argument of type @code{void *} for a parameter that has a +pointer type also converts. For example, supposing the function +@code{hack} is declared to require type @code{float *} for its +argument, the following call will convert the @code{void *} argument to that type. + +@example +/* @r{Declare @code{hack} that way.} + @r{We assume it is defined somewhere else.} */ +void hack (float *); +@dots{} +/* @r{Now call @code{hack}.} */ +@{ + /* @r{Converts return value of @code{numbered_slot_pointer}} + @r{to @code{float *} to pass it to @code{hack}.} */ + hack (numbered_slot_pointer (5)); + @r{@dots{}} +@} +@end example + + You can also convert to another pointer type with an explicit cast +(@pxref{Explicit Type Conversion}), like this: +@example +(int *) numbered_slot_pointer (5) +@end example + +Here is an example which decides at run time which pointer +type to convert to: + +@example +void +extract_int_or_double (void *ptr, bool its_an_int) +@{ + if (its_an_int) + handle_an_int (*(int *)ptr); + else + handle_a_double (*(double *)ptr); +@} +@end example + +The expression @code{*(int *)ptr} means to convert @code{ptr} +to type @code{int *}, then dereference it.
+ +@node Pointer Comparison +@section Pointer Comparison +@cindex pointer comparison +@cindex comparison, pointer + +Two pointer values are equal if they point to the same location, or if +they are both null. You can test for this with @code{==} and +@code{!=}. Here's a trivial example: + +@example +@{ + int i; + int *p, *q; + + p = &i; + q = &i; + if (p == q) + printf ("This will be printed.\n"); + if (p != q) + printf ("This won't be printed.\n"); +@} +@end example + +Ordering comparisons such as @code{>} and @code{>=} operate on +pointers by converting them to unsigned integers. The C standard says +the two pointers must point within the same object in memory, but on +GNU/Linux systems these operations simply compare the numeric values +of the pointers. + +The pointer values to be compared should in principle have the same type, but +they are allowed to differ in limited cases. First of all, if the two +pointers' target types are nearly compatible (@pxref{Compatible +Types}), the comparison is allowed. + +If one of the operands is @code{void *} (@pxref{Void Pointers}) and +the other is another pointer type, the comparison operator converts +the @code{void *} pointer to the other type so as to compare them. +(In standard C, this is not allowed if the other type is a function +pointer type, but that works in GNU C@.) + +Comparison operators also allow comparing the integer 0 with a pointer +value. This works by converting 0 to a null pointer of the same type +as the other operand. + +@node Pointer Arithmetic +@section Pointer Arithmetic +@cindex pointer arithmetic +@cindex arithmetic, pointer + +Adding an integer (positive or negative) to a pointer is valid in C@. +It assumes that the pointer points to an element in an array, and +advances or retracts the pointer across as many array elements as the +integer specifies. Here is an example, in which adding a positive +integer advances the pointer to a later element in the same array.
+ +@example +void +incrementing_pointers () +@{ + int array[5] = @{ 45, 29, 104, -3, 123456 @}; + int elt0, elt1, elt4; + + int *p = &array[0]; + /* @r{Now @code{p} points at element 0. Fetch it.} */ + elt0 = *p; + + ++p; + /* @r{Now @code{p} points at element 1. Fetch it.} */ + elt1 = *p; + + p += 3; + /* @r{Now @code{p} points at element 4 (the last). Fetch it.} */ + elt4 = *p; + + printf ("elt0 %d elt1 %d elt4 %d.\n", + elt0, elt1, elt4); + /* @r{Prints elt0 45 elt1 29 elt4 123456.} */ +@} +@end example + +Here's an example where adding a negative integer retracts the pointer +to an earlier element in the same array. + +@example +void +decrementing_pointers () +@{ + int array[5] = @{ 45, 29, 104, -3, 123456 @}; + int elt0, elt3, elt4; + + int *p = &array[4]; + /* @r{Now @code{p} points at element 4 (the last). Fetch it.} */ + elt4 = *p; + + --p; + /* @r{Now @code{p} points at element 3. Fetch it.} */ + elt3 = *p; + + p -= 3; + /* @r{Now @code{p} points at element 0. Fetch it.} */ + elt0 = *p; + + printf ("elt0 %d elt3 %d elt4 %d.\n", + elt0, elt3, elt4); + /* @r{Prints elt0 45 elt3 -3 elt4 123456.} */ +@} +@end example + +If one pointer value was made by adding an integer to another +pointer value, it should be possible to subtract the pointer values +and recover that integer. That works too in C@. + +@example +void +subtract_pointers () +@{ + int array[5] = @{ 45, 29, 104, -3, 123456 @}; + int *p0, *p3, *p4; + + int *p = &array[4]; + /* @r{Now @code{p} points at element 4 (the last). Save the value.} */ + p4 = p; + + --p; + /* @r{Now @code{p} points at element 3. Save the value.} */ + p3 = p; + + p -= 3; + /* @r{Now @code{p} points at element 0. Save the value.} */ + p0 = p; + + printf ("%d, %d, %d, %d\n", + p4 - p0, p0 - p0, p3 - p0, p0 - p3); + /* @r{Prints 4, 0, 3, -3.} */ +@} +@end example + +The addition operation does not know where arrays are. All it does is +add the integer (multiplied by object size) to the value of the +pointer. 
When the initial pointer and the result point into a single +array, the result is well-defined. + +@strong{Warning:} Only experts should do pointer arithmetic involving pointers +into different memory objects. + +The difference between two pointers has type @code{int}, or +@code{long} if necessary (@pxref{Integer Types}). The clean way to +declare it is to use the typedef name @code{ptrdiff_t} defined in the +file @file{stddef.h}. + +This definition of pointer subtraction is consistent with +pointer-integer addition, in that @code{(p3 - p1) + p1} equals +@code{p3}, as in ordinary algebra. + +In standard C, addition and subtraction are not allowed on @code{void +*}, since the target type's size is not defined in that case. +Likewise, they are not allowed on pointers to function types. +However, these operations work in GNU C, and the ``size of the target +type'' is taken as 1. + +@node Pointers and Arrays +@section Pointers and Arrays +@cindex pointers and arrays +@cindex arrays and pointers + +The clean way to refer to an array element is +@code{@var{array}[@var{index}]}. Another, complicated way to do the +same job is to get the address of that element as a pointer, then +dereference it: @code{* (&@var{array}[0] + @var{index})} (or +equivalently @code{* (@var{array} + @var{index})}). This first gets a +pointer to element zero, then increments it with @code{+} to point to +the desired element, then gets the value from there. + +That pointer-arithmetic construct is the @emph{definition} of square +brackets in C@. @code{@var{a}[@var{b}]} means, by definition, +@code{*(@var{a} + @var{b})}. This definition uses @var{a} and @var{b} +symmetrically, so one must be a pointer and the other an integer; it +does not matter which comes first. + +Since indexing with square brackets is defined in terms of addition +and dereference, that too is symmetrical. Thus, you can write +@code{3[array]} and it is equivalent to @code{array[3]}. 
However, it +would be foolish to write @code{3[array]}, since it has no advantage +and could confuse people who read the code. + +It may seem like a discrepancy that the definition @code{*(@var{a} + +@var{b})} requires a pointer, but @code{array[3]} uses an array value +instead. Why is this valid? The name of the array, when used by +itself as an expression (other than in @code{sizeof}), stands for a +pointer to the array's zeroth element. Thus, @code{array + 3} +converts @code{array} implicitly to @code{&array[0]}, and the result +is a pointer to element 3, equivalent to @code{&array[3]}. + +Since square brackets are defined in terms of such addition, +@code{array[3]} first converts @code{array} to a pointer. That's why +it works to use an array directly in that construct. + +@node Pointer Arithmetic Low Level +@section Pointer Arithmetic at Low Level +@cindex pointer arithmetic, low level +@cindex low level pointer arithmetic + +The behavior of pointer arithmetic is theoretically defined only when +the pointer values all point within one object allocated in memory. +But the addition and subtraction operators can't tell whether the +pointer values are all within one object. They don't know where +objects start and end. So what do they really do? + +Adding pointer @var{p} to integer @var{i} treats @var{p} as a memory +address, which is in fact an integer---call it @var{pint}. It treats +@var{i} as a number of elements of the type that @var{p} points to. +These elements' sizes add up to @code{@var{i} * sizeof (*@var{p})}. +So the sum, as an integer, is @code{@var{pint} + @var{i} * sizeof +(*@var{p})}. This value is reinterpreted as a pointer like @var{p}. + +If the starting pointer value @var{p} and the result do not point at +parts of the same object, the operation is not officially legitimate, +and C code is not ``supposed'' to do it. But you can do it anyway, +and it gives precisely the results described by the procedure above.
+In some special situations it can do something useful, but non-wizards +should avoid it. + +Here's a function to offset a pointer value @emph{as if} it pointed to +an object of any given size, by explicitly performing that calculation: + +@example +#include <stdint.h> + +void * +ptr_add (void *p, int i, int objsize) +@{ + intptr_t p_address = (long) p; + intptr_t totalsize = i * objsize; + intptr_t new_address = p_address + totalsize; + return (void *) new_address; +@} +@end example + +@noindent +@cindex @code{intptr_t} +This does the same job as @code{@var{p} + @var{i}} with the proper +pointer type for @var{p}. It uses the type @code{intptr_t}, which is +defined in the header file @file{stdint.h}. (In practice, @code{long +long} would always work, but it is cleaner to use @code{intptr_t}.) + +@node Pointer Increment/Decrement +@section Pointer Increment and Decrement +@cindex pointer increment and decrement +@cindex incrementing pointers +@cindex decrementing pointers + +The @samp{++} operator adds 1 to a variable. We have seen it for +integers (@pxref{Increment/Decrement}), but it works for pointers too. +For instance, suppose we have a series of positive integers, +terminated by a zero, and we want to add them all up. + +@example +int +sum_array_till_0 (int *p) +@{ + int sum = 0; + + for (;;) + @{ + /* @r{Fetch the next integer.} */ + int next = *p++; + /* @r{Exit the loop if it's 0.} */ + if (next == 0) + break; + /* @r{Add it into running total.} */ + sum += next; + @} + + return sum; +@} +@end example + +@noindent +The statement @samp{break;} will be explained further on (@pxref{break +Statement}). Used in this way, it immediately exits the surrounding +@code{for} statement. + +@code{*p++} parses as @code{*(p++)}, because a postfix operator always +takes precedence over a prefix operator. Therefore, it dereferences +@code{p}, and increments @code{p} afterwards. Incrementing a variable +means adding 1 to it, as in @code{p = p + 1}.
Since @code{p} is a +pointer, adding 1 to it advances it by the width of the datum it +points to---in this case, one @code{int}. Therefore, each iteration +of the loop picks up the next integer from the series and puts it into +@code{next}. + +This @code{for}-loop has no initialization expression since @code{p} +and @code{sum} are already initialized, it has no end-test since the +@samp{break;} statement will exit it, and needs no expression to +advance it since that's done within the loop by incrementing @code{p} +and @code{sum}. Thus, those three expressions after @code{for} are +left empty. + +Another way to write this function is by keeping the parameter value unchanged +and using indexing to access the integers in the table. + +@example +int +sum_array_till_0_indexing (int *p) +@{ + int i; + int sum = 0; + + for (i = 0; ; i++) + @{ + /* @r{Fetch the next integer.} */ + int next = p[i]; + /* @r{Exit the loop if it's 0.} */ + if (next == 0) + break; + /* @r{Add it into running total.} */ + sum += next; + @} + + return sum; +@} +@end example + +In this program, instead of advancing @code{p}, we advance @code{i} +and add it to @code{p}. (Recall that @code{p[i]} means @code{*(p + +i)}.) Either way, it uses the same address to get the next integer. + +It makes no difference in this program whether we write @code{i++} or +@code{++i}, because the value is not used. All that matters is the +effect, to increment @code{i}. 
+ +The @samp{--} operator also works on pointers; it can be used +to scan backwards through an array, like this: + +@example +int +after_last_nonzero (int *p, int len) +@{ + /* @r{Set up @code{q} to point just after the last array element.} */ + int *q = p + len; + + while (q != p) + /* @r{Step @code{q} back until it reaches a nonzero element.} */ + if (*--q != 0) + /* @r{Return the index of the element after that nonzero.} */ + return q - p + 1; + + return 0; +@} +@end example + +That function returns the length of the nonzero part of the +array specified by its arguments; that is, the index of the +first zero of the run of zeros at the end. + +@node Pointer Arithmetic Drawbacks +@section Drawbacks of Pointer Arithmetic +@cindex drawbacks of pointer arithmetic +@cindex pointer arithmetic, drawbacks + +Pointer arithmetic is clean and elegant, but it is also the cause of a +major security flaw in the C language. Theoretically, it is only +valid to adjust a pointer within one object allocated as a unit in +memory. However, if you unintentionally adjust a pointer across the +bounds of the object and into some other object, the system has no way +to detect this error. + +A bug which does that can easily result in clobbering part of another +object. For example, with @code{array[-1]} you can read or write the +nonexistent element before the beginning of an array---probably part +of some other data. + +Combining pointer arithmetic with casts between pointer types, you can +create a pointer that fails to be properly aligned for its type. For +example, + +@example +int a[2]; +char *pa = (char *)a; +int *p = (int *)(pa + 1); +@end example + +@noindent +gives @code{p} a value pointing to an ``integer'' that includes part +of @code{a[0]} and part of @code{a[1]}. Dereferencing that with +@code{*p} can cause a fatal @code{SIGSEGV} signal or it can return the +contents of that badly aligned @code{int} (@pxref{Signals}). If it +``works,'' it may be quite slow.
It can also cause aliasing +confusions (@pxref{Aliasing}). + +@strong{Warning:} Using improperly aligned pointers is risky---don't do it +unless it is really necessary. + +@node Pointer-Integer Conversion +@section Pointer-Integer Conversion +@cindex pointer-integer conversion +@cindex conversion between pointers and integers +@cindex @code{uintptr_t} + +On modern computers, an address is simply a number. It occupies the +same space as some size of integer. In C, you can convert a pointer +to the appropriate integer types and vice versa, without losing +information. The appropriate integer types are @code{uintptr_t} (an +unsigned type) and @code{intptr_t} (a signed type). Both are defined +in @file{stdint.h}. + +For instance, + +@example +#include <stdint.h> +#include <stdio.h> + +void +print_pointer (void *ptr) +@{ + uintptr_t converted = (uintptr_t) ptr; + + printf ("Pointer value is 0x%x\n", + (unsigned int) converted); +@} +@end example + +@noindent +The specification @samp{%x} in the template (the first argument) for +@code{printf} means to represent this argument using hexadecimal +notation. It's cleaner to use @code{uintptr_t}, since hexadecimal +printing treats the number as unsigned, but it won't actually matter: +all @code{printf} gets to see is the series of bits in the number. + +@strong{Warning:} Converting pointers to integers is risky---don't do +it unless it is really necessary. + +@node Printing Pointers +@section Printing Pointers + +To print the numeric value of a pointer, use the @samp{%p} specifier. +For example: + +@example +void +print_pointer (void *ptr) +@{ + printf ("Pointer value is %p\n", ptr); +@} +@end example + +The specification @samp{%p} works with any pointer type. It prints +@samp{0x} followed by the address in hexadecimal, printed as the +appropriate unsigned integer type.
+ +@node Structures +@chapter Structures +@cindex structures +@findex struct +@cindex fields in structures + +A @dfn{structure} is a user-defined data type that holds various +@dfn{fields} of data. Each field has a name and a data type specified +in the structure's definition. + +Here we define a structure suitable for storing a linked list of +integers. Each list item will hold one integer, plus a pointer +to the next item. + +@example +struct intlistlink + @{ + int datum; + struct intlistlink *next; + @}; +@end example + +The structure definition has a @dfn{type tag} so that the code can +refer to this structure. The type tag here is @code{intlistlink}. +The definition refers recursively to the same structure through that +tag. + +You can define a structure without a type tag, but then you can't +refer to it again. That is useful only in some special contexts, such +as inside a @code{typedef} or a @code{union}. + +The contents of the structure are specified by the @dfn{field +declarations} inside the braces. Each field in the structure needs a +declaration there. The fields in one structure definition must have +distinct names, but these names do not conflict with any other names +in the program. + +A field declaration looks just like a variable declaration. You can +combine field declarations with the same beginning, just as you can +combine variable declarations. + +This structure has two fields. One, named @code{datum}, has type +@code{int} and will hold one integer in the list. The other, named +@code{next}, is a pointer to another @code{struct intlistlink} +which would be the rest of the list. In the last list item, it would +be @code{NULL}. + +This structure definition is recursive, since the type of the +@code{next} field refers to the structure type. Such recursion is not +a problem; in fact, you can use the type @code{struct intlistlink *} +before the definition of the type @code{struct intlistlink} itself. 
+That works because pointers to all kinds of structures really look the +same at the machine level. + +After defining the structure, you can declare a variable of type +@code{struct intlistlink} like this: + +@example +struct intlistlink foo; +@end example + +The structure definition itself can serve as the beginning of a +variable declaration, so you can declare variables immediately after, +like this: + +@example +struct intlistlink + @{ + int datum; + struct intlistlink *next; + @} foo; +@end example + +@noindent +But that is ugly. It is almost always clearer to separate the +definition of the structure from its uses. + +Declaring a structure type inside a block (@pxref{Blocks}) limits +the scope of the structure type name to that block. That means the +structure type is recognized only within that block. Declaring it in +a function parameter list, as here, + +@example +int f (struct foo @{int a, b@} parm); +@end example + +@noindent +(assuming that @code{struct foo} is not already defined) limits the +scope of the structure type @code{struct foo} to that parameter list; +that is basically useless, so it triggers a warning. + +Standard C requires at least one field in a structure. +GNU C does not require this. + +@menu +* Referencing Fields:: Accessing field values in a structure object. +* Dynamic Memory Allocation:: Allocating space for objects + while the program is running. +* Field Offset:: Memory layout of fields within a structure. +* Structure Layout:: Planning the memory layout of fields. +* Packed Structures:: Packing structure fields as close as possible. +* Bit Fields:: Dividing integer fields + into fields with fewer bits. +* Bit Field Packing:: How bit fields pack together in integers. +* const Fields:: Making structure fields immutable. +* Zero Length:: Zero-length array as a variable-length object. +* Flexible Array Fields:: Another approach to variable-length objects. 
+* Overlaying Structures:: Casting one structure type + over an object of another structure type. +* Structure Assignment:: Assigning values to structure objects. +* Unions:: Viewing the same object in different types. +* Packing With Unions:: Using a union type to pack various types into + the same memory space. +* Cast to Union:: Casting a value of one of the union's alternative + types to the type of the union itself. +* Structure Constructors:: Building new structure objects. +* Unnamed Types as Fields:: Fields' types do not always need names. +* Incomplete Types:: Types which have not been fully defined. +* Intertwined Incomplete Types:: Defining mutually-recursive structure types. +* Type Tags:: Scope of structure and union type tags. +@end menu + +@node Referencing Fields +@section Referencing Structure Fields +@cindex referencing structure fields +@cindex structure fields, referencing + +To make a structure useful, there has to be a way to examine and store +its fields. The @samp{.} (period) operator does that; its use looks +like @code{@var{object}.@var{field}}. + +Given this structure and variable, + +@example +struct intlistlink + @{ + int datum; + struct intlistlink *next; + @}; + +struct intlistlink foo; +@end example + +@noindent +you can write @code{foo.datum} and @code{foo.next} to refer to the two +fields in the value of @code{foo}. These fields are lvalues, so you +can store values into them, and read the values out again. + +Most often, structures are dynamically allocated (see the next +section), and we refer to the objects via pointers. +@code{(*p).@var{field}} is somewhat cumbersome, so there is an +abbreviation: @code{p->@var{field}}. For instance, assume the program +contains this declaration: + +@example +struct intlistlink *ptr; +@end example + +@noindent +You can write @code{ptr->datum} and @code{ptr->next} to refer +to the two fields in the object that @code{ptr} points to.
+ +If a unary operator precedes an expression using @samp{->}, +the @samp{->} nests inside: + +@example + -ptr->datum @r{is equivalent to} -(ptr->datum) +@end example + +You can intermix @samp{->} and @samp{.} without parentheses, +as shown here: + +@example +struct @{ double d; struct intlistlink l; @} foo; + +@r{@dots{}}foo.l.next->next->datum@r{@dots{}} +@end example + +@node Dynamic Memory Allocation +@section Dynamic Memory Allocation +@cindex dynamic memory allocation +@cindex memory allocation, dynamic +@cindex allocating memory dynamically + +To allocate an object dynamically, call the library function +@code{malloc} (@pxref{Basic Allocation, The GNU C Library,, libc, The GNU C Library +Reference Manual}). Here is how to allocate an object of type +@code{struct intlistlink}. To make this code work, include the file +@file{stdlib.h}, like this: + +@example +#include <stddef.h> /* @r{Defines @code{NULL}.} */ +#include <stdlib.h> /* @r{Declares @code{malloc}.} */ + +@dots{} + +struct intlistlink * +alloc_intlistlink () +@{ + struct intlistlink *p; + + p = malloc (sizeof (struct intlistlink)); + + if (p == NULL) + fatal ("Ran out of storage"); + + /* @r{Initialize the contents.} */ + p->datum = 0; + p->next = NULL; + + return p; +@} +@end example + +@noindent +@code{malloc} returns @code{void *}, so the assignment to @code{p} +will automatically convert it to type @code{struct intlistlink *}. +The return value of @code{malloc} is always sufficiently aligned +(@pxref{Type Alignment}) that it is valid for any data type. + +The test for @code{p == NULL} is necessary because @code{malloc} +returns a null pointer if it cannot get any storage. We assume that +the program defines the function @code{fatal} to report a fatal error +to the user.
+ +Here's how to add one more integer to the front of such a list: + +@example +struct intlistlink *mylist = NULL; + +void +add_to_mylist (int my_int) +@{ + struct intlistlink *p = alloc_intlistlink (); + + p->datum = my_int; + p->next = mylist; + mylist = p; +@} +@end example + +The way to free the objects is by calling @code{free}. Here's +a function to free all the links in one of these lists: + +@example +void +free_intlist (struct intlistlink *p) +@{ + while (p) + @{ + struct intlistlink *q = p; + p = p->next; + free (q); + @} +@} +@end example + +We must extract the @code{next} pointer from the object before freeing +it, because @code{free} can clobber the data that was in the object. +For the same reason, the program must not use the list any more after +freeing its elements. To make sure it won't, it is best to clear out +the variable where the list was stored, like this: + +@example +free_intlist (mylist); + +mylist = NULL; +@end example + +@node Field Offset +@section Field Offset +@cindex field offset +@cindex structure field offset +@cindex offset of structure fields + +To determine the offset of a given field @var{field} in a structure +type @var{type}, use the macro @code{offsetof}, which is defined in +the file @file{stddef.h}. It is used like this: + +@example +offsetof (@var{type}, @var{field}) +@end example + +Here is an example: + +@example +struct foo +@{ + int element; + struct foo *next; +@}; + +offsetof (struct foo, next) +/* @r{On most machines that is 4. It may be 8.} */ +@end example + +@node Structure Layout +@section Structure Layout +@cindex structure layout +@cindex layout of structures + +The rest of this chapter covers advanced topics about structures. If +you are just learning C, you can skip it. + +The precise layout of a @code{struct} type is crucial when using it to +overlay hardware registers, to access data structures in shared +memory, or to assemble and disassemble packets for network +communication.
It is also important for avoiding memory waste when +the program makes many objects of that type. However, the layout +depends on the target platform. Each platform has conventions for +structure layout, which compilers need to follow. + +Here are the conventions used on most platforms. + +The structure's fields appear in the structure layout in the order +they are declared. When possible, consecutive fields occupy +consecutive bytes within the structure. However, if a field's type +demands more alignment than it would get that way, C gives it the +alignment it requires by leaving a gap after the previous field. + +Once all the fields have been laid out, it is possible to determine +the structure's alignment and size. The structure's alignment is the +maximum alignment of any of the fields in it. Then the structure's +size is rounded up to a multiple of its alignment. That may require +leaving a gap at the end of the structure. + +Here are some examples, where we assume that @code{char} has size and +alignment 1 (always true), and @code{int} has size and alignment 4 +(true on most kinds of computers): + +@example +struct foo +@{ + char a, b; + int c; +@}; +@end example + +@noindent +This structure occupies 8 bytes, with an alignment of 4. @code{a} is +at offset 0, @code{b} is at offset 1, and @code{c} is at offset 4. +There is a gap of 2 bytes before @code{c}. + +Contrast that with this structure: + +@example +struct foo +@{ + char a; + int c; + char b; +@}; +@end example + +This structure has size 12 and alignment 4. @code{a} is at offset 0, +@code{c} is at offset 4, and @code{b} is at offset 8. There are two +gaps: three bytes before @code{c}, and three bytes at the end. + +These two structures have the same contents at the C level, but one +takes 8 bytes and the other takes 12 bytes due to the ordering of the +fields. A reliable way to avoid this sort of wastage is to order the +fields by size, biggest fields first. 
+ +@node Packed Structures +@section Packed Structures +@cindex packed structures +@cindex @code{__attribute__((packed))} + +In GNU C you can force a structure to be laid out with no gaps by +adding @code{__attribute__((packed))} after @code{struct} (or at the +end of the structure type declaration). Here's an example: + +@example +struct __attribute__((packed)) foo +@{ + char a; + int c; + char b; +@}; +@end example + +Without @code{__attribute__((packed))}, this structure occupies 12 +bytes (as described in the previous section), assuming 4-byte +alignment for @code{int}. With @code{__attribute__((packed))}, it is +only 6 bytes long---the sum of the lengths of its fields. + +Use of @code{__attribute__((packed))} often results in fields that +don't have the normal alignment for their types. Taking the address +of such a field can result in an invalid pointer because of its +improper alignment. Dereferencing such a pointer can cause a +@code{SIGSEGV} signal on a machine that doesn't, in general, allow +unaligned pointers. + +@xref{Attributes}. + +@node Bit Fields +@section Bit Fields +@cindex bit fields + +A structure field declaration with an integer type can specify the +number of bits the field should occupy. We call that a @dfn{bit +field}. These are useful because consecutive bit fields are packed +into a larger storage unit. For instance, + +@example +unsigned char opcode: 4; +@end example + +@noindent +specifies that this field takes just 4 bits. +Since it is unsigned, its possible values range +from 0 to 15. A signed field with 4 bits, such as this, + +@example +signed char small: 4; +@end example + +@noindent +can hold values from -8 to 7. + +You can subdivide a single byte into those two parts by writing + +@example +unsigned char opcode: 4; +signed char small: 4; +@end example + +@noindent +in the structure. With bit fields, these two numbers fit into +a single @code{char}. 
+ +Here's how to declare a one-bit field that can hold either 0 or 1: + +@example +unsigned char special_flag: 1; +@end example + +You can also use the @code{bool} type for bit fields: + +@example +bool special_flag: 1; +@end example + +Except when using @code{bool} (which is always unsigned, +@pxref{Boolean Type}), always specify @code{signed} or @code{unsigned} +for a bit field. There is a default, if that's not specified: the bit +field is signed if plain @code{char} is signed, except that the option +@option{-funsigned-bitfields} forces unsigned as the default. But it +is cleaner not to depend on this default. + +Bit fields are special in that you cannot take their address with +@samp{&}. They are not stored with the size and alignment appropriate +for the specified type, so they cannot be addressed through pointers +to that type. + +@node Bit Field Packing +@section Bit Field Packing + +Programs to communicate with low-level hardware interfaces need to +define bit fields laid out to match the hardware data. This section +explains how to do that. + +Consecutive bit fields are packed together, but each bit field must +fit within a single object of its specified type. In this example, + +@example +unsigned short a : 3, b : 3, c : 3, d : 3, e : 3; +@end example + +@noindent +all five fields fit consecutively into one two-byte @code{short}. +They need 15 bits, and one @code{short} provides 16. By contrast, + +@example +unsigned char a : 3, b : 3, c : 3, d : 3, e : 3; +@end example + +@noindent +needs three bytes. It fits @code{a} and @code{b} into one +@code{char}, but @code{c} won't fit in that @code{char} (they would +add up to 9 bits). So @code{c} and @code{d} go into a second +@code{char}, leaving a gap of two bits between @code{b} and @code{c}. +Then @code{e} needs a third @code{char}. 
By contrast, + +@example +unsigned char a : 3, b : 3; +unsigned int c : 3; +unsigned char d : 3, e : 3; +@end example + +@noindent +needs only two bytes: the type @code{unsigned int} +allows @code{c} to straddle bytes that are in the same word. + +You can leave a gap of a specified number of bits by defining a +nameless bit field. This looks like @code{@var{type} : @var{nbits};}. +It is allocated space in the structure just as a named bit field would +be allocated. + +You can force the following bit field to advance to the following +aligned memory object with @code{@var{type} : 0;}. + +Both of these constructs can syntactically share @var{type} with +ordinary bit fields. This example illustrates both: + +@example +unsigned int a : 5, : 3, b : 5, : 0, c : 5, : 3, d : 5; +@end example + +@noindent +It puts @code{a} and @code{b} into one @code{int}, with a 3-bit gap +between them. Then @code{: 0} advances to the next @code{int}, +so @code{c} and @code{d} fit into that one. + +These rules for packing bit fields apply to most target platforms, +including all the usual real computers. A few embedded controllers +have special layout rules. + +@node const Fields +@section @code{const} Fields +@cindex const fields +@cindex structure fields, constant + +@c ??? Is this a C standard feature? + +A structure field declared @code{const} cannot be assigned to +(@pxref{const}). 
For instance, let's define this modified version of +@code{struct intlistlink}: + +@example +struct intlistlink_ro /* @r{``ro'' for read-only.} */ + @{ + const int datum; + struct intlistlink *next; + @}; +@end example + +This structure can be used to prevent part of the code from modifying +the @code{datum} field: + +@example +/* @r{@code{p} has type @code{struct intlistlink *}.} + @r{Convert it to @code{struct intlistlink_ro *}.} */ +struct intlistlink_ro *q + = (struct intlistlink_ro *) p; + +q->datum = 5; /* @r{Error!} */ +p->datum = 5; /* @r{Valid since @code{*p} is} + @r{not a @code{struct intlistlink_ro}.} */ +@end example + +A @code{const} field can get a value in two ways: by initialization of +the whole structure, and by making a pointer-to-structure point to an object +in which that field already has a value. + +Any @code{const} field in a structure type makes assignment impossible +for structures of that type (@pxref{Structure Assignment}). That is +because structure assignment works by assigning the structure's +fields, one by one. + +@node Zero Length +@section Arrays of Length Zero +@cindex array of length zero +@cindex zero-length arrays +@cindex length-zero arrays + +GNU C allows zero-length arrays. They are useful as the last element +of a structure that is really a header for a variable-length object. +Here's an example, where we construct a variable-size structure +to hold a line which is @code{this_length} characters long: + +@example +struct line @{ + int length; + char contents[0]; +@}; + +struct line *thisline + = ((struct line *) + malloc (sizeof (struct line) + + this_length)); +thisline->length = this_length; +@end example + +In ISO C90, we would have to give @code{contents} a length of 1, which +means either wasting space or complicating the argument to @code{malloc}. 
+ +@node Flexible Array Fields +@section Flexible Array Fields +@cindex flexible array fields +@cindex array fields, flexible + +The C99 standard adopted a more complex equivalent of zero-length +array fields. It's called a @dfn{flexible array}, and it's indicated +by omitting the length, like this: + +@example +struct line +@{ + int length; + char contents[]; +@}; +@end example + +The flexible array has to be the last field in the structure, and there +must be other fields before it. + +Under the C standard, a structure with a flexible array can't be part +of another structure, and can't be an element of an array. + +GNU C allows static initialization of flexible array fields. The effect +is to ``make the array long enough'' for the initializer. + +@example +struct f1 @{ int x; int y[]; @} f1 + = @{ 1, @{ 2, 3, 4 @} @}; +@end example + +@noindent +This defines a structure variable named @code{f1} +whose type is @code{struct f1}. In C, a variable name or function name +never conflicts with a structure type tag. + +Omitting the flexible array field's size lets the initializer +determine it. This is allowed only when the flexible array is defined +in the outermost structure and you declare a variable of that +structure type. For example: + +@example +struct foo @{ int x; int y[]; @}; +struct bar @{ struct foo z; @}; + +struct foo a = @{ 1, @{ 2, 3, 4 @} @}; // @r{Valid.} +struct bar b = @{ @{ 1, @{ 2, 3, 4 @} @} @}; // @r{Invalid.} +struct bar c = @{ @{ 1, @{ @} @} @}; // @r{Valid.} +struct foo d[1] = @{ @{ 1, @{ 2, 3, 4 @} @} @}; // @r{Invalid.} +@end example + +@node Overlaying Structures +@section Overlaying Different Structures +@cindex overlaying structures +@cindex structures, overlaying + +Be careful about using different structure types to refer to the same +memory within one function, because GNU C can optimize code assuming +it never does that. @xref{Aliasing}.
Here's an example of the kind of +aliasing that can cause the problem: + +@example +struct a @{ int size; char *data; @}; +struct b @{ int size; char *data; @}; +struct a foo; +struct a *p = &foo; +struct b *q = (struct b *) &foo; +@end example + +Here @code{q} points to the same memory that the variable @code{foo} +occupies, but they have two different types. The two types +@code{struct a} and @code{struct b} are defined alike, but they are +not the same type. Interspersing references using the two types, +like this, + +@example +p->size = 0; +q->size = 1; +x = p->size; +@end example + +@noindent +allows GNU C to assume that @code{p->size} is still zero when it is +copied into @code{x}. The compiler ``knows'' that @code{q} points to +a @code{struct b} and this cannot overlap with a @code{struct a}. + +Other compilers might also do this optimization. The ISO C standard +considers such code erroneous, precisely so that this optimization +will be valid. + +@node Structure Assignment +@section Structure Assignment +@cindex structure assignment +@cindex assigning structures + +Assignment operating on a structure type copies the structure. The +left and right operands must have the same type. Here is an example: + +@example +#include <stddef.h> /* @r{Defines @code{NULL}.} */ +#include <stdlib.h> /* @r{Declares @code{malloc}.} */ +@r{@dots{}} + +struct point @{ double x, y; @}; + +struct point * +copy_point (struct point point) +@{ + struct point *p + = (struct point *) malloc (sizeof (struct point)); + if (p == NULL) + fatal ("Out of memory"); + *p = point; + return p; +@} +@end example + +Notionally, assignment on a structure type works by copying each of +the fields. Thus, if any of the fields has the @code{const} +qualifier, that structure type does not allow assignment: + +@example +struct point @{ const double x, y; @}; + +struct point a, b; + +a = b; /* @r{Error!} */ +@end example + +@xref{Assignment Expressions}.
+ +@node Unions +@section Unions +@cindex unions +@findex union + +A @dfn{union type} defines alternative ways of looking at the same +piece of memory. Each alternative view is defined with a data type, +and identified by a name. A union definition looks like this: + +@example +union @var{name} +@{ + @var{alternative declarations}@r{@dots{}} +@}; +@end example + +Each alternative declaration looks like a structure field declaration, +except that it can't be a bit field. For instance, + +@example +union number +@{ + long int integer; + double floating; +@}; +@end example + +@noindent +lets you store either an integer (type @code{long int}) or a floating +point number (type @code{double}) in the same place in memory. The +length and alignment of the union type are the maximum of all the +alternatives---they do not have to be the same. In this union +example, @code{double} probably takes more space than @code{long int}, +but that doesn't cause a problem in programs that use the union in the +normal way. + +The members don't have to be different in data type. Sometimes +each member pertains to a way the data will be used. For instance, + +@example +union datum +@{ + double latitude; + double longitude; + double height; + double weight; + int continent; +@}; +@end example + +This union holds one of several kinds of data; most kinds are floating +points, but the value can also be a code for a continent which is an +integer. You @emph{could} use one member of type @code{double} to +access all the values which have that type, but the different member +names will make the program clearer. + +The alignment of a union type is the maximum of the alignments of the +alternatives. The size of the union type is the maximum of the sizes +of the alternatives, rounded up to a multiple of the alignment +(because every type's size must be a multiple of its alignment). + +All the union alternatives start at the address of the union itself.
+If an alternative is shorter than the union as a whole, it occupies +the first part of the union's storage, leaving the last part unused +@emph{for that alternative}. + +@strong{Warning:} if the code stores data using one union alternative +and accesses it with another, the results depend on the kind of +computer in use. Only wizards should try to do this. However, when +you need to do this, a union is a clean way to do it. + +Assignment works on any union type by copying the entire value. + +@node Packing With Unions +@section Packing With Unions + +Sometimes we design a union with the intention of packing various +kinds of objects into a certain amount of memory space. For example. + +@example +union bytes8 +@{ + long long big_int_elt; + double double_elt; + struct @{ int first, second; @} two_ints; + struct @{ void *first, *second; @} two_ptrs; +@}; + +union bytes8 *p; +@end example + +This union makes it possible to look at 8 bytes of data that @code{p} +points to as a single 8-byte integer (@code{p->big_int_elt}), as a +single floating-point number (@code{p->double_elt}), as a pair of +integers (@code{p->two_ints.first} and @code{p->two_ints.second}), or +as a pair of pointers (@code{p->two_ptrs.first} and +@code{p->two_ptrs.second}). + +To pack storage with such a union makes assumptions about the sizes of +all the types involved. This particular union was written expecting a +pointer to have the same size as @code{int}. On a machine where one +pointer takes 8 bytes, the code using this union probably won't work +as expected. The union, as such, will function correctly---if you +store two values through @code{two_ints} and extract them through +@code{two_ints}, you will get the same integers back---but the part of +the program that expects the union to be 8 bytes long could +malfunction, or at least use too much space. + +The above example shows one case where a @code{struct} type with no +tag can be useful. 
Another way to get effectively the same result +is with arrays as members of the union: + +@example +union eight_bytes +@{ + long long big_int_elt; + double double_elt; + int two_ints[2]; + void *two_ptrs[2]; +@}; +@end example + +@node Cast to Union +@section Cast to a Union Type +@cindex cast to a union +@cindex union, casting to a + +In GNU C, you can explicitly cast any of the alternative types to the +union type; for instance, + +@example +(union eight_bytes) (long long) 5 +@end example + +@noindent +makes a value of type @code{union eight_bytes} which gets its contents +through the alternative named @code{big_int_elt}. + +The value being cast must exactly match the type of the alternative, +so this is not valid: + +@example +(union eight_bytes) 5 /* @r{Error! 5 is @code{int}.} */ +@end example + +A cast to union type looks like any other cast, except that the type +specified is a union type. You can specify the type either with +@code{union @var{tag}} or with a typedef name (@pxref{Defining +Typedef Names}). + +Using the cast as the right-hand side of an assignment to a variable of +union type is equivalent to storing in an alternative of the union: + +@example +union foo u; + +u = (union foo) x @r{means} u.i = x + +u = (union foo) y @r{means} u.d = y +@end example + +You can also use the union cast as a function argument: + +@example +void hack (union foo); +@r{@dots{}} +hack ((union foo) x); +@end example + +@node Structure Constructors +@section Structure Constructors +@cindex structure constructors +@cindex constructors, structure + +You can construct a structure value by writing its type in +parentheses, followed by an initializer that would be valid in a +declaration for that type. 
For instance, given this declaration, + +@example +struct foo @{int a; char b[2];@} structure; +@end example + +@noindent +you can create a @code{struct foo} value as follows: + +@example +((struct foo) @{x + y, 'a', 0@}) +@end example + +@noindent +This specifies @code{x + y} for field @code{a}, +the character @samp{a} for field @code{b}'s element 0, +and the null character for field @code{b}'s element 1. + +The parentheses around that constructor are not necessary, but we +recommend writing them to make the nesting of the containing +expression clearer. + +You can also show the nesting of the two by writing it like +this: + +@example +((struct foo) @{x + y, @{'a', 0@} @}) +@end example + +Each of those is equivalent to writing the following statement +expression (@pxref{Statement Exprs}): + +@example +(@{ + struct foo temp = @{x + y, 'a', 0@}; + temp; +@}) +@end example + +You can also create a union value this way, but it is not especially +useful since that is equivalent to doing a cast: + +@example + ((union whosis) @{@var{value}@}) +@r{is equivalent to} + ((union whosis) (@var{value})) +@end example + +@node Unnamed Types as Fields +@section Unnamed Types as Fields +@cindex unnamed structures +@cindex unnamed unions +@cindex structures, unnamed +@cindex unions, unnamed + +A structure or a union can contain, as fields, +unnamed structures and unions. Here's an example: + +@example +struct +@{ + int a; + union + @{ + int b; + float c; + @}; + int d; +@} foo; +@end example + +@noindent +You can access the fields of the unnamed union within @code{foo} as if they +were individual fields at the same level as the union definition: + +@example +foo.a = 42; +foo.b = 47; +foo.c = 5.25; // @r{Overwrites the value in @code{foo.b}}. +foo.d = 314; +@end example + +Avoid using field names that could cause ambiguity.
For example, with +this definition: + +@example +struct +@{ + int a; + struct + @{ + int a; + float b; + @}; +@} foo; +@end example + +@noindent +it is impossible to tell what @code{foo.a} refers to. GNU C reports +an error when a definition is ambiguous in this way. + +@node Incomplete Types +@section Incomplete Types +@cindex incomplete types +@cindex types, incomplete + +A type that has not been fully defined is called an @dfn{incomplete +type}. Structure and union types are incomplete when the code makes a +forward reference, such as @code{struct foo}, before defining the +type. An array type is incomplete when its length is unspecified. + +You can't use an incomplete type to declare a variable or field, or +use it for a function parameter or return type. The operators +@code{sizeof} and @code{_Alignof} give errors when used on an +incomplete type. + +However, you can define a pointer to an incomplete type, and declare a +variable or field with such a pointer type. In general, you can do +everything with such pointers except dereference them. For example: + +@example +extern void bar (struct mysterious_value *); + +void +foo (struct mysterious_value *arg) +@{ + bar (arg); +@} + +@r{@dots{}} + +@{ + struct mysterious_value *p, **q; + + p = *q; + foo (p); +@} +@end example + +@noindent +These examples are valid because the code doesn't try to understand +what @code{p} points to; it just passes the pointer around. +(Presumably @code{bar} is defined in some other file that really does +have a definition for @code{struct mysterious_value}.) However, +dereferencing the pointer would get an error; that requires a +definition for the structure type. + +@node Intertwined Incomplete Types +@section Intertwined Incomplete Types + +When several structure types contain pointers to each other, you can +define the types in any order because pointers to types that come +later are incomplete types. +Here is an example.
+ +@example +/* @r{An employee record points to a group.} */ +struct employee +@{ + char *name; + @r{@dots{}} + struct group *group; /* @r{incomplete type.} */ + @r{@dots{}} +@}; + +/* @r{An employee list points to employees.} */ +struct employee_list +@{ + struct employee *this_one; + struct employee_list *next; /* @r{incomplete type.} */ + @r{@dots{}} +@}; + +/* @r{A group points to one employee_list.} */ +struct group +@{ + char *name; + @r{@dots{}} + struct employee_list *employees; + @r{@dots{}} +@}; +@end example + +@node Type Tags +@section Type Tags +@cindex type tags + +The name that follows @code{struct} (@pxref{Structures}), @code{union} +(@pxref{Unions}), or @code{enum} (@pxref{Enumeration Types}) is called +a @dfn{type tag}. In C, a type tag never conflicts with a variable +name or function name; the type tags have a separate @dfn{name space}. +Thus, there is no name conflict in this code: + +@example +struct pair @{ int a, b; @}; +int pair = 1; +@end example + +@noindent +nor in this one: + +@example +struct pair @{ int a, b; @} pair; +@end example + +@noindent +where @code{pair} is both a structure type tag and a variable name. + +However, @code{struct}, @code{union}, and @code{enum} share the same +name space of tags, so this is a conflict: + +@example +struct pair @{ int a, b; @}; +enum pair @{ c, d @}; +@end example + +@noindent +and so is this: + +@example +struct pair @{ int a, b; @}; +struct pair @{ int c, d; @}; +@end example + +When the code defines a type tag inside a block, the tag's scope is +limited to that block (as for local variables). Two definitions for +one type tag do not conflict if they are in different scopes; rather, +each is valid in its scope. For example, + +@example +struct pair @{ int a, b; @}; + +void +pair_up_doubles (int len, double array[]) +@{ + struct pair @{ double a, b; @}; + @r{@dots{}} +@} +@end example + +@noindent +has two definitions for @code{struct pair} which do not conflict.
The +one inside the function applies only within the definition of +@code{pair_up_doubles}. Within its scope, that definition +@dfn{shadows} the outer definition. + +If @code{struct pair} appears inside the function body, before the +inner definition, it refers to the outer definition---the only one +that has been seen at that point. Thus, in this code, + +@example +struct pair @{ int a, b; @}; + +void +pair_up_doubles (int len, double array[]) +@{ + struct two_pairs @{ struct pair *p, *q; @}; + struct pair @{ double a, b; @}; + @r{@dots{}} +@} +@end example + +@noindent +the structure @code{two_pairs} has pointers to the outer definition of +@code{struct pair}, which is probably not desirable. + +To prevent that, you can write @code{struct pair;} inside the function +body as a variable declaration with no variables. This is a +@dfn{forward declaration} of the type tag @code{pair}: it makes the +type tag local to the current block, with the details of the type to +come later. Here's an example: + +@example +void +pair_up_doubles (int len, double array[]) +@{ + /* @r{Forward declaration for @code{pair}.} */ + struct pair; + struct two_pairs @{ struct pair *p, *q; @}; + /* @r{Give the details.} */ + struct pair @{ double a, b; @}; + @r{@dots{}} +@} +@end example + +However, the cleanest practice is to avoid shadowing type tags. + +@node Arrays +@chapter Arrays +@cindex array +@cindex elements of arrays + +An @dfn{array} is a data object that holds a series of @dfn{elements}, +all of the same data type. Each element is identified by its numeric +@var{index} within the array. + +We presented arrays of numbers in the sample programs early in this +manual (@pxref{Array Example}). However, arrays can have elements of +any data type, including pointers, structures, unions, and other +arrays. 
+ +If you know another programming language, you may suppose that you know all +about arrays, but C arrays have special quirks, so in this chapter we +collect all the information about arrays in C@. + +The elements of a C array are allocated consecutively in memory, +with no gaps between them. Each element is aligned as required +for its data type (@pxref{Type Alignment}). + +@menu +* Accessing Array Elements:: How to access individual elements of an array. +* Declaring an Array:: How to name and reserve space for a new array. +* Strings:: A string in C is a special case of array. +* Array Type Designators:: Referring to a specific array type. +* Incomplete Array Types:: Naming, but not allocating, a new array. +* Limitations of C Arrays:: Arrays are not first-class objects. +* Multidimensional Arrays:: Arrays of arrays. +* Constructing Array Values:: Assigning values to an entire array at once. +* Arrays of Variable Length:: Declaring arrays of non-constant size. +@end menu + +@node Accessing Array Elements +@section Accessing Array Elements +@cindex accessing array elements +@cindex array elements, accessing + +If the variable @code{a} is an array, the @var{n}th element of +@code{a} is @code{a[@var{n}]}. You can use that expression to access +an element's value or to assign to it: + +@example +x = a[5]; +a[6] = 1; +@end example + +@noindent +Since the variable @code{a} is an lvalue, @code{a[@var{n}]} is also an +lvalue. + +The lowest valid index in an array is 0, @emph{not} 1, and the highest +valid index is one less than the number of elements. + +The C language does not check whether array indices are in bounds, so +if the code uses an out-of-range index, it will access memory outside the +array. + +@strong{Warning:} Using only valid index values in C is the +programmer's responsibility. + +Array indexing in C is not a primitive operation: it is defined in +terms of pointer arithmetic and dereferencing. 
Now that we know +@emph{what} @code{a[i]} does, we can ask @emph{how} @code{a[i]} does +its job. + +In C, @code{@var{x}[@var{y}]} is an abbreviation for +@code{*(@var{x}+@var{y})}. Thus, @code{a[i]} really means +@code{*(a+i)}. @xref{Pointers and Arrays}. + +When an expression with array type (such as @code{a}) appears as part +of a larger C expression, it is converted automatically to a pointer +to element zero of that array. For instance, @code{a} in an +expression is equivalent to @code{&a[0]}. Thus, @code{*(a+i)} is +computed as @code{*(&a[0]+i)}. + +Now we can analyze how that expression gives us the desired element of +the array. It makes a pointer to element 0 of @code{a}, advances it +by the value of @code{i}, and dereferences that pointer. + +Another equivalent way to write the expression is @code{(&a[0])[i]}. + +@node Declaring an Array +@section Declaring an Array +@cindex declaring an array +@cindex array, declaring + +To make an array declaration, write @code{[@var{length}]} after the +name being declared. This construct is valid in the declaration of a +variable, a function parameter, a function value type (the value can't +be an array, but it can be a pointer to one), a structure field, or a +union alternative. + +The surrounding declaration specifies the element type of the array; +that can be any type of data, but not @code{void} or a function type. +For instance, + +@example +double a[5]; +@end example + +@noindent +declares @code{a} as an array of 5 @code{double}s. + +@example +struct foo bstruct[length]; +@end example + +@noindent +declares @code{bstruct} as an array of @code{length} objects of type +@code{struct foo}. A variable array size like this is allowed when +the array is not file-scope. + +Other declaration constructs can nest within the array declaration +construct. For instance: + +@example +struct foo *b[length]; +@end example + +@noindent +declares @code{b} as an array of @code{length} pointers to +@code{struct foo}. 
This shows that the length need not be a constant +(@pxref{Arrays of Variable Length}). + +@example +double (*c)[5]; +@end example + +@noindent +declares @code{c} as a pointer to an array of 5 @code{double}s, and + +@example +char *(*f (int))[5]; +@end example + +@noindent +declares @code{f} as a function taking an @code{int} argument and +returning a pointer to an array of 5 strings (pointers to +@code{char}s). + +@example +double aa[5][10]; +@end example + +@noindent +declares @code{aa} as an array of 5 elements, each of which is an +array of 10 @code{double}s. This shows how to declare a +multidimensional array in C (@pxref{Multidimensional Arrays}). + +All these declarations specify the array's length, which is needed in +these cases in order to allocate storage for the array. + +@node Strings +@section Strings +@cindex string + +A string in C is a sequence of elements of type @code{char}, +terminated with the null character, the character with code zero. + +Programs often need to use strings with specific, fixed contents. To +write one in a C program, use a @dfn{string constant} such as +@code{"Take me to your leader!"}. The data type of a string constant +is @code{char *}. For the full syntactic details of writing string +constants, see @ref{String Constants}. + +To declare a place to store a non-constant string, declare an array of +@code{char}. Keep in mind that it must include one extra @code{char} +for the terminating null. For instance, + +@example +char text[] = @{ 'H', 'e', 'l', 'l', 'o', 0 @}; +@end example + +@noindent +declares an array named @samp{text} with six elements---five letters +and the terminating null character. An equivalent way to get the same +result is this, + +@example +char text[] = "Hello"; +@end example + +@noindent +which copies the elements of the string constant, including @emph{its} +terminating null character.
+ +@example +char message[200]; +@end example + +@noindent +declares an array long enough to hold a string of 199 ASCII characters +plus the terminating null character. + +When you store a string into @code{message} be sure to check or prove +that the length does not exceed its size. For example, + +@example +void +set_message (char *text) +@{ + int i; + for (i = 0; i < sizeof (message); i++) + @{ + message[i] = text[i]; + if (text[i] == 0) + return; + @} + fatal_error ("Message is too long for `message'"); +@} +@end example + +It's easy to do this with the standard library function +@code{strncpy}, which fills out the whole destination array (up to a +specified length) with null characters. Thus, if the last character +of the destination is not null, the string did not fit. Many system +libraries, including the GNU C library, hand-optimize @code{strncpy} +to run faster than an explicit @code{for}-loop. + +Here's what the code looks like: + +@example +void +set_message (char *text) +@{ + strncpy (message, text, sizeof (message)); + if (message[sizeof (message) - 1] != 0) + fatal_error ("Message is too long for `message'"); +@} +@end example + +@xref{String and Array Utilities, The GNU C Library, , libc, The GNU C +Library Reference Manual}, for more information about the standard +library functions for operating on strings. + +You can avoid putting a fixed length limit on strings you construct or +operate on by allocating the space for them dynamically. +@xref{Dynamic Memory Allocation}. + +@node Array Type Designators +@section Array Type Designators + +Every C type has a type designator, which you make by deleting the +variable name and the semicolon from a declaration (@pxref{Type +Designators}). The designators for array types follow this rule, but +they may appear surprising.
+ +@example +@r{type} int a[5]; @r{designator} int [5] +@r{type} double a[5][3]; @r{designator} double [5][3] +@r{type} struct foo *a[5]; @r{designator} struct foo *[5] +@end example + +@node Incomplete Array Types +@section Incomplete Array Types +@cindex incomplete array types +@cindex array types, incomplete + +An array is equivalent, for most purposes, to a pointer to its zeroth +element. When that is true, the length of the array is irrelevant. +The length needs to be known only for allocating space for the array, or +for @code{sizeof} and @code{typeof} (@pxref{Auto Type}). Thus, in some +contexts C allows omitting the array length: + +@itemize @bullet +@item +An @code{extern} declaration says how to refer to a variable allocated +elsewhere. It does not need to allocate space for the variable, +so if it is an array, you can omit the length. For example, + +@example +extern int foo[]; +@end example + +@item +When declaring a function parameter as an array, the argument value +passed to the function is really a pointer to the array's zeroth +element. This value does not say how long the array really is, so there +is no need to declare it. For example, + +@example +int +func (int foo[]) +@end example +@end itemize + +These declarations are examples of @dfn{incomplete} array types, types +that are not fully specified. The incompleteness makes no difference +for accessing elements of the array, but it matters for some other +things. For instance, @code{sizeof} is not allowed on an incomplete +type. + +With multidimensional arrays, only the first dimension can be omitted: + +@example +extern struct chesspiece *funnyboard[][8]; +@end example + +In other words, the code doesn't have to say how many rows there are, +but it must state how big each row is.
+ +@node Limitations of C Arrays +@section Limitations of C Arrays +@cindex limitations of C arrays +@cindex first-class object + +Arrays have quirks in C because they are not ``first-class objects'': +there is no way in C to operate on an array as a unit. + +The other composite objects in C, structures and unions, are +first-class objects: a C program can copy a structure or union value +in an assignment, or pass one as an argument to a function, or make a +function return one. You can't do those things with an array in C@. +That is because a value you can operate on never has an array type. + +An expression in C can have an array type, but that doesn't produce +the array as a value. Instead it is converted automatically to a +pointer to the array's element at index zero. The code can operate +on the pointer, and through that on individual elements of the array, +but it can't get and operate on the array as a unit. + +There are three exceptions to this conversion rule, but none of them +offers a way to operate on the array as a whole. + +First, @samp{&} applied to an expression with array type gives you the +address of the array, as an array type. However, you can't operate on the +whole array that way---if you apply @samp{*} to get the array back, +that expression converts, as usual, to a pointer to its zeroth +element. + +Second, the operators @code{sizeof}, @code{_Alignof}, and +@code{typeof} do not convert the array to a pointer; they leave it as +an array. But they don't operate on the array's data---they only give +information about its type. + +Third, a string constant used as an initializer for an array is not +converted to a pointer---rather, the declaration copies the +@emph{contents} of that string in that one special case. + +You @emph{can} copy the contents of an array, just not with an +assignment operator. 
You can do it by calling the library function +@code{memcpy} or @code{memmove} (@pxref{Copying and Concatenation, The +GNU C Library, , libc, The GNU C Library Reference Manual}). Also, +when a structure contains just an array, you can copy that structure. + +An array itself is an lvalue if it is a declared variable, or part of +a structure or union that is an lvalue. When you construct an array +from elements (@pxref{Constructing Array Values}), that array is not +an lvalue. + +@node Multidimensional Arrays +@section Multidimensional Arrays +@cindex multidimensional arrays +@cindex array, multidimensional + +Strictly speaking, all arrays in C are unidimensional. However, you +can create an array of arrays, which is more or less equivalent to a +multidimensional array. For example, + +@example +struct chesspiece *board[8][8]; +@end example + +@noindent +declares an array of 8 arrays of 8 pointers to @code{struct +chesspiece}. This data type could represent the state of a chess +game. To access one square's contents requires two array index +operations, one for each dimension. For instance, you can write +@code{board[row][column]}, assuming @code{row} and @code{column} +are variables with integer values in the proper range. + +How does C understand @code{board[row][column]}? First of all, +@code{board} is converted automatically to a pointer to the zeroth +element (at index zero) of @code{board}. Adding @code{row} to that +makes it point to the desired element. Thus, @code{board[row]}'s +value is an element of @code{board}---an array of 8 pointers. + +However, as an expression with array type, it is converted +automatically to a pointer to the array's zeroth element. The second +array index operation, @code{[column]}, accesses the chosen element +from that array. + +As this shows, pointer-to-array types are meaningful in C@. 
+You can declare a variable that points to a row in a chess board +like this: + +@example +struct chesspiece *(*rowptr)[8]; +@end example + +@noindent +This points to an array of 8 pointers to @code{struct chesspiece}. +You can assign to it as follows: + +@example +rowptr = &board[5]; +@end example + +The dimensions don't have to be equal in length. Here we declare +@code{statepop} as an array to hold the population of each state in +the United States for each year since 1900: + +@example +#define NSTATES 50 +@{ + int nyears = current_year - 1900 + 1; + int statepop[NSTATES][nyears]; + @r{@dots{}} +@} +@end example + +The variable @code{statepop} is an array of @code{NSTATES} subarrays, +each indexed by the year (counting from 1900). Thus, to get the +element for a particular state and year, we must subscript it first +by the number that indicates the state, and second by the index for +the year: + +@example +statepop[state][year - 1900] +@end example + +@cindex array, layout in memory +The subarrays within the multidimensional array are allocated +consecutively in memory, and within each subarray, its elements are +allocated consecutively in memory. The most efficient way to process +all the elements in the array is to scan the last subscript in the +innermost loop. This means consecutive accesses go to consecutive +memory locations, which optimizes use of the processor's memory cache. +For example: + +@example +int total = 0; +float average; + +for (int state = 0; state < NSTATES; ++state) + @{ + for (int year = 0; year < nyears; ++year) + @{ + total += statepop[state][year]; + @} + @} + +average = total / nyears; +@end example + +C's layout for multidimensional arrays is different from Fortran's +layout. In Fortran, a multidimensional array is not an array of +arrays; rather, multidimensional arrays are a primitive feature, and +it is the first index that varies most rapidly between consecutive +memory locations.
Thus, the memory layout of a 50x114 array in C +matches that of a 114x50 array in Fortran. + +@node Constructing Array Values +@section Constructing Array Values +@cindex constructing array values +@cindex array values, constructing + +You can construct an array from elements by writing them inside +braces, and preceding all that with the array type's designator in +parentheses. There is no need to specify the array length, since the +number of elements determines that. The constructor looks like this: + +@example +(@var{elttype}[]) @{ @var{elements} @}; +@end example + +Here is an example, which constructs an array of string pointers: + +@example +(char *[]) @{ "x", "y", "z" @}; +@end example + +That's equivalent in effect to declaring an array with the same +initializer, like this: + +@example +char *array[] = @{ "x", "y", "z" @}; +@end example + +and then using the array. + +If all the elements are simple constant expressions, or made up of +such, then the compound literal can be coerced to a pointer to its +zeroth element and used to initialize a file-scope variable +(@pxref{File-Scope Variables}), as shown here: + +@example +char **foo = (char *[]) @{ "x", "y", "z" @}; +@end example + +@noindent +The data type of @code{foo} is @code{char **}, which is a pointer +type, not an array type. The declaration is equivalent to defining +and then using an array-type variable: + +@example +char *nameless_array[] = @{ "x", "y", "z" @}; +char **foo = &nameless_array[0]; +@end example + +@node Arrays of Variable Length +@section Arrays of Variable Length +@cindex array of variable length +@cindex variable-length arrays + +In GNU C, you can declare variable-length arrays like any other +arrays, but with a length that is not a constant expression. The +storage is allocated at the point of declaration and deallocated when +the block scope containing the declaration exits. 
For example: + +@example +#include <stdio.h> /* @r{Defines @code{FILE}.} */ +#include <string.h> /* @r{Declares @code{strlen}, @code{strcpy}, @code{strcat}.} */ + +FILE * +concat_fopen (char *s1, char *s2, char *mode) +@{ + char str[strlen (s1) + strlen (s2) + 1]; + strcpy (str, s1); + strcat (str, s2); + return fopen (str, mode); +@} +@end example + +@noindent +(This uses some standard library functions; see @ref{String and Array +Utilities, , , libc, The GNU C Library Reference Manual}.) + +The length of an array is computed once when the storage is allocated +and is remembered for the scope of the array in case it is used in +@code{sizeof}. + +@strong{Warning:} don't allocate a variable-length array if the size +might be very large (more than 100,000), or in a recursive function, +because that is likely to cause stack overflow. Allocate the array +dynamically instead (@pxref{Dynamic Memory Allocation}). + +Jumping or breaking out of the scope of the array name deallocates the +storage. Jumping into the scope is not allowed; that gives an error +message. + +You can also use variable-length arrays as arguments to functions: + +@example +struct entry +tester (int len, char data[len][len]) +@{ + @r{@dots{}} +@} +@end example + +As usual, a function argument declared with an array type +is really a pointer to an array that already exists. +Calling the function does not allocate the array, so there's no +particular danger of stack overflow in using this construct. + +To pass the array first and the length afterward, use a forward +declaration in the function's parameter list (another GNU extension). +For example, + +@example +struct entry +tester (int len; char data[len][len], int len) +@{ + @r{@dots{}} +@} +@end example + +The @code{int len} before the semicolon is a @dfn{parameter forward +declaration}, and it serves the purpose of making the name @code{len} +known when the declaration of @code{data} is parsed. + +You can write any number of such parameter forward declarations in the +parameter list.
They can be separated by commas or semicolons, but +the last one must end with a semicolon, which is followed by the +``real'' parameter declarations. Each forward declaration must match +a ``real'' declaration in parameter name and data type. ISO C11 does +not support parameter forward declarations. + +@node Enumeration Types +@chapter Enumeration Types +@cindex enumeration types +@cindex types, enumeration +@cindex enumerator + +An @dfn{enumeration type} represents a limited set of integer values, +each with a name. It is effectively equivalent to a primitive integer +type. + +Suppose we have a list of possible emotional states to store in an +integer variable. We can give names to these alternative values with +an enumeration: + +@example +enum emotion_state @{ neutral, happy, sad, worried, + calm, nervous @}; +@end example + +@noindent +(Never mind that this is a simplistic way to classify emotional states; +it's just a code example.) + +The names inside the enumeration are called @dfn{enumerators}. The +enumeration type defines them as constants, and their values are +consecutive integers; @code{neutral} is 0, @code{happy} is 1, +@code{sad} is 2, and so on. Alternatively, you can specify values for +the enumerators explicitly like this: + +@example +enum emotion_state @{ neutral = 2, happy = 5, + sad = 20, worried = 10, + calm = -5, nervous = -300 @}; +@end example + +Each enumerator which does not specify a value gets value zero +(if it is at the beginning) or the next consecutive integer. + +@example +/* @r{@code{neutral} is 0 by default,} + @r{and @code{worried} is 21 by default.} */ +enum emotion_state @{ neutral, + happy = 5, sad = 20, worried, + calm = -5, nervous = -300 @}; +@end example + +If an enumerator is obsolete, you can specify that using it should +cause a warning, by including an attribute in the enumerator's +declaration. 
Here is how @code{happy} would look with this +attribute: + +@example +happy __attribute__ + ((deprecated + ("impossible under plutocratic rule"))) + = 5, +@end example + +@xref{Attributes}. + +You can declare variables with the enumeration type: + +@example +enum emotion_state feelings_now; +@end example + +In the C code itself, this is equivalent to declaring the variable +@code{int}. (If all the enumeration values are positive, it is +equivalent to @code{unsigned int}.) However, declaring it with the +enumeration type has an advantage in debugging, because GDB knows it +should display the current value of the variable using the +corresponding name. If the variable's type is @code{int}, GDB can +only show the value as a number. + +The identifier that follows @code{enum} is called a @dfn{type tag} +since it distinguishes different enumeration types. Type tags are in +a separate name space and belong to scopes like most other names in C@. +@xref{Type Tags}, for explanation. + +You can predeclare an @code{enum} type tag like a structure or union +type tag, like this: + +@example +enum foo; +@end example + +@noindent +The @code{enum} type is incomplete until you finish defining it. + +You can optionally include a trailing comma at the end of a list of +enumeration values: + +@example +enum emotion_state @{ neutral, happy, sad, worried, + calm, nervous, @}; +@end example + +@noindent +This is useful in some macro definitions, since it enables you to +assemble the list of enumerators without knowing which one is last. +The extra comma does not change the meaning of the enumeration in any +way. + +@node Defining Typedef Names +@chapter Defining Typedef Names +@cindex typedef names +@findex typedef + +You can define a data type keyword as an alias for any type, and then +use the alias syntactically like a built-in type keyword such as +@code{int}. You do this using @code{typedef}, so these aliases are +also called @dfn{typedef names}. 
+ +@code{typedef} is followed by text that looks just like a variable +declaration, but instead of declaring variables it defines data type +keywords. + +Here's how to define @code{fooptr} as a typedef alias for the type +@code{struct foo *}, then declare @code{x} and @code{y} as variables +with that type: + +@example +typedef struct foo *fooptr; + +fooptr x, y; +@end example + +@noindent +That declaration is equivalent to the following one: + +@example +struct foo *x, *y; +@end example + +You can define a typedef alias for any type. For instance, this makes +@code{frobcount} an alias for type @code{int}: + +@example +typedef int frobcount; +@end example + +@noindent +This doesn't define a new type distinct from @code{int}. Rather, +@code{frobcount} is another name for the type @code{int}. Once the +variable is declared, it makes no difference which name the +declaration used. + +There is a syntactic difference, however, between @code{frobcount} and +@code{int}: A typedef name cannot be used with +@code{signed}, @code{unsigned}, @code{long} or @code{short}. It has +to specify the type all by itself. So you can't write this: + +@example +unsigned frobcount f1; /* @r{Error!} */ +@end example + +But you can write this: + +@example +typedef unsigned int unsigned_frobcount; + +unsigned_frobcount f1; +@end example + +In other words, a typedef name is not an alias for @emph{a keyword} +such as @code{int}. It stands for a @emph{type}, and that could be +the type @code{int}. + +Typedef names are in the same namespace as functions and variables, so +you can't use the same name for a typedef and a function, or a typedef +and a variable. When a typedef is declared inside a code block, it is +in scope only in that block. + +@strong{Warning:} Avoid defining typedef names that end in @samp{_t}, +because many of these have standard meanings. 
+ +You can redefine a typedef name to the exact same type as its first +definition, but you cannot redefine a typedef name to a +different type, even if the two types are compatible. For example, this +is valid: + +@example +typedef int frobcount; +typedef int frotzcount; +typedef frotzcount frobcount; +typedef frobcount frotzcount; +@end example + +@noindent +because each typedef name is always defined with the same type +(@code{int}), but this is not valid: + +@example +enum foo @{f1, f2, f3@}; +typedef enum foo frobcount; +typedef int frobcount; +@end example + +@noindent +Even though the type @code{enum foo} is compatible with @code{int}, +they are not the @emph{same} type. + +@node Statements +@chapter Statements +@cindex statements + +A @dfn{statement} specifies computations to be done for effect; it +does not produce a value, as an expression would. In general a +statement ends with a semicolon (@samp{;}), but blocks (which are +statements, more or less) are an exception to that rule. +@ifnottex +@xref{Blocks}. +@end ifnottex + +The places to use statements are inside a block, and inside a +complex statement. A @dfn{complex statement} contains one or two +components that are nested statements. Each such component must +consist of one and only one statement. The way to put multiple +statements in such a component is to group them into a @dfn{block} +(@pxref{Blocks}), which counts as one statement. + +The following sections describe the various kinds of statement. + +@menu +* Expression Statement:: Evaluate an expression, as a statement, + usually done for a side effect. +* if Statement:: Basic conditional execution. +* if-else Statement:: Multiple branches for conditional execution. +* Blocks:: Grouping multiple statements together. +* return Statement:: Return a value from a function. +* Loop Statements:: Repeatedly executing a statement or block. +* switch Statement:: Multi-way conditional choices. 
+* switch Example:: A plausible example of using @code{switch}. +* Duffs Device:: A special way to use @code{switch}. +* Case Ranges:: Ranges of values for @code{switch} cases. +* Null Statement:: A statement that does nothing. +* goto Statement:: Jump to another point in the source code, + identified by a label. +* Local Labels:: Labels with limited scope. +* Labels as Values:: Getting the address of a label. +* Statement Exprs:: A series of statements used as an expression. +@end menu + +@node Expression Statement +@section Expression Statement +@cindex expression statement +@cindex statement, expression + +The most common kind of statement in C is an @dfn{expression statement}. +It consists of an expression followed by a +semicolon. The expression's value is discarded, so the expressions +that are useful are those that have side effects: assignment +expressions, increment and decrement expressions, and function calls. +Here are examples of expression statements: + +@smallexample +x = 5; /* @r{Assignment expression.} */ +p++; /* @r{Increment expression.} */ +printf ("Done\n"); /* @r{Function call expression.} */ +*p; /* @r{Cause @code{SIGSEGV} signal if @code{p} is null.} */ +x + y; /* @r{Useless statement without effect.} */ +@end smallexample + +In very unusual circumstances we use an expression statement +whose purpose is to get a fault if an address is invalid: + +@smallexample +volatile char *p; +@r{@dots{}} +*p; /* @r{Cause signal if @code{p} is null.} */ +@end smallexample + +If the target of @code{p} is not declared @code{volatile}, the +compiler might optimize away the memory access, since it knows that +the value isn't really used. @xref{volatile}. + +@node if Statement +@section @code{if} Statement +@cindex @code{if} statement +@cindex statement, @code{if} +@findex if + +An @code{if} statement computes an expression to decide +whether to execute the following statement or not. 
+It looks like this: + +@example +if (@var{condition}) + @var{execute-if-true} +@end example + +The first thing this does is compute the value of @var{condition}. If +that is true (nonzero), then it executes the statement +@var{execute-if-true}. If the value of @var{condition} is false +(zero), it doesn't execute @var{execute-if-true}; instead, it does +nothing. + +This is a @dfn{complex statement} because it contains a component +@var{execute-if-true} that is a nested statement. It must be one +and only one statement. The way to put multiple statements there is +to group them into a @dfn{block} (@pxref{Blocks}). + +@node if-else Statement +@section @code{if-else} Statement +@cindex @code{if}@dots{}@code{else} statement +@cindex statement, @code{if}@dots{}@code{else} +@findex else + +An @code{if}-@code{else} statement computes an expression to decide +which of two nested statements to execute. +It looks like this: + +@example +if (@var{condition}) + @var{if-true-substatement} +else + @var{if-false-substatement} +@end example + +The first thing this does is compute the value of @var{condition}. If +that is true (nonzero), then it executes the statement +@var{if-true-substatement}. If the value of @var{condition} is false +(zero), then it executes the statement @var{if-false-substatement} instead. + +This is a @dfn{complex statement} because it contains components +@var{if-true-substatement} and @var{if-false-substatement} that are +nested statements. Each must be one and only one statement. The way +to put multiple statements in such a component is to group them into a +@dfn{block} (@pxref{Blocks}). + +@node Blocks +@section Blocks +@cindex block +@cindex compound statement + +A @dfn{block} is a construct that contains multiple statements of any +kind. It begins with @samp{@{} and ends with @samp{@}}, and has a +series of statements and declarations in between. Another name for +blocks is @dfn{compound statements}. + +Is a block a statement? Yes and no. 
It doesn't @emph{look} like a +normal statement---it does not end with a semicolon. But you can +@emph{use} it like a statement; anywhere that a statement is required +or allowed, you can write a block and consider that block a statement. + +So far it seems that a block is a kind of statement with an unusual +syntax. But that is not entirely true: a function body is also a +block, and that block is definitely not a statement. The text after a +function header is not treated as a statement; only a function body is +allowed there, and nothing else would be meaningful there. + +In a formal grammar we would have to choose---either a block is a kind +of statement or it is not. But this manual is meant for humans, not +for parser generators. The clearest answer for humans is, ``a block +is a statement, in some ways.'' + +@cindex nested block +@cindex internal block +A block that isn't a function body is called an @dfn{internal block} +or a @dfn{nested block}. You can put a nested block directly inside +another block, but more often the nested block is inside some complex +statement, such as a @code{for} statement or an @code{if} statement. + +There are two uses for nested blocks in C: + +@itemize @bullet +@item +To specify the scope for local declarations. For instance, a local +variable's scope is the rest of the innermost containing block. + +@item +To write a series of statements where, syntactically, one statement is +called for. For instance, the @var{execute-if-true} of an @code{if} +statement is one statement. To put multiple statements there, they +have to be wrapped in a block, like this: + +@example +if (x < 0) + @{ + printf ("x was negative\n"); + x = -x; + @} +@end example +@end itemize + +This example (repeated from above) shows a nested block which serves +both purposes: it includes two statements (plus a declaration) in the +body of a @code{while} statement, and it provides the scope for the +declaration of @code{q}. 
+ +@example +void +free_intlist (struct intlistlink *p) +@{ + while (p) + @{ + struct intlistlink *q = p; + p = p->next; + free (q); + @} +@} +@end example + +@node return Statement +@section @code{return} Statement +@cindex @code{return} statement +@cindex statement, @code{return} +@findex return + +The @code{return} statement makes the containing function return +immediately. It has two forms. This one specifies no value to +return: + +@example +return; +@end example + +@noindent +That form is meant for functions whose return type is @code{void} +(@pxref{The Void Type}). You can also use it in a function that +returns nonvoid data, but that's a bad idea, since it makes the +function return garbage. + +The form that specifies a value looks like this: + +@example +return @var{value}; +@end example + +@noindent +which computes the expression @var{value} and makes the function +return that. If necessary, the value undergoes type conversion to +the function's declared return value type, which works like +assigning the value to a variable of that type. + +@node Loop Statements +@section Loop Statements +@cindex loop statements +@cindex statements, loop +@cindex iteration + +You can use a loop statement when you need to execute a series of +statements repeatedly, making an @dfn{iteration}. C provides several +different kinds of loop statements, described in the following +subsections. + +Every kind of loop statement is a complex statement because it contains a +component, here called @var{body}, which is a nested statement. +Most often the body is a block. + +@menu +* while Statement:: Loop as long as a test expression is true. +* do-while Statement:: Execute a loop once, with further looping + as long as a test expression is true. +* break Statement:: End a loop immediately. +* for Statement:: Iterative looping. +* Example of for:: An example of iterative looping. +* Omitted for-Expressions:: for-loop expression options. 
+* for-Index Declarations:: for-loop declaration options. +* continue Statement:: Begin the next cycle of a loop. +@end menu + +@node while Statement +@subsection @code{while} Statement +@cindex @code{while} statement +@cindex statement, @code{while} +@findex while + +The @code{while} statement is the simplest loop construct. +It looks like this: + +@example +while (@var{test}) + @var{body} +@end example + +Here, @var{body} is a statement (often a nested block) to repeat, and +@var{test} is the test expression that controls whether to repeat it again. +Each iteration of the loop starts by computing @var{test} and, if it +is true (nonzero), that means the loop should execute @var{body} again +and then start over. + +Here's an example of advancing to the last structure in a chain of +structures chained through the @code{next} field: + +@example +#include <stddef.h> /* @r{Defines @code{NULL}.} */ +@r{@dots{}} +while (chain->next != NULL) + chain = chain->next; +@end example + +@noindent +This code assumes the chain isn't empty to start with; if the chain is +empty (that is, if @code{chain} is a null pointer), the code gets a +@code{SIGSEGV} signal trying to dereference that null pointer (@pxref{Signals}). + +@node do-while Statement +@subsection @code{do-while} Statement +@cindex @code{do}--@code{while} statement +@cindex statement, @code{do}--@code{while} +@findex do + +The @code{do}--@code{while} statement is a simple loop construct that +performs the test at the end of the iteration. + +@example +do + @var{body} +while (@var{test}); +@end example + +Here, @var{body} is a statement (possibly a block) to repeat, and +@var{test} is an expression that controls whether to repeat it again. + +Each iteration of the loop starts by executing @var{body}. Then it +computes @var{test} and, if it is true (nonzero), that means to go +back and start over with @var{body}. If @var{test} is false (zero), +then the loop stops repeating and execution moves on past it. 
+ +@node break Statement +@subsection @code{break} Statement +@cindex @code{break} statement +@cindex statement, @code{break} +@findex break + +The @code{break} statement looks like @samp{break;}. Its effect is to +exit immediately from the innermost loop construct or @code{switch} +statement (@pxref{switch Statement}). + +For example, this loop advances @code{p} until the next null +character or newline. + +@example +while (*p) + @{ + /* @r{End loop if we have reached a newline.} */ + if (*p == '\n') + break; + p++; + @} +@end example + +When there are nested loops, the @code{break} statement exits from the +innermost loop containing it. + +@example +struct list_if_tuples +@{ + struct list_if_tuples *next; + int length; + data *contents; +@}; + +void +process_all_elements (struct list_if_tuples *list) +@{ + while (list) + @{ + /* @r{Process all the elements in this node's vector,} + @r{stopping when we reach one that is null.} */ + for (int i = 0; i < list->length; i++) + @{ + /* @r{Null element terminates this node's vector.} */ + if (list->contents[i] == NULL) + /* @r{Exit the @code{for} loop.} */ + break; + /* @r{Operate on the next element.} */ + process_element (list->contents[i]); + @} + + list = list->next; + @} +@} +@end example + +The only way in C to exit from an outer loop is with +@code{goto} (@pxref{goto Statement}). + +@node for Statement +@subsection @code{for} Statement +@cindex @code{for} statement +@cindex statement, @code{for} +@findex for + +A @code{for} statement uses three expressions written inside a +parenthetical group to define the repetition of the loop. The first +expression says how to prepare to start the loop. The second says how +to test, before each iteration, whether to continue looping. The +third says how to advance, at the end of an iteration, for the next +iteration. 
All together, it looks like this: + +@example +for (@var{start}; @var{continue-test}; @var{advance}) + @var{body} +@end example + +The first thing the @code{for} statement does is compute @var{start}. +The next thing it does is compute the expression @var{continue-test}. +If that expression is false (zero), the @code{for} statement finishes +immediately, so @var{body} is executed zero times. + +However, if @var{continue-test} is true (nonzero), the @code{for} +statement executes @var{body}, then @var{advance}. Then it loops back +to the not-quite-top to test @var{continue-test} again. But it does +not compute @var{start} again. + +@node Example of for +@subsection Example of @code{for} + +Here is the @code{for} statement from the iterative Fibonacci +function: + +@example +int i; +for (i = 1; i < n; ++i) + /* @r{If @code{n} is 1 or less, the loop runs zero times,} */ + /* @r{since @code{i < n} is false the first time.} */ + @{ + /* @r{Now @var{last} is @code{fib (@var{i})}} + @r{and @var{prev} is @code{fib (@var{i} @minus{} 1)}.} */ + /* @r{Compute @code{fib (@var{i} + 1)}.} */ + int next = prev + last; + /* @r{Shift the values down.} */ + prev = last; + last = next; + /* @r{Now @var{last} is @code{fib (@var{i} + 1)}} + @r{and @var{prev} is @code{fib (@var{i})}.} + @r{But that won't stay true for long,} + @r{because we are about to increment @var{i}.} */ + @} +@end example + +In this example, @var{start} is @code{i = 1}, meaning set @code{i} to +1. @var{continue-test} is @code{i < n}, meaning keep repeating the +loop as long as @code{i} is less than @code{n}. @var{advance} is +@code{++i}, meaning increment @code{i} by 1. The body is a block +that contains a declaration and two statements. 
+ +@node Omitted for-Expressions +@subsection Omitted @code{for}-Expressions + +A fully-fleshed @code{for} statement contains all these parts, + +@example +for (@var{start}; @var{continue-test}; @var{advance}) + @var{body} +@end example + +@noindent +but you can omit any of the three expressions inside the parentheses. +The parentheses and the two semicolons are required syntactically, but +the expressions between them may be missing. A missing expression +means this loop doesn't use that particular feature of the @code{for} +statement. + +Instead of using @var{start}, you can do the loop preparation +before the @code{for} statement: the effect is the same. So we +could have written the beginning of the previous example this way: + +@example +int i = 1; +for (; i < n; ++i) +@end example + +@noindent +instead of this way: + +@example +int i; +for (i = 1; i < n; ++i) +@end example + +Omitting @var{continue-test} means the loop runs forever (or until +something else causes exit from it). Statements inside the loop can +test conditions for termination and use @samp{break;} to exit. This +is more flexible since you can put those tests anywhere in the loop, +not solely at the beginning. + +Putting an expression in @var{advance} is almost equivalent to writing +it at the end of the loop body; it does almost the same thing. The +only difference is for the @code{continue} statement (@pxref{continue +Statement}). So we could have written this: + +@example +for (i = 0; i < n;) + @{ + @r{@dots{}} + ++i; + @} +@end example + +@noindent +instead of this: + +@example +for (i = 0; i < n; ++i) + @{ + @r{@dots{}} + @} +@end example + +The choice is mainly a matter of what is more readable for +programmers. However, there is also a syntactic difference: +@var{advance} is an expression, not a statement. It can't include +loops, blocks, declarations, etc. 
+ +@node for-Index Declarations +@subsection @code{for}-Index Declarations + +You can declare loop-index variables directly in the @var{start} +portion of the @code{for}-loop, like this: + +@example +for (int i = 0; i < n; ++i) + @{ + @r{@dots{}} + @} +@end example + +This kind of @var{start} is limited to a single declaration; it can +declare one or more variables, separated by commas, all of which are +the same @var{basetype} (@code{int}, in this example): + +@example +for (int i = 0, j = 1, *p = NULL; i < n; ++i, ++j, ++p) + @{ + @r{@dots{}} + @} +@end example + +@noindent +The scope of these variables is the @code{for} statement as a whole. +See @ref{Variable Declarations} for an explanation of @var{basetype}. + +Variables declared in @code{for} statements should have initializers. +Omitting the initialization gives the variables unpredictable initial +values, so this code is erroneous. + +@example +for (int i; i < n; ++i) + @{ + @r{@dots{}} + @} +@end example + +@node continue Statement +@subsection @code{continue} Statement +@cindex @code{continue} statement +@cindex statement, @code{continue} +@findex continue + +The @code{continue} statement looks like @samp{continue;}, and its +effect is to jump immediately to the end of the innermost loop +construct. If it is a @code{for}-loop, the next thing that happens +is to execute the loop's @var{advance} expression. + +For example, this loop increments @code{p} until the next null character +or newline, and operates (in some way not shown) on all the characters +in the line except for spaces. All it does with spaces is skip them. + +@example +for (;*p; ++p) + @{ + /* @r{End loop if we have reached a newline.} */ + if (*p == '\n') + break; + /* @r{Pay no attention to spaces.} */ + if (*p == ' ') + continue; + /* @r{Operate on the next character.} */ + @r{@dots{}} + @} +@end example + +@noindent +Executing @samp{continue;} skips the loop body but it does not +skip the @var{advance} expression, @code{++p}. 
+ +We could also write it like this: + +@example +for (;*p; ++p) + @{ + /* @r{Exit if we have reached a newline.} */ + if (*p == '\n') + break; + /* @r{Pay no attention to spaces.} */ + if (*p != ' ') + @{ + /* @r{Operate on the next character.} */ + @r{@dots{}} + @} + @} +@end example + +The advantage of using @code{continue} is that it reduces the +depth of nesting. + +Contrast @code{continue} with the @code{break} statement. @xref{break +Statement}. + +@node switch Statement +@section @code{switch} Statement +@cindex @code{switch} statement +@cindex statement, @code{switch} +@findex switch +@findex case +@findex default + +The @code{switch} statement selects code to run according to the value +of an expression. The expression, in parentheses, follows the keyword +@code{switch}. After that come all the cases to select among, +inside braces. It looks like this: + +@example +switch (@var{selector}) + @{ + @var{cases}@r{@dots{}} + @} +@end example + +A case can look like this: + +@example +case @var{value}: + @var{statements} + break; +@end example + +@noindent +which means ``come here if @var{selector} happens to have the value +@var{value},'' or like this (a GNU C extension): + +@example +case @var{rangestart} ... @var{rangeend}: + @var{statements} + break; +@end example + +@noindent +which means ``come here if @var{selector} happens to have a value +between @var{rangestart} and @var{rangeend} (inclusive).'' @xref{Case +Ranges}. + +The values in @code{case} labels must reduce to integer constants. +They can use arithmetic, and @code{enum} constants, but they cannot +refer to data in memory, because they have to be computed at compile +time. It is an error if two @code{case} labels specify the same +value, or ranges that overlap, or if one is a range and the other is a +value in that range. 
+ +You can also define a default case to handle ``any other value,'' like +this: + +@example +default: + @var{statements} + break; +@end example + +If the @code{switch} statement has no @code{default:} label, then it +does nothing when the value matches none of the cases. + +The brace-group inside the @code{switch} statement is a block, and you +can declare variables with that scope just as in any other block +(@pxref{Blocks}). However, initializers in these declarations won't +necessarily be executed every time the @code{switch} statement runs, +so it is best to avoid giving them initializers. + +@code{break;} inside a @code{switch} statement exits immediately from +the @code{switch} statement. @xref{break Statement}. + +If there is no @code{break;} at the end of the code for a case, +execution continues into the code for the following case. This +happens more often by mistake than intentionally, but since this +feature is used in real code, we cannot eliminate it. + +@strong{Warning:} When one case is intended to fall through to the +next, write a comment like @samp{falls through} to say it's +intentional. That way, other programmers won't assume it was an error +and ``fix'' it erroneously. + +Consecutive @code{case} statements could, pedantically, be considered +an instance of falling through, but we don't consider or treat them that +way because they won't confuse anyone. 
+ +@node switch Example +@section Example of @code{switch} + +Here's an example of using the @code{switch} statement +to distinguish among characters: + +@cindex counting vowels and punctuation +@example +struct vp @{ int vowels, punct; @}; + +struct vp +count_vowels_and_punct (char *string) +@{ + int c; + int vowels = 0; + int punct = 0; + /* @r{Don't change the parameter itself.} */ + /* @r{That helps in debugging.} */ + char *p = string; + struct vp value; + + while (c = *p++) + switch (c) + @{ + case 'y': + case 'Y': + /* @r{We assume @code{y_is_consonant} will check surrounding + letters to determine whether this y is a vowel.} */ + if (y_is_consonant (p - 1)) + break; + + /* @r{Falls through} */ + + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + case 'A': + case 'E': + case 'I': + case 'O': + case 'U': + vowels++; + break; + + case '.': + case ',': + case ':': + case ';': + case '?': + case '!': + case '\"': + case '\'': + punct++; + break; + @} + + value.vowels = vowels; + value.punct = punct; + + return value; +@} +@end example + +@node Duffs Device +@section Duff's Device +@cindex Duff's device + +The cases in a @code{switch} statement can be inside other control +constructs. For instance, we can use a technique known as @dfn{Duff's +device} to optimize this simple function, + +@example +void +copy (char *to, char *from, int count) +@{ + while (count > 0) + *to++ = *from++, count--; +@} +@end example + +@noindent +which copies memory starting at @var{from} to memory starting at +@var{to}. 
+ +Duff's device involves unrolling the loop so that it copies +several characters each time around, and using a @code{switch} statement +to enter the loop body at the proper point: + +@example +void +copy (char *to, char *from, int count) +@{ + if (count <= 0) + return; + int n = (count + 7) / 8; + switch (count % 8) + @{ + do @{ + case 0: *to++ = *from++; + case 7: *to++ = *from++; + case 6: *to++ = *from++; + case 5: *to++ = *from++; + case 4: *to++ = *from++; + case 3: *to++ = *from++; + case 2: *to++ = *from++; + case 1: *to++ = *from++; + @} while (--n > 0); + @} +@} +@end example + +@node Case Ranges +@section Case Ranges +@cindex case ranges +@cindex ranges in case statements + +You can specify a range of consecutive values in a single @code{case} label, +like this: + +@example +case @var{low} ... @var{high}: +@end example + +@noindent +This has the same effect as the proper number of individual @code{case} +labels, one for each integer value from @var{low} to @var{high}, inclusive. + +This feature is especially useful for ranges of ASCII character codes: + +@example +case 'A' ... 'Z': +@end example + +@strong{Be careful:} with integers, write spaces around the @code{...} +to prevent it from being parsed wrong. For example, write this: + +@example +case 1 ... 5: +@end example + +@noindent +rather than this: + +@example +case 1...5: +@end example + +@node Null Statement +@section Null Statement +@cindex null statement +@cindex statement, null + +A @dfn{null statement} is just a semicolon. It does nothing. + +A null statement is a placeholder for use where a statement is +grammatically required, but there is nothing to be done. For +instance, sometimes all the work of a @code{for}-loop is done in the +@code{for}-header itself, leaving no work for the body. 
Here is an +example that searches for the first newline in @code{array}: + +@example +for (p = array; *p != '\n'; p++) + ; +@end example + +@node goto Statement +@section @code{goto} Statement and Labels +@cindex @code{goto} statement +@cindex statement, @code{goto} +@cindex label +@findex goto + +The @code{goto} statement looks like this: + +@example +goto @var{label}; +@end example + +@noindent +Its effect is to transfer control immediately to another part of the +current function---where the label named @var{label} is defined. + +An ordinary label definition looks like this: + +@example +@var{label}: +@end example + +@noindent +and it can appear before any statement. You can't use @code{default} +as a label, since that has a special meaning for @code{switch} +statements. + +An ordinary label doesn't need a separate declaration; defining it is +enough. + +Here's an example of using @code{goto} to implement a loop +equivalent to @code{do}--@code{while}: + +@example +@{ + loop_restart: + @var{body} + if (@var{condition}) + goto loop_restart; +@} +@end example + +The name space of labels is separate from that of variables and functions. +Thus, there is no error in using a single name in both ways: + +@example +@{ + int foo; // @r{Variable @code{foo}.} + foo: // @r{Label @code{foo}.} + @var{body} + if (foo > 0) // @r{Variable @code{foo}.} + goto foo; // @r{Label @code{foo}.} +@} +@end example + +Blocks have no effect on ordinary labels; each label name is defined +throughout the whole of the function it appears in. It looks strange to +jump into a block with @code{goto}, but it works. For example, + +@example +if (x < 0) + goto negative; +if (y < 0) + @{ + negative: + printf ("Negative\n"); + return; + @} +@end example + +If the goto jumps into the scope of a variable, it does not +initialize the variable. 
For example, if @code{x} is negative, + +@example +if (x < 0) + goto negative; +if (y < 0) + @{ + int i = 5; + negative: + printf ("Negative, and i is %d\n", i); + return; + @} +@end example + +@noindent +prints junk because @code{i} was not initialized. + +If the block declares a variable-length automatic array, jumping into +it gives a compilation error. However, jumping out of the scope of a +variable-length array works fine, and deallocates its storage. + +A label can't come directly before a declaration, so the code can't +jump directly to one. For example, this is not allowed: + +@example +@{ + goto foo; +foo: + int x = 5; + bar(&x); +@} +@end example + +@noindent +The workaround is to add a statement, even an empty statement, +directly after the label. For example: + +@example +@{ + goto foo; +foo: + ; + int x = 5; + bar(&x); +@} +@end example + +Likewise, a label can't be the last thing in a block. The workaround +solution is the same: add a semicolon after the label. + +These unnecessary restrictions on labels make no sense, and ought in +principle to be removed; but they do only a little harm since labels +and @code{goto} are rarely the best way to write a program. + +These examples are all artificial; it would be more natural to +write them in other ways, without @code{goto}. For instance, +the clean way to write the example that prints @samp{Negative} is this: + +@example +if (x < 0 || y < 0) + @{ + printf ("Negative\n"); + return; + @} +@end example + +@noindent +It is hard to construct simple examples where @code{goto} is actually +the best way to write a program. Its rare good uses tend to be in +complex code, thus not apt for the purpose of explaining the meaning +of @code{goto}. + +The only good time to use @code{goto} is when it makes the code +simpler than any alternative. Jumping backward is rarely desirable, +because usually the other looping and control constructs give simpler +code. 
Using @code{goto} to jump forward is more often desirable, for +instance when a function needs to do some processing in an error case +and errors can occur at various different places within the function. + +@node Local Labels +@section Locally Declared Labels +@cindex local labels +@cindex macros, local labels +@findex __label__ + +In GNU C you can declare @dfn{local labels} in any nested block +scope. A local label is used in a @code{goto} statement just like an +ordinary label, but you can only reference it within the block in +which it was declared. + +A local label declaration looks like this: + +@example +__label__ @var{label}; +@end example + +@noindent +or + +@example +__label__ @var{label1}, @var{label2}, @r{@dots{}}; +@end example + +Local label declarations must come at the beginning of the block, +before any ordinary declarations or statements. + +The label declaration declares the label @emph{name}, but does not define +the label itself. That's done in the usual way, with +@code{@var{label}:}, before one of the statements in the block. + +The local label feature is useful for complex macros. If a macro +contains nested loops, a @code{goto} can be useful for breaking out of +them. However, an ordinary label whose scope is the whole function +cannot be used: if the macro can be expanded several times in one +function, the label will be multiply defined in that function. A +local label avoids this problem. 
For example: + +@example +#define SEARCH(value, array, target) \ +do @{ \ + __label__ found; \ + __auto_type _SEARCH_target = (target); \ + __auto_type _SEARCH_array = (array); \ + int i, j; \ + int value; \ + for (i = 0; i < max; i++) \ + for (j = 0; j < max; j++) \ + if (_SEARCH_array[i][j] == _SEARCH_target) \ + @{ (value) = i; goto found; @} \ + (value) = -1; \ + found:; \ +@} while (0) +@end example + +This could also be written using a statement expression +(@pxref{Statement Exprs}): + +@example +#define SEARCH(array, target) \ +(@{ \ + __label__ found; \ + __auto_type _SEARCH_target = (target); \ + __auto_type _SEARCH_array = (array); \ + int i, j; \ + int value; \ + for (i = 0; i < max; i++) \ + for (j = 0; j < max; j++) \ + if (_SEARCH_array[i][j] == _SEARCH_target) \ + @{ value = i; goto found; @} \ + value = -1; \ + found: \ + value; \ +@}) +@end example + +Ordinary labels are visible throughout the function where they are +defined, and only in that function. However, explicitly declared +local labels of a block are visible in nested functions declared +within that block. @xref{Nested Functions}, for details. + +@xref{goto Statement}. + +@node Labels as Values +@section Labels as Values +@cindex labels as values +@cindex computed gotos +@cindex goto with computed label +@cindex address of a label + +In GNU C, you can get the address of a label defined in the current +function (or a local label defined in the containing function) with +the unary operator @samp{&&}. The value has type @code{void *}. This +value is a constant and can be used wherever a constant of that type +is valid. For example: + +@example +void *ptr; +@r{@dots{}} +ptr = &&foo; +@end example + +To use these values requires a way to jump to one. 
This is done +with the computed goto statement@footnote{The analogous feature in +Fortran is called an assigned goto, but that name seems inappropriate in +C, since you can do more with label addresses than store them in special label +variables.}, @code{goto *@var{exp};}. For example, + +@example +goto *ptr; +@end example + +@noindent +Any expression of type @code{void *} is allowed. + +@xref{goto Statement}. + +@menu +* Label Value Uses:: Examples of using label values. +* Label Value Caveats:: Limitations of label values. +@end menu + +@node Label Value Uses +@subsection Label Value Uses + +One use for label-valued constants is to initialize a static array to +serve as a jump table: + +@example +static void *array[] = @{ &&foo, &&bar, &&hack @}; +@end example + +Then you can select a label with indexing, like this: + +@example +goto *array[i]; +@end example + +@noindent +Note that this does not check whether the subscript is in bounds---array +indexing in C never checks that. + +You can make the table entries offsets instead of addresses +by subtracting one label from the others. Here is an example: + +@example +static const int array[] = @{ &&foo - &&foo, &&bar - &&foo, + &&hack - &&foo @}; +goto *(&&foo + array[i]); +@end example + +@noindent +Using offsets is preferable in shared libraries, as it avoids the need +for dynamic relocation of the array elements; therefore, the array can +be read-only. + +An array of label values or offsets serves a purpose much like that of +the @code{switch} statement. The @code{switch} statement is cleaner, +so use @code{switch} by preference when feasible. + +Another use of label values is in an interpreter for threaded code. +The labels within the interpreter function can be stored in the +threaded code for super-fast dispatching. + +@node Label Value Caveats +@subsection Label Value Caveats + +Jumping to a label defined in another function does not work. +It can cause unpredictable results. 
+ +The best way to avoid this is to store label values only in +automatic variables, or static variables whose names are declared +within the function. Never pass them as arguments. + +@cindex cloning +An optimization known as @dfn{cloning} generates multiple simplified +variants of a function's code, for use with specific fixed arguments. +Using label values in certain ways, such as saving the address in one +call to the function and using it again in another call, would make cloning +give incorrect results. These functions must disable cloning. + +Inlining calls to the function would also result in multiple copies of +the code, each with its own value of the same label. Using the label +in a computed goto is no problem, because the computed goto inhibits +inlining. However, using the label value in some other way, such as +an indication of where an error occurred, would be optimized wrong. +These functions must disable inlining. + +To prevent inlining or cloning of a function, specify +@code{__attribute__((__noinline__,__noclone__))} in its definition. +@xref{Attributes}. + +When a function uses a label value in a static variable initializer, +that automatically prevents inlining or cloning the function. + +@node Statement Exprs +@section Statements and Declarations in Expressions +@cindex statements inside expressions +@cindex declarations inside expressions +@cindex expressions containing statements + +@c the above section title wrapped and causes an underfull hbox.. i +@c changed it from "within" to "in". --mew 4feb93 +A block enclosed in parentheses can be used as an expression in GNU +C@. This provides a way to use local variables, loops and switches within +an expression. We call it a @dfn{statement expression}. + +Recall that a block is a sequence of statements +surrounded by braces. In this construct, parentheses go around the +braces. 
For example: + +@example +(@{ int y = foo (); int z; + if (y > 0) z = y; + else z = - y; + z; @}) +@end example + +@noindent +is a valid (though slightly more complex than necessary) expression +for the absolute value of @code{foo ()}. + +The last statement in the block should be an expression statement; an +expression followed by a semicolon, that is. The value of this +expression serves as the value of the statement expression. If the last +statement is anything else, the statement expression's value is +@code{void}. + +This feature is mainly useful in making macro definitions compute each +operand exactly once. @xref{Macros and Auto Type}. + +Statement expressions are not allowed in expressions that must be +constant, such as the value for an enumerator, the width of a +bit-field, or the initial value of a static variable. + +Jumping into a statement expression---with @code{goto}, or using a +@code{switch} statement outside the statement expression---is an +error. With a computed @code{goto} (@pxref{Labels as Values}), the +compiler can't detect the error, but it still won't work. + +Jumping out of a statement expression is permitted, but since +subexpressions in C are not computed in a strict order, it is +unpredictable which other subexpressions will have been computed by +then. For example, + +@example + foo (), ((@{ bar1 (); goto a; 0; @}) + bar2 ()), baz(); +@end example + +@noindent +calls @code{foo} and @code{bar1} before it jumps, and never +calls @code{baz}, but may or may not call @code{bar2}. If @code{bar2} +does get called, that occurs after @code{foo} and before @code{bar1}. + +@node Variables +@chapter Variables +@cindex variables + +Every variable used in a C program needs to be made known by a +@dfn{declaration}. It can be used only after it has been declared. 
+It is an error to declare a variable name more than once in the same +scope; an exception is that @code{extern} declarations and tentative +definitions can coexist with another declaration of the same +variable. + +Variables can be declared anywhere within a block or file. (Older +versions of C required that all variable declarations within a block +occur before any statements.) + +Variables declared within a function or block are @dfn{local} to +it. This means that the variable name is visible only until the end +of that function or block, and the memory space is allocated only +while control is within it. + +Variables declared at the top level in a file are called @dfn{file-scope}. +They are assigned fixed, distinct memory locations, so they retain +their values for the whole execution of the program. + +@menu +* Variable Declarations:: Name a variable and reserve space for it. +* Initializers:: Assigning initial values to variables. +* Designated Inits:: Assigning initial values to array elements + at particular array indices. +* Auto Type:: Obtaining the type of a variable. +* Local Variables:: Variables declared in function definitions. +* File-Scope Variables:: Variables declared outside of + function definitions. +* Static Local Variables:: Variables declared within functions, + but with permanent storage allocation. +* Extern Declarations:: Declaring a variable + which is allocated somewhere else. +* Allocating File-Scope:: When is space allocated + for file-scope variables? +* auto and register:: Historically used storage directions. +* Omitting Types:: The bad practice of declaring variables + with implicit type. 
+@end menu + +@node Variable Declarations +@section Variable Declarations +@cindex variable declarations +@cindex declaration of variables + +Here's what a variable declaration looks like: + +@example +@var{keywords} @var{basetype} @var{decorated-variable} @r{[}= @var{init}@r{]}; +@end example + +The @var{keywords} specify how to handle the scope of the variable +name and the allocation of its storage. Most declarations have +no keywords because the defaults are right for them. + +C allows these keywords to come before or after @var{basetype}, or +even in the middle of it as in @code{unsigned static int}, but don't +do that---it would surprise other programmers. Always write the +keywords first. + +The @var{basetype} can be any of the predefined types of C, or a type +keyword defined with @code{typedef}. It can also be @code{struct +@var{tag}}, @code{union @var{tag}}, or @code{enum @var{tag}}. In +addition, it can include type qualifiers such as @code{const} and +@code{volatile} (@pxref{Type Qualifiers}). + +In the simplest case, @var{decorated-variable} is just the variable +name. That declares the variable with the type specified by +@var{basetype}. For instance, + +@example +int foo; +@end example + +@noindent +uses @code{int} as the @var{basetype} and @code{foo} as the +@var{decorated-variable}. It declares @code{foo} with type +@code{int}. + +@example +struct tree_node foo; +@end example + +@noindent +declares @code{foo} with type @code{struct tree_node}. + +@menu +* Declaring Arrays and Pointers:: Declaration syntax for variables of + array and pointer types. +* Combining Variable Declarations:: More than one variable declaration + in a single statement. 
+@end menu + +@node Declaring Arrays and Pointers +@subsection Declaring Arrays and Pointers +@cindex declaring arrays and pointers +@cindex array, declaring +@cindex pointers, declaring + +To declare a variable that is an array, write +@code{@var{variable}[@var{length}]} for @var{decorated-variable}: + +@example +int foo[5]; +@end example + +To declare a variable that has a pointer type, write +@code{*@var{variable}} for @var{decorated-variable}: + +@example +struct list_elt *foo; +@end example + +These constructs nest. For instance, + +@example +int foo[3][5]; +@end example + +@noindent +declares @code{foo} as an array of 3 arrays of 5 integers each, + +@example +struct list_elt *foo[5]; +@end example + +@noindent +declares @code{foo} as an array of 5 pointers to structures, and + +@example +struct list_elt **foo; +@end example + +@noindent +declares @code{foo} as a pointer to a pointer to a structure. + +@example +int **(*foo[30])(int, double); +@end example + +@noindent +declares @code{foo} as an array of 30 pointers to functions +(@pxref{Function Pointers}), each of which must accept two arguments +(one @code{int} and one @code{double}) and return type @code{int **}. + +@example +void +bar (int size) +@{ + int foo[size]; + @r{@dots{}} +@} +@end example + +@noindent +declares @code{foo} as an array of integers with a size specified at +run time when the function @code{bar} is called. + +@node Combining Variable Declarations +@subsection Combining Variable Declarations +@cindex combining variable declarations +@cindex variable declarations, combining +@cindex declarations, combining + +When multiple declarations have the same @var{keywords} and +@var{basetype}, you can combine them using commas. 
Thus, + +@example +@var{keywords} @var{basetype} + @var{decorated-variable-1} @r{[}= @var{init1}@r{]}, + @var{decorated-variable-2} @r{[}= @var{init2}@r{]}; +@end example + +@noindent +is equivalent to + +@example +@var{keywords} @var{basetype} + @var{decorated-variable-1} @r{[}= @var{init1}@r{]}; +@var{keywords} @var{basetype} + @var{decorated-variable-2} @r{[}= @var{init2}@r{]}; +@end example + +Here are some simple examples: + +@example +int a, b; +int a = 1, b = 2; +int a, *p, array[5]; +int a = 0, *p = &a, array[5] = @{1, 2@}; +@end example + +@noindent +In the last two examples, @code{a} is an @code{int}, @code{p} is a +pointer to @code{int}, and @code{array} is an array of 5 @code{int}s. +Since the initializer for @code{array} specifies only two elements, +the other three elements are initialized to zero. + +@node Initializers +@section Initializers +@cindex initializers + +A variable's declaration, unless it is @code{extern}, should also +specify its initial value. For numeric and pointer-type variables, +the initializer is an expression for the value. If necessary, it is +converted to the variable's type, just as in an assignment. + +You can also initialize a local structure-type (@pxref{Structures}) or +local union-type (@pxref{Unions}) variable this way, from an +expression whose value has the same type. But you can't initialize an +array this way (@pxref{Arrays}), since arrays are not first-class +objects in C (@pxref{Limitations of C Arrays}) and there is no array +assignment. + +You can initialize arrays and structures componentwise, +with a list of the elements or components. You can initialize +a union with any one of its alternatives. + +@itemize @bullet +@item +A component-wise initializer for an array consists of element values +surrounded by @samp{@{@r{@dots{}}@}}. If the values in the initializer +don't cover all the elements in the array, the remaining elements are +initialized to zero. 
+ +You can omit the size of the array when you declare it, and let +the initializer specify the size: + +@example +int array[] = @{ 3, 9, 12 @}; +@end example + +@item +A component-wise initializer for a structure consists of field values +surrounded by @samp{@{@r{@dots{}}@}}. Write the field values in the same +order as the fields are declared in the structure. If the values in +the initializer don't cover all the fields in the structure, the +remaining fields are initialized to zero. + +@item +The initializer for a union-type variable has the form @code{@{ +@var{value} @}}, where @var{value} initializes the @emph{first alternative} +in the union definition. +@end itemize + +For an array of arrays, a structure containing arrays, an array of +structures, etc., you can nest these constructs. For example, + +@example +struct point @{ double x, y; @}; + +struct point series[] + = @{ @{0, 0@}, @{1.5, 2.8@}, @{99, 100.0004@} @}; +@end example + +You can omit a pair of inner braces if they contain the right +number of elements for the sub-value they initialize, so that +no elements or fields need to be filled in with zeros. +But don't do that very much, as it gets confusing. + +An array of @code{char} can be initialized using a string constant. +Recall that the string constant includes an implicit null character at +the end (@pxref{String Constants}). Using a string constant as +initializer means to use its contents as the initial values of the +array elements. Here are examples: + +@example +char text[6] = "text!"; /* @r{Includes the null.} */ +char text[5] = "text!"; /* @r{Excludes the null.} */ +char text[] = "text!"; /* @r{Gets length 6.} */ +char text[] + = @{ 't', 'e', 'x', 't', '!', 0 @}; /* @r{same as above.} */ +char text[] = @{ "text!" @}; /* @r{Braces are optional.} */ +@end example + +@noindent +and this kind of initializer can be nested inside braces to initialize +structures or arrays that contain a @code{char}-array. 
+ +In like manner, you can use a wide string constant to initialize +an array of @code{wchar_t}. + +@node Designated Inits +@section Designated Initializers +@cindex initializers with labeled elements +@cindex labeled elements in initializers +@cindex case labels in initializers +@cindex designated initializers + +In a complex structure or long array, it's useful to indicate +which field or element we are initializing. + +To designate specific array elements during initialization, include +the array index in brackets, and an assignment operator, for each +element: + +@example +int foo[10] = @{ [3] = 42, [7] = 58 @}; +@end example + +@noindent +This does the same thing as: + +@example +int foo[10] = @{ 0, 0, 0, 42, 0, 0, 0, 58, 0, 0 @}; +@end example + +The array initialization can include non-designated element values +alongside designated indices; these follow the expected ordering +of the array initialization, so that + +@example +int foo[10] = @{ [3] = 42, 43, 44, [7] = 58 @}; +@end example + +@noindent +does the same thing as: + +@example +int foo[10] = @{ 0, 0, 0, 42, 43, 44, 0, 58, 0, 0 @}; +@end example + +Note that you can only use constant expressions as array index values, +not variables. + +If you need to initialize a subsequence of sequential array elements to +the same value, you can specify a range: + +@example +int foo[100] = @{ [0 ... 19] = 42, [20 ... 99] = 43 @}; +@end example + +@noindent +Using a range this way is a GNU C extension. + +When subsequence ranges overlap, each element is initialized by the +last specification that applies to it. Thus, this initialization is +equivalent to the previous one. + +@example +int foo[100] = @{ [0 ... 99] = 43, [0 ... 19] = 42 @}; +@end example + +@noindent +as the second overrides the first for elements 0 through 19. + +The value used to initialize a range of elements is evaluated only +once, for the first element in the range. So for example, this code + +@example +int random_values[100] + = @{ [0 ... 
99] = get_random_number() @}; +@end example + +@noindent +would initialize all 100 elements of the array @code{random_values} to +the same value---probably not what is intended. + +Similarly, you can initialize specific fields of a structure variable +by specifying the field name prefixed with a dot: + +@example +struct point @{ int x; int y; @}; + +struct point foo = @{ .y = 42 @}; +@end example + +@noindent +The same syntax works for union variables as well: + +@example +union int_double @{ int i; double d; @}; + +union int_double foo = @{ .d = 34 @}; +@end example + +@noindent +This converts the integer value 34 to a double and stores it +in the union variable @code{foo}. + +You can designate both array elements and structure elements in +the same initialization; for example, here's an array of point +structures: + +@example +struct point point_array[10] = @{ [4].y = 32, [6].y = 39 @}; +@end example + +Along with the capability to specify particular array and structure +elements to initialize comes the possibility of initializing the same +element more than once: + +@example +int foo[10] = @{ [4] = 42, [4] = 98 @}; +@end example + +@noindent +In such a case, the last initialization value is retained. + +@node Auto Type +@section Referring to a Type with @code{__auto_type} +@findex __auto_type +@findex typeof +@cindex macros, types of arguments + +You can declare a variable copying the type from +the initializer by using @code{__auto_type} instead of a particular type. +Here's an example: + +@example +#define max(a,b) \ + (@{ __auto_type _a = (a); \ + __auto_type _b = (b); \ + _a > _b ? _a : _b; @}) +@end example + +This defines @code{_a} to be of the same type as @code{a}, and +@code{_b} to be of the same type as @code{b}. This is a useful thing +to do in a macro that ought to be able to handle any type of data +(@pxref{Macros and Auto Type}). 
+ +The original GNU C method for obtaining the type of a value is to use +@code{typeof}, which takes as an argument either a value or the name of +a type. The previous example could also be written as: + +@example +#define max(a,b) \ + (@{ typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a > _b ? _a : _b; @}) +@end example + +@code{typeof} is more flexible than @code{__auto_type}; however, the +principal use case for @code{typeof} is in variable declarations with +initialization, which is exactly what @code{__auto_type} handles. + +@node Local Variables +@section Local Variables +@cindex local variables +@cindex variables, local + +Declaring a variable inside a function definition (@pxref{Function +Definitions}) makes the variable name @dfn{local} to the containing +block---that is, the containing pair of braces. More precisely, the +variable's name is visible starting just after where it appears in the +declaration, and its visibility continues until the end of the block. + +Local variables in C are generally @dfn{automatic} variables: each +variable's storage exists only from the declaration to the end of the +block. Execution of the declaration allocates the storage, computes +the initial value, and stores it in the variable. The end of the +block deallocates the storage.@footnote{Due to compiler optimizations, +allocation and deallocation don't necessarily really happen at +those times.} + +@strong{Warning:} Two declarations for the same local variable +in the same scope are an error. + +@strong{Warning:} Automatic variables are stored in the run-time stack. +The total space for the program's stack may be limited; therefore, +in using very large arrays, it may be necessary to allocate +them in some other way to stop the program from crashing. + +@strong{Warning:} If the declaration of an automatic variable does not +specify an initial value, the variable starts out containing garbage. 
+In this example, the value printed could be anything at all: + +@example +@{ + int i; + + printf ("Print junk %d\n", i); +@} +@end example + +In a simple test program, that statement is likely to print 0, simply +because every process starts with memory zeroed. But don't rely on it +to be zero---that is erroneous. + +@strong{Note:} Make sure to store a value into each local variable (by +assignment, or by initialization) before referring to its value. + +@node File-Scope Variables +@section File-Scope Variables +@cindex file-scope variables +@cindex global variables +@cindex variables, file-scope +@cindex variables, global + +A variable declaration at the top level in a file (not inside a +function definition) declares a @dfn{file-scope variable}. Loading a +program allocates the storage for all the file-scope variables in it, +and initializes them too. + +Each file-scope variable is either @dfn{static} (limited to one +compilation module) or @dfn{global} (shared with all compilation +modules in the program). To make the variable static, write the +keyword @code{static} at the start of the declaration. Omitting +@code{static} makes the variable global. + +The initial value for a file-scope variable can't depend on the +contents of storage, and can't call any functions. + +@example +int foo = 5; /* @r{Valid.} */ +int bar = foo; /* @r{Invalid!} */ +int bar = sin (1.0); /* @r{Invalid!} */ +@end example + +But it can use the address of another file-scope variable: + +@example +int foo; +int *bar = &foo; /* @r{Valid.} */ +int arr[5]; +int *bar3 = &arr[3]; /* @r{Valid.} */ +int *bar4 = arr + 4; /* @r{Valid.} */ +@end example + +It is valid for a module to have multiple declarations for a +file-scope variable, as long as they are all global or all static, but +at most one declaration can specify an initial value for it. 
+ +@node Static Local Variables +@section Static Local Variables +@cindex static local variables +@cindex variables, static local +@findex static + +The keyword @code{static} in a local variable declaration says to +allocate the storage for the variable permanently, just like a +file-scope variable, even if the declaration is within a function. + +Here's an example: + +@example +int +increment_counter () +@{ + static int counter = 0; + return ++counter; +@} +@end example + +The scope of the name @code{counter} runs from the declaration to the +end of the containing block, just like an automatic local variable, +but its storage is permanent, so the value persists from one call to +the next. As a result, each call to @code{increment_counter} +returns a different, unique value. + +The initial value of a static local variable has the same limitations +as for file-scope variables: it can't depend on the contents of +storage or call any functions. It can use the address of a file-scope +variable or a static local variable, because those addresses are +determined before the program runs. + +@node Extern Declarations +@section @code{extern} Declarations +@cindex @code{extern} declarations +@cindex declarations, @code{extern} +@findex extern + +An @code{extern} declaration is used to refer to a global variable +whose principal declaration comes elsewhere---in the same module, or in +another compilation module. It looks like this: + +@example +extern @var{basetype} @var{decorated-variable}; +@end example + +Its meaning is that, in the current scope, the variable name refers to +the file-scope variable of that name---which needs to be declared in a +non-@code{extern}, non-@code{static} way somewhere else. 
+ +For instance, if one compilation module has this global variable +declaration + +@example +int error_count = 0; +@end example + +@noindent +then other compilation modules can specify this + +@example +extern int error_count; +@end example + +@noindent +to allow reference to the same variable. + +The usual place to write an @code{extern} declaration is at top level +in a source file, but you can write an @code{extern} declaration +inside a block to make a global or static file-scope variable +accessible in that block. + +Since an @code{extern} declaration does not allocate space for the +variable, it can omit the size of an array: + +@example +extern int array[]; +@end example + +You can use @code{array} normally in all contexts where it is +converted automatically to a pointer. However, to use it as the +operand of @code{sizeof} is an error, since the size is unknown. + +It is valid to have multiple @code{extern} declarations for the same +variable, even in the same scope, if they give the same type. They do +not conflict---they agree. For an array, it is legitimate for some +@code{extern} declarations to specify the size while others omit it. +However, if two declarations give different sizes, that is an error. + +Likewise, you can use @code{extern} declarations at file scope +(@pxref{File-Scope Variables}) followed by an ordinary global +(non-static) declaration of the same variable. They do not conflict, +because they say compatible things about the same meaning of the variable. + +@node Allocating File-Scope +@section Allocating File-Scope Variables +@cindex allocation file-scope variables +@cindex file-scope variables, allocating + +Some file-scope declarations allocate space for the variable, and some +don't. + +A file-scope declaration with an initial value @emph{must} allocate +space for the variable; if there are two such declarations for the +same variable, even in different compilation modules, they conflict. 
+ +An @code{extern} declaration @emph{never} allocates space for the variable. +If all the top-level declarations of a certain variable are +@code{extern}, the variable never gets memory space. If that variable +is used anywhere in the program, the use will be reported as an error, +saying that the variable is not defined. + +@cindex tentative definition +A file-scope declaration without an initial value is called a +@dfn{tentative definition}. This is a strange hybrid: it @emph{can} +allocate space for the variable, but does not insist. So it causes no +conflict, no error, if the variable has another declaration that +allocates space for it, perhaps in another compilation module. But if +nothing else allocates space for the variable, the tentative +definition will do it. Any number of compilation modules can declare +the same variable in this way, and that is sufficient for all of them +to use the variable. + +@c @opindex -fno-common +@c @opindex --warn_common +In programs that are very large or have many contributors, it may be +wise to adopt the convention of never using tentative definitions. +You can use the compilation option @option{-fno-common} to make them +an error, or @option{--warn-common} to warn about them. + +If a file-scope variable gets its space through a tentative +definition, it starts out containing all zeros. + +@node auto and register +@section @code{auto} and @code{register} +@cindex @code{auto} declarations +@cindex @code{register} declarations +@findex auto +@findex register + +For historical reasons, you can write @code{auto} or @code{register} +before a local variable declaration. @code{auto} merely emphasizes +that the variable isn't static; it changes nothing. + +@code{register} suggests to the compiler storing this variable in a +register. However, GNU C ignores this suggestion, since it can +choose the best variables to store in registers without any hints. 
+ +It is an error to take the address of a variable declared +@code{register}, so you cannot use the unary @samp{&} operator on it. +If the variable is an array, you can't use it at all (other than as +the operand of @code{sizeof}), which makes it rather useless. + +@node Omitting Types +@section Omitting Types in Declarations +@cindex omitting types in declarations + +The syntax of C traditionally allows omitting the data type in a +declaration if it specifies a storage class, a type qualifier (see the +next chapter), or @code{auto} or @code{register}. Then the type +defaults to @code{int}. For example: + +@example +auto foo = 42; +@end example + +This is bad practice; if you see it, fix it. + +@node Type Qualifiers +@chapter Type Qualifiers + +A declaration can include type qualifiers to advise the compiler +about how the variable will be used. There are three different +qualifiers, @code{const}, @code{volatile} and @code{restrict}. They +pertain to different issues, so you can use more than one together. +For instance, @code{const volatile} describes a value that the +program is not allowed to change, but might have a different value +each time the program examines it. (This might perhaps be a special +hardware register, or part of shared memory.) + +If you are just learning C, you can skip this chapter. + +@menu +* const:: Variables whose values don't change. +* volatile:: Variables whose values may be accessed + or changed outside of the control of + this program. +* restrict Pointers:: Restricted pointers for code optimization. +* restrict Pointer Example:: Example of how that works. +@end menu + +@node const +@section @code{const} Variables and Fields +@cindex @code{const} variables and fields +@cindex variables, @code{const} +@findex const + +You can mark a variable as ``constant'' by writing @code{const} in +front of the declaration. This says to treat any assignment to that +variable as an error. 
It may also permit some compiler +optimizations---for instance, to fetch the value only once to satisfy +multiple references to it. The construct looks like this: + +@example +const double pi = 3.14159; +@end example + +After this definition, the code can use the variable @code{pi} +but cannot assign a different value to it. + +@example +pi = 3.0; /* @r{Error!} */ +@end example + +Simple variables that are constant can be used for the same purposes +as enumeration constants, and they are not limited to integers. The +constantness of the variable propagates into pointers, too. + +A pointer type can specify that the @emph{target} is constant. For +example, the pointer type @code{const double *} stands for a pointer +to a constant @code{double}. That's the type that results from taking +the address of @code{pi}. Such a pointer can't be dereferenced in the +left side of an assignment. + +@example +*(&pi) = 3.0; /* @r{Error!} */ +@end example + +Nonconstant pointers can be converted automatically to constant +pointers, but not vice versa. For instance, + +@example +const double *cptr; +double *ptr; + +cptr = &pi; /* @r{Valid.} */ +cptr = ptr; /* @r{Valid.} */ +ptr = cptr; /* @r{Error!} */ +ptr = &pi; /* @r{Error!} */ +@end example + +This is not an ironclad protection against modifying the value. You +can always cast the constant pointer to a nonconstant pointer type: + +@example +ptr = (double *)cptr; /* @r{Valid.} */ +ptr = (double *)&pi; /* @r{Valid.} */ +@end example + +However, @code{const} provides a way to show that a certain function +won't modify the data structure whose address is passed to it. Here's +an example: + +@example +int +string_length (const char *string) +@{ + int count = 0; + while (*string++) + count++; + return count; +@} +@end example + +@noindent +Using @code{const char *} for the parameter is a way of saying this +function never modifies the memory of the string itself. 
+ +In calling @code{string_length}, you can specify an ordinary +@code{char *} since that can be converted automatically to @code{const +char *}. + +@node volatile +@section @code{volatile} Variables and Fields +@cindex @code{volatile} variables and fields +@cindex variables, @code{volatile} +@findex volatile + +The GNU C compiler often performs optimizations that eliminate the +need to write or read a variable. For instance, + +@example +int foo; +foo = 1; +foo++; +@end example + +@noindent +might simply store the value 2 into @code{foo}, without ever storing 1. +These optimizations can also apply to structure fields in some cases. + +If the memory containing @code{foo} is shared with another program, +or if it is examined asynchronously by hardware, such optimizations +could confuse the communication. Using @code{volatile} is one way +to prevent them. + +Writing @code{volatile} with the type in a variable or field declaration +says that the value may be examined or changed for reasons outside the +control of the program at any moment. Therefore, the program must +execute in a careful way to assure correct interaction with those +accesses, whenever they may occur. + +The simplest use looks like this: + +@example +volatile int lock; +@end example + +This directs the compiler not to do certain common optimizations on +use of the variable @code{lock}. All the reads and writes for a volatile +variable or field are really done, and done in the order specified +by the source code. Thus, this code: + +@example +lock = 1; +list = list->next; +if (lock) + lock_broken (&lock); +lock = 0; +@end example + +@noindent +really stores the value 1 in @code{lock}, even though there is no +sign it is really used, and the @code{if} statement reads and +checks the value of @code{lock}, rather than assuming it is still 1. 
+ +A limited amount of optimization can be done, in principle, on +@code{volatile} variables and fields: multiple references between two +sequence points (@pxref{Sequence Points}) can be simplified together. + +Use of @code{volatile} does not eliminate the flexibility in ordering +the computation of the operands of most operators. For instance, in +@code{lock + foo ()}, the order of accessing @code{lock} and calling +@code{foo} is not specified, so they may be done in either order; the +fact that @code{lock} is @code{volatile} has no effect on that. + +@node restrict Pointers +@section @code{restrict}-Qualified Pointers +@cindex @code{restrict} pointers +@cindex pointers, @code{restrict}-qualified +@findex restrict + +You can declare a pointer as ``restricted'' using the @code{restrict} +type qualifier, like this: + +@example +int *restrict p = x; +@end example + +@noindent +This enables better optimization of code that uses the pointer. + +If @code{p} is declared with @code{restrict}, and then the code +references the object that @code{p} points to (using @code{*p} or +@code{p[@var{i}]}), the @code{restrict} declaration promises that the +code will not access that object in any other way---only through +@code{p}. + +For instance, it means the code must not use another pointer +to access the same space, as shown here: + +@example +int *restrict p = @var{whatever}; +int *q = p; +foo (*p, *q); +@end example + +@noindent +That contradicts the @code{restrict} promise by accessing the object +that @code{p} points to using @code{q}, which bypasses @code{p}. +Likewise, it must not do this: + +@example +int *restrict p = @var{whatever}; +struct @{ int *a, *b; @} s; +s.a = p; +foo (*p, *s.a); +@end example + +@noindent +This example uses a structure field instead of the variable @code{q} +to hold the other pointer, and that contradicts the promise just the +same. 
+ +The keyword @code{restrict} also promises that @code{p} won't point to +the allocated space of any automatic or static variable. So the code +must not do this: + +@example +int a; +int *restrict p = &a; +foo (*p, a); +@end example + +@noindent +because that does direct access to the object (@code{a}) that @code{p} +points to, which bypasses @code{p}. + +If the code makes such promises with @code{restrict} then breaks them, +execution is unpredictable. + +@node restrict Pointer Example +@section @code{restrict} Pointer Example + +Here are examples where @code{restrict} enables real optimization. + +In this example, @code{restrict} assures GCC that the array @code{out} +points to does not overlap with the array @code{in} points to. + +@example +void +process_data (const char *in, + char * restrict out, + size_t size) +@{ + for (size_t i = 0; i < size; i++) + out[i] = in[i] + in[i + 1]; +@} +@end example + +Here's a simple tree structure, where each tree node holds data of +type @code{PAYLOAD} plus two subtrees. + +@example +struct foo + @{ + PAYLOAD payload; + struct foo *left; + struct foo *right; + @}; +@end example + +Now here's a function to null out both pointers in the @code{left} +subtree. + +@example +void +null_left (struct foo *a) +@{ + a->left->left = NULL; + a->left->right = NULL; +@} +@end example + +Since @code{*a} and @code{*a->left} have the same data type, +they could legitimately alias (@pxref{Aliasing}). Therefore, +the compiled code for @code{null_left} must read @code{a->left} +again from memory when executing the second assignment statement. + +We can enable optimization, so that it does not need to read +@code{a->left} again, by writing @code{null_left} in a less +obvious way. + +@example +void +null_left (struct foo *a) +@{ + struct foo *b = a->left; + b->left = NULL; + b->right = NULL; +@} +@end example + +A more elegant way to fix this is with @code{restrict}.
+ +@example +void +null_left (struct foo *restrict a) +@{ + a->left->left = NULL; + a->left->right = NULL; +@} +@end example + +Declaring @code{a} as @code{restrict} asserts that other pointers such +as @code{a->left} will not point to the same memory space as @code{a}. +Therefore, the memory location @code{a->left->left} cannot be the same +memory as @code{a->left}. Knowing this, the compiled code may avoid +reloading @code{a->left} for the second statement. + +@node Functions +@chapter Functions +@cindex functions + +We have already presented many examples of functions, so if you've +read this far, you basically understand the concept of a function. It +is vital, nonetheless, to have a chapter in the manual that collects +all the information about functions. + +@menu +* Function Definitions:: Writing the body of a function. +* Function Declarations:: Declaring the interface of a function. +* Function Calls:: Using functions. +* Function Call Semantics:: Call-by-value argument passing. +* Function Pointers:: Using references to functions. +* The main Function:: Where execution of a GNU C program begins. +* Advanced Definitions:: Advanced features of function definitions. +* Obsolete Definitions:: Obsolete features still used + in function definitions in old code. +@end menu + +@node Function Definitions +@section Function Definitions +@cindex function definitions +@cindex defining functions + +We have already presented many examples of function definitions. To +summarize the rules, a function definition looks like this: + +@example +@var{returntype} +@var{functionname} (@var{parm_declarations}@r{@dots{}}) +@{ + @var{body} +@} +@end example + +The part before the open-brace is called the @dfn{function header}. + +Write @code{void} as the @var{returntype} if the function does +not return a value. + +@menu +* Function Parameter Variables:: Syntax and semantics + of function parameters. 
+* Forward Function Declarations:: Functions can only be called after + they have been defined or declared. +* Static Functions:: Limiting visibility of a function. +* Arrays as Parameters:: Functions that accept array arguments. +* Structs as Parameters:: Functions that accept structure arguments. +@end menu + +@node Function Parameter Variables +@subsection Function Parameter Variables +@cindex function parameter variables +@cindex parameter variables in functions +@cindex parameter list + +A function parameter variable is a local variable (@pxref{Local +Variables}) used within the function to store the value passed as an +argument in a call to the function. Usually we say ``function +parameter'' or ``parameter'' for short, not mentioning the fact that +it's a variable. + +We declare these variables in the beginning of the function +definition, in the @dfn{parameter list}. For example, + +@example +fib (int n) +@end example + +@noindent +has a parameter list with one function parameter @code{n}, which has +type @code{int}. + +Function parameter declarations differ from ordinary variable +declarations in several ways: + +@itemize @bullet +@item +Inside the function definition header, commas separate parameter +declarations, and each parameter needs a complete declaration +including the type. For instance, if a function @code{foo} has two +@code{int} parameters, write this: + +@example +foo (int a, int b) +@end example + +You can't share the common @code{int} between the two declarations: + +@example +foo (int a, b) /* @r{Invalid!} */ +@end example + +@item +A function parameter variable is initialized to whatever value is +passed in the function call, so its declaration cannot specify an +initial value. + +@item +Writing an array type in a function parameter declaration has the +effect of declaring it as a pointer. The size specified for the array +has no effect at all, and we normally omit the size. 
Thus, + +@example +foo (int a[5]) +foo (int a[]) +foo (int *a) +@end example + +@noindent +are equivalent. + +@item +The scope of the parameter variables is the entire function body, +notwithstanding the fact that they are written in the function header, +which is just outside the function body. +@end itemize + +If a function has no parameters, it would be most natural for the +list of parameters in its definition to be empty. But that, in C, has +a special meaning for historical reasons: ``Do not check that calls to +this function have the right number of arguments.'' Thus, + +@example +int +foo () +@{ + return 5; +@} + +int +bar (int x) +@{ + return foo (x); +@} +@end example + +@noindent +would not report a compilation error in passing @code{x} as an +argument to @code{foo}. By contrast, + +@example +int +foo (void) +@{ + return 5; +@} + +int +bar (int x) +@{ + return foo (x); +@} +@end example + +@noindent +would report an error because @code{foo} is supposed to receive +no arguments. + +@node Forward Function Declarations +@subsection Forward Function Declarations +@cindex forward function declarations +@cindex function declarations, forward + +The order of the function definitions in the source code makes no +difference, except that each function needs to be defined or declared +before code uses it. + +The definition of a function also declares its name for the rest of +the containing scope. But what if you want to call the function +before its definition? To permit that, write a compatible declaration +of the same function, before the first call. A declaration that +prefigures a subsequent definition in this way is called a +@dfn{forward declaration}. The function declaration can be at top +@c ??? file scope +level or within a block, and it applies until the end of the containing +scope. + +@xref{Function Declarations}, for more information about these +declarations. 
+ +@node Static Functions +@subsection Static Functions +@cindex static functions +@cindex functions, static +@findex static + +The keyword @code{static} in a function definition limits the +visibility of the name to the current compilation module. (That's the +same thing @code{static} does in variable declarations; +@pxref{File-Scope Variables}.) For instance, if one compilation module +contains this code: + +@example +static int +foo (void) +@{ + @r{@dots{}} +@} +@end example + +@noindent +then the code of that compilation module can call @code{foo} anywhere +after the definition, but other compilation modules cannot refer to it +at all. + +@cindex forward declaration +@cindex static function, declaration +To call @code{foo} before its definition, it needs a forward +declaration, which should use @code{static} since the function +definition does. For this function, it looks like this: + +@example +static int foo (void); +@end example + +It is generally wise to use @code{static} on the definitions of +functions that won't be called from outside the same compilation +module. This makes sure that calls are not added in other modules. +If programmers decide to change the function's calling convention, or +understand all the consequences of its use, they will only have to +check for calls in the same compilation module. + +@node Arrays as Parameters +@subsection Arrays as Parameters +@cindex array as parameters +@cindex functions with array parameters + +Arrays in C are not first-class objects: it is impossible to copy +them. So they cannot be passed as arguments like other values. +@xref{Limitations of C Arrays}. Rather, array parameters work in +a special way. + +@menu +* Array Parm Pointer:: +* Passing Array Args:: +* Array Parm Qualifiers:: +@end menu + +@node Array Parm Pointer +@subsubsection Array parameters are pointers + +Declaring a function parameter variable as an array really gives it a +pointer type. 
C does this because an expression with array type, if +used as an argument in a function call, is converted automatically to +a pointer (to the zeroth element of the array). If you declare the +corresponding parameter as an ``array'', it will work correctly with +the pointer value that really gets passed. + +This relates to the fact that C does not check array bounds in access +to elements of the array (@pxref{Accessing Array Elements}). + +For example, in this function, + +@example +void +clobber4 (int array[20]) +@{ + array[4] = 0; +@} +@end example + +@noindent +the parameter @code{array}'s real type is @code{int *}; the specified +length, 20, has no effect on the program. You can leave out the length +and write this: + +@example +void +clobber4 (int array[]) +@{ + array[4] = 0; +@} +@end example + +@noindent +or write the parameter declaration explicitly as a pointer: + +@example +void +clobber4 (int *array) +@{ + array[4] = 0; +@} +@end example + +They are all equivalent. + +@node Passing Array Args +@subsubsection Passing array arguments + + The function call passes this pointer by +value, like all argument values in C@. However, the result is +paradoxical in that the array itself is passed by reference: its +contents are treated as shared memory---shared between the caller and +the called function, that is. When @code{clobber4} assigns to element +4 of @code{array}, the effect is to alter element 4 of the array +specified in the call. 
+ +@example +#include <stddef.h> /* @r{Defines @code{NULL}.} */ +#include <stdlib.h> /* @r{Declares @code{malloc},} */ + /* @r{Defines @code{EXIT_SUCCESS}.} */ + +int +main (void) +@{ + int data[] = @{1, 2, 3, 4, 5, 6@}; + int i; + + /* @r{Show the initial value of element 4.} */ + for (i = 0; i < 6; i++) + printf ("data[%d] = %d\n", i, data[i]); + + printf ("\n"); + + clobber4 (data); + + /* @r{Show that element 4 has been changed.} */ + for (i = 0; i < 6; i++) + printf ("data[%d] = %d\n", i, data[i]); + + printf ("\n"); + + return EXIT_SUCCESS; +@} +@end example + +@noindent +shows that @code{data[4]} has become zero after the call to +@code{clobber4}. + +The array @code{data} has 6 elements, but passing it to a function +whose argument type is written as @code{int [20]} is not an error, +because that really stands for @code{int *}. The pointer that is the +real argument carries no indication of the length of the array it +points into. It is not required to point to the beginning of the +array, either. For instance, + +@example +clobber4 (data+1); +@end example + +@noindent +passes an ``array'' that starts at element 1 of @code{data}, and the +effect is to zero @code{data[5]} instead of @code{data[4]}. + +If all calls to the function will provide an array of a particular +size, you can specify the size of the array to be @code{static}: + +@example +void +clobber4 (int array[static 20]) +@r{@dots{}} +@end example + +@noindent +This is a promise to the compiler that the function will always be +called with an array of at least 20 elements, so that the compiler can optimize +code accordingly. If the code breaks this promise and calls the +function with, for example, a shorter array, unpredictable things may +happen.
+ +@node Array Parm Qualifiers +@subsubsection Type qualifiers on array parameters + +You can use the type qualifiers @code{const}, @code{restrict}, and +@code{volatile} with array parameters; for example: + +@example +void +clobber4 (volatile int array[20]) +@r{@dots{}} +@end example + +@noindent +denotes that @code{array} is equivalent to a pointer to a volatile +@code{int}. Alternatively: + +@example +void +clobber4 (int array[const 20]) +@r{@dots{}} +@end example + +@noindent +makes the array parameter equivalent to a constant pointer to an +@code{int}. If we want the @code{clobber4} function to succeed, it +would not make sense to write + +@example +void +clobber4 (const int array[20]) +@r{@dots{}} +@end example + +@noindent +as this would tell the compiler that the parameter should point to an +array of constant @code{int} values, and then we would not be able to +store zeros in them. + +In a function with multiple array parameters, you can use @code{restrict} +to tell the compiler that each array parameter passed in will be distinct: + +@example +void +foo (int array1[restrict 10], int array2[restrict 10]) +@r{@dots{}} +@end example + +@noindent +Using @code{restrict} promises the compiler that callers will +not pass in the same array for more than one @code{restrict} array +parameter. Knowing this enables the compiler to perform better code +optimization. This is the same effect as using @code{restrict} +pointers (@pxref{restrict Pointers}), but makes it clear when reading +the code that an array of a specific size is expected. + +@node Structs as Parameters +@subsection Functions That Accept Structure Arguments + +Structures in GNU C are first-class objects, so using them as function +parameters and arguments works in the natural way. This function +@code{swapfoo} takes a @code{struct foo} with two fields as argument, +and returns a structure of the same type but with the fields +exchanged. 
+ +@example +struct foo @{ int a, b; @}; + +struct foo x; + +struct foo +swapfoo (struct foo inval) +@{ + struct foo outval; + outval.a = inval.b; + outval.b = inval.a; + return outval; +@} +@end example + +This simpler definition of @code{swapfoo} avoids using a local +variable to hold the result about to be returned, by using a structure +constructor (@pxref{Structure Constructors}), like this: + +@example +struct foo +swapfoo (struct foo inval) +@{ + return (struct foo) @{ inval.b, inval.a @}; +@} +@end example + +It is valid to define a structure type in a function's parameter list, +as in + +@example +int +frob_bar (struct bar @{ int a, b; @} inval) +@{ + @var{body} +@} +@end example + +@noindent +and @var{body} can access the fields of @var{inval} since the +structure type @code{struct bar} is defined for the whole function +body. However, there is no way to create a @code{struct bar} argument +to pass to @code{frob_bar}, except with kludges. As a result, +defining a structure type in a parameter list is useless in practice. + +@node Function Declarations +@section Function Declarations +@cindex function declarations +@cindex declaring functions + +To call a function, or use its name as a pointer, a @dfn{function +declaration} for the function name must be in effect at that point in +the code. The function's definition serves as a declaration of that +function for the rest of the containing scope, but to use the function +in code before the definition, or from another compilation module, a +separate function declaration must precede the use. + +A function declaration looks like the start of a function definition. +It begins with the return value type (@code{void} if none) and the +function name, followed by argument declarations in parentheses +(though these can sometimes be omitted). But that's as far as the +similarity goes: instead of the function body, the declaration uses a +semicolon.
+ +@cindex function prototype +@cindex prototype of a function +A declaration that specifies argument types is called a @dfn{function +prototype}. You can include the argument names or omit them. The +names, if included in the declaration, have no effect, but they may +serve as documentation. + +This form of prototype specifies fixed argument types: + +@example +@var{rettype} @var{function} (@var{argtypes}@r{@dots{}}); +@end example + +@noindent +This form says the function takes no arguments: + +@example +@var{rettype} @var{function} (void); +@end example + +@noindent +This form declares types for some arguments, and allows additional +arguments whose types are not specified: + +@example +@var{rettype} @var{function} (@var{argtypes}@r{@dots{}}, ...); +@end example + +For a parameter that's an array of variable length, you can write +its declaration with @samp{*} where the ``length'' of the array would +normally go; for example, these are all equivalent. + +@example +double maximum (int n, int m, double a[n][m]); +double maximum (int n, int m, double a[*][*]); +double maximum (int n, int m, double a[ ][*]); +double maximum (int n, int m, double a[ ][m]); +@end example + +@noindent +The old-fashioned form of declaration, which is not a prototype, says +nothing about the types of arguments or how many they should be: + +@example +@var{rettype} @var{function} (); +@end example + +@strong{Warning:} Arguments passed to a function declared without a +prototype are converted with the default argument promotions +(@pxref{Argument Promotions}). Likewise for additional arguments whose +types are unspecified. + +Function declarations are usually written at the top level in a source file, +but you can also put them inside code blocks. Then the function name +is visible for the rest of the containing scope.
For example: + +@example +void +foo (char *file_name) +@{ + void save_file (char *); + save_file (file_name); +@} +@end example + +If another part of the code tries to call the function +@code{save_file}, this declaration won't be in effect there. So the +function will get an implicit declaration of the form @code{extern int +save_file ();}. That conflicts with the explicit declaration +here, and the discrepancy generates a warning. + +The syntax of C traditionally allows omitting the data type in a +function declaration if it specifies a storage class or a qualifier. +Then the type defaults to @code{int}. For example: + +@example +static foo (double x); +@end example + +@noindent +defaults the return type to @code{int}. +This is bad practice; if you see it, fix it. + +Calling a function that is undeclared has the effect of creating an +@dfn{implicit} declaration in the innermost containing scope, +equivalent to this: + +@example +extern int @var{function} (); +@end example + +@noindent +This declaration says that the function returns @code{int} but leaves +its argument types unspecified. If that does not accurately fit the +function, then the program @strong{needs} an explicit declaration of +the function with argument types in order to call it correctly. + +Implicit declarations are deprecated, and a function call that creates one +causes a warning. + +@node Function Calls +@section Function Calls +@cindex function calls +@cindex calling functions + +Starting a program automatically calls the function named @code{main} +(@pxref{The main Function}). Aside from that, a function does nothing +except when it is @dfn{called}. That occurs during the execution of a +function-call expression specifying that function. + +A function-call expression looks like this: + +@example +@var{function} (@var{arguments}@r{@dots{}}) +@end example + +Most of the time, @var{function} is a function name.
However, it can +also be an expression with a function pointer value; that way, the +program can determine at run time which function to call. + +The @var{arguments} are a series of expressions separated by commas. +Each expression specifies one argument to pass to the function. + +The list of arguments in a function call looks just like use of the +comma operator (@pxref{Comma Operator}), but the fact that it fills +the parentheses of a function call gives it a different meaning. + +Here's an example of a function call, taken from an example near the +beginning (@pxref{Complete Program}). + +@example +printf ("Fibonacci series item %d is %d\n", + 19, fib (19)); +@end example + +The three arguments given to @code{printf} are a constant string, the +integer 19, and the integer returned by @code{fib (19)}. + +@node Function Call Semantics +@section Function Call Semantics +@cindex function call semantics +@cindex semantics of function calls +@cindex call-by-value + +The meaning of a function call is to compute the specified argument +expressions, convert their values according to the function's +declaration, then run the function giving it copies of the converted +values. (This method of argument passing is known as +@dfn{call-by-value}.) When the function finishes, the value it +returns becomes the value of the function-call expression. + +Call-by-value implies that an assignment to the function argument +variable has no direct effect on the caller. For instance, + +@example +#include <stdlib.h> /* @r{Defines @code{EXIT_SUCCESS}.} */ +#include <stdio.h> /* @r{Declares @code{printf}.} */ + +void +subroutine (int x) +@{ + x = 5; +@} + +int +main (void) +@{ + int y = 20; + subroutine (y); + printf ("y is %d\n", y); + return EXIT_SUCCESS; +@} +@end example + +@noindent +prints @samp{y is 20}. Calling @code{subroutine} initializes @code{x} +from the value of @code{y}, but this does not establish any other +relationship between the two variables.
Thus, the assignment to +@code{x}, inside @code{subroutine}, changes only @emph{that} @code{x}. + +If an argument's type is specified by the function's declaration, the +function call converts the argument expression to that type if +possible. If the conversion is impossible, that is an error. + +If the function's declaration doesn't specify the type of that +argument, then the @emph{default argument promotions} apply. +@xref{Argument Promotions}. + +@node Function Pointers +@section Function Pointers +@cindex function pointers +@cindex pointers to functions + +A function name refers to a fixed function. Sometimes it is useful to +call a function to be determined at run time; to do this, you can use +a @dfn{function pointer value} that points to the chosen function +(@pxref{Pointers}). + +Pointer-to-function types can be used to declare variables and other +data, including array elements, structure fields, and union +alternatives. They can also be used for function arguments and return +values. These types have the peculiarity that they are never +converted automatically to @code{void *} or vice versa. However, you +can do that conversion with a cast. + +@menu +* Declaring Function Pointers:: How to declare a pointer to a function. +* Assigning Function Pointers:: How to assign values to function pointers. +* Calling Function Pointers:: How to call functions through pointers. +@end menu + +@node Declaring Function Pointers +@subsection Declaring Function Pointers +@cindex declaring function pointers +@cindex function pointers, declaring + +The declaration of a function pointer variable (or structure field) +looks almost like a function declaration, except it has an additional +@samp{*} just before the variable name. Proper nesting requires a +pair of parentheses around the two of them. 
For instance, @code{int +(*a) ();} says, ``Declare @code{a} as a pointer such that @code{*a} is +an @code{int}-returning function.'' + +Contrast these three declarations: + +@example +/* @r{Declare a function returning @code{char *}.} */ +char *a (char *); +/* @r{Declare a pointer to a function returning @code{char}.} */ +char (*a) (char *); +/* @r{Declare a pointer to a function returning @code{char *}.} */ +char *(*a) (char *); +@end example + +The possible argument types of the function pointed to are the same +as in a function declaration. You can write a prototype +that specifies all the argument types: + +@example +@var{rettype} (*@var{function}) (@var{arguments}@r{@dots{}}); +@end example + +@noindent +or one that specifies some and leaves the rest unspecified: + +@example +@var{rettype} (*@var{function}) (@var{arguments}@r{@dots{}}, ...); +@end example + +@noindent +or one that says there are no arguments: + +@example +@var{rettype} (*@var{function}) (void); +@end example + +You can also write a non-prototype declaration that says +nothing about the argument types: + +@example +@var{rettype} (*@var{function}) (); +@end example + +For example, here's a declaration for a variable that should +point to some arithmetic function that operates on two @code{double}s: + +@example +double (*binary_op) (double, double); +@end example + +Structure fields, union alternatives, and array elements can be +function pointers; so can parameter variables. The function pointer +declaration construct can also be combined with other operators +allowed in declarations. For instance, + +@example +int **(*foo)(); +@end example + +@noindent +declares @code{foo} as a pointer to a function that returns +type @code{int **}, and + +@example +int **(*foo[30])(); +@end example + +@noindent +declares @code{foo} as an array of 30 pointers to functions that +return type @code{int **}. 
+ +@example +int **(**foo)(); +@end example + +@noindent +declares @code{foo} as a pointer to a pointer to a function that +returns type @code{int **}. + +@node Assigning Function Pointers +@subsection Assigning Function Pointers +@cindex assigning function pointers +@cindex function pointers, assigning + +Assuming we have declared the variable @code{binary_op} as in the +previous section, giving it a value requires a suitable function to +use. So let's define a function suitable for the variable to point +to. Here's one: + +@example +double +double_add (double a, double b) +@{ + return a+b; +@} +@end example + +Now we can give it a value: + +@example +binary_op = double_add; +@end example + +The target type of the function pointer must be upward compatible with +the type of the function (@pxref{Compatible Types}). + +There is no need for @samp{&} in front of @code{double_add}. +Using a function name such as @code{double_add} as an expression +automatically converts it to the function's address, with the +appropriate function pointer type. However, it is ok to use +@samp{&} if you feel that is clearer: + +@example +binary_op = &double_add; +@end example + +@node Calling Function Pointers +@subsection Calling Function Pointers +@cindex calling function pointers +@cindex function pointers, calling + +To call the function specified by a function pointer, just write the +function pointer value in a function call. For instance, here's a +call to the function @code{binary_op} points to: + +@example +binary_op (x, 5) +@end example + +Since the data type of @code{binary_op} explicitly specifies type +@code{double} for the arguments, the call converts @code{x} and 5 to +@code{double}. + +The call conceptually dereferences the pointer @code{binary_op} to +``get'' the function it points to, and calls that function. 
If you +wish, you can explicitly represent the dereference by writing the +@code{*} operator: + +@example +(*binary_op) (x, 5) +@end example + +The @samp{*} reminds people reading the code that @code{binary_op} is +a function pointer rather than the name of a specific function. + +@node The main Function +@section The @code{main} Function +@cindex @code{main} function +@findex main + +Every complete executable program requires at least one function, +called @code{main}, which is where execution begins. You do not have +to explicitly declare @code{main}, though GNU C permits you to do so. +Conventionally, @code{main} should be defined to follow one of these +calling conventions: + +@example +int main (void) @{@r{@dots{}}@} +int main (int argc, char *argv[]) @{@r{@dots{}}@} +int main (int argc, char *argv[], char *envp[]) @{@r{@dots{}}@} +@end example + +@noindent +Using @code{void} as the parameter list means that @code{main} does +not use the arguments. You can write @code{char **argv} instead of +@code{char *argv[]}, and likewise for @code{envp}, as the two +constructs are equivalent. + +@ignore @c Not so at present +Defining @code{main} in any other way generates a warning. Your +program will still compile, but you may get unexpected results when +executing it. +@end ignore + +You can call @code{main} from C code, as you can call any other +function, though that is an unusual thing to do. When you do that, +you must write the call to pass arguments that match the parameters in +the definition of @code{main}. + +The @code{main} function is not actually the first code that runs when +a program starts. In fact, the first code that runs is system code +from the file @file{crt0.o}. In Unix, this was hand-written assembler +code, but in GNU we replaced it with C code. Its job is to find +the arguments for @code{main} and call that. + +@menu +* Values from main:: Returning values from the main function.
+* Command-line Parameters:: Accessing command-line parameters + provided to the program. +* Environment Variables:: Accessing system environment variables. +@end menu + +@node Values from main +@subsection Returning Values from @code{main} +@cindex returning values from @code{main} +@cindex success +@cindex failure +@cindex exit status + +When @code{main} returns, the process terminates. Whatever value +@code{main} returns becomes the exit status which is reported to the +parent process. While nominally the return value is of type +@code{int}, in fact the exit status gets truncated to eight bits; if +@code{main} returns the value 256, the exit status is 0. + +Normally, programs return only one of two values: 0 for success, +and 1 for failure. For maximum portability, use the macro +values @code{EXIT_SUCCESS} and @code{EXIT_FAILURE} defined in +@code{stdlib.h}. Here's an example: + +@cindex @code{EXIT_FAILURE} +@cindex @code{EXIT_SUCCESS} +@example +#include <stdlib.h> /* @r{Defines @code{EXIT_SUCCESS}} */ + /* @r{and @code{EXIT_FAILURE}.} */ + +int +main (void) +@{ + @r{@dots{}} + if (foo) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +@} +@end example + +Some types of programs maintain special conventions for various return +values; for example, comparison programs including @code{cmp} and +@code{diff} return 1 to indicate a mismatch, and 2 to indicate that +the comparison couldn't be performed. + +@node Command-line Parameters +@subsection Accessing Command-line Parameters +@cindex command-line parameters +@cindex parameters, command-line + +If the program was invoked with any command-line arguments, it can +access them through the arguments of @code{main}, @code{argc} and +@code{argv}. (You can give these arguments any names, but the names +@code{argc} and @code{argv} are customary.) + +The value of @code{argv} is an array containing all of the +command-line arguments as strings, with the name of the command +invoked as the first string.
@code{argc} is an integer that says how +many strings @code{argv} contains. Here is an example of accessing +the command-line parameters, retrieving the program's name and +checking for the standard @option{--version} and @option{--help} options: + +@example +#include <string.h> /* @r{Declare @code{strcmp}.} */ + +int +main (int argc, char *argv[]) +@{ + char *program_name = argv[0]; + + for (int i = 1; i < argc; i++) + @{ + if (!strcmp (argv[i], "--version")) + @{ + /* @r{Print version information and exit.} */ + @r{@dots{}} + @} + else if (!strcmp (argv[i], "--help")) + @{ + /* @r{Print help information and exit.} */ + @r{@dots{}} + @} + @} + @r{@dots{}} +@} +@end example + +@node Environment Variables +@subsection Accessing Environment Variables +@cindex environment variables + +You can optionally include a third parameter to @code{main}, another +array of strings, to capture the environment variables available to +the program. Unlike what happens with @code{argv}, there is no +additional parameter for the count of environment variables; rather, +the array of environment variables concludes with a null pointer. + +@example +#include <stdio.h> /* @r{Declares @code{printf}.} */ + +int +main (int argc, char *argv[], char *envp[]) +@{ + /* @r{Print out all environment variables.} */ + int i = 0; + while (envp[i]) + @{ + printf ("%s\n", envp[i]); + i++; + @} +@} +@end example + +Another method of retrieving environment variables is to use the +library function @code{getenv}, which is defined in @code{stdlib.h}. +Using @code{getenv} does not require defining @code{main} to accept the +@code{envp} pointer.
For example, here is a program that fetches and prints +the user's home directory (if defined): + +@example +#include <stdlib.h> /* @r{Declares @code{getenv}.} */ +#include <stdio.h> /* @r{Declares @code{printf}.} */ + +int +main (void) +@{ + char *home_directory = getenv ("HOME"); + if (home_directory) + printf ("My home directory is: %s\n", home_directory); + else + printf ("My home directory is not defined!\n"); +@} +@end example + +@node Advanced Definitions +@section Advanced Function Features + +This section describes some advanced or obscure features for GNU C +function definitions. If you are just learning C, you can skip the +rest of this chapter. + +@menu +* Variable-Length Array Parameters:: Functions that accept arrays + of variable length. +* Variable Number of Arguments:: Variadic functions. +* Nested Functions:: Defining functions within functions. +* Inline Function Definitions:: A function call optimization technique. +@end menu + +@node Variable-Length Array Parameters +@subsection Variable-Length Array Parameters +@cindex variable-length array parameters +@cindex array parameters, variable-length +@cindex functions that accept variable-length arrays + +An array parameter can have variable length: simply declare the array +type with a size that isn't constant. In a nested function, the +length can refer to a variable defined in a containing scope.
In any +function, it can refer to a previous parameter, like this: + +@example +struct entry +tester (int len, char data[len][len]) +@{ + @r{@dots{}} +@} +@end example + +Alternatively, in function declarations (but not in function +definitions), you can use @code{[*]} to denote that the array +parameter is of a variable length, such that these two declarations +mean the same thing: + +@example +struct entry +tester (int len, char data[len][len]); +@end example + +@example +struct entry +tester (int len, char data[*][*]); +@end example + +@noindent +The two forms of input are equivalent in GNU C, but emphasizing that +the array parameter is variable-length may be helpful to those +studying the code. + +You can also omit the length parameter, and instead use some other +in-scope variable for the length in the function definition: + +@example +struct entry +tester (char data[*][*]); +@r{@dots{}} +int dataLength = 20; +@r{@dots{}} +struct entry +tester (char data[dataLength][dataLength]) +@{ + @r{@dots{}} +@} +@end example + +@c ??? check text above + +@cindex parameter forward declaration +In GNU C, to pass the array first and the length afterward, you can +use a @dfn{parameter forward declaration}, like this: + +@example +struct entry +tester (int len; char data[len][len], int len) +@{ + @r{@dots{}} +@} +@end example + +The @samp{int len} before the semicolon is the parameter forward +declaration; it serves the purpose of making the name @code{len} known +when the declaration of @code{data} is parsed. + +You can write any number of such parameter forward declarations in the +parameter list. They can be separated by commas or semicolons, but +the last one must end with a semicolon, which is followed by the +``real'' parameter declarations. Each forward declaration must match +a subsequent ``real'' declaration in parameter name and data type. + +Standard C does not support parameter forward declarations. 
+ +@node Variable Number of Arguments +@subsection Variable-Length Parameter Lists +@cindex variable-length parameter lists +@cindex parameters lists, variable length +@cindex function parameter lists, variable length + +@cindex variadic function +A function that takes a variable number of arguments is called a +@dfn{variadic function}. In C, a variadic function must specify at +least one fixed argument with an explicitly declared data type. +Additional arguments can follow, and can vary in both quantity and +data type. + +In the function header, declare the fixed parameters in the normal +way, then write a comma and an ellipsis: @samp{, ...}. Here is an +example of a variadic function header: + +@example +int add_multiple_values (int number, ...) +@end example + +@cindex @code{va_list} +@cindex @code{va_start} +@cindex @code{va_end} +The function body can refer to fixed arguments by their parameter +names, but the additional arguments have no names. Accessing them in +the function body uses certain standard macros. They are defined in +the library header file @file{stdarg.h}, so the code must +@code{#include} that file. + +In the body, write + +@example +va_list ap; +va_start (ap, @var{last_fixed_parameter}); +@end example + +@noindent +This declares the variable @code{ap} (you can use any name for it) +and then sets it up to point before the first additional argument. + +Then, to fetch the next consecutive additional argument, write this: + +@example +va_arg (ap, @var{type}) +@end example + +After fetching all the additional arguments (or as many as need to be +used), write this: + +@example +va_end (ap); +@end example + +Here's an example of a variadic function definition that adds any +number of @code{int} arguments. The first (fixed) argument says how +many more arguments follow. + +@example +#include <stdarg.h> /* @r{Defines @code{va}@r{@dots{}} macros.} */ +@r{@dots{}} + +int +add_multiple_values (int argcount, ...)
+@{ + int counter, total = 0; + + /* @r{Declare a variable of type @code{va_list}.} */ + va_list argptr; + + /* @r{Initialize that variable.} */ + va_start (argptr, argcount); + + for (counter = 0; counter < argcount; counter++) + @{ + /* @r{Get the next additional argument.} */ + total += va_arg (argptr, int); + @} + + /* @r{End use of the @code{argptr} variable.} */ + va_end (argptr); + + return total; +@} +@end example + +With GNU C, @code{va_end} is superfluous, but some other compilers +might make @code{va_start} allocate memory so that calling +@code{va_end} is necessary to avoid a memory leak. Before doing +@code{va_start} again with the same variable, do @code{va_end} +first. + +@cindex @code{va_copy} +Because of this possible memory allocation, it is risky (in principle) +to copy one @code{va_list} variable to another with assignment. +Instead, use @code{va_copy}, which copies the substance but allocates +separate memory in the variable you copy to. The call looks like +@code{va_copy (@var{to}, @var{from})}, where both @var{to} and +@var{from} should be variables of type @code{va_list}. In principle, +do @code{va_end} on each of these variables before its scope ends. + +Since the additional arguments' types are not specified in the +function's definition, the default argument promotions +(@pxref{Argument Promotions}) apply to them in function calls. The +function definition must take account of this; thus, if an argument +was passed as @code{short}, the function should get it as @code{int}. +If an argument was passed as @code{float}, the function should get it +as @code{double}. + +C has no mechanism to tell the variadic function how many arguments +were passed to it, so its calling convention must give it a way to +determine this. That's why @code{add_multiple_values} takes a fixed +argument that says how many more arguments follow.
Thus, you can +call the function like this: + +@example +sum = add_multiple_values (3, 12, 34, 190); +/* @r{Value is 12+34+190.} */ +@end example + +In GNU C, there is no actual need to use the @code{va_end} function. +In fact, it does nothing. It's used for compatibility with other +compilers, when that matters. + +It is a mistake to access variables declared as @code{va_list} except +in the specific ways described here. Just what that type consists of +is an implementation detail, which could vary from one platform to +another. + +@node Nested Functions +@subsection Nested Functions +@cindex nested functions +@cindex functions, nested +@cindex downward funargs +@cindex thunks + +A @dfn{nested function} is a function defined inside another function. +The nested function's name is local to the block where it is defined. +For example, here we define a nested function named @code{square}, and +call it twice: + +@example +@group +foo (double a, double b) +@{ + double square (double z) @{ return z * z; @} + + return square (a) + square (b); +@} +@end group +@end example + +The nested function can access all the variables of the containing +function that are visible at the point of its definition. This is +called @dfn{lexical scoping}. For example, here we show a nested +function that uses an inherited variable named @code{offset}: + +@example +@group +bar (int *array, int offset, int size) +@{ + int access (int *array, int index) + @{ return array[index + offset]; @} + int i; + @r{@dots{}} + for (i = 0; i < size; i++) + @r{@dots{}} access (array, i) @r{@dots{}} +@} +@end group +@end example + +Nested function definitions can appear wherever automatic variable +declarations are allowed; that is, in any block, interspersed with the +other declarations and statements in the block. + +The nested function's name is visible only within the parent block; +the name's scope starts from its definition and continues to the end +of the containing block. 
If the nested function's name +is the same as the parent function's name, there will be +no way to refer to the parent function inside the scope of the +name of the nested function. + +Using @code{extern} or @code{static} on a nested function definition +is an error. + +It is possible to call the nested function from outside the scope of its +name by storing its address or passing the address to another function. +You can do this safely, but you must be careful: + +@example +@group +hack (int *array, int size, int addition) +@{ + void store (int index, int value) + @{ array[index] = value + addition; @} + + intermediate (store, size); +@} +@end group +@end example + +Here, the function @code{intermediate} receives the address of +@code{store} as an argument. If @code{intermediate} calls @code{store}, +the arguments given to @code{store} are used to store into @code{array}. +@code{store} also accesses @code{hack}'s local variable @code{addition}. + +It is safe for @code{intermediate} to call @code{store} because +@code{hack}'s stack frame, with its arguments and local variables, +continues to exist during the call to @code{intermediate}. + +Calling the nested function through its address after the containing +function has exited is asking for trouble. If it is called after a +containing scope level has exited, and if it refers to some of the +variables that are no longer in scope, it will refer to memory +containing junk or other data. It's not wise to take the risk. + +The GNU C Compiler implements taking the address of a nested function +using a technique called @dfn{trampolines}. This technique was +described in @cite{Lexical Closures for C@t{++}} (Thomas M. Breuel, +USENIX C@t{++} Conference Proceedings, October 17--21, 1988). + +A nested function can jump to a label inherited from a containing +function, provided the label was explicitly declared in the containing +function (@pxref{Local Labels}).
Such a jump returns instantly to the +containing function, exiting the nested function that did the +@code{goto} and any intermediate function invocations as well. Here +is an example: + +@example +@group +bar (int *array, int offset, int size) +@{ + /* @r{Explicitly declare the label @code{failure}.} */ + __label__ failure; + int access (int *array, int index) + @{ + if (index > size) + /* @r{Exit this function,} + @r{and return to @code{bar}.} */ + goto failure; + return array[index + offset]; + @} +@end group + +@group + int i; + @r{@dots{}} + for (i = 0; i < size; i++) + @r{@dots{}} access (array, i) @r{@dots{}} + @r{@dots{}} + return 0; + + /* @r{Control comes here from @code{access} + if it does the @code{goto}.} */ + failure: + return -1; +@} +@end group +@end example + +To declare the nested function before its definition, use +@code{auto} (which is otherwise meaningless for function declarations; +@pxref{auto and register}). For example, + +@example +bar (int *array, int offset, int size) +@{ + auto int access (int *, int); + @r{@dots{}} + @r{@dots{}} access (array, i) @r{@dots{}} + @r{@dots{}} + int access (int *array, int index) + @{ + @r{@dots{}} + @} + @r{@dots{}} +@} +@end example + +@node Inline Function Definitions +@subsection Inline Function Definitions +@cindex inline function definitions +@cindex function definitions, inline +@findex inline + +To declare a function inline, use the @code{inline} keyword in its +definition. Here are two simple functions that each take a pointer to a +@code{struct list} and return one of its members---both declared +inline. + +@example +struct list +@{ + struct list *first, *second; +@}; + +inline struct list * +list_first (struct list *p) +@{ + return p->first; +@} + +inline struct list * +list_second (struct list *p) +@{ + return p->second; +@} +@end example + +Optimized compilation can substitute the inline function's body for +any call to it. This is called @emph{inlining} the function.
It +makes the code that contains the call run faster, significantly so if +the inline function is small. + +Here's a function that uses @code{list_second}: + +@example +int +pairlist_length (struct list *l) +@{ + int length = 0; + while (l) + @{ + length++; + l = list_second (l); + @} + return length; +@} +@end example + +Substituting the code of @code{list_second} into the definition of +@code{pairlist_length} results in this code, in effect: + +@example +int +pairlist_length (struct list *l) +@{ + int length = 0; + while (l) + @{ + length++; + l = l->second; + @} + return length; +@} +@end example + +Since the definition of @code{list_second} does not say @code{extern} +or @code{static}, that definition is used only for inlining. It +doesn't generate code that can be called at run time. If not all the +calls to the function are inlined, there must be a definition of the +same function name in another module for them to call. + +@cindex inline functions, omission of +@c @opindex fkeep-inline-functions +Adding @code{static} to an inline function definition means the +function definition is limited to this compilation module. Also, it +generates run-time code if necessary for the sake of any calls that +were not inlined. If all calls are inlined then the function +definition does not generate run-time code, but you can force +generation of run-time code with the option +@option{-fkeep-inline-functions}. + +@cindex extern inline function +Specifying @code{extern} along with @code{inline} means the function is +external and generates run-time code to be called from other +separately compiled modules, as well as inlined. You can define the +function as @code{inline} without @code{extern} in other modules so as +to inline calls to the same function in those modules. + +Why are some calls not inlined? First of all, inlining is an +optimization, so non-optimized compilation does not inline. + +Some calls cannot be inlined for technical reasons.
Also, certain +usages in a function definition can make it unsuitable for inline +substitution. Among these usages are: variadic functions, use of +@code{alloca}, use of computed goto (@pxref{Labels as Values}), and +use of nonlocal goto. The option @option{-Winline} requests a warning +when a function marked @code{inline} is unsuitable to be inlined. The +warning explains what obstacle makes it unsuitable. + +Just because a call @emph{can} be inlined does not mean it +@emph{should} be inlined. The GNU C compiler weighs costs and +benefits to decide whether inlining a particular call is advantageous. + +You can force inlining of all calls to a given function that can be +inlined, even in a non-optimized compilation, by specifying the +@samp{always_inline} attribute for the function, like this: + +@example +/* @r{Prototype.} */ +inline void foo (const char) __attribute__((always_inline)); +@end example + +@noindent +This is a GNU C extension. @xref{Attributes}. + +A function call may be inlined even if not declared @code{inline} in +special cases where the compiler can determine this is correct and +desirable. For instance, when a static function is called only once, +it will very likely be inlined. With @option{-flto}, link-time +optimization, any function might be inlined. To absolutely prevent +inlining of a specific function, specify +@code{__attribute__((__noinline__))} in the function's definition. + +@node Obsolete Definitions +@section Obsolete Function Features + +These features of function definitions are still used in old +programs, but you shouldn't write code this way today. +If you are just learning C, you can skip this section. + +@menu +* Old GNU Inlining:: An older inlining technique. +* Old-Style Function Definitions:: Original K&R style functions.
+@end menu + +@node Old GNU Inlining +@subsection Older GNU C Inlining + +The GNU C spec for inline functions, before GCC version 5, defined +@code{extern inline} on a function definition to mean to inline calls +to it but @emph{not} generate code for the function that could be +called at run time. By contrast, @code{inline} without @code{extern} +specified to generate run-time code for the function. In effect, ISO +incompatibly flipped the meanings of these two cases. We changed GCC +in version 5 to adopt the ISO specification. + +Many programs still use these cases with the previous GNU C meanings. +You can specify use of those meanings with the option +@option{-fgnu89-inline}. You can also specify this for a single +function with @code{__attribute__ ((gnu_inline))}. Here's an example: + +@example +inline __attribute__ ((gnu_inline)) +int +inc (int *a) +@{ + (*a)++; +@} +@end example + +@node Old-Style Function Definitions +@subsection Old-Style Function Definitions +@cindex old-style function definitions +@cindex function definitions, old-style +@cindex K&R-style function definitions + +The syntax of C traditionally allows omitting the data type in a +function declaration if it specifies a storage class or a qualifier. +Then the type defaults to @code{int}. For example: + +@example +static foo (double x); +@end example + +@noindent +defaults the return type to @code{int}. This is bad practice; if you +see it, fix it. + +An @dfn{old-style} (or ``K&R'') function definition is the way +function definitions were written in the 1980s. It looks like this: + +@example +@var{rettype} +@var{function} (@var{parmnames}) + @var{parm_declarations} +@{ + @var{body} +@} +@end example + +In @var{parmnames}, only the parameter names are listed, separated by +commas. Then @var{parm_declarations} declares their data types; these +declarations look just like variable declarations. 
If a parameter is +listed in @var{parmnames} but has no declaration, it is implicitly +declared @code{int}. + +There is no reason to write a definition this way nowadays, but such +definitions can still be seen in older GNU programs. + +An old-style variadic function definition looks like this: + +@example +#include <varargs.h> + +int +add_multiple_values (va_alist) + va_dcl +@{ + int argcount; + int counter, total = 0; + + /* @r{Declare a variable of type @code{va_list}.} */ + va_list argptr; + + /* @r{Initialize that variable.} */ + va_start (argptr); + + /* @r{Get the first argument (fixed).} */ + argcount = va_arg (argptr, int); + + for (counter = 0; counter < argcount; counter++) + @{ + /* @r{Get the next additional argument.} */ + total += va_arg (argptr, int); + @} + + /* @r{End use of the @code{argptr} variable.} */ + va_end (argptr); + + return total; +@} +@end example + +Note that the old-style variadic function definition has no fixed +parameter variables; all arguments must be obtained with +@code{va_arg}. + +@node Compatible Types +@chapter Compatible Types +@cindex compatible types +@cindex types, compatible + +Declaring a function or variable twice is valid in C only if the two +declarations specify @dfn{compatible} types. In addition, some +operations on pointers require operands to have compatible target +types. + +In C, two different primitive types are never compatible. Likewise for +the defined types @code{struct}, @code{union} and @code{enum}: two +separately defined types are incompatible unless they are defined +exactly the same way. + +However, there are a few cases where different types can be +compatible: + +@itemize @bullet +@item +Every enumeration type is compatible with some integer type. In GNU +C, the choice of integer type depends on the largest enumeration +value. + +@c ??? Which one, in GCC? +@c ??? ... it varies, depending on the enum values. Testing on +@c ??? fencepost, it appears to use a 4-byte signed integer first, +@c ???
then moves on to an 8-byte signed integer. These details +@c ??? might be platform-dependent, as the C standard says that even +@c ??? char could be used as an enum type, but it's at least true +@c ??? that GCC chooses a type that is at least large enough to +@c ??? hold the largest enum value. + +@item +Array types are compatible if the element types are compatible +and the sizes (when specified) match. + +@item +Pointer types are compatible if the pointer target types are +compatible. + +@item +Function types that specify argument types are compatible if the +return types are compatible and the argument types are compatible, +argument by argument. In addition, they must all agree in whether +they use @code{...} to allow additional arguments. + +@item +Function types that don't specify argument types are compatible if the +return types are. + +@item +Function types that specify the argument types are compatible with +function types that omit them, if the return types are compatible and +the specified argument types are unaltered by the argument promotions +(@pxref{Argument Promotions}). +@end itemize + +In order for types to be compatible, they must agree in their type +qualifiers. Thus, @code{const int} and @code{int} are incompatible. +It follows that @code{const int *} and @code{int *} are incompatible +too (they are pointers to types that are not compatible). + +If two types are compatible ignoring the qualifiers, we call them +@dfn{nearly compatible}. (If they are array types, we ignore +qualifiers on the element types.@footnote{This is a GNU C extension.}) +Comparison of pointers is valid if the pointers' target types are +nearly compatible. Likewise, the two branches of a conditional +expression may be pointers to nearly compatible target types. + +If two types are compatible ignoring the qualifiers, and the first +type has all the qualifiers of the second type, we say the first is +@dfn{upward compatible} with the second. 
Assignment of pointers +requires the assigned pointer's target type to be upward compatible +with the right operand (the new value)'s target type. + +@node Type Conversions +@chapter Type Conversions +@cindex type conversions +@cindex conversions, type + +C converts between data types automatically when that seems clearly +necessary. In addition, you can convert explicitly with a @dfn{cast}. + +@menu +* Explicit Type Conversion:: Casting a value from one type to another. +* Assignment Type Conversions:: Automatic conversion by assignment operation. +* Argument Promotions:: Automatic conversion of function parameters. +* Operand Promotions:: Automatic conversion of arithmetic operands. +* Common Type:: When operand types differ, which one is used? +@end menu + +@node Explicit Type Conversion +@section Explicit Type Conversion +@cindex cast +@cindex explicit type conversion + +You can do explicit conversions using the unary @dfn{cast} operator, +which is written as a type designator (@pxref{Type Designators}) in +parentheses. For example, @code{(int)} is the operator to cast to +type @code{int}. Here's an example of using it: + +@example +@{ + double d = 5.5; + + printf ("Floating point value: %f\n", d); + printf ("Rounded to integer: %d\n", (int) d); +@} +@end example + +Using @code{(int) d} passes an @code{int} value as argument to +@code{printf}, so you can print it with @samp{%d}. Using just +@code{d} without the cast would pass the value as @code{double}. +That won't work at all with @samp{%d}; the results would be gibberish. + +To divide one integer by another without rounding, +cast either of the integers to @code{double} first: + +@example +(double) @var{dividend} / @var{divisor} +@var{dividend} / (double) @var{divisor} +@end example + +It is enough to cast one of them, because that forces the common type +to @code{double} so the other will be converted automatically. 
+ +The valid cast conversions are: + +@itemize @bullet +@item +One numerical type to another. + +@item +One pointer type to another. +(Converting between pointers that point to functions +and pointers that point to data is not standard C.) + +@item +A pointer type to an integer type. + +@item +An integer type to a pointer type. + +@item +To a union type, from the type of any alternative in the union +(@pxref{Unions}). (This is a GNU extension.) + +@item +Anything, to @code{void}. +@end itemize + +@node Assignment Type Conversions +@section Assignment Type Conversions +@cindex assignment type conversions + +Certain type conversions occur automatically in assignments +and certain other contexts. These are the conversions +assignments can do: + +@itemize @bullet +@item +Converting any numeric type to any other numeric type. + +@item +Converting @code{void *} to any other pointer type +(except pointer-to-function types). + +@item +Converting any other pointer type to @code{void *} +(except pointer-to-function types). + +@item +Converting 0 (a null pointer constant) to any pointer type. + +@item +Converting any pointer type to @code{bool}. (The result is +1 if the pointer is not null.) + +@item +Converting between pointer types when the left-hand target type is +upward compatible with the right-hand target type. @xref{Compatible +Types}. +@end itemize + +These type conversions occur automatically in certain contexts, +which are: + +@itemize @bullet +@item +An assignment converts the type of the right-hand expression +to the type wanted by the left-hand expression. For example, + +@example +double i; +i = 5; +@end example + +@noindent +converts 5 to @code{double}. + +@item +A function call, when the function specifies the type for that +argument, converts the argument value to that type. For example, + +@example +void foo (double); +foo (5); +@end example + +@noindent +converts 5 to @code{double}.
+ +@item +A @code{return} statement converts the specified value to the type +that the function is declared to return. For example, + +@example +double +foo () +@{ + return 5; +@} +@end example + +@noindent +also converts 5 to @code{double}. +@end itemize + +In all three contexts, if the conversion is impossible, that +constitutes an error. + +@node Argument Promotions +@section Argument Promotions +@cindex argument promotions +@cindex promotion of arguments + +When a function's definition or declaration does not specify the type +of an argument, that argument is passed without conversion in whatever +type it has, with these exceptions: + +@itemize @bullet +@item +Some narrow numeric values are @dfn{promoted} to a wider type. If the +expression is a narrow integer, such as @code{char} or @code{short}, +the call converts it automatically to @code{int} (@pxref{Integer +Types}).@footnote{On an embedded controller where @code{char} +or @code{short} is the same width as @code{int}, @code{unsigned char} +or @code{unsigned short} promotes to @code{unsigned int}, but that +never occurs in GNU C on real computers.} + +In this example, the expression @code{c} is passed as an @code{int}: + +@example +char c = '$'; + +printf ("Character c is '%c'\n", c); +@end example + +@item +If the expression +has type @code{float}, the call converts it automatically to +@code{double}. + +@item +An array as argument is converted to a pointer to its zeroth element. + +@item +A function name as argument is converted to a pointer to that function. +@end itemize + +@node Operand Promotions +@section Operand Promotions +@cindex operand promotions + +The operands in arithmetic operations undergo type conversion automatically. +These @dfn{operand promotions} are the same as the argument promotions +except without converting @code{float} to @code{double}. In other words, +the operand promotions convert + +@itemize @bullet +@item +@code{char} or @code{short} (whether signed or not) to @code{int}. 
+ +@item +an array to a pointer to its zeroth element, and + +@item +a function name to a pointer to that function. +@end itemize + +@node Common Type +@section Common Type +@cindex common type + +Arithmetic binary operators (except the shift operators) convert their +operands to the @dfn{common type} before operating on them. +Conditional expressions also convert the two possible results to their +common type. Here are the rules for determining the common type. + +If one of the numbers has a floating-point type and the other is an +integer, the common type is that floating-point type. For instance, + +@example +5.6 * 2 @result{} 11.2 /* @r{a @code{double} value} */ +@end example + +If both are floating point, the type with the larger range is the +common type. + +If both are integers but of different widths, the common type +is the wider of the two. + +If they are integer types of the same width, the common type is +unsigned if either operand is unsigned, and it's @code{long} if either +operand is @code{long}. It's @code{long long} if either operand is +@code{long long}. + +These rules apply to addition, subtraction, multiplication, division, +remainder, comparisons, and bitwise operations. They also apply to +the two branches of a conditional expression, and to the arithmetic +done in a modifying assignment operation. + +@node Scope +@chapter Scope +@cindex scope +@cindex block scope +@cindex function scope +@cindex function prototype scope + +Each definition or declaration of an identifier is visible +in certain parts of the program, which is typically less than the whole +of the program. The parts where it is visible are called its @dfn{scope}. + +Normally, declarations made at the top-level in the source -- that is, +not within any blocks and function definitions -- are visible for the +entire contents of the source file after that point. This is called +@dfn{file scope} (@pxref{File-Scope Variables}). 
+ +Declarations made within blocks of code, including within function +definitions, are visible only within those blocks. This is called +@dfn{block scope}. Here is an example: + +@example +@group +void +foo (void) +@{ + int x = 42; +@} +@end group +@end example + +@noindent +In this example, the variable @code{x} has block scope; it is visible +only within the @code{foo} function definition block. Thus, other +blocks could have their own variables, also named @code{x}, without +any conflict between those variables. + +A variable declared inside a subblock has a scope limited to +that subblock, + +@example +@group +void +foo (void) +@{ + @{ + int x = 42; + @} + // @r{@code{x} is out of scope here.} +@} +@end group +@end example + +If a variable declared within a block has the same name as a variable +declared outside of that block, the definition within the block +takes precedence during its scope: + +@example +@group +int x = 42; + +void +foo (void) +@{ + int x = 17; + printf ("%d\n", x); +@} +@end group +@end example + +@noindent +This prints 17, the value of the variable @code{x} declared in the +function body block, rather than the value of the variable @code{x} at +file scope. We say that the inner declaration of @code{x} +@dfn{shadows} the outer declaration, for the extent of the inner +declaration's scope. + +A declaration with block scope can be shadowed by another declaration +with the same name in a subblock. + +@example +@group +void +foo (void) +@{ + char *x = "foo"; + @{ + int x = 42; + @r{@dots{}} + exit (x / 6); + @} +@} +@end group +@end example + +A function parameter's scope is the entire function body, but it can +be shadowed. For example: + +@example +@group +int x = 42; + +void +foo (int x) +@{ + printf ("%d\n", x); +@} +@end group +@end example + +@noindent +This prints the value of @code{x} the function parameter, rather than +the value of the file-scope variable @code{x}. 
+ +Labels (@pxref{goto Statement}) have @dfn{function} scope: each label +is visible for the whole of the containing function body, both before +and after the label declaration: + +@example +@group +void +foo (void) +@{ + @r{@dots{}} + goto bar; + @r{@dots{}} + @{ // @r{Subblock does not affect labels.} + bar: + @r{@dots{}} + @} + goto bar; +@} +@end group +@end example + +Except for labels, a declared identifier is not +visible to code before its declaration. For example: + +@example +@group +int x = 5; +int y = x + 10; +@end group +@end example + +@noindent +will work, but: + +@example +@group +int x = y + 10; +int y = 5; +@end group +@end example + +@noindent +cannot refer to the variable @code{y} before its declaration. + +@include cpp.texi + +@node Integers in Depth +@chapter Integers in Depth + +This chapter explains the machine-level details of integer types: how +they are represented as bits in memory, and the range of possible +values for each integer type. + +@menu +* Integer Representations:: How integer values appear in memory. +* Maximum and Minimum Values:: Value ranges of integer types. +@end menu + +@node Integer Representations +@section Integer Representations + +@cindex integer representations +@cindex representation of integers + +Modern computers store integer values as binary (base-2) numbers that +occupy a single unit of storage, typically either as an 8-bit +@code{char}, a 16-bit @code{short int}, a 32-bit @code{int}, or +possibly, a 64-bit @code{long long int}. Whether a @code{long int} is +a 32-bit or a 64-bit value is system dependent.@footnote{In theory, +any of these types could have some other size, but it's not worth even +a minute to cater to that possibility. It never happens on +GNU/Linux.} + +@cindex @code{CHAR_BIT} +The macro @code{CHAR_BIT}, defined in @file{limits.h}, gives the number +of bits in type @code{char}. On any real operating system, the value +is 8.
+ +The fixed sizes of numeric types necessarily limit their @dfn{range +of values}, and the particular encoding of integers decides what that +range is. + +@cindex two's-complement representation +For unsigned integers, the entire space is used to represent a +nonnegative value. Signed integers are stored using +@dfn{two's-complement representation}: a signed integer with @var{n} +bits has a range from @math{-2@sup{(@var{n} - 1)}} to @minus{}1 to 0 +to 1 to @math{+2@sup{(@var{n} - 1)} - 1}, inclusive. The leftmost, or +high-order, bit is called the @dfn{sign bit}. + +@c ??? Needs correcting + +There is only one value that means zero, and the most negative number +lacks a positive counterpart. As a result, negating that number +causes overflow; in practice, its result is that number back again. +For example, a two's-complement signed 8-bit integer can represent all +integers from @minus{}128 to +127. We will revisit that +peculiarity shortly. + +Decades ago, there were computers that didn't use two's-complement +representation for integers (@pxref{Integers in Depth}), but they are +long gone and not worth any effort to support. + +@c ??? Is this duplicate? + +When an arithmetic operation produces a value that is too big to +represent, the operation is said to @dfn{overflow}. In C, integer +overflow does not interrupt the control flow or signal an error. +What it does depends on signedness. + +For unsigned arithmetic, the result of an operation that overflows is +the @var{n} low-order bits of the correct value. If the correct value +is representable in @var{n} bits, that is always the result; +thus we often say that ``integer arithmetic is exact,'' omitting the +crucial qualifying phrase ``as long as the exact result is +representable.'' + +In principle, a C program should be written so that overflow never +occurs for signed integers, but in GNU C you can specify various ways +of handling such overflow (@pxref{Integer Overflow}).
+ +Integer representations are best understood by looking at a table for +a tiny integer size; here are the possible values for an integer with +three bits: + +@multitable @columnfractions .25 .25 .25 .25 +@headitem Unsigned @tab Signed @tab Bits @tab 2s Complement +@item 0 @tab 0 @tab 000 @tab 000 (0) +@item 1 @tab 1 @tab 001 @tab 111 (-1) +@item 2 @tab 2 @tab 010 @tab 110 (-2) +@item 3 @tab 3 @tab 011 @tab 101 (-3) +@item 4 @tab -4 @tab 100 @tab 100 (-4) +@item 5 @tab -3 @tab 101 @tab 011 (3) +@item 6 @tab -2 @tab 110 @tab 010 (2) +@item 7 @tab -1 @tab 111 @tab 001 (1) +@end multitable + +The parenthesized decimal numbers in the last column represent the +signed meanings of the two's-complement of the line's value. Recall +that, in two's-complement encoding, the high-order bit is 0 when +the number is nonnegative. + +We can now understand the peculiar behavior of negation of the +most negative two's-complement integer: start with 0b100, +invert the bits to get 0b011, and add 1: we get +0b100, the value we started with. + +We can also see overflow behavior in two's-complement: + +@example +3 + 1 = 0b011 + 0b001 = 0b100 = (-4) +3 + 2 = 0b011 + 0b010 = 0b101 = (-3) +3 + 3 = 0b011 + 0b011 = 0b110 = (-2) +@end example + +@noindent +A sum of two nonnegative signed values that overflows has a 1 in the +sign bit, so the exact positive result is truncated to a negative +value. 
+ +@c ===================================================================== + +@node Maximum and Minimum Values +@section Maximum and Minimum Values +@cindex maximum integer values +@cindex minimum integer values +@cindex integer ranges +@cindex ranges of integer types +@findex INT_MAX +@findex UINT_MAX +@findex SHRT_MAX +@findex LONG_MAX +@findex LLONG_MAX +@findex USHRT_MAX +@findex ULONG_MAX +@findex ULLONG_MAX +@findex CHAR_MAX +@findex SCHAR_MAX +@findex UCHAR_MAX + +For each primitive integer type, there is a standard macro defined in +@file{limits.h} that gives the largest value that type can hold. For +instance, for type @code{int}, the maximum value is @code{INT_MAX}. +On a 32-bit computer, that is equal to 2,147,483,647. The +maximum value for @code{unsigned int} is @code{UINT_MAX}, which on a +32-bit computer is equal to 4,294,967,295. Likewise, there are +@code{SHRT_MAX}, @code{LONG_MAX}, and @code{LLONG_MAX}, and +corresponding unsigned limits @code{USHRT_MAX}, @code{ULONG_MAX}, and +@code{ULLONG_MAX}. + +Since there are three ways to specify a @code{char} type, there are +also three limits: @code{CHAR_MAX}, @code{SCHAR_MAX}, and +@code{UCHAR_MAX}. + +For each type that is or might be signed, there is another symbol that +gives the minimum value it can hold. (Just replace @code{MAX} with +@code{MIN} in the names listed above.) There is no minimum limit +symbol for types specified with @code{unsigned} because the +minimum for them is universally zero. + +@code{INT_MIN} is not the negative of @code{INT_MAX}. In +two's-complement representation, the most negative number is 1 less +than the negative of the most positive number. Thus, @code{INT_MIN} +on a 32-bit computer has the value @minus{}2,147,483,648. You can't +actually write the value that way in C, since it would overflow. +That's a good reason to use @code{INT_MIN} to specify +that value. Its definition is written to avoid overflow. 
+ +@include fp.texi + +@node Compilation +@chapter Compilation +@cindex object file +@cindex compilation module +@cindex make rules + +Early in the manual we explained how to compile a simple C program +that consists of a single source file (@pxref{Compile Example}). +However, we handle only short programs that way. A typical C program +consists of many source files, each of which is a separate +@dfn{compilation module}---meaning that it has to be compiled +separately. + +The full details of how to compile with GCC are documented in the GCC +manual (@pxref{Invoking GCC, , GCC Command Options, gcc, Using the GNU +Compiler Collection}). +@c ??? ref +Here we give only a simple introduction. + +These are the commands to compile two compilation modules, +@file{foo.c} and @file{bar.c}, with a command for each module: + +@example +gcc -c -O -g foo.c +gcc -c -O -g bar.c +@end example + +@noindent +In these commands, @option{-g} says to generate debugging information, +@option{-O} says to do some optimization, and @option{-c} says to put +the compiled code for that module into a corresponding @dfn{object +file} and go no further. The object file for @file{foo.c} is called +@file{foo.o}, and so on. + +If you wish, you can specify the additional options @option{-Wformat +-Wparentheses -Wstrict-prototypes}, which request additional warnings. + +One reason to divide a large program into multiple compilation modules +is to control how each module can access the internals of the others. +When a module declares a function or variable @code{extern}, other +modules can access it. The other functions and variables in +a module can't be accessed from outside that module. + +The other reason for using multiple modules is so that changing +one source file does not require recompiling all of them in order +to try the modified program. Dividing a large program into many +substantial modules in this way typically makes recompilation much faster.
+ +@cindex linking object files +After you compile all the program's modules, in order to run the +program you must @dfn{link} the object files into a combined +executable, like this: + +@example +gcc -o foo foo.o bar.o +@end example + +@noindent +In this command, @option{-o foo} specifies the file name for the +executable file, and the other arguments are the object files to link. +Always specify the executable file name in a command that generates +one. + +Normally we don't run any of these commands directly. Instead we +write a set of @dfn{make rules} for the program, then use the +@command{make} program to recompile only the source files that need to +be recompiled. + +@c ??? ref to make manual + +@node Directing Compilation +@chapter Directing Compilation + +This chapter describes C constructs that don't alter the program's +meaning @emph{as such}, but rather direct the compiler how to treat +some aspects of the program. + +@menu +* Pragmas:: Controlling compilation of some constructs. +* Static Assertions:: Compile-time tests for conditions. +@end menu + +@node Pragmas +@section Pragmas + +A @dfn{pragma} is an annotation in a program that gives direction to +the compiler. + +@menu +* Pragma Basics:: Pragma syntax and usage. +* Severity Pragmas:: Settings for compile-time pragma output. +* Optimization Pragmas:: Controlling optimizations. +@end menu + +@c See also @ref{Macro Pragmas}, which save and restore macro definitions. + +@node Pragma Basics +@subsection Pragma Basics + +C defines two syntactical forms for pragmas, the line form and the +token form. You can write any pragma in either form, with the same +meaning. + +The line form is a line in the source code, like this: + +@example +#pragma @var{line} +@end example + +@noindent +The line pragma has no effect on the parsing of the lines around it. +This form has the drawback that it can't be generated by a macro expansion.
+ +The token form is a series of tokens; it can appear anywhere in the +program between the other tokens. + +@example +_Pragma (@var{stringconstant}) +@end example + +@noindent +The pragma has no effect on the syntax of the tokens that surround it; +thus, here's a pragma in the middle of an @code{if} statement: + +@example +if _Pragma ("hello") (x > 1) +@end example + +@noindent +However, that's an unclear thing to do; for the sake of +understandability, it is better to put a pragma on a line by itself +and not embedded in the middle of another construct. + +Both forms of pragma have a textual argument. In a line pragma, the +text is the rest of the line. The textual argument to @code{_Pragma} +uses the same syntax as a C string constant: surround the text with +two @samp{"} characters, and add a backslash before each @samp{"} or +@samp{\} character in it. + +With either syntax, the textual argument specifies what to do. +It begins with one or several words that specify the operation. +If the compiler does not recognize them, it ignores the pragma. + +Here are the pragma operations supported in GNU C@. + +@c ??? Verify font for [] +@table @code +@item #pragma GCC dependency "@var{file}" [@var{message}] +@itemx _Pragma ("GCC dependency \"@var{file}\" [@var{message}]") +Declares that the current source file depends on @var{file}, so GNU C +compares the file times and gives a warning if @var{file} is newer +than the current source file. + +This directive searches for @var{file} the way @code{#include} +searches for a non-system header file. + +If @var{message} is given, the warning message includes that text. + +Examples: + +@example +#pragma GCC dependency "parse.y" +_Pragma ("GCC dependency \"/usr/include/time.h\" \ +rerun fixincludes") +@end example + +@item #pragma GCC poison @var{identifiers} +@itemx _Pragma ("GCC poison @var{identifiers}") +Poisons the identifiers listed in @var{identifiers}.
+ +This is useful to make sure all mention of @var{identifiers} has been +deleted from the program and that no reference to them creeps back in. +If any of those identifiers appears anywhere in the source after the +directive, it causes a compilation error. For example, + +@example +#pragma GCC poison printf sprintf fprintf +sprintf(some_string, "hello"); +@end example + +@noindent +generates an error. + +If a poisoned identifier appears as part of the expansion of a macro +that was defined before the identifier was poisoned, it will @emph{not} +cause an error. Thus, system headers that define macros that use +the identifier will not cause errors. + +For example, + +@example +#define strrchr rindex +_Pragma ("GCC poison rindex") +strrchr(some_string, 'h'); +@end example + +@noindent +does not cause a compilation error. + +@item #pragma GCC system_header +@itemx _Pragma ("GCC system_header") +Specify treating the rest of the current source file as if it came +from a system header file. @xref{System Headers, System Headers, +System Headers, gcc, Using the GNU Compiler Collection}. + +@item #pragma GCC warning @var{message} +@itemx _Pragma ("GCC warning @var{message}") +Equivalent to @code{#warning}. Its advantage is that the +@code{_Pragma} form can be included in a macro definition. + +@item #pragma GCC error @var{message} +@itemx _Pragma ("GCC error @var{message}") +Equivalent to @code{#error}. Its advantage is that the +@code{_Pragma} form can be included in a macro definition. + +@item #pragma message @var{message} +@itemx _Pragma ("message @var{message}") +Similar to @samp{GCC warning} and @samp{GCC error}, this simply prints an +informational message, and could be used to include additional warning +or error text without triggering more warnings or errors. (Note that +unlike @samp{warning} and @samp{error}, @samp{message} does not include +@samp{GCC} as part of the pragma.)
+@end table + +@node Severity Pragmas +@subsection Severity Pragmas + +These pragmas control the severity of classes of diagnostics. +You can specify the class of diagnostic with the GCC option that causes +those diagnostics to be generated. + +@table @code +@item #pragma GCC diagnostic error @var{option} +@itemx _Pragma ("GCC diagnostic error @var{option}") +For code following this pragma, treat diagnostics of the variety +specified by @var{option} as errors. For example: + +@example +_Pragma ("GCC diagnostic error -Wformat") +@end example + +@noindent +specifies to treat diagnostics enabled by the @option{-Wformat} option +as errors rather than warnings. + +@item #pragma GCC diagnostic warning @var{option} +@itemx _Pragma ("GCC diagnostic warning @var{option}") +For code following this pragma, treat diagnostics of the variety +specified by @var{option} as warnings. This overrides the +@option{-Werror} option which says to treat warnings as errors. + +@item #pragma GCC diagnostic ignored @var{option} +@itemx _Pragma ("GCC diagnostic ignored @var{option}") +For code following this pragma, refrain from reporting any diagnostics +of the variety specified by @var{option}. + +@item #pragma GCC diagnostic push +@itemx _Pragma ("GCC diagnostic push") +@itemx #pragma GCC diagnostic pop +@itemx _Pragma ("GCC diagnostic pop") +These pragmas maintain a stack of states for severity settings. +@samp{GCC diagnostic push} saves the current settings on the stack, +and @samp{GCC diagnostic pop} pops the last stack item and restores +the current settings from that. + +@samp{GCC diagnostic pop} when the severity setting stack is empty +restores the settings to what they were at the start of compilation. + +Here is an example: + +@example +_Pragma ("GCC diagnostic error -Wformat") + +/* @r{@option{-Wformat} messages treated as errors. } */ + +_Pragma ("GCC diagnostic push") +_Pragma ("GCC diagnostic warning -Wformat") + +/* @r{@option{-Wformat} messages treated as warnings.
} */ + +_Pragma ("GCC diagnostic push") +_Pragma ("GCC diagnostic ignored -Wformat") + +/* @r{@option{-Wformat} messages suppressed. } */ + +_Pragma ("GCC diagnostic pop") + +/* @r{@option{-Wformat} messages treated as warnings again. } */ + +_Pragma ("GCC diagnostic pop") + +/* @r{@option{-Wformat} messages treated as errors again. } */ + +/* @r{This is an excess @samp{pop} that matches no @samp{push}. } */ +_Pragma ("GCC diagnostic pop") + +/* @r{@option{-Wformat} messages treated once again} + @r{as specified by the GCC command-line options.} */ +@end example +@end table + +@node Optimization Pragmas +@subsection Optimization Pragmas + +These pragmas enable a particular optimization for specific function +definitions. The settings take effect at the end of a function +definition, so the clean place to use these pragmas is between +function definitions. + +@table @code +@item #pragma GCC optimize @var{optimization} +@itemx _Pragma ("GCC optimize @var{optimization}") +These pragmas enable the optimization @var{optimization} for the +following functions. For example, + +@example +_Pragma ("GCC optimize -fforward-propagate") +@end example + +@noindent +says to apply the @samp{forward-propagate} optimization to all +following function definitions. Specifying optimizations for +individual functions, rather than for the entire program, is rare but +can be useful for getting around a bug in the compiler. + +If @var{optimization} does not correspond to a defined optimization +option, the pragma is erroneous. To turn off an optimization, use the +corresponding @samp{-fno-} option, such as +@samp{-fno-forward-propagate}. + +@item #pragma GCC target @var{optimizations} +@itemx _Pragma ("GCC target @var{optimizations}") +The pragma @samp{GCC target} is similar to @samp{GCC optimize} but is +used for platform-specific optimizations. 
Thus, + +@example +_Pragma ("GCC target popcnt") +@end example + +@noindent +activates the optimization @samp{popcnt} for all +following function definitions. This optimization is supported +on a few common targets but not on others. + +@item #pragma GCC push_options +@itemx _Pragma ("GCC push_options") +The @samp{push_options} pragma saves on a stack the current settings +specified with the @samp{target} and @samp{optimize} pragmas. + +@item #pragma GCC pop_options +@itemx _Pragma ("GCC pop_options") +The @samp{pop_options} pragma pops saved settings from that stack. + +Here's an example of using this stack. + +@example +_Pragma ("GCC push_options") +_Pragma ("GCC optimize forward-propagate") + +/* @r{Functions to compile} + @r{with the @code{forward-propagate} optimization.} */ + +_Pragma ("GCC pop_options") +/* @r{Ends enablement of @code{forward-propagate}.} */ +@end example + +@item #pragma GCC reset_options +@itemx _Pragma ("GCC reset_options") +Clears all pragma-defined @samp{target} and @samp{optimize} +optimization settings. +@end table + +@node Static Assertions +@section Static Assertions +@cindex static assertions +@findex _Static_assert + +You can add compile-time tests for necessary conditions into your +code using @code{_Static_assert}. This can be useful, for example, to +check that the compilation target platform supports the type sizes +that the code expects. For example, + +@example +_Static_assert ((sizeof (long int) >= 8), + "long int needs to be at least 8 bytes"); +@end example + +@noindent +reports a compile-time error if compiled on a system with long +integers smaller than 8 bytes, with @samp{long int needs to be at +least 8 bytes} as the error message. + +Since calls to @code{_Static_assert} are processed at compile time, the +expression must be computable at compile time and the error message +must be a literal string. The expression can refer to the sizes of +variables, but can't refer to their values.
For example, the +following static assertion is invalid for two reasons: + +@example +char *error_message + = "long int needs to be at least 8 bytes"; +int size_of_long_int = sizeof (long int); + +_Static_assert (size_of_long_int == 8, error_message); +@end example + +@noindent +The expression @code{size_of_long_int == 8} isn't computable at +compile time, and the error message isn't a literal string. + +You can, though, use preprocessor definition values with +@code{_Static_assert}: + +@example +#define LONG_INT_ERROR_MESSAGE "long int needs to be \ +at least 8 bytes" + +_Static_assert ((sizeof (long int) == 8), + LONG_INT_ERROR_MESSAGE); +@end example + +Static assertions are permitted wherever a statement or declaration is +permitted, including at top level in the file, and also inside the +definition of a type. + +@example +union y +@{ + int i; + int *ptr; + _Static_assert (sizeof (int *) == sizeof (int), + "Pointer and int not same size"); +@}; +@end example + +@node Type Alignment +@appendix Type Alignment +@cindex type alignment +@cindex alignment of type +@findex _Alignof +@findex __alignof__ + +Code for device drivers and other communication with low-level +hardware sometimes needs to be concerned with the alignment of +data objects in memory. + +Each data type has a required @dfn{alignment}, always a power of 2, +that says at which memory addresses an object of that type can validly +start. A valid address for the type must be a multiple of its +alignment. If a type's alignment is 1, that means it can validly +start at any address. If a type's alignment is 2, that means it can +only start at an even address. If a type's alignment is 4, that means +it can only start at an address that is a multiple of 4. + +The alignment of a type (except @code{char}) can vary depending on the +kind of computer in use. To refer to the alignment of a type in a C +program, use @code{_Alignof}, whose syntax parallels that of +@code{sizeof}. 
Like @code{sizeof}, @code{_Alignof} is a compile-time +operation, and it doesn't compute the value of the expression used +as its argument. + +Nominally, each integer and floating-point type has an alignment equal to +the largest power of 2 that divides its size. Thus, @code{int} with +size 4 has a nominal alignment of 4, and @code{long long int} with +size 8 has a nominal alignment of 8. + +However, each kind of computer generally has a maximum alignment, and +no type needs more alignment than that. If the computer's maximum +alignment is 4 (which is common), then no type's alignment is more +than 4. + +The size of any type is always a multiple of its alignment; that way, +in an array whose elements have that type, all the elements are +properly aligned if the first one is. + +These rules apply to all real computers today, but some embedded +controllers have odd exceptions. We don't have references to cite for +them. +@c We can't cite a nonfree manual as documentation. + +Ordinary C code guarantees that every object of a given type is in +fact aligned as that type requires. + +If the operand of @code{_Alignof} is a structure field, the value +is the alignment it requires. It may have a greater alignment by +coincidence, due to the other fields, but @code{_Alignof} is not +concerned about that. @xref{Structures}. + +Older versions of GNU C used the keyword @code{__alignof__} for this, +but now that the feature has been standardized, it is better +to use the standard keyword @code{_Alignof}. + +@findex _Alignas +@findex __aligned__ +You can explicitly specify an alignment requirement for a particular +variable or structure field by adding @code{_Alignas +(@var{alignment})} to the declaration, where @var{alignment} is a +power of 2 or a type name. For instance: + +@example +char _Alignas (8) x; +@end example + +@noindent +or + +@example +char _Alignas (double) x; +@end example + +@noindent +specifies that @code{x} must start on an address that is a multiple of +8. 
However, if @var{alignment} exceeds the maximum alignment for the +machine, that maximum is how much alignment @code{x} will get. + +The older GNU C syntax for this feature was to add +@code{__attribute__ ((__aligned__ (@var{alignment})))} to the +declaration, after the variable name. For instance: + +@example +char x __attribute__ ((__aligned__ (8))); +@end example + +@xref{Attributes}. + +@node Aliasing +@appendix Aliasing +@cindex aliasing (of storage) +@cindex pointer type conversion +@cindex type conversion, pointer + +We have already presented examples of casting a @code{void *} pointer +to another pointer type, and casting another pointer type to +@code{void *}. + +One common kind of pointer cast is guaranteed safe: casting the value +returned by @code{malloc} and related functions (@pxref{Dynamic Memory +Allocation}). It is safe because these functions do not save the +pointer anywhere else; the only way the program will access the newly +allocated memory is via the pointer just returned. + +In fact, C allows casting any pointer type to any other pointer type. +Using this to access the same place in memory using two +different data types is called @dfn{aliasing}. + +Aliasing is necessary in some programs that do sophisticated memory +management, such as GNU Emacs, but most C programs don't need to do +aliasing. When it isn't needed, @strong{stay away from it!} To do +aliasing correctly requires following the rules stated below. +Otherwise, the aliasing may result in malfunctions when the program +runs. + +The rest of this appendix explains the pitfalls and rules of aliasing. + +@menu +* Aliasing Alignment:: Memory alignment considerations for + casting between pointer types. +* Aliasing Length:: Type size considerations for + casting between pointer types. +* Aliasing Type Rules:: Even when type alignment and size match, + aliasing can still have surprising results.
+ +@end menu + +@node Aliasing Alignment +@appendixsection Aliasing and Alignment + +In order for a type-converted pointer to be valid, it must have the +alignment that the new pointer type requires. For instance, on most +computers, @code{int} has alignment 4; the address of an @code{int} +must be a multiple of 4. However, @code{char} has alignment 1, so the +address of a @code{char} is usually not a multiple of 4. Taking the +address of such a @code{char} and casting it to @code{int *} probably +results in an invalid pointer. Trying to dereference it may cause a +@code{SIGBUS} signal, depending on the platform in use (@pxref{Signals}). + +@example +foo () +@{ + char i[4]; + int *p = (int *) &i[1]; /* @r{Misaligned pointer!} */ + return *p; /* @r{Crash!} */ +@} +@end example + +This requirement is never a problem when casting the return value +of @code{malloc} because that function always returns a pointer +with as much alignment as any type can require. + +@node Aliasing Length +@appendixsection Aliasing and Length + +When converting a pointer to a different pointer type, make sure the +object it really points to is at least as long as the target of the +converted pointer. For instance, suppose @code{p} has type @code{int +*} and it's cast as follows: + +@example +int *p; + +struct foo + @{ + double d, e, f; + @}; + +struct foo *q = (struct foo *)p; + +q->f = 5.14159; +@end example + +@noindent +the value @code{q->f} will run past the end of the @code{int} that +@code{p} points to. If @code{p} was initialized to the start of an +array of type @code{int[6]}, the object is long enough for three +@code{double}s. But if @code{p} points to something shorter, +@code{q->f} will run on beyond the end of that, overlaying some other +data. Storing that will garble that other data. Or it could extend +past the end of memory space and cause a @code{SIGSEGV} signal +(@pxref{Signals}).
+ +@node Aliasing Type Rules +@appendixsection Type Rules for Aliasing + +C code that converts a pointer to a different pointer type can use the +pointers to access the same memory locations with two different data +types. If the same address is accessed with different types in a +single control thread, optimization can make the code do surprising +things (in effect, make it malfunction). + +Here's a concrete example where aliasing can change the code's +behavior when it is optimized. We assume that @code{float} is 4 bytes +long, like @code{int}, and so is every pointer. Thus, the structures +@code{struct a} and @code{struct b} are both 8 bytes. + +@example +#include <stdio.h> +struct a @{ int size; char *data; @}; +struct b @{ float size; char *data; @}; + +void sub (struct a *p, struct b *q) +@{ +  int x; +  p->size = 0; +  q->size = 1; +  x = p->size; +  printf("x       =%d\n", x); +  printf("p->size =%d\n", (int)p->size); +  printf("q->size =%d\n", (int)q->size); +@} + +int main(void) +@{ +  struct a foo; +  struct a *p = &foo; +  struct b *q = (struct b *) &foo; + +  sub (p, q); +@} +@end example + +This code works as intended when compiled without optimization. All +the operations are carried out sequentially as written. The code +sets @code{x} to @code{p->size}, but what it actually gets is the +bits of the floating point number 1, as type @code{int}. + +However, when optimizing, the compiler is allowed to assume +(mistakenly, here) that @code{q} does not point to the same storage as +@code{p}, because their data types are not allowed to alias. + +From this assumption, the compiler can deduce (falsely, here) that the +assignment into @code{q->size} has no effect on the value of +@code{p->size}, which must therefore still be 0. Thus, @code{x} will +be set to 0. + +GNU C, following the C standard, @emph{defines} this optimization as +legitimate. Code that misbehaves when optimized following these rules +is, by definition, incorrect C code.
+ +The rules for storage aliasing in C are based on the two data types: +the type of the object, and the type it is accessed through. The +rules permit accessing part of a storage object of type @var{t} using +only these types: + +@itemize @bullet +@item +@var{t}. + +@item +A type compatible with @var{t}. @xref{Compatible Types}. + +@item +A signed or unsigned version of one of the above. + +@item +A qualified version of one of the above. +@xref{Type Qualifiers}. + +@item +An array, structure (@pxref{Structures}), or union type +(@pxref{Unions}) that contains one of the above, either directly as a +field or through multiple levels of fields. If @var{t} is +@code{double}, this would include @code{struct s @{ union @{ double +d[2]; int i[4]; @} u; int i; @};} because there's a @code{double} +inside it somewhere. + +@item +A character type. +@end itemize + +What do these rules say about the example in this subsection? + +For @code{foo.size} (equivalently, @code{p->size}), @var{t} is +@code{int}. The type @code{float} is not allowed as an aliasing type +by those rules, so @code{q->size} is not supposed to alias with +@code{foo.size}. Based on that assumption, GNU C makes a +permitted optimization that was not, in this case, consistent with +what the programmer intended the program to do. + +Whether GCC actually performs type-based aliasing analysis depends on +the details of the code. GCC has other ways to determine (in some cases) +whether objects alias, and if it gets a reliable answer that way, it won't +fall back on type-based heuristics. + +@c @opindex -fno-strict-aliasing +The importance of knowing the type-based aliasing rules is not so as +to ensure that the optimization is done where it would be safe, but so +as to ensure it is @emph{not} done in a way that would break the +program. You can turn off type-based aliasing analysis by giving GCC +the option @option{-fno-strict-aliasing}.
+ +@node Digraphs +@appendix Digraphs +@cindex digraphs + +C accepts aliases for certain characters. Apparently in the 1990s +some computer systems had trouble inputting these characters, or +trouble displaying them. These digraphs almost never appear in C +programs nowadays, but we mention them for completeness. + +@table @samp +@item <: +An alias for @samp{[}. +@item :> +An alias for @samp{]}. +@item <% +An alias for @samp{@{}. +@item %> +An alias for @samp{@}}. +@item %: +An alias for @samp{#}, +used for preprocessing directives (@pxref{Directives}) and +macros (@pxref{Macros}). +@end table + +@node Attributes +@appendix Attributes in Declarations +@cindex attributes +@findex __attribute__ + +You can specify certain additional requirements in a declaration, to +get fine-grained control over code generation, and helpful +informational messages during compilation. We use a few attributes in +code examples throughout this manual, including + +@table @code +@item aligned +The @code{aligned} attribute specifies a minimum alignment for a +variable or structure field, measured in bytes: + +@example +int foo __attribute__ ((aligned (8))) = 0; +@end example + +@noindent +This directs GNU C to allocate @code{foo} at an address that is a +multiple of 8 bytes. However, you can't force an alignment bigger +than the computer's maximum meaningful alignment. + +@item packed +The @code{packed} attribute specifies to compact the fields of a +structure by not leaving gaps between fields. For example, + +@example +struct __attribute__ ((packed)) bar +@{ + char a; + int b; +@}; +@end example + +@noindent +allocates the integer field @code{b} at byte 1 in the structure, +immediately after the character field @code{a}. The packed structure +is just 5 bytes long (assuming @code{int} is 4 bytes) and its +alignment is 1, that of @code{char}. 
+ +@item deprecated +Applicable to both variables and functions, the @code{deprecated} +attribute tells the compiler to issue a warning if the variable or +function is ever used in the source file. + +@example +int old_foo __attribute__ ((deprecated)); + +int old_quux () __attribute__ ((deprecated)); +@end example + +@item __noinline__ +The @code{__noinline__} attribute, in a function's declaration or +definition, specifies never to inline calls to that function. All +calls to that function, in a compilation unit where it has this +attribute, will be compiled to invoke the separately compiled +function. @xref{Inline Function Definitions}. + +@item __noclone__ +The @code{__noclone__} attribute, in a function's declaration or +definition, specifies never to clone that function. Thus, there will +be only one compiled version of the function. @xref{Label Value +Caveats}, for more information about cloning. + +@item always_inline +The @code{always_inline} attribute, in a function's declaration or +definition, specifies to inline all calls to that function (unless +something about the function makes inlining impossible). This applies +to all calls to that function in a compilation unit where it has this +attribute. @xref{Inline Function Definitions}. + +@item gnu_inline +The @code{gnu_inline} attribute, in a function's declaration or +definition, specifies to handle the @code{inline} keyword the way GNU +C originally implemented it, many years before ISO C said anything +about inlining. @xref{Inline Function Definitions}. +@end table + +For full documentation of attributes, see the GCC manual. +@xref{Attribute Syntax, Attribute Syntax, System Headers, gcc, Using +the GNU Compiler Collection}. + +@node Signals +@appendix Signals +@cindex signal +@cindex handler (for signal) +@cindex @code{SIGSEGV} +@cindex @code{SIGFPE} +@cindex @code{SIGBUS} + +Some program operations bring about an error condition called a +@dfn{signal}. These signals terminate the program, by default.
+ +There are various different kinds of signals, each with a name. We +have seen several such error conditions throughout this manual: + +@table @code +@item SIGSEGV +This signal is generated when a program tries to read or write outside +the memory that is allocated for it, or to write memory that can only +be read. The name is an abbreviation for ``segmentation violation''. + +@item SIGFPE +This signal indicates a fatal arithmetic error. The name is an +abbreviation for ``floating-point exception'', but covers all types of +arithmetic errors, including division by zero and overflow. + +@item SIGBUS +This signal is generated when an invalid pointer is dereferenced, +typically the result of dereferencing an uninitialized pointer. It is +similar to @code{SIGSEGV}, except that @code{SIGSEGV} indicates +invalid access to valid memory, while @code{SIGBUS} indicates an +attempt to access an invalid address. +@end table + +These kinds of signal allow the program to specify a function as a +@dfn{signal handler}. When a signal has a handler, it doesn't +terminate the program; instead it calls the handler. + +There are many other kinds of signal; here we list only those that +come from run-time errors in C operations. The rest have to do with +the functioning of the operating system. The GNU C Library Reference +Manual gives more explanation about signals (@pxref{Program Signal +Handling, The GNU C Library, , libc, The GNU C Library Reference +Manual}). + +@node GNU Free Documentation License +@appendix GNU Free Documentation License + +@include fdl.texi + +@node Symbol Index +@unnumbered Index of Symbols and Keywords + +@printindex fn + +@node Concept Index +@unnumbered Concept Index + +@printindex cp + +@bye diff --git a/cpp.texi b/cpp.texi new file mode 100644 index 0000000..521253e --- /dev/null +++ b/cpp.texi @@ -0,0 +1,2507 @@ + +@ignore +@c Copyright @copyright{} 2020 Richard Stallman and Free Software Foundation, Inc.
+ +DRAFT --- DO NOT REDISTRIBUTE --- DRAFT --- DO NOT REDISTRIBUTE --- DRAFT +@end ignore + +@node Preprocessing +@chapter Preprocessing +@c man begin DESCRIPTION +@cindex preprocessing +As the first stage of compiling a C source module, GCC transforms the +text with text substitutions and file inclusions. This is called +@dfn{preprocessing}. + +@menu +* Preproc Overview:: +* Directives:: +* Preprocessing Tokens:: +* Header Files:: +* Macros:: +* Conditionals:: +* Diagnostics:: +* Line Control:: +* Null Directive:: +@end menu + +@node Preproc Overview +@section Preprocessing Overview + +GNU C performs preprocessing on each line of a C program as the first +stage of compilation. Preprocessing operates on a line only when it +contains a @dfn{preprocessing directive} or uses a @dfn{macro}---all +other lines pass through preprocessing unchanged. + +Here are some jobs that preprocessing does. The rest of +this chapter gives the details. + +@itemize @bullet +@item +Inclusion of header files. These are files (usually containing +declarations and macro definitions) that can be substituted into your +program. + +@item +Macro expansion. You can define @dfn{macros}, which are abbreviations +for arbitrary fragments of C code. Preprocessing replaces the macros +with their definitions. Some macros are automatically predefined. + +@item +Conditional compilation. You can include or exclude parts of the +program according to various conditions. + +@item +Line control. If you use a program to combine or rearrange source files +into an intermediate file that is then compiled, you can use line +control to inform the compiler where each source line originally came +from. + +@item +Compilation control. @code{#pragma} and @code{_Pragma} invoke +some special compiler features in how to handle certain constructs. + +@item +Diagnostics. You can detect problems at compile time and issue errors +or warnings. 
+@end itemize + +Except for expansion of predefined macros, all these operations happen +only if you use preprocessing directives to request them. + +@node Directives +@section Directives +@cindex directives +@cindex preprocessing directives +@cindex directive line +@cindex directive name + +@dfn{Preprocessing directives} are lines in the program that start +with @samp{#}. Whitespace is allowed before and after the @samp{#}. +The @samp{#} is followed by an identifier, the @dfn{directive name}. +It specifies the operation to perform. Here are a couple of examples: + +@example +#define LIMIT 51 + # undef LIMIT +# error You screwed up! +@end example + +We usually refer to a directive as @code{#@var{name}} where @var{name} +is the directive name. For example, @code{#define} means the +directive that defines a macro. + +The @samp{#} that begins a directive cannot come from a macro +expansion. Also, the directive name is not macro expanded. Thus, if +@code{foo} is defined as a macro expanding to @code{define}, that does +not make @code{#foo} a valid preprocessing directive. + +The set of valid directive names is fixed. Programs cannot define new +preprocessing directives. + +Some directives require arguments; these make up the rest of the +directive line and must be separated from the directive name by +whitespace. For example, @code{#define} must be followed by a macro +name and the intended expansion of the macro. + +A preprocessing directive cannot cover more than one line. The line +can, however, be continued with backslash-newline, or by a +@samp{/*@r{@dots{}}*/}-style comment that extends past the end of the +line. These will be replaced (by nothing, or by whitespace) before +the directive is processed. + +@node Preprocessing Tokens +@section Preprocessing Tokens + +@cindex preprocessing tokens +Preprocessing divides C code (minus its comments) into +@dfn{tokens} that are similar to C tokens, but not exactly the same. +Here are the quirks of preprocessing tokens. 
+ +The main classes of preprocessing tokens are identifiers, +preprocessing numbers, string constants, character constants, and +punctuators; there are a few others too. + +@table @asis +@item identifier +@cindex identifiers +An @dfn{identifier} preprocessing token is syntactically like an +identifier in C: any sequence of letters, digits, or underscores, as +well as non-ASCII characters represented using @samp{\U} or @samp{\u}, +that doesn't begin with a digit. + +During preprocessing, the keywords of C have no special significance; +at that stage, they are simply identifiers. Thus, you can define a +macro whose name is a keyword. The only identifier that is special +during preprocessing is @code{defined} (@pxref{defined}). + +@item preprocessing number +@cindex numbers, preprocessing +@cindex preprocessing numbers +A @dfn{preprocessing number} is something that preprocessing treats +textually as a number, including C numeric constants, and other +sequences of characters which resemble numeric constants. +Preprocessing does not try to verify that a preprocessing number is a +valid number in C, and indeed it need not be one. + +More precisely, preprocessing numbers begin with an optional period, a +required decimal digit, and then continue with any sequence of +letters, digits, underscores, periods, and exponents. Exponents are +the two-character sequences @samp{e+}, @samp{e-}, @samp{E+}, +@samp{E-}, @samp{p+}, @samp{p-}, @samp{P+}, and @samp{P-}. (The +exponents that begin with @samp{p} or @samp{P} are new to C99. They +are used for hexadecimal floating-point constants.) + +The reason behind this unusual syntactic class is that the full +complexity of numeric constants is irrelevant during preprocessing. +The distinction between lexically valid and invalid floating-point +numbers, for example, doesn't matter at this stage. 
The use of +preprocessing numbers makes it possible to split an identifier at any +position and get exactly two tokens, and reliably paste them together +using the @code{##} operator (@pxref{Concatenation}). + +@item punctuator +A @dfn{punctuator} is syntactically like an operator. +These are the valid punctuators: + +@example +[ ] ( ) @{ @} . -> +++ -- & * + - ~ ! +/ % << >> < > <= >= == != ^ | && || +? : ; ... += *= /= %= += -= <<= >>= &= ^= |= +, # ## +<: :> <% %> %: %:%: +@end example + +@item string constant +A string constant in the source code is recognized by preprocessing as +a single preprocessing token. + +@item character constant +A character constant in the source code is recognized by preprocessing +as a single preprocessing token. + +@item header name +Within the @code{#include} directive, preprocessing recognizes a +@dfn{header name} token. It consists of @samp{"@var{name}"}, where +@var{name} is a sequence of source characters other than newline and +@samp{"}, or @samp{<@var{name}>}, where @var{name} is a sequence of +source characters other than newline and @samp{>}. + +In practice, it is more convenient to think that the @code{#include} line +is exempt from tokenization. + +@item other +Any other character that's valid in a C source program +is treated as a separate preprocessing token. +@end table + +Once the program is broken into preprocessing tokens, they remain +separate until the end of preprocessing. Macros that generate two +consecutive tokens insert whitespace to keep them separate, if +necessary. For example, + +@example +@group +#define foo() bar +foo()baz + @expansion{} bar baz +@emph{not} + @expansion{} barbaz +@end group +@end example + +The only exception is with the @code{##} preprocessing operator, which +pastes tokens together (@pxref{Concatenation}). 
+ +Preprocessing treats the null character (code 0) as whitespace, but +generates a warning for it because it may be invisible to the user +(many terminals do not display it at all) and its presence in the file +is probably a mistake. + +@node Header Files +@section Header Files + +@cindex header file +A header file is a file of C code, typically containing C declarations +and macro definitions (@pxref{Macros}), to be shared between several +source files. You request the use of a header file in your program by +@dfn{including} it, with the C preprocessing directive +@code{#include}. + +Header files serve two purposes. + +@itemize @bullet +@item +@cindex system header files +System header files declare the interfaces to parts of the operating +system. You include them in your program to supply the definitions and +declarations that you need to invoke system calls and libraries. + +@item +Program-specific header files contain declarations for interfaces between the +source files of a particular program. It is a good idea to create a header +file for related declarations and macro definitions if all or most of them +are needed in several different source files. +@end itemize + +Including a header file produces the same results as copying the header +file into each source file that needs it. Such copying would be +time-consuming and error-prone. With a header file, the related +declarations appear in only one place. If they need to be changed, you +can change them in one place, and programs that include the header file +will then automatically use the new version when next recompiled. The header +file eliminates the labor of finding and changing all the copies as well +as the risk that a failure to change one copy will result in +inconsistencies within a program. + +In C, the usual convention is to give header files names that end with +@file{.h}. It is most portable to use only letters, digits, dashes, and +underscores in header file names, and at most one dot. 
+ +@menu +* include Syntax:: +* include Operation:: +* Search Path:: +* Once-Only Headers:: +@c * Alternatives to Wrapper #ifndef:: +* Computed Includes:: +@c * Wrapper Headers:: +@c * System Headers:: +@end menu + +@node include Syntax +@subsection @code{#include} Syntax + +@findex #include +You can specify inclusion of user and system header files with the +preprocessing directive @code{#include}. It has two variants: + +@table @code +@item #include <@var{file}> +This variant is used for system header files. It searches for a file +named @var{file} in a standard list of system directories. You can +prepend directories to this list with the @option{-I} option +(@pxref{Invocation, Invoking GCC, Invoking GCC, gcc, Using the GNU +Compiler Collection}). + +@item #include "@var{file}" +This variant is used for header files of your own program. It +searches for a file named @var{file} first in the directory containing +the current file, then in the quote directories, then the same +directories used for @code{<@var{file}>}. You can prepend directories +to the list of quote directories with the @option{-iquote} option. +@end table + +The argument of @code{#include}, whether delimited with quote marks or +angle brackets, behaves like a string constant in that comments are not +recognized, and macro names are not expanded. Thus, @code{@w{#include +<x/*y>}} specifies inclusion of a system header file named @file{x/*y}. + +However, if backslashes occur within @var{file}, they are considered +ordinary text characters, not escape characters: character escape +sequences such as used in string constants in C are not meaningful +here. Thus, @code{@w{#include "x\n\\y"}} specifies a filename +containing three backslashes. By the same token, there is no way to +escape @samp{"} or @samp{>} to include it in the header file name if +it would instead end the file name. + +Some systems interpret @samp{\} as a file name component separator.
+All these systems also interpret @samp{/} the same way. It is most +portable to use only @samp{/}. + +It is an error to put anything other than comments on the +@code{#include} line after the file name. + +@node include Operation +@subsection @code{#include} Operation + +The @code{#include} directive works by scanning the specified header +file as input before continuing with the rest of the current file. +The result of preprocessing consists of the text already generated, +followed by the result of preprocessing the included file, followed by +whatever results from the text after the @code{#include} directive. +For example, if you have a header file @file{header.h} as follows, + +@example +char *test (void); +@end example + +@noindent +and a main program called @file{program.c} that uses the header file, +like this, + +@example +int x; +#include "header.h" + +int +main (void) +@{ + puts (test ()); +@} +@end example + +@noindent +the result is equivalent to putting this text in @file{program.c}: + +@example +int x; +char *test (void); + +int +main (void) +@{ + puts (test ()); +@} +@end example + +Included files are not limited to declarations and macro definitions; +those are merely the typical uses. Any fragment of a C program can be +included from another file. The include file could even contain the +beginning of a statement that is concluded in the containing file, or +the end of a statement that was started in the including file. However, +an included file must consist of complete tokens. Comments and string +literals that have not been closed by the end of an included file are +invalid. For error recovery, the compiler terminates them at the end of +the file. + +To avoid confusion, it is best if header files contain only complete +syntactic units---function declarations or definitions, type +declarations, etc. + +The line following the @code{#include} directive is always treated as +a separate line, even if the included file lacks a final newline. 
+There is no problem putting a preprocessing directive there. + +@node Search Path +@subsection Search Path + +GCC looks in several different places for header files to be included. +On the GNU system, and Unix systems, the default directories for +system header files are: + +@example +@var{libdir}/gcc/@var{target}/@var{version}/include +/usr/local/include +@var{libdir}/gcc/@var{target}/@var{version}/include-fixed +@var{libdir}/@var{target}/include +/usr/include/@var{target} +/usr/include +@end example + +@noindent +The list may be different in some operating systems. Other +directories are added for C++. + +In the above, @var{target} is the canonical name of the system GCC was +configured to compile code for; often but not always the same as the +canonical name of the system it runs on. @var{version} is the version +of GCC in use. + +You can add to this list with the @option{-I@var{dir}} command-line +option. All the directories named by @option{-I} are searched, in +left-to-right order, @emph{before} the default directories. The only +exception is when @file{dir} is already searched by default. In +this case, the option is ignored and the search order for system +directories remains unchanged. + +Duplicate directories are removed from the quote and bracket search +chains before the two chains are merged to make the final search chain. +Thus, it is possible for a directory to occur twice in the final search +chain if it was specified in both the quote and bracket chains. + +You can prevent GCC from searching any of the default directories with +the @option{-nostdinc} option. This is useful when you are compiling an +operating system kernel or some other program that does not use the +standard C library facilities, or the standard C library itself. +@option{-I} options are not ignored as described above when +@option{-nostdinc} is in effect. 
+ +GCC looks for headers requested with @code{@w{#include "@var{file}"}} +first in the directory containing the current file, then in the +@dfn{quote directories} specified by @option{-iquote} options, then in +the same places it looks for a system header. For example, if +@file{/usr/include/sys/stat.h} contains @code{@w{#include "types.h"}}, +GCC looks for @file{types.h} first in @file{/usr/include/sys}, then in +the quote directories and then in its usual search path. + +@code{#line} (@pxref{Line Control}) does not change GCC's idea of the +directory containing the current file. + +@cindex quote directories +The @option{-I-} option is an old-fashioned, deprecated way to specify the +quote directories. To look for headers in a directory named @file{-}, +specify @option{-I./-}. There are several more ways to adjust the +header search path. @xref{Invocation, Invoking GCC, Invoking GCC, +gcc, Using the GNU Compiler Collection}. + +@node Once-Only Headers +@subsection Once-Only Headers +@cindex repeated inclusion +@cindex including just once +@cindex wrapper @code{#ifndef} + +If a header file happens to be included twice, the compiler will process +its contents twice. This is very likely to cause an error, e.g.@: when the +compiler sees the same structure definition twice. + +The standard way to prevent this is to enclose the entire real contents +of the file in a conditional, like this: + +@example +@group +/* File foo. */ +#ifndef FILE_FOO_SEEN +#define FILE_FOO_SEEN + +@var{the entire file} + +#endif /* !FILE_FOO_SEEN */ +@end group +@end example + +This construct is commonly known as a @dfn{wrapper #ifndef}. When the +header is included again, the conditional will be false, because +@code{FILE_FOO_SEEN} is defined. Preprocessing skips over the entire +contents of the file, so that compilation will never ``see'' the file +contents twice in one module. + +GCC optimizes this case even further. It remembers when a header file +has a wrapper @code{#ifndef}.
If a subsequent @code{#include} +specifies that header, and the macro in the @code{#ifndef} is still +defined, it does not bother to rescan the file at all. + +You can put comments in the header file outside the wrapper. They +do not interfere with this optimization. + +@cindex controlling macro +@cindex guard macro +The macro @code{FILE_FOO_SEEN} is called the @dfn{controlling macro} +or @dfn{guard macro}. In a user header file, the macro name should +not begin with @samp{_}. In a system header file, it should begin +with @samp{__} (or @samp{_} followed by an upper-case letter) to avoid +conflicts with user programs. In any kind of header file, the macro +name should contain the name of the file and some additional text, to +avoid conflicts with other header files. + +@node Computed Includes +@subsection Computed Includes +@cindex computed includes +@cindex macros in include + +Sometimes it is necessary to select one of several different header +files to be included into your program. They might specify +configuration parameters to be used on different sorts of operating +systems, for instance. You could do this with a series of conditionals, + +@example +#if SYSTEM_1 +# include "system_1.h" +#elif SYSTEM_2 +# include "system_2.h" +#elif SYSTEM_3 +/* @r{@dots{}} */ +#endif +@end example + +That rapidly becomes tedious. Instead, GNU C offers the ability to use +a macro for the header name. This is called a @dfn{computed include}. +Instead of writing a header name as the direct argument of +@code{#include}, you simply put a macro name there instead: + +@example +#define SYSTEM_H "system_1.h" +/* @r{@dots{}} */ +#include SYSTEM_H +@end example + +@noindent +@code{SYSTEM_H} is expanded, then @file{system_1.h} is included as if +the @code{#include} had been written with that name. @code{SYSTEM_H} +could be defined by your Makefile with a @option{-D} option. + +You must be careful when you define such a macro. @code{#define} +saves tokens, not text. 
GCC has no way of knowing that the macro will +be used as the argument of @code{#include}, so it generates ordinary +tokens, not a header name. This is unlikely to cause problems if you +use double-quote includes, which are syntactically similar to string +constants. If you use angle brackets, however, you may have trouble. + +The syntax of a computed include is actually a bit more general than the +above. If the first non-whitespace character after @code{#include} is +not @samp{"} or @samp{<}, then the entire line is macro-expanded +like running text would be. + +If the line expands to a single string constant, the contents of that +string constant are the file to be included. Preprocessing does not +re-examine the string for embedded quotes, but neither does it process +backslash escapes in the string. Therefore + +@example +#define HEADER "a\"b" +#include HEADER +@end example + +@noindent +looks for a file named @file{a\"b}. Preprocessing searches for the +file according to the rules for double-quoted includes. + +If the line expands to a token stream beginning with a @samp{<} token +and including a @samp{>} token, then the tokens between the @samp{<} and +the first @samp{>} are combined to form the filename to be included. +Any whitespace between tokens is reduced to a single space; then any +space after the initial @samp{<} is retained, but a trailing space +before the closing @samp{>} is ignored. Preprocessing searches for the file +according to the rules for angle-bracket includes. + +In either case, if there are any tokens on the line after the file name, +an error occurs and the directive is not processed. It is also an error +if the result of expansion does not match either of the two expected +forms. + +These rules are implementation-defined behavior according to the C +standard. 
To minimize the risk of different compilers interpreting your +computed includes differently, we recommend you use only a single +object-like macro that expands to a string constant. That also +makes it clear to people reading your program. + +@node Macros +@section Macros +@cindex macros + +A @dfn{macro} is a fragment of code that has been given a name. +Whenever the name is used, it is replaced by the contents of the macro. +There are two kinds of macros. They differ mostly in what they look +like when they are used. @dfn{Object-like} macros resemble data objects +when used, @dfn{function-like} macros resemble function calls. + +You may define any valid identifier as a macro, even if it is a C +keyword. In the preprocessing stage, GCC does not know anything about +keywords. This can be useful if you wish to hide a keyword such as +@code{const} from an older compiler that does not understand it. +However, the preprocessing operator @code{defined} (@pxref{defined}) +can never be defined as a macro, and C@code{++}'s named operators +(@pxref{C++ Named Operators, C++ Named Operators, C++ Named Operators, +gcc, Using the GNU Compiler Collection}) cannot be macros when +compiling C@code{++} code. + +The operator @code{#} is used in macros for stringification of an +argument (@pxref{Stringification}), and @code{##} is used for +concatenation of arguments into larger tokens (@pxref{Concatenation}). + +@menu +* Object-like Macros:: +* Function-like Macros:: +@c * Macro Pragmas:: +* Macro Arguments:: +* Stringification:: +* Concatenation:: +* Variadic Macros:: +* Predefined Macros:: +* Undefining and Redefining Macros:: +* Directives Within Macro Arguments:: +* Macro Pitfalls:: +@end menu + +@node Object-like Macros +@subsection Object-like Macros +@cindex object-like macro +@cindex symbolic constants +@cindex manifest constants + +An @dfn{object-like macro} is a simple identifier that will be +replaced by a code fragment.
It is called object-like because in most +cases the use of the macro looks like a reference to a data object in +code that uses it. These macros are most commonly used to give +symbolic names to numeric constants. + +@findex #define +The way to define macros is with the @code{#define} directive. +@code{#define} is followed by the name of the macro and then the token +sequence it should be an abbreviation for, which is variously referred +to as the macro's @dfn{body}, @dfn{expansion} or @dfn{replacement +list}. For example, + +@example +#define BUFFER_SIZE 1024 +@end example + +@noindent +defines a macro named @code{BUFFER_SIZE} as an abbreviation for the +token @code{1024}. If somewhere after this @code{#define} directive +there comes a C statement of the form + +@example +foo = (char *) malloc (BUFFER_SIZE); +@end example + +@noindent +then preprocessing will recognize and @dfn{expand} the macro +@code{BUFFER_SIZE}, so that compilation will see the tokens: + +@example +foo = (char *) malloc (1024); +@end example + +By convention, macro names are written in upper case. Programs are +easier to read when it is possible to tell at a glance which names are +macros. Macro names that start with @samp{__} are reserved for +internal uses, and many of them are defined automatically, so don't +define such macro names unless you really know what you're doing. +Likewise for macro names that start with @samp{_} and an upper-case letter. + +The macro's body ends at the end of the @code{#define} line. You may +continue the definition onto multiple lines, if necessary, using +backslash-newline. When the macro is expanded, however, it will all +come out on one line. For example, + +@example +#define NUMBERS 1, \ + 2, \ + 3 +int x[] = @{ NUMBERS @}; + @expansion{} int x[] = @{ 1, 2, 3 @}; +@end example + +@noindent +The most common visible consequence of this is surprising line numbers +in error messages.
+ +There is no restriction on what can go in a macro body provided it +decomposes into valid preprocessing tokens. Parentheses need not +balance, and the body need not resemble valid C code. (If it does not, +you may get error messages from the C compiler when you use the macro.) + +Preprocessing scans the program sequentially. A macro definition +takes effect right after its appearance. Therefore, the following +input + +@example +foo = X; +#define X 4 +bar = X; +@end example + +@noindent +produces + +@example +foo = X; +bar = 4; +@end example + +When preprocessing expands a macro name, the macro's expansion +replaces the macro invocation, then the expansion is examined for more +macros to expand. For example, + +@example +@group +#define TABLESIZE BUFSIZE +#define BUFSIZE 1024 +TABLESIZE + @expansion{} BUFSIZE + @expansion{} 1024 +@end group +@end example + +@noindent +@code{TABLESIZE} is expanded first to produce @code{BUFSIZE}, then that +macro is expanded to produce the final result, @code{1024}. + +Notice that @code{BUFSIZE} was not defined when @code{TABLESIZE} was +defined. The @code{#define} for @code{TABLESIZE} uses exactly the +expansion you specify---in this case, @code{BUFSIZE}---and does not +check to see whether it too contains macro names. Only when you +@emph{use} @code{TABLESIZE} is the result of its expansion scanned for +more macro names. + +This makes a difference if you change the definition of @code{BUFSIZE} +at some point in the source file. @code{TABLESIZE}, defined as shown, +will always expand using the definition of @code{BUFSIZE} that is +currently in effect: + +@example +#define BUFSIZE 1020 +#define TABLESIZE BUFSIZE +#undef BUFSIZE +#define BUFSIZE 37 +@end example + +@noindent +Now @code{TABLESIZE} expands (in two stages) to @code{37}. + +If the expansion of a macro contains its own name, either directly or +via intermediate macros, it is not expanded again when the expansion is +examined for more macros. 
This prevents infinite recursion. +@xref{Self-Referential Macros}, for the precise details. + +@node Function-like Macros +@subsection Function-like Macros +@cindex function-like macros + +You can also define macros whose use looks like a function call. +These are called @dfn{function-like macros}. To define one, use the +@code{#define} directive with a pair of parentheses immediately after +the macro name. For example, + +@example +#define lang_init() c_init() +lang_init() + @expansion{} c_init() +@end example + +A function-like macro is expanded only when its name appears with a +pair of parentheses after it. If you write just the name, without +parentheses, it is left alone. This can be useful when you have a +function and a macro of the same name, and you wish to use the +function sometimes. Whitespace and line breaks before or between the +parentheses are ignored when the macro is called. + +@example +extern void foo(void); +#define foo() /* @r{optimized inline version} */ +/* @r{@dots{}} */ + foo(); + funcptr = foo; +@end example + +Here the call to @code{foo()} expands the macro, but the function +pointer @code{funcptr} gets the address of the real function +@code{foo}. If the macro were to be expanded there, it would cause a +syntax error. + +If you put spaces between the macro name and the parentheses in the +macro definition, that does not define a function-like macro, it defines +an object-like macro whose expansion happens to begin with a pair of +parentheses. Here is an example: + +@example +#define lang_init () c_init() +lang_init() + @expansion{} () c_init()() +@end example + +The first two pairs of parentheses in this expansion come from the +macro. The third is the pair that was originally after the macro +invocation. Since @code{lang_init} is an object-like macro, it does not +consume those parentheses. + +Any name can have at most one macro definition at a time. 
Thus, +you can't define the same name as an object-like macro and a +function-like macro at once. + +@node Macro Arguments +@subsection Macro Arguments +@cindex arguments +@cindex macros with arguments +@cindex arguments in macro definitions + +Function-like macros can take @dfn{arguments}, just like true functions. +To define a macro that uses arguments, you insert @dfn{parameters} +between the pair of parentheses in the macro definition that make the +macro function-like. The parameters must be valid C identifiers, +separated by commas and optionally whitespace. + +To invoke a macro that takes arguments, you write the name of the macro +followed by a list of @dfn{actual arguments} in parentheses, separated +by commas. The invocation of the macro need not be restricted to a +single logical line---it can cross as many lines in the source file as +you wish. The number of arguments you give must match the number of +parameters in the macro definition. When the macro is expanded, each +use of a parameter in its body is replaced by the tokens of the +corresponding argument. (The macro body is not required to use all of the +parameters.) + +As an example, here is a macro that computes the minimum of two numeric +values, as it is defined in many C programs, and some uses. + +@example +#define min(X, Y) ((X) < (Y) ? (X) : (Y)) + x = min(a, b); @expansion{} x = ((a) < (b) ? (a) : (b)); + y = min(1, 2); @expansion{} y = ((1) < (2) ? (1) : (2)); + z = min(a+28, *p); @expansion{} z = ((a+28) < (*p) ? (a+28) : (*p)); +@end example + +@noindent +In this small example you can already see several of the dangers of +macro arguments. @xref{Macro Pitfalls}, for detailed explanations. + +Leading and trailing whitespace in each argument is dropped, and all +whitespace between the tokens of an argument is reduced to a single +space. Parentheses within each argument must balance; a comma within +such parentheses does not end the argument. 
However, there is no +requirement for square brackets or braces to balance, and they do not +prevent a comma from separating arguments. Thus, + +@example +macro (array[x = y, x + 1]) +@end example + +@noindent +passes two arguments to @code{macro}: @code{array[x = y} and @code{x + +1]}. If you want to supply @code{array[x = y, x + 1]} as an argument, +you can write it as @code{array[(x = y, x + 1)]}, which is equivalent C +code. However, putting an assignment inside an array subscript +is to be avoided anyway. + +All arguments to a macro are completely macro-expanded before they are +substituted into the macro body. After substitution, the complete text +is scanned again for macros to expand, including the arguments. This rule +may seem strange, but it is carefully designed so you need not worry +about whether any function call is actually a macro invocation. You can +run into trouble if you try to be too clever, though. @xref{Argument +Prescan}, for detailed discussion. + +For example, @code{min (min (a, b), c)} is first expanded to + +@example + min (((a) < (b) ? (a) : (b)), (c)) +@end example + +@noindent +and then to + +@example +@group +((((a) < (b) ? (a) : (b))) < (c) + ? (((a) < (b) ? (a) : (b))) + : (c)) +@end group +@end example + +@noindent +(The line breaks shown here for clarity are not actually generated.) + +@cindex empty macro arguments +You can leave macro arguments empty without error, but many macros +will then expand to invalid code. You cannot leave out arguments +entirely; if a macro takes two arguments, there must be exactly one +comma at the top level of its argument list. Here are some silly +examples using @code{min}: + +@smallexample +min(, b) @expansion{} (( ) < (b) ? ( ) : (b)) +min(a, ) @expansion{} ((a ) < ( ) ? (a ) : ( )) +min(,) @expansion{} (( ) < ( ) ? ( ) : ( )) +min((,),) @expansion{} (((,)) < ( ) ? 
((,)) : ( )) + +min() @error{} macro "min" requires 2 arguments, but only 1 given +min(,,) @error{} macro "min" passed 3 arguments, but takes just 2 +@end smallexample + +Whitespace is not a preprocessing token, so if a macro @code{foo} takes +one argument, @code{@w{foo ()}} and @code{@w{foo ( )}} both supply it an +empty argument. + +@ignore @c How long ago was this? +Previous GNU preprocessor implementations and +documentation were incorrect on this point, insisting that a +function-like macro that takes a single argument be passed a space if an +empty argument was required. +@end ignore + +Macro parameters appearing inside string literals are not replaced by +their corresponding actual arguments. + +@example +#define foo(x) x, "x" +foo(bar) @expansion{} bar, "x" +@end example + +@noindent +See the next subsection for how to insert macro arguments +into a string literal. + +The token following the macro call and the last token of the macro +expansion do not become one token even if it looks like they could: + +@example +#define foo() abc +foo()def @expansion{} abc def +@end example + +@node Stringification +@subsection Stringification +@cindex stringification +@cindex @code{#} operator + +Sometimes you may want to convert a macro argument into a string +constant. Parameters are not replaced inside string constants, but +you can use the @code{#} preprocessing operator instead. When a macro +parameter is used with a leading @code{#}, preprocessing replaces it +with the literal text of the actual argument, converted to a string +constant. Unlike normal parameter replacement, the argument is not +macro-expanded first. This is called @dfn{stringification}. + +There is no way to combine an argument with surrounding text and +stringify it all together. But you can write a series of string +constants and stringified arguments. 
After preprocessing replaces the +stringified arguments with string constants, the consecutive string +constants will be concatenated into one long string constant +(@pxref{String Constants}). + +Here is an example that uses stringification and concatenation of +string constants: + +@example +@group +#define WARN_IF(EXP) \ + do @{ if (EXP) \ + fprintf (stderr, "Warning: " #EXP "\n"); @} \ + while (0) + +WARN_IF (x == 0); + @expansion{} + do @{ if (x == 0) + fprintf (stderr, "Warning: " "x == 0" "\n"); @} + while (0); +@end group +@end example + +@noindent +The argument for @code{EXP} is substituted once, as is, into the +@code{if} statement, and once, stringified, into the argument to +@code{fprintf}. If @code{x} were a macro, it would be expanded in the +@code{if} statement but not in the string. + +The @code{do} and @code{while (0)} are a kludge to make it possible to +write @code{WARN_IF (@var{arg});}. The resemblance of @code{WARN_IF} +to a function makes that a natural way to write it. +@xref{Swallowing the Semicolon}. + +Stringification in C involves more than putting double-quote +characters around the fragment. It also backslash-escapes the quotes +surrounding embedded string constants, and all backslashes within +string and character constants, in order to get a valid C string +constant with the proper contents. Thus, stringifying @code{@w{p = +"foo\n";}} results in @t{@w{"p = \"foo\\n\";"}}. However, backslashes +that are not inside string or character constants are not duplicated: +@samp{\n} by itself stringifies to @t{"\n"}. + +All leading and trailing whitespace in text being stringified is +ignored. Any sequence of whitespace in the middle of the text is +converted to a single space in the stringified result. Comments are +replaced by whitespace long before stringification happens, so they +never appear in stringified text. + +There is no way to convert a macro argument into a character constant. 
+ +To stringify the result of expansion of a macro argument, you have to +use two levels of macros, like this: + +@example +#define xstr(S) str(S) +#define str(s) #s +#define foo 4 +str (foo) + @expansion{} "foo" +xstr (foo) + @expansion{} xstr (4) + @expansion{} str (4) + @expansion{} "4" +@end example + +@code{s} is stringified when it is used in @code{str}, so it is not +macro-expanded first. But @code{S} is an ordinary argument to +@code{xstr}, so it is completely macro-expanded before @code{xstr} +itself is expanded (@pxref{Argument Prescan}). Therefore, by the time +@code{str} gets to its argument text, that text has already been +macro-expanded. + +@node Concatenation +@subsection Concatenation +@cindex concatenation +@cindex token pasting +@cindex token concatenation +@cindex @code{##} operator + +It is often useful to merge two tokens into one while expanding macros. +This is called @dfn{token pasting} or @dfn{token concatenation}. The +@code{##} preprocessing operator performs token pasting. When a macro +is expanded, the two tokens on either side of each @code{##} operator +are combined into a single token, which then replaces the @code{##} and +the two original tokens in the macro expansion. Usually both will be +identifiers, or one will be an identifier and the other a preprocessing +number. When pasted, they make a longer identifier. + +Concatenation into an identifier isn't the only valid case. It is +also possible to concatenate two numbers (or a number and a name, such +as @code{1.5} and @code{e3}) into a number. Also, multi-character +operators such as @code{+=} can be formed by token pasting. + +However, two tokens that don't together form a valid token cannot be +pasted together. For example, you cannot concatenate @code{x} with +@code{+} in either order. Trying this issues a warning and keeps +the two tokens separate. Whether it puts white space between the +tokens is undefined.
It is common to find unnecessary uses of +@code{##} in complex macros. If you get this warning, it is likely +that you can simply remove the @code{##}. + +The tokens combined by @code{##} could both come from the macro body, +but then you could just as well write them as one token in the first place. +Token pasting is useful when one or both of the tokens comes from a +macro argument. If either of the tokens next to an @code{##} is a +parameter name, it is replaced by its actual argument before @code{##} +executes. As with stringification, the actual argument is not +macro-expanded first. If the argument is empty, that @code{##} has no +effect. + +Keep in mind that preprocessing converts comments to whitespace before +it looks for uses of macros. Therefore, you cannot create a comment +by concatenating @samp{/} and @samp{*}. You can put as much +whitespace between @code{##} and its operands as you like, including +comments, and you can put comments in arguments that will be +concatenated. + +It is an error to use @code{##} at the beginning or end of a macro +body. + +Multiple @code{##} operators are handled left-to-right, so that +@samp{1 ## e ## -2} pastes into @samp{1e-2}. (Right-to-left +processing would first generate @samp{e-2}, which is an invalid token.) +When @code{#} and @code{##} are used together, they are all handled +left-to-right. + +Consider a C program that interprets named commands. There probably +needs to be a table of commands, perhaps an array of structures declared +as follows: + +@example +@group +struct command +@{ + char *name; + void (*function) (void); +@}; +@end group + +@group +struct command commands[] = +@{ + @{ "quit", quit_command @}, + @{ "help", help_command @}, + /* @r{@dots{}} */ +@}; +@end group +@end example + +It would be cleaner not to have to write each command name twice, once +in the string constant and once in the function name. A macro that +takes the name of a command as an argument can make this unnecessary. 
+It can create the string constant with stringification, and the +function name by concatenating the argument with @samp{_command}. +Here is how it is done: + +@example +#define COMMAND(NAME) @{ #NAME, NAME ## _command @} + +struct command commands[] = +@{ + COMMAND (quit), + COMMAND (help), + /* @r{@dots{}} */ +@}; +@end example + +@node Variadic Macros +@subsection Variadic Macros +@cindex variable number of arguments +@cindex macros with variable arguments +@cindex variadic macros + +A macro can be declared to accept a variable number of arguments much as +a function can. The syntax for defining the macro is similar to that of +a function. Here is an example: + +@example +#define eprintf(@dots{}) fprintf (stderr, __VA_ARGS__) +@end example + +This kind of macro is called @dfn{variadic}. When the macro is invoked, +all the tokens in its argument list after the last named argument (this +macro has none), including any commas, become the @dfn{variable +argument}. This sequence of tokens replaces the identifier +@code{@w{__VA_ARGS__}} in the macro body wherever it appears. Thus, we +have this expansion: + +@example +eprintf ("%s:%d: ", input_file, lineno) + @expansion{} fprintf (stderr, "%s:%d: ", input_file, lineno) +@end example + +The variable argument is completely macro-expanded before it is inserted +into the macro expansion, just like an ordinary argument. You may use +the @code{#} and @code{##} operators to stringify the variable argument +or to paste its leading or trailing token with another token. (But see +below for an important special case for @code{##}.) + +@strong{Warning:} don't use the identifier @code{@w{__VA_ARGS__}} +for anything other than this. + +If your macro is complicated, you may want a more descriptive name for +the variable argument than @code{@w{__VA_ARGS__}}. 
You can write an +argument name immediately before the @samp{@dots{}}; that name is used +for the variable argument.@footnote{GNU C extension.} The +@code{eprintf} macro above could be written thus: + +@example +#define eprintf(args@dots{}) fprintf (stderr, args) +@end example + +A variadic macro can have named arguments as well as variable +arguments, so @code{eprintf} can be defined like this, instead: + +@example +#define eprintf(format, @dots{}) \ + fprintf (stderr, format, __VA_ARGS__) +@end example + +@noindent +This formulation is more descriptive, but what if you want to specify +a format string that takes no arguments? In GNU C, you can omit the +comma before the variable arguments if they are empty, but that puts +an extra comma in the expansion: + +@example +eprintf ("success!\n") + @expansion{} fprintf(stderr, "success!\n", ); +@end example + +@noindent +That's an error in the call to @code{fprintf}. + +To get rid of that comma, the @code{##} token paste operator has a +special meaning when placed between a comma and a variable +argument.@footnote{GNU C extension.} If you write + +@example +#define eprintf(format, @dots{}) \ + fprintf (stderr, format, ##__VA_ARGS__) +@end example + +@noindent +then use the macro @code{eprintf} with empty variable arguments, +@code{##} deletes the preceding comma. + +@example +eprintf ("success!\n") + @expansion{} fprintf(stderr, "success!\n"); +@end example + +@noindent +This does @emph{not} happen if you pass an empty argument, nor does it +happen if the token preceding @code{##} is anything other than a +comma. + +@noindent +When the only macro parameter is a variable arguments parameter, and +the macro call has no argument at all, it is not obvious whether that +means an empty argument or a missing argument. Should the comma be +kept, or deleted? The C standard says to keep the comma, but the +preexisting GNU C extension deleted the comma. 
Nowadays, GNU C +retains the comma when implementing a specific C standard, and deletes +it otherwise. + +C99 mandates that the only place the identifier @code{@w{__VA_ARGS__}} +can appear is in the replacement list of a variadic macro. It may not +be used as a macro name, macro parameter name, or within a different +type of macro. It may also be forbidden in open text; the standard is +ambiguous. We recommend you avoid using that name except for its +special purpose. + +Variadic macros where you specify the parameter name are a GNU C +feature that has been supported for a long time. Standard C, as of +C99, supports only the form where the parameter is called +@code{@w{__VA_ARGS__}}. For portability to previous versions of GNU C +you should use only named variable argument parameters. On the other +hand, for portability to other C99 compilers, you should use only +@code{@w{__VA_ARGS__}}. + +@node Predefined Macros +@subsection Predefined Macros +@cindex predefined macros + +Several object-like macros are predefined; you use them without +supplying their definitions. Here we explain the ones user programs +often need to use. Many other macro names starting with @samp{__} are +predefined; in general, you should not define such macro names +yourself. + +@table @code +@item __FILE__ +This macro expands to the name of the current input file, in the form +of a C string constant. This is the full name by which GCC opened +the file, not the short name specified in @code{#include} or as the +input file name argument. For example, +@code{"/usr/local/include/myheader.h"} is a possible expansion of this +macro. + +@item __LINE__ +This macro expands to the current input line number, in the form of a +decimal integer constant. While we call it a predefined macro, it's +a pretty strange macro, since its ``definition'' changes with each +new line of source code.
+ +@item __func__ +@itemx __FUNCTION__ +These names are like variables that have as value a string containing +the name of the current function definition. They are not really +macros, but this is the best place to mention them. + +@code{__FUNCTION__} is the name that has been defined in GNU C since +time immemorial; @code{__func__} is defined by the C standard. +With the following conditionals, you can use whichever one is defined. + +@example +#if __STDC_VERSION__ < 199901L +# if __GNUC__ >= 2 +# define __func__ __FUNCTION__ +# else +# define __func__ "" +# endif +#endif +@end example + +@item __PRETTY_FUNCTION__ +This is equivalent to @code{__FUNCTION__} in C, but in C@code{++} +the string includes argument type information as well. +It is a GNU C extension. +@end table + +Those features are useful in generating an error message to report an +inconsistency detected by the program; the message can state the +source line where the inconsistency was detected. For example, + +@example +fprintf (stderr, "Internal error: " + "negative string length " + "in function %s " + "%d at %s, line %d.", + __func__, length, __FILE__, __LINE__); +@end example + +A @code{#line} directive changes @code{__LINE__}, and may change +@code{__FILE__} as well. @xref{Line Control}. + +@table @code +@item __DATE__ +This macro expands to a string constant that describes the date of +compilation. The string constant contains eleven characters and looks +like @code{@w{"Feb 12 1996"}}. If the day of the month is just one +digit, an extra space precedes it so that the date is always eleven +characters. + +If the compiler cannot determine the current date, it emits a warning message +(once per compilation) and @code{__DATE__} expands to +@code{@w{"??? ?? ????"}}. + +We deprecate the use of @code{__DATE__} for the sake of reproducible +compilation. + +@item __TIME__ +This macro expands to a string constant that describes the time of +compilation.
The string constant contains eight characters and looks +like @code{"23:59:01"}. + +If the compiler cannot determine the current time, it emits a warning +message (once per compilation) and @code{__TIME__} expands to +@code{"??:??:??"}. + +We deprecate the use of @code{__TIME__} for the sake of reproducible +compilation. + +@item __STDC__ +In normal operation, this macro expands to the constant 1, to signify +that this compiler implements ISO Standard C@. + +@item __STDC_VERSION__ +This macro expands to the C Standard's version number, a long integer +constant of the form @code{@var{yyyy}@var{mm}L} where @var{yyyy} and +@var{mm} are the year and month of the Standard version. This states +which version of the C Standard the compiler implements. + +The current default value is @code{201112L}, which signifies the C +2011 standard. + +@item __STDC_HOSTED__ +This macro is defined, with value 1, if the compiler's target is a +@dfn{hosted environment}. A hosted environment provides the full +facilities of the standard C library. +@end table + +The rest of the predefined macros are GNU C extensions. + +@table @code +@item __COUNTER__ +This macro expands to sequential integral values starting from 0. In +other words, each time the program uses this macro, it generates the +next successive integer. This, with the @code{##} operator, provides +a convenient means for macros to generate unique identifiers. + +@item __GNUC__ +@itemx __GNUC_MINOR__ +@itemx __GNUC_PATCHLEVEL__ +These macros expand to the major version, minor version, and patch +level of the compiler, as integer constants. For example, GCC 3.2.1 +expands @code{__GNUC__} to 3, @code{__GNUC_MINOR__} to 2, and +@code{__GNUC_PATCHLEVEL__} to 1. + +If all you need to know is whether or not your program is being +compiled by GCC, or a non-GCC compiler that claims to accept the GNU C +extensions, you can simply test @code{__GNUC__}.
If you need to write +code that depends on a specific version, you must check more +carefully. Each change in the minor version resets the patch level to +zero; each change in the major version (which happens rarely) resets +the minor version and the patch level to zero. To use the predefined +macros directly in the conditional, write it like this: + +@example +/* @r{Test for a version later than 3.2.0.} */ +#if __GNUC__ > 3 || \ + (__GNUC__ == 3 && (__GNUC_MINOR__ > 2 || \ + (__GNUC_MINOR__ == 2 && \ + __GNUC_PATCHLEVEL__ > 0))) +@end example + +@noindent +Another approach is to use the predefined macros to +calculate a single number, then compare that against a threshold: + +@example +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) +/* @r{@dots{}} */ +/* @r{Test for GCC > 3.2.0} */ +#if GCC_VERSION > 30200 +@end example + +@noindent +Many people find this form easier to understand. + +@item __VERSION__ +This macro expands to a string constant that describes the version of +the compiler in use. You should not rely on its contents' having any +particular form, but you can count on it to contain at least the +release number. + +@item __TIMESTAMP__ +This macro expands to a string constant that describes the date and +time of the last modification of the current source file. The string +constant contains abbreviated day of the week, month, day of the +month, time in hh:mm:ss form, and the year, in the format +@code{@w{"Sun Sep 16 01:03:52 1973"}}. If the day of the month is +less than 10, it is padded with a space on the left. + +If GCC cannot determine that information, it emits a warning +message (once per compilation) and @code{__TIMESTAMP__} expands to +@code{@w{"??? ??? ?? ??:??:?? ????"}}. + +We deprecate the use of this macro for the sake of reproducible +compilation.
+@end table + +@node Undefining and Redefining Macros +@subsection Undefining and Redefining Macros +@cindex undefining macros +@cindex redefining macros +@findex #undef + +You can @dfn{undefine} a macro with the @code{#undef} directive. +@code{#undef} takes a single argument, the name of the macro to +undefine. You use the bare macro name, even if the macro is +function-like. It is an error if anything appears on the line after +the macro name. @code{#undef} has no effect if the name is not a +macro. + +@example +#define FOO 4 +x = FOO; @expansion{} x = 4; +#undef FOO +x = FOO; @expansion{} x = FOO; +@end example + +Once a macro has been undefined, that identifier may be @dfn{redefined} +as a macro by a subsequent @code{#define} directive. The new definition +need not have any resemblance to the old definition. + +You can define a macro again without first undefining it only if +the new definition is @dfn{effectively the same} as the old one. +Two macro definitions are effectively the same if: + +@itemize @bullet +@item Both are the same type of macro (object- or function-like). +@item All the tokens of the replacement list are the same. +@item If there are any parameters, they are the same. +@item Whitespace appears in the same places in both. It need not be +exactly the same amount of whitespace, though. Remember that comments +count as whitespace. +@end itemize + +@noindent +These definitions are effectively the same: +@example +#define FOUR (2 + 2) +#define FOUR (2 + 2) +#define FOUR (2 /* @r{two} */ + 2) +@end example +@noindent +but these are not: +@example +#define FOUR (2 + 2) +#define FOUR ( 2+2 ) +#define FOUR (2 * 2) +#define FOUR(score,and,seven,years,ago) (2 + 2) +@end example + +This allows two different header files to define a common macro. + +You can redefine an existing macro with @code{#define}, but redefining an +existing macro name with a different definition results in a warning.
+ +@node Directives Within Macro Arguments +@subsection Directives Within Macro Arguments +@cindex macro arguments and directives + +GNU C permits and handles preprocessing directives in the text provided +as arguments for a macro. That case is undefined in the C standard, +but in GNU C, conditional directives in macro arguments +are clear and valid. + +A paradoxical case is to redefine a macro within the call to that same +macro. What happens is, the new definition takes effect in time for +pre-expansion of @emph{all} the arguments, then the original +definition is expanded to replace the call. Here is a pathological +example: + +@example +#define f(x) x x +f (first f second +#undef f +#define f 2 +f) +@end example + +@noindent +which expands to + +@example +first 2 second 2 first 2 second 2 +@end example + +@noindent +with the semantics described above. We suggest you avoid writing code +which does this sort of thing. + +@node Macro Pitfalls +@subsection Macro Pitfalls +@cindex problems with macros +@cindex pitfalls of macros + +In this section we describe some special rules that apply to macros and +macro expansion, and point out certain cases in which the rules have +counter-intuitive consequences that you must watch out for. + +@menu +* Misnesting:: +* Operator Precedence Problems:: +* Swallowing the Semicolon:: +* Duplication of Side Effects:: +* Macros and Auto Type:: +* Self-Referential Macros:: +* Argument Prescan:: +@end menu + +@node Misnesting +@subsubsection Misnesting + +When a macro is called with arguments, the arguments are substituted +into the macro body and the result is checked, together with the rest of +the input file, for more macro calls. It is possible to piece together +a macro call coming partially from the macro body and partially from the +arguments.
For example, + +@example +#define twice(x) (2*(x)) +#define call_with_1(x) x(1) +call_with_1 (twice) + @expansion{} twice(1) + @expansion{} (2*(1)) +@end example + +Macro definitions do not have to have balanced parentheses. By writing +an unbalanced open parenthesis in a macro body, it is possible to create +a macro call that begins inside the macro body but ends outside of it. +For example, + +@example +#define strange(file) fprintf (file, "%s %d", +/* @r{@dots{}} */ +strange(stderr) p, 35) + @expansion{} fprintf (stderr, "%s %d", p, 35) +@end example + +The ability to piece together a macro call can be useful, but the use of +unbalanced open parentheses in a macro body is just confusing, and +should be avoided. + +@node Operator Precedence Problems +@subsubsection Operator Precedence Problems +@cindex parentheses in macro bodies + +You may have noticed that in most of the macro definition examples shown +above, each occurrence of a macro parameter name had parentheses around +it. In addition, another pair of parentheses usually surrounds the +entire macro definition. Here is why it is best to write macros that +way. + +Suppose you define a macro as follows, + +@example +#define ceil_div(x, y) (x + y - 1) / y +@end example + +@noindent +whose purpose is to divide, rounding up. (One use for this operation is +to compute how many @code{int} objects are needed to hold a certain +number of @code{char} objects.) Then suppose it is used as follows: + +@example +a = ceil_div (b & c, sizeof (int)); + @expansion{} a = (b & c + sizeof (int) - 1) / sizeof (int); +@end example + +@noindent +This does not do what is intended. 
The operator-precedence rules of +C make it equivalent to this: + +@example +a = (b & (c + sizeof (int) - 1)) / sizeof (int); +@end example + +@noindent +What we want is this: + +@example +a = ((b & c) + sizeof (int) - 1) / sizeof (int); +@end example + +@noindent +Defining the macro as + +@example +#define ceil_div(x, y) ((x) + (y) - 1) / (y) +@end example + +@noindent +provides the desired result. + +Unintended grouping can result in another way. Consider @code{sizeof +ceil_div(1, 2)}. That has the appearance of a C expression that would +compute the size of the type of @code{ceil_div (1, 2)}, but in fact it +means something very different. Here is what it expands to: + +@example +sizeof ((1) + (2) - 1) / (2) +@end example + +@noindent +This would take the size of an integer and divide it by two. The +precedence rules have put the division outside the @code{sizeof} when it +was intended to be inside. + +Parentheses around the entire macro definition prevent such problems. +Here, then, is the recommended way to define @code{ceil_div}: + +@example +#define ceil_div(x, y) (((x) + (y) - 1) / (y)) +@end example + +@node Swallowing the Semicolon +@subsubsection Swallowing the Semicolon +@cindex semicolons (after macro calls) + +Often it is desirable to define a macro that expands into a compound +statement. Consider, for example, the following macro, that advances a +pointer (the parameter @code{p} says where to find it) across whitespace +characters: + +@example +#define SKIP_SPACES(p, limit) \ +@{ char *lim = (limit); \ + while (p < lim) @{ \ + if (*p++ != ' ') @{ \ + p--; break; @}@}@} +@end example + +@noindent +Here backslash-newline is used to split the macro definition, which must +be a single logical line, so that it resembles the way such code would +be laid out if not part of a macro definition. + +A call to this macro might be @code{SKIP_SPACES (p, lim)}.
Strictly +speaking, the call expands to a compound statement, which is a complete +statement with no need for a semicolon to end it. However, since it +looks like a function call, it minimizes confusion if you can use it +like a function call, writing a semicolon afterward, as in +@code{SKIP_SPACES (p, lim);} + +This can cause trouble before @code{else} statements, because the +semicolon is actually a null statement. Suppose you write + +@example +if (*p != 0) + SKIP_SPACES (p, lim); +else /* @r{@dots{}} */ +@end example + +@noindent +The presence of two statements---the compound statement and a null +statement---in between the @code{if} condition and the @code{else} +makes invalid C code. + +The definition of the macro @code{SKIP_SPACES} can be altered to solve +this problem, using a @code{do @r{@dots{}} while} statement. Here is how: + +@example +#define SKIP_SPACES(p, limit) \ +do @{ char *lim = (limit); \ + while (p < lim) @{ \ + if (*p++ != ' ') @{ \ + p--; break; @}@}@} \ +while (0) +@end example + +Now @code{SKIP_SPACES (p, lim);} expands into + +@example +do @{ /* @r{@dots{}} */ @} while (0); +@end example + +@noindent +which is one statement. The loop executes exactly once; most compilers +generate no extra code for it. + +@node Duplication of Side Effects +@subsubsection Duplication of Side Effects + +@cindex side effects (in macro arguments) +@cindex unsafe macros +Many C programs define a macro @code{min}, for ``minimum'', like this: + +@example +#define min(X, Y) ((X) < (Y) ? (X) : (Y)) +@end example + +When you use this macro with an argument containing a side effect, +as shown here, + +@example +next = min (x + y, foo (z)); +@end example + +@noindent +it expands as follows: + +@example +next = ((x + y) < (foo (z)) ? (x + y) : (foo (z))); +@end example + +@noindent +where @code{x + y} has been substituted for @code{X} and @code{foo (z)} +for @code{Y}. 
+ +The function @code{foo} is used only once in the statement as it +appears in the program, but the expression @code{foo (z)} has been +substituted twice into the macro expansion. As a result, @code{foo} +might be called twice when the statement is executed. If it has side +effects or if it takes a long time to compute, that may be +undesirable. We say that @code{min} is an @dfn{unsafe} macro. + +The best solution to this problem is to define @code{min} in a way that +computes the value of @code{foo (z)} only once. In general, that requires +using @code{__auto_type} (@pxref{Auto Type}). How to use it for this +is described in the following section. @xref{Macros and Auto Type}. + +Otherwise, you will need to be careful when @emph{using} the macro +@code{min}. For example, you can calculate the value of @code{foo +(z)}, save it in a variable, and use that variable in @code{min}: + +@example +@group +#define min(X, Y) ((X) < (Y) ? (X) : (Y)) +/* @r{@dots{}} */ +@{ + int tem = foo (z); + next = min (x + y, tem); +@} +@end group +@end example + +@noindent +(where we assume that @code{foo} returns type @code{int}). + +When the repeated value appears as the condition of the @code{?:} +operator and again as its @var{iftrue} expression, you can avoid +repeated execution by omitting the @var{iftrue} expression, like this: + +@example +#define x_or_y(X, Y) ((X) ? : (Y)) +@end example + +@noindent +In GNU C, this expands to use the first macro argument's value if that +isn't zero. If that's zero, it computes the second argument and uses +that value. @xref{Conditional Expression}. + +@node Macros and Auto Type +@subsubsection Using @code{__auto_type} for Local Variables +@cindex local variables in macros +@cindex variables, local, in macros +@cindex macros, local variables in + +The keyword @code{__auto_type} makes it possible to +define macros that can work on any data type even though they need to +generate local variable declarations. @xref{Auto Type}. 
+ +For instance, here's how to define a safe ``maximum'' macro that +operates on any arithmetic type and computes each of its arguments +exactly once: + +@example +#define max(a,b) \ + (@{ __auto_type _a = (a); \ + __auto_type _b = (b); \ + _a > _b ? _a : _b; @}) +@end example + +The @samp{(@{ @dots{} @})} notation produces a @dfn{statement +expression}---a statement that can be used as an expression +(@pxref{Statement Exprs}). Its value is the value of its last +statement. This permits us to define local variables and store each +argument value into one. + +@cindex underscores in variables in macros +@cindex @samp{_} in variables in macros + +The reason for using names that start with underscores for the local +variables is to avoid conflicts with variable names that occur within +the expressions that are substituted for @code{a} and @code{b}. +Underscore followed by a lower case letter won't be predefined by the +system in any way. + +@c We hope someday to extend C with a new form of declaration syntax +@c in which all the newly declared variables' scopes would begin at the end +@c of the entire declaration, rather than as soon as each variable's +@c declaration begins. This way, all the variables' initializers would +@c be interpreted in the context before the declaration. Then we could +@c use any names whatsoever for the local variables and always get correct +@c behavior for the macro. + +@node Self-Referential Macros +@subsubsection Self-Referential Macros +@cindex self-reference + +A @dfn{self-referential} macro is one whose name appears in its +definition. Recall that all macro definitions are rescanned for more +macros to replace. If the self-reference were considered a use of the +macro, it would produce an infinitely large expansion. To prevent +this, the self-reference is not considered a macro call: preprocessing +leaves it unchanged. 
Consider an example: + +@example +#define foo (4 + foo) +@end example + +@noindent +where @code{foo} is also a variable in your program. + +Following the ordinary rules, each reference to @code{foo} will expand +into @code{(4 + foo)}; then this will be rescanned and will expand into +@code{(4 + (4 + foo))}; and so on until the computer runs out of memory. + +The self-reference rule cuts this process short after one step, at +@code{(4 + foo)}. Therefore, this macro definition has the possibly +useful effect of causing the program to add 4 to the value of @code{foo} +wherever @code{foo} is referred to. + +In most cases, it is a bad idea to take advantage of this feature. A +person reading the program who sees that @code{foo} is a variable will +not expect that it is a macro as well. The reader will come across the +identifier @code{foo} in the program and think its value should be that +of the variable @code{foo}, whereas in fact the value is four greater. + +It is useful to make a macro definition that expands to the macro +name itself. If you write + +@example +#define EPERM EPERM +@end example + +@noindent +then the macro @code{EPERM} expands to @code{EPERM}. Effectively, +preprocessing leaves it unchanged in the source code. You can tell +that it's a macro with @code{#ifdef}. You might do this if you want +to define numeric constants with an @code{enum}, but have +@code{#ifdef} be true for each constant. + +If a macro @code{x} expands to use a macro @code{y}, and the expansion of +@code{y} refers to the macro @code{x}, that is an @dfn{indirect +self-reference} of @code{x}. @code{x} is not expanded in this case +either. 
Thus, if we have + +@example +#define x (4 + y) +#define y (2 * x) +@end example + +@noindent +then @code{x} and @code{y} expand as follows: + +@example +@group +x @expansion{} (4 + y) + @expansion{} (4 + (2 * x)) + +y @expansion{} (2 * x) + @expansion{} (2 * (4 + y)) +@end group +@end example + +@noindent +Each macro is expanded when it appears in the definition of the other +macro, but not when it indirectly appears in its own definition. + +@node Argument Prescan +@subsubsection Argument Prescan +@cindex expansion of arguments +@cindex macro argument expansion +@cindex prescan of macro arguments + +Macro arguments are completely macro-expanded before they are +substituted into a macro body, unless they are stringified or pasted +with other tokens. After substitution, the entire macro body, including +the substituted arguments, is scanned again for macros to be expanded. +The result is that the arguments are scanned @emph{twice} to expand +macro calls in them. + +Most of the time, this has no effect. If the argument contained any +macro calls, they were expanded during the first scan. The result +therefore contains no macro calls, so the second scan does not change +it. If the argument were substituted as given, with no prescan, the +single remaining scan would find the same macro calls and produce the +same results. + +You might expect the double scan to change the results when a +self-referential macro is used in an argument of another macro +(@pxref{Self-Referential Macros}): the self-referential macro would be +expanded once in the first scan, and a second time in the second scan. +However, this is not what happens. The self-references that do not +expand in the first scan are marked so that they will not expand in the +second scan either. + +You might wonder, ``Why mention the prescan, if it makes no difference? 
+And why not skip it and make preprocessing go faster?'' The answer is +that the prescan does make a difference in three special cases: + +@itemize @bullet +@item +Nested calls to a macro. + +We say that @dfn{nested} calls to a macro occur when a macro's argument +contains a call to that very macro. For example, if @code{f} is a macro +that expects one argument, @code{f (f (1))} is a nested pair of calls to +@code{f}. The desired expansion is made by expanding @code{f (1)} and +substituting that into the definition of @code{f}. The prescan causes +the expected result to happen. Without the prescan, @code{f (1)} itself +would be substituted as an argument, and the inner use of @code{f} would +appear during the main scan as an indirect self-reference and would not +be expanded. + +@item +Macros that call other macros that stringify or concatenate. + +If an argument is stringified or concatenated, the prescan does not +occur. If you @emph{want} to expand a macro, then stringify or +concatenate its expansion, you can do that by causing one macro to call +another macro that does the stringification or concatenation. For +instance, if you have + +@example +#define AFTERX(x) X_ ## x +#define XAFTERX(x) AFTERX(x) +#define TABLESIZE 1024 +#define BUFSIZE TABLESIZE +@end example + +@noindent +then @code{AFTERX(BUFSIZE)} expands to @code{X_BUFSIZE}, and +@code{XAFTERX(BUFSIZE)} expands to @code{X_1024}. (Not to +@code{X_TABLESIZE}. Prescan always does a complete expansion.) + +@item +Macros used in arguments, whose expansions contain unshielded commas. + +This can cause a macro expanded on the second scan to be called with the +wrong number of arguments. Here is an example: + +@example +#define foo a,b +#define bar(x) lose(x) +#define lose(x) (1 + (x)) +@end example + +We would like @code{bar(foo)} to turn into @code{(1 + (foo))}, which +would then turn into @code{(1 + (a,b))}. 
Instead, @code{bar(foo)} +expands into @code{lose(a,b)}, which gives an error because @code{lose} +requires a single argument. In this case, the problem is easily solved +by the same parentheses that ought to be used to prevent misnesting of +arithmetic operations: + +@example +#define foo (a,b) +@exdent or +#define bar(x) lose((x)) +@end example + +The extra pair of parentheses prevents the comma in @code{foo}'s +definition from being interpreted as an argument separator. +@end itemize + +@ignore +@c This is commented out because pragmas are not supposed +@c to alter the meaning of the program. +@c Microsoft did something stupid in defining these. + +@node Macro Pragmas +@subsection Macro Pragmas + +A pragma is a way of specifying special directions to the C compiler. +@xref{Pragmas}, for the basic syntax of pragmas. Here we describe two +pragmas that save the current definition of a macro on a stack, and +restore it later. This makes it possible to redefine a macro temporarily +and later go back to the previous definition. + +@table @code +@item #pragma push_macro (@var{macro_name}) +@itemx _Pragma ("push_macro (@var{macro_name})") +The @samp{push_macro} pragma saves the current macro definition of +@var{macro_name} on the macro definition stack. + +@item #pragma pop_macro (@var{macro_name}) +@itemx _Pragma ("pop_macro (@var{macro_name})") +The @samp{pop_macro} pragma pops a saved macro definition +off the macro definition stack and defines @var{macro_name} with +that definition. +@end table + +Each macro name has a separate stack, and @samp{pop_macro} +when the stack is empty has no effect. + +Here's an example of using these two pragmas to override temporarily +the definition of @code{FOO}. 
+ +@example +#define FOO 42 + +/* @r{Do something with @var{FOO} defined as 42...} */ + +_Pragma ("push_macro (\"FOO\")") +#undef FOO +#define FOO 47 + +/* @r{Do something with @var{FOO} defined as 47...} */ + +_Pragma ("pop_macro (\"FOO\")") + +/* @r{@var{FOO} is now restored} + @r{to its previous definition of 42.} */ +@end example +@end ignore + +@node Conditionals +@section Conditionals +@cindex conditionals + +A @dfn{conditional} is a preprocessing directive that controls whether +or not to include a chunk of code in the final token stream that is +compiled. Preprocessing conditionals can test arithmetic expressions, +or whether a name is defined as a macro, or both together using the +special @code{defined} operator. + +A preprocessing conditional in C resembles in some ways an @code{if} +statement in C, but it is important to understand the difference between +them. The condition in an @code{if} statement is tested during the +execution of your program. Its purpose is to allow your program to +behave differently from run to run, depending on the data it is +operating on. The condition in a preprocessing conditional directive is +tested when your program is compiled. Its purpose is to allow different +code to be included in the program depending on the situation at the +time of compilation. + +Sometimes this distinction makes no practical difference. GCC and +other modern compilers often +do test @code{if} statements when a program is compiled, if their +conditions are known not to vary at run time, and eliminate code that +can never be executed. If you can count on your compiler to do this, +you may find that your program is more readable if you use @code{if} +statements with constant conditions (perhaps determined by macros). Of +course, you can only use this to exclude code, not type definitions or +other preprocessing directives, and you can only do it if the file +remains syntactically valid when that code is not used. 
+ +@menu +* Conditional Uses:: +* Conditional Syntax:: +* Deleted Code:: +@end menu + +@node Conditional Uses +@subsection Uses of Conditional Directives + +There are three usual reasons to use a preprocessing conditional. + +@itemize @bullet +@item +A program may need to use different code depending on the machine or +operating system it is to run on. In some cases the code for one +operating system may be erroneous on another operating system; for +example, it might refer to data types or constants that do not exist on +the other system. When this happens, it is not enough to avoid +executing the invalid code. Its mere presence will cause the compiler +to reject the program. With a preprocessing conditional, the offending +code can be effectively excised from the program when it is not valid. + +@item +You may want to be able to compile the same source file into two +different programs. One version might make frequent time-consuming +consistency checks on its intermediate data, or print the values of +those data for debugging, and the other not. + +@item +A conditional whose condition is always false is one way to exclude code +from the program but keep it as a sort of comment for future reference. +@end itemize + +Simple programs that do not need system-specific logic or complex +debugging hooks generally will not need to use preprocessing +conditionals. + +@node Conditional Syntax +@subsection Syntax of Preprocessing Conditionals + +@findex #if +A preprocessing conditional begins with a @dfn{conditional +directive}: @code{#if}, @code{#ifdef} or @code{#ifndef}. + +@menu +* ifdef:: +* if:: +* defined:: +* else:: +* elif:: +@end menu + +@node ifdef +@subsubsection The @code{#ifdef} directive +@findex #ifdef +@findex #endif + +The simplest sort of conditional is + +@example +@group +#ifdef @var{MACRO} + +@var{controlled text} + +#endif /* @var{MACRO} */ +@end group +@end example + +@cindex conditional group +This block is called a @dfn{conditional group}. 
The body, +@var{controlled text}, will be included in compilation if +and only if @var{MACRO} is defined. We say that the conditional +@dfn{succeeds} if @var{MACRO} is defined, @dfn{fails} if it is not. + +The @var{controlled text} inside a conditional can include +preprocessing directives. They are executed only if the conditional +succeeds. You can nest conditional groups inside other conditional +groups, but they must be completely nested. In other words, +@code{#endif} always matches the nearest @code{#ifdef} (or +@code{#ifndef}, or @code{#if}). Also, you cannot start a conditional +group in one file and end it in another. + +Even if a conditional fails, the @var{controlled text} inside it is +still run through initial transformations and tokenization. Therefore, +it must all be lexically valid C@. Normally the only way this matters is +that all comments and string literals inside a failing conditional group +must still be properly ended. + +The comment following the @code{#endif} is not required, but it is a +good practice if there is a lot of @var{controlled text}, because it +helps people match the @code{#endif} to the corresponding @code{#ifdef}. + +Older programs sometimes put @var{macro} directly after the +@code{#endif} without enclosing it in a comment. This is invalid code +according to the C standard, but it only causes a warning in GNU C@. +It never affects which @code{#ifndef} the @code{#endif} matches. + +@findex #ifndef +Sometimes you wish to use some code if a macro is @emph{not} defined. +You can do this by writing @code{#ifndef} instead of @code{#ifdef}. +One common use of @code{#ifndef} is to include code only the first +time a header file is included. @xref{Once-Only Headers}. + +Macro definitions can vary between compilations for several reasons. +Here are some samples. 
+ +@itemize @bullet +@item +Some macros are predefined on each kind of machine +(@pxref{System-specific Predefined Macros, System-specific Predefined +Macros, System-specific Predefined Macros, gcc, Using the GNU Compiler +Collection}). This allows you to provide code specially tuned for a +particular machine. + +@item +System header files define more macros, associated with the features +they implement. You can test these macros with conditionals to avoid +using a system feature on a machine where it is not implemented. + +@item +Macros can be defined or undefined with the @option{-D} and @option{-U} +command-line options when you compile the program. You can arrange to +compile the same source file into two different programs by choosing a +macro name to specify which program you want, writing conditionals to +test whether or how this macro is defined, and then controlling the +state of the macro with command-line options, perhaps set in the +file @file{Makefile}. @xref{Invocation, Invoking GCC, Invoking GCC, +gcc, Using the GNU Compiler Collection}. + +@item +Your program might have a special header file (often called +@file{config.h}) that is adjusted when the program is compiled. It can +define or not define macros depending on the features of the system and +the desired capabilities of the program. The adjustment can be +automated by a tool such as @command{autoconf}, or done by hand. +@end itemize + +@node if +@subsubsection The @code{#if} directive + +The @code{#if} directive allows you to test the value of an integer arithmetic +expression, rather than the mere existence of one macro. Its syntax is + +@example +@group +#if @var{expression} + +@var{controlled text} + +#endif /* @var{expression} */ +@end group +@end example + +@var{expression} is a C expression of integer type, subject to +stringent restrictions so its value can be computed at compile time. +It may contain + +@itemize @bullet +@item +Integer constants. 
+ +@item +Character constants, which are interpreted as they would be in normal +code. + +@item +Arithmetic operators for addition, subtraction, multiplication, +division, bitwise operations, shifts, comparisons, and logical +operations (@code{&&} and @code{||}). The latter two obey the usual +short-circuiting rules of standard C@. + +@item +Macros. All macros in the expression are expanded before actual +computation of the expression's value begins. + +@item +Uses of the @code{defined} operator, which lets you check whether macros +are defined in the middle of an @code{#if}. + +@item +Identifiers that are not macros, which are all considered to be the +number zero. This allows you to write @code{@w{#if MACRO}} instead of +@code{@w{#ifdef MACRO}}, if you know that @code{MACRO}, when defined, will +always have a nonzero value. Function-like macros used without their +function call parentheses are also treated as zero. + +In some contexts this shortcut is undesirable. The @option{-Wundef} +option requests warnings for any identifier in an @code{#if} that is not +defined as a macro. +@end itemize + +Preprocessing does not know anything about the data types of C@. +Therefore, @code{sizeof} operators are not recognized in @code{#if}; +@code{sizeof} is simply an identifier, and if it is not a macro, it +stands for zero. This is likely to make the expression invalid. +Preprocessing does not recognize @code{enum} constants; they too are +simply identifiers, so if they are not macros, they stand for zero. + +Preprocessing calculates the value of @var{expression}, and carries +out all calculations in the widest integer type known to the compiler; +on most machines supported by GNU C this is 64 bits. This is not the +same rule as the compiler uses to calculate the value of a constant +expression, and may give different results in some cases. If the +value comes out to be nonzero, the @code{#if} succeeds and the +@var{controlled text} is compiled; otherwise it is skipped. 
+ +@node defined +@subsubsection The @code{defined} test + +@cindex @code{defined} +The special operator @code{defined} is used in @code{#if} and +@code{#elif} expressions to test whether a certain name is defined as a +macro. @code{defined @var{name}} and @code{defined (@var{name})} are +both expressions whose value is 1 if @var{name} is defined as a macro at +the current point in the program, and 0 otherwise. Thus, @code{@w{#if +defined MACRO}} is precisely equivalent to @code{@w{#ifdef MACRO}}. + +@code{defined} is useful when you wish to test more than one macro for +existence at once. For example, + +@example +#if defined (__arm__) || defined (__PPC__) +@end example + +@noindent +would succeed if either of the names @code{__arm__} or +@code{__PPC__} is defined as a macro---in other words, +when compiling for ARM processors or PowerPC processors. + +Conditionals written like this: + +@example +#if defined BUFSIZE && BUFSIZE >= 1024 +@end example + +@noindent +can generally be simplified to just @code{@w{#if BUFSIZE >= 1024}}, +since if @code{BUFSIZE} is not defined, it will be interpreted as having +the value zero. + +In GCC, you can include @code{defined} as part of another macro definition, +like this: + +@example +#define MACRO_DEFINED(X) defined X + +#if MACRO_DEFINED(BUFSIZE) +@end example + +@noindent +which would expand the @code{#if} expression to: + +@example +#if defined BUFSIZE +@end example + +@noindent +Generating @code{defined} in this way is a GNU C extension. + +@node else +@subsubsection The @code{#else} directive + +@findex #else +The @code{#else} directive can be added to a conditional to provide +alternative text to be used if the condition fails. 
This is what it +looks like: + +@example +@group +#if @var{expression} +@var{text-if-true} +#else /* Not @var{expression} */ +@var{text-if-false} +#endif /* Not @var{expression} */ +@end group +@end example + +@noindent +If @var{expression} is nonzero, the @var{text-if-true} is included and +the @var{text-if-false} is skipped. If @var{expression} is zero, the +opposite happens. + +You can use @code{#else} with @code{#ifdef} and @code{#ifndef}, too. + +@node elif +@subsubsection The @code{#elif} directive + +@findex #elif +One common case of nested conditionals is used to check for more than two +possible alternatives. For example, you might have + +@example +#if X == 1 +/* @r{@dots{}} */ +#else /* X != 1 */ +#if X == 2 +/* @r{@dots{}} */ +#else /* X != 2 */ +/* @r{@dots{}} */ +#endif /* X != 2 */ +#endif /* X != 1 */ +@end example + +Another conditional directive, @code{#elif}, allows this to be +abbreviated as follows: + +@example +#if X == 1 +/* @r{@dots{}} */ +#elif X == 2 +/* @r{@dots{}} */ +#else /* X != 2 and X != 1*/ +/* @r{@dots{}} */ +#endif /* X != 2 and X != 1*/ +@end example + +@code{#elif} stands for ``else if''. Like @code{#else}, it goes in the +middle of a conditional group and subdivides it; it does not require a +matching @code{#endif} of its own. Like @code{#if}, the @code{#elif} +directive includes an expression to be tested. The text following the +@code{#elif} is processed only if the original @code{#if}-condition +failed and the @code{#elif} condition succeeds. + +More than one @code{#elif} can go in the same conditional group. Then +the text after each @code{#elif} is processed only if the @code{#elif} +condition succeeds after the original @code{#if} and all previous +@code{#elif} directives within it have failed. + +@code{#else} is allowed after any number of @code{#elif} directives, but +@code{#elif} may not follow @code{#else}. 
+ +@node Deleted Code +@subsection Deleted Code +@cindex commenting out code + +If you replace or delete a part of the program but want to keep the +old code in the file for future reference, commenting it out is not so +straightforward in C@. Block comments do not nest, so the first +comment inside the old code will end the commenting-out. The probable +result is a flood of syntax errors. + +One way to avoid this problem is to use an always-false conditional +instead. For instance, put @code{#if 0} before the deleted code and +@code{#endif} after it. This works even if the code being turned +off contains conditionals, but they must be entire conditionals +(balanced @code{#if} and @code{#endif}). + +Some people use @code{#ifdef notdef} instead. This is risky, because +@code{notdef} might be accidentally defined as a macro, and then the +conditional would succeed. @code{#if 0} can be counted on to fail. + +Do not use @code{#if 0} around text that is not C code. Use a real +comment, instead. The interior of @code{#if 0} must consist of complete +tokens; in particular, single-quote characters must balance. Comments +often contain unbalanced single-quote characters (known in English as +apostrophes). These confuse @code{#if 0}. They don't confuse +@samp{/*}. + +@node Diagnostics +@section Diagnostics +@cindex diagnostic +@cindex reporting errors +@cindex reporting warnings + +@findex #error +The directive @code{#error} reports a fatal error. The +tokens forming the rest of the line following @code{#error} are used +as the error message. + +The usual place to use @code{#error} is inside a conditional that +detects a combination of parameters that you know the program does not +properly support. For example, + +@smallexample +#if !defined(UNALIGNED_INT_ASM_OP) && defined(DWARF2_DEBUGGING_INFO) +#error "DWARF2_DEBUGGING_INFO requires UNALIGNED_INT_ASM_OP." 
+#endif +@end smallexample + +@findex #warning +The directive @code{#warning} is like @code{#error}, but it reports a +warning instead of an error. The tokens following @code{#warning} are +used as the warning message. + +You might use @code{#warning} in obsolete header files, with a message +saying which header file to use instead. + +Neither @code{#error} nor @code{#warning} macro-expands its argument. +Internal whitespace sequences are each replaced with a single space. +The line must consist of complete tokens. It is wisest to make the +argument of these directives be a single string constant; this avoids +problems with apostrophes and the like. + +@node Line Control +@section Line Control +@cindex line control + +Due to C's widespread availability and low-level nature, it is often +used as the target language for translation of other languages, or for +the output of lexical analyzers and parsers (e.g., lex/flex and +yacc/bison). Line control enables the user to track diagnostics back +to the location in the original language. + +The C compiler knows the location in the source file where each token +came from: file name, starting line and column, and final line and column. +(Column numbers are used only for error messages.) + +When a program generates C source code, as the Bison parser generator +does, often it copies some of that C code from another file. For +instance parts of the output from Bison are generated from scratch or +come from a standard parser file, but Bison copies the rest from +Bison's input file. Errors in that code, at compile time or run time, +should refer to that file, which is the real source code. To make that happen, +Bison generates line-control directives that the C compiler understands. + +@findex #line +@code{#line} is a directive that specifies the original line number +and source file name for subsequent code. 
@code{#line} has three +variants: + +@table @code +@item #line @var{linenum} +@var{linenum} is a non-negative decimal integer constant. It specifies +the line number that should be reported for the following line of +input. Subsequent lines are counted from @var{linenum}. + +@item #line @var{linenum} @var{filename} +@var{linenum} is the same as for the first form, and has the same +effect. In addition, @var{filename} is a string constant that +specifies the source file name. Subsequent source lines are recorded +as coming from that file, until something else happens to change that. +@var{filename} is interpreted according to the normal rules for a +string constant. Backslash escapes are interpreted, in contrast to +@code{#include}. + +@item #line @var{anything else} +@var{anything else} is checked for macro calls, which are expanded. +The result should match one of the above two forms. +@end table + +@code{#line} directives alter the results of the @code{__FILE__} and +@code{__LINE__} symbols from that point on. @xref{Predefined Macros}. + +@node Null Directive +@section Null Directive + +@cindex null directive +The @dfn{null directive} consists of a @code{#} followed by a newline, +with only whitespace and comments in between. It has no +effect on the output of the compiler. + + diff --git a/fdl.texi b/fdl.texi new file mode 100644 index 0000000..cb71f05 --- /dev/null +++ b/fdl.texi @@ -0,0 +1,505 @@ +@c The GNU Free Documentation License. +@center Version 1.3, 3 November 2008 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. +@uref{http://fsf.org/} + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. 
+@end display + +@enumerate 0 +@item +PREAMBLE + +The purpose of this License is to make a manual, textbook, or other +functional and useful document @dfn{free} in the sense of freedom: to +assure everyone the effective freedom to copy and redistribute it, +with or without modifying it, either commercially or noncommercially. +Secondarily, this License preserves for the author and publisher a way +to get credit for their work, while not being considered responsible +for modifications made by others. + +This License is a kind of ``copyleft'', which means that derivative +works of the document must themselves be free in the same sense. It +complements the GNU General Public License, which is a copyleft +license designed for free software. + +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +@item +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. 
+ +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. + +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. + +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. 
+ +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +ASCII without markup, Texinfo input format, La@TeX{} input +format, SGML or XML using a publicly available +DTD, and standard-conforming simple HTML, +PostScript or PDF designed for human modification. Examples +of transparent image formats include PNG, XCF and +JPG. Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, SGML or +XML for which the DTD and/or processing tools are +not generally available, and the machine-generated HTML, +PostScript or PDF produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +The ``publisher'' means any person or entity that distributes copies +of the Document to the public. 
+ +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. + +@item +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. 
+ +@item +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. + +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. 
+ +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + +@item +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +@enumerate A +@item +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. + +@item +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +@item +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +@item +Preserve all the copyright notices of the Document. + +@item +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +@item +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +@item +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. 
+ +@item +Include an unaltered copy of this License. + +@item +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +@item +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. + +@item +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +@item +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +@item +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. + +@item +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. + +@item +Preserve any Warranty Disclaimers. +@end enumerate + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. 
To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. + +@item +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. 
If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. You must delete all +sections Entitled ``Endorsements.'' + +@item +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. + +@item +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. 
+ +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. + +@item +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. +Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. + +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. + +@item +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. 
+ +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. + +@item +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +@uref{http://www.gnu.org/copyleft/}. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. 
If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. + +@item +RELICENSING + +``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the +site means any set of copyrightable works thus published on the MMC +site. + +``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. + +``Incorporate'' means to publish or republish a Document, in whole or +in part, as part of another Document. + +An MMC is ``eligible for relicensing'' if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. + +@end enumerate + +@page +@heading ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +@smallexample +@group + Copyright (C) @var{year} @var{your name}. 
+ Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. +@end group +@end smallexample + +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with@dots{}Texts.''@: line with this: + +@smallexample +@group + with the Invariant Sections being @var{list their titles}, with + the Front-Cover Texts being @var{list}, and with the Back-Cover Texts + being @var{list}. +@end group +@end smallexample + +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. + +@c Local Variables: +@c ispell-local-pdict: "ispell-dict" +@c End: diff --git a/fp.texi b/fp.texi new file mode 100644 index 0000000..5766671 --- /dev/null +++ b/fp.texi @@ -0,0 +1,1801 @@ +@ignore +Copyright @copyright{} 2017,2019,2020 Free Software Foundation, Inc. 
+ +DRAFT --- DO NOT REDISTRIBUTE --- DRAFT --- DO NOT REDISTRIBUTE --- DRAFT +@end ignore + +@node Floating Point in Depth +@chapter Floating Point in Depth + +@menu +* Floating Representations:: +* Floating Type Specs:: +* Special Float Values:: +* Invalid Optimizations:: +* Exception Flags:: +* Exact Floating-Point:: +* Rounding:: +* Rounding Issues:: +* Significance Loss:: +* Fused Multiply-Add:: +* Error Recovery:: +@c * Double-Rounding Problems:: +* Exact Floating Constants:: +* Handling Infinity:: +* Handling NaN:: +* Signed Zeros:: +* Scaling by the Base:: +* Rounding Control:: +* Machine Epsilon:: +* Complex Arithmetic:: +* Round-Trip Base Conversion:: +* Further Reading:: +@end menu + +@node Floating Representations +@section Floating-Point Representations +@cindex floating-point representations +@cindex representation of floating-point numbers + +@cindex IEEE 754-2008 Standard +Storing numbers as @dfn{floating point} allows representation of +numbers with fractional values, in a range larger than that of +hardware integers. A floating-point number consists of a sign bit, a +@dfn{significand} (also called the @dfn{mantissa}), and a power of a +fixed base. GNU C uses the floating-point representations specified by +the @cite{IEEE 754-2008 Standard for Floating-Point Arithmetic}. + +The IEEE 754-2008 specification defines basic binary floating-point +formats of five different sizes: 16-bit, 32-bit, 64-bit, 128-bit, and +256-bit. The formats of 32, 64, and 128 bits are used for the +standard C types @code{float}, @code{double}, and @code{long double}. +GNU C supports the 16-bit floating point type @code{_Float16} on some +platforms, but does not support the 256-bit floating point type. + +Each of the formats encodes the floating-point number starting with a sign bit. +After this comes an exponent that specifies a power of 2 (with a fixed +offset). Then comes the significand.
+ +The first bit of the significand, before the binary point, is always +1, so there is no need to store it in memory. It is called the +@dfn{hidden bit} because it doesn't appear in the floating-point +number as used in the computer itself. + +All of those floating-point formats are sign-magnitude representations, +so +0 and @minus{}0 are different values. + +Besides the IEEE 754 format 128-bit float, GNU C also offers a format +consisting of a pair of 64-bit floating point numbers. This lacks the +full exponent range of the IEEE 128-bit format, but is useful when the +underlying hardware platform does not support that. + +@node Floating Type Specs +@section Floating-Point Type Specifications + +The standard library header file @file{float.h} defines a number of +constants that describe the platform's implementation of +floating-point types @code{float}, @code{double} and @code{long +double}. They include: + +@findex FLT_MIN +@findex DBL_MIN +@findex LDBL_MIN +@findex FLT_HAS_SUBNORM +@findex DBL_HAS_SUBNORM +@findex LDBL_HAS_SUBNORM +@findex FLT_TRUE_MIN +@findex DBL_TRUE_MIN +@findex LDBL_TRUE_MIN +@findex FLT_MAX +@findex DBL_MAX +@findex LDBL_MAX +@findex FLT_DECIMAL_DIG +@findex DBL_DECIMAL_DIG +@findex LDBL_DECIMAL_DIG + +@table @code +@item FLT_MIN +@itemx DBL_MIN +@itemx LDBL_MIN +Defines the minimum normalized positive floating-point values that can +be represented with the type. + +@item FLT_HAS_SUBNORM +@itemx DBL_HAS_SUBNORM +@itemx LDBL_HAS_SUBNORM +Defines if the floating-point type supports subnormal (or ``denormalized'') +numbers or not (@pxref{subnormal numbers}). + +@item FLT_TRUE_MIN +@itemx DBL_TRUE_MIN +@itemx LDBL_TRUE_MIN +Defines the minimum positive values (including subnormal values) that +can be represented with the type. + +@item FLT_MAX +@itemx DBL_MAX +@itemx LDBL_MAX +Defines the largest values that can be represented with the type. 
+ +@item FLT_DECIMAL_DIG +@itemx DBL_DECIMAL_DIG +@itemx LDBL_DECIMAL_DIG +Defines the number of decimal digits @code{n} such that any +floating-point number that can be represented in the type can be +rounded to a floating-point number with @code{n} decimal digits, and +back again, without losing any precision of the value. +@end table + +@node Special Float Values +@section Special Floating-Point Values +@cindex special floating-point values +@cindex floating-point values, special + +IEEE floating point provides for special values that are not ordinary +numbers. + +@table @asis + +@item infinities +@code{+Infinity} and @code{-Infinity} are two different infinite +values, one positive and one negative. These result from +operations such as @code{1 / 0}, @code{Infinity + Infinity}, +@code{Infinity * Infinity}, and @code{Infinity + @var{finite}}, and also +from a result that is finite, but larger than the most positive possible +value or smaller than the most negative possible value. + +@xref{Handling Infinity}, for more about working with infinities. + +@item NaNs (not a number) +@cindex QNaN +@cindex SNaN +There are two special values, called Not-a-Number (NaN): a quiet +NaN (QNaN), and a signaling NaN (SNaN). + +A QNaN is produced by operations for which the value is undefined +in real arithmetic, such as @code{0 / 0}, @code{sqrt (-1)}, +@code{Infinity - Infinity}, and any basic operation in which an +operand is a QNaN. + +The signaling NaN is intended for initializing +otherwise-unassigned storage, and the goal is that unlike a +QNaN, an SNaN @emph{does} cause an interrupt that can be caught +by a software handler, diagnosed, and reported. In practice, +little use has been made of signaling NaNs, because the most +common CPUs in desktop and portable computers fail to implement +the full IEEE 754 Standard, and supply only one kind of NaN, the +quiet one. 
Also, programming-language standards have taken +decades to catch up to the IEEE 754 standard, and implementations +of those language standards make an additional delay before +programmers become willing to use these features. + +To enable support for signaling NaNs, use the GCC command-line option +@option{-fsignaling-nans}, but this is an experimental feature and may +not work as expected in every situation. + +A NaN has a sign bit, but its value means nothing. + +@xref{Handling NaN}, for more about working with NaNs. + +@item subnormal numbers +@cindex subnormal numbers +@cindex underflow, floating +@cindex floating underflow +@anchor{subnormal numbers} +It can happen that a computed floating-point value is too small to +represent, such as when two tiny numbers are multiplied. The result +is then said to @dfn{underflow}. The traditional behavior before +the IEEE 754 Standard was to use zero as the result, and possibly to report +the underflow in some sort of program output. + +The IEEE 754 Standard is vague about whether rounding happens +before detection of floating underflow and overflow, or after, and CPU +designers may choose either. + +However, the Standard does something unusual compared to earlier +designs, and that is that when the result is smaller than the +smallest @dfn{normalized} representable value (i.e., one in +which the leading significand bit is @code{1}), the normalization +requirement is relaxed, leading zero bits are permitted, and +precision is gradually lost until there are no more bits in the +significand. That phenomenon is called @dfn{gradual underflow}, +and it serves important numerical purposes, although it does +reduce the precision of the final result. Some floating-point +designs allow you to choose at compile time, or even at +run time, whether underflows are gradual, or are flushed abruptly +to zero. Numbers that have entered the region of gradual +underflow are called @dfn{subnormal}. 
+ +You can use the library functions @code{fesetround} and +@code{fegetround} to set and get the rounding mode. Rounding modes +are defined (if supported by the platform) in @code{fenv.h} as: +@code{FE_UPWARD} to round toward positive infinity; @code{FE_DOWNWARD} +to round toward negative infinity; @code{FE_TOWARDZERO} to round +toward zero; and @code{FE_TONEAREST} to round to the nearest +representable value, the default mode. It is best to use +@code{FE_TONEAREST} except when there is a special need for some other +mode. +@end table + +@node Invalid Optimizations +@section Invalid Optimizations +@cindex invalid optimizations in floating-point arithmetic +@cindex floating-point arithmetic invalid optimizations + +Signed zeros, Infinity, and NaN invalidate some optimizations by +programmers and compilers that might otherwise have seemed obvious: + +@itemize @bullet +@item +@code{x + 0} and @code{x - 0} are not the same as @code{x} when +@code{x} is zero, because the result depends on the rounding rule. +@xref{Rounding}, for more about rounding rules. + +@item +@code{x * 0.0} is not the same as @code{0.0} when @code{x} is +Infinity, a NaN, or negative zero. + +@item +@code{x / x} is not the same as @code{1.0} when @code{x} is Infinity, +a NaN, or zero. + +@item +@code{(x - y)} is not the same as @code{-(y - x)} because when the +operands are finite and equal, one evaluates to @code{+0} and the +other to @code{-0}. + +@item +@code{x - x} is not the same as @code{0.0} when @var{x} is Infinity or +a NaN. + +@item +@code{x == x} and @code{x != x} are not equivalent to @code{1} and +@code{0} when @var{x} is a NaN. + +@item +@code{x < y} and @code{isless (x, y)} are not equivalent, because the +first sets a sticky exception flag (@pxref{Exception Flags}) when an +operand is a NaN, whereas the second does not affect that flag. The +same holds for the other @code{isxxx} functions that are companions to +relational operators. 
@xref{FP Comparison Functions, , , libc, The +GNU C Library Reference Manual}. + +@end itemize + +The @option{-funsafe-math-optimizations} option enables +these optimizations. + + +@node Exception Flags +@section Floating Arithmetic Exception Flags +@cindex floating arithmetic exception flags +@cindex exception flags (floating point) +@cindex sticky exception flags (floating point) +@cindex floating overflow +@cindex overflow, floating +@cindex floating underflow +@cindex underflow, floating + +@dfn{Sticky exception flags} record the occurrence of particular +conditions: once set, they remain set until the program explicitly +clears them. + +The conditions include @emph{invalid operand}, +@emph{division-by-zero}, @emph{inexact result} (i.e., one that +required rounding), @emph{underflow}, and @emph{overflow}. Some +extended floating-point designs offer several additional exception +flags. The functions @code{feclearexcept}, @code{feraiseexcept}, +@code{fetestexcept}, @code{fegetexceptflag}, and +@code{fesetexceptflag} provide a standardized interface to those +flags. @xref{Status bit operations, , , libc, The GNU C Library +Reference Manual}. + +One important use of those @anchor{fetestexcept}flags is to do a +computation that is normally expected to be exact in floating-point +arithmetic, but occasionally might not be, in which case, corrective +action is needed. You can clear the @emph{inexact result} flag with a +call to @code{feclearexcept (FE_INEXACT)}, do the computation, and +then test the flag with @code{fetestexcept (FE_INEXACT)}; the result +of that call is 0 if the flag is not set (there was no rounding), and +nonzero when there was rounding (which, we presume, implies the program has +to correct for that).
+ +@c ===================================================================== + +@ignore +@node IEEE 754 Decimal Arithmetic +@section IEEE 754 Decimal Arithmetic +@cindex IEEE 754 decimal arithmetic + +One of the difficulties that users of computers for numerical +work face, whether they realize it or not, is that the computer +does not operate in the number base that most people are familiar +with. As a result, input decimal fractions must be converted to +binary floating-point values for use in computations, and then +the final results converted back to decimal for humans. Because the +precision is finite and limited, and because algorithms for correct +round-trip conversion between number bases were not known until the +1990s, and are still not implemented on most systems and most +programming languages, the result is frequent confusion for users, +when a simple expression like @code{3.0*(1.0/3.0)} evaluates to +0.999999 instead of the expected 1.0. Here is an +example from a floating-point calculator that allows rounding-mode +control, with the mode set to @emph{round-towards-zero}: + +@example +for (k = 1; k <= 10; ++k) + (void)printf ("%2d\t%10.6f\n", k, k*(1.0/k)) + 1 1.000000 + 2 1.000000 + 3 0.999999 + 4 1.000000 + 5 0.999999 + 6 0.999999 + 7 0.999999 + 8 1.000000 + 9 0.999999 +10 0.999999 +@end example + +Increasing working precision can sometimes help by reducing +intermediate rounding errors, but the reality is that when +fractional values are involved, @emph{no amount} of extra +precision can suffice for some computations. For example, the +nice decimal value @code{1/10} in C99-style binary representation +is @code{+0x1.999999999999ap-4}; that final digit @code{a} is the +rounding of an infinite string of @code{9}'s. 
+ +Financial computations in particular depend critically on correct +arithmetic, and the losses due to rounding errors can be +large, especially for businesses with large numbers of small +transactions, such as grocery stores and telephone companies. +Tax authorities are particularly picky, and demand specific +rounding rules, including one that instead of rounding ties to +the nearest number, rounds instead in the direction that favors +the taxman. + +Programming languages used for business applications, notably the +venerable Cobol language, have therefore always implemented +financial computations in @emph{fixed-point decimal arithmetic} +in software, and because of the large monetary amounts that must be +processed, successive Cobol standards have increased the minimum +number size from 18 to 32 decimal digits, and the most recent one +requires a decimal exponent range of at least @code{[-999, +999]}. + +The revised IEEE 754-2008 standard therefore requires decimal +floating-point arithmetic, as well as the now-widely used binary +formats from 1985. Like the binary formats, the decimal formats +also support Infinity, NaN, and signed zero, and the five basic +operations are also required to produce correctly rounded +representations of infinitely precise exact results. + +However, the financial applications of decimal arithmetic +introduce some new features: + +@itemize @bullet + +@item +There are three decimal formats occupying 32, 64, and 128 bits of +storage, and offering exactly 7, 16, and 34 decimal digits of +precision. If one size has @code{n} digits, the next larger size +has @code{2 n + 2} digits. Thus, a future 256-bit format would +supply 70 decimal digits, and at least one library already +supports the 256-bit binary and decimal formats. 
+ +@item +Decimal arithmetic has an additional rounding mode, called +@emph{round-ties-to-away-from-zero}, meaning that a four-digit +rounding of @code{1.2345} is @code{1.235}, and @code{-1.2345} +becomes @code{-1.235}. That rounding mode is mandated by +financial laws in several countries. + +@item +The decimal significand is an +@anchor{decimal-significand}@emph{integer}, instead of a fractional +value, and trailing zeros are only removed at user request. That +feature allows floating-point arithmetic to emulate the +@emph{fixed-point arithmetic} traditionally used in financial +computations. + +@end itemize + +@noindent +We can easily estimate how many digits are likely to be needed for +financial work: seven billion people on Earth, with an average annual +income of less than US$10,000, means a world financial base that can +be represented in just 15 decimal digits. Even allowing for alternate +currencies, future growth, multiyear accounting, and intermediate +computations, the 34 digits supplied by the 128-bit format are more +than enough for financial purposes. + +We return to decimal arithmetic later in this chapter +(@pxref{More on decimal floating-point arithmetic}), after we have +covered more about floating-point arithmetic in general. + +@c ===================================================================== + +@end ignore + +@node Exact Floating-Point +@section Exact Floating-Point Arithmetic +@cindex exact floating-point arithmetic +@cindex floating-point arithmetic, exact + +As long as the numbers are exactly representable (fractions whose +denominator is a power of 2), and intermediate results do not require +rounding, then floating-point arithmetic is @emph{exact}. 
It is easy +to predict how many digits are needed for the results of arithmetic +operations: + +@itemize @bullet + +@item +addition and subtraction of two @var{n}-digit values with the +@emph{same} exponent require at most @code{@var{n} + 1} digits, but +when the exponents differ, many more digits may be needed; + +@item +multiplication of two @var{n}-digit values requires at most +2 @var{n} digits; + +@item +although integer division produces a quotient and a remainder of +no more than @var{n} digits, floating-point remainder and square +root may require an unbounded number of digits, and the quotient +can need many more digits than can be stored. + +@end itemize + +Whenever a result requires more than @var{n} digits, rounding +is needed. + +@c ===================================================================== + +@node Rounding +@section Rounding +@cindex rounding + +When floating-point arithmetic produces a result that can't fit +exactly in the significand of the type that's in use, it has to +@dfn{round} the value. The basic arithmetic operations---addition, +subtraction, multiplication, division, and square root---always produce +a result that is equivalent to the exact, possibly infinite-precision +result rounded to storage precision according to the current rounding +rule. + +Rounding sets the @code{FE_INEXACT} exception flag (@pxref{Exception +Flags}). This enables programs to determine that rounding has +occurred. + +Rounding consists of adjusting the exponent to bring the significand +back to the required base-point alignment, then applying the current +@dfn{rounding rule} to squeeze the significand into the fixed +available size. + +The current rule is selected at run time from four options. Here they +are: + +@itemize * +@item +@emph{round-to-nearest}, with ties rounded to an even integer; + +@item +@emph{round-up}, towards @code{+Infinity}; + +@item +@emph{round-down}, towards @code{-Infinity}; + +@item +@emph{round-towards-zero}.
+@end itemize + +Under those four rounding rules, a decimal value +@code{-1.2345} that is to be rounded to a four-digit result would +become @code{-1.234}, @code{-1.234}, @code{-1.235}, and +@code{-1.234}, respectively. + +The default rounding rule is @emph{round-to-nearest}, because that has +the least bias, and produces the lowest average error. When the true +result lies exactly halfway between two representable machine numbers, +the result is rounded to the one that ends with an even digit. + +The @emph{round-towards-zero} rule was common on many early computer +designs, because it is the easiest to implement: it just requires +silent truncation of all extra bits. + +The two other rules, @emph{round-up} and @emph{round-down}, are +essential for implementing @dfn{interval arithmetic}, whereby +each arithmetic operation produces lower and upper bounds that +are guaranteed to enclose the exact result. + +@xref{Rounding Control}, for details on getting and setting the +current rounding mode. + +@node Rounding Issues +@section Rounding Issues +@cindex rounding issues (floating point) +@cindex floating-point rounding issues + +The default IEEE 754 rounding mode minimizes errors, and most +normal computations should not suffer any serious accumulation of +errors from rounding. + +Of course, you can contrive examples where that is not so. Here +is one: iterate a square root, then attempt to recover the +original value by repeated squaring. 
+ +@example +#include <math.h> +#include <stdio.h> + +int main (void) +@{ + double x = 100.0; + double y; + int k, n; + for (n = 10; n <= 100; n += 10) + @{ + y = x; + for (k = 0; k < n; ++k) y = sqrt (y); + for (k = 0; k < n; ++k) y *= y; + printf ("n = %3d; x = %.0f\ty = %.6f\n", n, x, y); + @} + return 0; +@} +@end example + +@noindent +Here is the output: + +@example +n = 10; x = 100 y = 100.000000 +n = 20; x = 100 y = 100.000000 +n = 30; x = 100 y = 99.999977 +n = 40; x = 100 y = 99.981025 +n = 50; x = 100 y = 90.017127 +n = 60; x = 100 y = 1.000000 +n = 70; x = 100 y = 1.000000 +n = 80; x = 100 y = 1.000000 +n = 90; x = 100 y = 1.000000 +n = 100; x = 100 y = 1.000000 +@end example + +After 50 iterations, @code{y} has barely one correct digit, and +soon after, there are no correct digits. + +@c ===================================================================== + +@node Significance Loss +@section Significance Loss +@cindex significance loss (floating point) +@cindex floating-point significance loss + +A much more serious source of error in floating-point computation is +@dfn{significance loss} from subtraction of nearly equal values. This +means that the number of bits in the significand of the result is +fewer than the size of the value would permit. If the values being +subtracted are close enough, but still not equal, a @emph{single +subtraction} can wipe out all correct digits, possibly contaminating +all future computations. + +Floating-point calculations can sometimes be carefully designed so +that significance loss is not possible, such as summing a series where +all terms have the same sign. For example, the Taylor series +expansions of the trigonometric and hyperbolic sines have terms of +identical magnitude, of the general form @code{@var{x}**(2*@var{n} + +1) / (2*@var{n} + 1)!}. However, those in the trigonometric sine series +alternate in sign, while those in the hyperbolic sine series are all +positive.
Here is the output of two small programs that sum @var{k} +terms of the series for @code{sin (@var{x})}, and compare the computed +sums with known-to-be-accurate library functions: + +@example +x = 10 k = 51 +s (x) = -0.544_021_110_889_270 +sin (x) = -0.544_021_110_889_370 + +x = 20 k = 81 +s (x) = 0.912_945_250_749_573 +sin (x) = 0.912_945_250_727_628 + +x = 30 k = 109 +s (x) = -0.987_813_746_058_855 +sin (x) = -0.988_031_624_092_862 + +x = 40 k = 137 +s (x) = 0.617_400_430_980_474 +sin (x) = 0.745_113_160_479_349 + +x = 50 k = 159 +s (x) = 57_105.187_673_745_720_532 +sin (x) = -0.262_374_853_703_929 + +// sinh(x) series summation with positive signs +// with k terms needed to converge to machine precision + +x = 10 k = 47 +t (x) = 1.101_323_287_470_340e+04 +sinh (x) = 1.101_323_287_470_339e+04 + +x = 20 k = 69 +t (x) = 2.425_825_977_048_951e+08 +sinh (x) = 2.425_825_977_048_951e+08 + +x = 30 k = 87 +t (x) = 5.343_237_290_762_229e+12 +sinh (x) = 5.343_237_290_762_231e+12 + +x = 40 k = 105 +t (x) = 1.176_926_334_185_100e+17 +sinh (x) = 1.176_926_334_185_100e+17 + +x = 50 k = 121 +t (x) = 2.592_352_764_293_534e+21 +sinh (x) = 2.592_352_764_293_536e+21 +@end example + +@noindent +We have added underscores to the numbers to enhance readability. + +The @code{sinh (@var{x})} series with positive terms can be summed to +high accuracy. By contrast, the series for @code{sin (@var{x})} +suffers increasing significance loss, so that when @var{x} = 30 only +two correct digits remain. Soon after, all digits are wrong, and the +answers are complete nonsense. + +An important skill in numerical programming is to recognize when +significance loss is likely to contaminate a computation, and revise +the algorithm to reduce this problem. Sometimes, the only practical +way to do so is to compute in higher intermediate precision, which is +why the extended types like @code{long double} are important. 
+ +@c Formerly mentioned @code{__float128} + +@c ===================================================================== + +@node Fused Multiply-Add +@section Fused Multiply-Add +@cindex fused multiply-add in floating-point computations +@cindex floating-point fused multiply-add + +In 1990, when IBM introduced the POWER architecture, the CPU +provided a previously unknown instruction, the @dfn{fused +multiply-add} (FMA). It computes the value @code{x * y + z} with +an @strong{exact} double-length product, followed by an addition with a +@emph{single} rounding. Numerical computation often needs pairs of +multiply and add operations, for which the FMA is well-suited. + +On the POWER architecture, there are two dedicated registers that +hold permanent values of @code{0.0} and @code{1.0}, and the +normal @emph{multiply} and @emph{add} instructions are just +wrappers around the FMA that compute @code{x * y + 0.0} and +@code{x * 1.0 + z}, respectively. + +In the early days, it appeared that the main benefit of the FMA +was getting two floating-point operations for the price of one, +almost doubling the performance of some algorithms. However, +numerical analysts have since shown numerous uses of the FMA for +significantly enhancing accuracy. We discuss one of the most +important ones in the next section. + +A few other architectures have since included the FMA, and most +provide variants for the related operations @code{x * y - z} +(FMS), @code{-x * y + z} (FNMA), and @code{-x * y - z} (FNMS). +@c The IEEE 754-2008 revision requires implementations to provide +@c the FMA, as a sixth basic operation. + +The functions @code{fmaf}, @code{fma}, and @code{fmal} implement fused +multiply-add for the @code{float}, @code{double}, and @code{long +double} data types. Correct implementation of the FMA in software is +difficult, and some systems that appear to provide those functions do +not satisfy the single-rounding requirement. 
That situation should +change as more programmers use the FMA operation, and more CPUs +provide FMA in hardware. + +Use the @option{-ffp-contract=fast} option to allow generation of FMA +instructions, or @option{-ffp-contract=off} to disallow it. + +@c ===================================================================== + +@node Error Recovery +@section Error Recovery +@cindex error recovery (floating point) +@cindex floating-point error recovery + +When two numbers are combined by one of the four basic +operations, the result often requires rounding to storage +precision. For accurate computation, one would like to be able +to recover that rounding error. With historical floating-point +designs, it was difficult to do so portably, but now that IEEE +754 arithmetic is almost universal, the job is much easier. + +For addition with the default @emph{round-to-nearest} rounding +mode, we can determine the error in a sum like this: + +@example +volatile double err, sum, tmp, x, y; + +if (fabs (x) >= fabs (y)) + @{ + sum = x + y; + tmp = sum - x; + err = y - tmp; + @} +else /* fabs (x) < fabs (y) */ + @{ + sum = x + y; + tmp = sum - y; + err = x - tmp; + @} +@end example + +@noindent +@cindex twosum +Now, @code{x + y} is @emph{exactly} represented by @code{sum + err}. +This basic operation, which has come to be called @dfn{twosum} +in the numerical-analysis literature, is the first key to tracking, +and accounting for, rounding error. + +To determine the error in subtraction, just swap the @code{+} and +@code{-} operators. + +We used the @code{volatile} qualifier (@pxref{volatile}) in the +declaration of the variables, which forces the compiler to store and +retrieve them from memory, and prevents the compiler from optimizing +@code{err = y - ((x + y) - x)} into @code{err = 0}. 
+ +For multiplication, we can compute the rounding error without +magnitude tests with the FMA operation (@pxref{Fused Multiply-Add}), +like this: + +@example +volatile double err, prod, x, y; +prod = x * y; /* @r{rounded product} */ +err = fma (x, y, -prod); /* @r{exact product @code{= @var{prod} + @var{err}}} */ +@end example + +For addition, subtraction, and multiplication, we can represent the +exact result with the notional sum of two values. However, the exact +result of division, remainder, or square root potentially requires an +infinite number of digits, so we can at best approximate it. +Nevertheless, we can compute an error term that is close to the true +error: it is just that error value, rounded to machine precision. + +For division, you can approximate @code{x / y} with @code{quo + err} +like this: + +@example +volatile double err, quo, x, y; +quo = x / y; +err = fma (-quo, y, x) / y; +@end example + +For square root, we can approximate @code{sqrt (x)} with @code{root + +err} like this: + +@example +volatile double err, root, x; +root = sqrt (x); +err = fma (-root, root, x) / (root + root); +@end example + +With the reliable and predictable floating-point design provided +by IEEE 754 arithmetic, we now have the tools we need to track +errors in the five basic floating-point operations, and we can +effectively simulate computing in twice working precision, which +is sometimes sufficient to remove almost all traces of arithmetic +errors. + +@c ===================================================================== + +@ignore +@node Double-Rounding Problems +@section Double-Rounding Problems +@cindex double-rounding problems (floating point) +@cindex floating-point double-rounding problems + +Most developers today use 64-bit x86 processors with a 64-bit +operating system, with a Streaming SIMD Extensions (SSE) instruction +set. In the past, using a 32-bit x87 instruction set was common, +leading to issues described in this section. 
+ +To offer a few more digits of precision and a wider exponent range, +the IEEE 754 Standard included an optional @emph{temporary real} +format, with 11 more bits in the significand, and 4 more bits in the +biased exponent. + +Compilers are free to exploit the longer format, and most do so. +That is usually a @emph{good thing}, such as in computing a +lengthy sum or product, or in implementing the computation of the +hypotenuse of a right-triangle as @code{sqrt (x*x + y*y)}: the +wider exponent range is critical there for avoiding disastrous +overflow or underflow. + +@findex fesetprec +@findex fegetprec +However, sometimes it is critical to know what the intermediate +precision and rounding mode are, such as in tracking errors with +the techniques of the preceding section. Some compilers provide +options to prevent the use of the 80-bit format in computations +with 64-bit @code{double}, but ensuring that code is always +compiled that way may be difficult. The x86 architecture has the +ability to force rounding of all operations in the 80-bit +registers to the 64-bit storage format, and some systems provide +a software interface with the functions @code{fesetprec} and +@code{fegetprec}. Unfortunately, neither of those functions is +defined by the ISO Standards for C and C++, and consequently, +they are not standardly available on all platforms that use +the x86 floating-point design. + +When @code{double} computations are done in the 80-bit format, +results necessarily involve a @dfn{double rounding}: first to the +64-bit significand in intermediate operations in registers, and +then to the 53-bit significand when the register contents are +stored to memory. Here is an example in decimal arithmetic where +such a double rounding results in the wrong answer: round +@code{1_235_499} from seven to five to four digits. The result is +@code{1_236_000}, whereas the correct representation to four +significant digits is @code{1_235_000}.
+ +@cindex -ffloat-store +One way to reduce the use of the 80-bit format is to declare variables +as @code{volatile double}: that way, the compiler is required to store +and load intermediates from memory, rather than keeping them in 80-bit +registers over long sequences of floating-point instructions. Doing +so does not, however, eliminate double rounding. The now-common +x86-64 architecture has separate sets of 32-bit and 64-bit +floating-point registers. The option @option{-ffloat-store} says that +floating-point computation should use only those registers, thus eliminating +the possibility of double rounding. +@end ignore + +@c ===================================================================== + +@node Exact Floating Constants +@section Exact Floating-Point Constants +@cindex exact specification of floating-point constants +@cindex floating-point constants, exact specification of + +One of the frustrations that numerical programmers have suffered +with since the dawn of digital computers is the inability to +precisely specify numbers in their programs. On the early +decimal machines, that was not an issue: you could write a +constant @code{1e-30} and be confident of that exact value +being used in floating-point operations. However, when the +hardware works in a base other than 10, then human-specified +numbers have to be converted to that base, and then converted +back again at output time. The two base conversions are rarely +exact, and unwanted rounding errors are introduced. + +@cindex hexadecimal floating-point constants +As computers usually represent numbers in a base other than 10, +numbers often must be converted to and from different bases, and +rounding errors can occur during conversion. This problem is solved +in C using hexadecimal floating-point constants. For example, +@code{+0x1.fffffcp-1} is an IEEE 754 32-bit value just +below @code{1.0}: it is exactly @code{1 - 2**-23}.
The significand is represented as a +hexadecimal fraction, and the @emph{power of two} is written in +decimal following the exponent letter @code{p} (the traditional +exponent letter @code{e} is not possible, because it is a hexadecimal +digit). + +In @code{printf} and @code{scanf} and related functions, you can use +the @samp{%a} and @samp{%A} format specifiers for writing and reading +hexadecimal floating-point values. @samp{%a} writes them with lower +case letters and @samp{%A} writes them with upper case letters. For +instance, this code reproduces our sample number: + +@example +printf ("%a\n", 1.0 - pow (2.0, -23)); + @print{} 0x1.fffffcp-1 +@end example + +@noindent +The @code{strtod} family was similarly extended to recognize +numbers in that new format. + +If you want to ensure exact data representation for transfer of +floating-point numbers between C programs on different +computers, then hexadecimal constants are an optimum choice. + +@c ===================================================================== + +@node Handling Infinity +@section Handling Infinity +@cindex infinity in floating-point arithmetic +@cindex floating-point infinity + +As we noted earlier, the IEEE 754 model of computing is not to stop +the program when exceptional conditions occur. It takes note of +exceptional values or conditions by setting sticky @dfn{exception +flags}, or by producing results with the special values Infinity and +QNaN. In this section, we discuss Infinity; for the other, see +@ref{Handling NaN}. + +In GNU C, you can create a value of negative Infinity in software like +this: + +@verbatim +double x; + +x = -1.0 / 0.0; +@end verbatim + +GNU C supplies the @code{__builtin_inf}, @code{__builtin_inff}, and +@code{__builtin_infl} built-in functions, and the GNU C Library provides the +@code{INFINITY} macro, all of which yield compile-time constants for +positive infinity.
+ +GNU C also provides a standard function to test for an Infinity: +@code{isinf (x)} returns a nonzero value if the argument is a signed +infinity, and @code{0} if not. + +Infinities can be compared, and all Infinities of the same sign are +equal: there is no notion in IEEE 754 arithmetic of different kinds of +Infinities, as there are in some areas of mathematics. Positive +Infinity is larger than any finite value, and negative Infinity is +smaller than any finite value. + +Infinities propagate in addition, subtraction, multiplication, +and square root, but in division, they disappear, because of the +rule that @code{finite / Infinity} is @code{0.0}. Thus, an +overflow in an intermediate computation that produces an Infinity +is likely to be noticed later in the final results. The programmer can +then decide whether the overflow is expected, and acceptable, or whether +the code possibly has a bug, or needs to be run in higher +precision, or redesigned to avoid the production of the Infinity. + +@c ===================================================================== + +@node Handling NaN +@section Handling NaN +@cindex NaN in floating-point arithmetic +@cindex not a number +@cindex floating-point NaN + +NaNs are not numbers: they represent values from computations that +produce undefined results. They have a distinctive property that +makes them unlike any other floating-point value: they are +@emph{unequal to everything, including themselves}! Thus, you can +write a test for a NaN like this: + +@example +if (x != x) + printf ("x is a NaN\n"); +@end example + +@noindent +This test works in GNU C, but some compilers might evaluate that test +expression as false without properly checking for the NaN value. +A more portable way to test for NaN is to use the @code{isnan} +function declared in @code{math.h}: + +@example +if (isnan (x)) + printf ("x is a NaN\n"); +@end example + +@noindent +@xref{Floating Point Classes, , , libc, The GNU C Library Reference Manual}.
+ +One important use of NaNs is marking of missing data. For +example, in statistics, such data must be omitted from +computations. Use of any particular finite value for missing +data would eventually collide with real data, whereas such data +could never be a NaN, so it is an ideal marker. Functions that +deal with collections of data that may have holes can be written +to test for, and ignore, NaN values. + +It is easy to generate a NaN in computations: evaluating @code{0.0 / +0.0} is the commonest way, but @code{Infinity - Infinity}, +@code{Infinity / Infinity}, and @code{sqrt (-1.0)} also work. +Functions that receive out-of-bounds arguments can choose to return a +stored NaN value, such as with the @code{NAN} macro defined in +@code{math.h}, but that does not set the @emph{invalid operand} +exception flag, and that can fool some programs. + +@cindex NaNs-always-propagate rule +Like Infinity, NaNs propagate in computations, but they are even +stickier, because they never disappear in division. Thus, once a +NaN appears in a chain of numerical operations, it is almost +certain to pop out into the final results. The programmer +has to decide whether that is expected, or whether there is a +coding or algorithmic error that needs repair. + +In general, when a function gets a NaN argument, it usually returns a +NaN. However, there are some exceptions in the math-library functions +that you need to be aware of, because they violate the +@emph{NaNs-always-propagate} rule: + +@itemize @bullet + +@item +@code{pow (x, 0.0)} always returns @code{1.0}, even if @code{x} is +0.0, Infinity, or a NaN. + +@item +@code{pow (1, y)} always returns @code{1}, even if @code{y} is a NaN. + +@item +@code{hypot (INFINITY, y)} and @code{hypot (-INFINITY, y)} both +always return @code{INFINITY}, even if @code{y} is a NaN. + +@item +If just one of the arguments to @code{fmax (x, y)} or +@code{fmin (x, y)} is a NaN, it returns the other argument.
If +both arguments are NaNs, it returns a NaN, but there is no +requirement about where it comes from: it could be @code{x}, or +@code{y}, or some other quiet NaN. +@end itemize + +NaNs are also used for the return values of math-library +functions where the result is not representable in real +arithmetic, or is mathematically undefined or uncertain, such as +@code{sqrt (-1.0)} and @code{sin (Infinity)}. However, note that a +result that is merely too big to represent should always produce +an Infinity, such as with @code{exp (1000.0)} (too big) and +@code{exp (Infinity)} (truly infinite). + +@c ===================================================================== + +@node Signed Zeros +@section Signed Zeros +@cindex signed zeros in floating-point arithmetic +@cindex floating-point signed zeros + +The sign of zero is significant, and important, because it records the +creation of a value that is too small to represent, but came from +either the negative axis, or from the positive axis. Such fine +distinctions are essential for proper handling of @dfn{branch cuts} +in complex arithmetic (@pxref{Complex Arithmetic}). + +The key point about signed zeros is that in comparisons, their sign +does not matter: @code{0.0 == -0.0} must @emph{always} evaluate to +@code{1} (true). However, they are not @emph{the same number}, and +@code{-0.0} in C code stands for a negative zero. + +@c ===================================================================== + +@node Scaling by the Base +@section Scaling by Powers of the Base +@cindex scaling floating point by powers of the base +@cindex floating-point scaling by powers of the base + +We have discussed rounding errors several times in this chapter, +but it is important to remember that when results require no more +bits than the exponent and significand bits can represent, those results +are @emph{exact}. + +One particularly useful exact operation is scaling by a power of +the base. 
While one, in principle, could do that with code like +this: + +@example +y = x * pow (2.0, (double)k); /* @r{Undesirable scaling: avoid!} */ +@end example + +@noindent +that is not advisable, because it relies on the quality of the +math-library power function, and that happens to be one of the +most difficult functions in the C math library to make accurate. +What is likely to happen on many systems is that the returned +value from @code{pow} will be close to a power of two, but +slightly different, so the subsequent multiplication introduces +rounding error. + +The correct, and fastest, way to do the scaling is either via the +traditional C library function, or with its C99 equivalent: + +@example +y = ldexp (x, k); /* @r{Traditional pre-C99 style.} */ +y = scalbn (x, k); /* @r{C99 style.} */ +@end example + +@noindent +Both functions return @code{x * 2**k}. +@xref{Normalization Functions, , , libc, The GNU C Library Reference Manual}. + +@c ===================================================================== + +@node Rounding Control +@section Rounding Control +@cindex rounding control (floating point) +@cindex floating-point rounding control + +Here we describe how to specify the rounding mode at run time. System +header file @file{fenv.h} provides the prototypes for these functions. +@xref{Rounding, , , libc, The GNU C Library Reference Manual}. + +@noindent +That header file also provides constant names for the four rounding modes: +@code{FE_DOWNWARD}, @code{FE_TONEAREST}, @code{FE_TOWARDZERO}, and +@code{FE_UPWARD}. + +The function @code{fegetround} examines and returns the current +rounding mode. On a platform with IEEE 754 floating point, +the value will always equal one of those four constants. +On other platforms, it may return a negative value. The function +@code{fesetround} sets the current rounding mode. + +Changing the rounding mode can be slow, so it is useful to minimize +the number of changes. 
For interval arithmetic, we seem to need three +changes for each operation, but we really only need two, because we +can write code like this example for interval addition of two reals: + +@example +@{ + struct interval_double + @{ + double hi, lo; + @} v; + volatile double x, y; + int rule; + + rule = fegetround (); + + if (fesetround (FE_UPWARD) == 0) + @{ + v.hi = x + y; + v.lo = -(-x - y); + @} + else + fatal ("ERROR: failed to change rounding rule"); + + if (fesetround (rule) != 0) + fatal ("ERROR: failed to restore rounding rule"); +@} +@end example + +@noindent +The @code{volatile} qualifier (@pxref{volatile}) is essential on x86 +platforms to prevent an optimizing compiler from producing the same +value for both bounds. + +@ignore We no longer discuss the double rounding issue. + The code also needs to be compiled with the +option @option{-ffloat-store} that prevents use of higher precision +for the basic operations, because that would introduce double rounding +that could spoil the bounds guarantee of interval arithmetic. +@end ignore + +@c ===================================================================== + +@node Machine Epsilon +@section Machine Epsilon +@cindex machine epsilon (floating point) +@cindex floating-point machine epsilon + +In any floating-point system, three attributes are particularly +important to know: @dfn{base} (the number that the exponent specifies +a power of), @dfn{precision} (number of digits in the significand), +and @dfn{range} (difference between most positive and most negative +values). The allocation of bits between exponent and significand +decides the answers to those questions. + +A measure of the precision is the answer to the question: what is +the smallest number that can be added to @code{1.0} such that the +sum differs from @code{1.0}? That number is called the +@dfn{machine epsilon}. 
+ +We could define the needed machine-epsilon constants for @code{float}, +@code{double}, and @code{long double} like this: + +@example +static const float epsf = 0x1p-23; /* @r{about 1.192e-07} */ +static const double eps = 0x1p-52; /* @r{about 2.220e-16} */ +static const long double epsl = 0x1p-63; /* @r{about 1.084e-19} */ +@end example + +@noindent +Instead of the hexadecimal constants, we could also have used the +Standard C macros, @code{FLT_EPSILON}, @code{DBL_EPSILON}, and +@code{LDBL_EPSILON}. + +It is useful to be able to compute the machine epsilons at +run time, and we can easily generalize the operation by replacing +the constant @code{1.0} with a user-supplied value: + +@example +double +macheps (double x) +@{ /* @r{Return machine epsilon for @var{x},} */ + /* @r{such that @var{x} + macheps (@var{x}) > @var{x}.} */ + static const double base = 2.0; + double eps; + + if (isnan (x)) + eps = x; + else + @{ + eps = (x == 0.0) ? 1.0 : x; + + while ((x + eps / base) != x) + eps /= base; /* @r{Always exact!} */ + @} + + return (eps); +@} +@end example + +@noindent +If we call that function with arguments from @code{0} to +@code{10}, as well as Infinity and NaN, and print the returned +values in hexadecimal, we get output like this: + +@example +macheps ( 0) = 0x1.0000000000000p-1074 +macheps ( 1) = 0x1.0000000000000p-52 +macheps ( 2) = 0x1.0000000000000p-51 +macheps ( 3) = 0x1.8000000000000p-52 +macheps ( 4) = 0x1.0000000000000p-50 +macheps ( 5) = 0x1.4000000000000p-51 +macheps ( 6) = 0x1.8000000000000p-51 +macheps ( 7) = 0x1.c000000000000p-51 +macheps ( 8) = 0x1.0000000000000p-49 +macheps ( 9) = 0x1.2000000000000p-50 +macheps ( 10) = 0x1.4000000000000p-50 +macheps (Inf) = infinity +macheps (NaN) = nan +@end example + +@noindent +Notice that @code{macheps} has a special test for a NaN to prevent an +infinite loop. + +@ignore We no longer discuss double rounding.
+To ensure that no expressions are evaluated with an intermediate higher +precision, we can compile with the @option{-fexcess-precision=standard} +option, which tells the compiler that all calculation results, including +intermediate results, are to be put on the stack, forcing rounding. +@end ignore + +Our code made another test for a zero argument to avoid getting a +zero return. The returned value in that case is the smallest +representable floating-point number, here the subnormal value +@code{2**(-1074)}, which is about @code{4.941e-324}. + +No special test is needed for an Infinity, because the +@code{eps}-reduction loop then terminates at the first iteration. + +Our @code{macheps} function here assumes binary floating point; some +architectures may differ. + +The C library includes some related functions that can also be used to +determine machine epsilons at run time: + +@example +#include <math.h> /* @r{Include for these prototypes.} */ + +double nextafter (double x, double y); +float nextafterf (float x, float y); +long double nextafterl (long double x, long double y); +@end example + +@noindent +These return the machine number nearest @var{x} in the direction of +@var{y}. For example, @code{nextafter (1.0, 2.0)} produces the same +result as @code{1.0 + macheps (1.0)} and @code{1.0 + DBL_EPSILON}. +@xref{FP Bit Twiddling, , , libc, The GNU C Library Reference Manual}. + +It is important to know that the machine epsilon is not symmetric +about all numbers. At the boundaries where normalization changes the +exponent, the epsilon below @var{x} is smaller than that just above +@var{x} by a factor @code{1 / base}. For example, @code{macheps +(1.0)} returns @code{+0x1p-52}, whereas @code{macheps (-1.0)} returns +@code{+0x1p-53}. Some authors distinguish those cases by calling them +the @emph{positive} and @emph{negative}, or @emph{big} and +@emph{small}, machine epsilons.
You can produce their values like +this: + +@example +eps_neg = 1.0 - nextafter (1.0, -1.0); +eps_pos = nextafter (1.0, +2.0) - 1.0; +@end example + +If @var{x} is a variable, such that you do not know its value at +compile time, then you can substitute literal @var{y} values with +either @code{-inf()} or @code{+inf()}, like this: + +@example +eps_neg = x - nextafter (x, -inf ()); +eps_pos = nextafter (x, +inf ()) - x; +@end example + +@noindent +In such cases, if @var{x} is Infinity, then @emph{the @code{nextafter} +functions return @var{y} if @var{x} equals @var{y}}. Our two +assignments then produce @code{+0x1.fffffffffffffp+1023} (about +1.798e+308) for @var{eps_neg} and Infinity for @var{eps_pos}. Thus, +the call @code{nextafter (INFINITY, -INFINITY)} can be used to find +the largest representable finite number, and with the call +@code{nextafter (0.0, 1.0)}, the smallest representable number (here, +@code{0x1p-1074} (about 4.941e-324), a number that we saw before as +the output from @code{macheps (0.0)}). + +@c ===================================================================== + +@node Complex Arithmetic +@section Complex Arithmetic +@cindex complex arithmetic in floating-point calculations +@cindex floating-point arithmetic with complex numbers + +We've already looked at defining and referring to complex numbers +(@pxref{Complex Data Types}). What is important to discuss here are +some issues that are unlikely to be obvious to programmers without +extensive experience in both numerical computing, and in complex +arithmetic in mathematics. + +The first important point is that, unlike real arithmetic, in complex +arithmetic, the danger of significance loss is @emph{pervasive}, and +affects @emph{every one} of the basic operations, and @emph{almost +all} of the math-library functions.
To understand why, recall the +rules for complex multiplication and division: + +@example +a = u + I*v /* @r{First operand.} */ +b = x + I*y /* @r{Second operand.} */ + +prod = a * b + = (u + I*v) * (x + I*y) + = (u * x - v * y) + I*(v * x + u * y) + +quo = a / b + = (u + I*v) / (x + I*y) + = [(u + I*v) * (x - I*y)] / [(x + I*y) * (x - I*y)] + = [(u * x + v * y) + I*(v * x - u * y)] / (x**2 + y**2) +@end example + +@noindent +There are four critical observations about those formulas: + +@itemize @bullet + +@item +the multiplications on the right-hand side introduce the +possibility of premature underflow or overflow; + +@item +the products must be accurate to twice working precision; + +@item +there is @emph{always} one subtraction on the right-hand sides +that is subject to catastrophic significance loss; and + +@item +complex multiplication has up to @emph{six} rounding errors, and +complex division has @emph{ten} rounding errors. + +@end itemize + +@cindex branch cuts +Another point that needs careful study is the fact that many functions +in complex arithmetic have @dfn{branch cuts}. You can view a +function with a complex argument, @code{f (z)}, as @code{f (x + I*y)}, +and thus, it defines a relation between a point @code{(x, y)} on the +complex plane with an elevation value on a surface. A branch cut +looks like a tear in that surface, so approaching the cut from one +side produces a particular value, and from the other side, a quite +different value. Great care is needed to handle branch cuts properly, +and even small numerical errors can push a result from one side to the +other, radically changing the returned value. As we reported earlier, +correct handling of the sign of zero is critically important for +computing near branch cuts. 
+ +The best advice that we can give to programmers who need complex +arithmetic is to always use the @emph{highest precision available}, +and then to carefully check the results of test calculations to gauge +the likely accuracy of the computed results. It is easy to supply +test values of real and imaginary parts where all five basic +operations in complex arithmetic, and almost all of the complex math +functions, lose @emph{all} significance, and fail to produce even a +single correct digit. + +Even though complex arithmetic makes some programming tasks +easier, it may be numerically preferable to rework the algorithm +so that it can be carried out in real arithmetic. That is +commonly possible in matrix algebra. + +GNU C can perform code optimization on complex number multiplication and +division if certain boundary checks will not be needed. The +command-line option @option{-fcx-limited-range} tells the compiler that +a range reduction step is not needed when performing complex division, +and that there is no need to check if a complex multiplication or +division results in the value @code{NaN + I*NaN}. By default these +checks are enabled. You can explicitly enable them with the +@option{-fno-cx-limited-range} option. + +@ignore +@c ===================================================================== + +@node More on Decimal Floating-Point Arithmetic +@section More on Decimal Floating-Point Arithmetic +@cindex decimal floating-point arithmetic +@cindex floating-point arithmetic, decimal + +Proposed extensions to the C programming language call for the +inclusion of decimal floating-point arithmetic, which handles +floating-point numbers with a specified radix of 10, instead of the +unspecified traditional radix of 2. + +The proposed new types are @code{_Decimal32}, @code{_Decimal64}, and +@code{_Decimal128}, with corresponding literal constant suffixes of +@code{df}, @code{dd}, and @code{dl}, respectively.
For example, a +32-bit decimal floating-point variable could be defined as: + +@example +_Decimal32 foo = 42.123df; +@end example + +We stated earlier (@pxref{decimal-significand}) that the significand +in decimal floating-point arithmetic is an integer, rather than +fractional, value. Decimal instructions do not normally alter the +exponent by normalizing nonzero significands to remove trailing zeros. +That design feature is intentional: it allows emulation of the +fixed-point arithmetic that has commonly been used for financial +computations. + +One consequence of the lack of normalization is that there are +multiple representations of any number that does not use all of the +significand digits. Thus, in the 32-bit format, the values +@code{1.DF}, @code{1.0DF}, @code{1.00DF}, @dots{}, @code{1.000_000DF}, +all have different bit patterns in storage, even though they compare +equal. Thus, programmers need to be careful about trailing zero +digits, because they appear in the results, and affect scaling. For +example, @code{1.0DF * 1.0DF} evaluates to @code{1.00DF}, which is +stored as @code{100 * 10**(-2)}. + +In general, when you look at a decimal expression with fractional +digits, you should mentally rewrite it in integer form with suitable +powers of ten. Thus, a multiplication like @code{1.23 * 4.56} really +means @code{123 * 10**(-2) * 456 * 10**(-2)}, which evaluates to +@code{56088 * 10**(-4)}, and would be output as @code{5.6088}. + +Another consequence of the decimal significand choice is that +initializing decimal floating-point values to a pattern of +all-bits-zero does not produce the expected value @code{0.}: instead, +the result is the subnormal values @code{0.e-101}, @code{0.e-398}, and +@code{0.e-6176} in the three storage formats. + +GNU C currently supports basic storage and manipulation of decimal +floating-point values on some platforms, and support is expected to +grow in future releases. + +@c ??? 
Suggest chopping the rest of this section, at least for the time +@c ??? being. Decimal floating-point support in GNU C is not yet complete, +@c ??? and functionality discussed appears to not be available on all +@c ??? platforms, and is not obviously documented for end users of GNU C. --TJR + +The exponent in decimal arithmetic is called the @emph{quantum}, and +financial computations require that the quantum always be preserved. +If it is not, then rounding may have happened, and additional scaling +is required. + +The function @code{samequantumd (x,y)} for 64-bit decimal arithmetic +returns @code{1} if the arguments have the same exponent, and @code{0} +otherwise. + +The function @code{quantized (x,y)} returns a value of @var{x} that has +been adjusted to have the same quantum as @var{y}; that adjustment +could require rounding of the significand. For example, +@code{quantized (5.dd, 1.00dd)} returns the value @code{5.00dd}, which +is stored as @code{500 * 10**(-2)}. As another example, a sales-tax +computation might be carried out like this: + +@example +decimal_long_double amount, rate, total; + +amount = 0.70DL; +rate = 1.05DL; +total = quantizedl (amount * rate, 1.00DL); +@end example + +@noindent +Without the call to @code{quantizedl}, the result would have been +@code{0.7350}, instead of the correctly rounded @code{0.74}. That +particular example was chosen because it illustrates yet another +difference between decimal and binary arithmetic: in the latter, the +factors each require an infinite number of bits, and their product, +when converted to decimal, looks like @code{0.734_999_999@dots{}}. +Thus, rounding the product in binary format to two decimal places +always gets @code{0.73}, which is the @emph{wrong} answer for tax +laws. + +In financial computations in decimal floating-point arithmetic, the +@code{quantized} function family is expected to receive wide use +whenever multiplication or division change the desired quantum of the +result. 
+ +The function call @code{normalized (x)} returns a value that is +numerically equal to @var{x}, but with trailing zeros trimmed. +Here are some examples of its operation: + +@multitable @columnfractions .5 .5 +@headitem Function Call @tab Result +@item normalized (+0.00100DD) @tab +0.001DD +@item normalized (+1.00DD) @tab +1.DD +@item normalized (+1.E2DD) @tab +1E+2DD +@item normalized (+100.DD) @tab +1E+2DD +@item normalized (+100.00DD) @tab +1E+2DD +@item normalized (+NaN(0x1234)) @tab +NaN(0x1234) +@item normalized (-NaN(0x1234)) @tab -NaN(0x1234) +@item normalized (+Infinity) @tab +Infinity +@item normalized (-Infinity) @tab -Infinity +@end multitable + +@noindent +The NaN examples show that payloads are preserved. + +Because the @code{printf} and @code{scanf} families were designed long +before IEEE 754 decimal arithmetic, their format items do not support +distinguishing between numbers with identical values, but different +quanta, and yet, that distinction is likely to be needed in output. + +The solution adopted by one early library for decimal arithmetic is to +provide a family of number-to-string conversion functions that +preserve quantization. Here is a code fragment and its output that +shows how they work. + +@example +decimal_float x; + +x = 123.000DF; +printf ("%%He: x = %He\n", x); +printf ("%%Hf: x = %Hf\n", x); +printf ("%%Hg: x = %Hg\n", x); +printf ("ntosdf (x) = %s\n", ntosdf (x)); + +%He: x = 1.230000e+02 +%Hf: x = 123.000000 +%Hg: x = 123 +ntosdf (x) = +123.000 +@end example + +@noindent +The format modifier letter @code{H} indicates a 32-bit decimal value, +and the modifiers @code{DD} and @code{DL} correspond to the two other +formats. 
+ +@c ===================================================================== +@end ignore + +@node Round-Trip Base Conversion +@section Round-Trip Base Conversion +@cindex round-trip base conversion +@cindex base conversion (floating point) +@cindex floating-point round-trip base conversion + +Most numeric programs involve converting between base-2 floating-point +numbers, as represented by the computer, and base-10 floating-point +numbers, as entered and handled by the programmer. What might not be +obvious is the number of base-2 bits vs. base-10 digits required for +each representation. Consider the following tables showing the number of +decimal digits representable in a given number of bits, and vice versa: + +@multitable @columnfractions .5 .1 .1 .1 .1 .1 +@item binary in @tab 24 @tab 53 @tab 64 @tab 113 @tab 237 +@item decimal out @tab 9 @tab 17 @tab 21 @tab 36 @tab 73 +@end multitable + +@multitable @columnfractions .5 .1 .1 .1 .1 +@item decimal in @tab 7 @tab 16 @tab 34 @tab 70 +@item binary out @tab 25 @tab 55 @tab 114 @tab 234 +@end multitable + +We can compute the table numbers with these two functions: + +@example +int +matula(int nbits) +@{ /* @r{Return output decimal digits needed for nbits-bits input.} */ + return ((int)ceil((double)nbits / log2(10.0) + 1.0)); +@} + +int +goldberg(int ndec) +@{ /* @r{Return output bits needed for ndec-digits input.} */ + return ((int)ceil((double)ndec / log10(2.0) + 1.0)); +@} +@end example + +One significant observation from those numbers is that we cannot +achieve correct round-trip conversion between the decimal and +binary formats in the same storage size! For example, we need 25 +bits to represent a 7-digit value from the 32-bit decimal format, +but the binary format only has 24 available. Similar +observations hold for each of the other conversion pairs. 
+ +The general input/output base-conversion problem is astonishingly +complicated, and solutions were not generally known until the +publication of two papers in 1990 that are listed later near the end +of this chapter. For the 128-bit formats, the worst case needs more +than 11,500 decimal digits of precision to guarantee correct rounding +in a binary-to-decimal conversion! + +For further details see the references for Bennett Goldberg and David +Matula. + +@c ===================================================================== + +@node Further Reading +@section Further Reading + +The subject of floating-point arithmetic is much more complex +than many programmers seem to think, and few books on programming +languages spend much time in that area. In this chapter, we have +tried to expose the reader to some of the key ideas, and to warn +of easily overlooked pitfalls that can soon lead to nonsensical +results. There are a few good references that we recommend +for further reading, and for finding other important material +about computer arithmetic: + +@c ===================================================================== +@c Each bibliography item has a sort key, so the bibliography can be +@c sorted in emacs with M-x sort-paragraphs on the region with the items. +@c ===================================================================== + +@itemize @bullet + +@item @c sort-key: Abbott +Paul H. Abbott and 15 others, @cite{Architecture and software support +in IBM S/390 Parallel Enterprise Servers for IEEE Floating-Point +arithmetic}, IBM Journal of Research and Development @b{43}(5/6) +723--760 (1999), +@uref{https://doi.org/10.1147/rd.435.0723}. This article gives +a good description of IBM's algorithm for exact decimal-to-binary +conversion, complementing earlier ones by Clinger and others. + +@item @c sort-key: Beebe +Nelson H. F. 
Beebe, @cite{The Mathematical-Function Computation Handbook: +Programming Using the MathCW Portable Software Library}, +Springer (2017), ISBN 3-319-64109-3 (hardcover), 3-319-64110-7 (e-book) +(xxxvi + 1114 pages), +@uref{https://doi.org/10.1007/978-3-319-64110-2}. +This book describes portable implementations of a large superset +of the mathematical functions available in many programming +languages, extended to a future 256-bit format (70 decimal +digits), for both binary and decimal floating point. It includes +a substantial portion of the functions described in the famous +@cite{NIST Handbook of Mathematical Functions}, Cambridge (2018), +ISBN 0-521-19225-0. +See +@uref{http://www.math.utah.edu/pub/mathcw} +for compilers and libraries. + +@item @c sort-key: Clinger-1990 +William D. Clinger, @cite{How to Read Floating Point Numbers +Accurately}, ACM SIGPLAN Notices @b{25}(6) 92--101 (June 1990), +@uref{https://doi.org/10.1145/93548.93557}. +See also the papers by Steele & White. + +@item @c sort-key: Clinger-2004 +William D. Clinger, @cite{Retrospective: How to read floating +point numbers accurately}, ACM SIGPLAN Notices @b{39}(4) 360--371 (April 2004), +@uref{http://doi.acm.org/10.1145/989393.989430}. Reprint of 1990 paper, +with additional commentary. + +@item @c sort-key: Goldberg-1967 +I. Bennett Goldberg, @cite{27 Bits Are Not Enough For 8-Digit Accuracy}, +Communications of the ACM @b{10}(2) 105--106 (February 1967), +@uref{http://doi.acm.org/10.1145/363067.363112}. This paper, +and its companions by David Matula, address the base-conversion +problem, and show that the naive formulas are wrong by one or +two digits. + +@item @c sort-key: Goldberg-1991 +David Goldberg, @cite{What Every Computer Scientist Should Know +About Floating-Point Arithmetic}, ACM Computing Surveys @b{23}(1) +5--58 (March 1991), corrigendum @b{23}(3) 413 (September 1991), +@uref{https://doi.org/10.1145/103162.103163}. 
+This paper has been widely distributed, and reissued in vendor +programming-language documentation. It is well worth reading, +and then rereading from time to time. + +@item @c sort-key: Juffa +Norbert Juffa and Nelson H. F. Beebe, @cite{A Bibliography of +Publications on Floating-Point Arithmetic}, +@uref{http://www.math.utah.edu/pub/tex/bib/fparith.bib}. +This is the largest known bibliography of publications about +floating-point, and also integer, arithmetic. It is actively +maintained, and in mid 2019, contains more than 6400 references to +original research papers, reports, theses, books, and Web sites on the +subject matter. It can be used to locate the latest research in the +field, and the historical coverage dates back to a 1726 paper on +signed-digit arithmetic, and an 1837 paper by Charles Babbage, the +intellectual father of mechanical computers. The entries for the +Abbott, Clinger, and Steele & White papers cited earlier contain +pointers to several other important related papers on the +base-conversion problem. + +@item @c sort-key: Kahan +William Kahan, @cite{Branch Cuts for Complex Elementary Functions, or +Much Ado About Nothing's Sign Bit}, (1987), +@uref{http://people.freebsd.org/~das/kahan86branch.pdf}. +This Web document about the fine points of complex arithmetic +also appears in the volume edited by A. Iserles and +M. J. D. Powell, @cite{The State of the Art in Numerical +Analysis: Proceedings of the Joint IMA/SIAM Conference on the +State of the Art in Numerical Analysis held at the University of +Birmingham, 14--18 April 1986}, Oxford University Press (1987), +ISBN 0-19-853614-3 (xiv + 719 pages). Its author is the famous +chief architect of the IEEE 754 arithmetic system, and one of the +world's greatest experts in the field of floating-point +arithmetic. 
An entire generation of his students at the +University of California, Berkeley, have gone on to careers in +academia and industry, spreading the knowledge of how to do +floating-point arithmetic right. + +@item @c sort-key: Knuth +Donald E. Knuth, @cite{A Simple Program Whose Proof Isn't}, +in @cite{Beauty is our business: a birthday salute to Edsger +W. Dijkstra}, W. H. J. Feijen, A. J. M. van Gasteren, +D. Gries, and J. Misra (eds.), Springer (1990), ISBN +1-4612-8792-8, +@uref{https://doi.org/10.1007/978-1-4612-4476-9}. This book +chapter supplies a correctness proof of the decimal to +binary, and binary to decimal, conversions in fixed-point +arithmetic in the TeX typesetting system. The proof evaded +its author for a dozen years. + +@item @c sort-key: Matula-1968a +David W. Matula, @cite{In-and-out conversions}, +Communications of the ACM @b{11}(1) 47--50 (January 1968), +@uref{https://doi.org/10.1145/362851.362887}. + +@item @c sort-key: Matula-1968b +David W. Matula, @cite{The Base Conversion Theorem}, +Proceedings of the American Mathematical Society @b{19}(3) +716--723 (June 1968). See also other papers here by this author, +and by I. Bennett Goldberg. + +@item @c sort-key: Matula-1970 +David W. Matula, @cite{A Formalization of Floating-Point Numeric +Base Conversion}, IEEE Transactions on Computers @b{C-19}(8) +681--692 (August 1970), +@uref{https://doi.org/10.1109/T-C.1970.223017}. + +@item @c sort-key: Muller-2010 +Jean-Michel Muller and eight others, @cite{Handbook of +Floating-Point Arithmetic}, Birkh@"auser-Boston (2010), ISBN +0-8176-4704-X (xxiii + 572 pages), +@uref{https://doi.org/10.1007/978-0-8176-4704-9}. This is a +comprehensive treatise from a French team who are among the +world's greatest experts in floating-point arithmetic, and among +the most prolific writers of research papers in that field. They +have much to teach, and their book deserves a place on the +shelves of every serious numerical programmer.
+ +@item @c sort-key: Muller-2018 +Jean-Michel Muller and eight others, @cite{Handbook of +Floating-Point Arithmetic}, Second edition, Birkh@"auser-Boston (2018), ISBN +3-319-76525-6 (xxv + 627 pages), +@uref{https://doi.org/10.1007/978-3-319-76526-6}. This is a new +edition of the preceding entry. + +@item @c sort-key: Overton +Michael Overton, @cite{Numerical Computing with IEEE Floating +Point Arithmetic, Including One Theorem, One Rule of Thumb, and +One Hundred and One Exercises}, SIAM (2001), ISBN 0-89871-482-6 +(xiv + 104 pages), +@uref{http://www.ec-securehost.com/SIAM/ot76.html}. +This is a small volume that can be covered in a few hours. + +@item @c sort-key: Steele-1990 +Guy L. Steele Jr. and Jon L. White, @cite{How to Print +Floating-Point Numbers Accurately}, ACM SIGPLAN Notices +@b{25}(6) 112--126 (June 1990), +@uref{https://doi.org/10.1145/93548.93559}. +See also the papers by Clinger. + +@item @c sort-key: Steele-2004 +Guy L. Steele Jr. and Jon L. White, @cite{Retrospective: How to +Print Floating-Point Numbers Accurately}, ACM SIGPLAN Notices +@b{39}(4) 372--389 (April 2004), +@uref{http://doi.acm.org/10.1145/989393.989431}. Reprint of 1990 +paper, with additional commentary. + +@item @c sort-key: Sterbenz +Pat H. Sterbenz, @cite{Floating Point Computation}, Prentice-Hall +(1974), ISBN 0-13-322495-3 (xiv + 316 pages). This often-cited book +provides solid coverage of what floating-point arithmetic was like +@emph{before} the introduction of IEEE 754 arithmetic. + +@end itemize diff --git a/texinfo.tex b/texinfo.tex new file mode 100644 index 0000000..c614e02 --- /dev/null +++ b/texinfo.tex @@ -0,0 +1,11727 @@ +% texinfo.tex -- TeX macros to handle Texinfo files. +% +% Load plain if necessary, i.e., if running under initex. 
+\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi +% +\def\texinfoversion{2018-01-09.11} +% +% Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995, +% 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +% 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 +% Free Software Foundation, Inc. +% +% This texinfo.tex file is free software: you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This texinfo.tex file is distributed in the hope that it will be +% useful, but WITHOUT ANY WARRANTY; without even the implied warranty +% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see <https://www.gnu.org/licenses/>. +% +% As a special exception, when this file is read by TeX when processing +% a Texinfo source document, you may use the result without +% restriction. This Exception is an additional permission under section 7 +% of the GNU General Public License, version 3 ("GPLv3"). +% +% Please try the latest version of texinfo.tex before submitting bug +% reports; you can get the latest version from: +% https://ftp.gnu.org/gnu/texinfo/ (the Texinfo release area), or +% https://ftpmirror.gnu.org/texinfo/ (same, via a mirror), or +% https://www.gnu.org/software/texinfo/ (the Texinfo home page) +% The texinfo.tex in any given distribution could well be out +% of date, so if that's what you're using, please check. +% +% Send bug reports to bug-texinfo@gnu.org. Please include a +% complete document in each bug report with which we can reproduce the +% problem. Patches are, of course, greatly appreciated.
+% +% To process a Texinfo manual with TeX, it's most reliable to use the +% texi2dvi shell script that comes with the distribution. For a simple +% manual foo.texi, however, you can get away with this: +% tex foo.texi +% texindex foo.?? +% tex foo.texi +% tex foo.texi +% dvips foo.dvi -o # or whatever; this makes foo.ps. +% The extra TeX runs get the cross-reference information correct. +% Sometimes one run after texindex suffices, and sometimes you need more +% than two; texi2dvi does it as many times as necessary. +% +% It is possible to adapt texinfo.tex for other languages, to some +% extent. You can get the existing language-specific files from the +% full Texinfo distribution. +% +% The GNU Texinfo home page is https://www.gnu.org/software/texinfo. + + +\message{Loading texinfo [version \texinfoversion]:} + +% If in a .fmt file, print the version number +% and turn on active characters that we couldn't do earlier because +% they might have appeared in the input file name. +\everyjob{\message{[Texinfo version \texinfoversion]}% + \catcode`+=\active \catcode`\_=\active} + +% LaTeX's \typeout. This ensures that the messages it is used for +% are identical in format to the corresponding ones from latex/pdflatex. +\def\typeout{\immediate\write17}% + +\chardef\other=12 + +% We never want plain's \outer definition of \+ in Texinfo. +% For @tex, we can use \tabalign. +\let\+ = \relax + +% Save some plain tex macros whose names we will redefine. +\let\ptexb=\b +\let\ptexbullet=\bullet +\let\ptexc=\c +\let\ptexcomma=\, +\let\ptexdot=\. +\let\ptexdots=\dots +\let\ptexend=\end +\let\ptexequiv=\equiv +\let\ptexexclam=\! 
+\let\ptexfootnote=\footnote +\let\ptexgtr=> +\let\ptexhat=^ +\let\ptexi=\i +\let\ptexindent=\indent +\let\ptexinsert=\insert +\let\ptexlbrace=\{ +\let\ptexless=< +\let\ptexnewwrite\newwrite +\let\ptexnoindent=\noindent +\let\ptexplus=+ +\let\ptexraggedright=\raggedright +\let\ptexrbrace=\} +\let\ptexslash=\/ +\let\ptexsp=\sp +\let\ptexstar=\* +\let\ptexsup=\sup +\let\ptext=\t +\let\ptextop=\top +{\catcode`\'=\active \global\let\ptexquoteright'}% active in plain's math mode + +% If this character appears in an error message or help string, it +% starts a new line in the output. +\newlinechar = `^^J + +% Use TeX 3.0's \inputlineno to get the line number, for better error +% messages, but if we're using an old version of TeX, don't do anything. +% +\ifx\inputlineno\thisisundefined + \let\linenumber = \empty % Pre-3.0. +\else + \def\linenumber{l.\the\inputlineno:\space} +\fi + +% Set up fixed words for English if not already set. +\ifx\putwordAppendix\undefined \gdef\putwordAppendix{Appendix}\fi +\ifx\putwordChapter\undefined \gdef\putwordChapter{Chapter}\fi +\ifx\putworderror\undefined \gdef\putworderror{error}\fi +\ifx\putwordfile\undefined \gdef\putwordfile{file}\fi +\ifx\putwordin\undefined \gdef\putwordin{in}\fi +\ifx\putwordIndexIsEmpty\undefined \gdef\putwordIndexIsEmpty{(Index is empty)}\fi +\ifx\putwordIndexNonexistent\undefined \gdef\putwordIndexNonexistent{(Index is nonexistent)}\fi +\ifx\putwordInfo\undefined \gdef\putwordInfo{Info}\fi +\ifx\putwordInstanceVariableof\undefined \gdef\putwordInstanceVariableof{Instance Variable of}\fi +\ifx\putwordMethodon\undefined \gdef\putwordMethodon{Method on}\fi +\ifx\putwordNoTitle\undefined \gdef\putwordNoTitle{No Title}\fi +\ifx\putwordof\undefined \gdef\putwordof{of}\fi +\ifx\putwordon\undefined \gdef\putwordon{on}\fi +\ifx\putwordpage\undefined \gdef\putwordpage{page}\fi +\ifx\putwordsection\undefined \gdef\putwordsection{section}\fi +\ifx\putwordSection\undefined \gdef\putwordSection{Section}\fi 
+\ifx\putwordsee\undefined \gdef\putwordsee{see}\fi +\ifx\putwordSee\undefined \gdef\putwordSee{See}\fi +\ifx\putwordShortTOC\undefined \gdef\putwordShortTOC{Short Contents}\fi +\ifx\putwordTOC\undefined \gdef\putwordTOC{Table of Contents}\fi +% Month names. +\ifx\putwordMJan\undefined \gdef\putwordMJan{January}\fi +\ifx\putwordMFeb\undefined \gdef\putwordMFeb{February}\fi +\ifx\putwordMMar\undefined \gdef\putwordMMar{March}\fi +\ifx\putwordMApr\undefined \gdef\putwordMApr{April}\fi +\ifx\putwordMMay\undefined \gdef\putwordMMay{May}\fi +\ifx\putwordMJun\undefined \gdef\putwordMJun{June}\fi +\ifx\putwordMJul\undefined \gdef\putwordMJul{July}\fi +\ifx\putwordMAug\undefined \gdef\putwordMAug{August}\fi +\ifx\putwordMSep\undefined \gdef\putwordMSep{September}\fi +\ifx\putwordMOct\undefined \gdef\putwordMOct{October}\fi +\ifx\putwordMNov\undefined \gdef\putwordMNov{November}\fi +\ifx\putwordMDec\undefined \gdef\putwordMDec{December}\fi +% Category names (presumably printed by the @def... commands -- confirm +% against their definitions, which are not in this part of the file). +\ifx\putwordDefmac\undefined \gdef\putwordDefmac{Macro}\fi +\ifx\putwordDefspec\undefined \gdef\putwordDefspec{Special Form}\fi +\ifx\putwordDefvar\undefined \gdef\putwordDefvar{Variable}\fi +\ifx\putwordDefopt\undefined \gdef\putwordDefopt{User Option}\fi +\ifx\putwordDeffunc\undefined \gdef\putwordDeffunc{Function}\fi + +% Give the space character the catcode for a space. +\def\spaceisspace{\catcode`\ =10\relax} + +% Likewise for ^^M, the end of line character. +\def\endlineisspace{\catcode13=10\relax} + +% Character codes of `-', `/' and `_' as \chardef constants. +\chardef\dashChar = `\- +\chardef\slashChar = `\/ +\chardef\underChar = `\_ + +% Ignore a token (more precisely one undelimited argument: a single +% token or a brace group). +% +\def\gobble#1{} + +% The following is used inside several \edef's. +% It builds the control sequence named #1 and puts \noexpand in front of +% it, so the control sequence itself is not expanded by the \edef. +\def\makecsname#1{\expandafter\noexpand\csname#1\endcsname} + +% Hyphenation fixes. 
+\hyphenation{ + Flor-i-da Ghost-script Ghost-view Mac-OS Post-Script + ap-pen-dix bit-map bit-maps + data-base data-bases eshell fall-ing half-way long-est man-u-script + man-u-scripts mini-buf-fer mini-buf-fers over-view par-a-digm + par-a-digms rath-er rec-tan-gu-lar ro-bot-ics se-vere-ly set-up spa-ces + spell-ing spell-ings + stand-alone strong-est time-stamp time-stamps which-ever white-space + wide-spread wrap-around +} + +% Sometimes it is convenient to have everything in the transcript file +% and nothing on the terminal. We don't just call \tracingall here, +% since that produces some useless output on the terminal. We also make +% some effort to order the tracing commands to reduce output in the log +% file; cf. trace.sty in LaTeX. +% +% \gloggingall runs \loggingall with \globaldefs=1, so the tracing +% settings are made globally and survive the \endgroup. +\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% +\def\loggingall{% + \tracingstats2 + \tracingpages1 + \tracinglostchars2 % 2 gives us more in etex + \tracingparagraphs1 + \tracingoutput1 + \tracingmacros2 + \tracingrestores1 + \showboxbreadth\maxdimen \showboxdepth\maxdimen + \ifx\eTeXversion\thisisundefined\else % etex gives us more logging + \tracingscantokens1 + \tracingifs1 + \tracinggroups1 + \tracingnesting2 + \tracingassigns1 + \fi + \tracingcommands3 % 3 gives us more in etex + \errorcontextlines16 +}% + +% @errormsg{MSG}. Do the index-like expansions on MSG, but if things +% aren't perfect, it's not the end of the world, being an error message, +% after all. +% +% \errormsg opens a group so that the \indexnofonts settings stay local; +% \doerrormsg issues MSG with \errmessage and then closes that group, so +% the settings do not leak into the text that follows @errormsg. +\def\errormsg{\begingroup \indexnofonts \doerrormsg} +\def\doerrormsg#1{\errmessage{#1}\endgroup} + +% add check for \lastpenalty to plain's definitions. If the last thing +% we did was a \nobreak, we don't want to insert more space. 
+% Each of the three macros below does nothing when \lastpenalty is 10000 +% or more; otherwise it ends the paragraph and, if the preceding skip is +% smaller than the requested amount, replaces it by a penalty plus the skip. +\def\smallbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\smallskipamount + \removelastskip\penalty-50\smallskip\fi\fi} +\def\medbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\medskipamount + \removelastskip\penalty-100\medskip\fi\fi} +\def\bigbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\bigskipamount + \removelastskip\penalty-200\bigskip\fi\fi} + +% Output routine +% + +% For a final copy, take out the rectangles +% that mark overfull boxes (in case you have decided +% that the text looks ok even though it passes the margin). +% +\def\finalout{\overfullrule=0pt } + +% Do @cropmarks to get crop marks. +% \cropmarks is simply the \cropmarkstrue switch of \ifcropmarks. +% +\newif\ifcropmarks +\let\cropmarks = \cropmarkstrue +% +% Dimensions to add cropmarks at corners. +% Added by P. A. MacKay, 12 Nov. 1986 +% +\newdimen\outerhsize \newdimen\outervsize % set by the paper size routines +\newdimen\cornerlong \cornerlong=1pc +\newdimen\cornerthick \cornerthick=.3pt +\newdimen\topandbottommargin \topandbottommargin=.75in + +% Output a mark which sets \thischapter, \thissection and \thiscolor. +% We dump everything together because we only have one kind of mark. +% This works because we only use \botmark / \topmark, not \firstmark. +% NOTE(review): the paragraph further down and \gettopheadingmarks do +% read \firstmark, so the sentence above looks out of date -- confirm. +% +% A mark contains a subexpression of the \ifcase ... \fi construct. +% \get*marks macros below extract the needed part using \ifcase. +% +% Another complication is to let the user choose whether \thischapter +% (\thissection) refers to the chapter (section) in effect at the top +% of a page, or that at the bottom of a page. + +% \domark is called twice inside \chapmacro, to add one +% mark before the section break, and one after. +% In the second call \prevchapterdefs is the same as \lastchapterdefs, +% and \prevsectiondefs is the same as \lastsectiondefs. +% Then if the page is not broken at the mark, some of the previous +% section appears on the page, and we can get the name of this section +% from \firstmark for @everyheadingmarks top. +% @everyheadingmarks bottom uses \botmark. 
+% +% See page 260 of The TeXbook. +% Each ...defs macro is first expanded one level into a token register; +% \the\toksN inside \mark then delivers those tokens without expanding +% them further, and \noexpand keeps \or and \else literal in the mark. +\def\domark{% + \toks0=\expandafter{\lastchapterdefs}% + \toks2=\expandafter{\lastsectiondefs}% + \toks4=\expandafter{\prevchapterdefs}% + \toks6=\expandafter{\prevsectiondefs}% + \toks8=\expandafter{\lastcolordefs}% + \mark{% + \the\toks0 \the\toks2 % 0: marks for @everyheadingmarks top + \noexpand\or \the\toks4 \the\toks6 % 1: for @everyheadingmarks bottom + \noexpand\else \the\toks8 % 2: color marks + }% +} + +% \gettopheadingmarks, \getbottomheadingmarks, +% \getcolormarks - extract needed part of mark. +% `\ifcase N<mark>\fi' runs branch N of the \or/\else list that \domark +% stored in the mark (2 falls through to the \else branch). +% +% \topmark doesn't work for the very first chapter (after the title +% page or the contents), so we use \firstmark there -- this gets us +% the mark with the chapter defs, unless the user sneaks in, e.g., +% @setcolor (or @url, or @link, etc.) between @contents and the very +% first @chapter. +\def\gettopheadingmarks{% + \ifcase0\topmark\fi + \ifx\thischapter\empty \ifcase0\firstmark\fi \fi +} +\def\getbottomheadingmarks{\ifcase1\botmark\fi} +\def\getcolormarks{\ifcase2\topmark\fi} + +% Avoid "undefined control sequence" errors. +\def\lastchapterdefs{} +\def\lastsectiondefs{} +\def\lastsection{} +\def\prevchapterdefs{} +\def\prevsectiondefs{} +\def\lastcolordefs{} + +% Margin to add to right of even pages, to left of odd pages. +\newdimen\bindingoffset +\newdimen\normaloffset +\newdimen\txipagewidth \newdimen\txipageheight + +% Main output routine. +% \PAGE names box 255; the output routine passes \pagecontents\PAGE to +% \onepageout as the page body. +% +\chardef\PAGE = 255 +\output = {\onepageout{\pagecontents\PAGE}} + +\newbox\headlinebox +\newbox\footlinebox + +% \onepageout takes a vbox as an argument. +% \shipout a vbox for a single page, adding an optional header, footer, +% cropmarks, and footnote. This also causes index entries for this page +% to be written to the auxiliary files. 
+% +\def\onepageout#1{% + \ifcropmarks \hoffset=0pt \else \hoffset=\normaloffset \fi + % Shift odd pages right and even pages left by \bindingoffset. + \ifodd\pageno \advance\hoffset by \bindingoffset + \else \advance\hoffset by -\bindingoffset\fi + % + % Common context changes for both heading and footing. + % Do this outside of the \shipout so @code etc. will be expanded in + % the headline as they should be, not taken literally (outputting ''code). + % (The triple-m spelling of \commmonheadfootline is used consistently + % at its definition and at both uses below.) + \def\commmonheadfootline{\let\hsize=\txipagewidth \texinfochars} + % + % Retrieve the information for the headings from the marks in the page, + % and call Plain TeX's \makeheadline and \makefootline, which use the + % values in \headline and \footline. + % + % This is used to check if we are on the first page of a chapter. + % Branch 1 of \topmark and branch 0 of \firstmark are run in turn, and + % \thischaptername is recorded after each. + \ifcase1\topmark\fi + \let\prevchaptername\thischaptername + \ifcase0\firstmark\fi + \let\curchaptername\thischaptername + % + \ifodd\pageno \getoddheadingmarks \else \getevenheadingmarks \fi + \ifodd\pageno \getoddfootingmarks \else \getevenfootingmarks \fi + % + \ifx\curchaptername\prevchaptername + \let\thischapterheading\thischapter + \else + % \thischapterheading is the same as \thischapter except it is blank + % for the first page of a chapter. This is to prevent the chapter name + % being shown twice. + \def\thischapterheading{}% + \fi + % + \global\setbox\headlinebox = \vbox{\commmonheadfootline \makeheadline}% + \global\setbox\footlinebox = \vbox{\commmonheadfootline \makefootline}% + % + {% + % Set context for writing to auxiliary files like index files. + % Have to do this stuff outside the \shipout because we want it to + % take effect in \write's, yet the group defined by the \vbox ends + % before the \shipout runs. + % + \indexdummies % don't expand commands in the output. + \normalturnoffactive % \ in index entries must not stay \, e.g., if + % the page break happens to be in the middle of an example. 
+ % We don't want .vr (or whatever) entries like this: + % \entry{{\indexbackslash }acronym}{32}{\code {\acronym}} + % "\acronym" won't work when it's read back in; + % it needs to be + % {\code {{\backslashcurfont }acronym} + \shipout\vbox{% + % Do this early so pdf references go to the beginning of the page. + \ifpdfmakepagedest \pdfdest name{\the\pageno} xyz\fi + % With crop marks: open the outer box, draw the two top corner marks, + % and open the centered inner box that will hold the page proper. + \ifcropmarks \vbox to \outervsize\bgroup + \hsize = \outerhsize + \vskip-\topandbottommargin + \vtop to0pt{% + \line{\ewtop\hfil\ewtop}% + \nointerlineskip + \line{% + \vbox{\moveleft\cornerthick\nstop}% + \hfill + \vbox{\moveright\cornerthick\nstop}% + }% + \vss}% + \vskip\topandbottommargin + \line\bgroup + \hfil % center the page within the outer (page) hsize. + \ifodd\pageno\hskip\bindingoffset\fi + \vbox\bgroup + \fi + % + \unvbox\headlinebox + \pagebody{#1}% + \ifdim\ht\footlinebox > 0pt + % Only leave this space if the footline is nonempty. + % (We lessened \vsize for it in \oddfootingyyy.) + % The \baselineskip=24pt in plain's \makefootline has no effect. 
+ \vskip 24pt + \unvbox\footlinebox + \fi + % With crop marks: close the inner boxes and draw the bottom corners. + \ifcropmarks + \egroup % end of \vbox\bgroup + \hfil\egroup % end of (centering) \line\bgroup + \vskip\topandbottommargin plus1fill minus1fill + \boxmaxdepth = \cornerthick + \vbox to0pt{\vss + \line{% + \vbox{\moveleft\cornerthick\nsbot}% + \hfill + \vbox{\moveright\cornerthick\nsbot}% + }% + \nointerlineskip + \line{\ewbot\hfil\ewbot}% + }% + \egroup % \vbox from first cropmarks clause + \fi + }% end of \shipout\vbox + }% end of group with \indexdummies + \advancepageno + \ifnum\outputpenalty>-20000 \else\dosupereject\fi +} + +% The \margin insertion class, used by \pagecontents below for marginal +% material; its \dimen is set to \maxdimen. +\newinsert\margin \dimen\margin=\maxdimen + +% Main part of page, including any footnotes +% \pagecontents is defined with @ as a letter (catcode 11) so that the +% names \z@, \dimen@ and \ifr@ggedbottom can be written in its body. +\def\pagebody#1{\vbox to\txipageheight{\boxmaxdepth=\maxdepth #1}} +{\catcode`\@ =11 +\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi +% marginal hacks, juha@viisa.uucp (Juha Takala) +\ifvoid\margin\else % marginal info is present + \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi +\dimen@=\dp#1\relax \unvbox#1\relax +\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi +\ifr@ggedbottom \kern-\dimen@ \vfil \fi} +} + +% Here are the rules for the cropmarks. Note that they are +% offset so that the space between them is truly \outerhsize or \outervsize +% (P. A. MacKay, 12 November, 1986) +% +\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong} +\def\nstop{\vbox + {\hrule height\cornerthick depth\cornerlong width\cornerthick}} +\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong} +\def\nsbot{\vbox + {\hrule height\cornerlong depth\cornerthick width\cornerthick}} + + +% Argument parsing + +% Parse an argument, then pass it to #1. The argument is the rest of +% the input line (except we remove a trailing comment). #1 should be a +% macro which expects an ordinary undelimited TeX argument. +% For example, \def\foo{\parsearg\fooxxx}. 
+% +% \parseargusing{SETUP}{MACRO} is the general form: MACRO is saved in +% \argtorun, and SETUP is run inside the group, after \obeylines and +% \spaceisspace, before the rest of the line is read by \parseargline. +\def\parsearg{\parseargusing{}} +\def\parseargusing#1#2{% + \def\argtorun{#2}% + \begingroup + \obeylines + \spaceisspace + #1% + \parseargline\empty% Insert the \empty token, see \finishparsearg below. +} + +{\obeylines % + \gdef\parseargline#1^^M{% + \endgroup % End of the group started in \parsearg. + \argremovecomment #1\comment\ArgTerm% + }% +} + +% First remove any @comment, then any @c comment. Also remove a @texinfoc +% comment (see \scanmacro for details). Pass the result on to \argcheckspaces. +% Each step appends its own delimiter before \ArgTerm, so the delimited +% parameter #1 always matches; whatever follows the first delimiter (#2) +% is dropped. +\def\argremovecomment#1\comment#2\ArgTerm{\argremovec #1\c\ArgTerm} +\def\argremovec#1\c#2\ArgTerm{\argremovetexinfoc #1\texinfoc\ArgTerm} +\def\argremovetexinfoc#1\texinfoc#2\ArgTerm{\argcheckspaces#1\^^M\ArgTerm} + +% Each occurrence of `\^^M' or `<space>\^^M' is replaced by a single space. +% +% \argremovec might leave us with trailing space, e.g., +% @end itemize @c foo +% This space token undergoes the same procedure and is eventually removed +% by \finishparsearg. +% +\def\argcheckspaces#1\^^M{\argcheckspacesX#1\^^M \^^M} +\def\argcheckspacesX#1 \^^M{\argcheckspacesY#1\^^M} +\def\argcheckspacesY#1\^^M#2\^^M#3\ArgTerm{% + \def\temp{#3}% + \ifx\temp\empty + % Do not use \next, perhaps the caller of \parsearg uses it; reuse \temp: + \let\temp\finishparsearg + \else + \let\temp\argcheckspaces + \fi + % Put the space token in: + \temp#1 #3\ArgTerm +} + +% If a _delimited_ argument is enclosed in braces, they get stripped; so +% to get _exactly_ the rest of the line, we had to prevent such situation. +% We prepended an \empty token at the very beginning and we expand it now, +% just before passing the control to \argtorun. +% (Similarly, we have to think about #3 of \argcheckspacesY above: it is +% either the null string, or it ends with \^^M---thus there is no danger +% that a pair of braces would be stripped.) +% +% But first, we have to remove the trailing space token. 
+% +\def\finishparsearg#1 \ArgTerm{\expandafter\argtorun\expandafter{#1}} + + +% \parseargdef - define a command taking an argument on the line +% +% \parseargdef\foo{...} +% is roughly equivalent to +% \def\foo{\parsearg\Xfoo} +% \def\Xfoo#1{...} +% +% The helper macro is named \csname\string\foo\endcsname. \doparseargdef +% deliberately ends with the unfinished `\def#1##1', so that the brace +% group written after \parseargdef\foo becomes the helper's body. +\def\parseargdef#1{% + \expandafter \doparseargdef \csname\string#1\endcsname #1% +} +\def\doparseargdef#1#2{% + \def#2{\parsearg#1}% + \def#1##1% +} + +% Several utility definitions with active space: +{ + \obeyspaces + \gdef\obeyedspace{ } + + % Make each space character in the input produce a normal interword + % space in the output. Don't allow a line break at this space, as this + % is used only in environments like @example, where each line of input + % should produce a line of output anyway. + % + \gdef\sepspaces{\obeyspaces\let =\tie} + + % If an index command is used in an @example environment, any spaces + % therein should become regular spaces in the raw index file, not the + % expansion of \tie (\leavevmode \penalty \@M \ ). + \gdef\unsepspaces{\let =\space} +} + + +% \flushcr: if \par currently has the meaning of \lisppar, swallow the +% next token (or brace group); otherwise do nothing. +\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next} + +% Define the framework for environments in texinfo.tex. It's used like this: +% +% \envdef\foo{...} +% \def\Efoo{...} +% +% It's the responsibility of \envdef to insert \begingroup before the +% actual body; @end closes the group after calling \Efoo. \envdef also +% defines \thisenv, so the current environment is known; @end checks +% whether the environment name matches. The \checkenv macro can also be +% used to check whether the current environment is the one expected. +% +% Non-false conditionals (@iftex, @ifset) don't fit into this, so they +% are not treated as environments; they don't open a group. (The +% implementation of @end takes care not to call \endgroup in this +% special case.) + + +% At run-time, environments start with this: +\def\startenvironment#1{\begingroup\def\thisenv{#1}} +% initialize +\let\thisenv\empty + +% ... 
but they get defined via ``\envdef\foo{...}'': +% \envparseargdef does the same through \parseargdef, i.e. for an +% environment command that reads an argument from the rest of its line. +\long\def\envdef#1#2{\def#1{\startenvironment#1#2}} +\def\envparseargdef#1#2{\parseargdef#1{\startenvironment#1#2}} + +% Check whether we're in the right environment: +\def\checkenv#1{% + \def\temp{#1}% + \ifx\thisenv\temp + \else + \badenverr + \fi +} + +% Environment mismatch: the expected environment is taken from \temp (as +% set by \checkenv) and the actual one from \thisenv. +\def\badenverr{% + \errhelp = \EMsimple + \errmessage{This command can appear only \inenvironment\temp, + not \inenvironment\thisenv}% +} +\def\inenvironment#1{% + \ifx#1\empty + outside of any environment% + \else + in environment \expandafter\string#1% + \fi +} + +% @end foo executes the definition of \Efoo. +% But first, it executes a specialized version of \checkenv +% When \csname iscond.foo\endcsname expands to 1, nothing at all is done: +% no check, no \Efoo and no \endgroup. (Presumably that flag is set for +% conditionals elsewhere in the file -- confirm.) +% +\parseargdef\end{% + \if 1\csname iscond.#1\endcsname + \else + % The general wording of \badenverr may not be ideal. + \expandafter\checkenv\csname#1\endcsname + \csname E#1\endcsname + \endgroup + \fi +} + +\newhelp\EMsimple{Press RETURN to continue.} + + +% Be sure we're in horizontal mode when doing a tie, since we make space +% equivalent to this in @example-like environments. Otherwise, a space +% at the beginning of a line will start with \penalty -- and +% since \penalty is valid in vertical mode, we'd end up putting the +% penalty on the vertical list instead of in the new paragraph. +{\catcode`@ = 11 + % Avoid using \@M directly, because that causes trouble + % if the definition is written into an index file. + \global\let\tiepenalty = \@M + \gdef\tie{\leavevmode\penalty\tiepenalty\ } +} + +% @: forces normal size whitespace following. +\def\:{\spacefactor=1000 } + +% @* forces a line break. +\def\*{\unskip\hfil\break\hbox{}\ignorespaces} + +% @/ allows a line break. +\let\/=\allowbreak + +% @. is an end-of-sentence period. +\def\.{.\spacefactor=\endofsentencespacefactor\space} + +% @! is an end-of-sentence bang. +\def\!{!\spacefactor=\endofsentencespacefactor\space} + +% @? is an end-of-sentence query. 
+\def\?{?\spacefactor=\endofsentencespacefactor\space} + +% @frenchspacing on|off says whether to put extra space after punctuation. +% +\def\onword{on} +\def\offword{off} +% +\parseargdef\frenchspacing{% + \def\temp{#1}% + \ifx\temp\onword \plainfrenchspacing + \else\ifx\temp\offword \plainnonfrenchspacing + \else + \errhelp = \EMsimple + \errmessage{Unknown @frenchspacing option `\temp', must be on|off}% + \fi\fi +} + +% @w prevents a word break. Without the \leavevmode, @w at the +% beginning of a paragraph, when TeX is still in vertical mode, would +% produce a whole line of output instead of starting the paragraph. +\def\w#1{\leavevmode\hbox{#1}} + +% @group ... @end group forces ... to be all on one page, by enclosing +% it in a TeX vbox. We use \vtop instead of \vbox to construct the box +% to keep its height that of a normal line. According to the rules for +% \topskip (p.114 of the TeXbook), the glue inserted is +% max (\topskip - \ht (first item), 0). If that height is large, +% therefore, no glue is inserted, and the space between the headline and +% the text is small, which looks bad. +% +% Another complication is that the group might be very large. This can +% cause the glue on the previous page to be unduly stretched, because it +% does not have much material. In this case, it's better to add an +% explicit \vfill so that the extra space is at the bottom. The +% threshold for doing this is if the group is more than \vfilllimit +% percent of a page (\vfilllimit can be changed inside of @tex). +% (As defined below, \vfilllimit is a factor applied to \txipageheight: +% 0.7, not 70.) +% +\newbox\groupbox +\def\vfilllimit{0.7} +% +\envdef\group{% + \ifnum\catcode`\^^M=\active \else + \errhelp = \groupinvalidhelp + \errmessage{@group invalid in context where filling is enabled}% + \fi + \startsavinginserts + % + \setbox\groupbox = \vtop\bgroup + % Do @comment since we are called inside an environment such as + % @example, where each end-of-line in the input causes an + % end-of-line in the output. 
We don't want the end-of-line after + % the `@group' to put extra space in the output. Since @group + % should appear on a line by itself (according to the Texinfo + % manual), we don't worry about eating any user text. + \comment +} +% +% The \vtop produces a box with normal height and large depth; thus, TeX puts +% \baselineskip glue before it, and (when the next line of text is done) +% \lineskip glue after it. Thus, space below is not quite equal to space +% above. But it's pretty close. +\def\Egroup{% + % To get correct interline space between the last line of the group + % and the first line afterwards, we have to propagate \prevdepth. + \endgraf % Not \par, as it may have been set to \lisppar. + \global\dimen1 = \prevdepth + \egroup % End the \vtop. + \addgroupbox + \prevdepth = \dimen1 + \checkinserts +} + +\def\addgroupbox{ + % \dimen0 is the vertical size of the group's box. + \dimen0 = \ht\groupbox \advance\dimen0 by \dp\groupbox + % \dimen2 is how much space is left on the page (more or less). + \dimen2 = \txipageheight \advance\dimen2 by -\pagetotal + % if the group doesn't fit on the current page, and it's a big big + % group, force a page break. + % NOTE(review): the inner test compares \pagetotal (what is already on + % the page), not the group size \dimen0, with \vfilllimit\txipageheight + % -- confirm that this is the intended condition. + \ifdim \dimen0 > \dimen2 + \ifdim \pagetotal < \vfilllimit\txipageheight + \page + \fi + \fi + \box\groupbox +} + +% +% TeX puts in an \escapechar (i.e., `@') at the beginning of the help +% message, so this ends up printing `@group can only ...'. +% +\newhelp\groupinvalidhelp{% +group can only be used in environments such as @example,^^J% +where each line of input produces a line of output.} + +% @need space-in-mils +% forces a page break if there is not space-in-mils remaining. + +\newdimen\mil \mil=0.001in + +\parseargdef\need{% + % Ensure vertical mode, so we don't make a big box in the middle of a + % paragraph. + \par + % + % If the @need value is less than one line space, it's useless. 
+ \dimen0 = #1\mil + \dimen2 = \ht\strutbox + \advance\dimen2 by \dp\strutbox + \ifdim\dimen0 > \dimen2 + % + % Do a \strut just to make the height of this box be normal, so the + % normal leading is inserted relative to the preceding line. + % And a page break here is fine. + \vtop to #1\mil{\strut\vfil}% + % + % TeX does not even consider page breaks if a penalty added to the + % main vertical list is 10000 or more. But in order to see if the + % empty box we just added fits on the page, we must make it consider + % page breaks. On the other hand, we don't want to actually break the + % page after the empty box. So we use a penalty of 9999. + % + % There is an extremely small chance that TeX will actually break the + % page at this \penalty, if there are no other feasible breakpoints in + % sight. (If the user is using lots of big @group commands, which + % almost-but-not-quite fill up a page, TeX will have a hard time doing + % good page breaking, for example.) However, I could not construct an + % example where a page broke at this \penalty; if it happens in a real + % document, then we can reconsider our strategy. + \penalty9999 + % + % Back up by the size of the box, whether we did a page break or not. + \kern -#1\mil + % + % Do not allow a page break right after this kern. + \nobreak + \fi +} + +% @br forces paragraph break (and is undocumented). + +\let\br = \par + +% @page forces the start of a new page. +% +\def\page{\par\vfill\supereject} + +% @exdent text.... +% outputs text on separate line in roman font, starting at standard page margin + +% This records the amount of indent in the innermost environment. +% That's how much \exdent should take out. +\newskip\exdentamount + +% This defn is used inside fill environments such as @defun. +\parseargdef\exdent{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break} + +% This defn is used inside nofill environments such as @example. 
+\parseargdef\nofillexdent{{\advance \leftskip by -\exdentamount + \leftline{\hskip\leftskip{\rm#1}}}} + +% @inmargin{WHICH}{TEXT} puts TEXT in the WHICH margin next to the current +% paragraph. For more general purposes, use the \margin insertion +% class. WHICH is `l' or `r'. Not documented, written for gawk manual. +% (The two-argument form described here is what \doinmargin implements; +% \inleftmargin and \inrightmargin supply WHICH.) +% +\newskip\inmarginspacing \inmarginspacing=1cm +\def\strutdepth{\dp\strutbox} +% +\def\doinmargin#1#2{\strut\vadjust{% + \nobreak + \kern-\strutdepth + \vtop to \strutdepth{% + \baselineskip=\strutdepth + \vss + % if you have multiple lines of stuff to put here, you'll need to + % make the vbox yourself of the appropriate size. + \ifx#1l% + \llap{\ignorespaces #2\hskip\inmarginspacing}% + \else + \rlap{\hskip\hsize \hskip\inmarginspacing \ignorespaces #2}% + \fi + \null + }% +}} +\def\inleftmargin{\doinmargin l} +\def\inrightmargin{\doinmargin r} +% +% @inmargin{TEXT [, RIGHT-TEXT]} +% (if RIGHT-TEXT is given, use TEXT for left page, RIGHT-TEXT for right; +% else use TEXT for both). +% RIGHT-TEXT counts as given when it typesets to a box of nonzero width. +% +\def\inmargin#1{\parseinmargin #1,,\finish} +\def\parseinmargin#1,#2,#3\finish{% not perfect, but better than nothing. + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt + \def\lefttext{#1}% have both texts + \def\righttext{#2}% + \else + \def\lefttext{#1}% have only one text + \def\righttext{#1}% + \fi + % + \ifodd\pageno + \def\temp{\inrightmargin\righttext}% odd page -> outside is right margin + \else + \def\temp{\inleftmargin\lefttext}% + \fi + \temp +} + +% @include FILE -- \input text of FILE. +% The argument is read with \filenamecatcodes in effect. +% +\def\include{\parseargusing\filenamecatcodes\includezzz} +\def\includezzz#1{% + \pushthisfilestack + \def\thisfile{#1}% + {% + \makevalueexpandable % we want to expand any @value in FILE. + \turnoffactive % and allow special characters in the expansion + \indexnofonts % Allow `@@' and other weird things in file names. 
+ \wlog{texinfo.tex: doing @include of #1^^J}% + \edef\temp{\noexpand\input #1 }% + % + % This trickery is to read FILE outside of a group, in case it makes + % definitions, etc. + \expandafter + }\temp + \popthisfilestack +} +% Give these characters catcode \other. +\def\filenamecatcodes{% + \catcode`\\=\other + \catcode`~=\other + \catcode`^=\other + \catcode`_=\other + \catcode`|=\other + \catcode`<=\other + \catcode`>=\other + \catcode`+=\other + \catcode`-=\other + \catcode`\`=\other + \catcode`\'=\other +} + +% The file-name stack lives in the body of \popthisfilestack: pushing +% redefines it (globally) so that running it restores the current +% \thisfile and its own previous body. With nothing pushed, it raises +% the internal error defined below. +\def\pushthisfilestack{% + \expandafter\pushthisfilestackX\popthisfilestack\StackTerm +} +\def\pushthisfilestackX{% + \expandafter\pushthisfilestackY\thisfile\StackTerm +} +\def\pushthisfilestackY #1\StackTerm #2\StackTerm {% + \gdef\popthisfilestack{\gdef\thisfile{#1}\gdef\popthisfilestack{#2}}% +} + +\def\popthisfilestack{\errthisfilestackempty} +\def\errthisfilestackempty{\errmessage{Internal error: + the stack of filenames is empty.}} +% +\def\thisfile{} + +% @center line +% outputs that line, centered. +% \centerH is used in horizontal mode and \centerV otherwise. +% +\parseargdef\center{% + \ifhmode + \let\centersub\centerH + \else + \let\centersub\centerV + \fi + \centersub{\hfil \ignorespaces#1\unskip \hfil}% + \let\centersub\relax % don't let the definition persist, just in case +} +% \centerH: break the current line, set the text in a \line with \hsize +% reduced by \leftskip and \rightskip, then break again. +\def\centerH#1{{% + \hfil\break + \advance\hsize by -\leftskip + \advance\hsize by -\rightskip + \line{#1}% + \break +}} +% +\newcount\centerpenalty +\def\centerV#1{% + % The idea here is the same as in \startdefun, \cartouche, etc.: if + % @center is the first thing after a section heading, we need to wipe + % out the negative parskip inserted by \sectionheading, but still + % prevent a page break here. + \centerpenalty = \lastpenalty + \ifnum\centerpenalty>10000 \vskip\parskip \fi + \ifnum\centerpenalty>9999 \penalty\centerpenalty \fi + \line{\kern\leftskip #1\kern\rightskip}% +} + +% @sp n outputs n lines of vertical space +% +\parseargdef\sp{\vskip #1\baselineskip} + +% @comment ...line which is ignored... +% @c is the same as @comment +% @ignore ... 
% @end ignore is another way to write a comment


% \c opens a group in which end-of-line is active and @, { and } are
% ordinary characters; \cxxx then takes everything up to the end of the
% line as its argument, discards it, and closes the group.
\def\c{\begingroup \catcode`\^^M=\active%
\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other%
\cxxx}
{\catcode`\^^M=\active \gdef\cxxx#1^^M{\endgroup}}
%
\let\comment\c

% @paragraphindent NCHARS
% We'll use ems for NCHARS, close enough.
% NCHARS can also be the word `asis' or `none'.
% We cannot feasibly implement @paragraphindent asis, though.
%
\def\asisword{asis} % no translation, these are keywords
\def\noneword{none}
%
% `asis' leaves \defaultparindent alone, `none' sets it to 0pt, and
% anything else is taken as a number of ems.  \parindent is reset from
% \defaultparindent in every case.
\parseargdef\paragraphindent{%
  \def\temp{#1}%
  \ifx\temp\asisword
  \else
    \ifx\temp\noneword
      \defaultparindent = 0pt
    \else
      \defaultparindent = #1em
    \fi
  \fi
  \parindent = \defaultparindent
}

% @exampleindent NCHARS
% We'll use ems for NCHARS like @paragraphindent.
% It seems @exampleindent asis isn't necessary, but
% I preserve it to make it similar to @paragraphindent.
\parseargdef\exampleindent{%
  \def\temp{#1}%
  \ifx\temp\asisword
  \else
    \ifx\temp\noneword
      \lispnarrowing = 0pt
    \else
      \lispnarrowing = #1em
    \fi
  \fi
}

% @firstparagraphindent WORD
% If WORD is `none', then suppress indentation of the first paragraph
% after a section heading.  If WORD is `insert', then do indent at such
% paragraphs.
%
% The paragraph indentation is suppressed or not by calling
% \suppressfirstparagraphindent, which the sectioning commands do.
% We switch the definition of this back and forth according to WORD.
% By default, we suppress indentation.
%
\def\suppressfirstparagraphindent{\dosuppressfirstparagraphindent}
\def\insertword{insert}
%
% Any WORD other than `none' or `insert' is reported as an error.
\parseargdef\firstparagraphindent{%
  \def\temp{#1}%
  \ifx\temp\noneword
    \let\suppressfirstparagraphindent = \dosuppressfirstparagraphindent
  \else\ifx\temp\insertword
    \let\suppressfirstparagraphindent = \relax
  \else
    \errhelp = \EMsimple
    \errmessage{Unknown @firstparagraphindent option `\temp'}%
  \fi\fi
}

% Here is how we actually suppress indentation.
% Redefine \everypar to
% \kern backwards by \parindent, and then reset itself to empty.
%
% We also make \indent itself not actually do anything until the next
% paragraph.
%
\gdef\dosuppressfirstparagraphindent{%
  \gdef\indent  {\restorefirstparagraphindent \indent}%
  \gdef\noindent{\restorefirstparagraphindent \noindent}%
  \global\everypar = {\kern -\parindent \restorefirstparagraphindent}%
}
%
% Undo the above: give \indent and \noindent back their saved meanings
% (\ptexindent, \ptexnoindent) and empty \everypar, all globally.
\gdef\restorefirstparagraphindent{%
  \global\let\indent = \ptexindent
  \global\let\noindent = \ptexnoindent
  \global\everypar = {}%
}


% @refill is a no-op.
\let\refill=\relax

% @setfilename INFO-FILENAME - ignored
\let\setfilename=\comment

% @bye.
\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend}


\message{pdf,}
% adobe `portable' document format
\newcount\tempnum
\newcount\lnkcount
\newtoks\filename
\newcount\filenamelength
\newcount\pgn
\newtoks\toksA
\newtoks\toksB
\newtoks\toksC
\newtoks\toksD
\newbox\boxA
\newbox\boxB
\newcount\countA
\newif\ifpdf
\newif\ifpdfmakepagedest

%
% For LuaTeX
%

\newif\iftxiuseunicodedestname
\txiuseunicodedestnamefalse % For pdfTeX etc.

\ifx\luatexversion\thisisundefined
\else
  % Use Unicode destination names
  \txiuseunicodedestnametrue
  % Escape PDF strings with converting UTF-16 from UTF-8
  %
  % UTF16oct(str) prints str as a UTF-16BE PDF string in backslash-octal
  % form: first the byte order mark (\376\377), then two escaped bytes
  % per code unit, with a surrogate pair for code points >= 0x10000.
  %
  % Every byte value goes through math.floor before it is formatted.
  % In Lua 5.3 (used by recent LuaTeX) `/' is a floating-point division,
  % and string.format refuses to format a non-integral float with an
  % octal conversion, so without math.floor most characters would abort
  % the run with "number has no integer representation".
  %
  % No comments can be placed between here and \endgroup: `%' is an
  % ordinary character there (so that Lua sees the modulo operator),
  % and the chunk reaches Lua with its line ends turned into spaces, so
  % a Lua `--' comment would swallow the rest of the code.
  \begingroup
    \catcode`\%=12
    \directlua{
      function UTF16oct(str)
        tex.sprint(string.char(0x5c) .. '376' .. string.char(0x5c) .. '377')
        for c in string.utfvalues(str) do
          if c < 0x10000 then
            tex.sprint(
              string.format(string.char(0x5c) .. string.char(0x25) .. '03o' ..
                            string.char(0x5c) .. string.char(0x25) .. '03o',
                            math.floor(c / 256), math.floor(c % 256)))
          else
            c = c - 0x10000
            local c_hi = c / 1024 + 0xd800
            local c_lo = c % 1024 + 0xdc00
            tex.sprint(
              string.format(string.char(0x5c) .. string.char(0x25) .. '03o' ..
                            string.char(0x5c) .. string.char(0x25) .. '03o' ..
                            string.char(0x5c) .. string.char(0x25) .. '03o' ..
                            string.char(0x5c) .. string.char(0x25) .. '03o',
                            math.floor(c_hi / 256), math.floor(c_hi % 256),
                            math.floor(c_lo / 256), math.floor(c_lo % 256)))
          end
        end
      end
    }
  \endgroup
  \def\pdfescapestrutfsixteen#1{\directlua{UTF16oct('\luaescapestring{#1}')}}
  % Escape PDF strings without converting
  %
  % PDFescstr(str) works byte by byte: bytes <= 0x20 or >= 0x80, and the
  % bytes for `(', `)' and backslash, are printed as a backslash followed
  % by three octal digits; every other byte is printed unchanged.
  \begingroup
    \directlua{
      function PDFescstr(str)
        for c in string.bytes(str) do
          if c <= 0x20 or c >= 0x80 or c == 0x28 or c == 0x29 or c == 0x5c then
            tex.sprint(
              string.format(string.char(0x5c) .. string.char(0x25) .. '03o',
                            c))
          else
            tex.sprint(string.char(c))
          end
        end
      end
    }
  \endgroup
  \def\pdfescapestring#1{\directlua{PDFescstr('\luaescapestring{#1}')}}
  \ifnum\luatexversion>84
    % For LuaTeX >= 0.85
    % Define the pdfTeX names used in this file in terms of what this
    % LuaTeX provides.
    \def\pdfdest{\pdfextension dest}
    \let\pdfoutput\outputmode
    \def\pdfliteral{\pdfextension literal}
    \def\pdfcatalog{\pdfextension catalog}
    \def\pdftexversion{\numexpr\pdffeedback version\relax}
    \let\pdfximage\saveimageresource
    \let\pdfrefximage\useimageresource
    \let\pdflastximage\lastsavedimageresourceindex
    \def\pdfendlink{\pdfextension endlink\relax}
    \def\pdfoutline{\pdfextension outline}
    \def\pdfstartlink{\pdfextension startlink}
    \def\pdffontattr{\pdfextension fontattr}
    \def\pdfobj{\pdfextension obj}
    \def\pdflastobj{\numexpr\pdffeedback lastobj\relax}
    \let\pdfpagewidth\pagewidth
    \let\pdfpageheight\pageheight
    \edef\pdfhorigin{\pdfvariable horigin}
    \edef\pdfvorigin{\pdfvariable vorigin}
  \fi
\fi

% when pdftex is run in dvi mode, \pdfoutput is defined (so \pdfoutput=1
% can be set).  So we test for \relax and 0 as well as being undefined.
\ifx\pdfoutput\thisisundefined
\else
  \ifx\pdfoutput\relax
  \else
    \ifcase\pdfoutput
    \else
      \pdftrue
    \fi
  \fi
\fi

% PDF uses PostScript string constants for the names of xref targets,
% for display in the outlines, and in other places.  Thus, we have to
% double any backslashes.  Otherwise, a name like "\node" will be
% interpreted as a newline (\n), followed by o, d, e.  Not good.
%
% See http://www.ntg.nl/pipermail/ntg-pdftex/2004-July/000654.html and
% related messages.  The final outcome is that it is up to the TeX user
% to double the backslashes and otherwise make the string valid, so
% that's what we do.  pdftex 1.30.0 (ca.2005) introduced a primitive to
% do this reliably, so we use it.

% #1 is a control sequence in which to do the replacements,
% which we \xdef.
\def\txiescapepdf#1{%
  \ifx\pdfescapestring\thisisundefined
    % No primitive available; should we give a warning or log?
    % Many times it won't matter.
    \xdef#1{#1}%
  \else
    % The expandable \pdfescapestring primitive escapes parentheses,
    % backslashes, and other special chars.
    \xdef#1{\pdfescapestring{#1}}%
  \fi
}
% Same calling convention as \txiescapepdf, but the replacement goes
% through \pdfescapestrutfsixteen when that macro is defined; otherwise
% this is plain \txiescapepdf.
\def\txiescapepdfutfsixteen#1{%
  \ifx\pdfescapestrutfsixteen\thisisundefined
    % No UTF-16 converting macro available.
    \txiescapepdf{#1}%
  \else
    \xdef#1{\pdfescapestrutfsixteen{#1}}%
  \fi
}

\newhelp\nopdfimagehelp{Texinfo supports .png, .jpg, .jpeg, and .pdf images
with PDF output, and none of those formats could be found.  (.eps cannot
be supported due to the design of the PDF format; use regular TeX (DVI
output) for that.)}

\ifpdf
  %
  % Color manipulation macros using ideas from pdfcolor.tex,
  % except using rgb instead of cmyk; the latter is said to render as a
  % very dark gray on-screen and a very dark halftone in print, instead
  % of actual black. The dark red here is dark enough to print on paper as
  % nearly black, but still distinguishable for online viewing.  We use
  % black by default, though.
  \def\rgbDarkRed{0.50 0.09 0.12}
  \def\rgbBlack{0 0 0}
  %
  % rg sets the color for filling (usual text, etc.);
  % RG sets the color for stroking (thin rules, e.g., normal _'s).
  \def\pdfsetcolor#1{\pdfliteral{#1 rg #1 RG}}
  %
  % Set color, and create a mark which defines \thiscolor accordingly,
  % so that \makeheadline knows which color to restore.
%
  \def\setcolor#1{%
    \xdef\lastcolordefs{\gdef\noexpand\thiscolor{#1}}%
    \domark
    \pdfsetcolor{#1}%
  }
  %
  \def\maincolor{\rgbBlack}
  \pdfsetcolor{\maincolor}
  \edef\thiscolor{\maincolor}
  \def\lastcolordefs{}
  %
  % The footline is always typeset in \maincolor.
  \def\makefootline{%
    \baselineskip24pt
    \line{\pdfsetcolor{\maincolor}\the\footline}%
  }
  %
  \def\makeheadline{%
    \vbox to 0pt{%
      \vskip-22.5pt
      \line{%
        \vbox to8.5pt{}%
        % Extract \thiscolor definition from the marks.
        \getcolormarks
        % Typeset the headline with \maincolor, then restore the color.
        \pdfsetcolor{\maincolor}\the\headline\pdfsetcolor{\thiscolor}%
      }%
      \vss
    }%
    \nointerlineskip
  }
  %
  %
  \pdfcatalog{/PageMode /UseOutlines}
  %
  % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto).
  \def\dopdfimage#1#2#3{%
    \def\pdfimagewidth{#2}\setbox0 = \hbox{\ignorespaces #2}%
    \def\pdfimageheight{#3}\setbox2 = \hbox{\ignorespaces #3}%
    %
    % pdftex (and the PDF format) support .pdf, .png, .jpg (among
    % others).  Let's try in that order, PDF first since if
    % someone has a scalable image, presumably better to use that than a
    % bitmap.
    %
    % The extension of the first file that can be opened ends up in
    % \pdfimgext (set globally, since we are inside a group here).
    \let\pdfimgext=\empty
    \begingroup
      \openin 1 #1.pdf \ifeof 1
        \openin 1 #1.PDF \ifeof 1
          \openin 1 #1.png \ifeof 1
            \openin 1 #1.jpg \ifeof 1
              \openin 1 #1.jpeg \ifeof 1
                \openin 1 #1.JPG \ifeof 1
                  \errhelp = \nopdfimagehelp
                  \errmessage{Could not find image file #1 for pdf}%
                \else \gdef\pdfimgext{JPG}%
                \fi
              \else \gdef\pdfimgext{jpeg}%
              \fi
            \else \gdef\pdfimgext{jpg}%
            \fi
          \else \gdef\pdfimgext{png}%
          \fi
        \else \gdef\pdfimgext{PDF}%
        \fi
      \else \gdef\pdfimgext{pdf}%
      \fi
      \closein 1
    \endgroup
    %
    % without \immediate, ancient pdftex seg faults when the same image is
    % included twice.  (Version 3.14159-pre-1.0-unofficial-20010704.)
    \ifnum\pdftexversion < 14
      \immediate\pdfimage
    \else
      \immediate\pdfximage
    \fi
      \ifdim \wd0 >0pt width \pdfimagewidth \fi
      \ifdim \wd2 >0pt height \pdfimageheight \fi
      \ifnum\pdftexversion<13
         #1.\pdfimgext
       \else
         {#1.\pdfimgext}%
       \fi
    \ifnum\pdftexversion < 14 \else
      \pdfrefximage \pdflastximage
    \fi}
  %
  % \setpdfdestname{TEXT} leaves the escaped destination name for TEXT
  % in \pdfdestname.  The work is done inside a group; the result
  % survives because \txiescapepdf assigns it with \xdef.
  \def\setpdfdestname#1{{%
    % We have to set dummies so commands such as @code, and characters
    % such as \, aren't expanded when present in a section title.
    \indexnofonts
    \makevalueexpandable
    \turnoffactive
    \iftxiuseunicodedestname
      \ifx \declaredencoding \latone
        % Pass through Latin-1 characters.
        %   LuaTeX with byte wise I/O converts Latin-1 characters to Unicode.
      \else
        \ifx \declaredencoding \utfeight
          % Pass through Unicode characters.
        \else
          % Use ASCII approximations in destination names.
          \passthroughcharsfalse
        \fi
      \fi
    \else
      % Use ASCII approximations in destination names.
      \passthroughcharsfalse
    \fi
    \def\pdfdestname{#1}%
    \txiescapepdf\pdfdestname
  }}
  %
  % \setpdfoutlinetext{TEXT} leaves the escaped bookmark text for TEXT
  % in \pdfoutlinetext, in the same way.
  \def\setpdfoutlinetext#1{{%
    \indexnofonts
    \makevalueexpandable
    \turnoffactive
    \ifx \declaredencoding \latone
      % The PDF format can use an extended form of Latin-1 in bookmark
      % strings.  See Appendix D of the PDF Reference, Sixth Edition, for
      % the "PDFDocEncoding".
      \passthroughcharstrue
      % Pass through Latin-1 characters.
      %   LuaTeX: Convert to Unicode
      %   pdfTeX: Use Latin-1 as PDFDocEncoding
      \def\pdfoutlinetext{#1}%
    \else
      \ifx \declaredencoding \utfeight
        \ifx\luatexversion\thisisundefined
          % For pdfTeX  with UTF-8.
          % TODO: the PDF format can use UTF-16 in bookmark strings,
          %       but the code for this isn't done yet.
          % Use ASCII approximations.
          \passthroughcharsfalse
          \def\pdfoutlinetext{#1}%
        \else
          % For LuaTeX with UTF-8.
          % Pass through Unicode characters for title texts.
          \passthroughcharstrue
          \def\pdfoutlinetext{#1}%
        \fi
      \else
        % For non-Latin-1 or non-UTF-8 encodings.
        % Use ASCII approximations.
        \passthroughcharsfalse
        \def\pdfoutlinetext{#1}%
      \fi
    \fi
    % LuaTeX: Convert to UTF-16
    % pdfTeX: Use Latin-1 as PDFDocEncoding
    \txiescapepdfutfsixteen\pdfoutlinetext
  }}
  %
  % \pdfmkdest{TEXT} creates a named `xyz' destination for TEXT at the
  % current position.
  \def\pdfmkdest#1{%
    \setpdfdestname{#1}%
    \safewhatsit{\pdfdest name{\pdfdestname} xyz}%
  }
  %
  % used to mark target names; must be expandable.
  \def\pdfmkpgn#1{#1}
  %
  % by default, use black for everything.
  \def\urlcolor{\rgbBlack}
  \def\linkcolor{\rgbBlack}
  \def\endlink{\setcolor{\maincolor}\pdfendlink}
  %
  % Adding outlines to PDF; macros for calculating structure of outlines
  % come from Petr Olsak
  %
  % \expnumber{NAME} expands to 0 when \NAME is undefined, else to the
  % expansion of \NAME.  \advancenumber{NAME} globally redefines \NAME
  % as that value plus one.
  \def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0%
    \else \csname#1\endcsname \fi}
  \def\advancenumber#1{\tempnum=\expnumber{#1}\relax
    \advance\tempnum by 1
    \expandafter\xdef\csname#1\endcsname{\the\tempnum}}
  %
  % #1 is the section text, which is what will be displayed in the
  % outline by the pdf viewer.  #2 is the pdf expression for the number
  % of subentries (or empty, for subsubsections).  #3 is the node text,
  % which might be empty if this toc entry had no corresponding node.
  % #4 is the page number
  %
  \def\dopdfoutline#1#2#3#4{%
    % Generate a link to the node text if that exists; else, use the
    % page number.  We could generate a destination for the section
    % text in the case where a section has no node, but it doesn't
    % seem worth the trouble, since most documents are normally structured.
    \setpdfoutlinetext{#1}
    \setpdfdestname{#3}
    \ifx\pdfdestname\empty
      \def\pdfdestname{#4}%
    \fi
    %
    \pdfoutline goto name{\pdfmkpgn{\pdfdestname}}#2{\pdfoutlinetext}%
  }
  %
  \def\pdfmakeoutlines{%
    \begingroup
      % Read toc silently, to get counts of subentries for \pdfoutline.
      \def\partentry##1##2##3##4{}% ignore parts in the outlines
      \def\numchapentry##1##2##3##4{%
        \def\thischapnum{##2}%
        \def\thissecnum{0}%
        \def\thissubsecnum{0}%
      }%
      \def\numsecentry##1##2##3##4{%
        \advancenumber{chap\thischapnum}%
        \def\thissecnum{##2}%
        \def\thissubsecnum{0}%
      }%
      \def\numsubsecentry##1##2##3##4{%
        \advancenumber{sec\thissecnum}%
        \def\thissubsecnum{##2}%
      }%
      \def\numsubsubsecentry##1##2##3##4{%
        \advancenumber{subsec\thissubsecnum}%
      }%
      \def\thischapnum{0}%
      \def\thissecnum{0}%
      \def\thissubsecnum{0}%
      %
      % use \def rather than \let here because we redefine \chapentry et
      % al. a second time, below.
      \def\appentry{\numchapentry}%
      \def\appsecentry{\numsecentry}%
      \def\appsubsecentry{\numsubsecentry}%
      \def\appsubsubsecentry{\numsubsubsecentry}%
      \def\unnchapentry{\numchapentry}%
      \def\unnsecentry{\numsecentry}%
      \def\unnsubsecentry{\numsubsecentry}%
      \def\unnsubsubsecentry{\numsubsubsecentry}%
      \readdatafile{toc}%
      %
      % Read toc second time, this time actually producing the outlines.
      % The `-' means take the \expnumber as the absolute number of
      % subentries, which we calculated on our first read of the .toc above.
      %
      % We use the node names as the destinations.
      \def\numchapentry##1##2##3##4{%
        \dopdfoutline{##1}{count-\expnumber{chap##2}}{##3}{##4}}%
      \def\numsecentry##1##2##3##4{%
        \dopdfoutline{##1}{count-\expnumber{sec##2}}{##3}{##4}}%
      \def\numsubsecentry##1##2##3##4{%
        \dopdfoutline{##1}{count-\expnumber{subsec##2}}{##3}{##4}}%
      \def\numsubsubsecentry##1##2##3##4{% count is always zero
        \dopdfoutline{##1}{}{##3}{##4}}%
      %
      % PDF outlines are displayed using system fonts, instead of
      % document fonts.  Therefore we cannot use special characters,
      % since the encoding is unknown.  For example, the eogonek from
      % Latin 2 (0xea) gets translated to a | character.  Info from
      % Staszek Wawrykiewicz, 19 Jan 2004 04:09:24 +0100.
      %
      % To do this right, we have to translate 8-bit characters to
      % their "best" equivalent, based on the @documentencoding.  Too
      % much work for too little return.  Just use the ASCII equivalents
      % we use for the index sort strings.
      %
      \indexnofonts
      \setupdatafile
      % We can have normal brace characters in the PDF outlines, unlike
      % Texinfo index files.  So set that up.
      \def\{{\lbracecharliteral}%
      \def\}{\rbracecharliteral}%
      \catcode`\\=\active \otherbackslash
      \input \tocreadfilename
    \endgroup
  }
  {\catcode`[=1 \catcode`]=2
   \catcode`{=\other \catcode`}=\other
   \gdef\lbracecharliteral[{]%
   \gdef\rbracecharliteral[}]%
  ]
  %
  % \skipspaces takes one argument at a time, appending each to the
  % \filename token list and counting it in \filenamelength, until it
  % meets the `|' that \getfilename puts after the expanded name.
  \def\skipspaces#1{\def\PP{#1}\def\D{|}%
    \ifx\PP\D\let\nextsp\relax
    \else\let\nextsp\skipspaces
      \addtokens{\filename}{\PP}%
      \advance\filenamelength by 1
    \fi
    \nextsp}
  \def\getfilename#1{%
    \filenamelength=0
    % If we don't expand the argument now, \skipspaces will get
    % snagged on things like "@value{foo}".
    \edef\temp{#1}%
    \expandafter\skipspaces\temp|\relax
  }
  \ifnum\pdftexversion < 14
    \let \startlink \pdfannotlink
  \else
    \let \startlink \pdfstartlink
  \fi
  % make a live url in pdf output.
  \def\pdfurl#1{%
    \begingroup
      % it seems we really need yet another set of dummies; have not
      % tried to figure out what each command should do in the context
      % of @url.  for now, just make @/ a no-op, that's the only one
      % people have actually reported a problem with.
      %
      \normalturnoffactive
      \def\@{@}%
      \let\/=\empty
      \makevalueexpandable
      % do we want to go so far as to use \indexnofonts instead of just
      % special-casing \var here?
      \def\var##1{##1}%
      %
      \leavevmode\setcolor{\urlcolor}%
      \startlink attr{/Border [0 0 0]}%
        user{/Subtype /Link /A << /S /URI /URI (#1) >>}%
    \endgroup}
  % \pdfgettoks TEXT. scans TEXT up to the period, one token at a time
  % (\maketoks/\poptoks).  Runs of digits are collected in \toksC and
  % wrapped in \pdflink by \makelink; all other tokens are copied as
  % they are, with a space added after each comma.  The result is built
  % up in \toksB and finally stored globally in \toksA by \done.
  \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}}
  \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks}
  \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks}
  \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}}
  \def\maketoks{%
    \expandafter\poptoks\the\toksA|ENDTOKS|\relax
    \ifx\first0\adn0
    \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3
    \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6
    \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9
    \else
      \ifnum0=\countA\else\makelink\fi
      \ifx\first.\let\next=\done\else
        \let\next=\maketoks
        \addtokens{\toksB}{\the\toksD}
        \ifx\first,\addtokens{\toksB}{\space}\fi
      \fi
    \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
    \next}
  \def\makelink{\addtokens{\toksB}%
    {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0}
  \def\pdflink#1{%
    \startlink attr{/Border [0 0 0]} goto name{\pdfmkpgn{#1}}
    \setcolor{\linkcolor}#1\endlink}
  \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st}
\else
  % non-pdf mode
  \let\pdfmkdest = \gobble
  \let\pdfurl = \gobble
  \let\endlink = \relax
  \let\setcolor = \gobble
  \let\pdfsetcolor = \gobble
  \let\pdfmakeoutlines = \relax
\fi  % \ifx\pdfoutput

%
% For XeTeX
%
\ifx\XeTeXrevision\thisisundefined
\else
  %
  % XeTeX version check
  %
  \ifnum\strcmp{\the\XeTeXversion\XeTeXrevision}{0.99996}>-1
    % TeX Live 2016 contains XeTeX 0.99996 and xdvipdfmx 20160307.
    % It can use the `dvipdfmx:config' special (from TeX Live SVN r40941).
    % For avoiding PDF destination name replacement, we use this special
    % instead of xdvipdfmx's command line option `-C 0x0010'.
    \special{dvipdfmx:config C 0x0010}
    % XeTeX 0.99995+ comes with xdvipdfmx 20160307+.
    % It can handle Unicode destination names for PDF.
    \txiuseunicodedestnametrue
  \else
    % XeTeX < 0.99996 (TeX Live < 2016) cannot use the
    % `dvipdfmx:config' special.
    % So for avoiding PDF destination name replacement,
    % xdvipdfmx's command line option `-C 0x0010' is necessary.
    %
    % XeTeX < 0.99995 can not handle Unicode destination names for PDF
    % because xdvipdfmx 20150315 has a UTF-16 conversion issue.
    % It is fixed by xdvipdfmx 20160106 (TeX Live SVN r39753).
    \txiuseunicodedestnamefalse
  \fi
  %
  % Color support
  %
  \def\rgbDarkRed{0.50 0.09 0.12}
  \def\rgbBlack{0 0 0}
  %
  \def\pdfsetcolor#1{\special{pdf:scolor [#1]}}
  %
  % Set color, and create a mark which defines \thiscolor accordingly,
  % so that \makeheadline knows which color to restore.
  \def\setcolor#1{%
    \xdef\lastcolordefs{\gdef\noexpand\thiscolor{#1}}%
    \domark
    \pdfsetcolor{#1}%
  }
  %
  \def\maincolor{\rgbBlack}
  \pdfsetcolor{\maincolor}
  \edef\thiscolor{\maincolor}
  \def\lastcolordefs{}
  %
  \def\makefootline{%
    \baselineskip24pt
    \line{\pdfsetcolor{\maincolor}\the\footline}%
  }
  %
  \def\makeheadline{%
    \vbox to 0pt{%
      \vskip-22.5pt
      \line{%
        \vbox to8.5pt{}%
        % Extract \thiscolor definition from the marks.
        \getcolormarks
        % Typeset the headline with \maincolor, then restore the color.
        \pdfsetcolor{\maincolor}\the\headline\pdfsetcolor{\thiscolor}%
      }%
      \vss
    }%
    \nointerlineskip
  }
  %
  % PDF outline support
  %
  % Emulate pdfTeX primitive
  \def\pdfdest name#1 xyz{%
    \special{pdf:dest (#1) [@thispage /XYZ @xpos @ypos null]}%
  }
  %
  \def\setpdfdestname#1{{%
    % We have to set dummies so commands such as @code, and characters
    % such as \, aren't expanded when present in a section title.
    \indexnofonts
    \makevalueexpandable
    \turnoffactive
    \iftxiuseunicodedestname
      % Pass through Unicode characters.
    \else
      % Use ASCII approximations in destination names.
      \passthroughcharsfalse
    \fi
    \def\pdfdestname{#1}%
    \txiescapepdf\pdfdestname
  }}
  %
  \def\setpdfoutlinetext#1{{%
    \turnoffactive
    % Always use Unicode characters in title texts.
    \def\pdfoutlinetext{#1}%
    % For XeTeX, xdvipdfmx converts to UTF-16.
    % So we do not convert.
    \txiescapepdf\pdfoutlinetext
  }}
  %
  \def\pdfmkdest#1{%
    \setpdfdestname{#1}%
    \safewhatsit{\pdfdest name{\pdfdestname} xyz}%
  }
  %
  % by default, use black for everything.
  \def\urlcolor{\rgbBlack}
  \def\linkcolor{\rgbBlack}
  % This \endlink is replaced further down in this branch by one that
  % ends the annotation with \special{pdf:eann}.
  \def\endlink{\setcolor{\maincolor}\pdfendlink}
  %
  % Arguments as for the pdfTeX version, except that #2 is the outline
  % level (1 to 4) passed by \pdfmakeoutlines below.
  \def\dopdfoutline#1#2#3#4{%
    \setpdfoutlinetext{#1}
    \setpdfdestname{#3}
    \ifx\pdfdestname\empty
      \def\pdfdestname{#4}%
    \fi
    %
    \special{pdf:out [-] #2 << /Title (\pdfoutlinetext) /A
      << /S /GoTo /D (\pdfdestname) >> >> }%
  }
  %
  \def\pdfmakeoutlines{%
    \begingroup
      %
      % For XeTeX, counts of subentries are not necessary.
      % Therefore, we read toc only once.
      %
      % We use node names as destinations.
      \def\partentry##1##2##3##4{}% ignore parts in the outlines
      \def\numchapentry##1##2##3##4{%
        \dopdfoutline{##1}{1}{##3}{##4}}%
      \def\numsecentry##1##2##3##4{%
        \dopdfoutline{##1}{2}{##3}{##4}}%
      \def\numsubsecentry##1##2##3##4{%
        \dopdfoutline{##1}{3}{##3}{##4}}%
      \def\numsubsubsecentry##1##2##3##4{%
        \dopdfoutline{##1}{4}{##3}{##4}}%
      %
      \let\appentry\numchapentry%
      \let\appsecentry\numsecentry%
      \let\appsubsecentry\numsubsecentry%
      \let\appsubsubsecentry\numsubsubsecentry%
      \let\unnchapentry\numchapentry%
      \let\unnsecentry\numsecentry%
      \let\unnsubsecentry\numsubsecentry%
      \let\unnsubsubsecentry\numsubsubsecentry%
      %
      % For XeTeX, xdvipdfmx converts strings to UTF-16.
      % Therefore, the encoding and the language may not be considered.
      %
      \indexnofonts
      \setupdatafile
      % We can have normal brace characters in the PDF outlines, unlike
      % Texinfo index files.  So set that up.
      \def\{{\lbracecharliteral}%
      \def\}{\rbracecharliteral}%
      \catcode`\\=\active \otherbackslash
      \input \tocreadfilename
    \endgroup
  }
  {\catcode`[=1 \catcode`]=2
   \catcode`{=\other \catcode`}=\other
   \gdef\lbracecharliteral[{]%
   \gdef\rbracecharliteral[}]%
  ]

  \special{pdf:docview << /PageMode /UseOutlines >> }
  % ``\special{pdf:tounicode ...}'' is not necessary
  % because xdvipdfmx converts strings from UTF-8 to UTF-16 without it.
  % However, due to a UTF-16 conversion issue of xdvipdfmx 20150315,
  % ``\special{pdf:dest ...}'' cannot handle non-ASCII strings.
  % It is fixed by xdvipdfmx 20160106 (TeX Live SVN r39753).
%
  \def\skipspaces#1{\def\PP{#1}\def\D{|}%
    \ifx\PP\D\let\nextsp\relax
    \else\let\nextsp\skipspaces
      \addtokens{\filename}{\PP}%
      \advance\filenamelength by 1
    \fi
    \nextsp}
  \def\getfilename#1{%
    \filenamelength=0
    % If we don't expand the argument now, \skipspaces will get
    % snagged on things like "@value{foo}".
    \edef\temp{#1}%
    \expandafter\skipspaces\temp|\relax
  }
  % make a live url in pdf output.
  \def\pdfurl#1{%
    \begingroup
      % it seems we really need yet another set of dummies; have not
      % tried to figure out what each command should do in the context
      % of @url.  for now, just make @/ a no-op, that's the only one
      % people have actually reported a problem with.
      %
      \normalturnoffactive
      \def\@{@}%
      \let\/=\empty
      \makevalueexpandable
      % do we want to go so far as to use \indexnofonts instead of just
      % special-casing \var here?
      \def\var##1{##1}%
      %
      \leavevmode\setcolor{\urlcolor}%
      \special{pdf:bann << /Border [0 0 0]
        /Subtype /Link /A << /S /URI /URI (#1) >> >>}%
    \endgroup}
  \def\endlink{\setcolor{\maincolor}\special{pdf:eann}}
  \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}}
  \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks}
  \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks}
  \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}}
  \def\maketoks{%
    \expandafter\poptoks\the\toksA|ENDTOKS|\relax
    \ifx\first0\adn0
    \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3
    \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6
    \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9
    \else
      \ifnum0=\countA\else\makelink\fi
      \ifx\first.\let\next=\done\else
        \let\next=\maketoks
        \addtokens{\toksB}{\the\toksD}
        \ifx\first,\addtokens{\toksB}{\space}\fi
      \fi
    \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
    \next}
  \def\makelink{\addtokens{\toksB}%
    {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0}
  \def\pdflink#1{%
    \special{pdf:bann << /Border [0 0 0]
      /Type /Annot /Subtype /Link /A << /S /GoTo /D (#1) >> >>}%
    \setcolor{\linkcolor}#1\endlink}
  \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st}
%
  %
  % @image support
  %
  % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto).
  \def\doxeteximage#1#2#3{%
    \def\xeteximagewidth{#2}\setbox0 = \hbox{\ignorespaces #2}%
    \def\xeteximageheight{#3}\setbox2 = \hbox{\ignorespaces #3}%
    %
    % XeTeX (and the PDF format) supports .pdf, .png, .jpg (among
    % others).  Let's try in that order, PDF first since if
    % someone has a scalable image, presumably better to use that than a
    % bitmap.
    \let\xeteximgext=\empty
    \begingroup
      \openin 1 #1.pdf \ifeof 1
        \openin 1 #1.PDF \ifeof 1
          \openin 1 #1.png \ifeof 1
            \openin 1 #1.jpg \ifeof 1
              \openin 1 #1.jpeg \ifeof 1
                \openin 1 #1.JPG \ifeof 1
                  \errmessage{Could not find image file #1 for XeTeX}%
                \else \gdef\xeteximgext{JPG}%
                \fi
              \else \gdef\xeteximgext{jpeg}%
              \fi
            \else \gdef\xeteximgext{jpg}%
            \fi
          \else \gdef\xeteximgext{png}%
          \fi
        \else \gdef\xeteximgext{PDF}%
        \fi
      \else \gdef\xeteximgext{pdf}%
      \fi
      \closein 1
    \endgroup
    %
    % Files found with extension pdf or PDF are included with
    % \XeTeXpdffile, everything else with \XeTeXpicfile.
    \def\xetexpdfext{pdf}%
    \ifx\xeteximgext\xetexpdfext
      \XeTeXpdffile "#1".\xeteximgext ""
    \else
      \def\xetexpdfext{PDF}%
      \ifx\xeteximgext\xetexpdfext
        \XeTeXpdffile "#1".\xeteximgext ""
      \else
        \XeTeXpicfile "#1".\xeteximgext ""
      \fi
    \fi
    \ifdim \wd0 >0pt width \xeteximagewidth \fi
    \ifdim \wd2 >0pt height \xeteximageheight \fi \relax
  }
\fi


%
\message{fonts,}

% Set the baselineskip to #1, and the lineskip and strut size
% correspondingly.  There is no deep meaning behind these magic numbers
% used as factors; they just match (closely enough) what Knuth defined.
%
\def\lineskipfactor{.08333}
\def\strutheightpercent{.70833}
\def\strutdepthpercent {.29167}
%
% can get a sort of poor man's double spacing by redefining this.
\def\baselinefactor{1}
%
\newdimen\textleading
\def\setleading#1{%
  \dimen0 = #1\relax
  \normalbaselineskip = \baselinefactor\dimen0
  \normallineskip = \lineskipfactor\normalbaselineskip
  \normalbaselines
  \setbox\strutbox =\hbox{%
    \vrule width0pt height\strutheightpercent\baselineskip
                    depth \strutdepthpercent \baselineskip
  }%
}

% PDF CMaps.  See also LaTeX's t1.cmap.
%
% do nothing with this by default.
\expandafter\let\csname cmapOT1\endcsname\gobble
\expandafter\let\csname cmapOT1IT\endcsname\gobble
\expandafter\let\csname cmapOT1TT\endcsname\gobble

% if we are producing pdf, and we have \pdffontattr, then define cmaps.
% (\pdffontattr was introduced many years ago, but people still run
% older pdftex's; it's easy to conditionalize, so we do.)
%
% Each of the three groups below writes one CMap as a PDF stream object
% with \immediate\pdfobj, and then redefines \cmapOT1, \cmapOT1IT or
% \cmapOT1TT so that, given a font, it attaches that object to the font
% as its /ToUnicode entry.  The object number is frozen into the
% definition by \edef.
%
% Inside each stream `%' is an ordinary character and every end of line
% is written out as ^^J, so everything between `stream {' and the
% closing brace is data: do not indent it or add comments there.  The
% numbers in front of beginbfrange and beginbfchar must equal the
% number of entries that follow them.
\ifpdf \ifx\pdffontattr\thisisundefined \else
  \begingroup
    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: ProcSet (CIDInit)
%%IncludeResource: ProcSet (CIDInit)
%%BeginResource: CMap (TeX-OT1-0)
%%Title: (TeX-OT1-0 TeX OT1 0)
%%Version: 1.000
%%EndComments
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (TeX)
/Ordering (OT1)
/Supplement 0
>> def
/CMapName /TeX-OT1-0 def
/CMapType 2 def
1 begincodespacerange
<00> <7F>
endcodespacerange
8 beginbfrange
<00> <01> <0393>
<09> <0A> <03A8>
<23> <26> <0023>
<28> <3B> <0028>
<3F> <5B> <003F>
<5D> <5E> <005D>
<61> <7A> <0061>
<7B> <7C> <2013>
endbfrange
40 beginbfchar
<02> <0398>
<03> <039B>
<04> <039E>
<05> <03A0>
<06> <03A3>
<07> <03D2>
<08> <03A6>
<0B> <00660066>
<0C> <00660069>
<0D> <0066006C>
<0E> <006600660069>
<0F> <00660066006C>
<10> <0131>
<11> <0237>
<12> <0060>
<13> <00B4>
<14> <02C7>
<15> <02D8>
<16> <00AF>
<17> <02DA>
<18> <00B8>
<19> <00DF>
<1A> <00E6>
<1B> <0153>
<1C> <00F8>
<1D> <00C6>
<1E> <0152>
<1F> <00D8>
<21> <0021>
<22> <201D>
<27> <2019>
<3C> <00A1>
<3D> <003D>
<3E> <00BF>
<5C> <201C>
<5F> <02D9>
<60> <2018>
<7D> <02DD>
<7E> <007E>
<7F> <00A8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%EndResource
%%EOF
    }\endgroup
  \expandafter\edef\csname cmapOT1\endcsname#1{%
    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
  }%
%
% \cmapOT1IT
  \begingroup
    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: ProcSet (CIDInit)
%%IncludeResource: ProcSet (CIDInit)
%%BeginResource: CMap (TeX-OT1IT-0)
%%Title: (TeX-OT1IT-0 TeX OT1IT 0)
%%Version: 1.000
%%EndComments
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (TeX)
/Ordering (OT1IT)
/Supplement 0
>> def
/CMapName /TeX-OT1IT-0 def
/CMapType 2 def
1 begincodespacerange
<00> <7F>
endcodespacerange
8 beginbfrange
<00> <01> <0393>
<09> <0A> <03A8>
<25> <26> <0025>
<28> <3B> <0028>
<3F> <5B> <003F>
<5D> <5E> <005D>
<61> <7A> <0061>
<7B> <7C> <2013>
endbfrange
42 beginbfchar
<02> <0398>
<03> <039B>
<04> <039E>
<05> <03A0>
<06> <03A3>
<07> <03D2>
<08> <03A6>
<0B> <00660066>
<0C> <00660069>
<0D> <0066006C>
<0E> <006600660069>
<0F> <00660066006C>
<10> <0131>
<11> <0237>
<12> <0060>
<13> <00B4>
<14> <02C7>
<15> <02D8>
<16> <00AF>
<17> <02DA>
<18> <00B8>
<19> <00DF>
<1A> <00E6>
<1B> <0153>
<1C> <00F8>
<1D> <00C6>
<1E> <0152>
<1F> <00D8>
<21> <0021>
<22> <201D>
<23> <0023>
<24> <00A3>
<27> <2019>
<3C> <00A1>
<3D> <003D>
<3E> <00BF>
<5C> <201C>
<5F> <02D9>
<60> <2018>
<7D> <02DD>
<7E> <007E>
<7F> <00A8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%EndResource
%%EOF
    }\endgroup
  \expandafter\edef\csname cmapOT1IT\endcsname#1{%
    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
  }%
%
% \cmapOT1TT
  \begingroup
    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: ProcSet (CIDInit)
%%IncludeResource: ProcSet (CIDInit)
%%BeginResource: CMap (TeX-OT1TT-0)
%%Title: (TeX-OT1TT-0 TeX OT1TT 0)
%%Version: 1.000
%%EndComments
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (TeX)
/Ordering (OT1TT)
/Supplement 0
>> def
/CMapName /TeX-OT1TT-0 def
/CMapType 2 def
1 begincodespacerange
<00> <7F>
endcodespacerange
5 beginbfrange
<00> <01> <0393>
<09> <0A> <03A8>
<21> <26> <0021>
<28> <5F> <0028>
<61> <7E> <0061>
endbfrange
32 beginbfchar
<02> <0398>
<03> <039B>
<04> <039E>
<05> <03A0>
<06> <03A3>
<07> <03D2>
<08> <03A6>
<0B> <2191>
<0C> <2193>
<0D> <0027>
<0E> <00A1>
<0F> <00BF>
<10> <0131>
<11> <0237>
<12> <0060>
<13> <00B4>
<14> <02C7>
<15> <02D8>
<16> <00AF>
<17> <02DA>
<18> <00B8>
<19> <00DF>
<1A> <00E6>
<1B> <0153>
<1C> <00F8>
<1D> <00C6>
<1E> <0152>
<1F> <00D8>
<20> <2423>
<27> <2019>
<60> <2018>
<7F> <00A8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%EndResource
%%EOF
    }\endgroup
  \expandafter\edef\csname cmapOT1TT\endcsname#1{%
    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
  }%
\fi\fi


% Set the font macro #1 to the font named \fontprefix#2.
% #3 is the font's design size, #4 is a scale factor, #5 is the CMap
% encoding (only OT1, OT1IT and OT1TT are allowed, or empty to omit).
% Example:
% #1 = \textrm
% #2 = \rmshape
% #3 = 10
% #4 = \mainmagstep
% #5 = OT1
%
\def\setfont#1#2#3#4#5{%
  \font#1=\fontprefix#2#3 scaled #4
  \csname cmap#5\endcsname#1%
}
% This is what gets called when #5 of \setfont is empty.
\let\cmap\gobble
%
% (end of cmaps)

% Use cm as the default font prefix.
% To specify the font prefix, you must define \fontprefix
% before you read in texinfo.tex.
+\ifx\fontprefix\thisisundefined
+\def\fontprefix{cm}
+\fi
+% Support font families that don't use the same naming scheme as CM.
+% Each \...shape macro is the suffix appended to \fontprefix by \setfont;
+% the "b" variants are used where the normal face of a heading is bold.
+\def\rmshape{r}
+\def\rmbshape{bx}               % where the normal face is bold
+\def\bfshape{b}
+\def\bxshape{bx}
+\def\ttshape{tt}
+\def\ttbshape{tt}
+\def\ttslshape{sltt}
+\def\itshape{ti}
+\def\itbshape{bxti}
+\def\slshape{sl}
+\def\slbshape{bxsl}
+\def\sfshape{ss}
+\def\sfbshape{ss}
+\def\scshape{csc}
+\def\scbshape{csc}
+
+% Definitions for a main text size of 11pt.  (The default in Texinfo.)
+%
+% \definetextfontsizexi (re)loads every font set used by the manual:
+% text, @defun, small, smaller, seven (math superscripts), title, chap,
+% sec, ssec and reduced.  For each set it defines \<set>nominalsize,
+% the \<set><style> font macros (via \setfont, which also attaches the
+% CMap named by its last argument), the math fonts \<set>i and
+% \<set>sy, and \<set>ecsize.  It finishes by setting \textleading and
+% switching to the text fonts in roman.
+%
+\def\definetextfontsizexi{%
+% Text fonts (10pt design size scaled by \magstephalf, i.e. 10.95pt).
+\def\textnominalsize{11pt}
+\edef\mainmagstep{\magstephalf}
+\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
+\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
+\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
+\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
+\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
+\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
+\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
+\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
+\def\textecsize{1095}
+
+% A few fonts for @defun names and args.
+% \defsl is the slanted *roman* font (\slshape), so it takes the OT1
+% CMap like every other \slshape font here; only the tt fonts use OT1TT.
+\setfont\defbf\bfshape{10}{\magstep1}{OT1}
+\setfont\deftt\ttshape{10}{\magstep1}{OT1TT}
+\setfont\defsl\slshape{10}{\magstep1}{OT1}
+\setfont\defttsl\ttslshape{10}{\magstep1}{OT1TT}
+\def\df{\let\ttfont=\deftt \let\bffont = \defbf
+\let\ttslfont=\defttsl \let\slfont=\defsl \bf}
+
+% Fonts for indices, footnotes, small examples (9pt).
+\def\smallnominalsize{9pt}
+\setfont\smallrm\rmshape{9}{1000}{OT1}
+\setfont\smalltt\ttshape{9}{1000}{OT1TT}
+\setfont\smallbf\bfshape{10}{900}{OT1}
+\setfont\smallit\itshape{9}{1000}{OT1IT}
+\setfont\smallsl\slshape{9}{1000}{OT1}
+\setfont\smallsf\sfshape{9}{1000}{OT1}
+\setfont\smallsc\scshape{10}{900}{OT1}
+\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
+\font\smalli=cmmi9
+\font\smallsy=cmsy9
+\def\smallecsize{0900}
+
+% Fonts for small examples (8pt).
+\def\smallernominalsize{8pt}
+\setfont\smallerrm\rmshape{8}{1000}{OT1}
+\setfont\smallertt\ttshape{8}{1000}{OT1TT}
+\setfont\smallerbf\bfshape{10}{800}{OT1}
+\setfont\smallerit\itshape{8}{1000}{OT1IT}
+\setfont\smallersl\slshape{8}{1000}{OT1}
+\setfont\smallersf\sfshape{8}{1000}{OT1}
+\setfont\smallersc\scshape{10}{800}{OT1}
+\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
+\font\smalleri=cmmi8
+\font\smallersy=cmsy8
+\def\smallerecsize{0800}
+
+% Fonts for math mode superscripts (7pt).
+\def\sevennominalsize{7pt}
+\setfont\sevenrm\rmshape{7}{1000}{OT1}
+\setfont\seventt\ttshape{10}{700}{OT1TT}
+\setfont\sevenbf\bfshape{10}{700}{OT1}
+\setfont\sevenit\itshape{7}{1000}{OT1IT}
+\setfont\sevensl\slshape{10}{700}{OT1}
+\setfont\sevensf\sfshape{10}{700}{OT1}
+\setfont\sevensc\scshape{10}{700}{OT1}
+\setfont\seventtsl\ttslshape{10}{700}{OT1TT}
+\font\seveni=cmmi7
+\font\sevensy=cmsy7
+\def\sevenecsize{0700}
+
+% Fonts for title page (20.4pt):
+\def\titlenominalsize{20pt}
+\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
+\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
+\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
+\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
+\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
+\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
+\let\titlebf=\titlerm
+\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
+\font\titlei=cmmi12 scaled \magstep3
+\font\titlesy=cmsy10 scaled \magstep4
+\def\titleecsize{2074}
+
+% Chapter (and unnumbered) fonts (17.28pt).
+\def\chapnominalsize{17pt}
+\setfont\chaprm\rmbshape{12}{\magstep2}{OT1}
+\setfont\chapit\itbshape{10}{\magstep3}{OT1IT}
+\setfont\chapsl\slbshape{10}{\magstep3}{OT1}
+\setfont\chaptt\ttbshape{12}{\magstep2}{OT1TT}
+\setfont\chapttsl\ttslshape{10}{\magstep3}{OT1TT}
+\setfont\chapsf\sfbshape{17}{1000}{OT1}
+\let\chapbf=\chaprm
+\setfont\chapsc\scbshape{10}{\magstep3}{OT1}
+\font\chapi=cmmi12 scaled \magstep2
+\font\chapsy=cmsy10 scaled \magstep3
+\def\chapecsize{1728}
+
+% Section fonts (14.4pt).
+\def\secnominalsize{14pt}
+\setfont\secrm\rmbshape{12}{\magstep1}{OT1}
+\setfont\secrmnotbold\rmshape{12}{\magstep1}{OT1}
+\setfont\secit\itbshape{10}{\magstep2}{OT1IT}
+\setfont\secsl\slbshape{10}{\magstep2}{OT1}
+\setfont\sectt\ttbshape{12}{\magstep1}{OT1TT}
+\setfont\secttsl\ttslshape{10}{\magstep2}{OT1TT}
+\setfont\secsf\sfbshape{12}{\magstep1}{OT1}
+\let\secbf\secrm
+\setfont\secsc\scbshape{10}{\magstep2}{OT1}
+\font\seci=cmmi12 scaled \magstep1
+\font\secsy=cmsy10 scaled \magstep2
+\def\sececsize{1440}
+
+% Subsection fonts (13.15pt).
+\def\ssecnominalsize{13pt}
+\setfont\ssecrm\rmbshape{12}{\magstephalf}{OT1}
+\setfont\ssecit\itbshape{10}{1315}{OT1IT}
+\setfont\ssecsl\slbshape{10}{1315}{OT1}
+\setfont\ssectt\ttbshape{12}{\magstephalf}{OT1TT}
+\setfont\ssecttsl\ttslshape{10}{1315}{OT1TT}
+\setfont\ssecsf\sfbshape{12}{\magstephalf}{OT1}
+\let\ssecbf\ssecrm
+\setfont\ssecsc\scbshape{10}{1315}{OT1}
+\font\sseci=cmmi12 scaled \magstephalf
+\font\ssecsy=cmsy10 scaled 1315
+\def\ssececsize{1200}
+
+% Reduced fonts for @acronym in text (10pt).
+\def\reducednominalsize{10pt}
+\setfont\reducedrm\rmshape{10}{1000}{OT1}
+\setfont\reducedtt\ttshape{10}{1000}{OT1TT}
+\setfont\reducedbf\bfshape{10}{1000}{OT1}
+\setfont\reducedit\itshape{10}{1000}{OT1IT}
+\setfont\reducedsl\slshape{10}{1000}{OT1}
+\setfont\reducedsf\sfshape{10}{1000}{OT1}
+\setfont\reducedsc\scshape{10}{1000}{OT1}
+\setfont\reducedttsl\ttslshape{10}{1000}{OT1TT}
+\font\reducedi=cmmi10
+\font\reducedsy=cmsy10
+\def\reducedecsize{1000}
+
+\textleading = 13.2pt % line spacing for 11pt CM
+\textfonts            % reset the current fonts
+\rm
+} % end of 11pt text font size definitions, \definetextfontsizexi
+
+
+% Definitions to make the main text be 10pt Computer Modern, with
+% section, chapter, etc., sizes following suit.  This is for the GNU
+% Press printing of the Emacs 22 manual.  Maybe other manuals in the
+% future.  Used with @smallbook, which sets the leading to 12pt.
+%
+% \definetextfontsizex is the 10pt counterpart of the 11pt setup: it
+% (re)loads the text, @defun, small, smaller, seven, title, chap, sec,
+% ssec and reduced font sets, halves \parskip, sets \textleading to
+% 12pt and switches to the text fonts in roman.
+%
+\def\definetextfontsizex{%
+% Text fonts (10pt).
+\def\textnominalsize{10pt}
+\edef\mainmagstep{1000}
+\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
+\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
+\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
+\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
+\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
+\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
+\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
+\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
+\def\textecsize{1000}
+
+% A few fonts for @defun names and args.
+% \defsl is the slanted *roman* font (\slshape), so it takes the OT1
+% CMap like every other \slshape font here; only the tt fonts use OT1TT.
+\setfont\defbf\bfshape{10}{\magstephalf}{OT1}
+\setfont\deftt\ttshape{10}{\magstephalf}{OT1TT}
+\setfont\defsl\slshape{10}{\magstephalf}{OT1}
+\setfont\defttsl\ttslshape{10}{\magstephalf}{OT1TT}
+\def\df{\let\ttfont=\deftt \let\bffont = \defbf
+\let\slfont=\defsl \let\ttslfont=\defttsl \bf}
+
+% Fonts for indices, footnotes, small examples (9pt).
+\def\smallnominalsize{9pt}
+\setfont\smallrm\rmshape{9}{1000}{OT1}
+\setfont\smalltt\ttshape{9}{1000}{OT1TT}
+\setfont\smallbf\bfshape{10}{900}{OT1}
+\setfont\smallit\itshape{9}{1000}{OT1IT}
+\setfont\smallsl\slshape{9}{1000}{OT1}
+\setfont\smallsf\sfshape{9}{1000}{OT1}
+\setfont\smallsc\scshape{10}{900}{OT1}
+\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
+\font\smalli=cmmi9
+\font\smallsy=cmsy9
+\def\smallecsize{0900}
+
+% Fonts for small examples (8pt).
+\def\smallernominalsize{8pt}
+\setfont\smallerrm\rmshape{8}{1000}{OT1}
+\setfont\smallertt\ttshape{8}{1000}{OT1TT}
+\setfont\smallerbf\bfshape{10}{800}{OT1}
+\setfont\smallerit\itshape{8}{1000}{OT1IT}
+\setfont\smallersl\slshape{8}{1000}{OT1}
+\setfont\smallersf\sfshape{8}{1000}{OT1}
+\setfont\smallersc\scshape{10}{800}{OT1}
+\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
+\font\smalleri=cmmi8
+\font\smallersy=cmsy8
+\def\smallerecsize{0800}
+
+% Fonts for math mode superscripts (7pt).
+\def\sevennominalsize{7pt}
+\setfont\sevenrm\rmshape{7}{1000}{OT1}
+\setfont\seventt\ttshape{10}{700}{OT1TT}
+\setfont\sevenbf\bfshape{10}{700}{OT1}
+\setfont\sevenit\itshape{7}{1000}{OT1IT}
+\setfont\sevensl\slshape{10}{700}{OT1}
+\setfont\sevensf\sfshape{10}{700}{OT1}
+\setfont\sevensc\scshape{10}{700}{OT1}
+\setfont\seventtsl\ttslshape{10}{700}{OT1TT}
+\font\seveni=cmmi7
+\font\sevensy=cmsy7
+\def\sevenecsize{0700}
+
+% Fonts for title page (20.4pt):
+\def\titlenominalsize{20pt}
+\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
+\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
+\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
+\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
+\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
+\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
+\let\titlebf=\titlerm
+\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
+\font\titlei=cmmi12 scaled \magstep3
+\font\titlesy=cmsy10 scaled \magstep4
+\def\titleecsize{2074}
+
+% Chapter fonts (14.4pt).
+\def\chapnominalsize{14pt}
+\setfont\chaprm\rmbshape{12}{\magstep1}{OT1}
+\setfont\chapit\itbshape{10}{\magstep2}{OT1IT}
+\setfont\chapsl\slbshape{10}{\magstep2}{OT1}
+\setfont\chaptt\ttbshape{12}{\magstep1}{OT1TT}
+\setfont\chapttsl\ttslshape{10}{\magstep2}{OT1TT}
+\setfont\chapsf\sfbshape{12}{\magstep1}{OT1}
+\let\chapbf\chaprm
+\setfont\chapsc\scbshape{10}{\magstep2}{OT1}
+\font\chapi=cmmi12 scaled \magstep1
+\font\chapsy=cmsy10 scaled \magstep2
+\def\chapecsize{1440}
+
+% Section fonts (12pt).
+\def\secnominalsize{12pt}
+\setfont\secrm\rmbshape{12}{1000}{OT1}
+\setfont\secit\itbshape{10}{\magstep1}{OT1IT}
+\setfont\secsl\slbshape{10}{\magstep1}{OT1}
+\setfont\sectt\ttbshape{12}{1000}{OT1TT}
+\setfont\secttsl\ttslshape{10}{\magstep1}{OT1TT}
+\setfont\secsf\sfbshape{12}{1000}{OT1}
+\let\secbf\secrm
+\setfont\secsc\scbshape{10}{\magstep1}{OT1}
+\font\seci=cmmi12
+\font\secsy=cmsy10 scaled \magstep1
+\def\sececsize{1200}
+
+% Subsection fonts (10pt).
+\def\ssecnominalsize{10pt}
+\setfont\ssecrm\rmbshape{10}{1000}{OT1}
+\setfont\ssecit\itbshape{10}{1000}{OT1IT}
+\setfont\ssecsl\slbshape{10}{1000}{OT1}
+\setfont\ssectt\ttbshape{10}{1000}{OT1TT}
+\setfont\ssecttsl\ttslshape{10}{1000}{OT1TT}
+\setfont\ssecsf\sfbshape{10}{1000}{OT1}
+\let\ssecbf\ssecrm
+\setfont\ssecsc\scbshape{10}{1000}{OT1}
+\font\sseci=cmmi10
+\font\ssecsy=cmsy10
+\def\ssececsize{1000}
+
+% Reduced fonts for @acronym in text (9pt).
+\def\reducednominalsize{9pt}
+\setfont\reducedrm\rmshape{9}{1000}{OT1}
+\setfont\reducedtt\ttshape{9}{1000}{OT1TT}
+\setfont\reducedbf\bfshape{10}{900}{OT1}
+\setfont\reducedit\itshape{9}{1000}{OT1IT}
+\setfont\reducedsl\slshape{9}{1000}{OT1}
+\setfont\reducedsf\sfshape{9}{1000}{OT1}
+\setfont\reducedsc\scshape{10}{900}{OT1}
+\setfont\reducedttsl\ttslshape{10}{900}{OT1TT}
+\font\reducedi=cmmi9
+\font\reducedsy=cmsy9
+\def\reducedecsize{0900}
+
+\divide\parskip by 2  % reduce space between paragraphs
+\textleading = 12pt   % line spacing for 10pt CM
+\textfonts            % reset the current fonts
+\rm
+} % end of 10pt text font size definitions, \definetextfontsizex
+
+% Fonts for short table of contents.
+\setfont\shortcontrm\rmshape{12}{1000}{OT1}
+\setfont\shortcontbf\bfshape{10}{\magstep1}{OT1}  % no cmb12
+\setfont\shortcontsl\slshape{12}{1000}{OT1}
+\setfont\shortconttt\ttshape{12}{1000}{OT1TT}
+
+
+% We provide the user-level command
+%   @fonttextsize 10
+% (or 11) to redefine the text font size.  pt is assumed.
+%
+\def\xiword{11}
+\def\xword{10}
+\def\xwordpt{10pt}
+%
+\parseargdef\fonttextsize{%
+  \def\textsizearg{#1}%
+  %\wlog{doing @fonttextsize \textsizearg}%
+  %
+  % Set \globaldefs so that documents can use this inside @tex, since
+  % makeinfo 4.8 does not support it, but we need it nonetheless.
+ % + \begingroup \globaldefs=1 + \ifx\textsizearg\xword \definetextfontsizex + \else \ifx\textsizearg\xiword \definetextfontsizexi + \else + \errhelp=\EMsimple + \errmessage{@fonttextsize only supports `10' or `11', not `\textsizearg'} + \fi\fi + \endgroup +} + +% +% Change the current font style to #1, remembering it in \curfontstyle. +% For now, we do not accumulate font styles: @b{@i{foo}} prints foo in +% italics, not bold italics. +% +\def\setfontstyle#1{% + \def\curfontstyle{#1}% not as a control sequence, because we are \edef'd. + \csname #1font\endcsname % change the current font +} + +\def\rm{\fam=0 \setfontstyle{rm}} +\def\it{\fam=\itfam \setfontstyle{it}} +\def\sl{\fam=\slfam \setfontstyle{sl}} +\def\bf{\fam=\bffam \setfontstyle{bf}}\def\bfstylename{bf} +\def\tt{\fam=\ttfam \setfontstyle{tt}} + +% Texinfo sort of supports the sans serif font style, which plain TeX does not. +% So we set up a \sf. +\newfam\sffam +\def\sf{\fam=\sffam \setfontstyle{sf}} + +% We don't need math for this font style. +\def\ttsl{\setfontstyle{ttsl}} + + +% In order for the font changes to affect most math symbols and letters, +% we have to define the \textfont of the standard families. +% We don't bother to reset \scriptscriptfont; awaiting user need. +% +\def\resetmathfonts{% + \textfont0=\rmfont \textfont1=\ifont \textfont2=\syfont + \textfont\itfam=\itfont \textfont\slfam=\slfont \textfont\bffam=\bffont + \textfont\ttfam=\ttfont \textfont\sffam=\sffont + % + % Fonts for superscript. Note that the 7pt fonts are used regardless + % of the current font size. + \scriptfont0=\sevenrm \scriptfont1=\seveni \scriptfont2=\sevensy + \scriptfont\itfam=\sevenit \scriptfont\slfam=\sevensl + \scriptfont\bffam=\sevenbf \scriptfont\ttfam=\seventt + \scriptfont\sffam=\sevensf +} + +% + +% The font-changing commands (all called \...fonts) redefine the meanings +% of \STYLEfont, instead of just \STYLE. We do this because \STYLE needs +% to also set the current \fam for math mode. 
Our \STYLE (e.g., \rm) +% commands hardwire \STYLEfont to set the current font. +% +% The fonts used for \ifont are for "math italics" (\itfont is for italics +% in regular text). \syfont is also used in math mode only. +% +% Each font-changing command also sets the names \lsize (one size lower) +% and \lllsize (three sizes lower). These relative commands are used +% in, e.g., the LaTeX logo and acronyms. +% +% This all needs generalizing, badly. +% + +\def\assignfonts#1{% + \expandafter\let\expandafter\rmfont\csname #1rm\endcsname + \expandafter\let\expandafter\itfont\csname #1it\endcsname + \expandafter\let\expandafter\slfont\csname #1sl\endcsname + \expandafter\let\expandafter\bffont\csname #1bf\endcsname + \expandafter\let\expandafter\ttfont\csname #1tt\endcsname + \expandafter\let\expandafter\smallcaps\csname #1sc\endcsname + \expandafter\let\expandafter\sffont \csname #1sf\endcsname + \expandafter\let\expandafter\ifont \csname #1i\endcsname + \expandafter\let\expandafter\syfont \csname #1sy\endcsname + \expandafter\let\expandafter\ttslfont\csname #1ttsl\endcsname +} + +\newif\ifrmisbold + +% Select smaller font size with the current style. Used to change font size +% in, e.g., the LaTeX logo and acronyms. If we are using bold fonts for +% normal roman text, also use bold fonts for roman text in the smaller size. 
+\def\switchtolllsize{% + \expandafter\assignfonts\expandafter{\lllsize}% + \ifrmisbold + \let\rmfont\bffont + \fi + \csname\curfontstyle\endcsname +}% + +\def\switchtolsize{% + \expandafter\assignfonts\expandafter{\lsize}% + \ifrmisbold + \let\rmfont\bffont + \fi + \csname\curfontstyle\endcsname +}% + +\def\definefontsetatsize#1#2#3#4#5{% +\expandafter\def\csname #1fonts\endcsname{% + \def\curfontsize{#1}% + \def\lsize{#2}\def\lllsize{#3}% + \csname rmisbold#5\endcsname + \assignfonts{#1}% + \resetmathfonts + \setleading{#4}% +}} + +\definefontsetatsize{text} {reduced}{smaller}{\textleading}{false} +\definefontsetatsize{title} {chap} {subsec} {27pt} {true} +\definefontsetatsize{chap} {sec} {text} {19pt} {true} +\definefontsetatsize{sec} {subsec} {reduced}{17pt} {true} +\definefontsetatsize{ssec} {text} {small} {15pt} {true} +\definefontsetatsize{reduced}{small} {smaller}{10.5pt}{false} +\definefontsetatsize{small} {smaller}{smaller}{10.5pt}{false} +\definefontsetatsize{smaller}{smaller}{smaller}{9.5pt} {false} + +\def\titlefont#1{{\titlefonts\rm #1}} +\let\subsecfonts = \ssecfonts +\let\subsubsecfonts = \ssecfonts + +% Define these just so they can be easily changed for other fonts. +\def\angleleft{$\langle$} +\def\angleright{$\rangle$} + +% Set the fonts to use with the @small... environments. +\let\smallexamplefonts = \smallfonts + +% About \smallexamplefonts. If we use \smallfonts (9pt), @smallexample +% can fit this many characters: +% 8.5x11=86 smallbook=72 a4=90 a5=69 +% If we use \scriptfonts (8pt), then we can fit this many characters: +% 8.5x11=90+ smallbook=80 a4=90+ a5=77 +% For me, subjectively, the few extra characters that fit aren't worth +% the additional smallness of 8pt. So I'm making the default 9pt. +% +% By the way, for comparison, here's what fits with @example (10pt): +% 8.5x11=71 smallbook=60 a4=75 a5=58 +% --karl, 24jan03. + +% Set up the default fonts, so we can use them for creating boxes. 
+% +\definetextfontsizexi + + +\message{markup,} + +% Check if we are currently using a typewriter font. Since all the +% Computer Modern typewriter fonts have zero interword stretch (and +% shrink), and it is reasonable to expect all typewriter fonts to have +% this property, we can check that font parameter. +% +\def\ifmonospace{\ifdim\fontdimen3\font=0pt } + +% Markup style infrastructure. \defmarkupstylesetup\INITMACRO will +% define and register \INITMACRO to be called on markup style changes. +% \INITMACRO can check \currentmarkupstyle for the innermost +% style. + +\let\currentmarkupstyle\empty + +\def\setupmarkupstyle#1{% + \def\currentmarkupstyle{#1}% + \markupstylesetup +} + +\let\markupstylesetup\empty + +\def\defmarkupstylesetup#1{% + \expandafter\def\expandafter\markupstylesetup + \expandafter{\markupstylesetup #1}% + \def#1% +} + +% Markup style setup for left and right quotes. +\defmarkupstylesetup\markupsetuplq{% + \expandafter\let\expandafter \temp + \csname markupsetuplq\currentmarkupstyle\endcsname + \ifx\temp\relax \markupsetuplqdefault \else \temp \fi +} + +\defmarkupstylesetup\markupsetuprq{% + \expandafter\let\expandafter \temp + \csname markupsetuprq\currentmarkupstyle\endcsname + \ifx\temp\relax \markupsetuprqdefault \else \temp \fi +} + +{ +\catcode`\'=\active +\catcode`\`=\active + +\gdef\markupsetuplqdefault{\let`\lq} +\gdef\markupsetuprqdefault{\let'\rq} + +\gdef\markupsetcodequoteleft{\let`\codequoteleft} +\gdef\markupsetcodequoteright{\let'\codequoteright} +} + +\let\markupsetuplqcode \markupsetcodequoteleft +\let\markupsetuprqcode \markupsetcodequoteright +% +\let\markupsetuplqexample \markupsetcodequoteleft +\let\markupsetuprqexample \markupsetcodequoteright +% +\let\markupsetuplqkbd \markupsetcodequoteleft +\let\markupsetuprqkbd \markupsetcodequoteright +% +\let\markupsetuplqsamp \markupsetcodequoteleft +\let\markupsetuprqsamp \markupsetcodequoteright +% +\let\markupsetuplqverb \markupsetcodequoteleft +\let\markupsetuprqverb 
\markupsetcodequoteright +% +\let\markupsetuplqverbatim \markupsetcodequoteleft +\let\markupsetuprqverbatim \markupsetcodequoteright + +% Allow an option to not use regular directed right quote/apostrophe +% (char 0x27), but instead the undirected quote from cmtt (char 0x0d). +% The undirected quote is ugly, so don't make it the default, but it +% works for pasting with more pdf viewers (at least evince), the +% lilypond developers report. xpdf does work with the regular 0x27. +% +\def\codequoteright{% + \ifmonospace + \expandafter\ifx\csname SETtxicodequoteundirected\endcsname\relax + \expandafter\ifx\csname SETcodequoteundirected\endcsname\relax + '% + \else \char'15 \fi + \else \char'15 \fi + \else + '% + \fi +} +% +% and a similar option for the left quote char vs. a grave accent. +% Modern fonts display ASCII 0x60 as a grave accent, so some people like +% the code environments to do likewise. +% +\def\codequoteleft{% + \ifmonospace + \expandafter\ifx\csname SETtxicodequotebacktick\endcsname\relax + \expandafter\ifx\csname SETcodequotebacktick\endcsname\relax + % [Knuth] pp. 380,381,391 + % \relax disables Spanish ligatures ?` and !` of \tt font. + \relax`% + \else \char'22 \fi + \else \char'22 \fi + \else + \relax`% + \fi +} + +% Commands to set the quote options. 
+% +\parseargdef\codequoteundirected{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxicodequoteundirected\endcsname + = t% + \else\ifx\temp\offword + \expandafter\let\csname SETtxicodequoteundirected\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @codequoteundirected value `\temp', must be on|off}% + \fi\fi +} +% +\parseargdef\codequotebacktick{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxicodequotebacktick\endcsname + = t% + \else\ifx\temp\offword + \expandafter\let\csname SETtxicodequotebacktick\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @codequotebacktick value `\temp', must be on|off}% + \fi\fi +} + +% [Knuth] pp. 380,381,391, disable Spanish ligatures ?` and !` of \tt font. +\def\noligaturesquoteleft{\relax\lq} + +% Count depth in font-changes, for error checks +\newcount\fontdepth \fontdepth=0 + +% Font commands. + +% #1 is the font command (\sl or \it), #2 is the text to slant. +% If we are in a monospaced environment, however, 1) always use \ttsl, +% and 2) do not add an italic correction. +\def\dosmartslant#1#2{% + \ifusingtt + {{\ttsl #2}\let\next=\relax}% + {\def\next{{#1#2}\futurelet\next\smartitaliccorrection}}% + \next +} +\def\smartslanted{\dosmartslant\sl} +\def\smartitalic{\dosmartslant\it} + +% Output an italic correction unless \next (presumed to be the following +% character) is such as not to need one. +\def\smartitaliccorrection{% + \ifx\next,% + \else\ifx\next-% + \else\ifx\next.% + \else\ifx\next\.% + \else\ifx\next\comma% + \else\ptexslash + \fi\fi\fi\fi\fi + \aftersmartic +} + +% Unconditional use \ttsl, and no ic. @var is set to this for defuns. +\def\ttslanted#1{{\ttsl #1}} + +% @cite is like \smartslanted except unconditionally use \sl. We never want +% ttsl for book titles, do we? 
+\def\cite#1{{\sl #1}\futurelet\next\smartitaliccorrection} + +\def\aftersmartic{} +\def\var#1{% + \let\saveaftersmartic = \aftersmartic + \def\aftersmartic{\null\let\aftersmartic=\saveaftersmartic}% + \smartslanted{#1}% +} + +\let\i=\smartitalic +\let\slanted=\smartslanted +\let\dfn=\smartslanted +\let\emph=\smartitalic + +% Explicit font changes: @r, @sc, undocumented @ii. +\def\r#1{{\rm #1}} % roman font +\def\sc#1{{\smallcaps#1}} % smallcaps font +\def\ii#1{{\it #1}} % italic font + +% @b, explicit bold. Also @strong. +\def\b#1{{\bf #1}} +\let\strong=\b + +% @sansserif, explicit sans. +\def\sansserif#1{{\sf #1}} + +% We can't just use \exhyphenpenalty, because that only has effect at +% the end of a paragraph. Restore normal hyphenation at the end of the +% group within which \nohyphenation is presumably called. +% +\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation} +\def\restorehyphenation{\hyphenchar\font = `- } + +% Set sfcode to normal for the chars that usually have another value. +% Can't use plain's \frenchspacing because it uses the `\x notation, and +% sometimes \x has an active definition that messes things up. +% +\catcode`@=11 + \def\plainfrenchspacing{% + \sfcode`\.=\@m \sfcode`\?=\@m \sfcode`\!=\@m + \sfcode`\:=\@m \sfcode`\;=\@m \sfcode`\,=\@m + \def\endofsentencespacefactor{1000}% for @. and friends + } + \def\plainnonfrenchspacing{% + \sfcode`\.3000\sfcode`\?3000\sfcode`\!3000 + \sfcode`\:2000\sfcode`\;1500\sfcode`\,1250 + \def\endofsentencespacefactor{3000}% for @. and friends + } +\catcode`@=\other +\def\endofsentencespacefactor{3000}% default + +% @t, explicit typewriter. +\def\t#1{% + {\tt \rawbackslash \plainfrenchspacing #1}% + \null +} + +% @samp. +\def\samp#1{{\setupmarkupstyle{samp}\lq\tclose{#1}\rq\null}} + +% @indicateurl is \samp, that is, with quotes. 
+\let\indicateurl=\samp + +% @code (and similar) prints in typewriter, but with spaces the same +% size as normal in the surrounding text, without hyphenation, etc. +% This is a subroutine for that. +\def\tclose#1{% + {% + % Change normal interword space to be same as for the current font. + \spaceskip = \fontdimen2\font + % + % Switch to typewriter. + \tt + % + % But `\ ' produces the large typewriter interword space. + \def\ {{\spaceskip = 0pt{} }}% + % + % Turn off hyphenation. + \nohyphenation + % + \rawbackslash + \plainfrenchspacing + #1% + }% + \null % reset spacefactor to 1000 +} + +% We *must* turn on hyphenation at `-' and `_' in @code. +% (But see \codedashfinish below.) +% Otherwise, it is too hard to avoid overfull hboxes +% in the Emacs manual, the Library manual, etc. +% +% Unfortunately, TeX uses one parameter (\hyphenchar) to control +% both hyphenation at - and hyphenation within words. +% We must therefore turn them both off (\tclose does that) +% and arrange explicitly to hyphenate at a dash. -- rms. +{ + \catcode`\-=\active \catcode`\_=\active + \catcode`\'=\active \catcode`\`=\active + \global\let'=\rq \global\let`=\lq % default definitions + % + \global\def\code{\begingroup + \setupmarkupstyle{code}% + % The following should really be moved into \setupmarkupstyle handlers. + \catcode\dashChar=\active \catcode\underChar=\active + \ifallowcodebreaks + \let-\codedash + \let_\codeunder + \else + \let-\normaldash + \let_\realunder + \fi + % Given -foo (with a single dash), we do not want to allow a break + % after the hyphen. + \global\let\codedashprev=\codedash + % + \codex + } + % + \gdef\codedash{\futurelet\next\codedashfinish} + \gdef\codedashfinish{% + \normaldash % always output the dash character itself. + % + % Now, output a discretionary to allow a line break, unless + % (a) the next character is a -, or + % (b) the preceding character is a -. + % E.g., given --posix, we do not want to allow a break after either -. 
+ % Given --foo-bar, we do want to allow a break between the - and the b. + \ifx\next\codedash \else + \ifx\codedashprev\codedash + \else \discretionary{}{}{}\fi + \fi + % we need the space after the = for the case when \next itself is a + % space token; it would get swallowed otherwise. As in @code{- a}. + \global\let\codedashprev= \next + } +} +\def\normaldash{-} +% +\def\codex #1{\tclose{#1}\endgroup} + +\def\codeunder{% + % this is all so @math{@code{var_name}+1} can work. In math mode, _ + % is "active" (mathcode"8000) and \normalunderscore (or \char95, etc.) + % will therefore expand the active definition of _, which is us + % (inside @code that is), therefore an endless loop. + \ifusingtt{\ifmmode + \mathchar"075F % class 0=ordinary, family 7=ttfam, pos 0x5F=_. + \else\normalunderscore \fi + \discretionary{}{}{}}% + {\_}% +} + +% An additional complication: the above will allow breaks after, e.g., +% each of the four underscores in __typeof__. This is bad. +% @allowcodebreaks provides a document-level way to turn breaking at - +% and _ on and off. +% +\newif\ifallowcodebreaks \allowcodebreakstrue + +\def\keywordtrue{true} +\def\keywordfalse{false} + +\parseargdef\allowcodebreaks{% + \def\txiarg{#1}% + \ifx\txiarg\keywordtrue + \allowcodebreakstrue + \else\ifx\txiarg\keywordfalse + \allowcodebreaksfalse + \else + \errhelp = \EMsimple + \errmessage{Unknown @allowcodebreaks option `\txiarg', must be true|false}% + \fi\fi +} + +% For @command, @env, @file, @option quotes seem unnecessary, +% so use \code rather than \samp. +\let\command=\code +\let\env=\code +\let\file=\code +\let\option=\code + +% @uref (abbreviation for `urlref') aka @url takes an optional +% (comma-separated) second argument specifying the text to display and +% an optional third arg as text to display instead of (rather than in +% addition to) the url itself. First (mandatory) arg is the url. 
+ +% TeX-only option to allow changing PDF output to show only the second +% arg (if given), and not the url (which is then just the link target). +\newif\ifurefurlonlylink + +% The main macro is \urefbreak, which allows breaking at expected +% places within the url. (There used to be another version, which +% didn't support automatic breaking.) +\def\urefbreak{\begingroup \urefcatcodes \dourefbreak} +\let\uref=\urefbreak +% +\def\dourefbreak#1{\urefbreakfinish #1,,,\finish} +\def\urefbreakfinish#1,#2,#3,#4\finish{% doesn't work in @example + \unsepspaces + \pdfurl{#1}% + \setbox0 = \hbox{\ignorespaces #3}% + \ifdim\wd0 > 0pt + \unhbox0 % third arg given, show only that + \else + \setbox0 = \hbox{\ignorespaces #2}% look for second arg + \ifdim\wd0 > 0pt + \ifpdf + % For pdfTeX and LuaTeX + \ifurefurlonlylink + % PDF plus option to not display url, show just arg + \unhbox0 + \else + % PDF, normally display both arg and url for consistency, + % visibility, if the pdf is eventually used to print, etc. + \unhbox0\ (\urefcode{#1})% + \fi + \else + \ifx\XeTeXrevision\thisisundefined + \unhbox0\ (\urefcode{#1})% DVI, always show arg and url + \else + % For XeTeX + \ifurefurlonlylink + % PDF plus option to not display url, show just arg + \unhbox0 + \else + % PDF, normally display both arg and url for consistency, + % visibility, if the pdf is eventually used to print, etc. + \unhbox0\ (\urefcode{#1})% + \fi + \fi + \fi + \else + \urefcode{#1}% only url given, so show it + \fi + \fi + \endlink +\endgroup} + +% Allow line breaks around only a few characters (only). +\def\urefcatcodes{% + \catcode`\&=\active \catcode`\.=\active + \catcode`\#=\active \catcode`\?=\active + \catcode`\/=\active +} +{ + \urefcatcodes + % + \global\def\urefcode{\begingroup + \setupmarkupstyle{code}% + \urefcatcodes + \let&\urefcodeamp + \let.\urefcodedot + \let#\urefcodehash + \let?\urefcodequest + \let/\urefcodeslash + \codex + } + % + % By default, they are just regular characters. 
+  \global\def&{\normalamp}
+  \global\def.{\normaldot}
+  \global\def#{\normalhash}
+  \global\def?{\normalquest}
+  \global\def/{\normalslash}
+}
+
+% we put a little stretch before and after the breakable chars, to help
+% line breaking of long url's.  The unequal skips make it look better in
+% cmtt at least, especially for dots.
+%
+% \urefprestretch and \urefpoststretch each emit the break control
+% chosen by @urefbreakstyle followed by zero-width glue that can
+% stretch by the corresponding amount.
+\def\urefprestretchamount{.13em}
+\def\urefpoststretchamount{.1em}
+\def\urefprestretch{\urefprebreak \hskip0pt plus\urefprestretchamount\relax}
+% The glue after the character uses the *post* amount; using the pre
+% amount here left \urefpoststretchamount unused and the skips equal.
+\def\urefpoststretch{\urefpostbreak \hskip0pt plus\urefpoststretchamount\relax}
+%
+% Breakable versions of the special url characters: stretch, the
+% character itself, stretch.
+\def\urefcodeamp{\urefprestretch \&\urefpoststretch}
+\def\urefcodedot{\urefprestretch .\urefpoststretch}
+\def\urefcodehash{\urefprestretch \#\urefpoststretch}
+\def\urefcodequest{\urefprestretch ?\urefpoststretch}
+% The slash needs to look at the following token first.
+\def\urefcodeslash{\futurelet\next\urefcodeslashfinish}
+{
+  \catcode`\/=\active
+  \global\def\urefcodeslashfinish{%
+    \urefprestretch \slashChar
+    % Allow line break only after the final / in a sequence of
+    % slashes, to avoid line break between the slashes in http://.
+    \ifx\next/\else \urefpoststretch \fi
+  }
+}
+
+% One more complication: by default we'll break after the special
+% characters, but some people like to break before the special chars, so
+% allow that.  Also allow no breaking at all, for manual control.
+%
+% @urefbreakstyle ARG, where ARG is `none', `before' or `after':
+% defines \urefprebreak and \urefpostbreak accordingly; any other
+% argument is reported with \errmessage.  The \def lines end in % so
+% that no space token ends up in the macro body.
+%
+\parseargdef\urefbreakstyle{%
+  \def\txiarg{#1}%
+  \ifx\txiarg\wordnone
+    \def\urefprebreak{\nobreak}\def\urefpostbreak{\nobreak}%
+  \else\ifx\txiarg\wordbefore
+    \def\urefprebreak{\allowbreak}\def\urefpostbreak{\nobreak}%
+  \else\ifx\txiarg\wordafter
+    \def\urefprebreak{\nobreak}\def\urefpostbreak{\allowbreak}%
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @urefbreakstyle setting `\txiarg'}%
+  \fi\fi\fi
+}
+\def\wordafter{after}
+\def\wordbefore{before}
+\def\wordnone{none}
+
+\urefbreakstyle after
+
+% @url synonym for @uref, since that's how everyone uses it.
+%
+\let\url=\uref
+
+% rms does not like angle brackets --karl, 17may97.
+% So now @email is just like @uref, unless we are pdf. +% +%\def\email#1{\angleleft{\tt #1}\angleright} +\ifpdf + \def\email#1{\doemail#1,,\finish} + \def\doemail#1,#2,#3\finish{\begingroup + \unsepspaces + \pdfurl{mailto:#1}% + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi + \endlink + \endgroup} +\else + \ifx\XeTeXrevision\thisisundefined + \let\email=\uref + \else + \def\email#1{\doemail#1,,\finish} + \def\doemail#1,#2,#3\finish{\begingroup + \unsepspaces + \pdfurl{mailto:#1}% + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi + \endlink + \endgroup} + \fi +\fi + +% @kbdinputstyle -- arg is `distinct' (@kbd uses slanted tty font always), +% `example' (@kbd uses ttsl only inside of @example and friends), +% or `code' (@kbd uses normal tty font always). +\parseargdef\kbdinputstyle{% + \def\txiarg{#1}% + \ifx\txiarg\worddistinct + \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\ttsl}% + \else\ifx\txiarg\wordexample + \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\tt}% + \else\ifx\txiarg\wordcode + \gdef\kbdexamplefont{\tt}\gdef\kbdfont{\tt}% + \else + \errhelp = \EMsimple + \errmessage{Unknown @kbdinputstyle setting `\txiarg'}% + \fi\fi\fi +} +\def\worddistinct{distinct} +\def\wordexample{example} +\def\wordcode{code} + +% Default is `distinct'. +\kbdinputstyle distinct + +% @kbd is like @code, except that if the argument is just one @key command, +% then @kbd has no effect. +\def\kbd#1{{\def\look{#1}\expandafter\kbdsub\look??\par}} + +\def\xkey{\key} +\def\kbdsub#1#2#3\par{% + \def\one{#1}\def\three{#3}\def\threex{??}% + \ifx\one\xkey\ifx\threex\three \key{#2}% + \else{\tclose{\kbdfont\setupmarkupstyle{kbd}\look}}\fi + \else{\tclose{\kbdfont\setupmarkupstyle{kbd}\look}}\fi +} + +% definition of @key that produces a lozenge. Doesn't adjust to text size. 
+%\setfont\keyrm\rmshape{8}{1000}{OT1} +%\font\keysy=cmsy9 +%\def\key#1{{\keyrm\textfont2=\keysy \leavevmode\hbox{% +% \raise0.4pt\hbox{\angleleft}\kern-.08em\vtop{% +% \vbox{\hrule\kern-0.4pt +% \hbox{\raise0.4pt\hbox{\vphantom{\angleleft}}#1}}% +% \kern-0.4pt\hrule}% +% \kern-.06em\raise0.4pt\hbox{\angleright}}}} + +% definition of @key with no lozenge. If the current font is already +% monospace, don't change it; that way, we respect @kbdinputstyle. But +% if it isn't monospace, then use \tt. +% +\def\key#1{{\setupmarkupstyle{key}% + \nohyphenation + \ifmonospace\else\tt\fi + #1}\null} + +% @clicksequence{File @click{} Open ...} +\def\clicksequence#1{\begingroup #1\endgroup} + +% @clickstyle @arrow (by default) +\parseargdef\clickstyle{\def\click{#1}} +\def\click{\arrow} + +% Typeset a dimension, e.g., `in' or `pt'. The only reason for the +% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt. +% +\def\dmn#1{\thinspace #1} + +% @acronym for "FBI", "NATO", and the like. +% We print this one point size smaller, since it's intended for +% all-uppercase. +% +\def\acronym#1{\doacronym #1,,\finish} +\def\doacronym#1,#2,#3\finish{% + {\switchtolsize #1}% + \def\temp{#2}% + \ifx\temp\empty \else + \space ({\unsepspaces \ignorespaces \temp \unskip})% + \fi + \null % reset \spacefactor=1000 +} + +% @abbr for "Comput. J." and the like. +% No font change, but don't do end-of-sentence spacing. +% +\def\abbr#1{\doabbr #1,,\finish} +\def\doabbr#1,#2,#3\finish{% + {\plainfrenchspacing #1}% + \def\temp{#2}% + \ifx\temp\empty \else + \space ({\unsepspaces \ignorespaces \temp \unskip})% + \fi + \null % reset \spacefactor=1000 +} + +% @asis just yields its argument. Used with @table, for example. +% +\def\asis#1{#1} + +% @math outputs its argument in math mode. +% +% One complication: _ usually means subscripts, but it could also mean +% an actual _ character, as in @math{@var{some_variable} + 1}. 
So make +% _ active, and distinguish by seeing if the current family is \slfam, +% which is what @var uses. +{ + \catcode`\_ = \active + \gdef\mathunderscore{% + \catcode`\_=\active + \def_{\ifnum\fam=\slfam \_\else\sb\fi}% + } +} +% Another complication: we want \\ (and @\) to output a math (or tt) \. +% FYI, plain.tex uses \\ as a temporary control sequence (for no +% particular reason), but this is not advertised and we don't care. +% +% The \mathchar is class=0=ordinary, family=7=ttfam, position=5C=\. +\def\mathbackslash{\ifnum\fam=\ttfam \mathchar"075C \else\backslash \fi} +% +\def\math{% + \ifmmode\else % only go into math if not in math mode already + \tex + \mathunderscore + \let\\ = \mathbackslash + \mathactive + % make the texinfo accent commands work in math mode + \let\"=\ddot + \let\'=\acute + \let\==\bar + \let\^=\hat + \let\`=\grave + \let\u=\breve + \let\v=\check + \let\~=\tilde + \let\dotaccent=\dot + % have to provide another name for sup operator + \let\mathopsup=\sup + $\expandafter\finishmath\fi +} +\def\finishmath#1{#1$\endgroup} % Close the group opened by \tex. + +% Some active characters (such as <) are spaced differently in math. +% We have to reset their definitions in case the @math was an argument +% to a command which sets the catcodes (such as @item or @section). +% +{ + \catcode`^ = \active + \catcode`< = \active + \catcode`> = \active + \catcode`+ = \active + \catcode`' = \active + \gdef\mathactive{% + \let^ = \ptexhat + \let< = \ptexless + \let> = \ptexgtr + \let+ = \ptexplus + \let' = \ptexquoteright + } +} + +% for @sub and @sup, if in math mode, just do a normal sub/superscript. +% If in text, use math to place as sub/superscript, but switch +% into text mode, with smaller fonts. This is a different font than the +% one used for real math sub/superscripts (8pt vs. 7pt), but let's not +% fix it (significant additions to font machinery) until someone notices. 
+% +\def\sub{\ifmmode \expandafter\sb \else \expandafter\finishsub\fi} +\def\finishsub#1{$\sb{\hbox{\switchtolllsize #1}}$}% +% +\def\sup{\ifmmode \expandafter\ptexsp \else \expandafter\finishsup\fi} +\def\finishsup#1{$\ptexsp{\hbox{\switchtolllsize #1}}$}% + +% @inlinefmt{FMTNAME,PROCESSED-TEXT} and @inlineraw{FMTNAME,RAW-TEXT}. +% Ignore unless FMTNAME == tex; then it is like @iftex and @tex, +% except specified as a normal braced arg, so no newlines to worry about. +% +\def\outfmtnametex{tex} +% +\long\def\inlinefmt#1{\doinlinefmt #1,\finish} +\long\def\doinlinefmt#1,#2,\finish{% + \def\inlinefmtname{#1}% + \ifx\inlinefmtname\outfmtnametex \ignorespaces #2\fi +} +% +% @inlinefmtifelse{FMTNAME,THEN-TEXT,ELSE-TEXT} expands THEN-TEXT if +% FMTNAME is tex, else ELSE-TEXT. +\long\def\inlinefmtifelse#1{\doinlinefmtifelse #1,,,\finish} +\long\def\doinlinefmtifelse#1,#2,#3,#4,\finish{% + \def\inlinefmtname{#1}% + \ifx\inlinefmtname\outfmtnametex \ignorespaces #2\else \ignorespaces #3\fi +} +% +% For raw, must switch into @tex before parsing the argument, to avoid +% setting catcodes prematurely. Doing it this way means that, for +% example, @inlineraw{html, foo{bar} gets a parse error instead of being +% ignored. But this isn't important because if people want a literal +% *right* brace they would have to use a command anyway, so they may as +% well use a command to get a left brace too. We could re-use the +% delimiter character idea from \verb, but it seems like overkill. +% +\long\def\inlineraw{\tex \doinlineraw} +\long\def\doinlineraw#1{\doinlinerawtwo #1,\finish} +\def\doinlinerawtwo#1,#2,\finish{% + \def\inlinerawname{#1}% + \ifx\inlinerawname\outfmtnametex \ignorespaces #2\fi + \endgroup % close group opened by \tex. +} + +% @inlineifset{VAR, TEXT} expands TEXT if VAR is @set. 
+% +\long\def\inlineifset#1{\doinlineifset #1,\finish} +\long\def\doinlineifset#1,#2,\finish{% + \def\inlinevarname{#1}% + \expandafter\ifx\csname SET\inlinevarname\endcsname\relax + \else\ignorespaces#2\fi +} + +% @inlineifclear{VAR, TEXT} expands TEXT if VAR is not @set. +% +\long\def\inlineifclear#1{\doinlineifclear #1,\finish} +\long\def\doinlineifclear#1,#2,\finish{% + \def\inlinevarname{#1}% + \expandafter\ifx\csname SET\inlinevarname\endcsname\relax \ignorespaces#2\fi +} + + +\message{glyphs,} +% and logos. + +% @@ prints an @, as does @atchar{}. +\def\@{\char64 } +\let\atchar=\@ + +% @{ @} @lbracechar{} @rbracechar{} all generate brace characters. +\def\lbracechar{{\ifmonospace\char123\else\ensuremath\lbrace\fi}} +\def\rbracechar{{\ifmonospace\char125\else\ensuremath\rbrace\fi}} +\let\{=\lbracechar +\let\}=\rbracechar + +% @comma{} to avoid , parsing problems. +\let\comma = , + +% Accents: @, @dotaccent @ringaccent @ubaraccent @udotaccent +% Others are defined by plain TeX: @` @' @" @^ @~ @= @u @v @H. +\let\, = \ptexc +\let\dotaccent = \ptexdot +\def\ringaccent#1{{\accent23 #1}} +\let\tieaccent = \ptext +\let\ubaraccent = \ptexb +\let\udotaccent = \d + +% Other special characters: @questiondown @exclamdown @ordf @ordm +% Plain TeX defines: @AA @AE @O @OE @L (plus lowercase versions) @ss. +\def\questiondown{?`} +\def\exclamdown{!`} +\def\ordf{\leavevmode\raise1ex\hbox{\switchtolllsize \underbar{a}}} +\def\ordm{\leavevmode\raise1ex\hbox{\switchtolllsize \underbar{o}}} + +% Dotless i and dotless j, used for accents. +\def\imacro{i} +\def\jmacro{j} +\def\dotless#1{% + \def\temp{#1}% + \ifx\temp\imacro \ifmmode\imath \else\ptexi \fi + \else\ifx\temp\jmacro \ifmmode\jmath \else\j \fi + \else \errmessage{@dotless can be used only with i or j}% + \fi\fi +} + +% The \TeX{} logo, as in plain, but resetting the spacing so that a +% period following counts as ending a sentence. (Idea found in latex.) +% +\edef\TeX{\TeX \spacefactor=1000 } + +% @LaTeX{} logo. 
Not quite the same results as the definition in +% latex.ltx, since we use a different font for the raised A; it's most +% convenient for us to use an explicitly smaller font, rather than using +% the \scriptstyle font (since we don't reset \scriptstyle and +% \scriptscriptstyle). +% +\def\LaTeX{% + L\kern-.36em + {\setbox0=\hbox{T}% + \vbox to \ht0{\hbox{% + \ifx\textnominalsize\xwordpt + % for 10pt running text, lllsize (8pt) is too small for the A in LaTeX. + % Revert to plain's \scriptsize, which is 7pt. + \count255=\the\fam $\fam\count255 \scriptstyle A$% + \else + % For 11pt, we can use our lllsize. + \switchtolllsize A% + \fi + }% + \vss + }}% + \kern-.15em + \TeX +} + +% Some math mode symbols. Define \ensuremath to switch into math mode +% unless we are already there. Expansion tricks may not be needed here, +% but safer, and can't hurt. +\def\ensuremath{\ifmmode \expandafter\asis \else\expandafter\ensuredmath \fi} +\def\ensuredmath#1{$\relax#1$} +% +\def\bullet{\ensuremath\ptexbullet} +\def\geq{\ensuremath\ge} +\def\leq{\ensuremath\le} +\def\minus{\ensuremath-} + +% @dots{} outputs an ellipsis using the current font. +% We do .5em per period so that it has the same spacing in the cm +% typewriter fonts as three actual period characters; on the other hand, +% in other typewriter fonts three periods are wider than 1.5em. So do +% whichever is larger. +% +\def\dots{% + \leavevmode + \setbox0=\hbox{...}% get width of three periods + \ifdim\wd0 > 1.5em + \dimen0 = \wd0 + \else + \dimen0 = 1.5em + \fi + \hbox to \dimen0{% + \hskip 0pt plus.25fil + .\hskip 0pt plus1fil + .\hskip 0pt plus1fil + .\hskip 0pt plus.5fil + }% +} + +% @enddots{} is an end-of-sentence ellipsis. +% +\def\enddots{% + \dots + \spacefactor=\endofsentencespacefactor +} + +% @point{}, @result{}, @expansion{}, @print{}, @equiv{}. +% +% Since these characters are used in examples, they should be an even number of +% \tt widths. Each \tt character is 1en, so two makes it 1em. 
+% +\def\point{$\star$} +\def\arrow{\leavevmode\raise.05ex\hbox to 1em{\hfil$\rightarrow$\hfil}} +\def\result{\leavevmode\raise.05ex\hbox to 1em{\hfil$\Rightarrow$\hfil}} +\def\expansion{\leavevmode\hbox to 1em{\hfil$\mapsto$\hfil}} +\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}} +\def\equiv{\leavevmode\hbox to 1em{\hfil$\ptexequiv$\hfil}} + +% The @error{} command. +% Adapted from the TeXbook's \boxit. +% +\newbox\errorbox +% +{\ttfont \global\dimen0 = 3em}% Width of the box. +\dimen2 = .55pt % Thickness of rules +% The text. (`r' is open on the right, `e' somewhat less so on the left.) +\setbox0 = \hbox{\kern-.75pt \reducedsf \putworderror\kern-1.5pt} +% +\setbox\errorbox=\hbox to \dimen0{\hfil + \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right. + \advance\hsize by -2\dimen2 % Rules. + \vbox{% + \hrule height\dimen2 + \hbox{\vrule width\dimen2 \kern3pt % Space to left of text. + \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below. + \kern3pt\vrule width\dimen2}% Space to right. + \hrule height\dimen2} + \hfil} +% +\def\error{\leavevmode\lower.7ex\copy\errorbox} + +% @pounds{} is a sterling sign, which Knuth put in the CM italic font. +% +\def\pounds{{\it\$}} + +% @euro{} comes from a separate font, depending on the current style. +% We use the free feym* fonts from the eurosym package by Henrik +% Theiling, which support regular, slanted, bold and bold slanted (and +% "outlined" (blackboard bold, sort of) versions, which we don't need). +% It is available from http://www.ctan.org/tex-archive/fonts/eurosym. +% +% Although only regular is the truly official Euro symbol, we ignore +% that. The Euro is designed to be slightly taller than the regular +% font height. +% +% feymr - regular +% feymo - slanted +% feybr - bold +% feybo - bold slanted +% +% There is no good (free) typewriter version, to my knowledge. +% A feymr10 euro is ~7.3pt wide, while a normal cmtt10 char is ~5.25pt wide. +% Hmm. +% +% Also doesn't work in math. 
Do we need to do math with euro symbols? +% Hope not. +% +% +\def\euro{{\eurofont e}} +\def\eurofont{% + % We set the font at each command, rather than predefining it in + % \textfonts and the other font-switching commands, so that + % installations which never need the symbol don't have to have the + % font installed. + % + % There is only one designed size (nominal 10pt), so we always scale + % that to the current nominal size. + % + % By the way, simply using "at 1em" works for cmr10 and the like, but + % does not work for cmbx10 and other extended/shrunken fonts. + % + \def\eurosize{\csname\curfontsize nominalsize\endcsname}% + % + \ifx\curfontstyle\bfstylename + % bold: + \font\thiseurofont = \ifusingit{feybo10}{feybr10} at \eurosize + \else + % regular: + \font\thiseurofont = \ifusingit{feymo10}{feymr10} at \eurosize + \fi + \thiseurofont +} + +% Glyphs from the EC fonts. We don't use \let for the aliases, because +% sometimes we redefine the original macro, and the alias should reflect +% the redefinition. +% +% Use LaTeX names for the Icelandic letters. +\def\DH{{\ecfont \char"D0}} % Eth +\def\dh{{\ecfont \char"F0}} % eth +\def\TH{{\ecfont \char"DE}} % Thorn +\def\th{{\ecfont \char"FE}} % thorn +% +\def\guillemetleft{{\ecfont \char"13}} +\def\guillemotleft{\guillemetleft} +\def\guillemetright{{\ecfont \char"14}} +\def\guillemotright{\guillemetright} +\def\guilsinglleft{{\ecfont \char"0E}} +\def\guilsinglright{{\ecfont \char"0F}} +\def\quotedblbase{{\ecfont \char"12}} +\def\quotesinglbase{{\ecfont \char"0D}} +% +% This positioning is not perfect (see the ogonek LaTeX package), but +% we have the precomposed glyphs for the most common cases. We put the +% tests to use those glyphs in the single \ogonek macro so we have fewer +% dummy definitions to worry about for index entries, etc. +% +% ogonek is also used with other letters in Lithuanian (IOU), but using +% the precomposed glyphs for those is not so easy since they aren't in +% the same EC font. 
+\def\ogonek#1{{% + \def\temp{#1}% + \ifx\temp\macrocharA\Aogonek + \else\ifx\temp\macrochara\aogonek + \else\ifx\temp\macrocharE\Eogonek + \else\ifx\temp\macrochare\eogonek + \else + \ecfont \setbox0=\hbox{#1}% + \ifdim\ht0=1ex\accent"0C #1% + \else\ooalign{\unhbox0\crcr\hidewidth\char"0C \hidewidth}% + \fi + \fi\fi\fi\fi + }% +} +\def\Aogonek{{\ecfont \char"81}}\def\macrocharA{A} +\def\aogonek{{\ecfont \char"A1}}\def\macrochara{a} +\def\Eogonek{{\ecfont \char"86}}\def\macrocharE{E} +\def\eogonek{{\ecfont \char"A6}}\def\macrochare{e} +% +% Use the European Computer Modern fonts (cm-super in outline format) +% for non-CM glyphs. That is ec* for regular text and tc* for the text +% companion symbols (LaTeX TS1 encoding). Both are part of the ec +% package and follow the same conventions. +% +\def\ecfont{\etcfont{e}} +\def\tcfont{\etcfont{t}} +% +\def\etcfont#1{% + % We can't distinguish serif/sans and italic/slanted, but this + % is used for crude hacks anyway (like adding French and German + % quotes to documents typeset with CM, where we lose kerning), so + % hopefully nobody will notice/care. + \edef\ecsize{\csname\curfontsize ecsize\endcsname}% + \edef\nominalsize{\csname\curfontsize nominalsize\endcsname}% + \ifmonospace + % typewriter: + \font\thisecfont = #1ctt\ecsize \space at \nominalsize + \else + \ifx\curfontstyle\bfstylename + % bold: + \font\thisecfont = #1cb\ifusingit{i}{x}\ecsize \space at \nominalsize + \else + % regular: + \font\thisecfont = #1c\ifusingit{ti}{rm}\ecsize \space at \nominalsize + \fi + \fi + \thisecfont +} + +% @registeredsymbol - R in a circle. The font for the R should really +% be smaller yet, but lllsize is the best we can do for now. +% Adapted from the plain.tex definition of \copyright. +% +\def\registeredsymbol{% + $^{{\ooalign{\hfil\raise.07ex\hbox{\switchtolllsize R}% + \hfil\crcr\Orb}}% + }$% +} + +% @textdegree - the normal degrees sign. 
+% +\def\textdegree{$^\circ$} + +% Laurent Siebenmann reports \Orb undefined with: +% Textures 1.7.7 (preloaded format=plain 93.10.14) (68K) 16 APR 2004 02:38 +% so we'll define it if necessary. +% +\ifx\Orb\thisisundefined +\def\Orb{\mathhexbox20D} +\fi + +% Quotes. +\chardef\quotedblleft="5C +\chardef\quotedblright=`\" +\chardef\quoteleft=`\` +\chardef\quoteright=`\' + + +\message{page headings,} + +\newskip\titlepagetopglue \titlepagetopglue = 1.5in +\newskip\titlepagebottomglue \titlepagebottomglue = 2pc + +% First the title page. Must do @settitle before @titlepage. +\newif\ifseenauthor +\newif\iffinishedtitlepage + +% @setcontentsaftertitlepage used to do an implicit @contents or +% @shortcontents after @end titlepage, but it is now obsolete. +\def\setcontentsaftertitlepage{% + \errmessage{@setcontentsaftertitlepage has been removed as a Texinfo + command; move your @contents command if you want the contents + after the title page.}}% +\def\setshortcontentsaftertitlepage{% + \errmessage{@setshortcontentsaftertitlepage has been removed as a Texinfo + command; move your @shortcontents and @contents commands if you + want the contents after the title page.}}% + +\parseargdef\shorttitlepage{% + \begingroup \hbox{}\vskip 1.5in \chaprm \centerline{#1}% + \endgroup\page\hbox{}\page} + +\envdef\titlepage{% + % Open one extra group, as we want to close it in the middle of \Etitlepage. + \begingroup + \parindent=0pt \textfonts + % Leave some space at the very top of the page. + \vglue\titlepagetopglue + % No rule at page bottom unless we print one at the top with @title. + \finishedtitlepagetrue + % + % Most title ``pages'' are actually two pages long, with space + % at the top of the second. We don't want the ragged left on the second. 
+ \let\oldpage = \page + \def\page{% + \iffinishedtitlepage\else + \finishtitlepage + \fi + \let\page = \oldpage + \page + \null + }% +} + +\def\Etitlepage{% + \iffinishedtitlepage\else + \finishtitlepage + \fi + % It is important to do the page break before ending the group, + % because the headline and footline are only empty inside the group. + % If we use the new definition of \page, we always get a blank page + % after the title page, which we certainly don't want. + \oldpage + \endgroup + % + % Need this before the \...aftertitlepage checks so that if they are + % in effect the toc pages will come out with page numbers. + \HEADINGSon +} + +\def\finishtitlepage{% + \vskip4pt \hrule height 2pt width \hsize + \vskip\titlepagebottomglue + \finishedtitlepagetrue +} + +% Settings used for typesetting titles: no hyphenation, no indentation, +% don't worry much about spacing, ragged right. This should be used +% inside a \vbox, and fonts need to be set appropriately first. \par should +% be specified before the end of the \vbox, since a vbox is a group. +% +\def\raggedtitlesettings{% + \rm + \hyphenpenalty=10000 + \parindent=0pt + \tolerance=5000 + \ptexraggedright +} + +% Macros to be used within @titlepage: + +\let\subtitlerm=\rmfont +\def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines} + +\parseargdef\title{% + \checkenv\titlepage + \vbox{\titlefonts \raggedtitlesettings #1\par}% + % print a rule at the page bottom also. + \finishedtitlepagefalse + \vskip4pt \hrule height 4pt width \hsize \vskip4pt +} + +\parseargdef\subtitle{% + \checkenv\titlepage + {\subtitlefont \rightline{#1}}% +} + +% @author should come last, but may come many times. +% It can also be used inside @quotation. +% +\parseargdef\author{% + \def\temp{\quotation}% + \ifx\thisenv\temp + \def\quotationauthor{#1}% printed in \Equotation. 
+ \else + \checkenv\titlepage + \ifseenauthor\else \vskip 0pt plus 1filll \seenauthortrue \fi + {\secfonts\rm \leftline{#1}}% + \fi +} + + +% Set up page headings and footings. + +\let\thispage=\folio + +\newtoks\evenheadline % headline on even pages +\newtoks\oddheadline % headline on odd pages +\newtoks\evenfootline % footline on even pages +\newtoks\oddfootline % footline on odd pages + +% Now make \makeheadline and \makefootline in Plain TeX use those variables +\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline + \else \the\evenheadline \fi}} +\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline + \else \the\evenfootline \fi}\HEADINGShook} +\let\HEADINGShook=\relax + +% Commands to set those variables. +% For example, this is what @headings on does +% @evenheading @thistitle|@thispage|@thischapter +% @oddheading @thischapter|@thispage|@thistitle +% @evenfooting @thisfile|| +% @oddfooting ||@thisfile + + +\def\evenheading{\parsearg\evenheadingxxx} +\def\evenheadingxxx #1{\evenheadingyyy #1\|\|\|\|\finish} +\def\evenheadingyyy #1\|#2\|#3\|#4\finish{% +\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\def\oddheading{\parsearg\oddheadingxxx} +\def\oddheadingxxx #1{\oddheadingyyy #1\|\|\|\|\finish} +\def\oddheadingyyy #1\|#2\|#3\|#4\finish{% +\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\parseargdef\everyheading{\oddheadingxxx{#1}\evenheadingxxx{#1}}% + +\def\evenfooting{\parsearg\evenfootingxxx} +\def\evenfootingxxx #1{\evenfootingyyy #1\|\|\|\|\finish} +\def\evenfootingyyy #1\|#2\|#3\|#4\finish{% +\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\def\oddfooting{\parsearg\oddfootingxxx} +\def\oddfootingxxx #1{\oddfootingyyy #1\|\|\|\|\finish} +\def\oddfootingyyy #1\|#2\|#3\|#4\finish{% + \global\oddfootline = {\rlap{\centerline{#2}}\line{#1\hfil#3}}% + % + % Leave some space for the footline. Hopefully ok to assume + % @evenfooting will not be used by itself. 
+ \global\advance\txipageheight by -12pt + \global\advance\vsize by -12pt +} + +\parseargdef\everyfooting{\oddfootingxxx{#1}\evenfootingxxx{#1}} + +% @evenheadingmarks top \thischapter <- chapter at the top of a page +% @evenheadingmarks bottom \thischapter <- chapter at the bottom of a page +% +% The same set of arguments for: +% +% @oddheadingmarks +% @evenfootingmarks +% @oddfootingmarks +% @everyheadingmarks +% @everyfootingmarks + +% These define \getoddheadingmarks, \getevenheadingmarks, +% \getoddfootingmarks, and \getevenfootingmarks, each to one of +% \gettopheadingmarks, \getbottomheadingmarks. +% +\def\evenheadingmarks{\headingmarks{even}{heading}} +\def\oddheadingmarks{\headingmarks{odd}{heading}} +\def\evenfootingmarks{\headingmarks{even}{footing}} +\def\oddfootingmarks{\headingmarks{odd}{footing}} +\parseargdef\everyheadingmarks{\headingmarks{even}{heading}{#1} + \headingmarks{odd}{heading}{#1} } +\parseargdef\everyfootingmarks{\headingmarks{even}{footing}{#1} + \headingmarks{odd}{footing}{#1} } +% #1 = even/odd, #2 = heading/footing, #3 = top/bottom. +\def\headingmarks#1#2#3 {% + \expandafter\let\expandafter\temp \csname get#3headingmarks\endcsname + \global\expandafter\let\csname get#1#2marks\endcsname \temp +} + +\everyheadingmarks bottom +\everyfootingmarks bottom + +% @headings double turns headings on for double-sided printing. +% @headings single turns headings on for single-sided printing. +% @headings off turns them off. +% @headings on same as @headings double, retained for compatibility. +% @headings after turns on double-sided headings after this page. +% @headings doubleafter turns on double-sided headings after this page. +% @headings singleafter turns on single-sided headings after this page. +% By default, they are off at the start of a document, +% and turned `on' after @end titlepage. 
+ +\parseargdef\headings{\csname HEADINGS#1\endcsname} + +\def\headingsoff{% non-global headings elimination + \evenheadline={\hfil}\evenfootline={\hfil}% + \oddheadline={\hfil}\oddfootline={\hfil}% +} + +\def\HEADINGSoff{{\globaldefs=1 \headingsoff}} % global setting +\HEADINGSoff % it's the default + +% When we turn headings on, set the page number to 1. +% For double-sided printing, put current file name in lower left corner, +% chapter name on inside top of right hand pages, document +% title on inside top of left hand pages, and page numbers on outside top +% edge of all pages. +\def\HEADINGSdouble{% +\global\pageno=1 +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\folio\hfil\thistitle}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chapoddpage +} +\let\contentsalignmacro = \chappager + +% For single-sided printing, chapter title goes across top left of page, +% page number on top right. +\def\HEADINGSsingle{% +\global\pageno=1 +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\thischapterheading\hfil\folio}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chappager +} +\def\HEADINGSon{\HEADINGSdouble} + +\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} +\let\HEADINGSdoubleafter=\HEADINGSafter +\def\HEADINGSdoublex{% +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\folio\hfil\thistitle}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chapoddpage +} + +\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} +\def\HEADINGSsinglex{% +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\thischapterheading\hfil\folio}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chappager +} + +% Subroutines used in 
generating headings +% This produces Day Month Year style of output. +% Only define if not already defined, in case a txi-??.tex file has set +% up a different format (e.g., txi-cs.tex does this). +\ifx\today\thisisundefined +\def\today{% + \number\day\space + \ifcase\month + \or\putwordMJan\or\putwordMFeb\or\putwordMMar\or\putwordMApr + \or\putwordMMay\or\putwordMJun\or\putwordMJul\or\putwordMAug + \or\putwordMSep\or\putwordMOct\or\putwordMNov\or\putwordMDec + \fi + \space\number\year} +\fi + +% @settitle line... specifies the title of the document, for headings. +% It generates no output of its own. +\def\thistitle{\putwordNoTitle} +\def\settitle{\parsearg{\gdef\thistitle}} + + +\message{tables,} +% Tables -- @table, @ftable, @vtable, @item(x). + +% default indentation of table text +\newdimen\tableindent \tableindent=.8in +% default indentation of @itemize and @enumerate text +\newdimen\itemindent \itemindent=.3in +% margin between end of table item and start of table text. +\newdimen\itemmargin \itemmargin=.1in + +% used internally for \itemindent minus \itemmargin +\newdimen\itemmax + +% Note @table, @ftable, and @vtable define @item, @itemx, etc., with +% these defs. +% They also define \itemindex +% to index the item name in whatever manner is desired (perhaps none). + +\newif\ifitemxneedsnegativevskip + +\def\itemxpar{\par\ifitemxneedsnegativevskip\nobreak\vskip-\parskip\nobreak\fi} + +\def\internalBitem{\smallbreak \parsearg\itemzzz} +\def\internalBitemx{\itemxpar \parsearg\itemzzz} + +\def\itemzzz #1{\begingroup % + \advance\hsize by -\rightskip + \advance\hsize by -\tableindent + \setbox0=\hbox{\itemindicate{#1}}% + \itemindex{#1}% + \nobreak % This prevents a break before @itemx. + % + % If the item text does not fit in the space we have, put it on a line + % by itself, and do not allow a page break either before or after that + % line. 
We do not start a paragraph here because then if the next + % command is, e.g., @kindex, the whatsit would get put into the + % horizontal list on a line by itself, resulting in extra blank space. + \ifdim \wd0>\itemmax + % + % Make this a paragraph so we get the \parskip glue and wrapping, + % but leave it ragged-right. + \begingroup + \advance\leftskip by-\tableindent + \advance\hsize by\tableindent + \advance\rightskip by0pt plus1fil\relax + \leavevmode\unhbox0\par + \endgroup + % + % We're going to be starting a paragraph, but we don't want the + % \parskip glue -- logically it's part of the @item we just started. + \nobreak \vskip-\parskip + % + % Stop a page break at the \parskip glue coming up. However, if + % what follows is an environment such as @example, there will be no + % \parskip glue; then the negative vskip we just inserted would + % cause the example and the item to crash together. So we use this + % bizarre value of 10001 as a signal to \aboveenvbreak to insert + % \parskip glue after all. Section titles are handled this way also. + % + \penalty 10001 + \endgroup + \itemxneedsnegativevskipfalse + \else + % The item text fits into the space. Start a paragraph, so that the + % following text (if any) will end up on the same line. + \noindent + % Do this with kerns and \unhbox so that if there is a footnote in + % the item text, it can migrate to the main vertical list and + % eventually be printed. + \nobreak\kern-\tableindent + \dimen0 = \itemmax \advance\dimen0 by \itemmargin \advance\dimen0 by -\wd0 + \unhbox0 + \nobreak\kern\dimen0 + \endgroup + \itemxneedsnegativevskiptrue + \fi +} + +\def\item{\errmessage{@item while not in a list environment}} +\def\itemx{\errmessage{@itemx while not in a list environment}} + +% @table, @ftable, @vtable. 
+\envdef\table{% + \let\itemindex\gobble + \tablecheck{table}% +} +\envdef\ftable{% + \def\itemindex ##1{\doind {fn}{\code{##1}}}% + \tablecheck{ftable}% +} +\envdef\vtable{% + \def\itemindex ##1{\doind {vr}{\code{##1}}}% + \tablecheck{vtable}% +} +% \tablecheck{ENV}: if ^^M is active, report an error and skip the body of +% ENV via \doignore; otherwise continue with \tablex. +\def\tablecheck#1{% + \ifnum \the\catcode`\^^M=\active + \endgroup + \errmessage{This command won't work in this context; perhaps the problem is + that we are \inenvironment\thisenv}% + \def\next{\doignore{#1}}% + \else + \let\next\tablex + \fi + \next +} +\def\tablex#1{% + \def\itemindicate{#1}% + \parsearg\tabley +} +\def\tabley#1{% + {% + \makevalueexpandable + \edef\temp{\noexpand\tablez #1\space\space\space}% + \expandafter + }\temp \endtablez +} +\def\tablez #1 #2 #3 #4\endtablez{% + \aboveenvbreak + \ifnum 0#1>0 \advance \leftskip by #1\mil \fi + \ifnum 0#2>0 \tableindent=#2\mil \fi + \ifnum 0#3>0 \advance \rightskip by #3\mil \fi + \itemmax=\tableindent + \advance \itemmax by -\itemmargin + \advance \leftskip by \tableindent + \exdentamount=\tableindent + \parindent = 0pt + \parskip = \smallskipamount + \ifdim \parskip=0pt \parskip=2pt \fi + \let\item = \internalBitem + \let\itemx = \internalBitemx +} +\def\Etable{\endgraf\afterenvbreak} +\let\Eftable\Etable +\let\Evtable\Etable +\let\Eitemize\Etable +\let\Eenumerate\Etable + +% This is the counter used by @enumerate, which is really @itemize + +\newcount \itemno + +\envdef\itemize{\parsearg\doitemize} + +\def\doitemize#1{% + \aboveenvbreak + \itemmax=\itemindent + \advance\itemmax by -\itemmargin + \advance\leftskip by \itemindent + \exdentamount=\itemindent + \parindent=0pt + \parskip=\smallskipamount + \ifdim\parskip=0pt \parskip=2pt \fi + % + % Try typesetting the item mark so that if the document erroneously says + % something like @itemize @samp (intending @table), there's an error + % right away at the @itemize. It's not the best error message in the + % world, but it's better than leaving it to the @item.
This means if + % the user wants an empty mark, they have to say @w{} not just @w. + \def\itemcontents{#1}% + \setbox0 = \hbox{\itemcontents}% + % + % @itemize with no arg is equivalent to @itemize @bullet. + \ifx\itemcontents\empty\def\itemcontents{\bullet}\fi + % + \let\item=\itemizeitem +} + +% Definition of @item while inside @itemize and @enumerate. +% +\def\itemizeitem{% + \advance\itemno by 1 % for enumerations + {\let\par=\endgraf \smallbreak}% reasonable place to break + {% + % If the document has an @itemize directly after a section title, a + % \nobreak will be last on the list, and \sectionheading will have + % done a \vskip-\parskip. In that case, we don't want to zero + % parskip, or the item text will crash with the heading. On the + % other hand, when there is normal text preceding the item (as there + % usually is), we do want to zero parskip, or there would be too much + % space. In that case, we won't have a \nobreak before. At least + % that's the theory. + \ifnum\lastpenalty<10000 \parskip=0in \fi + \noindent + \hbox to 0pt{\hss \itemcontents \kern\itemmargin}% + % + \ifinner\else + \vadjust{\penalty 1200}% not good to break after first line of item. + \fi + % We can be in inner vertical mode in a footnote, although an + % @itemize looks awful there. + }% + \flushcr +} + +% \splitoff TOKENS\endmark defines \first to be the first token in +% TOKENS, and \rest to be the remainder. +% +\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% + +% Allow an optional argument of an uppercase letter, lowercase letter, +% or number, to specify the first label in the enumerated list. No +% argument is the same as `1'. +% +\envparseargdef\enumerate{\enumeratey #1 \endenumeratey} +\def\enumeratey #1 #2\endenumeratey{% + % If we were given no argument, pretend we were given `1'. + \def\thearg{#1}% + \ifx\thearg\empty \def\thearg{1}\fi + % + % Detect if the argument is a single token. If so, it might be a + % letter.
Otherwise, the only valid thing it can be is a number. + % (We will always have one token, because of the test we just made. + % This is a good thing, since \splitoff doesn't work given nothing at + % all -- the first parameter is undelimited.) + \expandafter\splitoff\thearg\endmark + \ifx\rest\empty + % Only one token in the argument. It could still be anything. + % A ``lowercase letter'' is one whose \lccode is nonzero. + % An ``uppercase letter'' is one whose \lccode is both nonzero, and + % not equal to itself. + % Otherwise, we assume it's a number. + % + % We need the \relax at the end of the \ifnum lines to stop TeX from + % continuing to look for a <number>. + % + \ifnum\lccode\expandafter`\thearg=0\relax + \numericenumerate % a number (we hope) + \else + % It's a letter. + \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax + \lowercaseenumerate % lowercase letter + \else + \uppercaseenumerate % uppercase letter + \fi + \fi + \else + % Multiple tokens in the argument. We hope it's a number. + \numericenumerate + \fi +} + +% An @enumerate whose labels are integers. The starting integer is +% given in \thearg. +% +\def\numericenumerate{% + \itemno = \thearg + \startenumeration{\the\itemno}% +} + +% The starting (lowercase) letter is in \thearg. +\def\lowercaseenumerate{% + \itemno = \expandafter`\thearg + \startenumeration{% + % Be sure we're not beyond the end of the alphabet. + \ifnum\itemno=0 + \errmessage{No more lowercase letters in @enumerate; get a bigger + alphabet}% + \fi + \char\lccode\itemno + }% +} + +% The starting (uppercase) letter is in \thearg. +% NOTE(review): unlike \lowercaseenumerate, the \errmessage line below has no +% trailing comment character, so a space token follows it in the error branch. +\def\uppercaseenumerate{% + \itemno = \expandafter`\thearg + \startenumeration{% + % Be sure we're not beyond the end of the alphabet. + \ifnum\itemno=0 + \errmessage{No more uppercase letters in @enumerate; get a bigger + alphabet} + \fi + \char\uccode\itemno + }% +} + +% Call \doitemize, adding a period to the first argument and supplying the +% common last two arguments.
Also subtract one from the initial value in +% \itemno, since @item increments \itemno. +% +\def\startenumeration#1{% + \advance\itemno by -1 + \doitemize{#1.}\flushcr +} + +% @alphaenumerate and @capsenumerate are abbreviations for giving an arg +% to @enumerate. +% +\def\alphaenumerate{\enumerate{a}} +\def\capsenumerate{\enumerate{A}} +\def\Ealphaenumerate{\Eenumerate} +\def\Ecapsenumerate{\Eenumerate} + + +% @multitable macros +% Amy Hendrickson, 8/18/94, 3/6/96 +% +% @multitable ... @end multitable will make as many columns as desired. +% Contents of each column will wrap at width given in preamble. Width +% can be specified either with sample text given in a template line, +% or in percent of \hsize, the current width of text on page. + +% Table can continue over pages but will only break between lines. + +% To make preamble: +% +% Either define widths of columns in terms of percent of \hsize: +% @multitable @columnfractions .25 .3 .45 +% @item ... +% +% Numbers following @columnfractions are the fractions of the total +% current hsize to be used for each column. You may use as many +% columns as desired. + + +% Or use a template: +% @multitable {Column 1 template} {Column 2 template} {Column 3 template} +% @item ... +% using the widest term desired in each column. + +% Each new table line starts with @item, each subsequent new column +% starts with @tab. Empty columns may be produced by supplying @tab's +% with nothing between them for as many times as empty columns are needed, +% i.e., @tab@tab@tab will produce two empty columns. + +% @item, @tab do not need to be on their own lines, but it will not hurt +% if they are. + +% Sample multitable: + +% @multitable {Column 1 template} {Column 2 template} {Column 3 template} +% @item first col stuff @tab second col stuff @tab third col +% @item +% first col stuff +% @tab +% second col stuff +% @tab +% third col +% @item first col stuff @tab second col stuff +% @tab Many paragraphs of text may be used in any column.
+% +% They will wrap at the width determined by the template. +% @item@tab@tab This will be in third column. +% @end multitable + +% Default dimensions may be reset by user. +% @multitableparskip is vertical space between paragraphs in table. +% @multitableparindent is paragraph indent in table. +% @multitablecolspace is horizontal space to be left between columns. +% @multitablelinespace is space to leave between table items, baseline +% to baseline. +% 0pt means it depends on current normal line spacing. +% +\newskip\multitableparskip +\newskip\multitableparindent +\newdimen\multitablecolspace +\newskip\multitablelinespace +\multitableparskip=0pt +\multitableparindent=6pt +\multitablecolspace=12pt +\multitablelinespace=0pt + +% Macros used to set up halign preamble: +% +\let\endsetuptable\relax +\def\xendsetuptable{\endsetuptable} +\let\columnfractions\relax +\def\xcolumnfractions{\columnfractions} +\newif\ifsetpercent + +% #1 is the @columnfraction, usually a decimal number like .5, but might +% be just 1. We just use it, whatever it is. +% +\def\pickupwholefraction#1 {% + \global\advance\colcount by 1 + \expandafter\xdef\csname col\the\colcount\endcsname{#1\hsize}% + \setuptable +} + +\newcount\colcount +% \setuptable takes the @multitable arguments one at a time until it sees +% \endsetuptable, storing each column width in \col1, \col2, ... +\def\setuptable#1{% + \def\firstarg{#1}% + \ifx\firstarg\xendsetuptable + \let\go = \relax + \else + \ifx\firstarg\xcolumnfractions + \global\setpercenttrue + \else + \ifsetpercent + \let\go\pickupwholefraction + \else + \global\advance\colcount by 1 + \setbox0=\hbox{#1\unskip\space}% Add a normal word space as a + % separator; typically that is always in the input, anyway. + \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% + \fi + \fi + \ifx\go\pickupwholefraction + % Put the argument back for the \pickupwholefraction call, so + % we'll always have a period there to be parsed. + \def\go{\pickupwholefraction#1}% + \else + \let\go = \setuptable + \fi% + \fi + \go +} + +% multitable-only commands.
+% +% @headitem starts a heading row, which we typeset in bold. Assignments +% have to be global since we are inside the implicit group of an +% alignment entry. \everycr below resets \everytab so we don't have to +% undo it ourselves. +\def\headitemfont{\b}% for people to use in the template row; not changeable +\def\headitem{% + \checkenv\multitable + \crcr + \gdef\headitemcrhook{\nobreak}% attempt to avoid page break after headings + \global\everytab={\bf}% can't use \headitemfont since the parsing differs + \the\everytab % for the first item +}% +% +% default for tables with no headings. +\let\headitemcrhook=\relax +% +% A \tab used to include \hskip1sp. But then the space in a template +% line is not enough. That is bad. So let's go back to just `&' until +% we again encounter the problem the 1sp was intended to solve. +% --karl, nathan@acm.org, 20apr99. +\def\tab{\checkenv\multitable &\the\everytab}% + +% @multitable ... @end multitable definitions: +% +\newtoks\everytab % insert after every tab. +% +\envdef\multitable{% + \vskip\parskip + \startsavinginserts + % + % @item within a multitable starts a normal row. + % We use \def instead of \let so that if one of the multitable entries + % contains an @itemize, we don't choke on the \item (seen as \crcr aka + % \endtemplate) expanding \doitemize. + \def\item{\crcr}% + % + \tolerance=9500 + \hbadness=9500 + \setmultitablespacing + \parskip=\multitableparskip + \parindent=\multitableparindent + \overfullrule=0pt + \global\colcount=0 + % + \everycr = {% + \noalign{% + \global\everytab={}% Reset from possible headitem. + \global\colcount=0 % Reset the column counter.
+ % + % Check for saved footnotes, etc.: + \checkinserts + % + % Perhaps a \nobreak, then reset: + \headitemcrhook + \global\let\headitemcrhook=\relax + }% + }% + % + \parsearg\domultitable +} +\def\domultitable#1{% + % To parse everything between @multitable and @item: + \setuptable#1 \endsetuptable + % + % This preamble sets up a generic column definition, which will + % be used as many times as user calls for columns. + % \vtop will set a single line and will also let text wrap and + % continue for many paragraphs if desired. + \halign\bgroup &% + \global\advance\colcount by 1 + \multistrut + \vtop{% + % Use the current \colcount to find the correct column width: + \hsize=\expandafter\csname col\the\colcount\endcsname + % + % In order to keep entries from bumping into each other + % we will add a \leftskip of \multitablecolspace to all columns after + % the first one. + % + % If a template has been used, we will add \multitablecolspace + % to the width of each template entry. + % + % If the user has set preamble in terms of percent of \hsize we will + % use that dimension as the width of the column, and the \leftskip + % will keep entries from bumping into each other. Table will start at + % left margin and final column will justify at right margin. + % + % Make sure we don't inherit \rightskip from the outer environment. + \rightskip=0pt + \ifnum\colcount=1 + % The first column will be indented with the surrounding text. + \advance\hsize by\leftskip + \else + \ifsetpercent \else + % If user has not set preamble in terms of percent of \hsize + % we will advance \hsize by \multitablecolspace. + \advance\hsize by \multitablecolspace + \fi + % In either case we will make \leftskip=\multitablecolspace: + \leftskip=\multitablecolspace + \fi + % Ignoring space at the beginning and end avoids an occasional spurious + % blank line, when TeX decides to break the line at the space before the + % box from the multistrut, so the strut ends up on a line by itself.
+ % For example: + % @multitable @columnfractions .11 .89 + % @item @code{#} + % @tab Legal holiday which is valid in major parts of the whole country. + % Is automatically provided with highlighting sequences respectively + % marking characters. + \noindent\ignorespaces##\unskip\multistrut + }\cr +} +% @end multitable: finish the last row, close the \halign, and reset the +% global \ifsetpercent flag. +\def\Emultitable{% + \crcr + \egroup % end the \halign + \global\setpercentfalse +} + +\def\setmultitablespacing{% + \def\multistrut{\strut}% just use the standard line spacing + % + % Compute \multitablelinespace (if not defined by user) for use in + % \multitableparskip calculation. We used to define \multistrut based on + % this, but (ironically) that caused the spacing to be off. + % See bug-texinfo report from Werner Lemberg, 31 Oct 2004 12:52:20 +0100. +\ifdim\multitablelinespace=0pt +\setbox0=\vbox{X}\global\multitablelinespace=\the\baselineskip +\global\advance\multitablelinespace by-\ht0 +\fi +% Test to see if parskip is larger than space between lines of +% table. If not, do nothing. +% If so, set to same dimension as multitablelinespace. +\ifdim\multitableparskip>\multitablelinespace +\global\multitableparskip=\multitablelinespace +\global\advance\multitableparskip-7pt % to keep parskip somewhat smaller + % than skip between lines in the table. +\fi% +\ifdim\multitableparskip=0pt +\global\multitableparskip=\multitablelinespace +\global\advance\multitableparskip-7pt % to keep parskip somewhat smaller + % than skip between lines in the table. +\fi} + + +\message{conditionals,} + +% @iftex, @ifnotdocbook, @ifnothtml, @ifnotinfo, @ifnotplaintext, +% @ifnotxml always succeed. They currently do nothing; we don't +% attempt to check whether the conditionals are properly nested. But we +% have to remember that they are conditionals, so that @end doesn't +% attempt to close an environment group.
+% +\def\makecond#1{% + \expandafter\let\csname #1\endcsname = \relax + \expandafter\let\csname iscond.#1\endcsname = 1 +} +\makecond{iftex} +\makecond{ifnotdocbook} +\makecond{ifnothtml} +\makecond{ifnotinfo} +\makecond{ifnotplaintext} +\makecond{ifnotxml} + +% Ignore @ignore, @ifhtml, @ifinfo, and the like. +% +\def\direntry{\doignore{direntry}} +\def\documentdescription{\doignore{documentdescription}} +\def\docbook{\doignore{docbook}} +\def\html{\doignore{html}} +\def\ifdocbook{\doignore{ifdocbook}} +\def\ifhtml{\doignore{ifhtml}} +\def\ifinfo{\doignore{ifinfo}} +\def\ifnottex{\doignore{ifnottex}} +\def\ifplaintext{\doignore{ifplaintext}} +\def\ifxml{\doignore{ifxml}} +\def\ignore{\doignore{ignore}} +\def\menu{\doignore{menu}} +\def\xml{\doignore{xml}} + +% Ignore text until a line `@end #1', keeping track of nested conditionals. +% +% A count to remember the depth of nesting. +\newcount\doignorecount + +\def\doignore#1{\begingroup + % Scan in ``verbatim'' mode: + \obeylines + \catcode`\@ = \other + \catcode`\{ = \other + \catcode`\} = \other + % + % Make sure that spaces turn into tokens that match what \doignoretext wants. + \spaceisspace + % + % Count number of #1's that we've seen. + \doignorecount = 0 + % + % Swallow text until we reach the matching `@end #1'. + \dodoignore{#1}% +} + +{ \catcode`_=11 % We want to use \_STOP_ which cannot appear in texinfo source. + \obeylines % + % + \gdef\dodoignore#1{% + % #1 contains the command name as a string, e.g., `ifinfo'. + % + % Define a command to find the next `@end #1'. + \long\def\doignoretext##1^^M@end #1{% + \doignoretextyyy##1^^M@#1\_STOP_}% + % + % And this command to find another #1 command, at the beginning of a + % line. (Otherwise, we would consider a line `@c @ifset', for + % example, to count as an @ifset for nesting.) + \long\def\doignoretextyyy##1^^M@#1##2\_STOP_{\doignoreyyy{##2}\_STOP_}% + % + % And now expand that command.
+ \doignoretext ^^M% + }% +} + +\def\doignoreyyy#1{% + \def\temp{#1}% + \ifx\temp\empty % Nothing found. + \let\next\doignoretextzzz + \else % Found a nested condition, ... + \advance\doignorecount by 1 + \let\next\doignoretextyyy % ..., look for another. + % If we're here, #1 ends with ^^M\ifinfo (for example). + \fi + \next #1% the token \_STOP_ is present just after this macro. +} + +% We have to swallow the remaining "\_STOP_". +% +\def\doignoretextzzz#1{% + \ifnum\doignorecount = 0 % We have just found the outermost @end. + \let\next\enddoignore + \else % Still inside a nested condition. + \advance\doignorecount by -1 + \let\next\doignoretext % Look for the next @end. + \fi + \next +} + +% Finish off ignored text. +{ \obeylines% + % Ignore anything after the last `@end #1'; this matters in verbatim + % environments, where otherwise the newline after an ignored conditional + % would result in a blank line in the output. + \gdef\enddoignore#1^^M{\endgroup\ignorespaces}% +} + + +% @set VAR sets the variable VAR to an empty value. +% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. +% +% Since we want to separate VAR from REST-OF-LINE (which might be +% empty), we can't just use \parsearg; we have to insert a space of our +% own to delimit the rest of the line, and then take it out again if we +% didn't need it. +% We rely on the fact that \parsearg sets \catcode`\ =10. +% +\parseargdef\set{\setyyy#1 \endsetyyy} +\def\setyyy#1 #2\endsetyyy{% + {% + \makevalueexpandable + \def\temp{#2}% + \edef\next{\gdef\makecsname{SET#1}}% + \ifx\temp\empty + \next{}% + \else + \setzzz#2\endsetzzz + \fi + }% +} +% Remove the trailing space \set inserted. +\def\setzzz#1 \endsetzzz{\next{#1}} + +% @clear VAR clears (i.e., unsets) the variable VAR. +% +\parseargdef\clear{% + {% + \makevalueexpandable + \global\expandafter\let\csname SET#1\endcsname=\relax + }% +} + +% @value{foo} gets the text saved in variable foo.
+% \value opens a group and makes - and _ non-active before \valuexxx reads +% the variable name; \valuexxx closes the group again. +\def\value{\begingroup\makevalueexpandable\valuexxx} +\def\valuexxx#1{\expandablevalue{#1}\endgroup} +{ + \catcode`\-=\active \catcode`\_=\active + % + \gdef\makevalueexpandable{% + \let\value = \expandablevalue + % We don't want these characters active, ... + \catcode`\-=\other \catcode`\_=\other + % ..., but we might end up with active ones in the argument if + % we're called from @code, as @code{@value{foo-bar_}}, though. + % So \let them to their normal equivalents. + \let-\normaldash \let_\normalunderscore + } +} + +% We have this subroutine so that we can handle at least some @value's +% properly in indexes (we call \makevalueexpandable in \indexdummies). +% The command has to be fully expandable (if the variable is set), since +% the result winds up in the index file. This means that if the +% variable's value contains other Texinfo commands, it's almost certain +% it will fail (although perhaps we could fix that with sufficient work +% to do a one-level expansion on the result, instead of complete). +% +% Unfortunately, this has the consequence that when _ is in the *value* +% of an @set, it does not print properly in the roman fonts (get the cmr +% dot accent at position 126 instead). No fix comes to mind, and it's +% been this way since 2003 or earlier, so just ignore it. +% +\def\expandablevalue#1{% + \expandafter\ifx\csname SET#1\endcsname\relax + {[No value for ``#1'']}% + \message{Variable `#1', used in @value, is not set.}% + \else + \csname SET#1\endcsname + \fi +} + +% Like \expandablevalue, but completely expandable (the \message in the +% definition above operates at the execution level of TeX). Used when +% writing to auxiliary files, due to the expansion that \write does. +% If flag is undefined, pass through an unexpanded @value command: maybe it +% will be set by the time it is read back in. +% +% NB flag names containing - or _ may not work here.
+\def\dummyvalue#1{% + \expandafter\ifx\csname SET#1\endcsname\relax + \noexpand\value{#1}% + \else + \csname SET#1\endcsname + \fi +} + +% Used for @value's in index entries to form the sort key: expand the @value +% if possible, otherwise sort late. +\def\indexnofontsvalue#1{% + \expandafter\ifx\csname SET#1\endcsname\relax + ZZZZZZZ + \else + \csname SET#1\endcsname + \fi +} + +% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined +% with @set. +% +% To get the special treatment we need for `@end ifset,' we call +% \makecond and then redefine. +% +\makecond{ifset} +\def\ifset{\parsearg{\doifset{\let\next=\ifsetfail}}} +\def\doifset#1#2{% + {% + \makevalueexpandable + \let\next=\empty + \expandafter\ifx\csname SET#2\endcsname\relax + #1% If not set, redefine \next. + \fi + \expandafter + }\next +} +\def\ifsetfail{\doignore{ifset}} + +% @ifclear VAR ... @end executes the `...' iff VAR has never been +% defined with @set, or has been undefined with @clear. +% +% The `\else' inside the `\doifset' parameter is a trick to reuse the +% above code: if the variable is not set, do nothing, if it is set, +% then redefine \next to \ifclearfail. +% +\makecond{ifclear} +\def\ifclear{\parsearg{\doifset{\else \let\next=\ifclearfail}}} +\def\ifclearfail{\doignore{ifclear}} + +% @ifcommanddefined CMD ... @end executes the `...' if CMD (written +% without the @) is in fact defined. We can only feasibly check at the +% TeX level, so something like `mathcode' is going to be considered +% defined even though it is not a Texinfo command. +% +\makecond{ifcommanddefined} +\def\ifcommanddefined{\parsearg{\doifcmddefined{\let\next=\ifcmddefinedfail}}} +% +\def\doifcmddefined#1#2{{% + \makevalueexpandable + \let\next=\empty + \expandafter\ifx\csname #2\endcsname\relax + #1% If not defined, \let\next as above. + \fi + \expandafter + }\next +} +\def\ifcmddefinedfail{\doignore{ifcommanddefined}} + +% @ifcommandnotdefined CMD ... handled similar to @ifclear above.
+\makecond{ifcommandnotdefined} +\def\ifcommandnotdefined{% + \parsearg{\doifcmddefined{\else \let\next=\ifcmdnotdefinedfail}}} +\def\ifcmdnotdefinedfail{\doignore{ifcommandnotdefined}} + +% Set the `txicommandconditionals' variable, so documents have a way to +% test if the @ifcommand...defined conditionals are available. +\set txicommandconditionals + +% @dircategory CATEGORY -- specify a category of the dir file +% which this file should belong to. Ignore this in TeX. +\let\dircategory=\comment + +% @definfoenclose. +\let\definfoenclose=\comment + + +\message{indexing,} +% Index generation facilities + +% Define \newwrite to be identical to plain tex's \newwrite +% except not \outer, so it can be used within macros and \if's. +\edef\newwrite{\makecsname{ptexnewwrite}} + +% \newindex {IX} defines an index named IX. +% It automatically defines \IXindex such that +% \IXindex ...rest of line... puts an entry in the index IX. +% It also defines \IXindfile to be the number of the output channel for +% the file that accumulates this index. The file's extension is IX. +% The name of an index should be no more than 2 characters long +% for the sake of vms. +% +\def\newindex#1{% + \expandafter\chardef\csname#1indfile\endcsname=0 + \expandafter\xdef\csname#1index\endcsname{% % Define @#1index + \noexpand\doindex{#1}} +} + +% @defindex foo == \newindex{foo} +% +\def\defindex{\parsearg\newindex} + +% Define @defcodeindex, like @defindex except put all entries in @code. +% +\def\defcodeindex{\parsearg\newcodeindex} +% +\def\newcodeindex#1{% + \expandafter\chardef\csname#1indfile\endcsname=0 + \expandafter\xdef\csname#1index\endcsname{% + \noexpand\docodeindex{#1}}% +} + +% The default indices: +\newindex{cp}% concepts, +\newcodeindex{fn}% functions, +\newcodeindex{vr}% variables, +\newcodeindex{tp}% types, +\newcodeindex{ky}% keys +\newcodeindex{pg}% and programs. + + +% @synindex foo bar makes index foo feed into index bar.
+% Do this instead of @defindex foo if you don't want it as a separate index. +% +% @syncodeindex foo bar similar, but put all entries made for index foo +% inside @code. +% +\def\synindex#1 #2 {\dosynindex\doindex{#1}{#2}} +\def\syncodeindex#1 #2 {\dosynindex\docodeindex{#1}{#2}} + +% #1 is \doindex or \docodeindex, #2 the index getting redefined (foo), +% #3 the target index (bar). +\def\dosynindex#1#2#3{% + % open the target index file first if it is not open yet: + \requireopenindexfile{#3}% + % redefine \fooindfile: + \expandafter\let\expandafter\temp\expandafter=\csname#3indfile\endcsname + \expandafter\let\csname#2indfile\endcsname=\temp + % redefine \fooindex: + \expandafter\xdef\csname#2index\endcsname{\noexpand#1{#3}}% +} + +% Define \doindex, the driver for all index macros. +% Argument #1 is generated by the calling \fooindex macro, +% and it is the two-letter name of the index. + +\def\doindex#1{\edef\indexname{#1}\parsearg\doindexxxx} +\def\doindexxxx #1{\doind{\indexname}{#1}} + +% like the previous two, but they put @code around the argument. +\def\docodeindex#1{\edef\indexname{#1}\parsearg\docodeindexxxx} +\def\docodeindexxxx #1{\doind{\indexname}{\code{#1}}} + + +% Used when writing an index entry out to an index file to prevent +% expansion of Texinfo commands that can appear in an index entry. +% +\def\indexdummies{% + \escapechar = `\\ % use backslash in output files. + \definedummyletter\@% + \definedummyletter\ % + % + % For texindex which always views { and } as separators. + \def\{{\lbracechar{}}% + \def\}{\rbracechar{}}% + % + % Do the redefinitions. + \definedummies +} + +% Used for the aux and toc files, where @ is the escape character. +% +\def\atdummies{% + \definedummyletter\@% + \definedummyletter\ % + \definedummyletter\{% + \definedummyletter\}% + % + % Do the redefinitions. + \definedummies + \otherbackslash +} + +% \definedummyword defines \#1 as \string\#1\space, thus effectively +% preventing its expansion.
This is used only for control words, +% not control letters, because the \space would be incorrect for +% control characters, but is needed to separate the control word +% from whatever follows. +% +% These can be used both for control words that take an argument and +% those that do not. If it is followed by {arg} in the input, then +% that will dutifully get written to the index (or wherever). +% +% For control letters, we have \definedummyletter, which omits the +% space. +% +\def\definedummyword #1{\def#1{\string#1\space}}% +\def\definedummyletter#1{\def#1{\string#1}}% +\let\definedummyaccent\definedummyletter + +% Called from \indexdummies and \atdummies, to effectively prevent +% the expansion of commands. +% +\def\definedummies{% + % + \let\commondummyword\definedummyword + \let\commondummyletter\definedummyletter + \let\commondummyaccent\definedummyaccent + \commondummiesnofonts + % + \definedummyletter\_% + \definedummyletter\-% + % + % Non-English letters. + \definedummyword\AA + \definedummyword\AE + \definedummyword\DH + \definedummyword\L + \definedummyword\O + \definedummyword\OE + \definedummyword\TH + \definedummyword\aa + \definedummyword\ae + \definedummyword\dh + \definedummyword\exclamdown + \definedummyword\l + \definedummyword\o + \definedummyword\oe + \definedummyword\ordf + \definedummyword\ordm + \definedummyword\questiondown + \definedummyword\ss + \definedummyword\th + % + % Although these internal commands shouldn't show up, sometimes they do. + \definedummyword\bf + \definedummyword\gtr + \definedummyword\hat + \definedummyword\less + \definedummyword\sf + \definedummyword\sl + \definedummyword\tclose + \definedummyword\tt + % + \definedummyword\LaTeX + \definedummyword\TeX + % + % Assorted special characters.
+ \definedummyword\atchar + \definedummyword\arrow + \definedummyword\bullet + \definedummyword\comma + \definedummyword\copyright + \definedummyword\registeredsymbol + \definedummyword\dots + \definedummyword\enddots + \definedummyword\entrybreak + \definedummyword\equiv + \definedummyword\error + \definedummyword\euro + \definedummyword\expansion + \definedummyword\geq + \definedummyword\guillemetleft + \definedummyword\guillemetright + \definedummyword\guilsinglleft + \definedummyword\guilsinglright + \definedummyword\lbracechar + \definedummyword\leq + \definedummyword\mathopsup + \definedummyword\minus + % (\ogonek is also handled in \commondummiesnofonts; repeating it is harmless.) + \definedummyword\ogonek + \definedummyword\pounds + \definedummyword\point + \definedummyword\print + \definedummyword\quotedblbase + \definedummyword\quotedblleft + \definedummyword\quotedblright + \definedummyword\quoteleft + \definedummyword\quoteright + \definedummyword\quotesinglbase + \definedummyword\rbracechar + \definedummyword\result + \definedummyword\sub + \definedummyword\sup + \definedummyword\textdegree + % + % We want to disable all macros so that they are not expanded by \write. + \macrolist + \let\value\dummyvalue + % + \normalturnoffactive +} + +% \commondummiesnofonts: common to \definedummies and \indexnofonts. +% Define \commondummyletter, \commondummyaccent and \commondummyword before +% using. Used for accents, font commands, and various control letters. +% +\def\commondummiesnofonts{% + % Control letters and accents.
+ \commondummyletter\!% + \commondummyaccent\"% + \commondummyaccent\'% + \commondummyletter\*% + \commondummyaccent\,% + \commondummyletter\.% + \commondummyletter\/% + \commondummyletter\:% + \commondummyaccent\=% + \commondummyletter\?% + \commondummyaccent\^% + \commondummyaccent\`% + \commondummyaccent\~% + \commondummyword\u + \commondummyword\v + \commondummyword\H + \commondummyword\dotaccent + \commondummyword\ogonek + \commondummyword\ringaccent + \commondummyword\tieaccent + \commondummyword\ubaraccent + \commondummyword\udotaccent + \commondummyword\dotless + % + % Texinfo font commands. + \commondummyword\b + \commondummyword\i + \commondummyword\r + \commondummyword\sansserif + \commondummyword\sc + \commondummyword\slanted + \commondummyword\t + % + % Commands that take arguments. + \commondummyword\abbr + \commondummyword\acronym + \commondummyword\anchor + \commondummyword\cite + \commondummyword\code + \commondummyword\command + \commondummyword\dfn + \commondummyword\dmn + \commondummyword\email + \commondummyword\emph + \commondummyword\env + \commondummyword\file + \commondummyword\image + \commondummyword\indicateurl + \commondummyword\inforef + \commondummyword\kbd + \commondummyword\key + \commondummyword\math + \commondummyword\option + \commondummyword\pxref + \commondummyword\ref + \commondummyword\samp + \commondummyword\strong + \commondummyword\tie + \commondummyword\U + \commondummyword\uref + \commondummyword\url + \commondummyword\var + \commondummyword\verb + \commondummyword\w + \commondummyword\xref +} + +% For testing: output @{ and @} in index sort strings as \{ and \}.
+\newif\ifusebracesinindexes + +\let\indexlbrace\relax +\let\indexrbrace\relax + +{\catcode`\@=0 +\catcode`\\=13 + @gdef@backslashdisappear{@def\{}} +} + +{ +\catcode`\<=13 +\catcode`\-=13 +\catcode`\`=13 + % \indexnonalnumdisappear makes each character whose txiindex...ignore + % variable is set expand to nothing while the sort key is built. + \gdef\indexnonalnumdisappear{% + \expandafter\ifx\csname SETtxiindexlquoteignore\endcsname\relax\else + % @set txiindexlquoteignore makes us ignore left quotes in the sort term. + % (Introduced for FSFS 2nd ed.) + \let`=\empty + \fi + % + \expandafter\ifx\csname SETtxiindexbackslashignore\endcsname\relax\else + \backslashdisappear + \fi + % + \expandafter\ifx\csname SETtxiindexhyphenignore\endcsname\relax\else + \def-{}% + \fi + \expandafter\ifx\csname SETtxiindexlessthanignore\endcsname\relax\else + \def<{}% + \fi + \expandafter\ifx\csname SETtxiindexatsignignore\endcsname\relax\else + \def\@{}% + \fi + } + + \gdef\indexnonalnumreappear{% + \useindexbackslash + \let-\normaldash + \let<\normalless + \def\@{@}% + } +} + + +% \indexnofonts is used when outputting the strings to sort the index +% by, and when constructing control sequence names. It eliminates all +% control sequences and just writes whatever the best ASCII sort string +% would be for a given command (usually its argument). +% +\def\indexnofonts{% + % Accent commands should become @asis. + \def\commondummyaccent##1{\let##1\asis}% + % We can just ignore other control letters. + \def\commondummyletter##1{\let##1\empty}% + % All control words become @asis by default; overrides below. + \let\commondummyword\commondummyaccent + \commondummiesnofonts + % + % Don't no-op \tt, since it isn't a user-level command + % and is used in the definitions of the active chars like <, >, |, etc. + % Likewise with the other plain tex font commands. + %\let\tt=\asis + % + \def\ { }% + \def\@{@}% + \def\_{\normalunderscore}% + \def\-{}% @- shouldn't affect sorting + % + \uccode`\1=`\{ \uppercase{\def\{{1}}% + \uccode`\1=`\} \uppercase{\def\}{1}}% + \let\lbracechar\{% + \let\rbracechar\}% + % + % Non-English letters.
+ \def\AA{AA}% + \def\AE{AE}% + \def\DH{DZZ}% + \def\L{L}% + \def\OE{OE}% + \def\O{O}% + \def\TH{TH}% + \def\aa{aa}% + \def\ae{ae}% + \def\dh{dzz}% + \def\exclamdown{!}% + \def\l{l}% + \def\oe{oe}% + \def\ordf{a}% + \def\ordm{o}% + \def\o{o}% + \def\questiondown{?}% + \def\ss{ss}% + \def\th{th}% + % + \def\LaTeX{LaTeX}% + \def\TeX{TeX}% + % + % Assorted special characters. \defglyph gives the control sequence a + % definition that removes the {} that follows its use. + \defglyph\atchar{@}% + \defglyph\arrow{->}% + \defglyph\bullet{bullet}% + \defglyph\comma{,}% + \defglyph\copyright{copyright}% + \defglyph\dots{...}% + \defglyph\enddots{...}% + \defglyph\equiv{==}% + \defglyph\error{error}% + \defglyph\euro{euro}% + \defglyph\expansion{==>}% + \defglyph\geq{>=}% + \defglyph\guillemetleft{<<}% + \defglyph\guillemetright{>>}% + \defglyph\guilsinglleft{<}% + \defglyph\guilsinglright{>}% + \defglyph\leq{<=}% + \defglyph\lbracechar{\{}% + \defglyph\minus{-}% + \defglyph\point{.}% + \defglyph\pounds{pounds}% + \defglyph\print{-|}% + \defglyph\quotedblbase{"}% + \defglyph\quotedblleft{"}% + \defglyph\quotedblright{"}% + \defglyph\quoteleft{`}% + \defglyph\quoteright{'}% + \defglyph\quotesinglbase{,}% + \defglyph\rbracechar{\}}% + \defglyph\registeredsymbol{R}% + \defglyph\result{=>}% + \defglyph\textdegree{o}% + % + % We need to get rid of all macros, leaving only the arguments (if present). + % Of course this is not nearly correct, but it is the best we can do for now. + % makeinfo does not expand macros in the argument to @deffn, which ends up + % writing an index entry, and texindex isn't prepared for an index sort entry + % that starts with \. + % + % Since macro invocations are followed by braces, we can just redefine them + % to take a single TeX argument. The case of a macro invocation that + % goes to end-of-line is not handled.
+ % + \macrolist + \let\value\indexnofontsvalue +} +\def\defglyph#1#2{\def#1##1{#2}} % see above + + + + +\let\SETmarginindex=\relax % put index entries in margin (undocumented)? + +% Most index entries go through here, but \dosubind is the general case. +% #1 is the index name, #2 is the entry text. +\def\doind#1#2{\dosubind{#1}{#2}{}} + +% There is also \dosubind {index}{topic}{subtopic} +% which makes an entry in a two-level index such as the operation index. +% TODO: Two-level index? Operation index? + +% Workhorse for all indexes. +% #1 is name of index, #2 is stuff to put there, #3 is subentry -- +% empty if called from \doind, as we usually are (the main exception +% is with most defuns, which call us directly). +% +\def\dosubind#1#2#3{% + \iflinks + {% + \requireopenindexfile{#1}% + % Store the main index entry text (including the third arg). + \toks0 = {#2}% + % If third arg is present, precede it with a space. + \def\thirdarg{#3}% + \ifx\thirdarg\empty \else + \toks0 = \expandafter{\the\toks0 \space #3}% + \fi + % + \edef\writeto{\csname#1indfile\endcsname}% + % + \safewhatsit\dosubindwrite + }% + \fi +} + +% Check if an index file has been opened, and if not, open it. +\def\requireopenindexfile#1{% +\ifnum\csname #1indfile\endcsname=0 + \expandafter\newwrite \csname#1indfile\endcsname + \edef\suffix{#1}% + % A .fls suffix would conflict with the file extension for the output + % of -recorder, so use .f1s instead. + \ifx\suffix\indexisfl\def\suffix{f1}\fi + % Open the file + \immediate\openout\csname#1indfile\endcsname \jobname.\suffix + % Using \immediate above here prevents an object entering into the current + % box, which could confound checks such as those in \safewhatsit for + % preceding skips. + \typeout{Writing index file \jobname.\suffix}% +\fi} +\def\indexisfl{fl} + +% Output \ as {\indexbackslash}, because \ is an escape character in +% the index files.
+\let\indexbackslash=\relax +% Inside this group @ is the escape character and \ is active, so +% \useindexbackslash defines the active \ to expand to {\indexbackslash}. +{\catcode`\@=0 \catcode`\\=\active + @gdef@useindexbackslash{@def\{{@indexbackslash}}} +} + +% Definition for writing index entry text: the @sortas argument is dropped. +\def\sortas#1{\ignorespaces}% + +% Definition for writing index entry sort key. Should occur at +% the beginning of the index entry, like +% @cindex @sortas{september} \september +% The \ignorespaces takes care of following space, but there's no way +% to remove space before it. +% \indexwritesortas opens a group and calls \indexnonalnumreappear; +% \indexwritesortasxxx then stores its argument globally in \indexsortkey +% and closes that group. +{ +\catcode`\-=13 +\gdef\indexwritesortas{% + \begingroup + \indexnonalnumreappear + \indexwritesortasxxx} +\gdef\indexwritesortasxxx#1{% + \xdef\indexsortkey{#1}\endgroup} +} + + +% Write the entry in \toks0 to the index file. +% +\def\dosubindwrite{% + % Put the index entry in the margin if desired. + \ifx\SETmarginindex\relax\else + \insert\margin{\hbox{\vrule height8pt depth3pt width0pt \the\toks0}}% + \fi + % + % Remember, we are within a group. + \indexdummies % Must do this here, since \bf, etc expand at this stage + \useindexbackslash % \indexbackslash isn't defined now so it will be output + % as is; and it will print as backslash. + % The braces around \indexbrace are recognized by texindex. + % + % Get the string to sort by, by processing the index entry with all + % font commands turned off. + {\indexnofonts + \def\lbracechar{{\indexlbrace}}% + \def\rbracechar{{\indexrbrace}}% + \let\{=\lbracechar + \let\}=\rbracechar + \indexnonalnumdisappear + \xdef\indexsortkey{}% + \let\sortas=\indexwritesortas + \edef\temp{\the\toks0}% + \setbox\dummybox = \hbox{\temp}% Make sure to execute any \sortas + \ifx\indexsortkey\empty + \xdef\indexsortkey{\temp}% + \ifx\indexsortkey\empty\xdef\indexsortkey{ }\fi + \fi + }% + % + % Set up the complete index entry, with both the sort key and + % the original text, including any font commands. We write + % three arguments to \entry to the .?? file (four in the + % subentry case), texindex reduces to two when writing the .??s + % sorted result.
+ \edef\temp{% + \write\writeto{% + \string\entry{\indexsortkey}{\noexpand\folio}{\the\toks0}}% + }% + \temp +} +\newbox\dummybox % used above + +% Take care of unwanted page breaks/skips around a whatsit: +% +% If a skip is the last thing on the list now, preserve it +% by backing up by \lastskip, doing the \write, then inserting +% the skip again. Otherwise, the whatsit generated by the +% \write or \pdfdest will make \lastskip zero. The result is that +% sequences like this: +% @end defun +% @tindex whatever +% @defun ... +% will have extra space inserted, because the \medbreak in the +% start of the @defun won't see the skip inserted by the @end of +% the previous defun. +% +% But don't do any of this if we're not in vertical mode. We +% don't want to do a \vskip and prematurely end a paragraph. +% +% Avoid page breaks due to these extra skips, too. +% +% But wait, there is a catch there: +% We'll have to check whether \lastskip is zero skip. \ifdim is not +% sufficient for this purpose, as it ignores stretch and shrink parts +% of the skip. The only way seems to be to check the textual +% representation of the skip. +% +% The following is almost like \def\zeroskipmacro{0.0pt} except that +% the ``p'' and ``t'' characters have catcode \other, not 11 (letter). +% +\edef\zeroskipmacro{\expandafter\the\csname z@skip\endcsname} +% +\newskip\whatsitskip +\newcount\whatsitpenalty +% +% ..., ready, GO: +% +\def\safewhatsit#1{\ifhmode + #1% + \else + % \lastskip and \lastpenalty cannot both be nonzero simultaneously. + \whatsitskip = \lastskip + \edef\lastskipmacro{\the\lastskip}% + \whatsitpenalty = \lastpenalty + % + % If \lastskip is nonzero, that means the last item was a + % skip. And since a skip is discardable, that means this + % -\whatsitskip glue we're inserting is preceded by a + % non-discardable item, therefore it is not a potential + % breakpoint, therefore no \nobreak needed. 
+ \ifx\lastskipmacro\zeroskipmacro + \else + \vskip-\whatsitskip + \fi + % + #1% + % + \ifx\lastskipmacro\zeroskipmacro + % If \lastskip was zero, perhaps the last item was a penalty, and + % perhaps it was >=10000, e.g., a \nobreak. In that case, we want + % to re-insert the same penalty (values >10000 are used for various + % signals); since we just inserted a non-discardable item, any + % following glue (such as a \parskip) would be a breakpoint. For example: + % @deffn deffn-whatever + % @vindex index-whatever + % Description. + % would allow a break between the index-whatever whatsit + % and the "Description." paragraph. + \ifnum\whatsitpenalty>9999 \penalty\whatsitpenalty \fi + \else + % On the other hand, if we had a nonzero \lastskip, + % this make-up glue would be preceded by a non-discardable item + % (the whatsit from the \write), so we must insert a \nobreak. + \nobreak\vskip\whatsitskip + \fi +\fi} + +% The index entry written in the file actually looks like +% \entry {sortstring}{page}{topic} +% or +% \entry {sortstring}{page}{topic}{subtopic} +% The texindex program reads in these files and writes files +% containing these kinds of lines: +% \initial {c} +% before the first topic whose initial is c +% \entry {topic}{pagelist} +% for a topic that is used without subtopics +% \primary {topic} +% for the beginning of a topic that is used with subtopics +% \secondary {subtopic}{pagelist} +% for each subtopic. + +% Define the user-accessible indexing commands +% @findex, @vindex, @kindex, @cindex. + +\def\findex {\fnindex} +\def\kindex {\kyindex} +\def\cindex {\cpindex} +\def\vindex {\vrindex} +\def\tindex {\tpindex} +\def\pindex {\pgindex} + +% @cindexsub "SUBTOPIC" TOPIC: the \gdef below takes a quoted first argument +% and the rest of the line, and calls \dosubind{cp}{TOPIC}{SUBTOPIC}. +% NOTE(review): that \gdef appears to replace the \def that precedes it, +% leaving nothing to open the group that its \endgroup closes -- confirm. +\def\cindexsub {\begingroup\obeylines\cindexsub} +{\obeylines % +\gdef\cindexsub "#1" #2^^M{\endgroup % +\dosubind{cp}{#2}{#1}}} + +% Define the macros used in formatting output of the sorted index material. + +% @printindex causes a particular index (the ??s file) to get printed.
+% It does not print any chapter heading (usually an @unnumbered). +% +\parseargdef\printindex{\begingroup + \dobreak \chapheadingskip{10000}% + % + \smallfonts \rm + \tolerance = 9500 + \plainfrenchspacing + \everypar = {}% don't want the \kern\-parindent from indentation suppression. + % + % See if the index file exists and is nonempty. + % Change catcode of @ here so that if the index file contains + % \initial {@} + % as its first line, TeX doesn't complain about mismatched braces + % (because it thinks @} is a control sequence). + \catcode`\@ = 12 + % See comment in \requireopenindexfile. + \def\indexname{#1}\ifx\indexname\indexisfl\def\indexname{f1}\fi + \openin 1 \jobname.\indexname s + \ifeof 1 + % \enddoublecolumns gets confused if there is no text in the index, + % and it loses the chapter title and the aux file entries for the + % index. The easiest way to prevent this problem is to make sure + % there is some text. + \putwordIndexNonexistent + \typeout{No file \jobname.\indexname s.}% + \else + \catcode`\\ = 0 + % + % If the index file exists but is empty, then \openin leaves \ifeof + % false. We have to make TeX try to read something from the file, so + % it can discover if there is anything in it. + \read 1 to \thisline + \ifeof 1 + \putwordIndexIsEmpty + \else + % Index files are almost Texinfo source, but we use \ as the escape + % character. It would be better to use @, but that's too big a change + % to make right now. + \def\indexbackslash{\ttbackslash}% + \let\indexlbrace\{ % Likewise, set these sequences for braces + \let\indexrbrace\} % used in the sort key. + \begindoublecolumns + \let\dotheinsertentrybox\dotheinsertentryboxwithpenalty + % + % Read input from the index file line by line. 
+ \loopdo + \ifeof1 \else + \read 1 to \nextline + \fi + % + \indexinputprocessing + \thisline + % + \ifeof1\else + \let\thisline\nextline + \repeat + %% + \enddoublecolumns + \fi + \fi + \closein 1 +\endgroup} +% \loopdo BODY \repeat: BODY must end inside an unclosed conditional. +% \loopdoxxx runs BODY, supplies the closing \fi, and iterates again only +% when the branch left open by BODY was the one taken. +\def\loopdo#1\repeat{\def\body{#1}\loopdoxxx} +\def\loopdoxxx{\let\next=\relax\body\let\next=\loopdoxxx\fi\next} + +% Set \firsttoken: \relax at end of file, otherwise (globally) a macro whose +% body is the unexpanded first token of \nextline. +\def\indexinputprocessing{% + \ifeof1 + \let\firsttoken\relax + \else + \edef\act{\gdef\noexpand\firsttoken{\getfirsttoken\nextline}}% + \act + \fi +} +% \getfirsttoken expands its argument once and keeps only the first token, +% protected by \noexpand; everything up to \endfirsttoken is discarded. +\def\getfirsttoken#1{\expandafter\getfirsttokenx#1\endfirsttoken} +\long\def\getfirsttokenx#1#2\endfirsttoken{\noexpand#1} + + +% These macros are used by the sorted index file itself. +% Change them to control the appearance of the index. + +{\catcode`\/=13 \catcode`\-=13 \catcode`\^=13 \catcode`\~=13 \catcode`\_=13 +\catcode`\|=13 \catcode`\<=13 \catcode`\>=13 \catcode`\+=13 \catcode`\"=13 +\catcode`\$=3 +\gdef\initialglyphs{% + % Some changes for non-alphabetic characters. Using the glyphs from the + % math fonts looks more consistent than the typewriter font used elsewhere + % for these characters. + \def\indexbackslash{\math{\backslash}}% + \let\\=\indexbackslash + % + % Can't get bold backslash so don't use bold forward slash + \catcode`\/=13 + \def/{{\secrmnotbold \normalslash}}% + \def-{{\normaldash\normaldash}}% en dash `--' + \def^{{\chapbf \normalcaret}}% + \def~{{\chapbf \normaltilde}}% + \def\_{% + \leavevmode \kern.07em \vbox{\hrule width.3em height.1ex}\kern .07em }% + \def|{$\vert$}% + \def<{$\less$}% + \def>{$\gtr$}% + \def+{$\normalplus$}% +}} + +% \initial opens a group and applies \initialglyphs before \initialx reads +% the argument, so the glyph redefinitions are in force while it is read. +\def\initial{% + \bgroup + \initialglyphs + \initialx +} + +\def\initialx#1{% + % Remove any glue we may have, we'll be inserting our own. + \removelastskip + % + % We like breaks before the index initials, so insert a bonus. + % The glue before the bonus allows a little bit of space at the + % bottom of a column to reduce an increase in inter-line spacing.
+ \nobreak + \vskip 0pt plus 5\baselineskip + \penalty -300 + \vskip 0pt plus -5\baselineskip + % + % Typeset the initial. Making this add up to a whole number of + % baselineskips increases the chance of the dots lining up from column + % to column. It still won't often be perfect, because of the stretch + % we need before each entry, but it's better. + % + % No shrink because it confuses \balancecolumns. + \vskip 1.67\baselineskip plus 1\baselineskip + \leftline{\secfonts \kern-0.05em \secbf #1}% + % \secfonts is inside the argument of \leftline so that the change of + % \baselineskip will not affect any glue inserted before the vbox that + % \leftline creates. + % Do our best not to break after the initial. + \nobreak + \vskip .33\baselineskip plus .1\baselineskip + \egroup % \initialglyphs +} + +\newdimen\entryrightmargin +\entryrightmargin=0pt + +% \entry typesets a paragraph consisting of the text (#1), dot leaders, and +% then page number (#2) flushed to the right margin. It is used for index +% and table of contents entries. The paragraph is indented by \leftskip. +% +\def\entry{% + \begingroup + % + % For pdfTeX and XeTeX. + % The redefinition of \domark stops marks being added in \pdflink to + % preserve coloured links across page boundaries. Otherwise the marks + % would get in the way of \lastbox in \insertentrybox. + \let\domark\relax + % + % Start a new paragraph if necessary, so our assignments below can't + % affect previous text. + \par + % + % No extra space above this paragraph. + \parskip = 0in + % + % When reading the text of entry, convert explicit line breaks + % from @* into spaces. The user might give these in long section + % titles, for instance. 
+ \def\*{\unskip\space\ignorespaces}% + \def\entrybreak{\hfil\break}% An undocumented command + % + % Swallow the left brace of the text (first parameter): + \afterassignment\doentry + \let\temp = +} +\def\entrybreak{\unskip\space\ignorespaces}% +\def\doentry{% + % Save the text of the entry + \global\setbox\boxA=\hbox\bgroup + \bgroup % Instead of the swallowed brace. + \noindent + \aftergroup\finishentry + % And now comes the text of the entry. + % Not absorbing as a macro argument reduces the chance of problems + % with catcodes occurring. +} +{\catcode`\@=11 +\gdef\finishentry#1{% + \egroup % end box A + \dimen@ = \wd\boxA % Length of text of entry + \global\setbox\boxA=\hbox\bgroup\unhbox\boxA + % #1 is the page number. + % + % Get the width of the page numbers, and only use + % leaders if they are present. + \global\setbox\boxB = \hbox{#1}% + \ifdim\wd\boxB = 0pt + \null\nobreak\hfill\ % + \else + % + \null\nobreak\indexdotfill % Have leaders before the page number. + % + \ifpdf + \pdfgettoks#1.% + \hskip\skip\thinshrinkable\the\toksA + \else + \ifx\XeTeXrevision\thisisundefined + \hskip\skip\thinshrinkable #1% + \else + \pdfgettoks#1.% + \hskip\skip\thinshrinkable\the\toksA + \fi + \fi + \fi + \egroup % end \boxA + \ifdim\wd\boxB = 0pt + \global\setbox\entrybox=\vbox{\unhbox\boxA}% + \else + \global\setbox\entrybox=\vbox\bgroup + % We want the text of the entries to be aligned to the left, and the + % page numbers to be aligned to the right. + % + \parindent = 0pt + \advance\leftskip by 0pt plus 1fil + \advance\leftskip by 0pt plus -1fill + \rightskip = 0pt plus -1fil + \advance\rightskip by 0pt plus 1fill + % Cause last line, which could consist of page numbers on their own + % if the list of page numbers is long, to be aligned to the right. + \parfillskip=0pt plus -1fill + % + \advance\rightskip by \entryrightmargin + % Determine how far we can stretch into the margin. 
+ % This allows, e.g., "Appendix H GNU Free Documentation License" to + % fit on one line in @letterpaper format. + \ifdim\entryrightmargin>2.1em + \dimen@i=2.1em + \else + \dimen@i=0em + \fi + \advance \parfillskip by 0pt minus 1\dimen@i + % + % \dimen@ii: the width available to the entry on a single line. + \dimen@ii = \hsize + \advance\dimen@ii by -1\leftskip + \advance\dimen@ii by -1\entryrightmargin + \advance\dimen@ii by 1\dimen@i + \ifdim\wd\boxA > \dimen@ii % If the entry doesn't fit in one line + \ifdim\dimen@ > 0.8\dimen@ii % due to long index text + % Try to split the text roughly evenly. \dimen@ will be the length of + % the first line. + \dimen@ = 0.7\dimen@ + \dimen@ii = \hsize + % \dimen@ and \dimen@ii are both lengths, so compare them with \ifdim + % rather than relying on \ifnum coercing them to integers. + \ifdim\dimen@>\dimen@ii + % If the entry is too long (for example, if it needs more than + % two lines), use all the space in the first line. + \dimen@ = \dimen@ii + \fi + \advance\leftskip by 0pt plus 1fill % ragged right + \advance \dimen@ by 1\rightskip + \parshape = 2 0pt \dimen@ 0em \dimen@ii + % Ideally we'd add a finite glue at the end of the first line only, + % instead of using \parshape with explicit line lengths, but TeX + % doesn't seem to provide a way to do such a thing. + % + % Indent all lines but the first one. + \advance\leftskip by 1em + \advance\parindent by -1em + \fi\fi + \indent % start paragraph + \unhbox\boxA + % + % Do not prefer a separate line ending with a hyphen to fewer lines. + \finalhyphendemerits = 0 + % + % Word spacing - no stretch + \spaceskip=\fontdimen2\font minus \fontdimen4\font + % + \linepenalty=1000 % Discourage line breaks. + \hyphenpenalty=5000 % Discourage hyphenation. + % + \par % format the paragraph + \egroup % The \vbox + \fi + \endgroup + \dotheinsertentrybox +}} + +% Glue placed before the page number in \finishentry; it can shrink to zero. +\newskip\thinshrinkable +\skip\thinshrinkable=.15em minus .15em + +% \entrybox holds the typeset entry; \insertentrybox adds its contents to +% the current vertical list by way of \ourunvbox. +\newbox\entrybox +\def\insertentrybox{% + \ourunvbox\entrybox +} + +% default definition +\let\dotheinsertentrybox\insertentrybox + +% Use \lastbox to take apart vbox box by box, and add each sub-box +% to the current vertical list.
+\def\ourunvbox#1{% +\bgroup % for local binding of \delayedbox + % Remove the last box from box #1 + \global\setbox#1=\vbox{% + \unvbox#1% + \unskip % remove any glue + \unpenalty + \global\setbox\interbox=\lastbox + }% + \setbox\delayedbox=\box\interbox + % The recursive call emits the earlier boxes first, each followed by + % \nobreak; only then is the box removed by this call emitted. + \ifdim\ht#1=0pt\else + \ourunvbox#1 % Repeat on what's left of the box + \nobreak + \fi + \box\delayedbox +\egroup +} +\newbox\delayedbox +\newbox\interbox + +% Used from \printindex. \firsttoken should be the first token +% after the \entry. If it's not another \entry, we are at the last +% line of a group of index entries, so insert a penalty to discourage +% widowed index entries. +\def\dotheinsertentryboxwithpenalty{% + \ifx\firsttoken\isentry + \else + \penalty 9000 + \fi + \insertentrybox +} +% \isentry is a macro whose body is the single token \entry; the \ifx above +% compares it with \firsttoken, so the two match when \firsttoken is a macro +% with that same body. +\def\isentry{\entry}% + +% Like plain.tex's \dotfill, except uses up at least 1 em. +% The filll stretch here overpowers both the fil and fill stretch to push +% the page number to the right. +\def\indexdotfill{\cleaders + \hbox{$\mathsurround=0pt \mkern1.5mu.\mkern1.5mu$}\hskip 1em plus 1filll} + + +% \primary{TOPIC}: TOPIC on a \line of its own, followed by \hfil. +\def\primary #1{\line{#1\hfil}} + +% \secondary{SUBTOPIC}{PAGELIST}: SUBTOPIC in an \hbox after a skip of +% \secondaryindent, then \indexdotfill and the page list. Lines after the +% first hang by 1in. +\newskip\secondaryindent \secondaryindent=0.5cm +\def\secondary#1#2{{% + \parfillskip=0in + \parskip=0in + \hangindent=1in + \hangafter=1 + \noindent\hskip\secondaryindent\hbox{#1}\indexdotfill + \ifpdf + \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. + \else + \ifx\XeTeXrevision\thisisundefined + #2 + \else + \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. + \fi + \fi + \par +}} + +% Define two-column mode, which we use to typeset indexes. +% Adapted from the TeXbook, page 416, which is to say, +% the manmac.tex format used to print the TeXbook itself.
+\catcode`\@=11 % private names + +% \partialpage receives the single-column material found above the double +% columns; \doublecolumnhsize is the \hsize used for each column. +\newbox\partialpage +\newdimen\doublecolumnhsize + +% Use inside an output routine to save \topmark and \firstmark +\def\savemarks{% + \global\savedtopmark=\expandafter{\topmark }% + \global\savedfirstmark=\expandafter{\firstmark }% +} +\newtoks\savedtopmark +\newtoks\savedfirstmark + +% Set \topmark and \firstmark for next time \output runs. +% Can't be run from within \output (because any material +% added while an output routine is active, including +% penalties, is saved for after it finishes). The page so far +% should be empty, otherwise what's on it will be thrown away. +\def\restoremarks{% + \mark{\the\savedtopmark}% + % The temporary \output below only moves the page into \dummybox. + \bgroup\output = {% + \setbox\dummybox=\box\PAGE + }abc\eject\egroup + % "abc" because output routine doesn't fire for a completely empty page. + \mark{\the\savedfirstmark}% +} + +\def\begindoublecolumns{\begingroup % ended by \enddoublecolumns + % If not much space left on page, start a new page. + \ifdim\pagetotal>0.8\vsize\vfill\eject\fi + % + % Grab any single-column material above us. + \output = {% + % + % Here is a possibility not foreseen in manmac: if we accumulate a + % whole lot of material, we might end up calling this \output + % routine twice in a row (see the doublecol-lose test, which is + % essentially a couple of indexes with @setchapternewpage off). In + % that case we just ship out what is in \partialpage with the normal + % output routine. Generally, \partialpage will be empty when this + % runs and this will be a no-op. See the indexspread.tex test case. + \ifvoid\partialpage \else + \onepageout{\pagecontents\partialpage}% + \fi + % + \global\setbox\partialpage = \vbox{% + % Unvbox the main output page.
+ \unvbox\PAGE + \kern-\topskip \kern\baselineskip + }% + \savemarks + }% + \eject % run that output routine to set \partialpage + \restoremarks + % + % We recover the two marks that the last output routine saved in order + % to propagate the information in marks added around a chapter heading, + % which could otherwise be lost by the time the final page is output. + % + % + % Use the double-column output routine for subsequent pages. + \output = {\doublecolumnout}% + % + % Change the page size parameters. We could do this once outside this + % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 + % format, but then we repeat the same computation. Repeating a couple + % of assignments once per index is clearly meaningless for the + % execution time, so we may as well do it in one place. + % + % First we halve the line length, less a little for the gutter between + % the columns. We compute the gutter based on the line length, so it + % changes automatically with the paper format. The magic constant + % below is chosen so that the gutter has the same value (well, +-<1pt) + % as it did when we hard-coded it. + % + % We put the result in a separate register, \doublecolumnhsize, so we + % can restore it in \pagesofar, after \hsize itself has (potentially) + % been clobbered. + % + \doublecolumnhsize = \hsize + \advance\doublecolumnhsize by -.04154\hsize + \divide\doublecolumnhsize by 2 + \hsize = \doublecolumnhsize + % + % Double the \vsize as well, after first deducting the height of the + % material saved in \partialpage. + \advance\vsize by -\ht\partialpage + \vsize = 2\vsize + % + % For the benefit of balancing columns + \advance\baselineskip by 0pt plus 0.5pt +} + +% The double-column output routine for all double-column pages except +% the last, which is done by \balancecolumns. +% +\def\doublecolumnout{% + % + \splittopskip=\topskip \splitmaxdepth=\maxdepth + % Get the available space for the double columns -- the normal + % (undoubled) page height minus any material left over from the + % previous page.
+ \dimen@ = \vsize + \divide\dimen@ by 2 + % + % box0 will be the left-hand column, box2 the right. + \setbox0=\vsplit\PAGE to\dimen@ \setbox2=\vsplit\PAGE to\dimen@ + % \pagesofar (below) unboxes \partialpage, so give back to \vsize the + % height that \begindoublecolumns deducted for it (twice, since \vsize + % is doubled). + \global\advance\vsize by 2\ht\partialpage + \onepageout\pagesofar + % Return what is left of \PAGE to the main list, then re-insert + % \outputpenalty. + \unvbox\PAGE + \penalty\outputpenalty +} +% +% Re-output the contents of the output page -- any previous material, +% followed by the two boxes we just split, in box0 and box2. +\def\pagesofar{% + \unvbox\partialpage + % + \hsize = \doublecolumnhsize + \wd0=\hsize \wd2=\hsize + \hbox to\txipagewidth{\box0\hfil\box2}% +} + + +% Finished with double columns. +\def\enddoublecolumns{% + % The following penalty ensures that the page builder is exercised + % _before_ we change the output routine. This is necessary in the + % following situation: + % + % The last section of the index consists only of a single entry. + % Before this section, \pagetotal is less than \pagegoal, so no + % break occurs before the last section starts. However, the last + % section, consisting of \initial and the single \entry, does not + % fit on the page and has to be broken off. Without the following + % penalty the page builder will not be exercised until \eject + % below, and by that time we'll already have changed the output + % routine to the \balancecolumns version, so the next-to-last + % double-column page will be processed with \balancecolumns, which + % is wrong: The two columns will go to the main vertical list, with + % the broken-off section in the recent contributions. As soon as + % the output routine finishes, TeX starts reconsidering the page + % break. The two columns and the broken-off section both fit on the + % page, because the two columns now take up only half of the page + % goal.
When TeX sees \eject from below which follows the final + % section, it invokes the new output routine that we've set after + % \balancecolumns below; \onepageout will try to fit the two columns + % and the final section into the vbox of \txipageheight (see + % \pagebody), causing an overfull box. + % + % Note that glue won't work here, because glue does not exercise the + % page builder, unlike penalties (see The TeXbook, pp. 280-281). + \penalty0 + % + \output = {% + % Split the last of the double-column material. + \savemarks + \balancecolumns + }% + \eject % call the \output just set + \ifdim\pagetotal=0pt + % Having called \balancecolumns once, we do not + % want to call it again. Therefore, reset \output to its normal + % definition right away. + \global\output = {\onepageout{\pagecontents\PAGE}}% + % + \endgroup % started in \begindoublecolumns + \restoremarks + % Leave the double-column material on the current page, no automatic + % page break. + \box\balancedcolumns + % + % \pagegoal was set to the doubled \vsize above, since we restarted + % the current page. We're now back to normal single-column + % typesetting, so reset \pagegoal to the normal \vsize. + \global\vsize = \txipageheight % + \pagegoal = \txipageheight % + \else + % We had some left-over material. This might happen when \doublecolumnout + % is called in \balancecolumns. Try again. + \expandafter\enddoublecolumns + \fi +} +\newbox\balancedcolumns +\setbox\balancedcolumns=\vbox{shouldnt see this}% +% +% Only called for the last of the double column material. \doublecolumnout +% does the others. +\def\balancecolumns{% + \setbox0 = \vbox{\unvbox\PAGE}% like \box255 but more efficient, see p.120. + \dimen@ = \ht0 + \advance\dimen@ by \topskip + \advance\dimen@ by-\baselineskip + \ifdim\dimen@<5\baselineskip + % Don't split a short final column in two. 
+ \setbox2=\vbox{}% + \global\setbox\balancedcolumns=\vbox{\pagesofar}% + \else + \divide\dimen@ by 2 % target to split to + \dimen@ii = \dimen@ + \splittopskip = \topskip + % Loop until left column is at least as high as the right column. + {% + \vbadness = 10000 + \loop + \global\setbox3 = \copy0 + \global\setbox1 = \vsplit3 to \dimen@ + \ifdim\ht1<\ht3 + \global\advance\dimen@ by 1pt + \repeat + }% + % Now the left column is in box 1, and the right column in box 3. + % + % Check whether the left column has come out higher than the page itself. + % (Note that we have doubled \vsize for the double columns, so + % the actual height of the page is 0.5\vsize). + \ifdim2\ht1>\vsize + % It appears that we have been called upon to balance too much material. + % Output some of it with \doublecolumnout, leaving the rest on the page. + \setbox\PAGE=\box0 + \doublecolumnout + \else + % Compare the heights of the two columns. + \ifdim4\ht1>5\ht3 + % Column heights are too different, so don't make their bottoms + % flush with each other. + \setbox2=\vbox to \ht1 {\unvbox3\vfill}% + \setbox0=\vbox to \ht1 {\unvbox1\vfill}% + \else + % Make column bottoms flush with each other. + \setbox2=\vbox to\ht1{\unvbox3\unskip}% + \setbox0=\vbox to\ht1{\unvbox1\unskip}% + \fi + \global\setbox\balancedcolumns=\vbox{\pagesofar}% + \fi + \fi + % +} +\catcode`\@ = \other + + +\message{sectioning,} +% Chapters, sections, etc. + +% Let's start with @part. +\outer\parseargdef\part{\partzzz{#1}} +\def\partzzz#1{% + \chapoddpage + \null + \vskip.3\vsize % move it down on the page a bit + \begingroup + \noindent \titlefonts\rm #1\par % the text + \let\lastnode=\empty % no node to associate with + \writetocentry{part}{#1}{}% but put it in the toc + \headingsoff % no headline or footline on the part page + % This outputs a mark at the end of the page that clears \thischapter + % and \thissection, as is done in \startcontents. 
+ \let\pchapsepmacro\relax + \chapmacro{}{Yomitfromtoc}{}% + \chapoddpage + \endgroup +} + +% \unnumberedno is an oxymoron. But we count the unnumbered +% sections so that we can refer to them unambiguously in the pdf +% outlines by their "section number". We avoid collisions with chapter +% numbers by starting them at 10000. (If a document ever has 10000 +% chapters, we're in trouble anyway, I'm sure.) +\newcount\unnumberedno \unnumberedno = 10000 +\newcount\chapno +\newcount\secno \secno=0 +\newcount\subsecno \subsecno=0 +\newcount\subsubsecno \subsubsecno=0 + +% This counter is funny since it counts through charcodes of letters A, B, ... +\newcount\appendixno \appendixno = `\@ +% +% \def\appendixletter{\char\the\appendixno} +% We do the following ugly conditional instead of the above simple +% construct for the sake of pdftex, which needs the actual +% letter in the expansion, not just typeset. +% +\def\appendixletter{% + \ifnum\appendixno=`A A% + \else\ifnum\appendixno=`B B% + \else\ifnum\appendixno=`C C% + \else\ifnum\appendixno=`D D% + \else\ifnum\appendixno=`E E% + \else\ifnum\appendixno=`F F% + \else\ifnum\appendixno=`G G% + \else\ifnum\appendixno=`H H% + \else\ifnum\appendixno=`I I% + \else\ifnum\appendixno=`J J% + \else\ifnum\appendixno=`K K% + \else\ifnum\appendixno=`L L% + \else\ifnum\appendixno=`M M% + \else\ifnum\appendixno=`N N% + \else\ifnum\appendixno=`O O% + \else\ifnum\appendixno=`P P% + \else\ifnum\appendixno=`Q Q% + \else\ifnum\appendixno=`R R% + \else\ifnum\appendixno=`S S% + \else\ifnum\appendixno=`T T% + \else\ifnum\appendixno=`U U% + \else\ifnum\appendixno=`V V% + \else\ifnum\appendixno=`W W% + \else\ifnum\appendixno=`X X% + \else\ifnum\appendixno=`Y Y% + \else\ifnum\appendixno=`Z Z% + % The \the is necessary, despite appearances, because \appendixletter is + % expanded while writing the .toc file. 
\char\appendixno is not + % expandable, thus it is written literally, thus all appendixes come out + % with the same letter (or @) in the toc without it. + \else\char\the\appendixno + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi} + +% Each @chapter defines these (using marks) as the number+name, number +% and name of the chapter. Page headings and footings can use +% these. @section does likewise. +\def\thischapter{} +\def\thischapternum{} +\def\thischaptername{} +\def\thissection{} +\def\thissectionnum{} +\def\thissectionname{} + +\newcount\absseclevel % used to calculate proper heading level +\newcount\secbase\secbase=0 % @raisesections/@lowersections modify this count + +% @raisesections: treat @section as chapter, @subsection as section, etc. +\def\raisesections{\global\advance\secbase by -1} +\let\up=\raisesections % original BFox name + +% @lowersections: treat @chapter as section, @section as subsection, etc. +\def\lowersections{\global\advance\secbase by 1} +\let\down=\lowersections % original BFox name + +% we only have subsub. +\chardef\maxseclevel = 3 +% +% A numbered section within an unnumbered changes to unnumbered too. +% To achieve this, remember the "biggest" unnum. sec. we are currently in: +\chardef\unnlevel = \maxseclevel +% +% Trace whether the current chapter is an appendix or not: +% \chapheadtype is "N" or "A", unnumbered chapters are ignored. +\def\chapheadtype{N} + +% Choose a heading macro +% #1 is heading type +% #2 is heading level +% #3 is text for heading +\def\genhead#1#2#3{% + % Compute the abs. sec. 
level: + \absseclevel=#2 + \advance\absseclevel by \secbase + % Make sure \absseclevel doesn't fall outside the range + % 0 .. \maxseclevel (the deepest level we have): + \ifnum \absseclevel < 0 + \absseclevel = 0 + \else + \ifnum \absseclevel > \maxseclevel + \absseclevel = \maxseclevel + \fi + \fi + % The heading type: + \def\headtype{#1}% + \if \headtype U% + % Remember the highest-level unnumbered heading we are inside. + \ifnum \absseclevel < \unnlevel + \chardef\unnlevel = \absseclevel + \fi + \else + % Check for appendix sections: + \ifnum \absseclevel = 0 + \edef\chapheadtype{\headtype}% + \else + \if \headtype A\if \chapheadtype N% + \errmessage{@appendix... within a non-appendix chapter}% + \fi\fi + \fi + % Check for numbered within unnumbered: + \ifnum \absseclevel > \unnlevel + \def\headtype{U}% + \else + % Not inside an unnumbered heading any more: reset \unnlevel to + % the value it is initialized with. + \chardef\unnlevel = \maxseclevel + \fi + \fi + % Now print the heading: + \if \headtype U% + \ifcase\absseclevel + \unnumberedzzz{#3}% + \or \unnumberedseczzz{#3}% + \or \unnumberedsubseczzz{#3}% + \or \unnumberedsubsubseczzz{#3}% + \fi + \else + \if \headtype A% + \ifcase\absseclevel + \appendixzzz{#3}% + \or \appendixsectionzzz{#3}% + \or \appendixsubseczzz{#3}% + \or \appendixsubsubseczzz{#3}% + \fi + \else + \ifcase\absseclevel + \chapterzzz{#3}% + \or \seczzz{#3}% + \or \numberedsubseczzz{#3}% + \or \numberedsubsubseczzz{#3}% + \fi + \fi + \fi + \suppressfirstparagraphindent +} + +% an interface: +\def\numhead{\genhead N} +\def\apphead{\genhead A} +\def\unnmhead{\genhead U} + +% @chapter, @appendix, @unnumbered. Increment top-level counter, reset +% all lower-level sectioning counters to zero. +% +% Also set \chaplevelprefix, which we prepend to @float sequence numbers +% (e.g., figures), q.v. By default (before any chapter), that is empty. +\let\chaplevelprefix = \empty +% +\outer\parseargdef\chapter{\numhead0{#1}} % normally numhead0 calls chapterzzz +\def\chapterzzz#1{% + % section resetting is \global in case the chapter is in a group, such + % as an @include file. + \global\secno=0 \global\subsecno=0 \global\subsubsecno=0 + \global\advance\chapno by 1 + % + % Used for \float.
+ \gdef\chaplevelprefix{\the\chapno.}% + \resetallfloatnos + % + % \putwordChapter can contain complex things in translations. + \toks0=\expandafter{\putwordChapter}% + \message{\the\toks0 \space \the\chapno}% + % + % Write the actual heading. + \chapmacro{#1}{Ynumbered}{\the\chapno}% + % + % So @section and the like are numbered underneath this chapter. + \global\let\section = \numberedsec + \global\let\subsection = \numberedsubsec + \global\let\subsubsection = \numberedsubsubsec +} + +\outer\parseargdef\appendix{\apphead0{#1}} % normally calls appendixzzz +% +% Resets the lower-level counters, advances \appendixno, and uses +% \appendixletter both as the float prefix and as the heading number. +\def\appendixzzz#1{% + \global\secno=0 \global\subsecno=0 \global\subsubsecno=0 + \global\advance\appendixno by 1 + \gdef\chaplevelprefix{\appendixletter.}% + \resetallfloatnos + % + % \putwordAppendix can contain complex things in translations. + \toks0=\expandafter{\putwordAppendix}% + \message{\the\toks0 \space \appendixletter}% + % + \chapmacro{#1}{Yappendix}{\appendixletter}% + % + % So @section and the like are numbered underneath this appendix. + \global\let\section = \appendixsec + \global\let\subsection = \appendixsubsec + \global\let\subsubsection = \appendixsubsubsec +} + +% normally unnmhead0 calls unnumberedzzz: +\outer\parseargdef\unnumbered{\unnmhead0{#1}} +\def\unnumberedzzz#1{% + \global\secno=0 \global\subsecno=0 \global\subsubsecno=0 + \global\advance\unnumberedno by 1 + % + % Since an unnumbered has no number, no prefix for figures. + \global\let\chaplevelprefix = \empty + \resetallfloatnos + % + % This used to be simply \message{#1}, but TeX fully expands the + % argument to \message. Therefore, if #1 contained @-commands, TeX + % expanded them. For example, in `@unnumbered The @cite{Book}', TeX + % expanded @cite (which turns out to cause errors because \cite is meant + % to be executed, not expanded). + % + % Anyway, we don't want the fully-expanded definition of @cite to appear + % as a result of the \message, we just want `@cite' itself. We use + % \the on a token register to achieve this: TeX expands \the only once, + % simply yielding the contents of that register.
(We also do this for + % the toc entries.) + \toks0 = {#1}% + \message{(\the\toks0)}% + % + \chapmacro{#1}{Ynothing}{\the\unnumberedno}% + % + \global\let\section = \unnumberedsec + \global\let\subsection = \unnumberedsubsec + \global\let\subsubsection = \unnumberedsubsubsec +} + +% @centerchap is like @unnumbered, but the heading is centered. +\outer\parseargdef\centerchap{% + \let\centerparametersmaybe = \centerparameters + \unnmhead0{#1}% + \let\centerparametersmaybe = \relax +} + +% @top is like @unnumbered. +\let\top\unnumbered + +% Sections. +% +\outer\parseargdef\numberedsec{\numhead1{#1}} % normally calls seczzz +\def\seczzz#1{% + \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1 + \sectionheading{#1}{sec}{Ynumbered}{\the\chapno.\the\secno}% +} + +% normally calls appendixsectionzzz: +\outer\parseargdef\appendixsection{\apphead1{#1}} +\def\appendixsectionzzz#1{% + \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1 + \sectionheading{#1}{sec}{Yappendix}{\appendixletter.\the\secno}% +} +\let\appendixsec\appendixsection + +% normally calls unnumberedseczzz: +\outer\parseargdef\unnumberedsec{\unnmhead1{#1}} +\def\unnumberedseczzz#1{% + \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1 + \sectionheading{#1}{sec}{Ynothing}{\the\unnumberedno.\the\secno}% +} + +% Subsections. 
+% +% normally calls numberedsubseczzz: +\outer\parseargdef\numberedsubsec{\numhead2{#1}} +\def\numberedsubseczzz#1{% + \global\subsubsecno=0 \global\advance\subsecno by 1 + \sectionheading{#1}{subsec}{Ynumbered}{\the\chapno.\the\secno.\the\subsecno}% +} + +% normally calls appendixsubseczzz: +\outer\parseargdef\appendixsubsec{\apphead2{#1}} +\def\appendixsubseczzz#1{% + \global\subsubsecno=0 \global\advance\subsecno by 1 + \sectionheading{#1}{subsec}{Yappendix}% + {\appendixletter.\the\secno.\the\subsecno}% +} + +% normally calls unnumberedsubseczzz: +\outer\parseargdef\unnumberedsubsec{\unnmhead2{#1}} +\def\unnumberedsubseczzz#1{% + \global\subsubsecno=0 \global\advance\subsecno by 1 + \sectionheading{#1}{subsec}{Ynothing}% + {\the\unnumberedno.\the\secno.\the\subsecno}% +} + +% Subsubsections. +% +% normally calls numberedsubsubseczzz: +\outer\parseargdef\numberedsubsubsec{\numhead3{#1}} +\def\numberedsubsubseczzz#1{% + \global\advance\subsubsecno by 1 + \sectionheading{#1}{subsubsec}{Ynumbered}% + {\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno}% +} + +% normally calls appendixsubsubseczzz: +\outer\parseargdef\appendixsubsubsec{\apphead3{#1}} +\def\appendixsubsubseczzz#1{% + \global\advance\subsubsecno by 1 + \sectionheading{#1}{subsubsec}{Yappendix}% + {\appendixletter.\the\secno.\the\subsecno.\the\subsubsecno}% +} + +% normally calls unnumberedsubsubseczzz: +\outer\parseargdef\unnumberedsubsubsec{\unnmhead3{#1}} +\def\unnumberedsubsubseczzz#1{% + \global\advance\subsubsecno by 1 + \sectionheading{#1}{subsubsec}{Ynothing}% + {\the\unnumberedno.\the\secno.\the\subsecno.\the\subsubsecno}% +} + +% These macros control what the section commands do, according +% to what kind of chapter we are in (ordinary, appendix, or unnumbered). +% Define them by default for a numbered chapter.
+\let\section = \numberedsec +\let\subsection = \numberedsubsec +\let\subsubsection = \numberedsubsubsec + +% Define @majorheading, @chapheading, @heading, @subheading and @subsubheading + +\def\majorheading{% + {\advance\chapheadingskip by 10pt \chapbreak }% + \parsearg\chapheadingzzz +} + +\def\chapheading{\chapbreak \parsearg\chapheadingzzz} +\def\chapheadingzzz#1{% + \vbox{\chapfonts \raggedtitlesettings #1\par}% + \nobreak\bigskip \nobreak + \suppressfirstparagraphindent +} + +% @heading, @subheading, @subsubheading. +\parseargdef\heading{\sectionheading{#1}{sec}{Yomitfromtoc}{} + \suppressfirstparagraphindent} +\parseargdef\subheading{\sectionheading{#1}{subsec}{Yomitfromtoc}{} + \suppressfirstparagraphindent} +\parseargdef\subsubheading{\sectionheading{#1}{subsubsec}{Yomitfromtoc}{} + \suppressfirstparagraphindent} + +% These macros generate a chapter, section, etc. heading only +% (including whitespace, linebreaking, etc. around it), +% given all the information in convenient, parsed form. + +% Args are the skip and penalty (usually negative) +\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi} + +% Parameter controlling skip before chapter headings (if needed) +\newskip\chapheadingskip + +% Define plain chapter starts, and page on/off switching for it. +\def\chapbreak{\dobreak \chapheadingskip {-4000}} + +% Start a new page +\def\chappager{\par\vfill\supereject} + +% \chapoddpage - start on an odd page for a new chapter +% Because \domark is called before \chapoddpage, the filler page will +% get the headings for the next chapter, which is wrong. But we don't +% care -- we just disable all headings on the filler page.
+\def\chapoddpage{% + \chappager + \ifodd\pageno \else + \begingroup + \headingsoff + \null + \chappager + \endgroup + \fi +} + +% @setchapternewpage #1 runs \CHAPPAG#1 (\CHAPPAGoff, \CHAPPAGon or \CHAPPAGodd). +\parseargdef\setchapternewpage{\csname CHAPPAG#1\endcsname} + +\def\CHAPPAGoff{% +\global\let\contentsalignmacro = \chappager +\global\let\pchapsepmacro=\chapbreak +\global\let\pagealignmacro=\chappager} + +\def\CHAPPAGon{% +\global\let\contentsalignmacro = \chappager +\global\let\pchapsepmacro=\chappager +\global\let\pagealignmacro=\chappager +\global\def\HEADINGSon{\HEADINGSsingle}} + +\def\CHAPPAGodd{% +\global\let\contentsalignmacro = \chapoddpage +\global\let\pchapsepmacro=\chapoddpage +\global\let\pagealignmacro=\chapoddpage +\global\def\HEADINGSon{\HEADINGSdouble}} + +\CHAPPAGon + +% \chapmacro - Chapter opening. +% +% #1 is the text, #2 is the section type (Ynumbered, Ynothing, +% Yappendix, Yomitfromtoc), #3 the chapter number. +% Not used for @heading series. +% +% To test against our argument. +\def\Ynothingkeyword{Ynothing} +\def\Yappendixkeyword{Yappendix} +\def\Yomitfromtockeyword{Yomitfromtoc} +% +\def\chapmacro#1#2#3{% + \expandafter\ifx\thisenv\titlepage\else + \checkenv{}% chapters, etc., should not start inside an environment. + \fi + % FIXME: \chapmacro is currently called from inside \titlepage when + % \setcontentsaftertitlepage is in effect, to print the "Table of Contents" heading, but + % this should probably be done by \sectionheading with an option to print + % in chapter size. + % + % Insert the first mark before the heading break (see notes for \domark).
+ \let\prevchapterdefs=\lastchapterdefs + \let\prevsectiondefs=\lastsectiondefs + \gdef\lastsectiondefs{\gdef\thissectionname{}\gdef\thissectionnum{}% + \gdef\thissection{}}% + % + \def\temptype{#2}% + \ifx\temptype\Ynothingkeyword + \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}% + \gdef\thischapter{\thischaptername}}% + \else\ifx\temptype\Yomitfromtockeyword + \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}% + \gdef\thischapter{}}% + \else\ifx\temptype\Yappendixkeyword + \toks0={#1}% + \xdef\lastchapterdefs{% + \gdef\noexpand\thischaptername{\the\toks0}% + \gdef\noexpand\thischapternum{\appendixletter}% + % \noexpand\putwordAppendix avoids expanding indigestible + % commands in some of the translations. + \gdef\noexpand\thischapter{\noexpand\putwordAppendix{} + \noexpand\thischapternum: + \noexpand\thischaptername}% + }% + \else + \toks0={#1}% + \xdef\lastchapterdefs{% + \gdef\noexpand\thischaptername{\the\toks0}% + \gdef\noexpand\thischapternum{\the\chapno}% + % \noexpand\putwordChapter avoids expanding indigestible + % commands in some of the translations. + \gdef\noexpand\thischapter{\noexpand\putwordChapter{} + \noexpand\thischapternum: + \noexpand\thischaptername}% + }% + \fi\fi\fi + % + % Output the mark. Pass it through \safewhatsit, to take care of + % the preceding space. + \safewhatsit\domark + % + % Insert the chapter heading break. + \pchapsepmacro + % + % Now the second mark, after the heading break. No break points + % between here and the heading. + \let\prevchapterdefs=\lastchapterdefs + \let\prevsectiondefs=\lastsectiondefs + \domark + % + {% + \chapfonts \rm + \let\footnote=\errfootnoteheading % give better error message + % + % Have to define \lastsection before calling \donoderef, because the + % xref code eventually uses it. On the other hand, it has to be called + % after \pchapsepmacro, or the headline will change too soon.
+ \gdef\lastsection{#1}% + % + % Only insert the separating space if we have a chapter/appendix + % number, and don't print the unnumbered ``number''. + \ifx\temptype\Ynothingkeyword + \setbox0 = \hbox{}% + \def\toctype{unnchap}% + \else\ifx\temptype\Yomitfromtockeyword + \setbox0 = \hbox{}% contents like unnumbered, but no toc entry + \def\toctype{omit}% + \else\ifx\temptype\Yappendixkeyword + \setbox0 = \hbox{\putwordAppendix{} #3\enspace}% + \def\toctype{app}% + \else + \setbox0 = \hbox{#3\enspace}% + \def\toctype{numchap}% + \fi\fi\fi + % + % Write the toc entry for this chapter. Must come before the + % \donoderef, because we include the current node name in the toc + % entry, and \donoderef resets it to empty. + \writetocentry{\toctype}{#1}{#3}% + % + % For pdftex, we have to write out the node definition (aka, make + % the pdfdest) after any page break, but before the actual text has + % been typeset. If the destination for the pdf outline is after the + % text, then jumping from the outline may wind up with the text not + % being visible, for instance under high magnification. + \donoderef{#2}% + % + % Typeset the actual heading. + \nobreak % Avoid page breaks at the interline glue. + \vbox{\raggedtitlesettings \hangindent=\wd0 \centerparametersmaybe + \unhbox0 #1\par}% + }% + \nobreak\bigskip % no page break after a chapter title + \nobreak +} + +% @centerchap -- centered and unnumbered. +\let\centerparametersmaybe = \relax +\def\centerparameters{% + \advance\rightskip by 3\rightskip + \leftskip = \rightskip + \parfillskip = 0pt +} + + +% Section titles. These macros combine the section number parts and +% call the generic \sectionheading to do the printing. +% +\newskip\secheadingskip +\def\secheadingbreak{\dobreak \secheadingskip{-1000}} + +% Subsection titles. +\newskip\subsecheadingskip +\def\subsecheadingbreak{\dobreak \subsecheadingskip{-500}} + +% Subsubsection titles.
+\def\subsubsecheadingskip{\subsecheadingskip} +\def\subsubsecheadingbreak{\subsecheadingbreak} + + +% Print any size, any type, section title. +% +% #1 is the text of the title, +% #2 is the section level (sec/subsec/subsubsec), +% #3 is the section type (Ynumbered, Ynothing, Yappendix, Yomitfromtoc), +% #4 is the section number. +% \seckeyword is compared against the section level (#2). +\def\seckeyword{sec} +% +\def\sectionheading#1#2#3#4{% + {% + \def\sectionlevel{#2}% + \def\temptype{#3}% + % + % It is ok for the @heading series commands to appear inside an + % environment (it's been historically allowed, though the logic is + % dubious), but not the others. + \ifx\temptype\Yomitfromtockeyword\else + \checkenv{}% non-@*heading should not be in an environment. + \fi + \let\footnote=\errfootnoteheading + % + % Switch to the right set of fonts. + \csname #2fonts\endcsname \rm + % + % Insert first mark before the heading break (see notes for \domark). + \let\prevsectiondefs=\lastsectiondefs + \ifx\temptype\Ynothingkeyword + \ifx\sectionlevel\seckeyword + \gdef\lastsectiondefs{\gdef\thissectionname{#1}\gdef\thissectionnum{}% + \gdef\thissection{\thissectionname}}% + \fi + \else\ifx\temptype\Yomitfromtockeyword + % Don't redefine \thissection. + \else\ifx\temptype\Yappendixkeyword + \ifx\sectionlevel\seckeyword + \toks0={#1}% + \xdef\lastsectiondefs{% + \gdef\noexpand\thissectionname{\the\toks0}% + \gdef\noexpand\thissectionnum{#4}% + % \noexpand\putwordSection avoids expanding indigestible + % commands in some of the translations. + \gdef\noexpand\thissection{\noexpand\putwordSection{} + \noexpand\thissectionnum: + \noexpand\thissectionname}% + }% + \fi + \else + \ifx\sectionlevel\seckeyword + \toks0={#1}% + \xdef\lastsectiondefs{% + \gdef\noexpand\thissectionname{\the\toks0}% + \gdef\noexpand\thissectionnum{#4}% + % \noexpand\putwordSection avoids expanding indigestible + % commands in some of the translations.
+ \gdef\noexpand\thissection{\noexpand\putwordSection{} + \noexpand\thissectionnum: + \noexpand\thissectionname}% + }% + \fi + \fi\fi\fi + % + % Go into vertical mode. Usually we'll already be there, but we + % don't want the following whatsit to end up in a preceding paragraph + % if the document didn't happen to have a blank line. + \par + % + % Output the mark. Pass it through \safewhatsit, to take care of + % the preceding space. + \safewhatsit\domark + % + % Insert space above the heading. + \csname #2headingbreak\endcsname + % + % Now the second mark, after the heading break. No break points + % between here and the heading. + \global\let\prevsectiondefs=\lastsectiondefs + \domark + % + % Only insert the space after the number if we have a section number. + \ifx\temptype\Ynothingkeyword + \setbox0 = \hbox{}% + \def\toctype{unn}% + \gdef\lastsection{#1}% + \else\ifx\temptype\Yomitfromtockeyword + % for the @heading series -- no section number, don't include in toc, + % and don't redefine \lastsection. + \setbox0 = \hbox{}% + \def\toctype{omit}% + \let\sectionlevel=\empty + \else\ifx\temptype\Yappendixkeyword + \setbox0 = \hbox{#4\enspace}% + \def\toctype{app}% + \gdef\lastsection{#1}% + \else + \setbox0 = \hbox{#4\enspace}% + \def\toctype{num}% + \gdef\lastsection{#1}% + \fi\fi\fi + % + % Write the toc entry (before \donoderef). See comments in \chapmacro. + \writetocentry{\toctype\sectionlevel}{#1}{#4}% + % + % Write the node reference (= pdf destination for pdftex). + % Again, see comments in \chapmacro. + \donoderef{#3}% + % + % Interline glue will be inserted when the vbox is completed. + % That glue will be a valid breakpoint for the page, since it'll be + % preceded by a whatsit (usually from the \donoderef, or from the + % \writetocentry if there was no node). We don't want to allow that + % break, since then the whatsits could end up on page n while the + % section is on page n+1, thus toc/etc. are wrong. Debian bug 276000.
+ \nobreak + % + % Output the actual section heading. + \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \ptexraggedright + \hangindent=\wd0 % zero if no section number + \unhbox0 #1}% + }% + % Add extra space after the heading -- half of whatever came above it. + % Don't allow stretch, though. + \kern .5 \csname #2headingskip\endcsname + % + % Do not let the kern be a potential breakpoint, as it would be if it + % was followed by glue. + \nobreak + % + % We'll almost certainly start a paragraph next, so don't let that + % glue accumulate. (Not a breakpoint because it's preceded by a + % discardable item.) However, when a paragraph is not started next + % (\startdefun, \cartouche, \center, etc.), this needs to be wiped out + % or the negative glue will cause weirdly wrong output, typically + % obscuring the section heading with something else. + \vskip-\parskip + % + % This is so the last item on the main vertical list is a known + % \penalty > 10000, so \startdefun, etc., can recognize the situation + % and do the needful. + \penalty 10001 +} + + +\message{toc,} +% Table of contents. +\newwrite\tocfile + +% Write an entry to the toc file, opening it if necessary. +% Called from @chapter, etc. +% +% Example usage: \writetocentry{sec}{Section Name}{\the\chapno.\the\secno} +% We append the current node name (if any) and page number as additional +% arguments for the \{chap,sec,...}entry macros which will eventually +% read this. The node name is used in the pdf outlines as the +% destination to jump to. +% +% We open the .toc file for writing here instead of at @setfilename (or +% any other fixed time) so that @contents can be anywhere in the document. +% But if #1 is `omit', then we don't do anything. This is used for the +% table of contents chapter openings themselves.
+% +\newif\iftocfileopened +\def\omitkeyword{omit}% +% +\def\writetocentry#1#2#3{% + \edef\writetoctype{#1}% + \ifx\writetoctype\omitkeyword \else + \iftocfileopened\else + \immediate\openout\tocfile = \jobname.toc + \global\tocfileopenedtrue + \fi + % + \iflinks + {\atdummies + \edef\temp{% + \write\tocfile{@#1entry{#2}{#3}{\lastnode}{\noexpand\folio}}}% + \temp + }% + \fi + \fi + % + % Tell \shipout to create a pdf destination on each page, if we're + % writing pdf. These are used in the table of contents. We can't + % just write one on every page because the title pages are numbered + % 1 and 2 (the page numbers aren't printed), and so are the first + % two pages of the document. Thus, we'd have two destinations named + % `1', and two named `2'. + \ifpdf + \global\pdfmakepagedesttrue + \else + \ifx\XeTeXrevision\thisisundefined + \else + \global\pdfmakepagedesttrue + \fi + \fi +} + + +% These characters do not print properly in the Computer Modern roman +% fonts, so we must take special care. This is more or less redundant +% with the Texinfo input format setup at the end of this file. +% +\def\activecatcodes{% + \catcode`\"=\active + \catcode`\$=\active + \catcode`\<=\active + \catcode`\>=\active + \catcode`\\=\active + \catcode`\^=\active + \catcode`\_=\active + \catcode`\|=\active + \catcode`\~=\active +} + + +% Read the toc file, which is essentially Texinfo input. +\def\readtocfile{% + \setupdatafile + \activecatcodes + \input \tocreadfilename +} + +\newskip\contentsrightmargin \contentsrightmargin=1in +\newcount\savepageno +\newcount\lastnegativepageno \lastnegativepageno = -1 + +% Prepare to read what we've written to \tocfile. +% +\def\startcontents#1{% + % If @setchapternewpage on, and @headings double, the contents should + % start on an odd page, unlike chapters. Thus, we maintain + % \contentsalignmacro in parallel with \pagealignmacro.
+ % From: Torbjorn Granlund + \contentsalignmacro + \immediate\closeout\tocfile + % + % Don't need to put `Contents' or `Short Contents' in the headline. + % It is abundantly clear what they are. + \chapmacro{#1}{Yomitfromtoc}{}% + % + \savepageno = \pageno + \begingroup % Set up to handle contents files properly. + \raggedbottom % Worry more about breakpoints than the bottom. + \entryrightmargin=\contentsrightmargin % Don't use the full line length. + % + % Roman numerals for page numbers. + \ifnum \pageno>0 \global\pageno = \lastnegativepageno \fi +} + +% redefined for the two-volume lispref. We always output on +% \jobname.toc even if this is redefined. +% +\def\tocreadfilename{\jobname.toc} + +% Normal (long) toc. +% +\def\contents{% + \startcontents{\putwordTOC}% + \openin 1 \tocreadfilename\space + \ifeof 1 \else + \readtocfile + \fi + \vfill \eject + \contentsalignmacro % in case @setchapternewpage odd is in effect + \ifeof 1 \else + \pdfmakeoutlines + \fi + \closein 1 + \endgroup + \lastnegativepageno = \pageno + \global\pageno = \savepageno +} + +% And just the chapters. +\def\summarycontents{% + \startcontents{\putwordShortTOC}% + % + \let\partentry = \shortpartentry + \let\numchapentry = \shortchapentry + \let\appentry = \shortchapentry + \let\unnchapentry = \shortunnchapentry + % We want a true roman here for the page numbers. + \secfonts + \let\rm=\shortcontrm \let\bf=\shortcontbf + \let\sl=\shortcontsl \let\tt=\shortconttt + \rm + \hyphenpenalty = 10000 + \advance\baselineskip by 1pt % Open it up a little.
+ \def\numsecentry##1##2##3##4{} + \let\appsecentry = \numsecentry + \let\unnsecentry = \numsecentry + \let\numsubsecentry = \numsecentry + \let\appsubsecentry = \numsecentry + \let\unnsubsecentry = \numsecentry + \let\numsubsubsecentry = \numsecentry + \let\appsubsubsecentry = \numsecentry + \let\unnsubsubsecentry = \numsecentry + \openin 1 \tocreadfilename\space + \ifeof 1 \else + \readtocfile + \fi + \closein 1 + \vfill \eject + \contentsalignmacro % in case @setchapternewpage odd is in effect + \endgroup + \lastnegativepageno = \pageno + \global\pageno = \savepageno +} +\let\shortcontents = \summarycontents + +% Typeset the label for a chapter or appendix for the short contents. +% The arg is, e.g., `A' for an appendix, or `3' for a chapter. +% +\def\shortchaplabel#1{% + % This space should be enough, since a single number is .5em, and the + % widest letter (M) is 1em, at least in the Computer Modern fonts. + % But use \hss just in case. + % (This space doesn't include the extra space that gets added after + % the label; that gets put in by \shortchapentry below.) + % + % We'd like to right-justify chapter numbers, but that looks strange + % with appendix letters. And right-justifying numbers and + % left-justifying letters looks strange when there are fewer than 10 + % chapters. Have to read the whole toc once to know how many chapters + % there are before deciding ... + \hbox to 1em{#1\hss}% +} + +% These macros generate individual entries in the table of contents. +% The first argument is the chapter or section name. +% The last argument is the page number. +% The arguments in between are the chapter number, section number, ... + +% Parts, in the main contents. Replace the part number, which doesn't +% exist, with an empty box. Let's hope all the numbers have the same width. +% Also ignore the page number, which is conventionally not printed.
+\def\numeralbox{\setbox0=\hbox{8}\hbox to \wd0{\hfil}} +\def\partentry#1#2#3#4{% + % Add stretch and a bonus for breaking the page before the part heading. + % This reduces the chance of the page being broken immediately after the + % part heading, before a following chapter heading. + \vskip 0pt plus 5\baselineskip + \penalty-300 + \vskip 0pt plus -5\baselineskip + \dochapentry{\numeralbox\labelspace#1}{}% +} +% +% Parts, in the short toc. +\def\shortpartentry#1#2#3#4{% + \penalty-300 + \vskip.5\baselineskip plus.15\baselineskip minus.1\baselineskip + \shortchapentry{{\bf #1}}{\numeralbox}{}{}% +} + +% Chapters, in the main contents. +\def\numchapentry#1#2#3#4{\dochapentry{#2\labelspace#1}{#4}} + +% Chapters, in the short toc. +% See comments in \dochapentry re vbox and related settings. +\def\shortchapentry#1#2#3#4{% + \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno\bgroup#4\egroup}% +} + +% Appendices, in the main contents. +% Need the word Appendix, and a fixed-size box. +% +\def\appendixbox#1{% + % We use M since it's probably the widest letter. + \setbox0 = \hbox{\putwordAppendix{} M}% + \hbox to \wd0{\putwordAppendix{} #1\hss}} +% +\def\appentry#1#2#3#4{\dochapentry{\appendixbox{#2}\hskip.7em#1}{#4}} + +% Unnumbered chapters: title only, no label. +\def\unnchapentry#1#2#3#4{\dochapentry{#1}{#4}} +\def\shortunnchapentry#1#2#3#4{\tocentry{#1}{\doshortpageno\bgroup#4\egroup}} + +% Sections. Appendix sections are typeset like numbered ones. +\def\numsecentry#1#2#3#4{\dosecentry{#2\labelspace#1}{#4}} +\let\appsecentry=\numsecentry +\def\unnsecentry#1#2#3#4{\dosecentry{#1}{#4}} + +% Subsections. +\def\numsubsecentry#1#2#3#4{\dosubsecentry{#2\labelspace#1}{#4}} +\let\appsubsecentry=\numsubsecentry +\def\unnsubsecentry#1#2#3#4{\dosubsecentry{#1}{#4}} + +% And subsubsections. +\def\numsubsubsecentry#1#2#3#4{\dosubsubsecentry{#2\labelspace#1}{#4}} +\let\appsubsubsecentry=\numsubsubsecentry +\def\unnsubsubsecentry#1#2#3#4{\dosubsubsecentry{#1}{#4}} + +% This parameter controls the indentation of the various levels.
+% Same as \defaultparindent. +\newdimen\tocindent \tocindent = 15pt + +% Now for the actual typesetting. In all these, #1 is the text and #2 is the +% page number. +% +% If the toc has to be broken over pages, we want it to be at chapters +% if at all possible; hence the \penalty. +\def\dochapentry#1#2{% + \penalty-300 \vskip1\baselineskip plus.33\baselineskip minus.25\baselineskip + \begingroup + % Move the page numbers slightly to the right + \advance\entryrightmargin by -0.05em + \chapentryfonts + \tocentry{#1}{\dopageno\bgroup#2\egroup}% + \endgroup + \nobreak\vskip .25\baselineskip plus.1\baselineskip +} + +% Lower levels are indented by 1, 2 and 3 times \tocindent respectively. +\def\dosecentry#1#2{\begingroup + \secentryfonts \leftskip=\tocindent + \tocentry{#1}{\dopageno\bgroup#2\egroup}% +\endgroup} + +\def\dosubsecentry#1#2{\begingroup + \subsecentryfonts \leftskip=2\tocindent + \tocentry{#1}{\dopageno\bgroup#2\egroup}% +\endgroup} + +\def\dosubsubsecentry#1#2{\begingroup + \subsubsecentryfonts \leftskip=3\tocindent + \tocentry{#1}{\dopageno\bgroup#2\egroup}% +\endgroup} + +% We use the same \entry macro as for the index entries. +\let\tocentry = \entry + +% Space (1em) between chapter (or whatever) number and the title. +\def\labelspace{\hskip1em \relax} + +% Page numbers are set with \rm. +\def\dopageno#1{{\rm #1}} +\def\doshortpageno#1{{\rm #1}} + +\def\chapentryfonts{\secfonts \rm} +\def\secentryfonts{\textfonts} +\def\subsecentryfonts{\textfonts} +\def\subsubsecentryfonts{\textfonts} + + +\message{environments,} +% @foo ... @end foo. + +% @tex ... @end tex escapes into raw TeX temporarily. +% One exception: @ is still an escape character, so that @end tex works. +% But \@ or @@ will get a plain @ character.
+ +\envdef\tex{% + \setupmarkupstyle{tex}% + \catcode `\\=0 \catcode `\{=1 \catcode `\}=2 + \catcode `\$=3 \catcode `\&=4 \catcode `\#=6 + \catcode `\^=7 \catcode `\_=8 \catcode `\~=\active \let~=\tie + \catcode `\%=14 + \catcode `\+=\other + \catcode `\"=\other + \catcode `\|=\other + \catcode `\<=\other + \catcode `\>=\other + \catcode `\`=\other + \catcode `\'=\other + % + % ' is active in math mode (mathcode"8000). So reset it, and all our + % other math active characters (just in case), to plain's definitions. + \mathactive + % + % Inverse of the list at the beginning of the file. + \let\b=\ptexb + \let\bullet=\ptexbullet + \let\c=\ptexc + \let\,=\ptexcomma + \let\.=\ptexdot + \let\dots=\ptexdots + \let\equiv=\ptexequiv + \let\!=\ptexexclam + \let\i=\ptexi + \let\indent=\ptexindent + \let\noindent=\ptexnoindent + \let\{=\ptexlbrace + \let\+=\tabalign + \let\}=\ptexrbrace + \let\/=\ptexslash + \let\sp=\ptexsp + \let\*=\ptexstar + %\let\sup=\ptexsup % do not redefine, we want @sup to work in math mode + \let\t=\ptext + \expandafter \let\csname top\endcsname=\ptextop % we've made it outer + \let\frenchspacing=\plainfrenchspacing + % + \def\endldots{\mathinner{\ldots\ldots\ldots\ldots}}% + \def\enddots{\relax\ifmmode\endldots\else$\mathsurround=0pt \endldots\,$\fi}% + \def\@{@}% +} +% There is no need to define \Etex. + +% Define @lisp ... @end lisp. +% @lisp environment forms a group so it can rebind things, +% including the definition of @end lisp (which normally is erroneous). + +% Amount to narrow the margins by for @lisp. +\newskip\lispnarrowing \lispnarrowing=0.4in + +% This is the definition that ^^M gets inside @lisp, @example, and other +% such environments. \null is better than a space, since it doesn't +% have any width. +\def\lisppar{\null\endgraf} + +% This space is always present above and below environments. +\newskip\envskipamount \envskipamount = 0pt + +% Make spacing above and below environment symmetrical.
We use \parskip here +% to help in doing that, since in @example-like environments \parskip +% is reset to zero; thus the \afterenvbreak inserts no space -- but the +% start of the next paragraph will insert \parskip. +% +\def\aboveenvbreak{{% + % =10000 instead of <10000 because of a special case in \itemzzz and + % \sectionheading, q.v. + \ifnum \lastpenalty=10000 \else + \advance\envskipamount by \parskip + \endgraf + \ifdim\lastskip<\envskipamount + \removelastskip + \ifnum\lastpenalty<10000 + % Penalize breaking before the environment, because preceding text + % often leads into it. + \penalty100 + \fi + \vskip\envskipamount + \fi + \fi +}} + +\def\afterenvbreak{{% + % =10000 instead of <10000 because of a special case in \itemzzz and + % \sectionheading, q.v. + \ifnum \lastpenalty=10000 \else + \advance\envskipamount by \parskip + \endgraf + \ifdim\lastskip<\envskipamount + \removelastskip + % it's not a good place to break if the last penalty was \nobreak + % or better ... + \ifnum\lastpenalty<10000 \penalty-50 \fi + \vskip\envskipamount + \fi + \fi +}} + +% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins; they will +% also clear it, so that environments nested inside them narrow again. +\let\nonarrowing=\relax + +% @cartouche ... @end cartouche: draw rectangle w/rounded corners around +% environment contents.
+\font\circle=lcircle10 +\newdimen\circthick +\newdimen\cartouter\newdimen\cartinner +\newskip\normbskip\newskip\normpskip\newskip\normlskip +\circthick=\fontdimen8\circle +% Corner characters: \ctl/\ctr end the rule in \carttop, \cbl/\cbr the rule in \cartbot. +\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth +\def\ctr{{\hskip 6pt\circle\char'010}} +\def\cbl{{\circle\char'012\hskip -6pt}} +\def\cbr{{\hskip 6pt\circle\char'011}} +\def\carttop{\hbox to \cartouter{\hskip\lskip + \ctl\leaders\hrule height\circthick\hfil\ctr + \hskip\rskip}} +\def\cartbot{\hbox to \cartouter{\hskip\lskip + \cbl\leaders\hrule height\circthick\hfil\cbr + \hskip\rskip}} +% +\newskip\lskip\newskip\rskip + +\envdef\cartouche{% + \ifhmode\par\fi % can't be in the midst of a paragraph. + \startsavinginserts + \lskip=\leftskip \rskip=\rightskip + \leftskip=0pt\rightskip=0pt % we want these *outside*. + \cartinner=\hsize \advance\cartinner by-\lskip + \advance\cartinner by-\rskip + \cartouter=\hsize + \advance\cartouter by 18.4pt % allow for 3pt kerns on either + % side, and for 6pt waste from + % each corner char, and rule thickness + \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip + % + % If this cartouche directly follows a sectioning command, we need the + % \parskip glue (backspaced over by default) or the cartouche can + % collide with the section heading. + \ifnum\lastpenalty>10000 \vskip\parskip \penalty\lastpenalty \fi + % + \setbox\groupbox=\vbox\bgroup + \baselineskip=0pt\parskip=0pt\lineskip=0pt + \carttop + \hbox\bgroup + \hskip\lskip + \vrule\kern3pt + \vbox\bgroup + \kern3pt + \hsize=\cartinner + \baselineskip=\normbskip + \lineskip=\normlskip + \parskip=\normpskip + \vskip -\parskip + \comment % For explanation, see the end of \def\group. +} +\def\Ecartouche{% + \ifhmode\par\fi + \kern3pt + \egroup + \kern3pt\vrule + \hskip\rskip + \egroup + \cartbot + \egroup + \addgroupbox + \checkinserts +} + + +% This macro is called at the beginning of all the @example variants, +% inside a group.
+\newdimen\nonfillparindent +\def\nonfillstart{% + \aboveenvbreak + \ifdim\hfuzz < 12pt \hfuzz = 12pt \fi % Don't be fussy + \sepspaces % Make spaces be word-separators rather than space tokens. + \let\par = \lisppar % don't ignore blank lines + \obeylines % each line of input is a line of output + \parskip = 0pt + % Turn off paragraph indentation but redefine \indent to emulate + % the normal \indent. + \nonfillparindent=\parindent + \parindent = 0pt + \let\indent\nonfillindent + % + \emergencystretch = 0pt % don't try to avoid overfull boxes + \ifx\nonarrowing\relax + \advance \leftskip by \lispnarrowing + \exdentamount=\lispnarrowing + \else + \let\nonarrowing = \relax + \fi + \let\exdent=\nofillexdent +} + +\begingroup +\obeyspaces +% We want to swallow spaces (but not other tokens) after the fake +% @indent in our nonfill-environments, where spaces are normally +% active and set to @tie, resulting in them not being ignored after +% @indent. +\gdef\nonfillindent{\futurelet\temp\nonfillindentcheck}% +\gdef\nonfillindentcheck{% +\ifx\temp % +\expandafter\nonfillindentgobble% +\else% +\leavevmode\nonfillindentbox% +\fi% +}% +\endgroup +\def\nonfillindentgobble#1{\nonfillindent} +\def\nonfillindentbox{\hbox to \nonfillparindent{\hss}} + +% If you want all examples etc. small: @set dispenvsize small. +% If you want even small examples the full size: @set dispenvsize nosmall. +% This affects the following displayed environments: +% @example, @display, @format, @lisp +% +\def\smallword{small} +\def\nosmallword{nosmall} +\let\SETdispenvsize\relax +\def\setnormaldispenv{% + \ifx\SETdispenvsize\smallword + % end paragraph for sake of leading, in case document has no blank + % line. This is redundant with what happens in \aboveenvbreak, but + % we need to do it before changing the fonts, and it's inconvenient + % to change the fonts afterward.
+ \ifnum \lastpenalty=10000 \else \endgraf \fi + \smallexamplefonts \rm + \fi +} +% For the @smallfoo variants: use the small fonts unless dispenvsize is nosmall. +\def\setsmalldispenv{% + \ifx\SETdispenvsize\nosmallword + \else + \ifnum \lastpenalty=10000 \else \endgraf \fi + \smallexamplefonts \rm + \fi +} + +% We often define two environments, @foo and @smallfoo. +% Let's do it in one command. #1 is the env name, #2 the definition. +\def\makedispenvdef#1#2{% + \expandafter\envdef\csname#1\endcsname {\setnormaldispenv #2}% + \expandafter\envdef\csname small#1\endcsname {\setsmalldispenv #2}% + \expandafter\let\csname E#1\endcsname \afterenvbreak + \expandafter\let\csname Esmall#1\endcsname \afterenvbreak +} + +% Define two environment synonyms (#1 and #2) for an environment; #3 is the definition. +\def\maketwodispenvdef#1#2#3{% + \makedispenvdef{#1}{#3}% + \makedispenvdef{#2}{#3}% +} +% +% @lisp: indented, narrowed, typewriter font; +% @example: same as @lisp. +% +% @smallexample and @smalllisp: use smaller fonts. +% Originally contributed by Pavel@xerox. +% +\maketwodispenvdef{lisp}{example}{% + \nonfillstart + \tt\setupmarkupstyle{example}% + \let\kbdfont = \kbdexamplefont % Allow @kbd to do something special. + \gobble % eat return +} +% @display/@smalldisplay: same as @lisp except keep current font. +% +\makedispenvdef{display}{% + \nonfillstart + \gobble +} + +% @format/@smallformat: same as @display except don't narrow margins. +% +\makedispenvdef{format}{% + \let\nonarrowing = t% + \nonfillstart + \gobble +} + +% @flushleft: same as @format, but doesn't obey \SETdispenvsize. +\envdef\flushleft{% + \let\nonarrowing = t% + \nonfillstart + \gobble +} +\let\Eflushleft = \afterenvbreak + +% @flushright: like @flushleft, with infinite stretch added to \leftskip. +% +\envdef\flushright{% + \let\nonarrowing = t% + \nonfillstart + \advance\leftskip by 0pt plus 1fill\relax + \gobble +} +\let\Eflushright = \afterenvbreak + + +% @raggedright does more-or-less normal line breaking but no right +% justification. From plain.tex.
Don't stretch around special +% characters in urls in this environment, since the stretch at the right +% should be enough. +\envdef\raggedright{% + \rightskip0pt plus2.4em \spaceskip.3333em \xspaceskip.5em\relax + \def\urefprestretchamount{0pt}% + \def\urefpoststretchamount{0pt}% +} +\let\Eraggedright\par + +\envdef\raggedleft{% + \parindent=0pt \leftskip0pt plus2em + \spaceskip.3333em \xspaceskip.5em \parfillskip=0pt + \hbadness=10000 % Last line will usually be underfull, so turn off + % badness reporting. +} +\let\Eraggedleft\par + +\envdef\raggedcenter{% + \parindent=0pt \rightskip0pt plus1em \leftskip0pt plus1em + \spaceskip.3333em \xspaceskip.5em \parfillskip=0pt + \hbadness=10000 % Last line will usually be underfull, so turn off + % badness reporting. +} +\let\Eraggedcenter\par + + +% @quotation does normal linebreaking (hence we can't use \nonfillstart) +% and narrows the margins. We keep \parskip nonzero in general, since +% we're doing normal filling. So, when using \aboveenvbreak and +% \afterenvbreak, temporarily make \parskip 0. +% +\makedispenvdef{quotation}{\quotationstart} +% +\def\quotationstart{% + \indentedblockstart % same as \indentedblock, but increase right margin too. + \ifx\nonarrowing\relax + \advance\rightskip by \lispnarrowing + \fi + \parsearg\quotationlabel +} + +% We have retained a nonzero parskip for the environment, since we're +% doing normal filling. +% +\def\Equotation{% + \par + \ifx\quotationauthor\thisisundefined\else + % indent a bit. + \leftline{\kern 2\leftskip \sl ---\quotationauthor}% + \fi + {\parskip=0pt \afterenvbreak}% +} +\def\Esmallquotation{\Equotation} + +% If we're given an argument, typeset it in bold with a colon after. +\def\quotationlabel#1{% + \def\temp{#1}% + \ifx\temp\empty \else + {\bf #1: }% + \fi +} + +% @indentedblock is like @quotation, but indents only on the left and +% has no optional argument. 
+%
+\makedispenvdef{indentedblock}{\indentedblockstart}
+%
+% \indentedblockstart: start-up code for @indentedblock, also invoked
+% by \quotationstart.  It zeroes \parindent and, unless \nonarrowing
+% has been set, advances \leftskip (and sets \exdentamount) by
+% \lispnarrowing.  A set \nonarrowing is reset to \relax here, so it
+% suppresses narrowing for one level only.
+\def\indentedblockstart{%
+  {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip
+  \parindent=0pt
+  %
+  % @cartouche defines \nonarrowing to inhibit narrowing at next level down.
+  \ifx\nonarrowing\relax
+    \advance\leftskip by \lispnarrowing
+    \exdentamount = \lispnarrowing
+  \else
+    \let\nonarrowing = \relax
+  \fi
+}
+
+% Keep a nonzero parskip for the environment, since we're doing normal filling.
+% (\parskip is zeroed only inside the group around \afterenvbreak.)
+%
+\def\Eindentedblock{%
+  \par
+  {\parskip=0pt \afterenvbreak}%
+}
+\def\Esmallindentedblock{\Eindentedblock}
+
+
+% LaTeX-like @verbatim...@end verbatim and @verb{...}
+% If we want to allow any character as delimiter,
+% we need the curly braces so that makeinfo sees the @verb command, eg:
+% `@verbx...x' would look like the '@verbx' command. --janneke@gnu.org
+%
+% [Knuth]: Donald Ervin Knuth, 1996. The TeXbook.
+%
+% [Knuth] p.344; only we need to do the other characters Texinfo sets
+% active too. Otherwise, they get lost as the first character on a
+% verbatim line.
+%
+% \dospecials applies \do to each special character in turn; the caller
+% defines \do before invoking it.
+\def\dospecials{%
+  \do\ \do\\\do\{\do\}\do\$\do\&%
+  \do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~%
+  \do\<\do\>\do\|\do\@\do+\do\"%
+  % Don't do the quotes -- if we do, @set txicodequoteundirected and
+  % @set txicodequotebacktick will not have effect on @verb and
+  % @verbatim, and ?` and !` ligatures won't get disabled.
+  %\do\`\do\'%
+}
+%
+% [Knuth] p. 380
+% Give every character listed in \dospecials the catcode \other.
+\def\uncatcodespecials{%
+  \def\do##1{\catcode`##1=\other}\dospecials}
+%
+% Setup for the @verb command.
+% +% Eight spaces for a tab +\begingroup + \catcode`\^^I=\active + \gdef\tabeightspaces{\catcode`\^^I=\active\def^^I{\ \ \ \ \ \ \ \ }} +\endgroup +% +\def\setupverb{% + \tt % easiest (and conventionally used) font for verbatim + \def\par{\leavevmode\endgraf}% + \setupmarkupstyle{verb}% + \tabeightspaces + % Respect line breaks, + % print special symbols as themselves, and + % make each space count + % must do in this order: + \obeylines \uncatcodespecials \sepspaces +} + +% Setup for the @verbatim environment +% +% Real tab expansion. +\newdimen\tabw \setbox0=\hbox{\tt\space} \tabw=8\wd0 % tab amount +% +% We typeset each line of the verbatim in an \hbox, so we can handle +% tabs. The \global is in case the verbatim line starts with an accent, +% or some other command that starts with a begin-group. Otherwise, the +% entire \verbbox would disappear at the corresponding end-group, before +% it is typeset. Meanwhile, we can't have nested verbatim commands +% (can we?), so the \global won't be overwriting itself. +\newbox\verbbox +\def\starttabbox{\global\setbox\verbbox=\hbox\bgroup} +% +\begingroup + \catcode`\^^I=\active + \gdef\tabexpand{% + \catcode`\^^I=\active + \def^^I{\leavevmode\egroup + \dimen\verbbox=\wd\verbbox % the width so far, or since the previous tab + \divide\dimen\verbbox by\tabw + \multiply\dimen\verbbox by\tabw % compute previous multiple of \tabw + \advance\dimen\verbbox by\tabw % advance to next multiple of \tabw + \wd\verbbox=\dimen\verbbox \box\verbbox \starttabbox + }% + } +\endgroup + +% start the verbatim environment. +\def\setupverbatim{% + \let\nonarrowing = t% + \nonfillstart + \tt % easiest (and conventionally used) font for verbatim + % The \leavevmode here is for blank lines. Otherwise, we would + % never \starttabox and the \egroup would end verbatim mode. 
+ \def\par{\leavevmode\egroup\box\verbbox\endgraf}% + \tabexpand + \setupmarkupstyle{verbatim}% + % Respect line breaks, + % print special symbols as themselves, and + % make each space count. + % Must do in this order: + \obeylines \uncatcodespecials \sepspaces + \everypar{\starttabbox}% +} + +% Do the @verb magic: verbatim text is quoted by unique +% delimiter characters. Before first delimiter expect a +% right brace, after last delimiter expect closing brace: +% +% \def\doverb'{'#1'}'{#1} +% +% [Knuth] p. 382; only eat outer {} +\begingroup + \catcode`[=1\catcode`]=2\catcode`\{=\other\catcode`\}=\other + \gdef\doverb{#1[\def\next##1#1}[##1\endgroup]\next] +\endgroup +% +\def\verb{\begingroup\setupverb\doverb} +% +% +% Do the @verbatim magic: define the macro \doverbatim so that +% the (first) argument ends when '@end verbatim' is reached, ie: +% +% \def\doverbatim#1@end verbatim{#1} +% +% For Texinfo it's a lot easier than for LaTeX, +% because texinfo's \verbatim doesn't stop at '\end{verbatim}': +% we need not redefine '\', '{' and '}'. +% +% Inspired by LaTeX's verbatim command set [latex.ltx] +% +\begingroup + \catcode`\ =\active + \obeylines % + % ignore everything up to the first ^^M, that's the newline at the end + % of the @verbatim input line itself. Otherwise we get an extra blank + % line in the output. + \xdef\doverbatim#1^^M#2@end verbatim{#2\noexpand\end\gobble verbatim}% + % We really want {...\end verbatim} in the body of the macro, but + % without the active space; thus we have to use \xdef and \gobble. +\endgroup +% +\envdef\verbatim{% + \setupverbatim\doverbatim +} +\let\Everbatim = \afterenvbreak + + +% @verbatiminclude FILE - insert text of file in verbatim environment. +% +\def\verbatiminclude{\parseargusing\filenamecatcodes\doverbatiminclude} +% +\def\doverbatiminclude#1{% + {% + \makevalueexpandable + \setupverbatim + \indexnofonts % Allow `@@' and other weird things in file names. 
+ \wlog{texinfo.tex: doing @verbatiminclude of #1^^J}% + \input #1 + \afterenvbreak + }% +} + +% @copying ... @end copying. +% Save the text away for @insertcopying later. +% +% We save the uninterpreted tokens, rather than creating a box. +% Saving the text in a box would be much easier, but then all the +% typesetting commands (@smallbook, font changes, etc.) have to be done +% beforehand -- and a) we want @copying to be done first in the source +% file; b) letting users define the frontmatter in as flexible order as +% possible is desirable. +% +\def\copying{\checkenv{}\begingroup\scanargctxt\docopying} +\def\docopying#1@end copying{\endgroup\def\copyingtext{#1}} +% +\def\insertcopying{% + \begingroup + \parindent = 0pt % paragraph indentation looks wrong on title page + \scanexp\copyingtext + \endgroup +} + + +\message{defuns,} +% @defun etc. + +\newskip\defbodyindent \defbodyindent=.4in +\newskip\defargsindent \defargsindent=50pt +\newskip\deflastargmargin \deflastargmargin=18pt +\newcount\defunpenalty + +% Start the processing of @deffn: +\def\startdefun{% + \ifnum\lastpenalty<10000 + \medbreak + \defunpenalty=10003 % Will keep this @deffn together with the + % following @def command, see below. + \else + % If there are two @def commands in a row, we'll have a \nobreak, + % which is there to keep the function description together with its + % header. But if there's nothing but headers, we need to allow a + % break somewhere. Check specifically for penalty 10002, inserted + % by \printdefunline, instead of 10000, since the sectioning + % commands also insert a nobreak penalty, and we don't want to allow + % a break between a section heading and a defun. + % + % As a further refinement, we avoid "club" headers by signalling + % with penalty of 10003 after the very first @deffn in the + % sequence (see above), and penalty of 10002 after any following + % @def command. 
+ \ifnum\lastpenalty=10002 \penalty2000 \else \defunpenalty=10002 \fi + % + % Similarly, after a section heading, do not allow a break. + % But do insert the glue. + \medskip % preceded by discardable penalty, so not a breakpoint + \fi + % + \parindent=0in + \advance\leftskip by \defbodyindent + \exdentamount=\defbodyindent +} + +\def\dodefunx#1{% + % First, check whether we are in the right environment: + \checkenv#1% + % + % As above, allow line break if we have multiple x headers in a row. + % It's not a great place, though. + \ifnum\lastpenalty=10002 \penalty3000 \else \defunpenalty=10002 \fi + % + % And now, it's time to reuse the body of the original defun: + \expandafter\gobbledefun#1% +} +\def\gobbledefun#1\startdefun{} + +% \printdefunline \deffnheader{text} +% +\def\printdefunline#1#2{% + \begingroup + % call \deffnheader: + #1#2 \endheader + % common ending: + \interlinepenalty = 10000 + \advance\rightskip by 0pt plus 1fil\relax + \endgraf + \nobreak\vskip -\parskip + \penalty\defunpenalty % signal to \startdefun and \dodefunx + % Some of the @defun-type tags do not enable magic parentheses, + % rendering the following check redundant. But we don't optimize. + \checkparencounts + \endgroup +} + +\def\Edefun{\endgraf\medbreak} + +% \makedefun{deffn} creates \deffn, \deffnx and \Edeffn; +% the only thing remaining is to define \deffnheader. +% +\def\makedefun#1{% + \expandafter\let\csname E#1\endcsname = \Edefun + \edef\temp{\noexpand\domakedefun + \makecsname{#1}\makecsname{#1x}\makecsname{#1header}}% + \temp +} + +% \domakedefun \deffn \deffnx \deffnheader { (defn. of \deffnheader) } +% +% Define \deffn and \deffnx, without parameters. +% \deffnheader has to be defined explicitly. +% +\def\domakedefun#1#2#3{% + \envdef#1{% + \startdefun + \doingtypefnfalse % distinguish typed functions from all else + \parseargusing\activeparens{\printdefunline#3}% + }% + \def#2{\dodefunx#1}% + \def#3% +} + +\newif\ifdoingtypefn % doing typed function? 
+\newif\ifrettypeownline % typeset return type on its own line?
+
+% @deftypefnnewline on|off says whether the return type of typed functions
+% are printed on their own line. This affects @deftypefn, @deftypefun,
+% @deftypeop, and @deftypemethod.
+%
+% The choice is recorded in \SETtxideftypefnnl: \empty for `on', \relax
+% for `off' (\defname tests that control sequence against \relax).
+% Any other argument is reported as an error.
+%
+\parseargdef\deftypefnnewline{%
+  \def\temp{#1}%
+  \ifx\temp\onword
+    \expandafter\let\csname SETtxideftypefnnl\endcsname
+      = \empty
+  \else\ifx\temp\offword
+    \expandafter\let\csname SETtxideftypefnnl\endcsname
+      = \relax
+  \else
+    \errhelp = \EMsimple
+    % Name the command the user actually typed, not the internal flag.
+    \errmessage{Unknown @deftypefnnewline value `\temp',
+                must be on|off}%
+  \fi\fi
+}
+
+% Untyped functions:
+
+% @deffn category name args
+\makedefun{deffn}{\deffngeneral{}}
+
+% @defop category class name args
+\makedefun{defop}#1 {\defopon{#1\ \putwordon}}
+
+% \defopon {category on}class name args
+\def\defopon#1#2 {\deffngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} }
+
+% \deffngeneral {subind}category name args
+%
+\def\deffngeneral#1#2 #3 #4\endheader{%
+  % Remember that \dosubind{fn}{foo}{} is equivalent to \doind{fn}{foo}.
+  \dosubind{fn}{\code{#3}}{#1}%
+  \defname{#2}{}{#3}\magicamp\defunargs{#4\unskip}%
+}
+
+% Typed functions:
+
+% @deftypefn category type name args
+\makedefun{deftypefn}{\deftypefngeneral{}}
+
+% @deftypeop category class type name args
+\makedefun{deftypeop}#1 {\deftypeopon{#1\ \putwordon}}
+
+% \deftypeopon {category on}class type name args
+\def\deftypeopon#1#2 {\deftypefngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} }
+
+% \deftypefngeneral {subind}category type name args
+%
+% Sets \ifdoingtypefn true so that \defname can honor @deftypefnnewline.
+%
+\def\deftypefngeneral#1#2 #3 #4 #5\endheader{%
+  \dosubind{fn}{\code{#4}}{#1}%
+  \doingtypefntrue
+  \defname{#2}{#3}{#4}\defunargs{#5\unskip}%
+}
+
+% Typed variables:
+
+% @deftypevr category type var args
+\makedefun{deftypevr}{\deftypecvgeneral{}}
+
+% @deftypecv category class type var args
+\makedefun{deftypecv}#1 {\deftypecvof{#1\ \putwordof}}
+
+% \deftypecvof {category of}class type var args
+\def\deftypecvof#1#2 {\deftypecvgeneral{\putwordof\ \code{#2}}{#1\ \code{#2}} }
+
+% \deftypecvgeneral {subind}category type var args
+%
+\def\deftypecvgeneral#1#2 #3 #4 #5\endheader{%
+  \dosubind{vr}{\code{#4}}{#1}%
+  \defname{#2}{#3}{#4}\defunargs{#5\unskip}%
+}
+
+% Untyped variables:
+
+% @defvr category var args
+\makedefun{defvr}#1 {\deftypevrheader{#1} {} }
+
+% @defcv category class var args
+\makedefun{defcv}#1 {\defcvof{#1\ \putwordof}}
+
+% \defcvof {category of}class var args
+\def\defcvof#1#2 {\deftypecvof{#1}#2 {} }
+
+% Types:
+
+% @deftp category name args
+\makedefun{deftp}#1 #2 #3\endheader{%
+  \doind{tp}{\code{#2}}%
+  \defname{#1}{}{#2}\defunargs{#3\unskip}%
+}
+
+% Remaining @defun-like shortcuts:
+\makedefun{defun}{\deffnheader{\putwordDeffunc} }
+\makedefun{defmac}{\deffnheader{\putwordDefmac} }
+\makedefun{defspec}{\deffnheader{\putwordDefspec} }
+\makedefun{deftypefun}{\deftypefnheader{\putwordDeffunc} }
+\makedefun{defvar}{\defvrheader{\putwordDefvar} }
+\makedefun{defopt}{\defvrheader{\putwordDefopt} }
+\makedefun{deftypevar}{\deftypevrheader{\putwordDefvar} }
+\makedefun{defmethod}{\defopon\putwordMethodon} +\makedefun{deftypemethod}{\deftypeopon\putwordMethodon} +\makedefun{defivar}{\defcvof\putwordInstanceVariableof} +\makedefun{deftypeivar}{\deftypecvof\putwordInstanceVariableof} + +% \defname, which formats the name of the @def (not the args). +% #1 is the category, such as "Function". +% #2 is the return type, if any. +% #3 is the function name. +% +% We are followed by (but not passed) the arguments, if any. +% +\def\defname#1#2#3{% + \par + % Get the values of \leftskip and \rightskip as they were outside the @def... + \advance\leftskip by -\defbodyindent + % + % Determine if we are typesetting the return type of a typed function + % on a line by itself. + \rettypeownlinefalse + \ifdoingtypefn % doing a typed function specifically? + % then check user option for putting return type on its own line: + \expandafter\ifx\csname SETtxideftypefnnl\endcsname\relax \else + \rettypeownlinetrue + \fi + \fi + % + % How we'll format the category name. Putting it in brackets helps + % distinguish it from the body text that may end up on the next line + % just below it. + \def\temp{#1}% + \setbox0=\hbox{\kern\deflastargmargin \ifx\temp\empty\else [\rm\temp]\fi} + % + % Figure out line sizes for the paragraph shape. We'll always have at + % least two. + \tempnum = 2 + % + % The first line needs space for \box0; but if \rightskip is nonzero, + % we need only space for the part of \box0 which exceeds it: + \dimen0=\hsize \advance\dimen0 by -\wd0 \advance\dimen0 by \rightskip + % + % If doing a return type on its own line, we'll have another line. + \ifrettypeownline + \advance\tempnum by 1 + \def\maybeshapeline{0in \hsize}% + \else + \def\maybeshapeline{}% + \fi + % + % The continuations: + \dimen2=\hsize \advance\dimen2 by -\defargsindent + % + % The final paragraph shape: + \parshape \tempnum 0in \dimen0 \maybeshapeline \defargsindent \dimen2 + % + % Put the category name at the right margin. 
+ \noindent + \hbox to 0pt{% + \hfil\box0 \kern-\hsize + % \hsize has to be shortened this way: + \kern\leftskip + % Intentionally do not respect \rightskip, since we need the space. + }% + % + % Allow all lines to be underfull without complaint: + \tolerance=10000 \hbadness=10000 + \exdentamount=\defbodyindent + {% + % defun fonts. We use typewriter by default (used to be bold) because: + % . we're printing identifiers, they should be in tt in principle. + % . in languages with many accents, such as Czech or French, it's + % common to leave accents off identifiers. The result looks ok in + % tt, but exceedingly strange in rm. + % . we don't want -- and --- to be treated as ligatures. + % . this still does not fix the ?` and !` ligatures, but so far no + % one has made identifiers using them :). + \df \tt + \def\temp{#2}% text of the return type + \ifx\temp\empty\else + \tclose{\temp}% typeset the return type + \ifrettypeownline + % put return type on its own line; prohibit line break following: + \hfil\vadjust{\nobreak}\break + \else + \space % type on same line, so just followed by a space + \fi + \fi % no return type + #3% output function name + }% + {\rm\enskip}% hskip 0.5 em of \rmfont + % + \boldbrax + % arguments will be output next, if any. +} + +% Print arguments in slanted roman (not ttsl), inconsistently with using +% tt for the name. This is because literal text is sometimes needed in +% the argument list (groff manual), and ttsl and tt are not very +% distinguishable. Prevent hyphenation at `-' chars. +% +\def\defunargs#1{% + % use sl by default (not ttsl), + % tt for the names. + \df \sl \hyphenchar\font=0 + % + % On the other hand, if an argument has two dashes (for instance), we + % want a way to get ttsl. We used to recommend @var for that, so + % leave the code in, but it's strange for @var to lead to typewriter. + % Nowadays we recommend @code, since the difference between a ttsl hyphen + % and a tt hyphen is pretty tiny. 
@code also disables ?` !`. + \def\var##1{{\setupmarkupstyle{var}\ttslanted{##1}}}% + #1% + \sl\hyphenchar\font=45 +} + +% We want ()&[] to print specially on the defun line. +% +\def\activeparens{% + \catcode`\(=\active \catcode`\)=\active + \catcode`\[=\active \catcode`\]=\active + \catcode`\&=\active +} + +% Make control sequences which act like normal parenthesis chars. +\let\lparen = ( \let\rparen = ) + +% Be sure that we always have a definition for `(', etc. For example, +% if the fn name has parens in it, \boldbrax will not be in effect yet, +% so TeX would otherwise complain about undefined control sequence. +{ + \activeparens + \global\let(=\lparen \global\let)=\rparen + \global\let[=\lbrack \global\let]=\rbrack + \global\let& = \& + + \gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb} + \gdef\magicamp{\let&=\amprm} +} + +\newcount\parencount + +% If we encounter &foo, then turn on ()-hacking afterwards +\newif\ifampseen +\def\amprm#1 {\ampseentrue{\bf\ }} + +\def\parenfont{% + \ifampseen + % At the first level, print parens in roman, + % otherwise use the default font. + \ifnum \parencount=1 \rm \fi + \else + % The \sf parens (in \boldbrax) actually are a little bolder than + % the contained text. This is especially needed for [ and ] . + \sf + \fi +} +\def\infirstlevel#1{% + \ifampseen + \ifnum\parencount=1 + #1% + \fi + \fi +} +\def\bfafterword#1 {#1 \bf} + +\def\opnr{% + \global\advance\parencount by 1 + {\parenfont(}% + \infirstlevel \bfafterword +} +\def\clnr{% + {\parenfont)}% + \infirstlevel \sl + \global\advance\parencount by -1 +} + +\newcount\brackcount +\def\lbrb{% + \global\advance\brackcount by 1 + {\bf[}% +} +\def\rbrb{% + {\bf]}% + \global\advance\brackcount by -1 +} + +\def\checkparencounts{% + \ifnum\parencount=0 \else \badparencount \fi + \ifnum\brackcount=0 \else \badbrackcount \fi +} +% these should not use \errmessage; the glibc manual, at least, actually +% has such constructs (when documenting function pointers). 
+\def\badparencount{% + \message{Warning: unbalanced parentheses in @def...}% + \global\parencount=0 +} +\def\badbrackcount{% + \message{Warning: unbalanced square brackets in @def...}% + \global\brackcount=0 +} + + +\message{macros,} +% @macro. + +% To do this right we need a feature of e-TeX, \scantokens, +% which we arrange to emulate with a temporary file in ordinary TeX. +\ifx\eTeXversion\thisisundefined + \newwrite\macscribble + \def\scantokens#1{% + \toks0={#1}% + \immediate\openout\macscribble=\jobname.tmp + \immediate\write\macscribble{\the\toks0}% + \immediate\closeout\macscribble + \input \jobname.tmp + } +\fi + +% alias because \c means cedilla in @tex or @math +\let\texinfoc=\c + +\newcount\savedcatcodeone +\newcount\savedcatcodetwo + +% Used at the time of macro expansion. +% Argument is macro body with arguments substituted +\def\scanmacro#1{% + \newlinechar`\^^M + \def\xeatspaces{\eatspaces}% + % + % Temporarily undo catcode changes of \printindex. Set catcode of @ to + % 0 so that @-commands in macro expansions aren't printed literally when + % formatting an index file, where \ is used as the escape character. + \savedcatcodeone=\catcode`\@ + \savedcatcodetwo=\catcode`\\ + \catcode`\@=0 + \catcode`\\=\active + % + % Process the macro body under the current catcode regime. + \scantokens{#1@texinfoc}% + % + \catcode`\@=\savedcatcodeone + \catcode`\\=\savedcatcodetwo + % + % The \texinfoc is to remove the \newlinechar added by \scantokens, and + % can be noticed by \parsearg. + % We avoid surrounding the call to \scantokens with \bgroup and \egroup + % to allow macros to open or close groups themselves. +} + +% Used for copying and captions +\def\scanexp#1{% + \expandafter\scanmacro\expandafter{#1}% +} + +\newcount\paramno % Count of parameters +\newtoks\macname % Macro name +\newif\ifrecursive % Is it recursive? + +% List of all defined macros in the form +% \commondummyword\macro1\commondummyword\macro2... 
+% Currently it also contains all @aliases; the list can be split
+% if there is a need.
+\def\macrolist{}
+
+% Add the macro to \macrolist
+% #1 is the macro name as a string; it is turned into a control
+% sequence with \csname before being appended.
+\def\addtomacrolist#1{\expandafter \addtomacrolistxxx \csname#1\endcsname}
+% #1 is the control sequence to append.  The new list is assembled in
+% \toks0 (with \macrolist expanded once) and copied back with \xdef, so
+% the assignment to \macrolist is global.
+\def\addtomacrolistxxx#1{%
+  \toks0 = \expandafter{\macrolist\commondummyword#1}%
+  \xdef\macrolist{\the\toks0}%
+}
+
+% Utility routines.
+% This does \let #1 = #2, with \csnames; that is,
+% \let \csname#1\endcsname = \csname#2\endcsname
+% (except of course we have to play expansion games).
+%
+\def\cslet#1#2{%
+  \expandafter\let
+  \csname#1\expandafter\endcsname
+  \csname#2\endcsname
+}
+
+% Trim leading and trailing spaces off a string.
+% Concepts from aro-bend problem 15 (see CTAN).
+% (@ is made a letter inside this group so the \trim@... helper names
+% can be written; the helpers are defined globally with \gdef.)
+{\catcode`\@=11
+\gdef\eatspaces #1{\expandafter\trim@\expandafter{#1 }}
+\gdef\trim@ #1{\trim@@ @#1 @ #1 @ @@}
+\gdef\trim@@ #1@ #2@ #3@@{\trim@@@\empty #2 @}
+\def\unbrace#1{#1}
+\unbrace{\gdef\trim@@@ #1 } #2@{#1}
+}
+
+% Trim a single trailing ^^M off a string.
+% (Inside this group ^^M has catcode \other, which is why every line
+% ends with a comment character, and Q has catcode 3 so it can serve as
+% a private delimiter.)
+{\catcode`\^^M=\other \catcode`\Q=3%
+\gdef\eatcr #1{\eatcra #1Q^^MQ}%
+\gdef\eatcra#1^^MQ{\eatcrb#1Q}%
+\gdef\eatcrb#1Q#2Q{#1}%
+}
+
+% Macro bodies are absorbed as an argument in a context where
+% all characters are catcode 10, 11 or 12, except \ which is active
+% (as in normal texinfo). It is necessary to change the definition of \
+% to recognize macro arguments; this is the job of \mbodybackslash.
+%
+% Non-ASCII encodings make 8-bit characters active, so un-activate
+% them to avoid their expansion. Must do this non-globally, to
+% confine the change to the current group.
+%
+% It's necessary to have hard CRs when the macro is executed. This is
+% done by making ^^M (\endlinechar) catcode 12 when reading the macro
+% body, and then making it the \newlinechar in \scanmacro.
+% +\def\scanctxt{% used as subroutine + \catcode`\"=\other + \catcode`\+=\other + \catcode`\<=\other + \catcode`\>=\other + \catcode`\^=\other + \catcode`\_=\other + \catcode`\|=\other + \catcode`\~=\other + \passthroughcharstrue +} + +\def\scanargctxt{% used for copying and captions, not macros. + \scanctxt + \catcode`\@=\other + \catcode`\\=\other + \catcode`\^^M=\other +} + +\def\macrobodyctxt{% used for @macro definitions + \scanctxt + \catcode`\ =\other + \catcode`\@=\other + \catcode`\{=\other + \catcode`\}=\other + \catcode`\^^M=\other + \usembodybackslash +} + +% Used when scanning braced macro arguments. Note, however, that catcode +% changes here are ineffectual if the macro invocation was nested inside +% an argument to another Texinfo command. +\def\macroargctxt{% + \scanctxt + \catcode`\ =\active + \catcode`\^^M=\other + \catcode`\\=\active +} + +\def\macrolineargctxt{% used for whole-line arguments without braces + \scanctxt + \catcode`\{=\other + \catcode`\}=\other +} + +% \mbodybackslash is the definition of \ in @macro bodies. +% It maps \foo\ => \csname macarg.foo\endcsname => #N +% where N is the macro parameter number. +% We define \csname macarg.\endcsname to be \realbackslash, so +% \\ in macro replacement text gets you a backslash. 
+%
+{\catcode`@=0 @catcode`@\=@active
+ @gdef@usembodybackslash{@let\=@mbodybackslash}
+ @gdef@mbodybackslash#1\{@csname macarg.#1@endcsname}
+}
+\expandafter\def\csname macarg.\endcsname{\realbackslash}
+
+% Typeset the character whose code is that of #1.
+\def\margbackslash#1{\char`\#1 }
+
+% @macro and @rmacro differ only in the \ifrecursive flag; both hand the
+% rest of the command line to \macroxxx.
+\def\macro{\recursivefalse\parsearg\macroxxx}
+\def\rmacro{\recursivetrue\parsearg\macroxxx}
+
+% \macroxxx: process the argument line of @macro / @rmacro.
+% #1 is the rest of the line: the macro name, optionally followed by a
+% braced parameter list.  Steps:
+%  - split #1 into \macname and \argl, and count the parameters;
+%  - the first time a name is defined as a macro, save its previous
+%    meaning under macsave.NAME, set the ismacro.NAME flag and add the
+%    name to \macrolist; on redefinition only a warning is issued;
+%  - open a group, switch to \macrobodyctxt and read the body up to
+%    `@end macro' or `@end rmacro'.
+\def\macroxxx#1{%
+  \getargs{#1}% now \macname is the macname and \argl the arglist
+  \ifx\argl\empty % no arguments
+    \paramno=0\relax
+  \else
+    \expandafter\parsemargdef \argl;%
+    % This must be a numeric test.  A plain \if would compare the two
+    % unexpandable tokens \paramno and >, which is always false, so the
+    % check below would never run.
+    \ifnum\paramno>256\relax
+      \ifx\eTeXversion\thisisundefined
+        \errhelp = \EMsimple
+        \errmessage{You need eTeX to compile a file with macros with more than 256 arguments}%
+      \fi
+    \fi
+  \fi
+  \if1\csname ismacro.\the\macname\endcsname
+    \message{Warning: redefining \the\macname}%
+  \else
+    \expandafter\ifx\csname \the\macname\endcsname \relax
+    \else \errmessage{Macro name \the\macname\space already defined}\fi
+    \global\cslet{macsave.\the\macname}{\the\macname}%
+    \global\expandafter\let\csname ismacro.\the\macname\endcsname=1%
+    \addtomacrolist{\the\macname}%
+  \fi
+  \begingroup \macrobodyctxt
+  \ifrecursive \expandafter\parsermacbody
+  \else \expandafter\parsemacbody
+  \fi}
+
+% @unmacro NAME: restore the meaning saved under macsave.NAME, clear
+% the ismacro.NAME flag and drop NAME from \macrolist.  It is an error
+% if NAME is not currently flagged as a macro.
+\parseargdef\unmacro{%
+  \if1\csname ismacro.#1\endcsname
+    \global\cslet{#1}{macsave.#1}%
+    \global\expandafter\let \csname ismacro.#1\endcsname=0%
+    % Remove the macro name from \macrolist:
+    \begingroup
+      \expandafter\let\csname#1\endcsname \relax
+      \let\commondummyword\unmacrodo
+      \xdef\macrolist{\macrolist}%
+    \endgroup
+  \else
+    \errmessage{Macro #1 not defined}%
+  \fi
+}
+
+% Called by \unmacro, as the temporary meaning of \commondummyword, on
+% each entry of \macrolist. The idea is to omit any
+% macro definitions that have been changed to \relax.
+%
+\def\unmacrodo#1{%
+  \ifx #1\relax
+    % remove this
+  \else
+    \noexpand\commondummyword \noexpand#1%
+  \fi
+}
+
+% \getargs -- Parse the arguments to a @macro line. Set \macname to
+% the name of the macro, and \argl to the braced argument list.
+\def\getargs#1{\getargsxxx#1{}}
+\def\getargsxxx#1#{\getmacname #1 \relax\getmacargs}
+\def\getmacname#1 #2\relax{\macname={#1}}
+\def\getmacargs#1{\def\argl{#1}}
+% This made use of the feature that if the last token of a
+% parameter text is #, then the preceding argument is delimited by
+% an opening brace, and that opening brace is not consumed.
+
+% Parse the optional {params} list to @macro or @rmacro.
+% Set \paramno to the number of arguments,
+% and \paramlist to a parameter text for the macro (e.g. #1,#2,#3 for a
+% three-param macro.) Define \macarg.BLAH for each BLAH in the params
+% list to some hook where the argument is to be expanded. If there are
+% fewer than 10 arguments that hook is to be replaced by ##N where N
+% is the position in that list, that is to say the macro arguments are to be
+% defined `a la TeX in the macro body.
+%
+% That gets used by \mbodybackslash (above).
+%
+% If there are 10 or more arguments, a different technique is used: see
+% \parsemmanyargdef.
+%
+\def\parsemargdef#1;{%
+  \paramno=0\def\paramlist{}%
+  \let\hash\relax
+  % \hash is redefined to `#' later to get it into definitions
+  \let\xeatspaces\relax
+  \parsemargdefxxx#1,;,%
+  \ifnum\paramno<10\relax\else
+    \paramno0\relax
+    \parsemmanyargdef@@#1,;,% 10 or more arguments
+  \fi
+}
+% Handle one comma-terminated parameter name per call, recursing through
+% \next until the `;' sentinel appended by \parsemargdef is reached.
+\def\parsemargdefxxx#1,{%
+  \if#1;\let\next=\relax
+  \else \let\next=\parsemargdefxxx
+    \advance\paramno by 1
+    \expandafter\edef\csname macarg.\eatspaces{#1}\endcsname
+        {\xeatspaces{\hash\the\paramno}}%
+    \edef\paramlist{\paramlist\hash\the\paramno,}%
+  \fi\next}
+
+% \parsemacbody, \parsermacbody
+%
+% Read recursive and nonrecursive macro bodies. (They're different since
+% rec and nonrec macros end differently.)
+%
+% We are in \macrobodyctxt, and the \xdef causes backslashes in the macro
+% body to be transformed.
+% Set \macrobody to the body of the macro, and call \defmacro.
+% +{\catcode`\ =\other\long\gdef\parsemacbody#1@end macro{% +\xdef\macrobody{\eatcr{#1}}\endgroup\defmacro}}% +{\catcode`\ =\other\long\gdef\parsermacbody#1@end rmacro{% +\xdef\macrobody{\eatcr{#1}}\endgroup\defmacro}}% + +% Make @ a letter, so that we can make private-to-Texinfo macro names. +\edef\texiatcatcode{\the\catcode`\@} +\catcode `@=11\relax + +%%%%%%%%%%%%%% Code for >= 10 arguments only %%%%%%%%%%%%%%%%%% + +% If there are 10 or more arguments, a different technique is used, where the +% hook remains in the body, and when the macro is to be expanded the body is +% processed again to replace the arguments. +% +% In that case, the hook is \the\toks N-1, and we simply set \toks N-1 to the +% argument N value and then \edef the body (nothing else will expand because of +% the catcode regime under which the body was input). +% +% If you compile with TeX (not eTeX), and you have macros with 10 or more +% arguments, no macro can have more than 256 arguments (else error). +% +% In case that there are 10 or more arguments we parse again the arguments +% list to set new definitions for the \macarg.BLAH macros corresponding to +% each BLAH argument. It was anyhow needed to parse already once this list +% in order to count the arguments, and as macros with at most 9 arguments +% are by far more frequent than macros with 10 or more arguments, defining +% twice the \macarg.BLAH macros does not cost too much processing power. +\def\parsemmanyargdef@@#1,{% + \if#1;\let\next=\relax + \else + \let\next=\parsemmanyargdef@@ + \edef\tempb{\eatspaces{#1}}% + \expandafter\def\expandafter\tempa + \expandafter{\csname macarg.\tempb\endcsname}% + % Note that we need some extra \noexpand\noexpand, this is because we + % don't want \the to be expanded in the \parsermacbody as it uses an + % \xdef . 
+ \expandafter\edef\tempa + {\noexpand\noexpand\noexpand\the\toks\the\paramno}% + \advance\paramno by 1\relax + \fi\next} + + +\let\endargs@\relax +\let\nil@\relax +\def\nilm@{\nil@}% +\long\def\nillm@{\nil@}% + +% This macro is expanded during the Texinfo macro expansion, not during its +% definition. It gets all the arguments' values and assigns them to macros +% macarg.ARGNAME +% +% #1 is the macro name +% #2 is the list of argument names +% #3 is the list of argument values +\def\getargvals@#1#2#3{% + \def\macargdeflist@{}% + \def\saveparamlist@{#2}% Need to keep a copy for parameter expansion. + \def\paramlist{#2,\nil@}% + \def\macroname{#1}% + \begingroup + \macroargctxt + \def\argvaluelist{#3,\nil@}% + \def\@tempa{#3}% + \ifx\@tempa\empty + \setemptyargvalues@ + \else + \getargvals@@ + \fi +} +\def\getargvals@@{% + \ifx\paramlist\nilm@ + % Some sanity check needed here that \argvaluelist is also empty. + \ifx\argvaluelist\nillm@ + \else + \errhelp = \EMsimple + \errmessage{Too many arguments in macro `\macroname'!}% + \fi + \let\next\macargexpandinbody@ + \else + \ifx\argvaluelist\nillm@ + % No more arguments values passed to macro. Set remaining named-arg + % macros to empty. + \let\next\setemptyargvalues@ + \else + % pop current arg name into \@tempb + \def\@tempa##1{\pop@{\@tempb}{\paramlist}##1\endargs@}% + \expandafter\@tempa\expandafter{\paramlist}% + % pop current argument value into \@tempc + \def\@tempa##1{\longpop@{\@tempc}{\argvaluelist}##1\endargs@}% + \expandafter\@tempa\expandafter{\argvaluelist}% + % Here \@tempb is the current arg name and \@tempc is the current arg value. 
+ % First place the new argument macro definition into \@tempd + \expandafter\macname\expandafter{\@tempc}% + \expandafter\let\csname macarg.\@tempb\endcsname\relax + \expandafter\def\expandafter\@tempe\expandafter{% + \csname macarg.\@tempb\endcsname}% + \edef\@tempd{\long\def\@tempe{\the\macname}}% + \push@\@tempd\macargdeflist@ + \let\next\getargvals@@ + \fi + \fi + \next +} + +% \push@ MACRO LIST: redefine LIST so that its replacement text is the +% one-level expansion of MACRO followed by the previous contents of LIST. +\def\push@#1#2{% + \expandafter\expandafter\expandafter\def + \expandafter\expandafter\expandafter#2% + \expandafter\expandafter\expandafter{% + \expandafter#1#2}% +} + +% Replace arguments by their values in the macro body, and place the result +% in macro \@tempa. +% +\def\macvalstoargs@{% + % To do this we use the property that token registers that are \the'ed + % within an \edef expand only once. So we are going to place all argument + % values into respective token registers. + % + % First we save the token context, and initialize argument numbering. + \begingroup + \paramno0\relax + % Then, for each argument number #N, we place the corresponding argument + % value into a new token list register \toks#N + \expandafter\putargsintokens@\saveparamlist@,;,% + % Then, we expand the body so that arguments are replaced by their + % values. The trick for values not to be expanded themselves is that they + % are within tokens and that tokens expand only once in an \edef . + \edef\@tempc{\csname mac.\macroname .body\endcsname}% + % Now we restore the token stack pointer to free the token list registers + % which we have used, but we make sure that expanded body is saved after + % group. + \expandafter + \endgroup + \expandafter\def\expandafter\@tempa\expandafter{\@tempc}% + } + +% Define the named-macro outside of this group and then close this group. 
+% +\def\macargexpandinbody@{% + \expandafter + \endgroup + \macargdeflist@ + % First replace the macro arguments in the body by their values; the result + % is in \@tempa . 
+ \macvalstoargs@ + % Then we point at the \norecurse or \gobble (for recursive) macro value + % with \@tempb . + \expandafter\let\expandafter\@tempb\csname mac.\macroname .recurse\endcsname + % Depending on whether it is recursive or not, we need some trailing + % \egroup . + \ifx\@tempb\gobble + \let\@tempc\relax + \else + \let\@tempc\egroup + \fi + % And now we do the real job: + \edef\@tempd{\noexpand\@tempb{\macroname}\noexpand\scanmacro{\@tempa}\@tempc}% + \@tempd +} + +\def\putargsintokens@#1,{% + \if#1;\let\next\relax + \else + \let\next\putargsintokens@ + % First we allocate the new token list register, and give it a temporary + % alias \@tempb . + \toksdef\@tempb\the\paramno + % Then we place the argument value into that token list register. + \expandafter\let\expandafter\@tempa\csname macarg.#1\endcsname + \expandafter\@tempb\expandafter{\@tempa}% + \advance\paramno by 1\relax + \fi + \next +} + +% Trailing missing arguments are set to empty. +% +\def\setemptyargvalues@{% + \ifx\paramlist\nilm@ + \let\next\macargexpandinbody@ + \else + \expandafter\setemptyargvaluesparser@\paramlist\endargs@ + \let\next\setemptyargvalues@ + \fi + \next +} + +\def\setemptyargvaluesparser@#1,#2\endargs@{% + \expandafter\def\expandafter\@tempa\expandafter{% + \expandafter\def\csname macarg.#1\endcsname{}}% + \push@\@tempa\macargdeflist@ + \def\paramlist{#2}% +} + +% #1 is the element target macro +% #2 is the list macro +% #3,#4\endargs@ is the list value +\def\pop@#1#2#3,#4\endargs@{% + \def#1{#3}% + \def#2{#4}% +} +\long\def\longpop@#1#2#3,#4\endargs@{% + \long\def#1{#3}% + \long\def#2{#4}% +} + + +%%%%%%%%%%%%%% End of code for >= 10 arguments %%%%%%%%%%%%%%%%%% + + +% This defines a Texinfo @macro or @rmacro, called by \parsemacbody. +% \macrobody has the body of the macro in it, with placeholders for +% its parameters, looking like "\xeatspaces{\hash 1}". +% \paramno is the number of parameters +% \paramlist is a TeX parameter text, e.g. 
"#1,#2,#3," +% There are four cases: macros of zero, one, up to nine, and many arguments. +% \xdef is used so that macro definitions will survive the file +% they're defined in: @include reads the file inside a group. +% +\def\defmacro{% + \let\hash=##% convert placeholders to macro parameter chars + \ifnum\paramno=1 + \def\xeatspaces##1{##1}% + % This removes the pair of braces around the argument. We don't + % use \eatspaces, because this can cause ends of lines to be lost + % when the argument to \eatspaces is read, leading to line-based + % commands like "@itemize" not being read correctly. + \else + \let\xeatspaces\relax % suppress expansion + \fi + \ifcase\paramno + % 0 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup + \noexpand\spaceisspace + \noexpand\endlineisspace + \noexpand\expandafter % skip any whitespace after the macro name. + \expandafter\noexpand\csname\the\macname @@@\endcsname}% + \expandafter\xdef\csname\the\macname @@@\endcsname{% + \egroup + \noexpand\scanmacro{\macrobody}}% + \or % 1 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup + \noexpand\braceorline + \expandafter\noexpand\csname\the\macname @@@\endcsname}% + \expandafter\xdef\csname\the\macname @@@\endcsname##1{% + \egroup + \noexpand\scanmacro{\macrobody}% + }% + \else % at most 9 + \ifnum\paramno<10\relax + % @MACNAME sets the context for reading the macro argument + % @MACNAME@@ gets the argument, processes backslashes and appends a + % comma. + % @MACNAME@@@ removes braces surrounding the argument list. + % @MACNAME@@@@ scans the macro body with arguments substituted. + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup + \noexpand\expandafter % This \expandafter skips any spaces after the + \noexpand\macroargctxt % macro before we change the catcode of space. 
+ \noexpand\expandafter + \expandafter\noexpand\csname\the\macname @@\endcsname}% + \expandafter\xdef\csname\the\macname @@\endcsname##1{% + \noexpand\passargtomacro + \expandafter\noexpand\csname\the\macname @@@\endcsname{##1,}}% + \expandafter\xdef\csname\the\macname @@@\endcsname##1{% + \expandafter\noexpand\csname\the\macname @@@@\endcsname ##1}% + \expandafter\expandafter + \expandafter\xdef + \expandafter\expandafter + \csname\the\macname @@@@\endcsname\paramlist{% + \egroup\noexpand\scanmacro{\macrobody}}% + \else % 10 or more: + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\getargvals@{\the\macname}{\argl}% + }% + \global\expandafter\let\csname mac.\the\macname .body\endcsname\macrobody + \global\expandafter\let\csname mac.\the\macname .recurse\endcsname\gobble + \fi + \fi} + +\catcode `\@\texiatcatcode\relax % end private-to-Texinfo catcodes + +\def\norecurse#1{\bgroup\cslet{#1}{macsave.#1}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +{\catcode`\@=0 \catcode`\\=13 % We need to manipulate \ so use @ as escape +@catcode`@_=11 % private names +@catcode`@!=11 % used as argument separator + +% \passargtomacro#1#2 - +% Call #1 with a list of tokens #2, with any doubled backslashes in #2 +% compressed to one. +% +% This implementation works by expansion, and not execution (so we cannot use +% \def or similar). This reduces the risk of this failing in contexts where +% complete expansion is done with no execution (for example, in writing out to +% an auxiliary file for an index entry). +% +% State is kept in the input stream: the argument passed to +% @look_ahead, @gobble_and_check_finish and @add_segment is +% +% THE_MACRO ARG_RESULT ! {PENDING_BS} NEXT_TOKEN (... 
rest of input) +% +% where: +% THE_MACRO - name of the macro we want to call +% ARG_RESULT - argument list we build to pass to that macro +% PENDING_BS - either a backslash or nothing +% NEXT_TOKEN - used to look ahead in the input stream to see what's coming next + +@gdef@passargtomacro#1#2{% + @add_segment #1!{}@relax#2\@_finish\% +} +@gdef@_finish{@_finishx} @global@let@_finishx@relax + +% #1 - THE_MACRO ARG_RESULT +% #2 - PENDING_BS +% #3 - NEXT_TOKEN +% #4 used to look ahead +% +% If the next token is not a backslash, process the rest of the argument; +% otherwise, remove the next token. +@gdef@look_ahead#1!#2#3#4{% + @ifx#4\% + @expandafter@gobble_and_check_finish + @else + @expandafter@add_segment + @fi#1!{#2}#4#4% +} + +% #1 - THE_MACRO ARG_RESULT +% #2 - PENDING_BS +% #3 - NEXT_TOKEN +% #4 should be a backslash, which is gobbled. +% #5 looks ahead +% +% Double backslash found. Add a single backslash, and look ahead. +@gdef@gobble_and_check_finish#1!#2#3#4#5{% + @add_segment#1\!{}#5#5% +} + +@gdef@is_fi{@fi} + +% #1 - THE_MACRO ARG_RESULT +% #2 - PENDING_BS +% #3 - NEXT_TOKEN +% #4 is input stream until next backslash +% +% Input stream is either at the start of the argument, or just after a +% backslash sequence, either a lone backslash, or a doubled backslash. +% NEXT_TOKEN contains the first token in the input stream: if it is \finish, +% finish; otherwise, append to ARG_RESULT the segment of the argument up until +% the next backslash. PENDING_BS contains a backslash to represent +% a backslash just before the start of the input stream that has not been +% added to ARG_RESULT. +@gdef@add_segment#1!#2#3#4\{% +@ifx#3@_finish + @call_the_macro#1!% +@else + % append the pending backslash to the result, followed by the next segment + @expandafter@is_fi@look_ahead#1#2#4!{\}@fi + % this @fi is discarded by @look_ahead. + % we can't get rid of it with \expandafter because we don't know how + % long #4 is. 
+} + +% #1 - THE_MACRO +% #2 - ARG_RESULT +% #3 discards the rest of the conditional in @add_segment, and @is_fi ends the +% conditional. +@gdef@call_the_macro#1#2!#3@fi{@is_fi #1{#2}} + +} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% \braceorline MAC is used for a one-argument macro MAC. It checks +% whether the next non-whitespace character is a {. It sets the context +% for reading the argument (slightly different in the two cases). Then, +% to read the argument, in the whole-line case, it then calls the regular +% \parsearg MAC; in the lbrace case, it calls \passargtomacro MAC. +% +\def\braceorline#1{\let\macnamexxx=#1\futurelet\nchar\braceorlinexxx} +\def\braceorlinexxx{% + \ifx\nchar\bgroup + \macroargctxt + \expandafter\passargtomacro + \else + \macrolineargctxt\expandafter\parsearg + \fi \macnamexxx} + + +% @alias. +% We need some trickery to remove the optional spaces around the equal +% sign. Make them active and then expand them all to nothing. +% +\def\alias{\parseargusing\obeyspaces\aliasxxx} +\def\aliasxxx #1{\aliasyyy#1\relax} +\def\aliasyyy #1=#2\relax{% + {% + \expandafter\let\obeyedspace=\empty + \addtomacrolist{#1}% + \xdef\next{\global\let\makecsname{#1}=\makecsname{#2}}% + }% + \next +} + + +\message{cross references,} + +\newwrite\auxfile +\newif\ifhavexrefs % True if xref values are known. +\newif\ifwarnedxrefs % True if we warned once that they aren't known. + +% @inforef is relatively simple. +\def\inforef #1{\inforefzzz #1,,,,**} +\def\inforefzzz #1,#2,#3,#4**{% + \putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, + node \samp{\ignorespaces#1{}}} + +% @node's only job in TeX is to define \lastnode, which is used in +% cross-references. The @node line might or might not have commas, and +% might or might not have spaces before the first comma, like: +% @node foo , bar , ... +% We don't want such trailing spaces in the node name. 
+% +\parseargdef\node{\checkenv{}\donode #1 ,\finishnodeparse} +% +% also remove a trailing comma, in case of something like this: +% @node Help-Cross, , , Cross-refs +\def\donode#1 ,#2\finishnodeparse{\dodonode #1,\finishnodeparse} +\def\dodonode#1,#2\finishnodeparse{\gdef\lastnode{#1}} + +\let\nwnode=\node +\let\lastnode=\empty + +% Write a cross-reference definition for the current node. #1 is the +% type (Ynumbered, Yappendix, Ynothing). +% +\def\donoderef#1{% + \ifx\lastnode\empty\else + \setref{\lastnode}{#1}% + \global\let\lastnode=\empty + \fi +} + +% @anchor{NAME} -- define xref target at arbitrary point. +% +\newcount\savesfregister +% +\def\savesf{\relax \ifhmode \savesfregister=\spacefactor \fi} +\def\restoresf{\relax \ifhmode \spacefactor=\savesfregister \fi} +\def\anchor#1{\savesf \setref{#1}{Ynothing}\restoresf \ignorespaces} + +% \setref{NAME}{SNT} defines a cross-reference point NAME (a node or an +% anchor), which consists of three parts: +% 1) NAME-title - the current sectioning name taken from \lastsection, +% or the anchor name. +% 2) NAME-snt - section number and type, passed as the SNT arg, or +% empty for anchors. +% 3) NAME-pg - the page number. +% +% This is called from \donoderef, \anchor, and \dofloat. In the case of +% floats, there is an additional part, which is not written here: +% 4) NAME-lof - the text as it should appear in a @listoffloats. +% +\def\setref#1#2{% + \pdfmkdest{#1}% + \iflinks + {% + \requireauxfile + \atdummies % preserve commands, but don't expand them + % match definition in \xrdef, \refx, \xrefX. + \def\value##1{##1}% + \edef\writexrdef##1##2{% + \write\auxfile{@xrdef{#1-% #1 of \setref, expanded by the \edef + ##1}{##2}}% these are parameters of \writexrdef + }% + \toks0 = \expandafter{\lastsection}% + \immediate \writexrdef{title}{\the\toks0 }% + \immediate \writexrdef{snt}{\csname #2\endcsname}% \Ynumbered etc. 
+ \safewhatsit{\writexrdef{pg}{\folio}}% will be written later, at \shipout + }% + \fi +} + +% @xrefautomaticsectiontitle on|off says whether @section(ing) names are used +% automatically in xrefs, if the third arg is not explicitly specified. +% This was provided as a "secret" @set xref-automatic-section-title +% variable, now it's official. +% +\parseargdef\xrefautomaticsectiontitle{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETxref-automatic-section-title\endcsname + = \empty + \else\ifx\temp\offword + \expandafter\let\csname SETxref-automatic-section-title\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @xrefautomaticsectiontitle value `\temp', + must be on|off}% + \fi\fi +} + +% +% @xref, @pxref, and @ref generate cross-references. For \xrefX, #1 is +% the node name, #2 the name of the Info cross-reference, #3 the printed +% node name, #4 the name of the Info file, #5 the name of the printed +% manual. All but the node name can be omitted. +% +\def\pxref{\putwordsee{} \xrefXX} +\def\xref{\putwordSee{} \xrefXX} +\def\ref{\xrefXX} + +\def\xrefXX#1{\def\xrefXXarg{#1}\futurelet\tokenafterxref\xrefXXX} +\def\xrefXXX{\expandafter\xrefX\expandafter[\xrefXXarg,,,,,,,]} +% +\newbox\toprefbox +\newbox\printedrefnamebox +\newbox\infofilenamebox +\newbox\printedmanualbox +% +\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup + \unsepspaces + % + % Get args without leading/trailing spaces. + \def\printedrefname{\ignorespaces #3}% + \setbox\printedrefnamebox = \hbox{\printedrefname\unskip}% + % + \def\infofilename{\ignorespaces #4}% + \setbox\infofilenamebox = \hbox{\infofilename\unskip}% + % + \def\printedmanual{\ignorespaces #5}% + \setbox\printedmanualbox = \hbox{\printedmanual\unskip}% + % + % If the printed reference name (arg #3) was not explicitly given in + % the @xref, figure out what we want to use. + \ifdim \wd\printedrefnamebox = 0pt + % No printed node name was explicitly given. 
+ \expandafter\ifx\csname SETxref-automatic-section-title\endcsname \relax + % Not auto section-title: use node name inside the square brackets. + \def\printedrefname{\ignorespaces #1}% + \else + % Auto section-title: use chapter/section title inside + % the square brackets if we have it. + \ifdim \wd\printedmanualbox > 0pt + % It is in another manual, so we don't have it; use node name. + \def\printedrefname{\ignorespaces #1}% + \else + \ifhavexrefs + % We (should) know the real title if we have the xref values. + \def\printedrefname{\refx{#1-title}{}}% + \else + % Otherwise just copy the Info node name. + \def\printedrefname{\ignorespaces #1}% + \fi% + \fi + \fi + \fi + % + % Make link in pdf output. + \ifpdf + % For pdfTeX and LuaTeX + {\indexnofonts + \makevalueexpandable + \turnoffactive + % This expands tokens, so do it after making catcode changes, so _ + % etc. don't get their TeX definitions. This ignores all spaces in + % #4, including (wrongly) those in the middle of the filename. + \getfilename{#4}% + % + % This (wrongly) does not take account of leading or trailing + % spaces in #1, which should be ignored. + \setpdfdestname{#1}% + % + \ifx\pdfdestname\empty + \def\pdfdestname{Top}% no empty targets + \fi + % + \leavevmode + \startlink attr{/Border [0 0 0]}% + \ifnum\filenamelength>0 + goto file{\the\filename.pdf} name{\pdfdestname}% + \else + goto name{\pdfmkpgn{\pdfdestname}}% + \fi + }% + \setcolor{\linkcolor}% + \else + \ifx\XeTeXrevision\thisisundefined + \else + % For XeTeX + {\indexnofonts + \makevalueexpandable + \turnoffactive + % This expands tokens, so do it after making catcode changes, so _ + % etc. don't get their TeX definitions. This ignores all spaces in + % #4, including (wrongly) those in the middle of the filename. + \getfilename{#4}% + % + % This (wrongly) does not take account of leading or trailing + % spaces in #1, which should be ignored. 
+ \setpdfdestname{#1}% + % + \ifx\pdfdestname\empty + \def\pdfdestname{Top}% no empty targets + \fi + % + \leavevmode + \ifnum\filenamelength>0 + % With default settings, + % XeTeX (xdvipdfmx) replaces link destination names with integers. + % In this case, the replaced destination names of + % remote PDFs are no longer known. In order to avoid a replacement, + % you can use xdvipdfmx's command line option `-C 0x0010'. + % If you use XeTeX 0.99996+ (TeX Live 2016+), + % this command line option is no longer necessary + % because we can use the `dvipdfmx:config' special. + \special{pdf:bann << /Border [0 0 0] /Type /Annot /Subtype /Link /A + << /S /GoToR /F (\the\filename.pdf) /D (\pdfdestname) >> >>}% + \else + \special{pdf:bann << /Border [0 0 0] /Type /Annot /Subtype /Link /A + << /S /GoTo /D (\pdfdestname) >> >>}% + \fi + }% + \setcolor{\linkcolor}% + \fi + \fi + {% + % Have to otherify everything special to allow the \csname to + % include an _ in the xref name, etc. + \indexnofonts + \turnoffactive + \def\value##1{##1}% + \expandafter\global\expandafter\let\expandafter\Xthisreftitle + \csname XR#1-title\endcsname + }% + % + % Float references are printed completely differently: "Figure 1.2" + % instead of "[somenode], p.3". \iffloat distinguishes them by + % \Xthisreftitle being set to a magic string. + \iffloat\Xthisreftitle + % If the user specified the print name (third arg) to the ref, + % print it instead of our usual "Figure 1.2". + \ifdim\wd\printedrefnamebox = 0pt + \refx{#1-snt}{}% + \else + \printedrefname + \fi + % + % If the user also gave the printed manual name (fifth arg), append + % "in MANUALNAME". + \ifdim \wd\printedmanualbox > 0pt + \space \putwordin{} \cite{\printedmanual}% + \fi + \else + % node/anchor (non-float) references. + % + % If we use \unhbox to print the node names, TeX does not insert + % empty discretionaries after hyphens, which means that it will not + % find a line break at a hyphen in a node name. 
Since some manuals + % are best written with fairly long node names, containing hyphens, + % this is a loss. Therefore, we give the text of the node name + % again, so it is as if TeX is seeing it for the first time. + % + \ifdim \wd\printedmanualbox > 0pt + % Cross-manual reference with a printed manual name. + % + \crossmanualxref{\cite{\printedmanual\unskip}}% + % + \else\ifdim \wd\infofilenamebox > 0pt + % Cross-manual reference with only an info filename (arg 4), no + % printed manual name (arg 5). This is essentially the same as + % the case above; we output the filename, since we have nothing else. + % + \crossmanualxref{\code{\infofilename\unskip}}% + % + \else + % Reference within this manual. + % + % _ (for example) has to be the character _ for the purposes of the + % control sequence corresponding to the node, but it has to expand + % into the usual \leavevmode...\vrule stuff for purposes of + % printing. So we \turnoffactive for the \refx-snt, back on for the + % printing, back off for the \refx-pg. + {\turnoffactive + % Only output a following space if the -snt ref is nonempty; for + % @unnumbered and @anchor, it won't be. + \setbox2 = \hbox{\ignorespaces \refx{#1-snt}{}}% + \ifdim \wd2 > 0pt \refx{#1-snt}\space\fi + }% + % output the `[mynode]' via the macro below so it can be overridden. + \xrefprintnodename\printedrefname + % + % But we always want a comma and a space: + ,\space + % + % output the `page 3'. + \turnoffactive \putwordpage\tie\refx{#1-pg}{}% + % Add a , if xref followed by a space + \if\space\noexpand\tokenafterxref ,% + \else\ifx\ \tokenafterxref ,% @TAB + \else\ifx\*\tokenafterxref ,% @* + \else\ifx\ \tokenafterxref ,% @SPACE + \else\ifx\ + \tokenafterxref ,% @NL + \else\ifx\tie\tokenafterxref ,% @tie + \fi\fi\fi\fi\fi\fi + \fi\fi + \fi + \endlink +\endgroup} + +% Output a cross-manual xref to #1. Used just above (twice). +% +% Only include the text "Section ``foo'' in" if the foo is neither +% missing nor Top. 
Thus, @xref{,,,foo,The Foo Manual} outputs simply +% "see The Foo Manual", the idea being to refer to the whole manual. +% +% But, this being TeX, we can't easily compare our node name against the +% string "Top" while ignoring the possible spaces before and after in +% the input. By adding the arbitrary 7sp below, we make it much less +% likely that a real node name would have the same width as "Top" (e.g., +% in a monospaced font). Hopefully it will never happen in practice. +% +% For the same basic reason, we retypeset the "Top" at every +% reference, since the current font is indeterminate. +% +% #1 is the already-formatted manual reference; it is output last, after +% the optional "Section ``NODE'' in" text built from \printedrefname. +% +\def\crossmanualxref#1{% + \setbox\toprefbox = \hbox{Top\kern7sp}% + \setbox2 = \hbox{\ignorespaces \printedrefname \unskip \kern7sp}% + \ifdim \wd2 > 7sp % nonempty? + \ifdim \wd2 = \wd\toprefbox \else % same as Top? + \putwordSection{} ``\printedrefname'' \putwordin{}\space + \fi + \fi + #1% +} + +% This macro is called from \xrefX for the `[nodename]' part of xref +% output. It's a separate macro only so it can be changed more easily, +% since square brackets don't work well in some documents. Particularly +% one that Bob is working on :). +% +\def\xrefprintnodename#1{[#1]} + +% Things referred to by \setref. 
+% +\def\Ynothing{} +\def\Yomitfromtoc{} +% \Ynumbered and \Yappendix produce the section number and type text: the +% chapter (or appendix) word and number when \secno is 0, otherwise the +% section word followed by the dotted number down to the deepest nonzero +% level among \secno, \subsecno and \subsubsecno. +\def\Ynumbered{% + \ifnum\secno=0 + \putwordChapter@tie \the\chapno + \else \ifnum\subsecno=0 + \putwordSection@tie \the\chapno.\the\secno + \else \ifnum\subsubsecno=0 + \putwordSection@tie \the\chapno.\the\secno.\the\subsecno + \else + \putwordSection@tie \the\chapno.\the\secno.\the\subsecno.\the\subsubsecno + \fi\fi\fi +} +\def\Yappendix{% + \ifnum\secno=0 + \putwordAppendix@tie @char\the\appendixno{}% + \else \ifnum\subsecno=0 + \putwordSection@tie @char\the\appendixno.\the\secno + \else \ifnum\subsubsecno=0 + \putwordSection@tie @char\the\appendixno.\the\secno.\the\subsecno + \else + \putwordSection@tie + @char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno + \fi\fi\fi +} + +% \refx{NAME}{SUFFIX} - reference a cross-reference string named NAME. SUFFIX +% is output afterwards if non-empty. +\def\refx#1#2{% + \requireauxfile + {% + \indexnofonts + \otherbackslash + \def\value##1{##1}% + \expandafter\global\expandafter\let\expandafter\thisrefX + \csname XR#1\endcsname + }% + \ifx\thisrefX\relax + % If not defined, say something at least. + \angleleft un\-de\-fined\angleright + \iflinks + \ifhavexrefs + {\toks0 = {#1}% avoid expansion of possibly-complex value + \message{\linenumber Undefined cross reference `\the\toks0'.}}% + \else + \ifwarnedxrefs\else + \global\warnedxrefstrue + \message{Cross reference values unknown; you must run TeX again.}% + \fi + \fi + \fi + \else + % It's defined, so just use it. + \thisrefX + \fi + #2% Output the suffix in any case. +} + +% This is the macro invoked by entries in the aux file. 
Define a control +% sequence for a cross-reference target (we prepend XR to the control sequence +% name to avoid collisions). The value is the text given in the entry (for +% entries written by \setref: the title, the section number and type, or the +% page number). If this is a float type, we have more work to do. +% +\def\xrdef#1#2{% + {% Expand the node or anchor name to remove control sequences. + % \turnoffactive stops 8-bit characters being changed to commands + % like @'e. 
\refx does the same to retrieve the value in the definition. + \indexnofonts + \turnoffactive + \def\value##1{##1}% + \xdef\safexrefname{#1}% + }% + % + \bgroup + \expandafter\gdef\csname XR\safexrefname\endcsname{#2}% + \egroup + % We put the \gdef inside a group to avoid the definitions building up on + % TeX's save stack, which can cause it to run out of space for aux files with + % thousands of lines. \gdef doesn't use the save stack, but \csname does + % when it defines an unknown control sequence as \relax. + % + % Was that xref control sequence that we just defined for a float? + \expandafter\iffloat\csname XR\safexrefname\endcsname + % it was a float, and we have the (safe) float type in \iffloattype. + \expandafter\let\expandafter\floatlist + \csname floatlist\iffloattype\endcsname + % + % Is this the first time we've seen this float type? + \expandafter\ifx\floatlist\relax + \toks0 = {\do}% yes, so just \do + \else + % had it before, so preserve previous elements in list. + \toks0 = \expandafter{\floatlist\do}% + \fi + % + % Remember this xref in the control sequence \floatlistFLOATTYPE, + % for later use in \listoffloats. + \expandafter\xdef\csname floatlist\iffloattype\endcsname{\the\toks0 + {\safexrefname}}% + \fi +} + +% If working on a large document in chapters, it is convenient to +% be able to disable indexing, cross-referencing, and contents, for test runs. +% This is done with @novalidate at the beginning of the file. +% +\newif\iflinks \linkstrue % by default we want the aux files. +\let\novalidate = \linksfalse + +% Used when writing to the aux file, or when using data from it. +\def\requireauxfile{% + \iflinks + \tryauxfile + % Open the new aux file. TeX will close it automatically at exit. + \immediate\openout\auxfile=\jobname.aux + \fi + \global\let\requireauxfile=\relax % Only do this once. +} + +% Read the last existing aux file, if any. No error if none exists. 
+% +\def\tryauxfile{% + \openin 1 \jobname.aux + \ifeof 1 \else + \readdatafile{aux}% + \global\havexrefstrue + \fi + \closein 1 +} + +% Set catcodes for reading a data file such as \jobname.aux: the control +% characters listed below, ^, the usual special characters and \ all +% become \other, while { and } stay grouping characters and @ is the +% escape character. +\def\setupdatafile{% + \catcode`\^^@=\other + \catcode`\^^A=\other + \catcode`\^^B=\other + \catcode`\^^C=\other + \catcode`\^^D=\other + \catcode`\^^E=\other + \catcode`\^^F=\other + \catcode`\^^G=\other + \catcode`\^^H=\other + \catcode`\^^K=\other + \catcode`\^^L=\other + \catcode`\^^N=\other + \catcode`\^^P=\other + \catcode`\^^Q=\other + \catcode`\^^R=\other + \catcode`\^^S=\other + \catcode`\^^T=\other + \catcode`\^^U=\other + \catcode`\^^V=\other + \catcode`\^^W=\other + \catcode`\^^X=\other + \catcode`\^^Z=\other + \catcode`\^^[=\other + \catcode`\^^\=\other + \catcode`\^^]=\other + \catcode`\^^^=\other + \catcode`\^^_=\other + % It was suggested to set the catcode of ^ to 7, which would allow ^^e4 etc. + % in xref tags, i.e., node names. But since ^^e4 notation isn't + % supported in the main text, it doesn't seem desirable. Furthermore, + % that is not enough: for node names that actually contain a ^ + % character, we would end up writing a line like this: 'xrdef {'hat + % b-title}{'hat b} and \xrdef does a \csname...\endcsname on the first + % argument, and \hat is not an expandable control sequence. It could + % all be worked out, but why? Either we support ^^ or we don't. + % + % The other change necessary for this was to define \auxhat: + % \def\auxhat{\def^{'hat }}% extra space so ok if followed by letter + % and then to call \auxhat in \setq. + % + \catcode`\^=\other + % + % Special characters. Should be turned off anyway, but... 
+ \catcode`\~=\other + \catcode`\[=\other + \catcode`\]=\other + \catcode`\"=\other + \catcode`\_=\other + \catcode`\|=\other + \catcode`\<=\other + \catcode`\>=\other + \catcode`\$=\other + \catcode`\#=\other + \catcode`\&=\other + \catcode`\%=\other + \catcode`+=\other % avoid \+ for paranoia even though we've turned it off + % + % This is to support \ in node names and titles, since the \ + % characters end up in a \csname. It's easier than + % leaving it active and making its active definition an actual \ + % character. What I don't understand is why it works in the *value* + % of the xrdef. Seems like it should be a catcode12 \, and that + % should not typeset properly. But it works, so I'm moving on for + % now. --karl, 15jan04. + \catcode`\\=\other + % + % @ is our escape character in .aux files, and we need braces. + \catcode`\{=1 + \catcode`\}=2 + \catcode`\@=0 +} + +% \readdatafile{EXT}: \input the file \jobname.EXT inside a group, with the +% catcodes from \setupdatafile in effect. +\def\readdatafile#1{% +\begingroup + \setupdatafile + \input\jobname.#1 +\endgroup} + + +\message{insertions,} +% including footnotes. + +\newcount \footnoteno + +% The trailing space in the following definition for supereject is +% vital for proper filling; pages come out unaligned when you do a +% pagealignmacro call if that space before the closing brace is +% removed. (Generally, numeric constants should always be followed by a +% space to prevent strange expansion errors.) +\def\supereject{\par\penalty -20000\footnoteno =0 } + +% @footnotestyle is meaningful for Info output only. +\let\footnotestyle=\comment + +{\catcode `\@=11 +% +% Auto-number footnotes. Otherwise like plain. +\gdef\footnote{% + \global\advance\footnoteno by \@ne + \edef\thisfootno{$^{\the\footnoteno}$}% + % + % In case the footnote comes at the end of a sentence, preserve the + % extra spacing after we do the footnote number. 
+ \let\@sf\empty + \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\ptexslash\fi + % + % Remove inadvertent blank space before typesetting the footnote number. 
+ \unskip + \thisfootno\@sf + \dofootnote +}% + +% Don't bother with the trickery in plain.tex to not require the +% footnote text as a parameter. Our footnotes don't need to be so general. +% +% Oh yes, they do; otherwise, @ifset (and anything else that uses +% \parseargline) fails inside footnotes because the tokens are fixed when +% the footnote is read. --karl, 16nov96. +% +\gdef\dofootnote{% + \insert\footins\bgroup + % + % Nested footnotes are not supported in TeX, that would take a lot + % more work. (\startsavinginserts does not suffice.) + \let\footnote=\errfootnotenest + % + % We want to typeset this text as a normal paragraph, even if the + % footnote reference occurs in (for example) a display environment. + % So reset some parameters. + \hsize=\txipagewidth + \interlinepenalty\interfootnotelinepenalty + \splittopskip\ht\strutbox % top baseline for broken footnotes + \splitmaxdepth\dp\strutbox + \floatingpenalty\@MM + \leftskip\z@skip + \rightskip\z@skip + \spaceskip\z@skip + \xspaceskip\z@skip + \parindent\defaultparindent + % + \smallfonts \rm + % + % Because we use hanging indentation in footnotes, a @noindent appears + % to exdent this text, so make it be a no-op. makeinfo does not use + % hanging indentation so @noindent can still be needed within footnote + % text after an @example or the like (not that this is good style). + \let\noindent = \relax + % + % Hang the footnote text off the number. Use \everypar in case the + % footnote extends for more than one paragraph. + \everypar = {\hang}% + \textindent{\thisfootno}% + % + % Don't crash into the line above the footnote text. Since this + % expands into a box, it must come within the paragraph, lest it + % provide a place where TeX can split the footnote. + \footstrut + % + % Invoke rest of plain TeX footnote routine. 
+ \futurelet\next\fo@t +} +}%end \catcode `\@=11 + +\def\errfootnotenest{% + \errhelp=\EMsimple + \errmessage{Nested footnotes not supported in texinfo.tex, + even though they work in makeinfo; sorry} +} + +\def\errfootnoteheading{% + \errhelp=\EMsimple + \errmessage{Footnotes in chapters, sections, etc., are not supported} +} + +% In case a @footnote appears in a vbox, save the footnote text and create +% the real \insert just after the vbox finished. Otherwise, the insertion +% would be lost. +% Similarly, if a @footnote appears inside an alignment, save the footnote +% text to a box and make the \insert when a row of the table is finished. +% And the same can be done for other insert classes. --kasal, 16nov03. +% +% Replace the \insert primitive by a cheating macro. +% Deeper inside, just make sure that the saved insertions are not spilled +% out prematurely. +% +\def\startsavinginserts{% + \ifx \insert\ptexinsert + \let\insert\saveinsert + \else + \let\checkinserts\relax + \fi +} + +% This \insert replacement works for both \insert\footins{foo} and +% \insert\footins\bgroup foo\egroup, but it doesn't work for \insert27{foo}. 
+% +\def\saveinsert#1{% + \edef\next{\noexpand\savetobox \makeSAVEname#1}% + \afterassignment\next + % swallow the left brace + \let\temp = +} +\def\makeSAVEname#1{\makecsname{SAVE\expandafter\gobble\string#1}} +\def\savetobox#1{\global\setbox#1 = \vbox\bgroup \unvbox#1} + +\def\checksaveins#1{\ifvoid#1\else \placesaveins#1\fi} + +\def\placesaveins#1{% + \ptexinsert \csname\expandafter\gobblesave\string#1\endcsname + {\box#1}% +} + +% eat @SAVE -- beware, all of them have catcode \other: +{ + \def\dospecials{\do S\do A\do V\do E} \uncatcodespecials % ;-) + \gdef\gobblesave @SAVE{} +} + +% initialization: allocate the save box named by \makeSAVEname for insert class #1 and append a \checksaveins call for that box to \checkinserts. +\def\newsaveins #1{% + \edef\next{\noexpand\newsaveinsX \makeSAVEname#1}% + \next +} +\def\newsaveinsX #1{% + \csname newbox\endcsname #1% + \expandafter\def\expandafter\checkinserts\expandafter{\checkinserts + \checksaveins #1}% +} + +% initialize: start with an empty \checkinserts, then register the \footins and \margin insert classes. +\let\checkinserts\empty +\newsaveins\footins +\newsaveins\margin + + +% @image. We use the macros from epsf.tex to support this. +% If epsf.tex is not installed and @image is used, we complain. +% +% Check for and read epsf.tex up front. If we read it only at @image +% time, we might be inside a group, and then its definitions would get +% undone and the next image would fail. +\openin 1 = epsf.tex +\ifeof 1 \else + % Do not bother showing banner with epsf.tex v2.7k (available in + % doc/epsf.tex and on ctan). + \def\epsfannounce{\toks0 = }% + \input epsf.tex +\fi +\closein 1 +% +% We will only complain once about lack of epsf.tex. +\newif\ifwarnednoepsf +\newhelp\noepsfhelp{epsf.tex must be installed for images to + work.
It is also included in the Texinfo distribution, or you can get + it from https://ctan.org/texarchive/macros/texinfo/texinfo/doc/epsf.tex.} +% +\def\image#1{% + \ifx\epsfbox\thisisundefined + \ifwarnednoepsf \else + \errhelp = \noepsfhelp + \errmessage{epsf.tex not found, images will be ignored}% + \global\warnednoepsftrue + \fi + \else + \imagexxx #1,,,,,\finish + \fi +} +% +% Arguments to @image: +% #1 is (mandatory) image filename; for epsf.tex we tack on the .eps extension. +% #2 is (optional) width, #3 is (optional) height. +% #4 is (ignored optional) html alt text. +% #5 is (ignored optional) extension. +% #6 is just the usual extra ignored arg for parsing stuff. +\newif\ifimagevmode +\def\imagexxx#1,#2,#3,#4,#5,#6\finish{\begingroup + \catcode`\^^M = 5 % in case we're inside an example + \normalturnoffactive % allow _ et al. in names + \def\xprocessmacroarg{\eatspaces}% in case we are being used via a macro + % If the image is by itself, center it. + \ifvmode + \imagevmodetrue + \else \ifx\centersub\centerV + % for @center @image, we need a vbox so we can have our vertical space + \imagevmodetrue + \vbox\bgroup % vbox has better behavior than vtop here + \fi\fi + % + \ifimagevmode + \nobreak\medskip + % Usually we'll have text after the image which will insert + % \parskip glue, so insert it here too to equalize the space + % above and below. + \nobreak\vskip\parskip + \nobreak + \fi + % + % Leave vertical mode so that indentation from an enclosing + % environment such as @quotation is respected. + % However, if we're at the top level, we don't want the + % normal paragraph indentation. + % On the other hand, if we are in the case of @center @image, we don't + % want to start a paragraph, which will create a hsize-width box and + % eradicate the centering. + \ifx\centersub\centerV\else \noindent \fi + % + % Output the image.
+ \ifpdf + % For pdfTeX and LuaTeX <= 0.80 + \dopdfimage{#1}{#2}{#3}% + \else + \ifx\XeTeXrevision\thisisundefined + % For epsf.tex + % \epsfbox itself resets \epsf?size at each figure. + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi + \setbox0 = \hbox{\ignorespaces #3}% + \ifdim\wd0 > 0pt \epsfysize=#3\relax \fi + \epsfbox{#1.eps}% + \else + % For XeTeX + \doxeteximage{#1}{#2}{#3}% + \fi + \fi + % + \ifimagevmode + \medskip % space after a standalone image + \fi + \ifx\centersub\centerV \egroup \fi +\endgroup} + + +% @float FLOATTYPE,LABEL,LOC ... @end float for displayed figures, tables, +% etc. We don't actually implement floating yet, we always include the +% float "here". But it seemed the best name for the future. +% +\envparseargdef\float{\eatcommaspace\eatcommaspace\dofloat#1, , ,\finish} + +% There may be a space before the second and/or third parameter; delete it. +\def\eatcommaspace#1, {#1,} + +% #1 is the optional FLOATTYPE, the text label for this float, typically +% "Figure", "Table", "Example", etc. Can't contain commas. If omitted, +% this float will not be numbered and cannot be referred to. +% +% #2 is the optional xref label. Also must be present for the float to +% be referable. +% +% #3 is the optional positioning argument; for now, it is ignored. It +% will somehow specify the positions allowed to float to (here, top, bottom). +% +% We keep a separate counter for each FLOATTYPE, which we reset at each +% chapter-level command. +\let\resetallfloatnos=\empty +% +\def\dofloat#1,#2,#3,#4\finish{% + \let\thiscaption=\empty + \let\thisshortcaption=\empty + % + % don't lose footnotes inside @float. + % + % BEWARE: once floats really start to float, we must issue a warning whenever an + % insert appears inside a float which could possibly float. --kasal, 26may04 + % + \startsavinginserts + % + % We can't be used inside a paragraph.
+ \par + % + \vtop\bgroup + \def\floattype{#1}% + \def\floatlabel{#2}% + \def\floatloc{#3}% we do nothing with this yet. + % + \ifx\floattype\empty + \let\safefloattype=\empty + \else + {% + % the floattype might have accents or other special characters, + % but we need to use it in a control sequence name. + \indexnofonts + \turnoffactive + \xdef\safefloattype{\floattype}% + }% + \fi + % + % If label is given but no type, we handle that as the empty type. + \ifx\floatlabel\empty \else + % We want each FLOATTYPE to be numbered separately (Figure 1, + % Table 1, Figure 2, ...). (And if no label, no number.) + % + \expandafter\getfloatno\csname\safefloattype floatno\endcsname + \global\advance\floatno by 1 + % + {% + % This magic value for \lastsection is output by \setref as the + % XREFLABEL-title value. \xrefX uses it to distinguish float + % labels (which have a completely different output format) from + % node and anchor labels. And \xrdef uses it to construct the + % lists of floats. + % + \edef\lastsection{\floatmagic=\safefloattype}% + \setref{\floatlabel}{Yfloat}% + }% + \fi + % + % start with \parskip glue, I guess. + \vskip\parskip + % + % Don't suppress indentation if a float happens to start a section. + \restorefirstparagraphindent +} + +% We have these possibilities (input on the left, printed caption line on the right): +% @float Foo,lbl & @caption{Cap}: Foo 1.1: Cap +% @float Foo,lbl & no caption: Foo 1.1 +% @float Foo & @caption{Cap}: Foo: Cap +% @float Foo & no caption: Foo +% @float ,lbl & @caption{Cap}: 1.1: Cap +% @float ,lbl & no caption: 1.1 +% @float & @caption{Cap}: Cap +% @float & no caption: +% +\def\Efloat{% + \let\floatident = \empty + % + % In all cases, if we have a float type, it comes first. + \ifx\floattype\empty \else \def\floatident{\floattype}\fi + % + % If we have an xref label, the number comes next. + \ifx\floatlabel\empty \else + \ifx\floattype\empty \else % if also had float type, need tie first. + \appendtomacro\floatident{\tie}% + \fi + % the number.
+ \appendtomacro\floatident{\chaplevelprefix\the\floatno}% + \fi + % + % Start the printed caption with what we've constructed in + % \floatident, but keep it separate; we need \floatident again. + \let\captionline = \floatident + % + \ifx\thiscaption\empty \else + \ifx\floatident\empty \else + \appendtomacro\captionline{: }% had ident, so need a colon between + \fi + % + % caption text. + \appendtomacro\captionline{\scanexp\thiscaption}% + \fi + % + % If we have anything to print, print it, with space before. + % Eventually this needs to become an \insert. + \ifx\captionline\empty \else + \vskip.5\parskip + \captionline + % + % Space below caption. + \vskip\parskip + \fi + % + % If we have an xref label, write the list of floats info. Do this + % after the caption, to avoid chance of it being a breakpoint. + \ifx\floatlabel\empty \else + % Write the text that goes in the lof to the aux file as + % \floatlabel-lof. Besides \floatident, we include the short + % caption if specified, else the full caption if specified, else nothing. + {% + \requireauxfile + \atdummies + % + % Use \let, not \def: a \gtemp defined as {\thiscaption} never has the same meaning as \empty, so the \ifx below would always write a dangling ": ". + \ifx\thisshortcaption\empty + \let\gtemp=\thiscaption + \else + \let\gtemp=\thisshortcaption + \fi + \immediate\write\auxfile{@xrdef{\floatlabel-lof}{\floatident + \ifx\gtemp\empty \else : \gtemp \fi}}% + }% + \fi + \egroup % end of \vtop + % + \checkinserts +} + +% Append the tokens #2 to the definition of macro #1, not expanding either. +% +\def\appendtomacro#1#2{% + \expandafter\def\expandafter#1\expandafter{#1#2}% +} + +% @caption, @shortcaption +% +\def\caption{\docaption\thiscaption} +\def\shortcaption{\docaption\thisshortcaption} +\def\docaption{\checkenv\float \bgroup\scanargctxt\defcaption} +\def\defcaption#1#2{\egroup \def#1{#2}} + +% The parameter is the control sequence identifying the counter we are +% going to use. Create it if it doesn't exist and assign it to \floatno. +\def\getfloatno#1{% + \ifx#1\relax + % Haven't seen this figure type before.
+ \csname newcount\endcsname #1% + % + % Remember to reset this floatno at the next chapter. + \expandafter\gdef\expandafter\resetallfloatnos + \expandafter{\resetallfloatnos #1=0 }% + \fi + \let\floatno#1% +} + +% \setref calls this to get the XREFLABEL-snt value. We want an @xref +% to the FLOATLABEL to expand to "Figure 3.1". We call \setref when we +% first read the @float command. +% +\def\Yfloat{\floattype@tie \chaplevelprefix\the\floatno}% + +% Magic string used for the XREFLABEL-title value, so \xrefX can +% distinguish floats from other xref types. +\def\floatmagic{!!float!!} + +% #1 is the control sequence we are passed; we expand into a conditional +% which is true if #1 represents a float ref. That is, the magic +% \lastsection value which we \setref above. +% +\def\iffloat#1{\expandafter\doiffloat#1==\finish} +% +% #1 is (maybe) the \floatmagic string. If so, #2 will be the +% (safe) float type for this float. We set \iffloattype to #2 and end with an open \ifx that the caller must close. +% +\def\doiffloat#1=#2=#3\finish{% + \def\temp{#1}% + \def\iffloattype{#2}% + \ifx\temp\floatmagic +} + +% @listoffloats FLOATTYPE - print a list of floats like a table of contents. +% +\parseargdef\listoffloats{% + \def\floattype{#1}% floattype + {% + % the floattype might have accents or other special characters, + % but we need to use it in a control sequence name. + \indexnofonts + \turnoffactive + \xdef\safefloattype{\floattype}% + }% + % + % \xrdef saves the floats as a \do-list in \floatlistSAFEFLOATTYPE. + \expandafter\ifx\csname floatlist\safefloattype\endcsname \relax + \ifhavexrefs + % if the user said @listoffloats foo but never @float foo. + \message{\linenumber No `\safefloattype' floats to list.}% + \fi + \else + \begingroup + \leftskip=\tocindent % indent these entries like a toc + \let\do=\listoffloatsdo + \csname floatlist\safefloattype\endcsname + \endgroup + \fi +} + +% This is called on each entry in a list of floats.
We're passed the +% xref label, in the form LABEL-title, which is how we save it in the +% aux file. We strip off the -title and look up \XRLABEL-lof, which +% has the text we're supposed to typeset here. +% +% Floats without xref labels will not be included in the list (since +% they won't appear in the aux file). +% +\def\listoffloatsdo#1{\listoffloatsdoentry#1\finish} +\def\listoffloatsdoentry#1-title\finish{{% + % Can't fully expand XR#1-lof because it can contain anything. Just + % pass the control sequence. On the other hand, XR#1-pg is just the + % page number, and we want to fully expand that so we can get a link + % in pdf output. + \toksA = \expandafter{\csname XR#1-lof\endcsname}% + % + % use the same \entry macro we use to generate the TOC and index. + \edef\writeentry{\noexpand\entry{\the\toksA}{\csname XR#1-pg\endcsname}}% + \writeentry +}} + + +\message{localization,} + +% For single-language documents, @documentlanguage is usually given very +% early, just after @documentencoding. The single argument is the language +% (de) or locale (de_DE) abbreviation. +% +{ + \catcode`\_ = \active + \globaldefs=1 +\parseargdef\documentlanguage{% + \tex % read txi-??.tex file in plain TeX. + % Read the file by the name they passed if it exists. + \let_ = \normalunderscore % normal _ character for filename test + \openin 1 txi-#1.tex + \ifeof 1 + \documentlanguagetrywithoutunderscore #1_\finish + \else + \globaldefs = 1 % everything in the txi-LL files needs to persist + \input txi-#1.tex + \fi + \closein 1 + \endgroup % end raw TeX +} +% +% If they passed de_DE, and txi-de_DE.tex doesn't exist, +% try txi-de.tex.
+% +\gdef\documentlanguagetrywithoutunderscore#1_#2\finish{% + \openin 1 txi-#1.tex + \ifeof 1 + \errhelp = \nolanghelp + \errmessage{Cannot read language file txi-#1.tex}% + \else + \globaldefs = 1 % everything in the txi-LL files needs to persist + \input txi-#1.tex + \fi + \closein 1 +} +}% end of special _ catcode +% +\newhelp\nolanghelp{The given language definition file cannot be found or +is empty. Maybe you need to install it? Putting it in the current +directory should work if nowhere else does.} + +% This macro is called from txi-??.tex files; the first argument is the +% \language name to set (without the "\lang@" prefix), the second and +% third args are \{left,right}hyphenmin. +% +% The language names to pass are determined when the format is built. +% See the etex.log file created at that time, e.g., +% /usr/local/texlive/2008/texmf-var/web2c/pdftex/etex.log. +% +% As of TeX Live 2008, etex includes hyphenation patterns for all +% available languages. This means we can support hyphenation in +% Texinfo, at least to some extent. (This still doesn't solve the +% accented characters problem.) +% +\catcode`@=11 +\def\txisetlanguage#1#2#3{% + % do not set the language if the name is undefined in the current TeX. + \expandafter\ifx\csname lang@#1\endcsname \relax + \message{no patterns for #1}% + \else + \global\language = \csname lang@#1\endcsname + \fi + % but there is no harm in adjusting the hyphenmin values regardless. + \global\lefthyphenmin = #2\relax + \global\righthyphenmin = #3\relax +} + +% XeTeX and LuaTeX can handle Unicode natively. +% Their default I/O uses UTF-8 sequences instead of a byte-wise operation. +% Other TeX engines' I/O (pdfTeX, etc.) is byte-wise.
+% +\newif\iftxinativeunicodecapable +\newif\iftxiusebytewiseio + +\ifx\XeTeXrevision\thisisundefined + \ifx\luatexversion\thisisundefined + \txinativeunicodecapablefalse + \txiusebytewiseiotrue + \else + \txinativeunicodecapabletrue + \txiusebytewiseiofalse + \fi +\else + \txinativeunicodecapabletrue + \txiusebytewiseiofalse +\fi + +% Set I/O by bytes instead of UTF-8 sequences for XeTeX and LuaTeX +% for non-UTF-8 (byte-wise) encodings. +% +\def\setbytewiseio{% + \ifx\XeTeXrevision\thisisundefined + \else + \XeTeXdefaultencoding "bytes" % For subsequent files to be read + \XeTeXinputencoding "bytes" % For document root file + % Unfortunately, there seems to be no corresponding XeTeX command for + % output encoding. This is a problem for auxiliary index and TOC files. + % The only solution would be perhaps to write out @U{...} sequences in + % place of non-ASCII characters. + \fi + + \ifx\luatexversion\thisisundefined + \else + \directlua{ + local utf8_char, byte, gsub = unicode.utf8.char, string.byte, string.gsub + local function convert_char (char) + return utf8_char(byte(char)) + end + + local function convert_line (line) + return gsub(line, ".", convert_char) + end + + callback.register("process_input_buffer", convert_line) + + local function convert_line_out (line) + local line_out = "" + for c in string.utfvalues(line) do + line_out = line_out .. string.char(c) + end + return line_out + end + + callback.register("process_output_buffer", convert_line_out) + } + \fi + + \txiusebytewiseiotrue +} + + +% Helpers for encodings. +% Set the catcode of characters 128 through 255 to the specified number.
+% +\def\setnonasciicharscatcode#1{% + \count255=128 + \loop\ifnum\count255<256 + \global\catcode\count255=#1\relax + \advance\count255 by 1 + \repeat +} + +\def\setnonasciicharscatcodenonglobal#1{% + \count255=128 + \loop\ifnum\count255<256 + \catcode\count255=#1\relax + \advance\count255 by 1 + \repeat +} + +% @documentencoding sets the definition of non-ASCII characters +% according to the specified encoding. +% +\def\documentencoding{\parseargusing\filenamecatcodes\documentencodingzzz} +\def\documentencodingzzz#1{% + % + % Encoding being declared for the document. + \def\declaredencoding{\csname #1.enc\endcsname}% + % + % Supported encodings: names converted to tokens in order to be able + % to compare them with \ifx. + \def\ascii{\csname US-ASCII.enc\endcsname}% + \def\latnine{\csname ISO-8859-15.enc\endcsname}% + \def\latone{\csname ISO-8859-1.enc\endcsname}% + \def\lattwo{\csname ISO-8859-2.enc\endcsname}% + \def\utfeight{\csname UTF-8.enc\endcsname}% + % + \ifx \declaredencoding \ascii + \asciichardefs + % + \else \ifx \declaredencoding \lattwo + \iftxinativeunicodecapable + \setbytewiseio + \fi + \setnonasciicharscatcode\active + \lattwochardefs + % + \else \ifx \declaredencoding \latone + \iftxinativeunicodecapable + \setbytewiseio + \fi + \setnonasciicharscatcode\active + \latonechardefs + % + \else \ifx \declaredencoding \latnine + \iftxinativeunicodecapable + \setbytewiseio + \fi + \setnonasciicharscatcode\active + \latninechardefs + % + \else \ifx \declaredencoding \utfeight + \iftxinativeunicodecapable + % For native Unicode handling (XeTeX and LuaTeX) + \nativeunicodechardefs + \else + % For treating UTF-8 as byte sequences (TeX, eTeX and pdfTeX) + \setnonasciicharscatcode\active + % Since we already invoked \utfeightchardefs at the top level + % (below), do not re-invoke it, otherwise our check for duplicated + % definitions gets triggered. Making non-ascii chars active is + % sufficient.
+ \fi + % + \else + \message{Ignoring unknown document encoding: #1.}% + % + \fi % utfeight + \fi % latnine + \fi % latone + \fi % lattwo + \fi % ascii + % + \ifx\XeTeXrevision\thisisundefined + \else + \ifx \declaredencoding \utfeight + \else + \ifx \declaredencoding \ascii + \else + \message{Warning: XeTeX with non-UTF-8 encodings cannot handle % + non-ASCII characters in auxiliary files.}% + \fi + \fi + \fi +} + +% emacs-page +% A message to be logged when using a character that isn't available in +% the default font encoding (OT1). +% +\def\missingcharmsg#1{\message{Character missing, sorry: #1.}} + +% Take account of \c (plain) vs. \, (Texinfo) difference. +\def\cedilla#1{\ifx\c\ptexc\c{#1}\else\,{#1}\fi} + +% First, make non-ASCII characters active in order for them to be +% correctly categorized when TeX reads the replacement text of +% macros containing the character definitions. +\setnonasciicharscatcode\active +% + +\def\gdefchar#1#2{% +\gdef#1{% + \ifpassthroughchars + \string#1% + \else + #2% + \fi +}} + +% Latin1 (ISO-8859-1) character definitions.
+\def\latonechardefs{% + \gdefchar^^a0{\tie} + \gdefchar^^a1{\exclamdown} + \gdefchar^^a2{{\tcfont \char162}} % cent sign + \gdefchar^^a3{\pounds{}} + \gdefchar^^a4{{\tcfont \char164}} % currency sign + \gdefchar^^a5{{\tcfont \char165}} % yen sign + \gdefchar^^a6{{\tcfont \char166}} % broken bar + \gdefchar^^a7{\S} + \gdefchar^^a8{\"{}} + \gdefchar^^a9{\copyright{}} + \gdefchar^^aa{\ordf} + \gdefchar^^ab{\guillemetleft{}} + \gdefchar^^ac{\ensuremath\lnot} + \gdefchar^^ad{\-} + \gdefchar^^ae{\registeredsymbol{}} + \gdefchar^^af{\={}} + % + \gdefchar^^b0{\textdegree} + \gdefchar^^b1{$\pm$} + \gdefchar^^b2{$^2$} + \gdefchar^^b3{$^3$} + \gdefchar^^b4{\'{}} + \gdefchar^^b5{$\mu$} + \gdefchar^^b6{\P} + \gdefchar^^b7{\ensuremath\cdot} + \gdefchar^^b8{\cedilla\ } + \gdefchar^^b9{$^1$} + \gdefchar^^ba{\ordm} + \gdefchar^^bb{\guillemetright{}} + \gdefchar^^bc{$1\over4$} + \gdefchar^^bd{$1\over2$} + \gdefchar^^be{$3\over4$} + \gdefchar^^bf{\questiondown} + % + \gdefchar^^c0{\`A} + \gdefchar^^c1{\'A} + \gdefchar^^c2{\^A} + \gdefchar^^c3{\~A} + \gdefchar^^c4{\"A} + \gdefchar^^c5{\ringaccent A} + \gdefchar^^c6{\AE} + \gdefchar^^c7{\cedilla C} + \gdefchar^^c8{\`E} + \gdefchar^^c9{\'E} + \gdefchar^^ca{\^E} + \gdefchar^^cb{\"E} + \gdefchar^^cc{\`I} + \gdefchar^^cd{\'I} + \gdefchar^^ce{\^I} + \gdefchar^^cf{\"I} + % + \gdefchar^^d0{\DH} + \gdefchar^^d1{\~N} + \gdefchar^^d2{\`O} + \gdefchar^^d3{\'O} + \gdefchar^^d4{\^O} + \gdefchar^^d5{\~O} + \gdefchar^^d6{\"O} + \gdefchar^^d7{$\times$} + \gdefchar^^d8{\O} + \gdefchar^^d9{\`U} + \gdefchar^^da{\'U} + \gdefchar^^db{\^U} + \gdefchar^^dc{\"U} + \gdefchar^^dd{\'Y} + \gdefchar^^de{\TH} + \gdefchar^^df{\ss} + % + \gdefchar^^e0{\`a} + \gdefchar^^e1{\'a} + \gdefchar^^e2{\^a} + \gdefchar^^e3{\~a} + \gdefchar^^e4{\"a} + \gdefchar^^e5{\ringaccent a} + \gdefchar^^e6{\ae} + \gdefchar^^e7{\cedilla c} + \gdefchar^^e8{\`e} + \gdefchar^^e9{\'e} + \gdefchar^^ea{\^e} + \gdefchar^^eb{\"e} + \gdefchar^^ec{\`{\dotless i}} + \gdefchar^^ed{\'{\dotless i}} + 
\gdefchar^^ee{\^{\dotless i}} + \gdefchar^^ef{\"{\dotless i}} + % + \gdefchar^^f0{\dh} + \gdefchar^^f1{\~n} + \gdefchar^^f2{\`o} + \gdefchar^^f3{\'o} + \gdefchar^^f4{\^o} + \gdefchar^^f5{\~o} + \gdefchar^^f6{\"o} + \gdefchar^^f7{$\div$} + \gdefchar^^f8{\o} + \gdefchar^^f9{\`u} + \gdefchar^^fa{\'u} + \gdefchar^^fb{\^u} + \gdefchar^^fc{\"u} + \gdefchar^^fd{\'y} + \gdefchar^^fe{\th} + \gdefchar^^ff{\"y} +} + +% Latin9 (ISO-8859-15) encoding character definitions. +\def\latninechardefs{% + % Encoding is almost identical to Latin1, so start from the Latin1 definitions and override the eight code points that differ. + \latonechardefs + % + \gdefchar^^a4{\euro{}} + \gdefchar^^a6{\v S} + \gdefchar^^a8{\v s} + \gdefchar^^b4{\v Z} + \gdefchar^^b8{\v z} + \gdefchar^^bc{\OE} + \gdefchar^^bd{\oe} + \gdefchar^^be{\"Y} +} + +% Latin2 (ISO-8859-2) character definitions. +\def\lattwochardefs{% + \gdefchar^^a0{\tie} + \gdefchar^^a1{\ogonek{A}} + \gdefchar^^a2{\u{}} + \gdefchar^^a3{\L} + \gdefchar^^a4{\missingcharmsg{CURRENCY SIGN}} + \gdefchar^^a5{\v L} + \gdefchar^^a6{\'S} + \gdefchar^^a7{\S} + \gdefchar^^a8{\"{}} + \gdefchar^^a9{\v S} + \gdefchar^^aa{\cedilla S} + \gdefchar^^ab{\v T} + \gdefchar^^ac{\'Z} + \gdefchar^^ad{\-} + \gdefchar^^ae{\v Z} + \gdefchar^^af{\dotaccent Z} + % + \gdefchar^^b0{\textdegree{}} + \gdefchar^^b1{\ogonek{a}} + \gdefchar^^b2{\ogonek{ }} + \gdefchar^^b3{\l} + \gdefchar^^b4{\'{}} + \gdefchar^^b5{\v l} + \gdefchar^^b6{\'s} + \gdefchar^^b7{\v{}} + \gdefchar^^b8{\cedilla\ } + \gdefchar^^b9{\v s} + \gdefchar^^ba{\cedilla s} + \gdefchar^^bb{\v t} + \gdefchar^^bc{\'z} + \gdefchar^^bd{\H{}} + \gdefchar^^be{\v z} + \gdefchar^^bf{\dotaccent z} + % + \gdefchar^^c0{\'R} + \gdefchar^^c1{\'A} + \gdefchar^^c2{\^A} + \gdefchar^^c3{\u A} + \gdefchar^^c4{\"A} + \gdefchar^^c5{\'L} + \gdefchar^^c6{\'C} + \gdefchar^^c7{\cedilla C} + \gdefchar^^c8{\v C} + \gdefchar^^c9{\'E} + \gdefchar^^ca{\ogonek{E}} + \gdefchar^^cb{\"E} + \gdefchar^^cc{\v E} + \gdefchar^^cd{\'I} + \gdefchar^^ce{\^I} + \gdefchar^^cf{\v D} + % + \gdefchar^^d0{\DH} + \gdefchar^^d1{\'N} + 
\gdefchar^^d2{\v N} + \gdefchar^^d3{\'O} + \gdefchar^^d4{\^O} + \gdefchar^^d5{\H O} + \gdefchar^^d6{\"O} + \gdefchar^^d7{$\times$} + \gdefchar^^d8{\v R} + \gdefchar^^d9{\ringaccent U} + \gdefchar^^da{\'U} + \gdefchar^^db{\H U} + \gdefchar^^dc{\"U} + \gdefchar^^dd{\'Y} + \gdefchar^^de{\cedilla T} + \gdefchar^^df{\ss} + % + \gdefchar^^e0{\'r} + \gdefchar^^e1{\'a} + \gdefchar^^e2{\^a} + \gdefchar^^e3{\u a} + \gdefchar^^e4{\"a} + \gdefchar^^e5{\'l} + \gdefchar^^e6{\'c} + \gdefchar^^e7{\cedilla c} + \gdefchar^^e8{\v c} + \gdefchar^^e9{\'e} + \gdefchar^^ea{\ogonek{e}} + \gdefchar^^eb{\"e} + \gdefchar^^ec{\v e} + \gdefchar^^ed{\'{\dotless{i}}} + \gdefchar^^ee{\^{\dotless{i}}} + \gdefchar^^ef{\v d} + % + \gdefchar^^f0{\dh} + \gdefchar^^f1{\'n} + \gdefchar^^f2{\v n} + \gdefchar^^f3{\'o} + \gdefchar^^f4{\^o} + \gdefchar^^f5{\H o} + \gdefchar^^f6{\"o} + \gdefchar^^f7{$\div$} + \gdefchar^^f8{\v r} + \gdefchar^^f9{\ringaccent u} + \gdefchar^^fa{\'u} + \gdefchar^^fb{\H u} + \gdefchar^^fc{\"u} + \gdefchar^^fd{\'y} + \gdefchar^^fe{\cedilla t} + \gdefchar^^ff{\dotaccent{}} +} + +% UTF-8 character definitions. +% +% This code to support UTF-8 is based on LaTeX's utf8.def, with some +% changes for Texinfo conventions. It is included here under the GPL by +% permission from Frank Mittelbach and the LaTeX team.
+% +\newcount\countUTFx +\newcount\countUTFy +\newcount\countUTFz + +\gdef\UTFviiiTwoOctets#1#2{\expandafter + \UTFviiiDefined\csname u8:#1\string #2\endcsname} +% +\gdef\UTFviiiThreeOctets#1#2#3{\expandafter + \UTFviiiDefined\csname u8:#1\string #2\string #3\endcsname} +% +\gdef\UTFviiiFourOctets#1#2#3#4{\expandafter + \UTFviiiDefined\csname u8:#1\string #2\string #3\string #4\endcsname} + +\gdef\UTFviiiDefined#1{% + \ifx #1\relax + \message{\linenumber Unicode char \string #1 not defined for Texinfo}% + \else + \expandafter #1% + \fi +} + +% Give non-ASCII bytes the active definitions for processing UTF-8 sequences +\begingroup + \catcode`\~13 + \catcode`\$12 + \catcode`\"12 + + % Loop from \countUTFx up to (but not including) \countUTFy, performing \UTFviiiTmp + % substituting ~ and $ with a character token of that value. + \def\UTFviiiLoop{% + \global\catcode\countUTFx\active + \uccode`\~\countUTFx + \uccode`\$\countUTFx + \uppercase\expandafter{\UTFviiiTmp}% + \advance\countUTFx by 1 + \ifnum\countUTFx < \countUTFy + \expandafter\UTFviiiLoop + \fi} + + % For bytes other than the first in a UTF-8 sequence (80..BF; C0 and C1, which never start a valid sequence, are covered too). Not expected to + % be expanded except when writing to auxiliary files. + \countUTFx = "80 + \countUTFy = "C2 + \def\UTFviiiTmp{% + \gdef~{% + \ifpassthroughchars $\fi}}% + \UTFviiiLoop + + \countUTFx = "C2 + \countUTFy = "E0 + \def\UTFviiiTmp{% + \gdef~{% + \ifpassthroughchars $% + \else\expandafter\UTFviiiTwoOctets\expandafter$\fi}}% + \UTFviiiLoop + + \countUTFx = "E0 + \countUTFy = "F0 + \def\UTFviiiTmp{% + \gdef~{% + \ifpassthroughchars $% + \else\expandafter\UTFviiiThreeOctets\expandafter$\fi}}% + \UTFviiiLoop + + \countUTFx = "F0 + \countUTFy = "F5 % exclusive bound: F4 is the last valid lead byte (U+100000..U+10FFFF) + \def\UTFviiiTmp{% + \gdef~{% + \ifpassthroughchars $% + \else\expandafter\UTFviiiFourOctets\expandafter$\fi + }}% + \UTFviiiLoop +\endgroup + +\def\globallet{\global\let} % save some \expandafter's below + +% @U{xxxx} to produce U+xxxx, if we support it.
+\def\U#1{% + \expandafter\ifx\csname uni:#1\endcsname \relax + \iftxinativeunicodecapable + % All Unicode characters can be used if native Unicode handling is + % active. However, if the font does not have the glyph, + % letters are missing. (The % after \uppercase{.} keeps the line end from adding a space after the character.) + \begingroup + \uccode`\.="#1\relax + \uppercase{.}% + \endgroup + \else + \errhelp = \EMsimple + \errmessage{Unicode character U+#1 not supported, sorry}% + \fi + \else + \csname uni:#1\endcsname + \fi +} + +% These macros are used here to construct the name of a control +% sequence to be defined. +\def\UTFviiiTwoOctetsName#1#2{% + \csname u8:#1\string #2\endcsname}% +\def\UTFviiiThreeOctetsName#1#2#3{% + \csname u8:#1\string #2\string #3\endcsname}% +\def\UTFviiiFourOctetsName#1#2#3#4{% + \csname u8:#1\string #2\string #3\string #4\endcsname}% + +% For UTF-8 byte sequences (TeX, e-TeX and pdfTeX), +% provide a definition macro to replace a Unicode character; +% this gets used by the @U command. +% +\begingroup + \catcode`\"=12 + \catcode`\<=12 + \catcode`\.=12 + \catcode`\,=12 + \catcode`\;=12 + \catcode`\!=12 + \catcode`\~=13 + \gdef\DeclareUnicodeCharacterUTFviii#1#2{% + \countUTFz = "#1\relax + \begingroup + \parseXMLCharref + + % Give \u8:... its definition. The sequence of seven \expandafter's + % expands after the \gdef three times, e.g. + % + % 1. \UTFviiiTwoOctetsName B1 B2 + % 2. \csname u8:B1 \string B2 \endcsname + % 3. \u8: B1 B2 (a single control sequence token) + % + \expandafter\expandafter + \expandafter\expandafter + \expandafter\expandafter + \expandafter\gdef \UTFviiiTmp{#2}% + % + \expandafter\ifx\csname uni:#1\endcsname \relax \else + \message{Internal error, already defined: #1}% + \fi + % + % define an additional control sequence for this code point. + \expandafter\globallet\csname uni:#1\endcsname \UTFviiiTmp + \endgroup} + % + % Given the value in \countUTFz as a Unicode code point, set \UTFviiiTmp + % to the corresponding UTF-8 sequence.
+ \gdef\parseXMLCharref{% + \ifnum\countUTFz < "A0\relax + \errhelp = \EMsimple + \errmessage{Cannot define Unicode char value < 00A0}% + \else\ifnum\countUTFz < "800\relax + \parseUTFviiiA,% + \parseUTFviiiB C\UTFviiiTwoOctetsName.,% + \else\ifnum\countUTFz < "10000\relax + \parseUTFviiiA;% + \parseUTFviiiA,% + \parseUTFviiiB E\UTFviiiThreeOctetsName.{,;}% + \else + \parseUTFviiiA;% + \parseUTFviiiA,% + \parseUTFviiiA!% + \parseUTFviiiB F\UTFviiiFourOctetsName.{!,;}% + \fi\fi\fi + } + + % Extract a byte from the end of the UTF-8 representation of \countUTFz. + % It must be a non-initial byte in the sequence. + % Change \uccode of #1 for it to be used in \parseUTFviiiB as one + % of the bytes. + \gdef\parseUTFviiiA#1{% + \countUTFx = \countUTFz + \divide\countUTFz by 64 + \countUTFy = \countUTFz % Save to be the future value of \countUTFz. + \multiply\countUTFz by 64 + + % \countUTFz is now \countUTFx with the last 6 bits cleared. Subtract + % in order to get the last six bits. + \advance\countUTFx by -\countUTFz + + % Convert this to the byte in the UTF-8 sequence. + \advance\countUTFx by 128 + \uccode `#1\countUTFx + \countUTFz = \countUTFy} + + % Used to put a UTF-8 byte sequence into \UTFviiiTmp + % #1 is the high hex digit of the increment ("#10) for \countUTFz to yield the first byte of the UTF-8 + % sequence. + % #2 is one of the \UTFviii*OctetsName macros. + % #3 is always a full stop (.) + % #4 is a template for the other bytes in the sequence. The values for these + % bytes are substituted in here with \uppercase using the \uccode's.
+ \gdef\parseUTFviiiB#1#2#3#4{% + \advance\countUTFz by "#10\relax + \uccode `#3\countUTFz + \uppercase{\gdef\UTFviiiTmp{#2#3#4}}} +\endgroup + +% For native Unicode handling (XeTeX and LuaTeX), +% provide a definition macro that sets a catcode to `other' non-globally; the second argument is ignored. +% +\def\DeclareUnicodeCharacterNativeOther#1#2{% + \catcode"#1=\other +} + +% https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane +% U+0000..U+007F = https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block) +% U+0080..U+00FF = https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block) +% U+0100..U+017F = https://en.wikipedia.org/wiki/Latin_Extended-A +% U+0180..U+024F = https://en.wikipedia.org/wiki/Latin_Extended-B +% +% Many of our renditions are less than wonderful, and all the missing +% characters are available somewhere. Loading the necessary fonts +% awaits user request. We can't truly support Unicode without +% reimplementing everything that's been done in LaTeX for many years, +% plus probably using luatex or xetex, and who knows what else. +% We won't be doing that here in this simple file. But we can try to at +% least make most of the characters not bomb out.
+% +\def\unicodechardefs{% + \DeclareUnicodeCharacter{00A0}{\tie}% + \DeclareUnicodeCharacter{00A1}{\exclamdown}% + \DeclareUnicodeCharacter{00A2}{{\tcfont \char162}}% 0242=cent + \DeclareUnicodeCharacter{00A3}{\pounds{}}% + \DeclareUnicodeCharacter{00A4}{{\tcfont \char164}}% 0244=currency + \DeclareUnicodeCharacter{00A5}{{\tcfont \char165}}% 0245=yen + \DeclareUnicodeCharacter{00A6}{{\tcfont \char166}}% 0246=brokenbar + \DeclareUnicodeCharacter{00A7}{\S}% + \DeclareUnicodeCharacter{00A8}{\"{ }}% + \DeclareUnicodeCharacter{00A9}{\copyright{}}% + \DeclareUnicodeCharacter{00AA}{\ordf}% + \DeclareUnicodeCharacter{00AB}{\guillemetleft{}}% + \DeclareUnicodeCharacter{00AC}{\ensuremath\lnot}% + \DeclareUnicodeCharacter{00AD}{\-}% + \DeclareUnicodeCharacter{00AE}{\registeredsymbol{}}% + \DeclareUnicodeCharacter{00AF}{\={ }}% + % + \DeclareUnicodeCharacter{00B0}{\ringaccent{ }}% + \DeclareUnicodeCharacter{00B1}{\ensuremath\pm}% + \DeclareUnicodeCharacter{00B2}{$^2$}% + \DeclareUnicodeCharacter{00B3}{$^3$}% + \DeclareUnicodeCharacter{00B4}{\'{ }}% + \DeclareUnicodeCharacter{00B5}{$\mu$}% + \DeclareUnicodeCharacter{00B6}{\P}% + \DeclareUnicodeCharacter{00B7}{\ensuremath\cdot}% + \DeclareUnicodeCharacter{00B8}{\cedilla{ }}% + \DeclareUnicodeCharacter{00B9}{$^1$}% + \DeclareUnicodeCharacter{00BA}{\ordm}% + \DeclareUnicodeCharacter{00BB}{\guillemetright{}}% + \DeclareUnicodeCharacter{00BC}{$1\over4$}% + \DeclareUnicodeCharacter{00BD}{$1\over2$}% + \DeclareUnicodeCharacter{00BE}{$3\over4$}% + \DeclareUnicodeCharacter{00BF}{\questiondown}% + % + \DeclareUnicodeCharacter{00C0}{\`A}% + \DeclareUnicodeCharacter{00C1}{\'A}% + \DeclareUnicodeCharacter{00C2}{\^A}% + \DeclareUnicodeCharacter{00C3}{\~A}% + \DeclareUnicodeCharacter{00C4}{\"A}% + \DeclareUnicodeCharacter{00C5}{\AA}% + \DeclareUnicodeCharacter{00C6}{\AE}% + \DeclareUnicodeCharacter{00C7}{\cedilla{C}}% + \DeclareUnicodeCharacter{00C8}{\`E}% + \DeclareUnicodeCharacter{00C9}{\'E}% + \DeclareUnicodeCharacter{00CA}{\^E}% + 
\DeclareUnicodeCharacter{00CB}{\"E}% + \DeclareUnicodeCharacter{00CC}{\`I}% + \DeclareUnicodeCharacter{00CD}{\'I}% + \DeclareUnicodeCharacter{00CE}{\^I}% + \DeclareUnicodeCharacter{00CF}{\"I}% + % + \DeclareUnicodeCharacter{00D0}{\DH}% + \DeclareUnicodeCharacter{00D1}{\~N}% + \DeclareUnicodeCharacter{00D2}{\`O}% + \DeclareUnicodeCharacter{00D3}{\'O}% + \DeclareUnicodeCharacter{00D4}{\^O}% + \DeclareUnicodeCharacter{00D5}{\~O}% + \DeclareUnicodeCharacter{00D6}{\"O}% + \DeclareUnicodeCharacter{00D7}{\ensuremath\times}% + \DeclareUnicodeCharacter{00D8}{\O}% + \DeclareUnicodeCharacter{00D9}{\`U}% + \DeclareUnicodeCharacter{00DA}{\'U}% + \DeclareUnicodeCharacter{00DB}{\^U}% + \DeclareUnicodeCharacter{00DC}{\"U}% + \DeclareUnicodeCharacter{00DD}{\'Y}% + \DeclareUnicodeCharacter{00DE}{\TH}% + \DeclareUnicodeCharacter{00DF}{\ss}% + % + \DeclareUnicodeCharacter{00E0}{\`a}% + \DeclareUnicodeCharacter{00E1}{\'a}% + \DeclareUnicodeCharacter{00E2}{\^a}% + \DeclareUnicodeCharacter{00E3}{\~a}% + \DeclareUnicodeCharacter{00E4}{\"a}% + \DeclareUnicodeCharacter{00E5}{\aa}% + \DeclareUnicodeCharacter{00E6}{\ae}% + \DeclareUnicodeCharacter{00E7}{\cedilla{c}}% + \DeclareUnicodeCharacter{00E8}{\`e}% + \DeclareUnicodeCharacter{00E9}{\'e}% + \DeclareUnicodeCharacter{00EA}{\^e}% + \DeclareUnicodeCharacter{00EB}{\"e}% + \DeclareUnicodeCharacter{00EC}{\`{\dotless{i}}}% + \DeclareUnicodeCharacter{00ED}{\'{\dotless{i}}}% + \DeclareUnicodeCharacter{00EE}{\^{\dotless{i}}}% + \DeclareUnicodeCharacter{00EF}{\"{\dotless{i}}}% + % + \DeclareUnicodeCharacter{00F0}{\dh}% + \DeclareUnicodeCharacter{00F1}{\~n}% + \DeclareUnicodeCharacter{00F2}{\`o}% + \DeclareUnicodeCharacter{00F3}{\'o}% + \DeclareUnicodeCharacter{00F4}{\^o}% + \DeclareUnicodeCharacter{00F5}{\~o}% + \DeclareUnicodeCharacter{00F6}{\"o}% + \DeclareUnicodeCharacter{00F7}{\ensuremath\div}% + \DeclareUnicodeCharacter{00F8}{\o}% + \DeclareUnicodeCharacter{00F9}{\`u}% + \DeclareUnicodeCharacter{00FA}{\'u}% + 
\DeclareUnicodeCharacter{00FB}{\^u}% + \DeclareUnicodeCharacter{00FC}{\"u}% + \DeclareUnicodeCharacter{00FD}{\'y}% + \DeclareUnicodeCharacter{00FE}{\th}% + \DeclareUnicodeCharacter{00FF}{\"y}% + % + \DeclareUnicodeCharacter{0100}{\=A}% + \DeclareUnicodeCharacter{0101}{\=a}% + \DeclareUnicodeCharacter{0102}{\u{A}}% + \DeclareUnicodeCharacter{0103}{\u{a}}% + \DeclareUnicodeCharacter{0104}{\ogonek{A}}% + \DeclareUnicodeCharacter{0105}{\ogonek{a}}% + \DeclareUnicodeCharacter{0106}{\'C}% + \DeclareUnicodeCharacter{0107}{\'c}% + \DeclareUnicodeCharacter{0108}{\^C}% + \DeclareUnicodeCharacter{0109}{\^c}% + \DeclareUnicodeCharacter{010A}{\dotaccent{C}}% + \DeclareUnicodeCharacter{010B}{\dotaccent{c}}% + \DeclareUnicodeCharacter{010C}{\v{C}}% + \DeclareUnicodeCharacter{010D}{\v{c}}% + \DeclareUnicodeCharacter{010E}{\v{D}}% + \DeclareUnicodeCharacter{010F}{d'}% + % + \DeclareUnicodeCharacter{0110}{\DH}% + \DeclareUnicodeCharacter{0111}{\dh}% + \DeclareUnicodeCharacter{0112}{\=E}% + \DeclareUnicodeCharacter{0113}{\=e}% + \DeclareUnicodeCharacter{0114}{\u{E}}% + \DeclareUnicodeCharacter{0115}{\u{e}}% + \DeclareUnicodeCharacter{0116}{\dotaccent{E}}% + \DeclareUnicodeCharacter{0117}{\dotaccent{e}}% + \DeclareUnicodeCharacter{0118}{\ogonek{E}}% + \DeclareUnicodeCharacter{0119}{\ogonek{e}}% + \DeclareUnicodeCharacter{011A}{\v{E}}% + \DeclareUnicodeCharacter{011B}{\v{e}}% + \DeclareUnicodeCharacter{011C}{\^G}% + \DeclareUnicodeCharacter{011D}{\^g}% + \DeclareUnicodeCharacter{011E}{\u{G}}% + \DeclareUnicodeCharacter{011F}{\u{g}}% + % + \DeclareUnicodeCharacter{0120}{\dotaccent{G}}% + \DeclareUnicodeCharacter{0121}{\dotaccent{g}}% + \DeclareUnicodeCharacter{0122}{\cedilla{G}}% + \DeclareUnicodeCharacter{0123}{\cedilla{g}}% + \DeclareUnicodeCharacter{0124}{\^H}% + \DeclareUnicodeCharacter{0125}{\^h}% + \DeclareUnicodeCharacter{0126}{\missingcharmsg{H WITH STROKE}}% + \DeclareUnicodeCharacter{0127}{\missingcharmsg{h WITH STROKE}}% + \DeclareUnicodeCharacter{0128}{\~I}% + 
\DeclareUnicodeCharacter{0129}{\~{\dotless{i}}}% + \DeclareUnicodeCharacter{012A}{\=I}% + \DeclareUnicodeCharacter{012B}{\={\dotless{i}}}% + \DeclareUnicodeCharacter{012C}{\u{I}}% + \DeclareUnicodeCharacter{012D}{\u{\dotless{i}}}% + \DeclareUnicodeCharacter{012E}{\ogonek{I}}% + \DeclareUnicodeCharacter{012F}{\ogonek{i}}% + % + \DeclareUnicodeCharacter{0130}{\dotaccent{I}}% + \DeclareUnicodeCharacter{0131}{\dotless{i}}% + \DeclareUnicodeCharacter{0132}{IJ}% + \DeclareUnicodeCharacter{0133}{ij}% + \DeclareUnicodeCharacter{0134}{\^J}% + \DeclareUnicodeCharacter{0135}{\^{\dotless{j}}}% + \DeclareUnicodeCharacter{0136}{\cedilla{K}}% + \DeclareUnicodeCharacter{0137}{\cedilla{k}}% + \DeclareUnicodeCharacter{0138}{\ensuremath\kappa}% + \DeclareUnicodeCharacter{0139}{\'L}% + \DeclareUnicodeCharacter{013A}{\'l}% + \DeclareUnicodeCharacter{013B}{\cedilla{L}}% + \DeclareUnicodeCharacter{013C}{\cedilla{l}}% + \DeclareUnicodeCharacter{013D}{L'}% should kern + \DeclareUnicodeCharacter{013E}{l'}% should kern + \DeclareUnicodeCharacter{013F}{L\U{00B7}}% + % + \DeclareUnicodeCharacter{0140}{l\U{00B7}}% + \DeclareUnicodeCharacter{0141}{\L}% + \DeclareUnicodeCharacter{0142}{\l}% + \DeclareUnicodeCharacter{0143}{\'N}% + \DeclareUnicodeCharacter{0144}{\'n}% + \DeclareUnicodeCharacter{0145}{\cedilla{N}}% + \DeclareUnicodeCharacter{0146}{\cedilla{n}}% + \DeclareUnicodeCharacter{0147}{\v{N}}% + \DeclareUnicodeCharacter{0148}{\v{n}}% + \DeclareUnicodeCharacter{0149}{'n}% + \DeclareUnicodeCharacter{014A}{\missingcharmsg{ENG}}% + \DeclareUnicodeCharacter{014B}{\missingcharmsg{eng}}% + \DeclareUnicodeCharacter{014C}{\=O}% + \DeclareUnicodeCharacter{014D}{\=o}% + \DeclareUnicodeCharacter{014E}{\u{O}}% + \DeclareUnicodeCharacter{014F}{\u{o}}% + % + \DeclareUnicodeCharacter{0150}{\H{O}}% + \DeclareUnicodeCharacter{0151}{\H{o}}% + \DeclareUnicodeCharacter{0152}{\OE}% + \DeclareUnicodeCharacter{0153}{\oe}% + \DeclareUnicodeCharacter{0154}{\'R}% + \DeclareUnicodeCharacter{0155}{\'r}% + 
\DeclareUnicodeCharacter{0156}{\cedilla{R}}% + \DeclareUnicodeCharacter{0157}{\cedilla{r}}% + \DeclareUnicodeCharacter{0158}{\v{R}}% + \DeclareUnicodeCharacter{0159}{\v{r}}% + \DeclareUnicodeCharacter{015A}{\'S}% + \DeclareUnicodeCharacter{015B}{\'s}% + \DeclareUnicodeCharacter{015C}{\^S}% + \DeclareUnicodeCharacter{015D}{\^s}% + \DeclareUnicodeCharacter{015E}{\cedilla{S}}% + \DeclareUnicodeCharacter{015F}{\cedilla{s}}% + % + \DeclareUnicodeCharacter{0160}{\v{S}}% + \DeclareUnicodeCharacter{0161}{\v{s}}% + \DeclareUnicodeCharacter{0162}{\cedilla{T}}% + \DeclareUnicodeCharacter{0163}{\cedilla{t}}% + \DeclareUnicodeCharacter{0164}{\v{T}}% + \DeclareUnicodeCharacter{0165}{\v{t}}% + \DeclareUnicodeCharacter{0166}{\missingcharmsg{H WITH STROKE}}% + \DeclareUnicodeCharacter{0167}{\missingcharmsg{h WITH STROKE}}% + \DeclareUnicodeCharacter{0168}{\~U}% + \DeclareUnicodeCharacter{0169}{\~u}% + \DeclareUnicodeCharacter{016A}{\=U}% + \DeclareUnicodeCharacter{016B}{\=u}% + \DeclareUnicodeCharacter{016C}{\u{U}}% + \DeclareUnicodeCharacter{016D}{\u{u}}% + \DeclareUnicodeCharacter{016E}{\ringaccent{U}}% + \DeclareUnicodeCharacter{016F}{\ringaccent{u}}% + % + \DeclareUnicodeCharacter{0170}{\H{U}}% + \DeclareUnicodeCharacter{0171}{\H{u}}% + \DeclareUnicodeCharacter{0172}{\ogonek{U}}% + \DeclareUnicodeCharacter{0173}{\ogonek{u}}% + \DeclareUnicodeCharacter{0174}{\^W}% + \DeclareUnicodeCharacter{0175}{\^w}% + \DeclareUnicodeCharacter{0176}{\^Y}% + \DeclareUnicodeCharacter{0177}{\^y}% + \DeclareUnicodeCharacter{0178}{\"Y}% + \DeclareUnicodeCharacter{0179}{\'Z}% + \DeclareUnicodeCharacter{017A}{\'z}% + \DeclareUnicodeCharacter{017B}{\dotaccent{Z}}% + \DeclareUnicodeCharacter{017C}{\dotaccent{z}}% + \DeclareUnicodeCharacter{017D}{\v{Z}}% + \DeclareUnicodeCharacter{017E}{\v{z}}% + \DeclareUnicodeCharacter{017F}{\missingcharmsg{LONG S}}% + % + \DeclareUnicodeCharacter{01C4}{D\v{Z}}% + \DeclareUnicodeCharacter{01C5}{D\v{z}}% + \DeclareUnicodeCharacter{01C6}{d\v{z}}% + 
\DeclareUnicodeCharacter{01C7}{LJ}% + \DeclareUnicodeCharacter{01C8}{Lj}% + \DeclareUnicodeCharacter{01C9}{lj}% + \DeclareUnicodeCharacter{01CA}{NJ}% + \DeclareUnicodeCharacter{01CB}{Nj}% + \DeclareUnicodeCharacter{01CC}{nj}% + \DeclareUnicodeCharacter{01CD}{\v{A}}% + \DeclareUnicodeCharacter{01CE}{\v{a}}% + \DeclareUnicodeCharacter{01CF}{\v{I}}% + % + \DeclareUnicodeCharacter{01D0}{\v{\dotless{i}}}% + \DeclareUnicodeCharacter{01D1}{\v{O}}% + \DeclareUnicodeCharacter{01D2}{\v{o}}% + \DeclareUnicodeCharacter{01D3}{\v{U}}% + \DeclareUnicodeCharacter{01D4}{\v{u}}% + % + \DeclareUnicodeCharacter{01E2}{\={\AE}}% + \DeclareUnicodeCharacter{01E3}{\={\ae}}% + \DeclareUnicodeCharacter{01E6}{\v{G}}% + \DeclareUnicodeCharacter{01E7}{\v{g}}% + \DeclareUnicodeCharacter{01E8}{\v{K}}% + \DeclareUnicodeCharacter{01E9}{\v{k}}% + % + \DeclareUnicodeCharacter{01F0}{\v{\dotless{j}}}% + \DeclareUnicodeCharacter{01F1}{DZ}% + \DeclareUnicodeCharacter{01F2}{Dz}% + \DeclareUnicodeCharacter{01F3}{dz}% + \DeclareUnicodeCharacter{01F4}{\'G}% + \DeclareUnicodeCharacter{01F5}{\'g}% + \DeclareUnicodeCharacter{01F8}{\`N}% + \DeclareUnicodeCharacter{01F9}{\`n}% + \DeclareUnicodeCharacter{01FC}{\'{\AE}}% + \DeclareUnicodeCharacter{01FD}{\'{\ae}}% + \DeclareUnicodeCharacter{01FE}{\'{\O}}% + \DeclareUnicodeCharacter{01FF}{\'{\o}}% + % + \DeclareUnicodeCharacter{021E}{\v{H}}% + \DeclareUnicodeCharacter{021F}{\v{h}}% + % + \DeclareUnicodeCharacter{0226}{\dotaccent{A}}% + \DeclareUnicodeCharacter{0227}{\dotaccent{a}}% + \DeclareUnicodeCharacter{0228}{\cedilla{E}}% + \DeclareUnicodeCharacter{0229}{\cedilla{e}}% + \DeclareUnicodeCharacter{022E}{\dotaccent{O}}% + \DeclareUnicodeCharacter{022F}{\dotaccent{o}}% + % + \DeclareUnicodeCharacter{0232}{\=Y}% + \DeclareUnicodeCharacter{0233}{\=y}% + \DeclareUnicodeCharacter{0237}{\dotless{j}}% + % + \DeclareUnicodeCharacter{02DB}{\ogonek{ }}% + % + % Greek letters upper case + \DeclareUnicodeCharacter{0391}{{\it A}}% + \DeclareUnicodeCharacter{0392}{{\it B}}% + 
\DeclareUnicodeCharacter{0393}{\ensuremath{\mit\Gamma}}% + \DeclareUnicodeCharacter{0394}{\ensuremath{\mit\Delta}}% + \DeclareUnicodeCharacter{0395}{{\it E}}% + \DeclareUnicodeCharacter{0396}{{\it Z}}% + \DeclareUnicodeCharacter{0397}{{\it H}}% + \DeclareUnicodeCharacter{0398}{\ensuremath{\mit\Theta}}% + \DeclareUnicodeCharacter{0399}{{\it I}}% + \DeclareUnicodeCharacter{039A}{{\it K}}% + \DeclareUnicodeCharacter{039B}{\ensuremath{\mit\Lambda}}% + \DeclareUnicodeCharacter{039C}{{\it M}}% + \DeclareUnicodeCharacter{039D}{{\it N}}% + \DeclareUnicodeCharacter{039E}{\ensuremath{\mit\Xi}}% + \DeclareUnicodeCharacter{039F}{{\it O}}% + \DeclareUnicodeCharacter{03A0}{\ensuremath{\mit\Pi}}% + \DeclareUnicodeCharacter{03A1}{{\it P}}% + %\DeclareUnicodeCharacter{03A2}{} % none - corresponds to final sigma + \DeclareUnicodeCharacter{03A3}{\ensuremath{\mit\Sigma}}% + \DeclareUnicodeCharacter{03A4}{{\it T}}% + \DeclareUnicodeCharacter{03A5}{\ensuremath{\mit\Upsilon}}% + \DeclareUnicodeCharacter{03A6}{\ensuremath{\mit\Phi}}% + \DeclareUnicodeCharacter{03A7}{{\it X}}% + \DeclareUnicodeCharacter{03A8}{\ensuremath{\mit\Psi}}% + \DeclareUnicodeCharacter{03A9}{\ensuremath{\mit\Omega}}% + % + % Vowels with accents + \DeclareUnicodeCharacter{0390}{\ensuremath{\ddot{\acute\iota}}}% + \DeclareUnicodeCharacter{03AC}{\ensuremath{\acute\alpha}}% + \DeclareUnicodeCharacter{03AD}{\ensuremath{\acute\epsilon}}% + \DeclareUnicodeCharacter{03AE}{\ensuremath{\acute\eta}}% + \DeclareUnicodeCharacter{03AF}{\ensuremath{\acute\iota}}% + \DeclareUnicodeCharacter{03B0}{\ensuremath{\acute{\ddot\upsilon}}}% + % + % Standalone accent + \DeclareUnicodeCharacter{0384}{\ensuremath{\acute{\ }}}% + % + % Greek letters lower case + \DeclareUnicodeCharacter{03B1}{\ensuremath\alpha}% + \DeclareUnicodeCharacter{03B2}{\ensuremath\beta}% + \DeclareUnicodeCharacter{03B3}{\ensuremath\gamma}% + \DeclareUnicodeCharacter{03B4}{\ensuremath\delta}% + \DeclareUnicodeCharacter{03B5}{\ensuremath\epsilon}% + 
\DeclareUnicodeCharacter{03B6}{\ensuremath\zeta}% + \DeclareUnicodeCharacter{03B7}{\ensuremath\eta}% + \DeclareUnicodeCharacter{03B8}{\ensuremath\theta}% + \DeclareUnicodeCharacter{03B9}{\ensuremath\iota}% + \DeclareUnicodeCharacter{03BA}{\ensuremath\kappa}% + \DeclareUnicodeCharacter{03BB}{\ensuremath\lambda}% + \DeclareUnicodeCharacter{03BC}{\ensuremath\mu}% + \DeclareUnicodeCharacter{03BD}{\ensuremath\nu}% + \DeclareUnicodeCharacter{03BE}{\ensuremath\xi}% + \DeclareUnicodeCharacter{03BF}{{\it o}}% omicron + \DeclareUnicodeCharacter{03C0}{\ensuremath\pi}% + \DeclareUnicodeCharacter{03C1}{\ensuremath\rho}% + \DeclareUnicodeCharacter{03C2}{\ensuremath\varsigma}% + \DeclareUnicodeCharacter{03C3}{\ensuremath\sigma}% + \DeclareUnicodeCharacter{03C4}{\ensuremath\tau}% + \DeclareUnicodeCharacter{03C5}{\ensuremath\upsilon}% + \DeclareUnicodeCharacter{03C6}{\ensuremath\phi}% + \DeclareUnicodeCharacter{03C7}{\ensuremath\chi}% + \DeclareUnicodeCharacter{03C8}{\ensuremath\psi}% + \DeclareUnicodeCharacter{03C9}{\ensuremath\omega}% + % + % More Greek vowels with accents + \DeclareUnicodeCharacter{03CA}{\ensuremath{\ddot\iota}}% + \DeclareUnicodeCharacter{03CB}{\ensuremath{\ddot\upsilon}}% + \DeclareUnicodeCharacter{03CC}{\ensuremath{\acute o}}% + \DeclareUnicodeCharacter{03CD}{\ensuremath{\acute\upsilon}}% + \DeclareUnicodeCharacter{03CE}{\ensuremath{\acute\omega}}% + % + % Variant Greek letters + \DeclareUnicodeCharacter{03D1}{\ensuremath\vartheta}% + \DeclareUnicodeCharacter{03D6}{\ensuremath\varpi}% + \DeclareUnicodeCharacter{03F1}{\ensuremath\varrho}% + % + \DeclareUnicodeCharacter{1E02}{\dotaccent{B}}% + \DeclareUnicodeCharacter{1E03}{\dotaccent{b}}% + \DeclareUnicodeCharacter{1E04}{\udotaccent{B}}% + \DeclareUnicodeCharacter{1E05}{\udotaccent{b}}% + \DeclareUnicodeCharacter{1E06}{\ubaraccent{B}}% + \DeclareUnicodeCharacter{1E07}{\ubaraccent{b}}% + \DeclareUnicodeCharacter{1E0A}{\dotaccent{D}}% + \DeclareUnicodeCharacter{1E0B}{\dotaccent{d}}% + 
\DeclareUnicodeCharacter{1E0C}{\udotaccent{D}}% + \DeclareUnicodeCharacter{1E0D}{\udotaccent{d}}% + \DeclareUnicodeCharacter{1E0E}{\ubaraccent{D}}% + \DeclareUnicodeCharacter{1E0F}{\ubaraccent{d}}% + % + \DeclareUnicodeCharacter{1E1E}{\dotaccent{F}}% + \DeclareUnicodeCharacter{1E1F}{\dotaccent{f}}% + % + \DeclareUnicodeCharacter{1E20}{\=G}% + \DeclareUnicodeCharacter{1E21}{\=g}% + \DeclareUnicodeCharacter{1E22}{\dotaccent{H}}% + \DeclareUnicodeCharacter{1E23}{\dotaccent{h}}% + \DeclareUnicodeCharacter{1E24}{\udotaccent{H}}% + \DeclareUnicodeCharacter{1E25}{\udotaccent{h}}% + \DeclareUnicodeCharacter{1E26}{\"H}% + \DeclareUnicodeCharacter{1E27}{\"h}% + % + \DeclareUnicodeCharacter{1E30}{\'K}% + \DeclareUnicodeCharacter{1E31}{\'k}% + \DeclareUnicodeCharacter{1E32}{\udotaccent{K}}% + \DeclareUnicodeCharacter{1E33}{\udotaccent{k}}% + \DeclareUnicodeCharacter{1E34}{\ubaraccent{K}}% + \DeclareUnicodeCharacter{1E35}{\ubaraccent{k}}% + \DeclareUnicodeCharacter{1E36}{\udotaccent{L}}% + \DeclareUnicodeCharacter{1E37}{\udotaccent{l}}% + \DeclareUnicodeCharacter{1E3A}{\ubaraccent{L}}% + \DeclareUnicodeCharacter{1E3B}{\ubaraccent{l}}% + \DeclareUnicodeCharacter{1E3E}{\'M}% + \DeclareUnicodeCharacter{1E3F}{\'m}% + % + \DeclareUnicodeCharacter{1E40}{\dotaccent{M}}% + \DeclareUnicodeCharacter{1E41}{\dotaccent{m}}% + \DeclareUnicodeCharacter{1E42}{\udotaccent{M}}% + \DeclareUnicodeCharacter{1E43}{\udotaccent{m}}% + \DeclareUnicodeCharacter{1E44}{\dotaccent{N}}% + \DeclareUnicodeCharacter{1E45}{\dotaccent{n}}% + \DeclareUnicodeCharacter{1E46}{\udotaccent{N}}% + \DeclareUnicodeCharacter{1E47}{\udotaccent{n}}% + \DeclareUnicodeCharacter{1E48}{\ubaraccent{N}}% + \DeclareUnicodeCharacter{1E49}{\ubaraccent{n}}% + % + \DeclareUnicodeCharacter{1E54}{\'P}% + \DeclareUnicodeCharacter{1E55}{\'p}% + \DeclareUnicodeCharacter{1E56}{\dotaccent{P}}% + \DeclareUnicodeCharacter{1E57}{\dotaccent{p}}% + \DeclareUnicodeCharacter{1E58}{\dotaccent{R}}% + \DeclareUnicodeCharacter{1E59}{\dotaccent{r}}% + 
\DeclareUnicodeCharacter{1E5A}{\udotaccent{R}}% + \DeclareUnicodeCharacter{1E5B}{\udotaccent{r}}% + \DeclareUnicodeCharacter{1E5E}{\ubaraccent{R}}% + \DeclareUnicodeCharacter{1E5F}{\ubaraccent{r}}% + % + \DeclareUnicodeCharacter{1E60}{\dotaccent{S}}% + \DeclareUnicodeCharacter{1E61}{\dotaccent{s}}% + \DeclareUnicodeCharacter{1E62}{\udotaccent{S}}% + \DeclareUnicodeCharacter{1E63}{\udotaccent{s}}% + \DeclareUnicodeCharacter{1E6A}{\dotaccent{T}}% + \DeclareUnicodeCharacter{1E6B}{\dotaccent{t}}% + \DeclareUnicodeCharacter{1E6C}{\udotaccent{T}}% + \DeclareUnicodeCharacter{1E6D}{\udotaccent{t}}% + \DeclareUnicodeCharacter{1E6E}{\ubaraccent{T}}% + \DeclareUnicodeCharacter{1E6F}{\ubaraccent{t}}% + % + \DeclareUnicodeCharacter{1E7C}{\~V}% + \DeclareUnicodeCharacter{1E7D}{\~v}% + \DeclareUnicodeCharacter{1E7E}{\udotaccent{V}}% + \DeclareUnicodeCharacter{1E7F}{\udotaccent{v}}% + % + \DeclareUnicodeCharacter{1E80}{\`W}% + \DeclareUnicodeCharacter{1E81}{\`w}% + \DeclareUnicodeCharacter{1E82}{\'W}% + \DeclareUnicodeCharacter{1E83}{\'w}% + \DeclareUnicodeCharacter{1E84}{\"W}% + \DeclareUnicodeCharacter{1E85}{\"w}% + \DeclareUnicodeCharacter{1E86}{\dotaccent{W}}% + \DeclareUnicodeCharacter{1E87}{\dotaccent{w}}% + \DeclareUnicodeCharacter{1E88}{\udotaccent{W}}% + \DeclareUnicodeCharacter{1E89}{\udotaccent{w}}% + \DeclareUnicodeCharacter{1E8A}{\dotaccent{X}}% + \DeclareUnicodeCharacter{1E8B}{\dotaccent{x}}% + \DeclareUnicodeCharacter{1E8C}{\"X}% + \DeclareUnicodeCharacter{1E8D}{\"x}% + \DeclareUnicodeCharacter{1E8E}{\dotaccent{Y}}% + \DeclareUnicodeCharacter{1E8F}{\dotaccent{y}}% + % + \DeclareUnicodeCharacter{1E90}{\^Z}% + \DeclareUnicodeCharacter{1E91}{\^z}% + \DeclareUnicodeCharacter{1E92}{\udotaccent{Z}}% + \DeclareUnicodeCharacter{1E93}{\udotaccent{z}}% + \DeclareUnicodeCharacter{1E94}{\ubaraccent{Z}}% + \DeclareUnicodeCharacter{1E95}{\ubaraccent{z}}% + \DeclareUnicodeCharacter{1E96}{\ubaraccent{h}}% + \DeclareUnicodeCharacter{1E97}{\"t}% + 
\DeclareUnicodeCharacter{1E98}{\ringaccent{w}}% + \DeclareUnicodeCharacter{1E99}{\ringaccent{y}}% + % + \DeclareUnicodeCharacter{1EA0}{\udotaccent{A}}% + \DeclareUnicodeCharacter{1EA1}{\udotaccent{a}}% + % + \DeclareUnicodeCharacter{1EB8}{\udotaccent{E}}% + \DeclareUnicodeCharacter{1EB9}{\udotaccent{e}}% + \DeclareUnicodeCharacter{1EBC}{\~E}% + \DeclareUnicodeCharacter{1EBD}{\~e}% + % + \DeclareUnicodeCharacter{1ECA}{\udotaccent{I}}% + \DeclareUnicodeCharacter{1ECB}{\udotaccent{i}}% + \DeclareUnicodeCharacter{1ECC}{\udotaccent{O}}% + \DeclareUnicodeCharacter{1ECD}{\udotaccent{o}}% + % + \DeclareUnicodeCharacter{1EE4}{\udotaccent{U}}% + \DeclareUnicodeCharacter{1EE5}{\udotaccent{u}}% + % + \DeclareUnicodeCharacter{1EF2}{\`Y}% + \DeclareUnicodeCharacter{1EF3}{\`y}% + \DeclareUnicodeCharacter{1EF4}{\udotaccent{Y}}% + % + \DeclareUnicodeCharacter{1EF8}{\~Y}% + \DeclareUnicodeCharacter{1EF9}{\~y}% + % + % Punctuation + \DeclareUnicodeCharacter{2013}{--}% + \DeclareUnicodeCharacter{2014}{---}% + \DeclareUnicodeCharacter{2018}{\quoteleft{}}% + \DeclareUnicodeCharacter{2019}{\quoteright{}}% + \DeclareUnicodeCharacter{201A}{\quotesinglbase{}}% + \DeclareUnicodeCharacter{201C}{\quotedblleft{}}% + \DeclareUnicodeCharacter{201D}{\quotedblright{}}% + \DeclareUnicodeCharacter{201E}{\quotedblbase{}}% + \DeclareUnicodeCharacter{2020}{\ensuremath\dagger}% + \DeclareUnicodeCharacter{2021}{\ensuremath\ddagger}% + \DeclareUnicodeCharacter{2022}{\bullet{}}% + \DeclareUnicodeCharacter{202F}{\thinspace}% + \DeclareUnicodeCharacter{2026}{\dots{}}% + \DeclareUnicodeCharacter{2039}{\guilsinglleft{}}% + \DeclareUnicodeCharacter{203A}{\guilsinglright{}}% + % + \DeclareUnicodeCharacter{20AC}{\euro{}}% + % + \DeclareUnicodeCharacter{2192}{\expansion{}}% + \DeclareUnicodeCharacter{21D2}{\result{}}% + % + % Mathematical symbols + \DeclareUnicodeCharacter{2200}{\ensuremath\forall}% + \DeclareUnicodeCharacter{2203}{\ensuremath\exists}% + \DeclareUnicodeCharacter{2208}{\ensuremath\in}% + 
\DeclareUnicodeCharacter{2212}{\minus{}}% + \DeclareUnicodeCharacter{2217}{\ast}% + \DeclareUnicodeCharacter{221E}{\ensuremath\infty}% + \DeclareUnicodeCharacter{2225}{\ensuremath\parallel}% + \DeclareUnicodeCharacter{2227}{\ensuremath\wedge}% + \DeclareUnicodeCharacter{2229}{\ensuremath\cap}% + \DeclareUnicodeCharacter{2261}{\equiv{}}% + \DeclareUnicodeCharacter{2264}{\ensuremath\leq}% + \DeclareUnicodeCharacter{2265}{\ensuremath\geq}% + \DeclareUnicodeCharacter{2282}{\ensuremath\subset}% + \DeclareUnicodeCharacter{2287}{\ensuremath\supseteq}% + % + \DeclareUnicodeCharacter{2016}{\ensuremath\Vert}% + \DeclareUnicodeCharacter{2032}{\ensuremath\prime}% + \DeclareUnicodeCharacter{210F}{\ensuremath\hbar}% + \DeclareUnicodeCharacter{2111}{\ensuremath\Im}% + \DeclareUnicodeCharacter{2113}{\ensuremath\ell}% + \DeclareUnicodeCharacter{2118}{\ensuremath\wp}% + \DeclareUnicodeCharacter{211C}{\ensuremath\Re}% + \DeclareUnicodeCharacter{2135}{\ensuremath\aleph}% + \DeclareUnicodeCharacter{2190}{\ensuremath\leftarrow}% + \DeclareUnicodeCharacter{2191}{\ensuremath\uparrow}% + \DeclareUnicodeCharacter{2193}{\ensuremath\downarrow}% + \DeclareUnicodeCharacter{2194}{\ensuremath\leftrightarrow}% + \DeclareUnicodeCharacter{2195}{\ensuremath\updownarrow}% + \DeclareUnicodeCharacter{2196}{\ensuremath\nwarrow}% + \DeclareUnicodeCharacter{2197}{\ensuremath\nearrow}% + \DeclareUnicodeCharacter{2198}{\ensuremath\searrow}% + \DeclareUnicodeCharacter{2199}{\ensuremath\swarrow}% + \DeclareUnicodeCharacter{21A6}{\ensuremath\mapsto}% + \DeclareUnicodeCharacter{21A9}{\ensuremath\hookleftarrow}% + \DeclareUnicodeCharacter{21AA}{\ensuremath\hookrightarrow}% + \DeclareUnicodeCharacter{21BC}{\ensuremath\leftharpoonup}% + \DeclareUnicodeCharacter{21BD}{\ensuremath\leftharpoondown}% + \DeclareUnicodeCharacter{21C0}{\ensuremath\rightharpoonup}% + \DeclareUnicodeCharacter{21C1}{\ensuremath\rightharpoondown}% + \DeclareUnicodeCharacter{21CC}{\ensuremath\rightleftharpoons}% + 
\DeclareUnicodeCharacter{21D0}{\ensuremath\Leftarrow}% + \DeclareUnicodeCharacter{21D1}{\ensuremath\Uparrow}% + \DeclareUnicodeCharacter{21D3}{\ensuremath\Downarrow}% + \DeclareUnicodeCharacter{21D4}{\ensuremath\Leftrightarrow}% + \DeclareUnicodeCharacter{21D5}{\ensuremath\Updownarrow}% + \DeclareUnicodeCharacter{2202}{\ensuremath\partial}% + \DeclareUnicodeCharacter{2205}{\ensuremath\emptyset}% + \DeclareUnicodeCharacter{2207}{\ensuremath\nabla}% + \DeclareUnicodeCharacter{2209}{\ensuremath\notin}% + \DeclareUnicodeCharacter{220B}{\ensuremath\owns}% + \DeclareUnicodeCharacter{220F}{\ensuremath\prod}% + \DeclareUnicodeCharacter{2210}{\ensuremath\coprod}% + \DeclareUnicodeCharacter{2211}{\ensuremath\sum}% + \DeclareUnicodeCharacter{2213}{\ensuremath\mp}% + \DeclareUnicodeCharacter{2218}{\ensuremath\circ}% + \DeclareUnicodeCharacter{221A}{\ensuremath\surd}% + \DeclareUnicodeCharacter{221D}{\ensuremath\propto}% + \DeclareUnicodeCharacter{2220}{\ensuremath\angle}% + \DeclareUnicodeCharacter{2223}{\ensuremath\mid}% + \DeclareUnicodeCharacter{2228}{\ensuremath\vee}% + \DeclareUnicodeCharacter{222A}{\ensuremath\cup}% + \DeclareUnicodeCharacter{222B}{\ensuremath\smallint}% + \DeclareUnicodeCharacter{222E}{\ensuremath\oint}% + \DeclareUnicodeCharacter{223C}{\ensuremath\sim}% + \DeclareUnicodeCharacter{2240}{\ensuremath\wr}% + \DeclareUnicodeCharacter{2243}{\ensuremath\simeq}% + \DeclareUnicodeCharacter{2245}{\ensuremath\cong}% + \DeclareUnicodeCharacter{2248}{\ensuremath\approx}% + \DeclareUnicodeCharacter{224D}{\ensuremath\asymp}% + \DeclareUnicodeCharacter{2250}{\ensuremath\doteq}% + \DeclareUnicodeCharacter{2260}{\ensuremath\neq}% + \DeclareUnicodeCharacter{226A}{\ensuremath\ll}% + \DeclareUnicodeCharacter{226B}{\ensuremath\gg}% + \DeclareUnicodeCharacter{227A}{\ensuremath\prec}% + \DeclareUnicodeCharacter{227B}{\ensuremath\succ}% + \DeclareUnicodeCharacter{2283}{\ensuremath\supset}% + \DeclareUnicodeCharacter{2286}{\ensuremath\subseteq}% + 
\DeclareUnicodeCharacter{228E}{\ensuremath\uplus}% + \DeclareUnicodeCharacter{2291}{\ensuremath\sqsubseteq}% + \DeclareUnicodeCharacter{2292}{\ensuremath\sqsupseteq}% + \DeclareUnicodeCharacter{2293}{\ensuremath\sqcap}% + \DeclareUnicodeCharacter{2294}{\ensuremath\sqcup}% + \DeclareUnicodeCharacter{2295}{\ensuremath\oplus}% + \DeclareUnicodeCharacter{2296}{\ensuremath\ominus}% + \DeclareUnicodeCharacter{2297}{\ensuremath\otimes}% + \DeclareUnicodeCharacter{2298}{\ensuremath\oslash}% + \DeclareUnicodeCharacter{2299}{\ensuremath\odot}% + \DeclareUnicodeCharacter{22A2}{\ensuremath\vdash}% + \DeclareUnicodeCharacter{22A3}{\ensuremath\dashv}% + \DeclareUnicodeCharacter{22A4}{\ensuremath\ptextop}% + \DeclareUnicodeCharacter{22A5}{\ensuremath\bot}% + \DeclareUnicodeCharacter{22A8}{\ensuremath\models}% + \DeclareUnicodeCharacter{22C0}{\ensuremath\bigwedge}% + \DeclareUnicodeCharacter{22C1}{\ensuremath\bigvee}% + \DeclareUnicodeCharacter{22C2}{\ensuremath\bigcap}% + \DeclareUnicodeCharacter{22C3}{\ensuremath\bigcup}% + \DeclareUnicodeCharacter{22C4}{\ensuremath\diamond}% + \DeclareUnicodeCharacter{22C5}{\ensuremath\cdot}% + \DeclareUnicodeCharacter{22C6}{\ensuremath\star}% + \DeclareUnicodeCharacter{22C8}{\ensuremath\bowtie}% + \DeclareUnicodeCharacter{2308}{\ensuremath\lceil}% + \DeclareUnicodeCharacter{2309}{\ensuremath\rceil}% + \DeclareUnicodeCharacter{230A}{\ensuremath\lfloor}% + \DeclareUnicodeCharacter{230B}{\ensuremath\rfloor}% + \DeclareUnicodeCharacter{2322}{\ensuremath\frown}% + \DeclareUnicodeCharacter{2323}{\ensuremath\smile}% + % + \DeclareUnicodeCharacter{25B3}{\ensuremath\triangle}% + \DeclareUnicodeCharacter{25B7}{\ensuremath\triangleright}% + \DeclareUnicodeCharacter{25BD}{\ensuremath\bigtriangledown}% + \DeclareUnicodeCharacter{25C1}{\ensuremath\triangleleft}% + \DeclareUnicodeCharacter{25C7}{\ensuremath\diamond}% + \DeclareUnicodeCharacter{2660}{\ensuremath\spadesuit}% + \DeclareUnicodeCharacter{2661}{\ensuremath\heartsuit}% + 
\DeclareUnicodeCharacter{2662}{\ensuremath\diamondsuit}% + \DeclareUnicodeCharacter{2663}{\ensuremath\clubsuit}% + \DeclareUnicodeCharacter{266D}{\ensuremath\flat}% + \DeclareUnicodeCharacter{266E}{\ensuremath\natural}% + \DeclareUnicodeCharacter{266F}{\ensuremath\sharp}% + \DeclareUnicodeCharacter{26AA}{\ensuremath\bigcirc}% + \DeclareUnicodeCharacter{27B9}{\ensuremath\rangle}% + \DeclareUnicodeCharacter{27C2}{\ensuremath\perp}% + \DeclareUnicodeCharacter{27E8}{\ensuremath\langle}% + \DeclareUnicodeCharacter{27F5}{\ensuremath\longleftarrow}% + \DeclareUnicodeCharacter{27F6}{\ensuremath\longrightarrow}% + \DeclareUnicodeCharacter{27F7}{\ensuremath\longleftrightarrow}% + \DeclareUnicodeCharacter{27FC}{\ensuremath\longmapsto}% + \DeclareUnicodeCharacter{29F5}{\ensuremath\setminus}% + \DeclareUnicodeCharacter{2A00}{\ensuremath\bigodot}% + \DeclareUnicodeCharacter{2A01}{\ensuremath\bigoplus}% + \DeclareUnicodeCharacter{2A02}{\ensuremath\bigotimes}% + \DeclareUnicodeCharacter{2A04}{\ensuremath\biguplus}% + \DeclareUnicodeCharacter{2A06}{\ensuremath\bigsqcup}% + \DeclareUnicodeCharacter{2A3F}{\ensuremath\amalg}% + \DeclareUnicodeCharacter{2AAF}{\ensuremath\preceq}% + \DeclareUnicodeCharacter{2AB0}{\ensuremath\succeq}% + % + \global\mathchardef\checkmark="1370% actually the square root sign + \DeclareUnicodeCharacter{2713}{\ensuremath\checkmark}% +}% end of \unicodechardefs + +% UTF-8 byte sequence (pdfTeX) definitions (replacing and @U command) +% It makes the setting that replace UTF-8 byte sequence. +\def\utfeightchardefs{% + \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterUTFviii + \unicodechardefs +} + +% Whether the active definitions of non-ASCII characters expand to +% non-active tokens with the same character code. This is used to +% write characters literally, instead of using active definitions for +% printing the correct glyphs. 
+\newif\ifpassthroughchars
+\passthroughcharsfalse
+
+% For native Unicode handling (XeTeX and LuaTeX),
+% provide a definition macro to replace/pass-through a Unicode character
+%
+% #1 is the code point in hexadecimal, #2 the replacement text.
+% The character is made active and globally defined to expand to a
+% literal copy of itself while \ifpassthroughchars is true, and to #2
+% otherwise.
+%
+\def\DeclareUnicodeCharacterNative#1#2{%
+  \catcode"#1=\active
+  % Helper arguments: ##1 = literal character token, ##2 = hexadecimal
+  % code point, ##3 = replacement text.
+  \def\dodeclareunicodecharacternative##1##2##3{%
+    \begingroup
+      % Map ~ to the target code so that \uppercase turns the ~ token
+      % after \gdef into the target character before it is defined.
+      \uccode`\~="##2\relax
+      \uppercase{\gdef~}{%
+        \ifpassthroughchars
+          ##1%
+        \else
+          ##3%
+        \fi
+      }
+    \endgroup
+  }
+  \begingroup
+    % Build \UTFNativeTmp as the character with code #1 carrying the
+    % catcode of `.', then hand it to the helper as its first argument.
+    \uccode`\.="#1\relax
+    \uppercase{\def\UTFNativeTmp{.}}%
+    \expandafter\dodeclareunicodecharacternative\UTFNativeTmp{#1}{#2}%
+  \endgroup
+}
+
+% Native Unicode handling (XeTeX and LuaTeX) character replacing definition.
+% It activates the setting that replaces Unicode characters.
+\def\nativeunicodechardefs{%
+  \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNative
+  \unicodechardefs
+}
+
+% For native Unicode handling (XeTeX and LuaTeX),
+% make the character token expand
+% to the sequences given in \unicodechardefs for printing.
+%
+% #1 is the code point in hexadecimal, #2 the replacement text.  The
+% replacement is stored in \UTFAtUTmp and that meaning is then copied
+% to the control sequence named `uni:#1' with \globallet (defined
+% elsewhere in this file; presumably \global\let -- confirm).
+\def\DeclareUnicodeCharacterNativeAtU#1#2{%
+  \def\UTFAtUTmp{#2}
+  \expandafter\globallet\csname uni:#1\endcsname \UTFAtUTmp
+}
+
+% @U command definitions for native Unicode handling (XeTeX and LuaTeX).
+\def\nativeunicodechardefsatu{%
+  \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNativeAtU
+  \unicodechardefs
+}
+
+% US-ASCII character definitions.
+\def\asciichardefs{% nothing need be done
+  \relax
+}
+
+% define all Unicode characters we know about, for the sake of @U.
+% Native-Unicode engines only get the `uni:<code>' control sequences
+% here; otherwise the UTF-8 byte-sequence definitions are installed.
+\iftxinativeunicodecapable
+  \nativeunicodechardefsatu
+\else
+  \utfeightchardefs
+\fi
+
+
+% Make non-ASCII characters printable again for compatibility with
+% existing Texinfo documents that may use them, even without declaring a
+% document encoding.
+%
+\setnonasciicharscatcode \other
+
+
+\message{formatting,}
+
+% Paragraph indentation that \internalpagesizes assigns to \parindent.
+\newdimen\defaultparindent \defaultparindent = 15pt
+
+\chapheadingskip = 15pt plus 4pt minus 2pt
+\secheadingskip = 12pt plus 3pt minus 2pt
+\subsecheadingskip = 9pt plus 2pt minus 2pt
+
+% Prevent underfull vbox error messages.
+\vbadness = 10000
+
+% Don't be very finicky about underfull hboxes, either.
+\hbadness = 6666
+
+% Following George Bush, get rid of widows and orphans.
+\widowpenalty=10000
+\clubpenalty=10000
+
+% Use TeX 3.0's \emergencystretch to help line breaking, but if we're
+% using an old version of TeX, don't do anything.  We want the amount of
+% stretch added to depend on the line length, hence the dependence on
+% \hsize.  We call this whenever the paper size is set.
+%
+\def\setemergencystretch{%
+  \ifx\emergencystretch\thisisundefined
+    % Allow us to assign to \emergencystretch anyway.
+    % (Assignments then land in the scratch register \dimen0.)
+    \def\emergencystretch{\dimen0}%
+  \else
+    % 15% of the current line width.
+    \emergencystretch = .15\hsize
+  \fi
+}
+
+% Parameters in order: 1) textheight; 2) textwidth;
+% 3) voffset; 4) hoffset; 5) binding offset; 6) topskip;
+% 7) physical page height; 8) physical page width.
+%
+% We also call \setleading{\textleading}, so the caller should define
+% \textleading.  The caller should also set \parskip.
+%
+\def\internalpagesizes#1#2#3#4#5#6#7#8{%
+  \voffset = #3\relax
+  \topskip = #6\relax
+  \splittopskip = \topskip
+  %
+  % Vertical sizes: \vsize is textheight plus \topskip; the outer size
+  % adds twice \topandbottommargin on top of that.
+  \vsize = #1\relax
+  \advance\vsize by \topskip
+  \outervsize = \vsize
+  \advance\outervsize by 2\topandbottommargin
+  \txipageheight = \vsize
+  %
+  % Horizontal sizes: the outer width is the text width plus 0.5in.
+  \hsize = #2\relax
+  \outerhsize = \hsize
+  \advance\outerhsize by 0.5in
+  \txipagewidth = \hsize
+  %
+  \normaloffset = #4\relax
+  \bindingoffset = #5\relax
+  %
+  % Tell the output driver the physical page size.
+  \ifpdf
+    \pdfpageheight #7\relax
+    \pdfpagewidth #8\relax
+    % if we don't reset these, they will remain at "1 true in" of
+    % whatever layout pdftex was dumped with.
+    \pdfhorigin = 1 true in
+    \pdfvorigin = 1 true in
+  \else
+    \ifx\XeTeXrevision\thisisundefined
+      % Neither pdf output nor XeTeX: emit a papersize special, with
+      % the width (#8) before the height (#7).
+      \special{papersize=#8,#7}%
+    \else
+      \pdfpageheight #7\relax
+      \pdfpagewidth #8\relax
+      % XeTeX does not have \pdfhorigin and \pdfvorigin.
+    \fi
+  \fi
+  %
+  \setleading{\textleading}
+  %
+  \parindent = \defaultparindent
+  \setemergencystretch
+}
+
+% Each paper-size macro below wraps its body in an extra group and sets
+% \globaldefs = 1 inside it, so that every assignment made there
+% (including those performed by \internalpagesizes) is global.
+
+% @letterpaper (the default).
+\def\letterpaper{{\globaldefs = 1
+  \parskip = 3pt plus 2pt minus 1pt
+  \textleading = 13.2pt
+  %
+  % If page is nothing but text, make it come out even.
+  \internalpagesizes{607.2pt}{6in}% that's 46 lines
+                    {\voffset}{.25in}%
+                    {\bindingoffset}{36pt}%
+                    {11in}{8.5in}%
+}}
+
+% Use @smallbook to reset parameters for 7x9.25 trim size.
+\def\smallbook{{\globaldefs = 1
+  \parskip = 2pt plus 1pt
+  \textleading = 12pt
+  %
+  \internalpagesizes{7.5in}{5in}%
+                    {-.2in}{0in}%
+                    {\bindingoffset}{16pt}%
+                    {9.25in}{7in}%
+  %
+  \lispnarrowing = 0.3in
+  \tolerance = 700
+  \contentsrightmargin = 0pt
+  \defbodyindent = .5cm
+}}
+
+% Use @smallerbook to reset parameters for 6x9 trim size.
+% (Just testing, parameters still in flux.)
+\def\smallerbook{{\globaldefs = 1
+  \parskip = 1.5pt plus 1pt
+  \textleading = 12pt
+  %
+  \internalpagesizes{7.4in}{4.8in}%
+                    {-.2in}{-.4in}%
+                    {0pt}{14pt}%
+                    {9in}{6in}%
+  %
+  \lispnarrowing = 0.25in
+  \tolerance = 700
+  \contentsrightmargin = 0pt
+  \defbodyindent = .4cm
+}}
+
+% Use @afourpaper to print on European A4 paper.
+\def\afourpaper{{\globaldefs = 1
+  \parskip = 3pt plus 2pt minus 1pt
+  \textleading = 13.2pt
+  %
+  % Double-side printing via postscript on Laserjet 4050
+  % prints double-sided nicely when \bindingoffset=10mm and \hoffset=-6mm.
+  % To change the settings for a different printer or situation, adjust
+  % \normaloffset until the front-side and back-side texts align.  Then
+  % do the same for \bindingoffset.  You can set these for testing in
+  % your texinfo source file like this:
+  % @tex
+  % \global\normaloffset = -6mm
+  % \global\bindingoffset = 10mm
+  % @end tex
+  \internalpagesizes{673.2pt}{160mm}% that's 51 lines
+                    {\voffset}{\hoffset}%
+                    {\bindingoffset}{44pt}%
+                    {297mm}{210mm}%
+  %
+  \tolerance = 700
+  \contentsrightmargin = 0pt
+  \defbodyindent = 5mm
+}}
+
+% Use @afivepaper to print on European A5 paper.
+% From romildo@urano.iceb.ufop.br, 2 July 2000.
+% He also recommends making @example and @lisp be small.
+\def\afivepaper{{\globaldefs = 1
+  \parskip = 2pt plus 1pt minus 0.1pt
+  \textleading = 12.5pt
+  %
+  \internalpagesizes{160mm}{120mm}%
+                    {\voffset}{\hoffset}%
+                    {\bindingoffset}{8pt}%
+                    {210mm}{148mm}%
+  %
+  \lispnarrowing = 0.2in
+  \tolerance = 800
+  \contentsrightmargin = 0pt
+  \defbodyindent = 2mm
+  \tableindent = 12mm
+}}
+
+% A specific text layout, 24x15cm overall, intended for A4 paper.
+% Starts from the \afourpaper settings and then overrides the page
+% dimensions with a second \internalpagesizes call.
+\def\afourlatex{{\globaldefs = 1
+  \afourpaper
+  \internalpagesizes{237mm}{150mm}%
+                    {\voffset}{4.6mm}%
+                    {\bindingoffset}{7mm}%
+                    {297mm}{210mm}%
+  %
+  % Must explicitly reset to 0 because we call \afourpaper.
+  % (The nested \afourpaper executes \globaldefs = 1 while \globaldefs
+  % is already 1, which makes that assignment itself global.)
+  \globaldefs = 0
+}}
+
+% Use @afourwide to print on A4 paper in landscape format.
+% NOTE(review): despite "landscape", the physical page passed here is
+% 297mm high by 210mm wide, the same as in \afourpaper; only the text
+% block differs (241mm by 165mm) -- confirm the intended wording.
+\def\afourwide{{\globaldefs = 1
+  \afourpaper
+  \internalpagesizes{241mm}{165mm}%
+                    {\voffset}{-2.95mm}%
+                    {\bindingoffset}{7mm}%
+                    {297mm}{210mm}%
+  \globaldefs = 0
+}}
+
+% @pagesizes TEXTHEIGHT[,TEXTWIDTH]
+% Perhaps we should allow setting the margins, \topskip, \parskip,
+% and/or leading, also.  Or perhaps we should compute them somehow.
+%
+% \pagesizesyyy is handed `ARG,,' so that #2 is empty when only
+% TEXTHEIGHT was given; #3 absorbs whatever commas are left over.
+\parseargdef\pagesizes{\pagesizesyyy #1,,\finish}
+\def\pagesizesyyy#1,#2,#3\finish{{%
+  % Override \hsize only when a text width was actually supplied
+  % (the box holding #2 has nonzero width).
+  \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \hsize=#2\relax \fi
+  \globaldefs = 1
+  %
+  \parskip = 3pt plus 2pt minus 1pt
+  \setleading{\textleading}%
+  %
+  % \dimen0 becomes the physical page height handed to \internalpagesizes.
+  \dimen0 = #1\relax
+  \advance\dimen0 by \voffset
+  \advance\dimen0 by 1in % reference point for DVI is 1 inch from top of page
+  %
+  % \dimen2 becomes the physical page width handed to \internalpagesizes.
+  \dimen2 = \hsize
+  \advance\dimen2 by \normaloffset
+  \advance\dimen2 by 1in % reference point is 1 inch from left edge of page
+  %
+  \internalpagesizes{#1}{\hsize}%
+                    {\voffset}{\normaloffset}%
+                    {\bindingoffset}{44pt}%
+                    {\dimen0}{\dimen2}%
+}}
+
+% Set default to letter.
+%
+\letterpaper
+
+% Default value of \hfuzz, for suppressing warnings about overfull hboxes.
+\hfuzz = 1pt
+
+
+\message{and turning on texinfo input format.}
+
+\def^^L{\par} % remove \outer, so ^L can appear in an @comment
+
+% DEL is a comment character, in case @c does not suffice.
+\catcode`\^^? = 14
+
+% Define macros to output various characters with catcode for normal text.
+% Each character is given catcode \other before its macro is defined, so
+% the macro body holds the plain (non-active) character token.
+\catcode`\"=\other \def\normaldoublequote{"}
+\catcode`\$=\other \def\normaldollar{$}%$ font-lock fix
+\catcode`\+=\other \def\normalplus{+}
+\catcode`\<=\other \def\normalless{<}
+\catcode`\>=\other \def\normalgreater{>}
+\catcode`\^=\other \def\normalcaret{^}
+\catcode`\_=\other \def\normalunderscore{_}
+\catcode`\|=\other \def\normalverticalbar{|}
+\catcode`\~=\other \def\normaltilde{~}
+
+% This macro is used to make a character print one way in \tt
+% (where it can probably be output as-is), and another way in other fonts,
+% where something hairier probably needs to be done.
+%
+% #1 is what to print if we are indeed using \tt; #2 is what to print
+% otherwise. Since all the Computer Modern typewriter fonts have zero
+% interword stretch (and shrink), and it is reasonable to expect all
+% typewriter fonts to have this, we can check that font parameter.
+%
+% (\fontdimen3 is the interword stretch of the current font.)
+\def\ifusingtt#1#2{\ifdim \fontdimen3\font=0pt #1\else #2\fi}
+
+% Same as above, but check for italic font. Actually this also catches
+% non-italic slanted fonts since it is impossible to distinguish them from
+% italic fonts. But since this is only used by $ and it uses \sl anyway
+% this is not a problem.
+% (\fontdimen1 is the slant of the current font; #1 is used when it is
+% positive, #2 otherwise.)
+\def\ifusingit#1#2{\ifdim \fontdimen1\font>0pt #1\else #2\fi}
+
+% Set catcodes for Texinfo file
+
+% Active characters for printing the wanted glyph.
+% Most of these we simply print from the \tt font, but for some, we can
+% use math or other variants that look better in normal text.
+%
+\catcode`\"=\active
+\def\activedoublequote{{\tt\char34}}
+\let"=\activedoublequote
+\catcode`\~=\active \def\activetilde{{\tt\char126}} \let~ = \activetilde
+\chardef\hatchar=`\^
+\catcode`\^=\active \def\activehat{{\tt \hatchar}} \let^ = \activehat
+
+% Active underscore: the plain character when the current font is a
+% typewriter font, otherwise the rule drawn by \_.
+\catcode`\_=\active
+\def_{\ifusingtt\normalunderscore\_}
+\def\_{\leavevmode \kern.07em \vbox{\hrule width.3em height.1ex}\kern .07em }
+\let\realunder=_
+
+\catcode`\|=\active \def|{{\tt\char124}}
+
+\chardef \less=`\<
+\catcode`\<=\active \def\activeless{{\tt \less}}\let< = \activeless
+\chardef \gtr=`\>
+\catcode`\>=\active \def\activegtr{{\tt \gtr}}\let> = \activegtr
+\catcode`\+=\active \def+{{\tt \char 43}}
+% Active dollar: slanted \$ in an italic/slanted font, else the plain one.
+\catcode`\$=\active \def${\ifusingit{{\sl\$}}\normaldollar}%$ font-lock fix
+\catcode`\-=\active \let-=\normaldash
+
+
+% used for headline/footline in the output routine, in case the page
+% breaks in the middle of an @tex block.
+% Rebinds the active < > ~ ^ to their Texinfo definitions, resets the
+% quote markup, and points \b and \i at \strong and \smartitalic.
+\def\texinfochars{%
+  \let< = \activeless
+  \let> = \activegtr
+  \let~ = \activetilde
+  \let^ = \activehat
+  \markupsetuplqdefault \markupsetuprqdefault
+  \let\b = \strong
+  \let\i = \smartitalic
+  % in principle, all other definitions in \tex have to be undone too.
+}
+
+% Used sometimes to turn off (effectively) the active characters even after
+% parsing them.
+\def\turnoffactive{%
+  \normalturnoffactive
+  \otherbackslash
+}
+
+% Make @ an escape character as well, so that @foo names the same
+% control sequence as \foo. This is needed below, once \ stops being
+% the escape character.
+\catcode`\@=0
+
+% \backslashcurfont outputs one backslash character in current font,
+% as in \char`\\.
+\global\chardef\backslashcurfont=`\\
+\global\let\rawbackslashxx=\backslashcurfont % let existing .??s files work
+
+% \realbackslash is an actual character `\' with catcode other, and
+% \doublebackslash is two of them (for the pdf outlines).
+% Inside the group \ is an ordinary character, which is why the
+% definitions are written with the @ escape.
+{\catcode`\\=\other @gdef@realbackslash{\} @gdef@doublebackslash{\\}}
+
+% In Texinfo, backslash is an active character; it prints the backslash
+% in fixed width font.
+\catcode`\\=\active % @ for escape char from now on.
+
+% Print a typewriter backslash. For math mode, we can't simply use
+% \backslashcurfont: the story here is that in math mode, the \char
+% of \backslashcurfont ends up printing the roman \ from the math symbol
+% font (because \char in math mode uses the \mathcode, and plain.tex
+% sets \mathcode`\\="026E). Hence we use an explicit \mathchar,
+% which is the decimal equivalent of "715c (class 7, e.g., use \fam;
+% ignored family value; char position "5C). We can't use " for the
+% usual hex value because it has already been made active.
+
+@def@ttbackslash{{@tt @ifmmode @mathchar29020 @else @backslashcurfont @fi}}
+@let@backslashchar = @ttbackslash % @backslashchar{} is for user documents.
+
+% \rawbackslash defines an active \ to do \backslashcurfont.
+% \otherbackslash defines an active \ to be a literal `\' character with
+% catcode other. We switch back and forth between these.
+@gdef@rawbackslash{@let\=@backslashcurfont}
+@gdef@otherbackslash{@let\=@realbackslash}
+
+% Same as @turnoffactive except outputs \ as {\tt\char`\\} instead of
+% the literal character `\'.
+%
+% Inside the group - is given the active catcode, so the @let-= in the
+% body is read with an active -. Each active character is rebound to
+% its @normal... macro, and \ to @ttbackslash.
+{@catcode`- = @active
+ @gdef@normalturnoffactive{%
+   @passthroughcharstrue
+   @let-=@normaldash
+   @let"=@normaldoublequote
+   @let$=@normaldollar %$ font-lock fix
+   @let+=@normalplus
+   @let<=@normalless
+   @let>=@normalgreater
+   @let^=@normalcaret
+   @let_=@normalunderscore
+   @let|=@normalverticalbar
+   @let~=@normaltilde
+   @let\=@ttbackslash
+   @markupsetuplqdefault
+   @markupsetuprqdefault
+   @unsepspaces
+ }
+}
+
+% If a .fmt file is being used, characters that might appear in a file
+% name cannot be active until we have parsed the command line.
+% So turn them off again, and have @fixbackslash turn them back on.
+@catcode`+=@other @catcode`@_=@other
+
+% \enablebackslashhack - allow file to begin `\input texinfo'
+%
+% If a .fmt file is being used, we don't want the `\input texinfo' to show up.
+% That is what \eatinput is for; after that, the `\' should revert to printing
+% a backslash.
+% If the file did not have a `\input texinfo', then it is turned off after
+% the first line; otherwise the first `\' in the file would cause an error.
+% This is used on the very last line of this file, texinfo.tex.
+% We also use @c to call @fixbackslash, in case ends of lines are hidden.
+%
+% Line ends are active (catcode 13) while the body below is read, so its
+% line structure and the trailing % signs are significant; do not
+% reflow it.
+{
+@catcode`@^=7
+@catcode`@^^M=13@gdef@enablebackslashhack{%
+  @global@let\ = @eatinput%
+  @catcode`@^^M=13%
+  @def@c{@fixbackslash@c}%
+  % Definition for the newline at the end of this file.
+  @def ^^M{@let^^M@secondlinenl}%
+  % Definition for a newline in the main Texinfo file.
+  @gdef @secondlinenl{@fixbackslash}%
+  % In case the first line has a whole-line command on it
+  @let@originalparsearg@parsearg
+  @def@parsearg{@fixbackslash@originalparsearg}
+}}
+
+% @eatinput is delimited by the literal text `input texinfo'; it takes
+% the rest of that line (#1, up to the active newline), discards it, and
+% calls @fixbackslash.
+{@catcode`@^=7 @catcode`@^^M=13%
+@gdef@eatinput input texinfo#1^^M{@fixbackslash}}
+
+% Emergency active definition of newline, in case an active newline token
+% appears by mistake.
+{@catcode`@^=7 @catcode13=13%
+@gdef@enableemergencynewline{%
+  @gdef^^M{%
+    @par%
+    %@par%
+}}}
+
+
+@gdef@fixbackslash{%
+  % Undo what @enablebackslashhack set up for the first line of input.
+  % The active \ is rebound only if it still means @eatinput.
+  @ifx\@eatinput @let\ = @ttbackslash @fi
+  @catcode13=5 % regular end of line
+  @enableemergencynewline
+  @let@c=@texinfoc
+  @let@parsearg@originalparsearg
+  % Also turn back on active characters that might appear in the input
+  % file name, in case not using a pre-dumped format.
+  @catcode`+=@active
+  @catcode`@_=@active
+  %
+  % If texinfo.cnf is present on the system, read it.
+  % Useful for site-wide @afourpaper, etc. This macro, @fixbackslash, gets
+  % called at the beginning of every Texinfo file. Not opening texinfo.cnf
+  % directly in this file, texinfo.tex, makes it possible to make a format
+  % file for Texinfo.
+  %
+  % (@openin followed by @ifeof tests whether the file could be opened,
+  % so a missing texinfo.cnf is skipped silently.)
+  @openin 1 texinfo.cnf
+  @ifeof 1 @else @input texinfo.cnf @fi
+  @closein 1
+}
+
+
+% Say @foo, not \foo, in error messages.
+@escapechar = `@@
+
+% These (along with & and #) are made active for url-breaking, so need
+% active definitions as the normal characters.
+@def@normaldot{.}
+@def@normalquest{?}
+@def@normalslash{/}
+
+% These look ok in all fonts, so just make them not special.
+% @hashchar{} gets its own user-level command, because of #line.
+% The third line below gives the percent sign catcode other: from there
+% on it no longer starts a comment, so the rest of the file uses @c.
+@catcode`@& = @other @def@normalamp{&}
+@catcode`@# = @other @def@normalhash{#}
+@catcode`@% = @other @def@normalpercent{%}
+
+@let @hashchar = @normalhash
+
+@c Finally, make ` and ' active, so that txicodequoteundirected and
+@c txicodequotebacktick work right in, e.g., @w{@code{`foo'}}. If we
+@c don't make ` and ' active, @code will not get them as active chars.
+@c Do this last of all since we use ` in the previous @catcode assignments.
+@catcode`@'=@active
+@catcode`@`=@active
+@markupsetuplqdefault
+@markupsetuprqdefault
+
+@c Local variables:
+@c eval: (add-hook 'write-file-hooks 'time-stamp)
+@c page-delimiter: "^\\\\message\\|emacs-page"
+@c time-stamp-start: "def\\\\texinfoversion{"
+@c time-stamp-format: "%:y-%02m-%02d.%02H"
+@c time-stamp-end: "}"
+@c End:
+
+@c vim:sw=2:
+
+@c The call below has to stay on the last line of this file: its
+@c definition handles the newline that ends this file specially.
+@enablebackslashhack