1 /*
2 ** This program scans C and C++ source files and automatically generates
3 ** appropriate header files.
4 ** %Z% %P% %I% %G% %Z%
5 */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <ctype.h>
9 #include <memory.h>
10 #include <sys/stat.h>
11 #include <assert.h>
12 #ifndef WIN32
13 # include <unistd.h>
14 #else
15 # include <string.h>
16 #endif
17
/*
** Macros for debugging.
**
** When DEBUG is defined, debugN(F,M,...) writes the printf-style message M
** (taking N extra arguments) to stderr whenever any bit of F is also set in
** debugMask.  PARSER, DECL_DUMP and TOKENIZER are the available mask bits.
** When DEBUG is not defined every macro expands to nothing.
**
** The DEBUG expansions are wrapped in do{...}while(0) so that each use is
** a single statement.  A bare "if(...){...}" expansion would break, or
** silently capture the "else", when used as the body of an un-braced
** if/else.  Invoke the macros with a trailing semicolon.
*/
#ifdef DEBUG
static int debugMask = 0;
# define debug0(F,M) \
    do{ if( (F)&debugMask ){ fprintf(stderr,M); } }while(0)
# define debug1(F,M,A) \
    do{ if( (F)&debugMask ){ fprintf(stderr,M,A); } }while(0)
# define debug2(F,M,A,B) \
    do{ if( (F)&debugMask ){ fprintf(stderr,M,A,B); } }while(0)
# define debug3(F,M,A,B,C) \
    do{ if( (F)&debugMask ){ fprintf(stderr,M,A,B,C); } }while(0)
# define PARSER      0x00000001
# define DECL_DUMP   0x00000002
# define TOKENIZER   0x00000004
#else
# define debug0(Flags, Format)
# define debug1(Flags, Format, A)
# define debug2(Flags, Format, A, B)
# define debug3(Flags, Format, A, B, C)
#endif
36
/*
** These macros exist only so that this program can be run on its own
** source code as a test.  They contribute nothing to the program logic.
*/
#define INTERFACE        1
#define EXPORT_INTERFACE 1
#define EXPORT
44
/*
** One instance of this structure describes a single token taken from a
** source file.  Tokens are chained together into a doubly-linked list.
*/
typedef struct Token Token;
struct Token {
  const char *zText;   /* Start of the token's text (not NUL-terminated) */
  int nText;           /* Length of the token's text in characters */
  int eType;           /* Token type code */
  int nLine;           /* Line number on which the token begins */
  Token *pComment;     /* Most recent block comment preceding this token */
  Token *pNext;        /* Following token on the list */
  Token *pPrev;        /* Preceding token on the list */
};
59
/*
** The tokenizer keeps its position within the input text in an
** instance of this structure.
*/
typedef struct InStream InStream;
struct InStream {
  const char *z;   /* The entire input text */
  int i;           /* Index in z[] of the next character to read */
  int nLine;       /* Line number of character z[i] */
};
70
71 /*
72 ** Each declaration in the C or C++ source files is parsed out and stored as
73 ** an instance of the following structure.
74 **
75 ** A "forward declaration" is a declaration that an object exists that
76 ** doesn't tell about the objects structure. A typical forward declaration
77 ** is:
78 **
79 ** struct Xyzzy;
80 **
81 ** Not every object has a forward declaration. If it does, thought, the
82 ** forward declaration will be contained in the zFwd field for C and
83 ** the zFwdCpp for C++. The zDecl field contains the complete
84 ** declaration text.
85 */
86 typedef struct Decl Decl;
87 struct Decl {
88 char *zName; /* Name of the object being declared. The appearance
89 ** of this name is a source file triggers the declaration
90 ** to be added to the header for that file. */
91 char *zFile; /* File from which extracted. */
92 char *zIf; /* Surround the declaration with this #if */
93 char *zFwd; /* A forward declaration. NULL if there is none. */
94 char *zFwdCpp; /* Use this forward declaration for C++. */
95 char *zDecl; /* A full declaration of this object */
96 struct Include *pInclude; /* #includes that come before this declaration */
97 int flags; /* See the "Properties" below */
98 Token *pComment; /* A block comment associated with this declaration */
99 Token tokenCode; /* Implementation of functions and procedures */
100 Decl *pSameName; /* Next declaration with the same "zName" */
101 Decl *pSameHash; /* Next declaration with same hash but different zName */
102 Decl *pNext; /* Next declaration with a different name */
103 };
104
/*
** Properties associated with declarations.
**
** DP_Forward and DP_Declared are used while generating a single header
** file, to prevent duplicate declarations and definitions.  DP_Forward is
** set once the object has been given a forward declaration; DP_Declared
** is set once it has been given a full declaration.  (Example: a forward
** declaration is "typedef struct Abc Abc;" and the full declaration is
** "struct Abc { int a; float b; };".)
**
** DP_Export and DP_Local are more permanent.  They mark objects that have
** EXPORT scope and LOCAL scope respectively.  An object carrying neither
** mark has library scope.  The scopes mean:
**
**    LOCAL scope         The object is usable only within the file in
**                        which it is declared.
**
**    library scope       The object is visible and usable in other files
**                        of the same project.  But if the project is a
**                        library, the object is not visible to users of
**                        the library (i.e. it does not appear in the
**                        output when using the -H option).
**
**    EXPORT scope        The object is visible and usable everywhere.
**
** DP_Flag is a temporary flag used during processing to prevent an
** infinite loop.  Its use is localized.
**
** DP_Cplusplus, DP_ExternCReqd and DP_ExternReqd are permanent and
** specify what kind of declaration the object requires.
*/
#define DP_Forward      0x001   /* Has a forward declaration in this file */
#define DP_Declared     0x002   /* Has a full declaration in this file */
#define DP_Export       0x004   /* Export this declaration */
#define DP_Local        0x008   /* Declare in its home file only */
#define DP_Flag         0x010   /* Marks a subset of a Decl list for
                                ** special processing */
#define DP_Cplusplus    0x020   /* Has C++ linkage; cannot appear in a
                                ** C header file */
#define DP_ExternCReqd  0x040   /* Prepend 'extern "C"' in a C++ header.
                                ** Prepend nothing in a C header */
#define DP_ExternReqd   0x080   /* Prepend 'extern "C"' in a C++ header if
                                ** DP_Cplusplus is not also set.  If
                                ** DP_Cplusplus is set, or this is a C
                                ** header, prepend 'extern' */
151
/*
** Convenience macros for dealing with declaration properties.  D is a
** pointer to any structure having an integer "flags" field and P is a
** mask of one or more property bits.
**
**    DeclHasProperty      True if every bit of P is set.
**    DeclHasAnyProperty   True if at least one bit of P is set.
**    DeclSetProperty      Turn on all bits of P.
**    DeclClearProperty    Turn off all bits of P.
**
** Every expansion is fully parenthesized so that a macro can be embedded
** in a larger expression without operator-precedence surprises.
*/
#define DeclHasProperty(D,P)    (((D)->flags&(P))==(P))
#define DeclHasAnyProperty(D,P) (((D)->flags&(P))!=0)
#define DeclSetProperty(D,P)    ((D)->flags |= (P))
#define DeclClearProperty(D,P)  ((D)->flags &= ~(P))
159
/*
** State properties of the parser.  Every value here is distinct from the
** DP_ values so that both families can share a single "flags" field.
**
** Take care not to confuse PS_Export with DP_Export, or PS_Local with
** DP_Local.  The names are similar but the meanings are very different.
*/
#define PS_Extern     0x000800   /* "extern" has been seen */
#define PS_Export     0x001000   /* Between "#if EXPORT_INTERFACE"
                                 ** and "#endif" */
#define PS_Export2    0x002000   /* "EXPORT" has been seen */
#define PS_Typedef    0x004000   /* "typedef" has been seen */
#define PS_Static     0x008000   /* "static" has been seen */
#define PS_Interface  0x010000   /* Within #if INTERFACE..#endif */
#define PS_Method     0x020000   /* A "::" token has been seen */
#define PS_Local      0x040000   /* Within #if LOCAL_INTERFACE..#endif */
#define PS_Local2     0x080000   /* "LOCAL" has been seen */
179
/*
** Exactly which kind of object a Decl declares is recorded by ORing one
** of the following flags into the "flags" field of the Decl.
*/
#define TY_Class        0x00100000
#define TY_Subroutine   0x00200000
#define TY_Macro        0x00400000
#define TY_Typedef      0x00800000
#define TY_Variable     0x01000000
#define TY_Structure    0x02000000
#define TY_Union        0x04000000
#define TY_Enumeration  0x08000000
#define TY_Defunct      0x10000000   /* Used to erase a declaration */
194
/*
** Nested #if, #ifdef and #ifndef directives are kept on a stack.  Each
** entry of the stack is an instance of this structure.
*/
typedef struct Ifmacro Ifmacro;
struct Ifmacro {
  int nLine;          /* Line number on which the directive appears */
  char *zCondition;   /* Text of the directive's condition */
  Ifmacro *pNext;     /* Next entry down the stack */
  int flags;          /* May hold PS_Export, PS_Interface or PS_Local */
};
206
/*
** While parsing a file we must remember which other files have been
** #include-ed.  One instance of this structure is created for each
** #include that is found.
*/
typedef struct Include Include;
struct Include {
  char *zFile;      /* Name of the included file, including its "" or <> */
  char *zIf;        /* If not NULL, enclose the #include in this #if */
  char *zLabel;     /* Unique label used to test whether this #include
                    ** has already appeared in a file */
  Include *pNext;   /* Previous include file, or NULL if this is the first */
};
220
/*
** An identifier seen in a source file may later cause a declaration to be
** copied into the corresponding header.  Such identifiers are kept in a
** hash table whose entries are instances of this structure.
*/
typedef struct Ident Ident;
struct Ident {
  char *zName;       /* Text of the identifier */
  Ident *pCollide;   /* Next identifier having the same hash */
  Ident *pNext;      /* Next identifier on the list of all identifiers */
};
232
233 /*
234 ** A complete table of identifiers is stored in an instance of
235 ** the next structure.
236 */
237 #define IDENT_HASH_SIZE 2237
238 typedef struct IdentTable IdentTable;
239 struct IdentTable {
240 Ident *pList; /* List of all identifiers in this table */
241 Ident *apTable[IDENT_HASH_SIZE]; /* The hash table */
242 };
243
244 /*
245 ** The following structure holds all information for a single
246 ** source file named on the command line of this program.
247 */
248 typedef struct InFile InFile;
249 struct InFile {
250 char *zSrc; /* Name of input file */
251 char *zHdr; /* Name of the generated .h file for this input.
252 ** Will be NULL if input is to be scanned only */
253 int flags; /* One or more DP_, PS_ and/or TY_ flags */
254 InFile *pNext; /* Next input file in the list of them all */
255 IdentTable idTable; /* All identifiers in this input file */
256 };
257
/*
** An unbounded string can grow without limit.  It is used to assemble a
** large in-memory string out of many smaller pieces.
*/
typedef struct String String;
struct String {
  int nAlloc;    /* Bytes allocated for zText */
  int nUsed;     /* Bytes in use, not counting the NUL terminator */
  char *zText;   /* The text of the string */
};
268
269 /*
270 ** The following structure contains a lot of state information used
271 ** while generating a .h file. We put the information in this structure
272 ** and pass around a pointer to this structure, rather than pass around
273 ** all of the information separately. This helps reduce the number of
274 ** arguments to generator functions.
275 */
276 typedef struct GenState GenState;
277 struct GenState {
278 String *pStr; /* Write output to this string */
279 IdentTable *pTable; /* A table holding the zLabel of every #include that
280 * has already been generated. Used to avoid
281 * generating duplicate #includes. */
282 const char *zIf; /* If not NULL, then we are within a #if with
283 * this argument. */
284 int nErr; /* Number of errors */
285 const char *zFilename; /* Name of the source file being scanned */
286 int flags; /* Various flags (DP_ and PS_ flags above) */
287 };
288
/*
** Every file generated by this program begins with the following line.
** Recognizing the line lets the program be sure it never reads a file
** that it generated itself.  nTopLine is the length of the line, not
** counting the NUL terminator.
*/
const char zTopLine[] =
  "/* \aThis file was automatically generated. Do not edit! */\n";
#define nTopLine (sizeof(zTopLine)-1)
297
298 /*
299 ** The name of the file currently being parsed.
300 */
301 static char *zFilename;
302
303 /*
304 ** The stack of #if macros for the file currently being parsed.
305 */
306 static Ifmacro *ifStack = 0;
307
308 /*
309 ** A list of all files that have been #included so far in a file being
310 ** parsed.
311 */
312 static Include *includeList = 0;
313
314 /*
315 ** The last block comment seen.
316 */
317 static Token *blockComment = 0;
318
319 /*
320 ** The following flag is set if the -doc flag appears on the
321 ** command line.
322 */
323 static int doc_flag = 0;
324
325 /*
326 ** If the following flag is set, then makeheaders will attempt to
327 ** generate prototypes for static functions and procedures.
328 */
329 static int proto_static = 0;
330
331 /*
332 ** A list of all declarations. The list is held together using the
333 ** pNext field of the Decl structure.
334 */
335 static Decl *pDeclFirst; /* First on the list */
336 static Decl *pDeclLast; /* Last on the list */
337
338 /*
339 ** A hash table of all declarations
340 */
341 #define DECL_HASH_SIZE 3371
342 static Decl *apTable[DECL_HASH_SIZE];
343
/*
** Guarantee that the TEST macro is defined to something.  It defaults
** to 0 when the build does not supply a value.
*/
#if !defined(TEST)
# define TEST 0
#endif
351
#ifdef NOT_USED
/*
** A private assertion facility, compiled only when NOT_USED is defined,
** that gives more control over debugging than <assert.h>.
**
** Assert(X) is a single statement (do{...}while(0)) so that it is safe
** inside an un-braced if/else.  CantHappen() reports the line number and
** then calls abort() to force a core dump.  abort() is used in place of a
** store through an invalid address because such a store is undefined
** behaviour and is not guaranteed to stop the program.
*/
#define Assert(X)    do{ if(!(X)){ CantHappen(__LINE__); } }while(0)
#define CANT_HAPPEN  CantHappen(__LINE__)
static void CantHappen(int iLine){
  fprintf(stderr,"Assertion failed on line %d\n",iLine);
  abort();    /* Force a core-dump */
}
#endif
364
/*
** Memory allocation functions that are guaranteed never to return NULL.
**
** SafeMalloc() allocates nByte bytes.  If the allocation fails, a message
** is written to stderr and the program exits with status 1.
**
** A request for zero bytes is rounded up to one byte.  malloc(0) is
** allowed to return NULL even when memory is plentiful, and that must
** not be mistaken for memory exhaustion.
*/
static void *SafeMalloc(int nByte){
  void *p = malloc( nByte==0 ? 1 : (size_t)nByte );
  if( p==0 ){
    fprintf(stderr,"Out of memory. Can't allocate %d bytes.\n",nByte);
    exit(1);
  }
  return p;
}
/*
** Release memory obtained from SafeMalloc() or SafeRealloc().  Passing a
** NULL pointer is harmless: free(NULL) does nothing.
*/
static void SafeFree(void *pOld){
  free(pOld);
}
/*
** Resize the allocation pOld to nByte bytes and return the (possibly
** moved) allocation.  A NULL pOld is treated as a fresh SafeMalloc().
** On failure a message is written to stderr and the program exits with
** status 1, so the return value is never NULL.
*/
static void *SafeRealloc(void *pOld, int nByte){
  void *pNew;
  if( pOld==0 ){
    return SafeMalloc(nByte);
  }
  pNew = realloc(pOld, nByte);
  if( pNew==0 ){
    fprintf(stderr,
      "Out of memory. Can't enlarge an allocation to %d bytes\n",nByte);
    exit(1);
  }
  return pNew;
}
/*
** Return a NUL-terminated copy of the first nByte bytes of zSrc, held in
** memory obtained from SafeMalloc().  If nByte is zero or negative the
** whole of zSrc, as measured by strlen(), is copied.
*/
static char *StrDup(const char *zSrc, int nByte){
  int nCopy = nByte>0 ? nByte : (int)strlen(zSrc);
  char *zCopy = SafeMalloc( nCopy + 1 );
  strncpy(zCopy, zSrc, nCopy);
  zCopy[nCopy] = 0;
  return zCopy;
}
405
/*
** Return TRUE if the character X can be part of an identifier.
**
** X is converted to unsigned char before being handed to isalnum().
** The <ctype.h> functions are undefined for negative arguments other
** than EOF, and a plain "char" holding a byte above 0x7f is negative on
** platforms where char is signed.
*/
#define ISALNUM(X) ((X)=='_' || isalnum((unsigned char)(X)))
410
411 /*
412 ** Routines for dealing with unbounded strings.
413 */
414 static void StringInit(String *pStr){
415 pStr->nAlloc = 0;
416 pStr->nUsed = 0;
417 pStr->zText = 0;
418 }
419 static void StringReset(String *pStr){
420 SafeFree(pStr->zText);
421 StringInit(pStr);
422 }
423 static void StringAppend(String *pStr, const char *zText, int nByte){
424 if( nByte<=0 ){
425 nByte = strlen(zText);
426 }
427 if( pStr->nUsed + nByte >= pStr->nAlloc ){
428 if( pStr->nAlloc==0 ){
429 pStr->nAlloc = nByte + 100;
430 pStr->zText = SafeMalloc( pStr->nAlloc );
431 }else{
432 pStr->nAlloc = pStr->nAlloc*2 + nByte;
433 pStr->zText = SafeRealloc(pStr->zText, pStr->nAlloc);
434 }
435 }
436 strncpy(&pStr->zText[pStr->nUsed],zText,nByte);
437 pStr->nUsed += nByte;
438 pStr->zText[pStr->nUsed] = 0;
439 }
/* Text of the unbounded string S, or "" if S has no buffer yet. */
#define StringGet(S) ((S)->zText!=0 ? (S)->zText : "")
441
/*
** Compute a hash on a string.  The number returned is a non-negative
** value between 0 and 2**31 - 1.
**
** z is the string to hash and n is the number of bytes of z to use.
** If n is zero or negative the length is taken from strlen(z).
**
** The arithmetic is done on an unsigned value and the result is masked
** to 31 bits.  Signed arithmetic would overflow (undefined behaviour),
** and negating the most negative int yields a negative number, so a
** signed accumulator could hand callers a negative hash and, after "%",
** a negative bucket index.
*/
static int Hash(const char *z, int n){
  unsigned int h = 0;
  if( n<=0 ){
    n = (int)strlen(z);
  }
  while( n-- > 0 ){
    h = h ^ (h<<5) ^ (unsigned char)*z++;
  }
  return (int)(h & 0x7fffffff);
}
457
458 /*
459 ** Given an identifier name, try to find a declaration for that
460 ** identifier in the hash table. If found, return a pointer to
461 ** the Decl structure. If not found, return 0.
462 */
463 static Decl *FindDecl(const char *zName, int len){
464 int h;
465 Decl *p;
466
467 if( len<=0 ){
468 len = strlen(zName);
469 }
470 h = Hash(zName,len) % DECL_HASH_SIZE;
471 p = apTable[h];
472 while( p && (strncmp(p->zName,zName,len)!=0 || p->zName[len]!=0) ){
473 p = p->pSameHash;
474 }
475 return p;
476 }
477
478 /*
479 ** Install the given declaration both in the hash table and on
480 ** the list of all declarations.
481 */
482 static void InstallDecl(Decl *pDecl){
483 int h;
484 Decl *pOther;
485
486 h = Hash(pDecl->zName,0) % DECL_HASH_SIZE;
487 pOther = apTable[h];
488 while( pOther && strcmp(pDecl->zName,pOther->zName)!=0 ){
489 pOther = pOther->pSameHash;
490 }
491 if( pOther ){
492 pDecl->pSameName = pOther->pSameName;
493 pOther->pSameName = pDecl;
494 }else{
495 pDecl->pSameName = 0;
496 pDecl->pSameHash = apTable[h];
497 apTable[h] = pDecl;
498 }
499 pDecl->pNext = 0;
500 if( pDeclFirst==0 ){
501 pDeclFirst = pDeclLast = pDecl;
502 }else{
503 pDeclLast->pNext = pDecl;
504 pDeclLast = pDecl;
505 }
506 }
507
508 /*
509 ** Look at the current ifStack. If anything declared at the current
510 ** position must be surrounded with
511 **
512 ** #if STUFF
513 ** #endif
514 **
515 ** Then this routine computes STUFF and returns a pointer to it. Memory
516 ** to hold the value returned is obtained from malloc().
517 */
518 static char *GetIfString(void){
519 Ifmacro *pIf;
520 char *zResult = 0;
521 int hasIf = 0;
522 String str;
523
524 for(pIf = ifStack; pIf; pIf=pIf->pNext){
525 if( pIf->zCondition==0 || *pIf->zCondition==0 ) continue;
526 if( !hasIf ){
527 hasIf = 1;
528 StringInit(&str);
529 }else{
530 StringAppend(&str," && ",4);
531 }
532 StringAppend(&str,pIf->zCondition,0);
533 }
534 if( hasIf ){
535 zResult = StrDup(StringGet(&str),0);
536 StringReset(&str);
537 }else{
538 zResult = 0;
539 }
540 return zResult;
541 }
542
543 /*
544 ** Create a new declaration and put it in the hash table. Also
545 ** return a pointer to it so that we can fill in the zFwd and zDecl
546 ** fields, and so forth.
547 */
548 static Decl *CreateDecl(
549 const char *zName, /* Name of the object being declared. */
550 int nName /* Length of the name */
551 ){
552 Decl *pDecl;
553
554 pDecl = SafeMalloc( sizeof(Decl) + nName + 1);
555 memset(pDecl,0,sizeof(Decl));
556 pDecl->zName = (char*)&pDecl[1];
557 sprintf(pDecl->zName,"%.*s",nName,zName);
558 pDecl->zFile = zFilename;
559 pDecl->pInclude = includeList;
560 pDecl->zIf = GetIfString();
561 InstallDecl(pDecl);
562 return pDecl;
563 }
564
565 /*
566 ** Insert a new identifier into an table of identifiers. Return TRUE if
567 ** a new identifier was inserted and return FALSE if the identifier was
568 ** already in the table.
569 */
570 static int IdentTableInsert(
571 IdentTable *pTable, /* The table into which we will insert */
572 const char *zId, /* Name of the identifiers */
573 int nId /* Length of the identifier name */
574 ){
575 int h;
576 Ident *pId;
577
578 if( nId<=0 ){
579 nId = strlen(zId);
580 }
581 h = Hash(zId,nId) % IDENT_HASH_SIZE;
582 for(pId = pTable->apTable[h]; pId; pId=pId->pCollide){
583 if( strncmp(zId,pId->zName,nId)==0 && pId->zName[nId]==0 ){
584 /* printf("Already in table: %.*s\n",nId,zId); */
585 return 0;
586 }
587 }
588 pId = SafeMalloc( sizeof(Ident) + nId + 1 );
589 pId->zName = (char*)&pId[1];
590 sprintf(pId->zName,"%.*s",nId,zId);
591 pId->pNext = pTable->pList;
592 pTable->pList = pId;
593 pId->pCollide = pTable->apTable[h];
594 pTable->apTable[h] = pId;
595 /* printf("Add to table: %.*s\n",nId,zId); */
596 return 1;
597 }
598
599 /*
600 ** Check to see if the given value is in the given IdentTable. Return
601 ** true if it is and false if it is not.
602 */
603 static int IdentTableTest(
604 IdentTable *pTable, /* The table in which to search */
605 const char *zId, /* Name of the identifiers */
606 int nId /* Length of the identifier name */
607 ){
608 int h;
609 Ident *pId;
610
611 if( nId<=0 ){
612 nId = strlen(zId);
613 }
614 h = Hash(zId,nId) % IDENT_HASH_SIZE;
615 for(pId = pTable->apTable[h]; pId; pId=pId->pCollide){
616 if( strncmp(zId,pId->zName,nId)==0 && pId->zName[nId]==0 ){
617 return 1;
618 }
619 }
620 return 0;
621 }
622
623 /*
624 ** Remove every identifier from the given table. Reset the table to
625 ** its initial state.
626 */
627 static void IdentTableReset(IdentTable *pTable){
628 Ident *pId, *pNext;
629
630 for(pId = pTable->pList; pId; pId = pNext){
631 pNext = pId->pNext;
632 SafeFree(pId);
633 }
634 memset(pTable,0,sizeof(IdentTable));
635 }
636
#ifdef DEBUG
/*
** Write the name of every identifier in the given table to pOut, one
** name per line, following the order of the table's pList chain.
*/
static void IdentTablePrint(IdentTable *pTable, FILE *pOut){
  Ident *pId = pTable->pList;

  while( pId ){
    fprintf(pOut,"%s\n",pId->zName);
    pId = pId->pNext;
  }
}
#endif
649
/*
** Read an entire file into memory and return a pointer to the
** NUL-terminated content.
**
** The memory is obtained from SafeMalloc() and must be freed by the
** caller.
**
** Return 0 if the file cannot be examined with stat(), is not a regular
** file (this check is omitted on WIN32), or cannot be opened.
*/
static char *ReadFile(const char *zFilename){
  struct stat sStat;
  FILE *pIn;
  char *zBuf;
  int n;

  if( stat(zFilename,&sStat)!=0 ){
    return 0;
  }
#ifndef WIN32
  if( !S_ISREG(sStat.st_mode) ){
    return 0;
  }
#endif
  pIn = fopen(zFilename,"r");
  if( pIn==0 ){
    return 0;
  }
  zBuf = SafeMalloc( sStat.st_size + 1 );
  n = fread(zBuf,1,sStat.st_size,pIn);
  fclose(pIn);
  zBuf[n] = 0;     /* Terminate after the bytes actually read */
  return zBuf;
}
681
/*
** Write the NUL-terminated string zOutput into the file zFilename,
** replacing any previous content.  Return the number of errors: 0 on
** success, or 1 if the file could not be opened, the data could not be
** written in full, or the file could not be closed.
**
** The result of fclose() is checked as well as that of fwrite().
** Buffered output may not reach the file until the stream is closed, so
** a failure such as a full disk can show up only at that point.
*/
static int WriteFile(const char *zFilename, const char *zOutput){
  FILE *pOut;
  size_t nByte;
  int nErr = 0;

  pOut = fopen(zFilename,"w");
  if( pOut==0 ){
    return 1;
  }
  nByte = strlen(zOutput);
  if( fwrite(zOutput,1,nByte,pOut)!=nByte ){
    nErr = 1;
  }
  if( fclose(pOut)!=0 ){
    nErr = 1;
  }
  return nErr;
}
696
/*
** Major token types, stored in the eType field of a Token.
*/
#define TT_Other         0   /* None of the types below */
#define TT_Space         1   /* Contiguous white space */
#define TT_Id            2   /* An identifier */
#define TT_Preprocessor  3   /* Any C preprocessor directive */
#define TT_Comment       4   /* Either C or C++ style comment */
#define TT_Number        5   /* Any numeric constant */
#define TT_String        6   /* String or character constants. ".." or '.' */
#define TT_Braces        7   /* All text between { and a matching } */
#define TT_EOF           8   /* End of file */
#define TT_Error         9   /* An error condition */
#define TT_BlockComment 10   /* A C-style comment at the left margin that
                             ** spans multiple lines */
712
713 /*
714 ** Get a single low-level token from the input file. Update the
715 ** file pointer so that it points to the first character beyond the
716 ** token.
717 **
718 ** A "low-level token" is any token except TT_Braces. A TT_Braces token
719 ** consists of many smaller tokens and is assembled by a routine that
720 ** calls this one.
721 **
722 ** The function returns the number of errors. An error is an
723 ** unterminated string or character literal or an unterminated
724 ** comment.
725 **
726 ** Profiling shows that this routine consumes about half the
727 ** CPU time on a typical run of makeheaders.
728 */
729 static int GetToken(InStream *pIn, Token *pToken){
730 int i;
731 const char *z;
732 int cStart;
733 int c;
734 int startLine; /* Line on which a structure begins */
735 int nlisc = 0; /* True if there is a new-line in a ".." or '..' */
736 int nErr = 0; /* Number of errors seen */
737
738 z = pIn->z;
739 i = pIn->i;
740 pToken->nLine = pIn->nLine;
741 pToken->zText = &z[i];
742 switch( z[i] ){
743 case 0:
744 pToken->eType = TT_EOF;
745 pToken->nText = 0;
746 break;
747
748 case '#':
749 if( i==0 || z[i-1]=='\n' || (i>1 && z[i-1]=='\r' && z[i-2]=='\n')){
750 /* We found a preprocessor statement */
751 pToken->eType = TT_Preprocessor;
752 i++;
753 while( z[i]!=0 && z[i]!='\n' ){
754 if( z[i]=='\\' ){
755 i++;
756 if( z[i]=='\n' ) pIn->nLine++;
757 }
758 i++;
759 }
760 pToken->nText = i - pIn->i;
761 }else{
762 /* Just an operator */
763 pToken->eType = TT_Other;
764 pToken->nText = 1;
765 }
766 break;
767
768 case ' ':
769 case '\t':
770 case '\r':
771 case '\f':
772 case '\n':
773 while( isspace(z[i]) ){
774 if( z[i]=='\n' ) pIn->nLine++;
775 i++;
776 }
777 pToken->eType = TT_Space;
778 pToken->nText = i - pIn->i;
779 break;
780
781 case '\\':
782 pToken->nText = 2;
783 pToken->eType = TT_Other;
784 if( z[i+1]=='\n' ){
785 pIn->nLine++;
786 pToken->eType = TT_Space;
787 }else if( z[i+1]==0 ){
788 pToken->nText = 1;
789 }
790 break;
791
792 case '\'':
793 case '\"':
794 cStart = z[i];
795 startLine = pIn->nLine;
796 do{
797 i++;
798 c = z[i];
799 if( c=='\n' ){
800 if( !nlisc ){
801 fprintf(stderr,
802 "%s:%d: (warning) Newline in string or character literal.\n",
803 zFilename, pIn->nLine);
804 nlisc = 1;
805 }
806 pIn->nLine++;
807 }
808 if( c=='\\' ){
809 i++;
810 c = z[i];
811 if( c=='\n' ){
812 pIn->nLine++;
813 }
814 }else if( c==cStart ){
815 i++;
816 c = 0;
817 }else if( c==0 ){
818 fprintf(stderr, "%s:%d: Unterminated string or character literal.\n",
819 zFilename, startLine);
820 nErr++;
821 }
822 }while( c );
823 pToken->eType = TT_String;
824 pToken->nText = i - pIn->i;
825 break;
826
827 case '/':
828 if( z[i+1]=='/' ){
829 /* C++ style comment */
830 while( z[i] && z[i]!='\n' ){ i++; }
831 pToken->eType = TT_Comment;
832 pToken->nText = i - pIn->i;
833 }else if( z[i+1]=='*' ){
834 /* C style comment */
835 int isBlockComment = i==0 || z[i-1]=='\n';
836 i += 2;
837 startLine = pIn->nLine;
838 while( z[i] && (z[i]!='*' || z[i+1]!='/') ){
839 if( z[i]=='\n' ){
840 pIn->nLine++;
841 if( isBlockComment ){
842 if( z[i+1]=='*' || z[i+2]=='*' ){
843 isBlockComment = 2;
844 }else{
845 isBlockComment = 0;
846 }
847 }
848 }
849 i++;
850 }
851 if( z[i] ){
852 i += 2;
853 }else{
854 isBlockComment = 0;
855 fprintf(stderr,"%s:%d: Unterminated comment\n",
856 zFilename, startLine);
857 nErr++;
858 }
859 pToken->eType = isBlockComment==2 ? TT_BlockComment : TT_Comment;
860 pToken->nText = i - pIn->i;
861 }else{
862 /* A divide operator */
863 pToken->eType = TT_Other;
864 pToken->nText = 1;
865 }
866 break;
867
868 case '':
869 if( z[i+1]=='x' || z[i+1]=='X' ){
870 /* A hex constant */
871 i += 2;
872 while( isxdigit(z[i]) ){ i++; }
873 }else{
874 /* An octal constant */
875 while( isdigit(z[i]) ){ i++; }
876 }
877 pToken->eType = TT_Number;
878 pToken->nText = i - pIn->i;
879 break;
880
881 case '1': case '2': case '3': case '4':
882 case '5': case '6': case '7': case '8': case '9':
883 while( isdigit(z[i]) ){ i++; }
884 if( (c=z[i])=='.' ){
885 i++;
886 while( isdigit(z[i]) ){ i++; }
887 c = z[i];
888 if( c=='e' || c=='E' ){
889 i++;
890 if( ((c=z[i])=='+' || c=='-') && isdigit(z[i+1]) ){ i++; }
891 while( isdigit(z[i]) ){ i++; }
892 c = z[i];
893 }
894 if( c=='f' || c=='F' || c=='l' || c=='L' ){ i++; }
895 }else if( c=='e' || c=='E' ){
896 i++;
897 if( ((c=z[i])=='+' || c=='-') && isdigit(z[i+1]) ){ i++; }
898 while( isdigit(z[i]) ){ i++; }
899 }else if( c=='L' || c=='l' ){
900 i++;
901 c = z[i];
902 if( c=='u' || c=='U' ){ i++; }
903 }else if( c=='u' || c=='U' ){
904 i++;
905 c = z[i];
906 if( c=='l' || c=='L' ){ i++; }
907 }
908 pToken->eType = TT_Number;
909 pToken->nText = i - pIn->i;
910 break;
911
912 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
913 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
914 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
915 case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B':
916 case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
917 case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P':
918 case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W':
919 case 'X': case 'Y': case 'Z': case '_':
920 while( isalnum(z[i]) || z[i]=='_' ){ i++; };
921 pToken->eType = TT_Id;
922 pToken->nText = i - pIn->i;
923 break;
924
925 default:
926 pToken->eType = TT_Other;
927 pToken->nText = 1;
928 break;
929 }
930 pIn->i += pToken->nText;
931 return nErr;
932 }
933
934 /*
935 ** This routine recovers the next token from the input file which is
936 ** not a space or a comment or any text between an "#if 0" and "#endif".
937 **
938 ** This routine returns the number of errors encountered. An error
939 ** is an unterminated token or unmatched "#if 0".
940 **
941 ** Profiling shows that this routine uses about a quarter of the
942 ** CPU time in a typical run.
943 */
944 static int GetNonspaceToken(InStream *pIn, Token *pToken){
945 int nIf = 0;
946 int inZero = 0;
947 const char *z;
948 int value;
949 int startLine;
950 int nErr = 0;
951
952 startLine = pIn->nLine;
953 while( 1 ){
954 nErr += GetToken(pIn,pToken);
955 /* printf("%04d: Type=%d nIf=%d [%.*s]\n",
956 pToken->nLine,pToken->eType,nIf,pToken->nText,
957 pToken->eType!=TT_Space ? pToken->zText : "<space>"); */
958 pToken->pComment = blockComment;
959 switch( pToken->eType ){
960 case TT_Comment:
961 case TT_Space:
962 break;
963
964 case TT_BlockComment:
965 if( doc_flag ){
966 blockComment = SafeMalloc( sizeof(Token) );
967 *blockComment = *pToken;
968 }
969 break;
970
971 case TT_EOF:
972 if( nIf ){
973 fprintf(stderr,"%s:%d: Unterminated \"#if\"\n",
974 zFilename, startLine);
975 nErr++;
976 }
977 return nErr;
978
979 case TT_Preprocessor:
980 z = &pToken->zText[1];
981 while( *z==' ' || *z=='\t' ) z++;
982 if( sscanf(z,"if %d",&value)==1 && value==0 ){
983 nIf++;
984 inZero = 1;
985 }else if( inZero ){
986 if( strncmp(z,"if",2)==0 ){
987 nIf++;
988 }else if( strncmp(z,"endif",5)==0 ){
989 nIf--;
990 if( nIf==0 ) inZero = 0;
991 }
992 }else{
993 return nErr;
994 }
995 break;
996
997 default:
998 if( !inZero ){
999 return nErr;
1000 }
1001 break;
1002 }
1003 }
1004 /* NOT REACHED */
1005 }
1006
1007 /*
1008 ** This routine looks for identifiers (strings of contiguous alphanumeric
1009 ** characters) within a preprocessor directive and adds every such string
1010 ** found to the given identifier table
1011 */
1012 static void FindIdentifiersInMacro(Token *pToken, IdentTable *pTable){
1013 Token sToken;
1014 InStream sIn;
1015 int go = 1;
1016
1017 sIn.z = pToken->zText;
1018 sIn.i = 1;
1019 sIn.nLine = 1;
1020 while( go && sIn.i < pToken->nText ){
1021 GetToken(&sIn,&sToken);
1022 switch( sToken.eType ){
1023 case TT_Id:
1024 IdentTableInsert(pTable,sToken.zText,sToken.nText);
1025 break;
1026
1027 case TT_EOF:
1028 go = 0;
1029 break;
1030
1031 default:
1032 break;
1033 }
1034 }
1035 }
1036
1037 /*
1038 ** This routine gets the next token. Everything contained within
1039 ** {...} is collapsed into a single TT_Braces token. Whitespace is
1040 ** omitted.
1041 **
1042 ** If pTable is not NULL, then insert every identifier seen into the