unifdef.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. /*
  2. * Copyright (c) 2002 - 2009 Tony Finch <dot@dotat.at>
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. * 1. Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * 2. Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  14. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  16. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  17. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  19. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  20. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  23. * SUCH DAMAGE.
  24. */
  25. /*
  26. * This code was derived from software contributed to Berkeley by Dave Yost.
  27. * It was rewritten to support ANSI C by Tony Finch. The original version
  28. * of unifdef carried the 4-clause BSD copyright licence. None of its code
  29. * remains in this version (though some of the names remain) so it now
  30. * carries a more liberal licence.
  31. *
  32. * The latest version is available from http://dotat.at/prog/unifdef
  33. */
  /*
   * Identification strings embedded in the binary: the copyright notice
   * followed by the revision-control id of this file.
   */
  static const char *const copyright[] = {
      "@(#) Copyright (c) 2002 - 2009 Tony Finch <dot@dotat.at>\n",
      "$dotat: unifdef/unifdef.c,v 1.190 2009/11/27 17:21:26 fanf2 Exp $",
  };
  38. /*
  39. * unifdef - remove ifdef'ed lines
  40. *
  41. * Wishlist:
  42. * provide an option which will append the name of the
  43. * appropriate symbol after #else's and #endif's
  44. * provide an option which will check symbols after
  45. * #else's and #endif's to see that they match their
  46. * corresponding #ifdef or #ifndef
  47. *
  48. * The first two items above require better buffer handling, which would
  49. * also make it possible to handle all "dodgy" directives correctly.
  50. */
  51. #include <ctype.h>
  52. #include <err.h>
  53. #include <stdarg.h>
  54. #include <stdbool.h>
  55. #include <stdio.h>
  56. #include <stdlib.h>
  57. #include <string.h>
  58. #include <unistd.h>
  /*
   * Classification of an input line, as produced by parseline().
   *
   * The first ten enumerators are the directive types. Adding LT_DODGY to
   * one of them gives the "dodgy" variant of the same directive (one that
   * is not contained on a single line), so the order of the directive
   * enumerators must stay in step with linetype_name[] and with the
   * columns of the state transition table.
   */
  typedef enum {
      LT_TRUEI,       /* a true #if with ignore flag */
      LT_FALSEI,      /* a false #if with ignore flag */
      LT_IF,          /* an unknown #if */
      LT_TRUE,        /* a true #if */
      LT_FALSE,       /* a false #if */
      LT_ELIF,        /* an unknown #elif */
      LT_ELTRUE,      /* a true #elif */
      LT_ELFALSE,     /* a false #elif */
      LT_ELSE,        /* #else */
      LT_ENDIF,       /* #endif */
      LT_DODGY,       /* flag: directive is not on one line */
      LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
      LT_PLAIN,       /* ordinary line */
      LT_EOF,         /* end of file */
      LT_ERROR,       /* unevaluable #if */
      LT_COUNT        /* number of distinct line types */
  } Linetype;

  /* Printable names for debugging output, indexed by Linetype. */
  static char const * const linetype_name[] = {
      /* plain directives */
      "TRUEI",
      "FALSEI",
      "IF",
      "TRUE",
      "FALSE",
      "ELIF",
      "ELTRUE",
      "ELFALSE",
      "ELSE",
      "ENDIF",
      /* the same directives offset by LT_DODGY */
      "DODGY TRUEI",
      "DODGY FALSEI",
      "DODGY IF",
      "DODGY TRUE",
      "DODGY FALSE",
      "DODGY ELIF",
      "DODGY ELTRUE",
      "DODGY ELFALSE",
      "DODGY ELSE",
      "DODGY ENDIF",
      /* everything else */
      "PLAIN",
      "EOF",
      "ERROR"
  };
  /*
   * State of #if processing for one nesting level. The value is used as
   * the row index of the state transition table and as the index into
   * ifstate_name[], so the two must list the states in the same order.
   */
  typedef enum {
      IS_OUTSIDE,
      IS_FALSE_PREFIX,    /* false #if followed by false #elifs */
      IS_TRUE_PREFIX,     /* first non-false #(el)if is true */
      IS_PASS_MIDDLE,     /* first non-false #(el)if is unknown */
      IS_FALSE_MIDDLE,    /* a false #elif after a pass state */
      IS_TRUE_MIDDLE,     /* a true #elif after a pass state */
      IS_PASS_ELSE,       /* an else after a pass state */
      IS_FALSE_ELSE,      /* an else after a true state */
      IS_TRUE_ELSE,       /* an else after only false states */
      IS_FALSE_TRAILER,   /* #elifs after a true are false */
      IS_COUNT            /* number of states */
  } Ifstate;

  /* Printable names for debugging output, indexed by Ifstate. */
  static char const * const ifstate_name[] = {
      "OUTSIDE",
      "FALSE_PREFIX",
      "TRUE_PREFIX",
      "PASS_MIDDLE",
      "FALSE_MIDDLE",
      "TRUE_MIDDLE",
      "PASS_ELSE",
      "FALSE_ELSE",
      "TRUE_ELSE",
      "FALSE_TRAILER"
  };
  /*
   * State of the comment parser. NO_COMMENT is deliberately zero so that
   * the state can be tested as a truth value ("are we inside anything?").
   */
  typedef enum {
      NO_COMMENT = false,     /* outside a comment */
      C_COMMENT,              /* in a comment like this one */
      CXX_COMMENT,            /* between // and end of line */
      STARTING_COMMENT,       /* just after slash-backslash-newline */
      FINISHING_COMMENT,      /* star-backslash-newline in a C comment */
      CHAR_LITERAL,           /* inside '' */
      STRING_LITERAL          /* inside "" */
  } Comment_state;

  /* Printable names for debugging output, indexed by Comment_state. */
  static char const * const comment_name[] = {
      "NO",
      "C",
      "CXX",
      "STARTING",
      "FINISHING",
      "CHAR",
      "STRING"
  };
  /*
   * State of the preprocessor line parser: how much of the current
   * logical line has been seen so far.
   */
  typedef enum {
      LS_START,   /* only space and comments on this line */
      LS_HASH,    /* only space, comments, and a hash */
      LS_DIRTY    /* this line can't be a preprocessor line */
  } Line_state;

  /* Printable names for debugging output, indexed by Line_state. */
  static char const * const linestate_name[] = {
      "START",
      "HASH",
      "DIRTY"
  };
  /*
   * Fixed capacities, modelled on the minimum translation limits of
   * ISO/IEC 9899:1999 5.2.4.1.
   */
  #define MAXDEPTH 64     /* maximum #if nesting */
  #define MAXLINE 4096    /* maximum length of line */
  #define MAXSYMS 4096    /* maximum number of symbols */

  /*
   * Editing a keyword in place can make the line longer, so the tline
   * buffer is declared with this many spare bytes beyond MAXLINE.
   */
  #define EDITSLOP 10
  140. /*
  141. * Globals.
  142. */
  143. static bool compblank; /* -B: compress blank lines */
  144. static bool lnblank; /* -b: blank deleted lines */
  145. static bool complement; /* -c: do the complement */
  146. static bool debugging; /* -d: debugging reports */
  147. static bool iocccok; /* -e: fewer IOCCC errors */
  148. static bool strictlogic; /* -K: keep ambiguous #ifs */
  149. static bool killconsts; /* -k: eval constant #ifs */
  150. static bool lnnum; /* -n: add #line directives */
  151. static bool symlist; /* -s: output symbol list */
  152. static bool text; /* -t: this is a text file */
  153. static const char *symname[MAXSYMS]; /* symbol name */
  154. static const char *value[MAXSYMS]; /* -Dsym=value */
  155. static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
  156. static int nsyms; /* number of symbols */
  157. static FILE *input; /* input file pointer */
  158. static const char *filename; /* input file name */
  159. static int linenum; /* current line number */
  160. static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
  161. static char *keyword; /* used for editing #elif's */
  162. static Comment_state incomment; /* comment parser state */
  163. static Line_state linestate; /* #if line parser state */
  164. static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
  165. static bool ignoring[MAXDEPTH]; /* ignore comments state */
  166. static int stifline[MAXDEPTH]; /* start of current #if */
  167. static int depth; /* current #if nesting */
  168. static int delcount; /* count of deleted lines */
  169. static unsigned blankcount; /* count of blank lines */
  170. static unsigned blankmax; /* maximum recent blankcount */
  171. static bool constexpr; /* constant #if expression */
  172. static int exitstat; /* program exit status */
  173. static void addsym(bool, bool, char *);
  174. static void debug(const char *, ...);
  175. static void done(void);
  176. static void error(const char *);
  177. static int findsym(const char *);
  178. static void flushline(bool);
  179. static Linetype parseline(void);
  180. static Linetype ifeval(const char **);
  181. static void ignoreoff(void);
  182. static void ignoreon(void);
  183. static void keywordedit(const char *);
  184. static void nest(void);
  185. static void process(void);
  186. static const char *skipargs(const char *);
  187. static const char *skipcomment(const char *);
  188. static const char *skipsym(const char *);
  189. static void state(Ifstate);
  190. static int strlcmp(const char *, const char *, size_t);
  191. static void unnest(void);
  192. static void usage(void);
  /*
   * True when c cannot be part of an identifier, i.e. it is neither
   * alphanumeric nor an underscore. Both uses of the argument are
   * parenthesised so that an expression argument (for example a
   * conditional expression) expands as intended. The argument is still
   * evaluated twice, so it must not have side effects.
   */
  #define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
  194. /*
  195. * The main program.
  196. */
  197. int
  198. main(int argc, char *argv[])
  199. {
  200. int opt;
  201. while ((opt = getopt(argc, argv, "i:D:U:I:BbcdeKklnst")) != -1)
  202. switch (opt) {
  203. case 'i': /* treat stuff controlled by these symbols as text */
  204. /*
  205. * For strict backwards-compatibility the U or D
  206. * should be immediately after the -i but it doesn't
  207. * matter much if we relax that requirement.
  208. */
  209. opt = *optarg++;
  210. if (opt == 'D')
  211. addsym(true, true, optarg);
  212. else if (opt == 'U')
  213. addsym(true, false, optarg);
  214. else
  215. usage();
  216. break;
  217. case 'D': /* define a symbol */
  218. addsym(false, true, optarg);
  219. break;
  220. case 'U': /* undef a symbol */
  221. addsym(false, false, optarg);
  222. break;
  223. case 'I':
  224. /* no-op for compatibility with cpp */
  225. break;
  226. case 'B': /* compress blank lines around removed section */
  227. compblank = true;
  228. break;
  229. case 'b': /* blank deleted lines instead of omitting them */
  230. case 'l': /* backwards compatibility */
  231. lnblank = true;
  232. break;
  233. case 'c': /* treat -D as -U and vice versa */
  234. complement = true;
  235. break;
  236. case 'd':
  237. debugging = true;
  238. break;
  239. case 'e': /* fewer errors from dodgy lines */
  240. iocccok = true;
  241. break;
  242. case 'K': /* keep ambiguous #ifs */
  243. strictlogic = true;
  244. break;
  245. case 'k': /* process constant #ifs */
  246. killconsts = true;
  247. break;
  248. case 'n': /* add #line directive after deleted lines */
  249. lnnum = true;
  250. break;
  251. case 's': /* only output list of symbols that control #ifs */
  252. symlist = true;
  253. break;
  254. case 't': /* don't parse C comments */
  255. text = true;
  256. break;
  257. default:
  258. usage();
  259. }
  260. argc -= optind;
  261. argv += optind;
  262. if (compblank && lnblank)
  263. errx(2, "-B and -b are mutually exclusive");
  264. if (argc > 1) {
  265. errx(2, "can only do one file");
  266. } else if (argc == 1 && strcmp(*argv, "-") != 0) {
  267. filename = *argv;
  268. input = fopen(filename, "r");
  269. if (input == NULL)
  270. err(2, "can't open %s", filename);
  271. } else {
  272. filename = "[stdin]";
  273. input = stdin;
  274. }
  275. process();
  276. abort(); /* bug */
  277. }
  /*
   * Print the command synopsis on stderr and exit with status 2.
   */
  static void
  usage(void)
  {
      fputs("usage: unifdef [-BbcdeKknst] [-Ipath]"
            " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n",
            stderr);
      exit(2);
  }
  285. /*
  286. * A state transition function alters the global #if processing state
  287. * in a particular way. The table below is indexed by the current
  288. * processing state and the type of the current line.
  289. *
  290. * Nesting is handled by keeping a stack of states; some transition
  291. * functions increase or decrease the depth. They also maintain the
  292. * ignore state on a stack. In some complicated cases they have to
  293. * alter the preprocessor directive, as follows.
  294. *
  295. * When we have processed a group that starts off with a known-false
  296. * #if/#elif sequence (which has therefore been deleted) followed by a
  297. * #elif that we don't understand and therefore must keep, we edit the
  298. * latter into a #if to keep the nesting correct.
  299. *
  300. * When we find a true #elif in a group, the following block will
  301. * always be kept and the rest of the sequence after the next #elif or
  302. * #else will be discarded. We edit the #elif into a #else and the
  303. * following directive to #endif since this has the desired behaviour.
  304. *
  305. * "Dodgy" directives are split across multiple lines, the most common
  306. * example being a multi-line comment hanging off the right of the
  307. * directive. We can handle them correctly only if there is no change
  308. * from printing to dropping (or vice versa) caused by that directive.
  309. * If the directive is the first of a group we have a choice between
  310. * failing with an error, or passing it through unchanged instead of
  311. * evaluating it. The latter is not the default to avoid questions from
  312. * users about unifdef unexpectedly leaving behind preprocessor directives.
  313. */
  314. typedef void state_fn(void);
  315. /* report an error */
  316. static void Eelif (void) { error("Inappropriate #elif"); }
  317. static void Eelse (void) { error("Inappropriate #else"); }
  318. static void Eendif(void) { error("Inappropriate #endif"); }
  319. static void Eeof (void) { error("Premature EOF"); }
  320. static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
  321. /* plain line handling */
  322. static void print (void) { flushline(true); }
  323. static void drop (void) { flushline(false); }
  324. /* output lacks group's start line */
  325. static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
  326. static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
  327. static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
  328. /* print/pass this block */
  329. static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
  330. static void Pelse (void) { print(); state(IS_PASS_ELSE); }
  331. static void Pendif(void) { print(); unnest(); }
  332. /* discard this block */
  333. static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
  334. static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
  335. static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
  336. static void Dendif(void) { drop(); unnest(); }
  337. /* first line of group */
  338. static void Fdrop (void) { nest(); Dfalse(); }
  339. static void Fpass (void) { nest(); Pelif(); }
  340. static void Ftrue (void) { nest(); Strue(); }
  341. static void Ffalse(void) { nest(); Sfalse(); }
  342. /* variable pedantry for obfuscated lines */
  343. static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
  344. static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); }
  345. static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
  346. /* ignore comments in this block */
  347. static void Idrop (void) { Fdrop(); ignoreon(); }
  348. static void Itrue (void) { Ftrue(); ignoreon(); }
  349. static void Ifalse(void) { Ffalse(); ignoreon(); }
  350. /* edit this line */
  351. static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); }
  352. static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); }
  353. static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
  354. static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
  355. static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
  356. /* IS_OUTSIDE */
  357. { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  358. Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
  359. print, done, abort },
  360. /* IS_FALSE_PREFIX */
  361. { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  362. Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  363. drop, Eeof, abort },
  364. /* IS_TRUE_PREFIX */
  365. { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  366. Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  367. print, Eeof, abort },
  368. /* IS_PASS_MIDDLE */
  369. { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  370. Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
  371. print, Eeof, abort },
  372. /* IS_FALSE_MIDDLE */
  373. { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  374. Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  375. drop, Eeof, abort },
  376. /* IS_TRUE_MIDDLE */
  377. { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  378. Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  379. print, Eeof, abort },
  380. /* IS_PASS_ELSE */
  381. { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  382. Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
  383. print, Eeof, abort },
  384. /* IS_FALSE_ELSE */
  385. { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  386. Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  387. drop, Eeof, abort },
  388. /* IS_TRUE_ELSE */
  389. { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  390. Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
  391. print, Eeof, abort },
  392. /* IS_FALSE_TRAILER */
  393. { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  394. Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  395. drop, Eeof, abort }
  396. /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
  397. TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
  398. PLAIN EOF ERROR */
  399. };
  400. /*
  401. * State machine utility functions
  402. */
  403. static void
  404. done(void)
  405. {
  406. if (incomment)
  407. error("EOF in comment");
  408. exit(exitstat);
  409. }
  410. static void
  411. ignoreoff(void)
  412. {
  413. if (depth == 0)
  414. abort(); /* bug */
  415. ignoring[depth] = ignoring[depth-1];
  416. }
  417. static void
  418. ignoreon(void)
  419. {
  420. ignoring[depth] = true;
  421. }
  422. static void
  423. keywordedit(const char *replacement)
  424. {
  425. size_t size = tline + sizeof(tline) - keyword;
  426. char *dst = keyword;
  427. const char *src = replacement;
  428. if (size != 0) {
  429. while ((--size != 0) && (*src != '\0'))
  430. *dst++ = *src++;
  431. *dst = '\0';
  432. }
  433. print();
  434. }
  435. static void
  436. nest(void)
  437. {
  438. if (depth > MAXDEPTH-1)
  439. abort(); /* bug */
  440. if (depth == MAXDEPTH-1)
  441. error("Too many levels of nesting");
  442. depth += 1;
  443. stifline[depth] = linenum;
  444. }
  445. static void
  446. unnest(void)
  447. {
  448. if (depth == 0)
  449. abort(); /* bug */
  450. depth -= 1;
  451. }
  452. static void
  453. state(Ifstate is)
  454. {
  455. ifstate[depth] = is;
  456. }
  457. /*
  458. * Write a line to the output or not, according to command line options.
  459. */
  460. static void
  461. flushline(bool keep)
  462. {
  463. if (symlist)
  464. return;
  465. if (keep ^ complement) {
  466. bool blankline = tline[strspn(tline, " \t\n")] == '\0';
  467. if (blankline && compblank && blankcount != blankmax) {
  468. delcount += 1;
  469. blankcount += 1;
  470. } else {
  471. if (lnnum && delcount > 0)
  472. printf("#line %d\n", linenum);
  473. fputs(tline, stdout);
  474. delcount = 0;
  475. blankmax = blankcount = blankline ? blankcount + 1 : 0;
  476. }
  477. } else {
  478. if (lnblank)
  479. putc('\n', stdout);
  480. exitstat = 1;
  481. delcount += 1;
  482. blankcount = 0;
  483. }
  484. }
  485. /*
  486. * The driver for the state machine.
  487. */
  488. static void
  489. process(void)
  490. {
  491. Linetype lineval;
  492. /* When compressing blank lines, act as if the file
  493. is preceded by a large number of blank lines. */
  494. blankmax = blankcount = 1000;
  495. for (;;) {
  496. linenum++;
  497. lineval = parseline();
  498. trans_table[ifstate[depth]][lineval]();
  499. debug("process %s -> %s depth %d",
  500. linetype_name[lineval],
  501. ifstate_name[ifstate[depth]], depth);
  502. }
  503. }
  504. /*
  505. * Parse a line and determine its type. We keep the preprocessor line
  506. * parser state between calls in the global variable linestate, with
  507. * help from skipcomment().
  508. */
  509. static Linetype
  510. parseline(void)
  511. {
  512. const char *cp;
  513. int cursym;
  514. int kwlen;
  515. Linetype retval;
  516. Comment_state wascomment;
  517. if (fgets(tline, MAXLINE, input) == NULL)
  518. return (LT_EOF);
  519. retval = LT_PLAIN;
  520. wascomment = incomment;
  521. cp = skipcomment(tline);
  522. if (linestate == LS_START) {
  523. if (*cp == '#') {
  524. linestate = LS_HASH;
  525. cp = skipcomment(cp + 1);
  526. } else if (*cp != '\0')
  527. linestate = LS_DIRTY;
  528. }
  529. if (!incomment && linestate == LS_HASH) {
  530. keyword = tline + (cp - tline);
  531. cp = skipsym(cp);
  532. kwlen = cp - keyword;
  533. /* no way can we deal with a continuation inside a keyword */
  534. if (strncmp(cp, "\\\n", 2) == 0)
  535. Eioccc();
  536. if (strlcmp("ifdef", keyword, kwlen) == 0 ||
  537. strlcmp("ifndef", keyword, kwlen) == 0) {
  538. cp = skipcomment(cp);
  539. if ((cursym = findsym(cp)) < 0)
  540. retval = LT_IF;
  541. else {
  542. retval = (keyword[2] == 'n')
  543. ? LT_FALSE : LT_TRUE;
  544. if (value[cursym] == NULL)
  545. retval = (retval == LT_TRUE)
  546. ? LT_FALSE : LT_TRUE;
  547. if (ignore[cursym])
  548. retval = (retval == LT_TRUE)
  549. ? LT_TRUEI : LT_FALSEI;
  550. }
  551. cp = skipsym(cp);
  552. } else if (strlcmp("if", keyword, kwlen) == 0)
  553. retval = ifeval(&cp);
  554. else if (strlcmp("elif", keyword, kwlen) == 0)
  555. retval = ifeval(&cp) - LT_IF + LT_ELIF;
  556. else if (strlcmp("else", keyword, kwlen) == 0)
  557. retval = LT_ELSE;
  558. else if (strlcmp("endif", keyword, kwlen) == 0)
  559. retval = LT_ENDIF;
  560. else {
  561. linestate = LS_DIRTY;
  562. retval = LT_PLAIN;
  563. }
  564. cp = skipcomment(cp);
  565. if (*cp != '\0') {
  566. linestate = LS_DIRTY;
  567. if (retval == LT_TRUE || retval == LT_FALSE ||
  568. retval == LT_TRUEI || retval == LT_FALSEI)
  569. retval = LT_IF;
  570. if (retval == LT_ELTRUE || retval == LT_ELFALSE)
  571. retval = LT_ELIF;
  572. }
  573. if (retval != LT_PLAIN && (wascomment || incomment)) {
  574. retval += LT_DODGY;
  575. if (incomment)
  576. linestate = LS_DIRTY;
  577. }
  578. /* skipcomment normally changes the state, except
  579. if the last line of the file lacks a newline, or
  580. if there is too much whitespace in a directive */
  581. if (linestate == LS_HASH) {
  582. size_t len = cp - tline;
  583. if (fgets(tline + len, MAXLINE - len, input) == NULL) {
  584. /* append the missing newline */
  585. tline[len+0] = '\n';
  586. tline[len+1] = '\0';
  587. cp++;
  588. linestate = LS_START;
  589. } else {
  590. linestate = LS_DIRTY;
  591. }
  592. }
  593. }
  594. if (linestate == LS_DIRTY) {
  595. while (*cp != '\0')
  596. cp = skipcomment(cp + 1);
  597. }
  598. debug("parser %s comment %s line",
  599. comment_name[incomment], linestate_name[linestate]);
  600. return (retval);
  601. }
  602. /*
  603. * These are the binary operators that are supported by the expression
  604. * evaluator.
  605. */
  606. static Linetype op_strict(int *p, int v, Linetype at, Linetype bt) {
  607. if(at == LT_IF || bt == LT_IF) return (LT_IF);
  608. return (*p = v, v ? LT_TRUE : LT_FALSE);
  609. }
  610. static Linetype op_lt(int *p, Linetype at, int a, Linetype bt, int b) {
  611. return op_strict(p, a < b, at, bt);
  612. }
  613. static Linetype op_gt(int *p, Linetype at, int a, Linetype bt, int b) {
  614. return op_strict(p, a > b, at, bt);
  615. }
  616. static Linetype op_le(int *p, Linetype at, int a, Linetype bt, int b) {
  617. return op_strict(p, a <= b, at, bt);
  618. }
  619. static Linetype op_ge(int *p, Linetype at, int a, Linetype bt, int b) {
  620. return op_strict(p, a >= b, at, bt);
  621. }
  622. static Linetype op_eq(int *p, Linetype at, int a, Linetype bt, int b) {
  623. return op_strict(p, a == b, at, bt);
  624. }
  625. static Linetype op_ne(int *p, Linetype at, int a, Linetype bt, int b) {
  626. return op_strict(p, a != b, at, bt);
  627. }
  628. static Linetype op_or(int *p, Linetype at, int a, Linetype bt, int b) {
  629. if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
  630. return (*p = 1, LT_TRUE);
  631. return op_strict(p, a || b, at, bt);
  632. }
  633. static Linetype op_and(int *p, Linetype at, int a, Linetype bt, int b) {
  634. if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
  635. return (*p = 0, LT_FALSE);
  636. return op_strict(p, a && b, at, bt);
  637. }
  638. /*
  639. * An evaluation function takes three arguments, as follows: (1) a pointer to
  640. * an element of the precedence table which lists the operators at the current
  641. * level of precedence; (2) a pointer to an integer which will receive the
  642. * value of the expression; and (3) a pointer to a char* that points to the
  643. * expression to be evaluated and that is updated to the end of the expression
  644. * when evaluation is complete. The function returns LT_FALSE if the value of
  645. * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
  646. * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
  647. */
  648. struct ops;
  649. typedef Linetype eval_fn(const struct ops *, int *, const char **);
  650. static eval_fn eval_table, eval_unary;
  651. /*
  652. * The precedence table. Expressions involving binary operators are evaluated
  653. * in a table-driven way by eval_table. When it evaluates a subexpression it
  654. * calls the inner function with its first argument pointing to the next
  655. * element of the table. Innermost expressions have special non-table-driven
  656. * handling.
  657. */
  658. static const struct ops {
  659. eval_fn *inner;
  660. struct op {
  661. const char *str;
  662. Linetype (*fn)(int *, Linetype, int, Linetype, int);
  663. } op[5];
  664. } eval_ops[] = {
  665. { eval_table, { { "||", op_or } } },
  666. { eval_table, { { "&&", op_and } } },
  667. { eval_table, { { "==", op_eq },
  668. { "!=", op_ne } } },
  669. { eval_unary, { { "<=", op_le },
  670. { ">=", op_ge },
  671. { "<", op_lt },
  672. { ">", op_gt } } }
  673. };
  674. /*
  675. * Function for evaluating the innermost parts of expressions,
  676. * viz. !expr (expr) number defined(symbol) symbol
  677. * We reset the constexpr flag in the last two cases.
  678. */
  679. static Linetype
  680. eval_unary(const struct ops *ops, int *valp, const char **cpp)
  681. {
  682. const char *cp;
  683. char *ep;
  684. int sym;
  685. bool defparen;
  686. Linetype lt;
  687. cp = skipcomment(*cpp);
  688. if (*cp == '!') {
  689. debug("eval%d !", ops - eval_ops);
  690. cp++;
  691. lt = eval_unary(ops, valp, &cp);
  692. if (lt == LT_ERROR)
  693. return (LT_ERROR);
  694. if (lt != LT_IF) {
  695. *valp = !*valp;
  696. lt = *valp ? LT_TRUE : LT_FALSE;
  697. }
  698. } else if (*cp == '(') {
  699. cp++;
  700. debug("eval%d (", ops - eval_ops);
  701. lt = eval_table(eval_ops, valp, &cp);
  702. if (lt == LT_ERROR)
  703. return (LT_ERROR);
  704. cp = skipcomment(cp);
  705. if (*cp++ != ')')
  706. return (LT_ERROR);
  707. } else if (isdigit((unsigned char)*cp)) {
  708. debug("eval%d number", ops - eval_ops);
  709. *valp = strtol(cp, &ep, 0);
  710. if (ep == cp)
  711. return (LT_ERROR);
  712. lt = *valp ? LT_TRUE : LT_FALSE;
  713. cp = skipsym(cp);
  714. } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
  715. cp = skipcomment(cp+7);
  716. debug("eval%d defined", ops - eval_ops);
  717. if (*cp == '(') {
  718. cp = skipcomment(cp+1);
  719. defparen = true;
  720. } else {
  721. defparen = false;
  722. }
  723. sym = findsym(cp);
  724. if (sym < 0) {
  725. lt = LT_IF;
  726. } else {
  727. *valp = (value[sym] != NULL);
  728. lt = *valp ? LT_TRUE : LT_FALSE;
  729. }
  730. cp = skipsym(cp);
  731. cp = skipcomment(cp);
  732. if (defparen && *cp++ != ')')
  733. return (LT_ERROR);
  734. constexpr = false;
  735. } else if (!endsym(*cp)) {
  736. debug("eval%d symbol", ops - eval_ops);
  737. sym = findsym(cp);
  738. cp = skipsym(cp);
  739. if (sym < 0) {
  740. lt = LT_IF;
  741. cp = skipargs(cp);
  742. } else if (value[sym] == NULL) {
  743. *valp = 0;
  744. lt = LT_FALSE;
  745. } else {
  746. *valp = strtol(value[sym], &ep, 0);
  747. if (*ep != '\0' || ep == value[sym])
  748. return (LT_ERROR);
  749. lt = *valp ? LT_TRUE : LT_FALSE;
  750. cp = skipargs(cp);
  751. }
  752. constexpr = false;
  753. } else {
  754. debug("eval%d bad expr", ops - eval_ops);
  755. return (LT_ERROR);
  756. }
  757. *cpp = cp;
  758. debug("eval%d = %d", ops - eval_ops, *valp);
  759. return (lt);
  760. }
  761. /*
  762. * Table-driven evaluation of binary operators.
  763. */
  764. static Linetype
  765. eval_table(const struct ops *ops, int *valp, const char **cpp)
  766. {
  767. const struct op *op;
  768. const char *cp;
  769. int val;
  770. Linetype lt, rt;
  771. debug("eval%d", ops - eval_ops);
  772. cp = *cpp;
  773. lt = ops->inner(ops+1, valp, &cp);
  774. if (lt == LT_ERROR)
  775. return (LT_ERROR);
  776. for (;;) {
  777. cp = skipcomment(cp);
  778. for (op = ops->op; op->str != NULL; op++)
  779. if (strncmp(cp, op->str, strlen(op->str)) == 0)
  780. break;
  781. if (op->str == NULL)
  782. break;
  783. cp += strlen(op->str);
  784. debug("eval%d %s", ops - eval_ops, op->str);
  785. rt = ops->inner(ops+1, &val, &cp);
  786. if (rt == LT_ERROR)
  787. return (LT_ERROR);
  788. lt = op->fn(valp, lt, *valp, rt, val);
  789. }
  790. *cpp = cp;
  791. debug("eval%d = %d", ops - eval_ops, *valp);
  792. debug("eval%d lt = %s", ops - eval_ops, linetype_name[lt]);
  793. return (lt);
  794. }
  795. /*
  796. * Evaluate the expression on a #if or #elif line. If we can work out
  797. * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
  798. * return just a generic LT_IF.
  799. */
  800. static Linetype
  801. ifeval(const char **cpp)
  802. {
  803. int ret;
  804. int val = 0;
  805. debug("eval %s", *cpp);
  806. constexpr = killconsts ? false : true;
  807. ret = eval_table(eval_ops, &val, cpp);
  808. debug("eval = %d", val);
  809. return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
  810. }
  811. /*
  812. * Skip over comments, strings, and character literals and stop at the
  813. * next character position that is not whitespace. Between calls we keep
  814. * the comment state in the global variable incomment, and we also adjust
  815. * the global variable linestate when we see a newline.
  816. * XXX: doesn't cope with the buffer splitting inside a state transition.
  817. */
  818. static const char *
  819. skipcomment(const char *cp)
  820. {
  821. if (text || ignoring[depth]) {
  822. for (; isspace((unsigned char)*cp); cp++)
  823. if (*cp == '\n')
  824. linestate = LS_START;
  825. return (cp);
  826. }
  827. while (*cp != '\0')
  828. /* don't reset to LS_START after a line continuation */
  829. if (strncmp(cp, "\\\n", 2) == 0)
  830. cp += 2;
  831. else switch (incomment) {
  832. case NO_COMMENT:
  833. if (strncmp(cp, "/\\\n", 3) == 0) {
  834. incomment = STARTING_COMMENT;
  835. cp += 3;
  836. } else if (strncmp(cp, "/*", 2) == 0) {
  837. incomment = C_COMMENT;
  838. cp += 2;
  839. } else if (strncmp(cp, "//", 2) == 0) {
  840. incomment = CXX_COMMENT;
  841. cp += 2;
  842. } else if (strncmp(cp, "\'", 1) == 0) {
  843. incomment = CHAR_LITERAL;
  844. linestate = LS_DIRTY;
  845. cp += 1;
  846. } else if (strncmp(cp, "\"", 1) == 0) {
  847. incomment = STRING_LITERAL;
  848. linestate = LS_DIRTY;
  849. cp += 1;
  850. } else if (strncmp(cp, "\n", 1) == 0) {
  851. linestate = LS_START;
  852. cp += 1;
  853. } else if (strchr(" \t", *cp) != NULL) {
  854. cp += 1;
  855. } else
  856. return (cp);
  857. continue;
  858. case CXX_COMMENT:
  859. if (strncmp(cp, "\n", 1) == 0) {
  860. incomment = NO_COMMENT;
  861. linestate = LS_START;
  862. }
  863. cp += 1;
  864. continue;
  865. case CHAR_LITERAL:
  866. case STRING_LITERAL:
  867. if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
  868. (incomment == STRING_LITERAL && cp[0] == '\"')) {
  869. incomment = NO_COMMENT;
  870. cp += 1;
  871. } else if (cp[0] == '\\') {
  872. if (cp[1] == '\0')
  873. cp += 1;
  874. else
  875. cp += 2;
  876. } else if (strncmp(cp, "\n", 1) == 0) {
  877. if (incomment == CHAR_LITERAL)
  878. error("unterminated char literal");
  879. else
  880. error("unterminated string literal");
  881. } else
  882. cp += 1;
  883. continue;
  884. case C_COMMENT:
  885. if (strncmp(cp, "*\\\n", 3) == 0) {
  886. incomment = FINISHING_COMMENT;
  887. cp += 3;
  888. } else if (strncmp(cp, "*/", 2) == 0) {
  889. incomment = NO_COMMENT;
  890. cp += 2;
  891. } else
  892. cp += 1;
  893. continue;
  894. case STARTING_COMMENT:
  895. if (*cp == '*') {
  896. incomment = C_COMMENT;
  897. cp += 1;
  898. } else if (*cp == '/') {
  899. incomment = CXX_COMMENT;
  900. cp += 1;
  901. } else {
  902. incomment = NO_COMMENT;
  903. linestate = LS_DIRTY;
  904. }
  905. continue;
  906. case FINISHING_COMMENT:
  907. if (*cp == '/') {
  908. incomment = NO_COMMENT;
  909. cp += 1;
  910. } else
  911. incomment = C_COMMENT;
  912. continue;
  913. default:
  914. abort(); /* bug */
  915. }
  916. return (cp);
  917. }
  /*
   * Skip macro arguments.
   *
   * Leading comments and whitespace are always skipped.  If a '(' follows,
   * everything up to and including its matching ')' is skipped as well,
   * plus any comments and whitespace after it.  If the parentheses never
   * balance before the end of the text, the original position is returned
   * so that the syntax error is detected again later.
   */
  static const char *
  skipargs(const char *cp)
  {
      const char *const start = cp;
      int nesting = 0;

      cp = skipcomment(cp);
      if (*cp != '(')
          return (cp);

      do {
          switch (*cp) {
          case '(':
              nesting++;
              break;
          case ')':
              nesting--;
              break;
          default:
              break;
          }
          cp = skipcomment(cp+1);
      } while (nesting != 0 && *cp != '\0');

      /* Rewind on unbalanced parentheses. */
      return (nesting == 0 ? cp : start);
  }
  /*
   * Skip over an identifier: return a pointer to the first character at
   * or after cp for which endsym() is true.
   */
  static const char *
  skipsym(const char *cp)
  {
      for (; !endsym(*cp); cp++)
          continue;
      return (cp);
  }
  952. /*
  953. * Look for the symbol in the symbol table. If it is found, we return
  954. * the symbol table index, else we return -1.
  955. */
  956. static int
  957. findsym(const char *str)
  958. {
  959. const char *cp;
  960. int symind;
  961. cp = skipsym(str);
  962. if (cp == str)
  963. return (-1);
  964. if (symlist) {
  965. printf("%.*s\n", (int)(cp-str), str);
  966. /* we don't care about the value of the symbol */
  967. return (0);
  968. }
  969. for (symind = 0; symind < nsyms; ++symind) {
  970. if (strlcmp(symname[symind], str, cp-str) == 0) {
  971. debug("findsym %s %s", symname[symind],
  972. value[symind] ? value[symind] : "");
  973. return (symind);
  974. }
  975. }
  976. return (-1);
  977. }
  978. /*
  979. * Add a symbol to the symbol table.
  980. */
  981. static void
  982. addsym(bool ignorethis, bool definethis, char *sym)
  983. {
  984. int symind;
  985. char *val;
  986. symind = findsym(sym);
  987. if (symind < 0) {
  988. if (nsyms >= MAXSYMS)
  989. errx(2, "too many symbols");
  990. symind = nsyms++;
  991. }
  992. symname[symind] = sym;
  993. ignore[symind] = ignorethis;
  994. val = sym + (skipsym(sym) - sym);
  995. if (definethis) {
  996. if (*val == '=') {
  997. value[symind] = val+1;
  998. *val = '\0';
  999. } else if (*val == '\0')
  1000. value[symind] = "";
  1001. else
  1002. usage();
  1003. } else {
  1004. if (*val != '\0')
  1005. usage();
  1006. value[symind] = NULL;
  1007. }
  1008. }
  /*
   * Compare the string s with the first n characters of t.
   *
   * Works like strncmp() but additionally requires s to end where the
   * compared prefix of t ends: the result is 0 only if s is exactly that
   * prefix.  Otherwise the result is the difference between the first
   * mismatching characters (as unsigned char), or the next character of s
   * when s is longer than the prefix.
   */
  static int
  strlcmp(const char *s, const char *t, size_t n)
  {
      size_t i;

      for (i = 0; i < n && t[i] != '\0'; i++) {
          if (s[i] != t[i])
              return ((unsigned char)s[i] - (unsigned char)t[i]);
      }
      return ((unsigned char)s[i]);
  }
  1023. /*
  1024. * Diagnostics.
  1025. */
  1026. static void
  1027. debug(const char *msg, ...)
  1028. {
  1029. va_list ap;
  1030. if (debugging) {
  1031. va_start(ap, msg);
  1032. vwarnx(msg, ap);
  1033. va_end(ap);
  1034. }
  1035. }
  1036. static void
  1037. error(const char *msg)
  1038. {
  1039. if (depth == 0)
  1040. warnx("%s: %d: %s", filename, linenum, msg);
  1041. else
  1042. warnx("%s: %d: %s (#if line %d depth %d)",
  1043. filename, linenum, msg, stifline[depth], depth);
  1044. errx(2, "output may be truncated");
  1045. }