Qt 6.x
The Qt SDK
Loading...
Searching...
No Matches
preprocessor.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include "preprocessor.h"
6#include "utils.h"
7#include <qstringlist.h>
8#include <qfile.h>
9#include <qdir.h>
10#include <qfileinfo.h>
11#include <qvarlengtharray.h>
12
14
15using namespace QtMiscUtils;
16
17#include "ppkeywords.cpp"
18#include "keywords.cpp"
19
20// transform \r\n into \n
21// \r into \n (os9 style)
22// backslash-newlines into newlines
24{
26 result.resize(input.size());
27 const char *data = input.constData();
28 const char *end = input.constData() + input.size();
29 char *output = result.data();
30
31 int newlines = 0;
32 while (data != end) {
33 while (data != end && is_space(*data))
34 ++data;
35 bool takeLine = (*data == '#');
36 if (*data == '%' && *(data+1) == ':') {
37 takeLine = true;
38 ++data;
39 }
40 if (takeLine) {
41 *output = '#';
42 ++output;
43 do ++data; while (data != end && is_space(*data));
44 }
45 while (data != end) {
46 // handle \\\n, \\\r\n and \\\r
47 if (*data == '\\') {
48 if (*(data + 1) == '\r') {
49 ++data;
50 }
51 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
52 ++newlines;
53 data += 1;
54 if (data != end && *data != '\r')
55 data += 1;
56 continue;
57 }
58 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
59 ++data;
60 }
61 if (data == end)
62 break;
63
64 char ch = *data;
65 if (ch == '\r') // os9: replace \r with \n
66 ch = '\n';
67 *output = ch;
68 ++output;
69
70 if (*data == '\n') {
71 // output additional newlines to keep the correct line-numbering
72 // for the lines following the backslash-newline sequence(s)
73 while (newlines) {
74 *output = '\n';
75 ++output;
76 --newlines;
77 }
78 ++data;
79 break;
80 }
81 ++data;
82 }
83 }
84 result.resize(output - result.constData());
85 return result;
86}
87
90{
91 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
92 switch (symbols.at(index).token) {
93 case PP_IF:
94 case PP_IFDEF:
95 case PP_IFNDEF:
96 ++index;
98 break;
99 default:
100 ;
101 }
102 ++index;
103 }
104}
105
107{
108 while (index < symbols.size() - 1
109 && (symbols.at(index).token != PP_ENDIF
110 && symbols.at(index).token != PP_ELIF
111 && symbols.at(index).token != PP_ELSE)
112 ){
113 switch (symbols.at(index).token) {
114 case PP_IF:
115 case PP_IFDEF:
116 case PP_IFNDEF:
117 ++index;
119 break;
120 default:
121 ;
122 }
123 ++index;
124 }
125 return (index < symbols.size() - 1);
126}
127
128
130{
132 // Preallocate some space to speed up the code below.
133 // The magic divisor value was found by calculating the average ratio between
134 // input size and the final size of symbols.
135 // This yielded a value of 16.x when compiling Qt Base.
136 symbols.reserve(input.size() / 16);
137 const char *begin = input.constData();
138 const char *data = begin;
139 while (*data) {
140 if (mode == TokenizeCpp || mode == TokenizeDefine) {
141 int column = 0;
142
143 const char *lexem = data;
144 int state = 0;
145 Token token = NOTOKEN;
146 for (;;) {
147 if (static_cast<signed char>(*data) < 0) {
148 ++data;
149 continue;
150 }
151 int nextindex = keywords[state].next;
152 int next = 0;
153 if (*data == keywords[state].defchar)
154 next = keywords[state].defnext;
155 else if (!state || nextindex)
156 next = keyword_trans[nextindex][(int)*data];
157 if (!next)
158 break;
159 state = next;
160 token = keywords[state].token;
161 ++data;
162 }
163
164 // suboptimal, is_ident_char should use a table
167
168 if (token == NOTOKEN) {
169 if (*data)
170 ++data;
171 // an error really, but let's ignore this input
172 // to not confuse moc later. However in pre-processor
173 // only mode let's continue.
175 continue;
176 }
177
178 ++column;
179
180 if (token > SPECIAL_TREATMENT_MARK) {
181 switch (token) {
182 case QUOTE:
184 token = STRING_LITERAL;
185 // concatenate multi-line strings for easier
186 // STRING_LITERAL handling in moc
188 && !symbols.isEmpty()
189 && symbols.constLast().token == STRING_LITERAL) {
190
191 const QByteArray newString
192 = '\"'
194 + input.mid(lexem - begin + 1, data - lexem - 2)
195 + '\"';
197 STRING_LITERAL,
198 newString);
199 continue;
200 }
201 break;
202 case SINGLEQUOTE:
203 while (*data && (*data != '\''
204 || (*(data-1)=='\\'
205 && *(data-2)!='\\')))
206 ++data;
207 if (*data)
208 ++data;
209 token = CHARACTER_LITERAL;
210 break;
211 case LANGLE_SCOPE:
212 // split <:: into two tokens, < and ::
213 token = LANGLE;
214 data -= 2;
215 break;
216 case DIGIT:
217 while (isAsciiDigit(*data) || *data == '\'')
218 ++data;
219 if (!*data || *data != '.') {
220 token = INTEGER_LITERAL;
221 if (data - lexem == 1 &&
222 (*data == 'x' || *data == 'X'
223 || *data == 'b' || *data == 'B')
224 && *lexem == '0') {
225 ++data;
226 while (isHexDigit(*data) || *data == '\'')
227 ++data;
228 }
229 break;
230 }
231 token = FLOATING_LITERAL;
232 ++data;
234 case FLOATING_LITERAL:
235 while (isAsciiDigit(*data) || *data == '\'')
236 ++data;
237 if (*data == '+' || *data == '-')
238 ++data;
239 if (*data == 'e' || *data == 'E') {
240 ++data;
241 while (isAsciiDigit(*data) || *data == '\'')
242 ++data;
243 }
244 if (*data == 'f' || *data == 'F'
245 || *data == 'l' || *data == 'L')
246 ++data;
247 break;
248 case HASH:
249 if (column == 1 && mode == TokenizeCpp) {
251 while (*data && (*data == ' ' || *data == '\t'))
252 ++data;
253 if (is_ident_char(*data))
255 continue;
256 }
257 break;
258 case PP_HASHHASH:
259 if (mode == TokenizeCpp)
260 continue;
261 break;
262 case NEWLINE:
263 ++lineNum;
264 if (mode == TokenizeDefine) {
266 // emit the newline token
267 break;
268 }
269 continue;
270 case BACKSLASH:
271 {
272 const char *rewind = data;
273 while (*data && (*data == ' ' || *data == '\t'))
274 ++data;
275 if (*data && *data == '\n') {
276 ++data;
277 continue;
278 }
279 data = rewind;
280 } break;
281 case CHARACTER:
282 while (is_ident_char(*data))
283 ++data;
284 token = IDENTIFIER;
285 break;
286 case C_COMMENT:
287 if (*data) {
288 if (*data == '\n')
289 ++lineNum;
290 ++data;
291 if (*data) {
292 if (*data == '\n')
293 ++lineNum;
294 ++data;
295 }
296 }
297 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
298 if (*data == '\n')
299 ++lineNum;
300 ++data;
301 }
302 token = WHITESPACE; // one comment, one whitespace
304 case WHITESPACE:
305 if (column == 1)
306 column = 0;
307 while (*data && (*data == ' ' || *data == '\t'))
308 ++data;
309 if (Preprocessor::preprocessOnly) // tokenize whitespace
310 break;
311 continue;
312 case CPP_COMMENT:
313 while (*data && *data != '\n')
314 ++data;
315 continue; // ignore safely, the newline is a separator
316 default:
317 continue; //ignore
318 }
319 }
320#ifdef USE_LEXEM_STORE
322 && token != IDENTIFIER
323 && token != STRING_LITERAL
324 && token != FLOATING_LITERAL
325 && token != INTEGER_LITERAL)
326 symbols += Symbol(lineNum, token);
327 else
328#endif
329 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
330
331 } else { // Preprocessor
332
333 const char *lexem = data;
334 int state = 0;
335 Token token = NOTOKEN;
337 state = pp_keyword_trans[0][(int)'#'];
339 }
340 for (;;) {
341 if (static_cast<signed char>(*data) < 0) {
342 ++data;
343 continue;
344 }
345 int nextindex = pp_keywords[state].next;
346 int next = 0;
347 if (*data == pp_keywords[state].defchar)
348 next = pp_keywords[state].defnext;
349 else if (!state || nextindex)
350 next = pp_keyword_trans[nextindex][(int)*data];
351 if (!next)
352 break;
353 state = next;
354 token = pp_keywords[state].token;
355 ++data;
356 }
357 // suboptimal, is_ident_char should use a table
360
361 switch (token) {
362 case NOTOKEN:
363 if (*data)
364 ++data;
365 break;
366 case PP_DEFINE:
368 break;
369 case PP_IFDEF:
370 symbols += Symbol(lineNum, PP_IF);
371 symbols += Symbol(lineNum, PP_DEFINED);
372 continue;
373 case PP_IFNDEF:
374 symbols += Symbol(lineNum, PP_IF);
375 symbols += Symbol(lineNum, PP_NOT);
376 symbols += Symbol(lineNum, PP_DEFINED);
377 continue;
378 case PP_INCLUDE:
380 break;
381 case PP_QUOTE:
384 break;
385 case PP_SINGLEQUOTE:
386 while (*data && (*data != '\''
387 || (*(data-1)=='\\'
388 && *(data-2)!='\\')))
389 ++data;
390 if (*data)
391 ++data;
393 break;
394 case PP_DIGIT:
395 while (isAsciiDigit(*data) || *data == '\'')
396 ++data;
397 if (!*data || *data != '.') {
399 if (data - lexem == 1 &&
400 (*data == 'x' || *data == 'X')
401 && *lexem == '0') {
402 ++data;
403 while (isHexDigit(*data) || *data == '\'')
404 ++data;
405 }
406 break;
407 }
409 ++data;
412 while (isAsciiDigit(*data) || *data == '\'')
413 ++data;
414 if (*data == '+' || *data == '-')
415 ++data;
416 if (*data == 'e' || *data == 'E') {
417 ++data;
418 while (isAsciiDigit(*data) || *data == '\'')
419 ++data;
420 }
421 if (*data == 'f' || *data == 'F'
422 || *data == 'l' || *data == 'L')
423 ++data;
424 break;
425 case PP_CHARACTER:
427 // rewind entire token to begin
428 data = lexem;
430 continue;
431 }
432 while (is_ident_char(*data))
433 ++data;
435
436 if (mode == PrepareDefine) {
437 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
438 // make sure we explicitly add the whitespace here if the next char
439 // is not an opening brace, so we can distinguish correctly between
440 // regular and function macros
441 if (*data != '(')
442 symbols += Symbol(lineNum, WHITESPACE);
444 continue;
445 }
446 break;
447 case PP_C_COMMENT:
448 if (*data) {
449 if (*data == '\n')
450 ++lineNum;
451 ++data;
452 if (*data) {
453 if (*data == '\n')
454 ++lineNum;
455 ++data;
456 }
457 }
458 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
459 if (*data == '\n')
460 ++lineNum;
461 ++data;
462 }
463 token = PP_WHITESPACE; // one comment, one whitespace
465 case PP_WHITESPACE:
466 while (*data && (*data == ' ' || *data == '\t'))
467 ++data;
468 continue; // the preprocessor needs no whitespace
469 case PP_CPP_COMMENT:
470 while (*data && *data != '\n')
471 ++data;
472 continue; // ignore safely, the newline is a separator
473 case PP_NEWLINE:
474 ++lineNum;
476 break;
477 case PP_BACKSLASH:
478 {
479 const char *rewind = data;
480 while (*data && (*data == ' ' || *data == '\t'))
481 ++data;
482 if (*data && *data == '\n') {
483 ++data;
484 continue;
485 }
486 data = rewind;
487 } break;
488 case PP_LANGLE:
489 if (mode != TokenizeInclude)
490 break;
492 while (*data && *data != '\n' && *(data-1) != '>')
493 ++data;
494 break;
495 default:
496 break;
497 }
499 continue;
500#ifdef USE_LEXEM_STORE
501 if (token != PP_IDENTIFIER
505 symbols += Symbol(lineNum, token);
506 else
507#endif
508 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
509 }
510 }
511 symbols += Symbol(); // eof symbol
512 return symbols;
513}
514
516 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
517{
519 SafeSymbols sf;
520 sf.symbols = toExpand;
521 sf.index = index;
522 sf.excludedSymbols = excludeSymbols;
523 symbols.push(sf);
524
525 if (toExpand.isEmpty())
526 return;
527
528 for (;;) {
529 QByteArray macro;
530 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, &macro);
531
532 if (macro.isEmpty()) {
533 // not a macro
534 Symbol s = symbols.symbol();
535 s.lineNum = lineNum;
536 *into += s;
537 } else {
538 SafeSymbols sf;
539 sf.symbols = newSyms;
540 sf.index = 0;
541 sf.expandedMacro = macro;
542 symbols.push(sf);
543 }
544 if (!symbols.hasNext() || (one && symbols.size() == 1))
545 break;
546 symbols.next();
547 }
548
549 if (symbols.size())
550 index = symbols.top().index;
551 else
552 index = toExpand.size();
553}
554
555
557{
558 Symbol s = symbols.symbol();
559
560 // not a macro
561 if (s.token != PP_IDENTIFIER || !that->macros.contains(s) || symbols.dontReplaceSymbol(s.lexem())) {
562 return Symbols();
563 }
564
565 const Macro &macro = that->macros.value(s);
566 *macroName = s.lexem();
567
568 Symbols expansion;
569 if (!macro.isFunction) {
570 expansion = macro.symbols;
571 } else {
572 bool haveSpace = false;
573 while (symbols.test(PP_WHITESPACE)) { haveSpace = true; }
574 if (!symbols.test(PP_LPAREN)) {
575 *macroName = QByteArray();
576 Symbols syms;
577 if (haveSpace)
578 syms += Symbol(lineNum, PP_WHITESPACE);
579 syms += s;
580 syms.last().lineNum = lineNum;
581 return syms;
582 }
584 while (symbols.hasNext()) {
586 // strip leading space
587 while (symbols.test(PP_WHITESPACE)) {}
588 int nesting = 0;
589 bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
590 while (symbols.hasNext()) {
591 Token t = symbols.next();
592 if (t == PP_LPAREN) {
593 ++nesting;
594 } else if (t == PP_RPAREN) {
595 --nesting;
596 if (nesting < 0)
597 break;
598 } else if (t == PP_COMMA && nesting == 0) {
599 if (!vararg)
600 break;
601 }
602 argument += symbols.symbol();
603 }
605
606 if (nesting < 0)
607 break;
608 else if (!symbols.hasNext())
609 that->error("missing ')' in macro usage");
610 }
611
612 // empty VA_ARGS
613 if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
614 arguments += Symbols();
615
616 // now replace the macro arguments with the expanded arguments
617 enum Mode {
618 Normal,
619 Hash,
620 HashHash
621 } mode = Normal;
622
623 const auto end = macro.symbols.cend();
624 auto it = macro.symbols.cbegin();
625 const auto lastSym = std::prev(macro.symbols.cend(), !macro.symbols.isEmpty() ? 1 : 0);
626 for (; it != end; ++it) {
627 const Symbol &s = *it;
628 if (s.token == HASH || s.token == PP_HASHHASH) {
629 mode = (s.token == HASH ? Hash : HashHash);
630 continue;
631 }
632 const qsizetype index = macro.arguments.indexOf(s);
633 if (mode == Normal) {
634 if (index >= 0 && index < arguments.size()) {
635 // each argument undergoes macro expansion if it's not used as part of a # or ##
636 if (it == lastSym || std::next(it)->token != PP_HASHHASH) {
638 qsizetype idx = 1;
639 macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols());
640 } else {
641 expansion += arguments.at(index);
642 }
643 } else {
644 expansion += s;
645 }
646 } else if (mode == Hash) {
647 if (index < 0) {
648 that->error("'#' is not followed by a macro parameter");
649 continue;
650 } else if (index >= arguments.size()) {
651 that->error("Macro invoked with too few parameters for a use of '#'");
652 continue;
653 }
654
655 const Symbols &arg = arguments.at(index);
656 QByteArray stringified;
657 for (const Symbol &sym : arg)
658 stringified += sym.lexem();
659
660 stringified.replace('"', "\\\"");
661 stringified.prepend('"');
662 stringified.append('"');
663 expansion += Symbol(lineNum, STRING_LITERAL, stringified);
664 } else if (mode == HashHash){
665 if (s.token == WHITESPACE)
666 continue;
667
668 while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
669 expansion.pop_back();
670
671 Symbol next = s;
672 if (index >= 0 && index < arguments.size()) {
673 const Symbols &arg = arguments.at(index);
674 if (arg.size() == 0) {
675 mode = Normal;
676 continue;
677 }
678 next = arg.at(0);
679 }
680
681 if (!expansion.isEmpty() && expansion.constLast().token == s.token
682 && expansion.constLast().token != STRING_LITERAL) {
683 Symbol last = expansion.takeLast();
684
685 QByteArray lexem = last.lexem() + next.lexem();
686 expansion += Symbol(lineNum, last.token, lexem);
687 } else {
688 expansion += next;
689 }
690
691 if (index >= 0 && index < arguments.size()) {
692 const Symbols &arg = arguments.at(index);
693 if (!arg.isEmpty())
694 expansion.append(arg.cbegin() + 1, arg.cend());
695 }
696 }
697 mode = Normal;
698 }
699 if (mode != Normal)
700 that->error("'#' or '##' found at the end of a macro argument");
701
702 }
703
704 return expansion;
705}
706
708{
709 while (hasNext()) {
710 Token token = next();
711 if (token == PP_IDENTIFIER) {
712 macroExpand(&substituted, this, symbols, index, symbol().lineNum, true);
713 } else if (token == PP_DEFINED) {
714 bool braces = test(PP_LPAREN);
716 Symbol definedOrNotDefined = symbol();
717 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
718 substituted += definedOrNotDefined;
719 if (braces)
721 continue;
722 } else if (token == PP_NEWLINE) {
723 substituted += symbol();
724 break;
725 } else {
726 substituted += symbol();
727 }
728 }
729}
730
731
// Parser subclass declaring expression-level member functions; every
// declaration visible here takes no arguments and returns int.
// NOTE(review): presumably the evaluator used for preprocessor conditions --
// confirm against the callers. The embedded line numbering jumps (735-741,
// 743-744, 746-747, 749, 751), so further members of this class are not
// visible in this listing.
732class PP_Expression : public Parser
733{
734public:
736
742 int AND_expression();
745 int shift_expression();
748 int unary_expression();
750 int primary_expression();
752};
753
755{
757 if (test(PP_QUESTION)) {
758 int alt1 = conditional_expression();
759 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
760 return value ? alt1 : alt2;
761 }
762 return value;
763}
764
766{
768 if (test(PP_OROR))
769 return logical_OR_expression() || value;
770 return value;
771}
772
774{
776 if (test(PP_ANDAND))
777 return logical_AND_expression() && value;
778 return value;
779}
780
782{
784 if (test(PP_OR))
786 return value;
787}
788
790{
791 int value = AND_expression();
792 if (test(PP_HAT))
794 return value;
795}
796
798{
800 if (test(PP_AND))
801 return value & AND_expression();
802 return value;
803}
804
806{
808 switch (next()) {
809 case PP_EQEQ:
810 return value == equality_expression();
811 case PP_NE:
812 return value != equality_expression();
813 default:
814 prev();
815 return value;
816 }
817}
818
820{
821 int value = shift_expression();
822 switch (next()) {
823 case PP_LANGLE:
824 return value < relational_expression();
825 case PP_RANGLE:
826 return value > relational_expression();
827 case PP_LE:
828 return value <= relational_expression();
829 case PP_GE:
830 return value >= relational_expression();
831 default:
832 prev();
833 return value;
834 }
835}
836
838{
840 switch (next()) {
841 case PP_LTLT:
842 return value << shift_expression();
843 case PP_GTGT:
844 return value >> shift_expression();
845 default:
846 prev();
847 return value;
848 }
849}
850
852{
854 switch (next()) {
855 case PP_PLUS:
856 return value + additive_expression();
857 case PP_MINUS:
858 return value - additive_expression();
859 default:
860 prev();
861 return value;
862 }
863}
864
866{
867 int value = unary_expression();
868 switch (next()) {
869 case PP_STAR:
870 {
871 // get well behaved overflow behavior by converting to long
872 // and then back to int
873 // NOTE: A conformant preprocessor would need to work intmax_t/
874 // uintmax_t according to [cpp.cond], 19.1 §10
875 // But we're not compliant anyway
877 return int(result);
878 }
879 case PP_PERCENT:
880 {
881 int remainder = multiplicative_expression();
882 return remainder ? value % remainder : 0;
883 }
884 case PP_SLASH:
885 {
886 int div = multiplicative_expression();
887 return div ? value / div : 0;
888 }
889 default:
890 prev();
891 return value;
892 };
893}
894
896{
897 switch (next()) {
898 case PP_PLUS:
899 return unary_expression();
900 case PP_MINUS:
901 return -unary_expression();
902 case PP_NOT:
903 return !unary_expression();
904 case PP_TILDE:
905 return ~unary_expression();
906 case PP_MOC_TRUE:
907 return 1;
908 case PP_MOC_FALSE:
909 return 0;
910 default:
911 prev();
912 return primary_expression();
913 }
914}
915
917{
918 Token t = lookup();
920 || t == PP_PLUS
921 || t == PP_MINUS
922 || t == PP_NOT
923 || t == PP_TILDE
924 || t == PP_DEFINED);
925}
926
928{
929 int value;
930 if (test(PP_LPAREN)) {
933 } else {
934 next();
935 value = lexem().toInt(nullptr, 0);
936 }
937 return value;
938}
939
941{
942 Token t = lookup();
943 return (t == PP_IDENTIFIER
946 || t == PP_MOC_TRUE
947 || t == PP_MOC_FALSE
948 || t == PP_LPAREN);
949}
950
952{
953 PP_Expression expression;
955
957
958 return expression.value();
959}
960
962{
963 const qint64 size = file->size();
964 char *rawInput = reinterpret_cast<char*>(file->map(0, size));
965 return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
966}
967
968static void mergeStringLiterals(Symbols *_symbols)
969{
970 Symbols &symbols = *_symbols;
971 for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
972 if (i->token == STRING_LITERAL) {
973 Symbols::Iterator mergeSymbol = i;
974 qsizetype literalsLength = mergeSymbol->len;
975 while (++i != symbols.end() && i->token == STRING_LITERAL)
976 literalsLength += i->len - 2; // no quotes
977
978 if (literalsLength != mergeSymbol->len) {
979 QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
980 QByteArray &mergeSymbolLexem = mergeSymbol->lex;
981 mergeSymbolLexem.resize(0);
982 mergeSymbolLexem.reserve(literalsLength);
983 mergeSymbolLexem.append('"');
984 mergeSymbolLexem.append(mergeSymbolOriginalLexem);
985 for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j)
986 mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem()
987 mergeSymbolLexem.append('"');
988 mergeSymbol->len = mergeSymbol->lex.size();
989 mergeSymbol->from = 0;
990 i = symbols.erase(mergeSymbol + 1, i);
991 }
992 if (i == symbols.end())
993 break;
994 }
995 }
996}
997
999 const QByteArray &include,
1000 const bool debugIncludes)
1001{
1002 QFileInfo fi;
1003
1004 if (Q_UNLIKELY(debugIncludes)) {
1005 fprintf(stderr, "debug-includes: searching for '%s'\n", include.constData());
1006 }
1007
1008 for (const Parser::IncludePath &p : includepaths) {
1009 if (fi.exists())
1010 break;
1011
1012 if (p.isFrameworkPath) {
1013 const qsizetype slashPos = include.indexOf('/');
1014 if (slashPos == -1)
1015 continue;
1016 fi.setFile(QString::fromLocal8Bit(p.path + '/' + include.left(slashPos) + ".framework/Headers/"),
1017 QString::fromLocal8Bit(include.mid(slashPos + 1)));
1018 } else {
1020 }
1021
1022 if (Q_UNLIKELY(debugIncludes)) {
1023 const auto candidate = fi.filePath().toLocal8Bit();
1024 fprintf(stderr, "debug-includes: considering '%s'\n", candidate.constData());
1025 }
1026
1027 // try again, maybe there's a file later in the include paths with the same name
1028 // (186067)
1029 if (fi.isDir()) {
1030 fi = QFileInfo();
1031 continue;
1032 }
1033 }
1034
1035 if (!fi.exists() || fi.isDir()) {
1036 if (Q_UNLIKELY(debugIncludes)) {
1037 fprintf(stderr, "debug-includes: can't find '%s'\n", include.constData());
1038 }
1039 return QByteArray();
1040 }
1041
1042 const auto result = fi.canonicalFilePath().toLocal8Bit();
1043
1044 if (Q_UNLIKELY(debugIncludes)) {
1045 fprintf(stderr, "debug-includes: found '%s'\n", result.constData());
1046 }
1047
1048 return result;
1049}
1050
1052{
1053 if (!relativeTo.isEmpty()) {
1054 QFileInfo fi;
1056 if (fi.exists() && !fi.isDir())
1057 return fi.canonicalFilePath().toLocal8Bit();
1058 }
1059
1064 includes,
1065 include,
1066 debugIncludes));
1067 return it.value();
1068}
1069
// Walks the current symbol stream (symbols / index) token by token and appends
// the resulting symbols to `preprocessed`. #include, #define, #undef,
// identifiers, unknown '#' statements, conditionals and the SIGNALS/SLOTS
// tokens are handled in the switch below; PP_NEWLINE tokens are dropped and
// every other token is appended unchanged.
// `filename` is pushed onto currentFilenames for the duration of the call and
// is the path that local (quoted) includes are resolved against.
// NOTE(review): the embedded line numbering of this listing has gaps (for
// example 1073, 1094, 1115, 1144, 1152, 1184, 1210), so some statements of
// this function are not visible here.
1070void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1071{
1072 currentFilenames.push(filename);
1074 while (hasNext()) {
1075 Token token = next();
1076
1077 switch (token) {
1078 case PP_INCLUDE:
1079 {
1080 int lineNum = symbol().lineNum;
1081 QByteArray include;
1082 bool local = false;
// Only an operand tokenized as PP_STRING_LITERAL is processed; a lexem
// starting with '"' marks the include as local. Anything else is skipped.
1083 if (test(PP_STRING_LITERAL)) {
1084 local = lexem().startsWith('\"');
1085 include = unquotedLexem();
1086 } else
1087 continue;
1088 until(PP_NEWLINE);
1089
// Non-local includes are resolved with an empty relativeTo; a null result
// means the include is skipped.
1090 include = resolveInclude(include, local ? filename : QByteArray());
1091 if (include.isNull())
1092 continue;
1093
1095 continue;
1097
1100 continue;
1101
1103
1104 file.close();
1105 if (input.isEmpty())
1106 continue;
1107
// Remember the current stream and position; both are restored after the
// recursive call below.
1108 Symbols saveSymbols = symbols;
1109 qsizetype saveIndex = index;
1110
1111 // phase 1: get rid of backslash-newlines
1112 input = cleaned(input);
1113
1114 // phase 2: tokenize for the preprocessor
1116 input.clear();
1117
1118 index = 0;
1119
1120 // phase 3: preprocess conditions and substitute macros
// The included file's output is bracketed by MOC_INCLUDE_BEGIN / MOC_INCLUDE_END
// marker symbols carrying the resolved include path.
1121 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1122 preprocess(include, preprocessed);
1123 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1124
1125 symbols = saveSymbols;
1126 index = saveIndex;
1127 continue;
1128 }
1129 case PP_DEFINE:
1130 {
// The symbol following #define supplies the macro name.
1131 next();
1132 QByteArray name = lexem();
1133 if (name.isEmpty() || !is_ident_start(name[0]))
1134 error();
1135 Macro macro;
1136 macro.isVariadic = false;
1137 if (test(LPAREN)) {
1138 // we have a function macro
1139 macro.isFunction = true;
1140 parseDefineArguments(&macro);
1141 } else {
1142 macro.isFunction = false;
1143 }
1145 until(PP_NEWLINE);
1146 macro.symbols.reserve(index - start - 1);
1147
1148 // remove whitespace where there shouldn't be any:
1149 // Before and after the macro, after a # and around ##
1150 Token lastToken = HASH; // skip whitespace at the beginning
1151 for (qsizetype i = start; i < index - 1; ++i) {
1153 if (token == WHITESPACE) {
1154 if (lastToken == PP_HASH || lastToken == HASH ||
1155 lastToken == PP_HASHHASH ||
1156 lastToken == WHITESPACE)
1157 continue;
1158 } else if (token == PP_HASHHASH) {
// Drop the whitespace symbol that was stored just before this '##'.
1159 if (!macro.symbols.isEmpty() &&
1160 lastToken == WHITESPACE)
1161 macro.symbols.pop_back();
1162 }
1163 macro.symbols.append(symbols.at(i));
1164 lastToken = token;
1165 }
1166 // remove trailing whitespace
1167 while (!macro.symbols.isEmpty() &&
1168 (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1169 macro.symbols.pop_back();
1170
1171 if (!macro.symbols.isEmpty()) {
1172 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1173 macro.symbols.constLast().token == PP_HASHHASH) {
1174 error("'##' cannot appear at either end of a macro expansion");
1175 }
1176 }
1177 macros.insert(name, macro);
1178 continue;
1179 }
1180 case PP_UNDEF: {
1181 next();
1182 QByteArray name = lexem();
1183 until(PP_NEWLINE);
1185 continue;
1186 }
1187 case PP_IDENTIFIER: {
1188 // substitute macros
1189 macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true);
1190 continue;
1191 }
1192 case PP_HASH:
1193 until(PP_NEWLINE);
1194 continue; // skip unknown preprocessor statement
1195 case PP_IFDEF:
1196 case PP_IFNDEF:
1197 case PP_IF:
// While the condition evaluates to false, skip the branch. The loop ends when
// skipBranch() returns false or when the branch is not followed by PP_ELIF;
// after a PP_ELIF the condition is evaluated again.
1198 while (!evaluateCondition()) {
1199 if (!skipBranch())
1200 break;
1201 if (test(PP_ELIF)) {
1202 } else {
1203 until(PP_NEWLINE);
1204 break;
1205 }
1206 }
1207 continue;
1208 case PP_ELIF:
1209 case PP_ELSE:
1211 Q_FALLTHROUGH();
1212 case PP_ENDIF:
1213 until(PP_NEWLINE);
1214 continue;
1215 case PP_NEWLINE:
1216 continue;
1217 case SIGNALS:
1218 case SLOTS: {
// With a macro named QT_NO_KEYWORDS defined these become plain identifiers;
// otherwise they are mapped to Q_SIGNALS_TOKEN / Q_SLOTS_TOKEN.
1219 Symbol sym = symbol();
1220 if (macros.contains("QT_NO_KEYWORDS"))
1221 sym.token = IDENTIFIER;
1222 else
1223 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1224 preprocessed += sym;
1225 } continue;
1226 default:
1227 break;
1228 }
1229 preprocessed += symbol();
1230 }
1231
1232 currentFilenames.pop();
1233}
1234
1236{
1238
1239 if (input.isEmpty())
1240 return symbols;
1241
1242 // phase 1: get rid of backslash-newlines
1243 input = cleaned(input);
1244
1245 // phase 2: tokenize for the preprocessor
1246 index = 0;
1248
1249#if 0
1250 for (int j = 0; j < symbols.size(); ++j)
1251 fprintf(stderr, "line %d: %s(%s)\n",
1252 symbols[j].lineNum,
1253 symbols[j].lexem().constData(),
1254 tokenTypeName(symbols[j].token));
1255#endif
1256
1257 // phase 3: preprocess conditions and substitute macros
1259 // Preallocate some space to speed up the code below.
1260 // The magic value was found by logging the final size
1261 // and calculating an average when running moc over FOSS projects.
1262 result.reserve(file->size() / 300000);
1263 preprocess(filename, result);
1265
1266#if 0
1267 for (int j = 0; j < result.size(); ++j)
1268 fprintf(stderr, "line %d: %s(%s)\n",
1269 result[j].lineNum,
1270 result[j].lexem().constData(),
1271 tokenTypeName(result[j].token));
1272#endif
1273
1274 return result;
1275}
1276
1278{
1280 while (hasNext()) {
1281 while (test(PP_WHITESPACE)) {}
1282 Token t = next();
1283 if (t == PP_RPAREN)
1284 break;
1285 if (t != PP_IDENTIFIER) {
1286 QByteArray l = lexem();
1287 if (l == "...") {
1288 m->isVariadic = true;
1289 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1290 while (test(PP_WHITESPACE)) {}
1291 if (!test(PP_RPAREN))
1292 error("missing ')' in macro argument list");
1293 break;
1294 } else if (!is_identifier(l.constData(), l.size())) {
1295 error("Unexpected character in macro argument list.");
1296 }
1297 }
1298
1299 Symbol arg = symbol();
1300 if (arguments.contains(arg))
1301 error("Duplicate macro parameter.");
1302 arguments += symbol();
1303
1304 while (test(PP_WHITESPACE)) {}
1305 t = next();
1306 if (t == PP_RPAREN)
1307 break;
1308 if (t == PP_COMMA)
1309 continue;
1310 if (lexem() == "...") {
1311 //GCC extension: #define FOO(x, y...) x(y)
1312 // The last argument was already parsed. Just mark the macro as variadic.
1313 m->isVariadic = true;
1314 while (test(PP_WHITESPACE)) {}
1315 if (!test(PP_RPAREN))
1316 error("missing ')' in macro argument list");
1317 break;
1318 }
1319 error("Unexpected character in macro argument list.");
1320 }
1321 m->arguments = arguments;
1322 while (test(PP_WHITESPACE)) {}
1323}
1324
1325void Preprocessor::until(Token t)
1326{
1327 while(hasNext() && next() != t)
1328 ;
1329}
1330
1332{
1333 debugIncludes = value;
1334}
1335
1336
int relational_expression()
int exclusive_OR_expression()
bool unary_expression_lookup()
int logical_OR_expression()
int equality_expression()
int logical_AND_expression()
int additive_expression()
int multiplicative_expression()
int conditional_expression()
bool primary_expression_lookup()
int inclusive_OR_expression()
QList< IncludePath > includes
Definition parser.h:30
Token next()
Definition parser.h:35
Token token()
Definition parser.h:43
Q_NORETURN void error(const Symbol &symbol)
Definition parser.cpp:58
bool hasNext() const
Definition parser.h:34
qsizetype index
Definition parser.h:19
void prev()
Definition parser.h:40
bool test(Token)
Definition parser.h:59
Token lookup(int k=1)
Definition parser.h:68
const Symbol & symbol()
Definition parser.h:46
QByteArray lexem()
Definition parser.h:44
Symbols symbols
Definition parser.h:18
std::stack< QByteArray, QByteArrayList > currentFilenames
Definition parser.h:32
QByteArray unquotedLexem()
Definition parser.h:45
int evaluateCondition()
QSet< QByteArray > preprocessedIncludes
void setDebugIncludes(bool value)
void parseDefineArguments(Macro *m)
void skipUntilEndif()
QHash< QByteArray, QByteArray > nonlocalIncludePathResolutionCache
Symbols preprocessed(const QByteArray &filename, QFile *device)
void substituteUntilNewline(Symbols &substituted)
static bool preprocessOnly
static Symbols macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
QByteArray resolveInclude(const QByteArray &filename, const QByteArray &relativeTo)
static Symbols tokenize(const QByteArray &input, int lineNum=1, TokenizeMode mode=TokenizeCpp)
static void macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, qsizetype &index, int lineNum, bool one, const QSet< QByteArray > &excludeSymbols=QSet< QByteArray >())
@ PreparePreprocessorStatement
@ TokenizePreprocessorStatement
\inmodule QtCore
Definition qbytearray.h:57
QByteArray & prepend(char c)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition qbytearray.h:216
qsizetype size() const noexcept
Returns the number of bytes in this byte array.
Definition qbytearray.h:474
void reserve(qsizetype size)
Attempts to allocate memory for at least size bytes.
Definition qbytearray.h:557
const char * constData() const noexcept
Returns a pointer to the const data stored in the byte array.
Definition qbytearray.h:122
int toInt(bool *ok=nullptr, int base=10) const
Returns the byte array converted to an int using base base, which is ten by default.
qsizetype indexOf(char c, qsizetype from=0) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
bool startsWith(QByteArrayView bv) const
Definition qbytearray.h:170
bool isEmpty() const noexcept
Returns true if the byte array has size 0; otherwise returns false.
Definition qbytearray.h:106
QByteArray left(qsizetype len) const
Returns a byte array that contains the first len bytes of this byte array.
void resize(qsizetype size)
Sets the size of the byte array to size bytes.
QByteArray & append(char c)
This is an overloaded member function, provided for convenience. It differs from the above function o...
bool isNull() const noexcept
Returns true if this byte array is null; otherwise returns false.
QByteArray mid(qsizetype index, qsizetype len=-1) const
Returns a byte array containing len bytes from this byte array, starting at position pos.
static QByteArray fromRawData(const char *data, qsizetype size)
Constructs a QByteArray that uses the first size bytes of the data array.
Definition qbytearray.h:394
QByteArray & replace(qsizetype index, qsizetype len, const char *s, qsizetype alen)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition qbytearray.h:275
uchar * map(qint64 offset, qint64 size, MemoryMapFlags flags=NoOptions)
Maps size bytes of the file into memory starting at offset.
void close() override
Calls QFileDevice::flush() and closes the file.
\inmodule QtCore \reentrant
Definition qfileinfo.h:22
void setFile(const QString &file)
Sets the file that the QFileInfo provides information about to file.
bool isDir() const
Returns true if this object points to a directory or to a symbolic link to a directory.
QString canonicalFilePath() const
Returns the canonical path including the file name, i.e.
QString filePath() const
Returns the file name, including the path (which may be absolute or relative).
bool exists() const
Returns true if the file exists; otherwise returns false.
\inmodule QtCore
Definition qfile.h:93
bool open(OpenMode flags) override
Opens the file using OpenMode mode, returning true if successful; otherwise false.
Definition qfile.cpp:881
qint64 size() const override
\reimp
Definition qfile.cpp:1156
bool remove(const Key &key)
Removes the item that has the key from the hash.
Definition qhash.h:956
iterator find(const Key &key)
Returns an iterator pointing to the item with the key in the hash.
Definition qhash.h:1258
bool contains(const Key &key) const noexcept
Returns true if the hash contains an item with the key; otherwise returns false.
Definition qhash.h:991
T value(const Key &key) const noexcept
Definition qhash.h:1044
iterator end() noexcept
Returns an \l{STL-style iterators}{STL-style iterator} pointing to the imaginary item after the last ...
Definition qhash.h:1206
iterator insert(const Key &key, const T &value)
Inserts a new item with the key and a value of value.
Definition qhash.h:1283
QByteArray readAll()
Reads all remaining data from the device, and returns it as a byte array.
void pop_back() noexcept
Definition qlist.h:676
qsizetype size() const noexcept
Definition qlist.h:386
bool isEmpty() const noexcept
Definition qlist.h:390
T & last()
Definition qlist.h:631
const T & constLast() const noexcept
Definition qlist.h:633
iterator erase(const_iterator begin, const_iterator end)
Definition qlist.h:882
iterator Iterator
Definition qlist.h:250
iterator end()
Definition qlist.h:609
const_reference at(qsizetype i) const noexcept
Definition qlist.h:429
value_type takeLast()
Definition qlist.h:550
iterator begin()
Definition qlist.h:608
const T & constFirst() const noexcept
Definition qlist.h:630
void reserve(qsizetype size)
Definition qlist.h:746
const_iterator cend() const noexcept
Definition qlist.h:614
void append(parameter_type t)
Definition qlist.h:441
const_iterator cbegin() const noexcept
Definition qlist.h:613
Definition qset.h:18
iterator insert(const T &value)
Definition qset.h:155
static QString fromLocal8Bit(QByteArrayView ba)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition qstring.cpp:5788
QByteArray toLocal8Bit() const &
Definition qstring.h:567
QSet< QString >::iterator it
QList< QVariant > arguments
else opt state
[0]
bool is_ident_start(char s)
static const short keyword_trans[][128]
Definition keywords.cpp:7
static const struct @480 keywords[]
Token ident
Definition keywords.cpp:448
char defchar
Definition keywords.cpp:446
bool is_identifier(const char *s, qsizetype len)
Definition utils.h:35
const char * skipQuote(const char *data)
Definition utils.h:42
Combined button and popup list for selecting options.
constexpr bool isAsciiDigit(char32_t c) noexcept
Definition qtools_p.h:67
constexpr bool isHexDigit(char32_t c) noexcept
Definition qtools_p.h:37
static const short pp_keyword_trans[][128]
Definition ppkeywords.cpp:7
static const struct @478 pp_keywords[]
static QByteArray readOrMapFile(QFile *file)
static void mergeStringLiterals(Symbols *_symbols)
static QByteArray searchIncludePaths(const QList< Parser::IncludePath > &includepaths, const QByteArray &include, const bool debugIncludes)
static QByteArray cleaned(const QByteArray &input)
#define Q_FALLTHROUGH()
#define Q_UNLIKELY(x)
DBusConnection const char DBusError * error
EGLOutputLayerEXT EGLint EGLAttrib value
[5]
static bool is_ident_char(char s)
static bool is_space(char s)
static bool contains(const QJsonArray &haystack, unsigned needle)
Definition qopengl.cpp:116
GLenum mode
const GLfloat * m
GLenum GLuint GLintptr GLsizeiptr size
[1]
GLuint index
[2]
GLuint GLuint end
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLuint start
GLuint name
GLenum GLenum GLsizei void GLsizei void * column
GLdouble GLdouble t
Definition qopenglext.h:243
GLuint64EXT * result
[6]
GLdouble s
[6]
Definition qopenglext.h:235
GLfloat GLfloat p
[1]
GLenum GLenum GLenum input
QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
SSL_CTX int(*) void arg)
ptrdiff_t qsizetype
Definition qtypes.h:70
long long qint64
Definition qtypes.h:55
QT_BEGIN_NAMESPACE typedef uchar * output
QFile file
[0]
QFileInfo fi("c:/temp/foo")
[newstuff]
QString dir
[11]
QDBusArgument argument
Symbols symbols
bool isVariadic
bool isFunction
Symbols arguments
TokenType token
qsizetype indexOf(const AT &t, qsizetype from=0) const noexcept
Definition qlist.h:955
bool contains(const AT &t) const noexcept
Definition qlist.h:44
qsizetype index
Definition symbols.h:113
Symbols symbols
Definition symbols.h:110
QSet< QByteArray > excludedSymbols
Definition symbols.h:112
QByteArray expandedMacro
Definition symbols.h:111
Token token
Definition symbols.h:91
int lineNum
Definition symbols.h:90
QByteArray lexem() const
Definition symbols.h:92
QByteArray unquotedLexem() const
Definition symbols.h:93
QList< Symbol > Symbols
Definition symbols.h:107
Token
Definition token.h:194
@ PP_PLUS
Definition token.h:230
@ PP_LTLT
Definition token.h:221
@ PP_STAR
Definition token.h:237
@ PP_SLASH
Definition token.h:236
@ PP_CHARACTER
Definition token.h:204
@ PP_C_COMMENT
Definition token.h:209
@ PP_NEWLINE
Definition token.h:224
@ PP_FLOATING_LITERAL
Definition token.h:212
@ PP_STRING_LITERAL
Definition token.h:238
@ PP_NOT
Definition token.h:226
@ PP_LANGLE
Definition token.h:218
@ PP_RANGLE
Definition token.h:233
@ PP_COLON
Definition token.h:206
@ PP_QUESTION
Definition token.h:231
@ PP_HAT
Definition token.h:215
@ PP_COMMA
Definition token.h:207
@ PP_IDENTIFIER
Definition token.h:216
@ PP_ANDAND
Definition token.h:202
@ PP_TILDE
Definition token.h:239
@ PP_GE
Definition token.h:213
@ PP_EQEQ
Definition token.h:211
@ PP_PERCENT
Definition token.h:229
@ PP_OR
Definition token.h:227
@ PP_CPP_COMMENT
Definition token.h:208
@ PP_INTEGER_LITERAL
Definition token.h:217
@ PP_LPAREN
Definition token.h:220
@ PP_AND
Definition token.h:201
@ PP_SINGLEQUOTE
Definition token.h:235
@ PP_QUOTE
Definition token.h:232
@ PP_WHITESPACE
Definition token.h:240
@ PP_GTGT
Definition token.h:214
@ PP_CHARACTER_LITERAL
Definition token.h:205
@ PP_BACKSLASH
Definition token.h:203
@ PP_NE
Definition token.h:223
@ PP_OROR
Definition token.h:228
@ PP_MINUS
Definition token.h:222
@ PP_DIGIT
Definition token.h:210
@ PP_LE
Definition token.h:219
@ PP_RPAREN
Definition token.h:234