5 * LTTng filter expression parser
7 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
9 * SPDX-License-Identifier: LGPL-2.1-only
11 * Grammar inspired from http://www.quut.com/c/ANSI-C-grammar-y.html
21 #include "filter-ast.h"
22 #include "filter-parser.h"
24 #include <common/macros.h>
/*
 * Maximum field widths, kept as string literals so they can be pasted
 * into the sscanf() formats of the numeric-constant grammar actions and
 * bound how many characters each conversion consumes.
 * NOTE(review): 20 and 22 are the digit counts of the largest 64-bit
 * value in decimal and octal; 16 hex digits would suffice for the same
 * value, so confirm that 17 is intentional.
 */
26 #define WIDTH_u64_SCANF_IS_A_BROKEN_API "20"
27 #define WIDTH_o64_SCANF_IS_A_BROKEN_API "22"
28 #define WIDTH_x64_SCANF_IS_A_BROKEN_API "17"
29 #define WIDTH_lg_SCANF_IS_A_BROKEN_API "4096" /* Hugely optimistic approximation */
/*
 * Debug switch, 0 by default. Its value is copied into yydebug each time
 * a parser context is allocated.
 */
34 int filter_parser_debug = 0;
/*
 * Parser and scanner entry points. yyparse(), yylex_init_extra(),
 * yyrestart() and yylex_destroy() are called by the context helpers in
 * this file. NOTE(review): presumably provided by the generated
 * parser/lexer -- confirm.
 */
37 int yyparse(struct filter_parser_ctx *parser_ctx, yyscan_t scanner);
39 int yylex(union YYSTYPE *yyval, yyscan_t scanner);
41 int yylex_init_extra(struct filter_parser_ctx *parser_ctx, yyscan_t * ptr_yy_globals);
43 int yylex_destroy(yyscan_t yyparser_ctx);
45 void yyrestart(FILE * in_str, yyscan_t parser_ctx);
48 struct cds_list_head gc;
/*
 * Printable name of each node type. Designated initializers index the
 * table by the enumerator value, so the entry order does not matter.
 */
53 static const char *node_type_to_str[] = {
54 [ NODE_UNKNOWN ] = "NODE_UNKNOWN",
55 [ NODE_ROOT ] = "NODE_ROOT",
56 [ NODE_EXPRESSION ] = "NODE_EXPRESSION",
57 [ NODE_OP ] = "NODE_OP",
58 [ NODE_UNARY_OP ] = "NODE_UNARY_OP",
/*
 * Returns the printable name of node's type, looked up in
 * node_type_to_str, when the type is below NR_NODE_TYPES.
 * NOTE(review): the value returned for out-of-range types should be
 * confirmed against the complete function.
 */
62 const char *node_type(struct filter_node *node)
/* Only the upper bound is checked before indexing the table. */
64 if (node->type < NR_NODE_TYPES)
65 return node_type_to_str[node->type];
/*
 * Allocates a gc_string large enough for len bytes of text and links it
 * on parser_ctx->allocated_strings, the list handed to free_strings()
 * when the parser context is released. The chosen allocation size is
 * stored in the string's alloclen field.
 */
70 static struct gc_string *gc_string_alloc(struct filter_parser_ctx *parser_ctx,
73 struct gc_string *gstr;
76 /* TODO: could be faster with find first bit or glib Gstring */
77 /* sizeof long to account for malloc header (int or long ?) */
/*
 * Start at 8 and grow alloclen until it covers the malloc-header
 * allowance, the gc_string header and the payload.
 * NOTE(review): the growth step is presumably a doubling -- confirm.
 */
78 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + len;
/* NOTE(review): zmalloc() presumably returns zero-filled memory -- confirm. */
81 gstr = zmalloc(alloclen);
/* Register the string with the context so it is tracked for later release. */
85 cds_list_add(&gstr->gc, &parser_ctx->allocated_strings);
86 gstr->alloclen = alloclen;
92 * note: never use gc_string_append on a string that has external references.
93 * gsrc will be garbage collected immediately, and gstr might be.
94 * Should only be used to append characters to a string literal or constant.
/*
 * Appends the text of gsrc to the text of gstr.
 *
 * The size needed for the combined text is computed with the same
 * sizing loop as gc_string_alloc(). If it exceeds gstr->alloclen, a new
 * gc_string is allocated, both texts are copied into it and gstr is
 * unlinked from its list; otherwise gsrc is concatenated onto gstr in
 * place. gsrc is unlinked from its list afterwards, so callers must not
 * keep references to it.
 */
97 struct gc_string *gc_string_append(struct filter_parser_ctx *parser_ctx,
98 struct gc_string *gstr,
99 struct gc_string *gsrc)
/* Combined text length, plus one for the terminating NUL. */
101 size_t newlen = strlen(gsrc->s) + strlen(gstr->s) + 1;
104 /* TODO: could be faster with find first bit or glib Gstring */
105 /* sizeof long to account for malloc header (int or long ?) */
106 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + newlen;
/* Existing buffer too small: build the result in a fresh gc_string. */
109 if (alloclen > gstr->alloclen) {
110 struct gc_string *newgstr;
112 newgstr = gc_string_alloc(parser_ctx, newlen);
113 strcpy(newgstr->s, gstr->s);
114 strcat(newgstr->s, gsrc->s);
/* The old string is replaced, so drop it from the tracking list. */
115 cds_list_del(&gstr->gc);
/* Enough room already: append in place. */
119 strcat(gstr->s, gsrc->s);
/* The source string has been consumed; stop tracking it. */
121 cds_list_del(&gsrc->gc);
/*
 * Stores in lvalp->gs a newly allocated gc_string holding a copy of src.
 * The requested length is strlen(src) + 1, i.e. it includes the
 * terminating NUL. The string is tracked by parser_ctx through
 * gc_string_alloc().
 */
127 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src)
129 lvalp->gs = gc_string_alloc(parser_ctx, strlen(src) + 1);
130 strcpy(lvalp->gs->s, src);
/*
 * Allocates a filter node and links it on the AST's allocated_nodes
 * list, which filter_ast_free() walks. Requests for a root node or for
 * an unrecognized node type are reported on stderr.
 * Despite its name, the scanner parameter is the parser context.
 */
133 static struct filter_node *make_node(struct filter_parser_ctx *scanner,
136 struct filter_ast *ast = filter_parser_get_ast(scanner);
137 struct filter_node *node;
139 node = zmalloc(sizeof(*node));
/* NOTE(review): redundant if zmalloc() already zero-fills -- confirm. */
142 memset(node, 0, sizeof(*node));
/* Track the node so it can be released together with the AST. */
144 cds_list_add(&node->gc, &ast->allocated_nodes);
148 fprintf(stderr, "[error] %s: trying to create root node\n", __func__);
151 case NODE_EXPRESSION:
160 fprintf(stderr, "[error] %s: unknown node type %d\n", __func__,
/*
 * Allocates a NODE_OP node for a binary operator: records the operator
 * type and both operands, and links the node on the AST's
 * allocated_nodes list.
 * Despite its name, the scanner parameter is the parser context.
 */
168 static struct filter_node *make_op_node(struct filter_parser_ctx *scanner,
170 struct filter_node *lchild,
171 struct filter_node *rchild)
173 struct filter_ast *ast = filter_parser_get_ast(scanner);
174 struct filter_node *node;
176 node = zmalloc(sizeof(*node));
/* NOTE(review): redundant if zmalloc() already zero-fills -- confirm. */
179 memset(node, 0, sizeof(*node));
180 node->type = NODE_OP;
/* Track the node so it can be released together with the AST. */
181 cds_list_add(&node->gc, &ast->allocated_nodes);
182 node->u.op.type = type;
183 node->u.op.lchild = lchild;
184 node->u.op.rchild = rchild;
/*
 * Error reporting hook: prints str on stderr, prefixed with "error ".
 * parser_ctx and scanner are not used by the statement below.
 */
189 void yyerror(struct filter_parser_ctx *parser_ctx, yyscan_t scanner, const char *str)
191 fprintf(stderr, "error %s\n", str);
/*
 * Reports a parse error through yyerror() using the context's scanner.
 * str must be a string literal: it is joined to the "parse error: "
 * prefix and the trailing newline by adjacent-literal concatenation.
 */
194 #define parse_error(parser_ctx, str) \
196 yyerror(parser_ctx, parser_ctx->scanner, YY_("parse error: " str "\n")); \
/*
 * Visits every gc_string linked on list through its gc member, using
 * the list iterator variant that keeps a lookahead pointer (tmp).
 * NOTE(review): presumably each entry is freed in the loop body --
 * confirm against the complete function.
 */
200 static void free_strings(struct cds_list_head *list)
202 struct gc_string *gstr, *tmp;
204 cds_list_for_each_entry_safe(gstr, tmp, list, gc)
/*
 * Allocates an AST, initializes its (empty) allocated_nodes list and
 * marks the embedded root node as NODE_ROOT.
 */
208 static struct filter_ast *filter_ast_alloc(void)
210 struct filter_ast *ast;
212 ast = zmalloc(sizeof(*ast));
/* NOTE(review): redundant if zmalloc() already zero-fills -- confirm. */
215 memset(ast, 0, sizeof(*ast));
216 CDS_INIT_LIST_HEAD(&ast->allocated_nodes);
217 ast->root.type = NODE_ROOT;
/*
 * Visits every node linked on ast->allocated_nodes, i.e. every node
 * registered by make_node() and make_op_node().
 * NOTE(review): presumably each node, then the AST itself, is freed --
 * confirm against the complete function.
 */
221 static void filter_ast_free(struct filter_ast *ast)
223 struct filter_node *node, *tmp;
225 cds_list_for_each_entry_safe(node, tmp, &ast->allocated_nodes, gc)
/*
 * Runs the parser on the context's scanner and returns yyparse()'s
 * result unchanged.
 */
231 int filter_parser_ctx_append_ast(struct filter_parser_ctx *parser_ctx)
233 return yyparse(parser_ctx, parser_ctx->scanner);
/*
 * Allocates a parser context that reads its expression from input.
 *
 * Steps visible here: copy the debug switch into yydebug, allocate the
 * context, create the scanner, point the scanner at input, allocate the
 * AST, and initialize the list that tracks gc_strings. On failure the
 * scanner is destroyed again.
 * NOTE(review): the return value on each path should be confirmed
 * against the complete function.
 */
237 struct filter_parser_ctx *filter_parser_ctx_alloc(FILE *input)
239 struct filter_parser_ctx *parser_ctx;
242 yydebug = filter_parser_debug;
244 parser_ctx = zmalloc(sizeof(*parser_ctx));
/* NOTE(review): redundant if zmalloc() already zero-fills -- confirm. */
247 memset(parser_ctx, 0, sizeof(*parser_ctx));
/* Create the scanner, passing parser_ctx along as its extra argument. */
249 ret = yylex_init_extra(parser_ctx, &parser_ctx->scanner);
251 fprintf(stderr, "yylex_init error\n");
252 goto cleanup_parser_ctx;
254 /* Start processing new stream */
255 yyrestart(input, parser_ctx->scanner);
257 parser_ctx->ast = filter_ast_alloc();
258 if (!parser_ctx->ast)
260 CDS_INIT_LIST_HEAD(&parser_ctx->allocated_strings);
/*
 * Tell on stdout whether input is an interactive terminal.
 * NOTE(review): presumably only printed in debug mode -- confirm.
 */
263 fprintf(stdout, "parser_ctx input is a%s.\n",
264 isatty(fileno(input)) ? "n interactive tty" :
265 " noninteractive file");
/* Tear the scanner down and report it if that fails too. */
270 ret = yylex_destroy(parser_ctx->scanner);
272 fprintf(stderr, "yylex_destroy error\n");
/*
 * Releases what a parser context owns: the tracked gc_strings, the AST,
 * and the scanner. A failure to destroy the scanner is reported on
 * stderr.
 */
279 void filter_parser_ctx_free(struct filter_parser_ctx *parser_ctx)
283 free_strings(&parser_ctx->allocated_strings);
284 filter_ast_free(parser_ctx->ast);
285 ret = yylex_destroy(parser_ctx->scanner);
287 fprintf(stderr, "yylex_destroy error\n");
295 #include "common/macros.h"
/* Prototype of setstring(), whose definition appears earlier in this file. */
298 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src);
/*
 * Extra arguments of yyparse(): the parser context and the scanner
 * handle. The scanner handle is also forwarded to yylex().
 */
303 %parse-param {struct filter_parser_ctx *parser_ctx}
304 %parse-param {yyscan_t scanner}
305 %lex-param {yyscan_t scanner}
/* Parsing starts at the translation_unit nonterminal. */
306 %start translation_unit
/* Tokens declared without a semantic value type tag. */
307 %token CHARACTER_CONSTANT_START SQUOTE STRING_LITERAL_START DQUOTE
308 %token ESCSEQ CHAR_STRING_TOKEN
309 %token DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT FLOAT_CONSTANT
310 %token LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW
311 %token STAR PLUS MINUS
312 %token MOD_OP DIV_OP RIGHT_OP LEFT_OP
313 %token EQ_OP NE_OP LE_OP GE_OP LT_OP GT_OP AND_OP OR_OP NOT_OP
314 %token ASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA
315 %token XOR_BIN AND_BIN OR_BIN NOT_BIN
/* Identifier tokens carry their text as a gc_string (the gs member). */
317 %token <gs> IDENTIFIER GLOBAL_IDENTIFIER
/* Semantic value members: gs for text, n for AST nodes. */
323 struct gc_string *gs;
324 struct filter_node *n;
/* Nonterminals whose value is a gc_string. */
327 %type <gs> s_char s_char_sequence c_char c_char_sequence
/* Nonterminals whose value is an AST node. */
329 %type <n> primary_expression
330 %type <n> prefix_expression
331 %type <n> prefix_expression_rec
332 %type <n> postfix_expression
333 %type <n> unary_expression
334 %type <n> unary_operator
335 %type <n> multiplicative_expression
336 %type <n> additive_expression
337 %type <n> shift_expression
338 %type <n> relational_expression
339 %type <n> equality_expression
340 %type <n> and_expression
341 %type <n> exclusive_or_expression
342 %type <n> inclusive_or_expression
343 %type <n> logical_and_expression
344 %type <n> logical_or_expression
346 %type <n> identifiers
/*
 * Character-constant text is built one piece at a time: each new c_char
 * is appended to the sequence accumulated so far.
 */
356 | c_char_sequence c_char
357 { $$ = gc_string_append(parser_ctx, $1, $2); }
/* Escape sequences are rejected with a parse error. */
365 parse_error(parser_ctx, "escape sequences not supported yet");
369 /* 1.6 String literals */
/* String-literal text is accumulated the same way. */
374 | s_char_sequence s_char
375 { $$ = gc_string_append(parser_ctx, $1, $2); }
/* Escape sequences are rejected here as well. */
383 parse_error(parser_ctx, "escape sequences not supported yet");
/*
 * Constant, string, nested-expression and identifier actions. Each one
 * creates a NODE_EXPRESSION node and fills in the matching member of
 * u.expression.u. Numeric text is read from yylval.gs->s with a
 * width-bounded sscanf() conversion.
 */
/* Decimal constant: unsigned 64-bit conversion. */
390 $$ = make_node(parser_ctx, NODE_EXPRESSION);
391 $$->u.expression.type = AST_EXP_CONSTANT;
392 if (sscanf(yylval.gs->s, "%" WIDTH_u64_SCANF_IS_A_BROKEN_API SCNu64,
393 &$$->u.expression.u.constant) != 1) {
394 parse_error(parser_ctx, "cannot scanf decimal constant");
/* Octal constant. */
399 $$ = make_node(parser_ctx, NODE_EXPRESSION);
400 $$->u.expression.type = AST_EXP_CONSTANT;
/*
 * A lone "0" is handled separately: the format below consumes the
 * leading 0 literally and then needs at least one more digit.
 */
401 if (!strcmp(yylval.gs->s, "0")) {
402 $$->u.expression.u.constant = 0;
403 } else if (sscanf(yylval.gs->s, "0%" WIDTH_o64_SCANF_IS_A_BROKEN_API SCNo64,
404 &$$->u.expression.u.constant) != 1) {
405 parse_error(parser_ctx, "cannot scanf octal constant");
408 | HEXADECIMAL_CONSTANT
409 /* The "0x" prefix is matched literally by the format string. */
410 $$ = make_node(parser_ctx, NODE_EXPRESSION);
411 $$->u.expression.type = AST_EXP_CONSTANT;
412 if (sscanf(yylval.gs->s, "0x%" WIDTH_x64_SCANF_IS_A_BROKEN_API SCNx64,
413 &$$->u.expression.u.constant) != 1) {
414 parse_error(parser_ctx, "cannot scanf hexadecimal constant");
/* Floating-point constant, stored in float_constant. */
419 $$ = make_node(parser_ctx, NODE_EXPRESSION);
420 $$->u.expression.type = AST_EXP_FLOAT_CONSTANT;
421 if (sscanf(yylval.gs->s, "%" WIDTH_lg_SCANF_IS_A_BROKEN_API "lg",
422 &$$->u.expression.u.float_constant) != 1) {
423 parse_error(parser_ctx, "cannot scanf float constant");
/* Empty string literal: the node points at a static empty string. */
426 | STRING_LITERAL_START DQUOTE
428 $$ = make_node(parser_ctx, NODE_EXPRESSION);
429 $$->u.expression.type = AST_EXP_STRING;
430 $$->u.expression.u.string = "";
/*
 * Non-empty string literal: the node points into the gc_string's own
 * buffer rather than at a copy.
 */
432 | STRING_LITERAL_START s_char_sequence DQUOTE
434 $$ = make_node(parser_ctx, NODE_EXPRESSION);
435 $$->u.expression.type = AST_EXP_STRING;
436 $$->u.expression.u.string = $2->s;
/* Character constants are represented as string expressions too. */
438 | CHARACTER_CONSTANT_START c_char_sequence SQUOTE
440 $$ = make_node(parser_ctx, NODE_EXPRESSION);
441 $$->u.expression.type = AST_EXP_STRING;
442 $$->u.expression.u.string = $2->s;
/* Parenthesized expression: wrapped in an AST_EXP_NESTED node. */
444 | LPAREN expression RPAREN
446 $$ = make_node(parser_ctx, NODE_EXPRESSION);
447 $$->u.expression.type = AST_EXP_NESTED;
448 $$->u.expression.u.child = $2;
/* Identifier: the node points at the token's text. */
455 $$ = make_node(parser_ctx, NODE_EXPRESSION);
456 $$->u.expression.type = AST_EXP_IDENTIFIER;
457 $$->u.expression.u.identifier = yylval.gs->s;
/* Global identifier: same layout, different expression type. */
461 $$ = make_node(parser_ctx, NODE_EXPRESSION);
462 $$->u.expression.type = AST_EXP_GLOBAL_IDENTIFIER;
463 $$->u.expression.u.identifier = yylval.gs->s;
/*
 * A chain of one or more bracketed unary_expression suffixes. The rule
 * is right-recursive: the rest of the chain is the fourth symbol.
 */
467 prefix_expression_rec
468 : LSBRAC unary_expression RSBRAC
472 | LSBRAC unary_expression RSBRAC prefix_expression_rec
/* Mark the link as a bracket and record the rest of the chain in prev. */
475 $$->u.expression.pre_op = AST_LINK_BRACKET;
476 $$->u.expression.prev = $4;
/* Identifier followed by brackets: the chain hangs off next_bracket. */
485 | identifiers prefix_expression_rec
488 $$->u.expression.pre_op = AST_LINK_BRACKET;
489 $$->u.expression.next_bracket = $2;
/*
 * Member access with '.' or '->': the resulting node records the link
 * kind in post_op and the left operand in prev.
 */
498 | postfix_expression DOT prefix_expression
501 $$->u.expression.post_op = AST_LINK_DOT;
502 $$->u.expression.prev = $1;
504 | postfix_expression RARROW prefix_expression
507 $$->u.expression.post_op = AST_LINK_RARROW;
508 $$->u.expression.prev = $1;
/* Unary operator applied to an operand: the operand becomes its child. */
517 | unary_operator unary_expression
520 $$->u.unary_op.child = $2;
/*
 * Unary operator actions: each creates a NODE_UNARY_OP node tagged with
 * the operator kind (plus, minus, logical not, bitwise not). The
 * operand is attached later by the enclosing rule.
 */
527 $$ = make_node(parser_ctx, NODE_UNARY_OP);
528 $$->u.unary_op.type = AST_UNARY_PLUS;
532 $$ = make_node(parser_ctx, NODE_UNARY_OP);
533 $$->u.unary_op.type = AST_UNARY_MINUS;
537 $$ = make_node(parser_ctx, NODE_UNARY_OP);
538 $$->u.unary_op.type = AST_UNARY_NOT;
542 $$ = make_node(parser_ctx, NODE_UNARY_OP);
543 $$->u.unary_op.type = AST_UNARY_BIT_NOT;
/*
 * Arithmetic and shift operators. Each rule is left-recursive, so the
 * operators are left-associative. Layering gives multiplication,
 * division and modulo the tightest binding, then addition and
 * subtraction, then shifts. Every action builds a binary NODE_OP node
 * from its two operands.
 */
547 multiplicative_expression
550 | multiplicative_expression STAR unary_expression
552 $$ = make_op_node(parser_ctx, AST_OP_MUL, $1, $3);
554 | multiplicative_expression DIV_OP unary_expression
556 $$ = make_op_node(parser_ctx, AST_OP_DIV, $1, $3);
558 | multiplicative_expression MOD_OP unary_expression
560 $$ = make_op_node(parser_ctx, AST_OP_MOD, $1, $3);
/* Additive level, built on top of the multiplicative level. */
565 : multiplicative_expression
567 | additive_expression PLUS multiplicative_expression
569 $$ = make_op_node(parser_ctx, AST_OP_PLUS, $1, $3);
571 | additive_expression MINUS multiplicative_expression
573 $$ = make_op_node(parser_ctx, AST_OP_MINUS, $1, $3);
/* Shift level, built on top of the additive level. */
578 : additive_expression
580 | shift_expression LEFT_OP additive_expression
582 $$ = make_op_node(parser_ctx, AST_OP_BIT_LSHIFT, $1, $3);
584 | shift_expression RIGHT_OP additive_expression
586 $$ = make_op_node(parser_ctx, AST_OP_BIT_RSHIFT, $1, $3);
/*
 * Bitwise operators, layered directly above the shift level: AND binds
 * tighter than XOR, which binds tighter than OR. All three rules are
 * left-recursive, hence left-associative.
 */
593 | and_expression AND_BIN shift_expression
595 $$ = make_op_node(parser_ctx, AST_OP_BIT_AND, $1, $3);
599 exclusive_or_expression
602 | exclusive_or_expression XOR_BIN and_expression
604 $$ = make_op_node(parser_ctx, AST_OP_BIT_XOR, $1, $3);
608 inclusive_or_expression
609 : exclusive_or_expression
611 | inclusive_or_expression OR_BIN exclusive_or_expression
613 $$ = make_op_node(parser_ctx, AST_OP_BIT_OR, $1, $3);
/*
 * Comparison operators. Relational operands are inclusive_or_expression,
 * so the bitwise operators bind tighter than comparisons in this
 * grammar. This differs from ISO C, where comparisons bind tighter than
 * the bitwise operators.
 */
617 relational_expression
618 : inclusive_or_expression
620 | relational_expression LT_OP inclusive_or_expression
622 $$ = make_op_node(parser_ctx, AST_OP_LT, $1, $3);
624 | relational_expression GT_OP inclusive_or_expression
626 $$ = make_op_node(parser_ctx, AST_OP_GT, $1, $3);
628 | relational_expression LE_OP inclusive_or_expression
630 $$ = make_op_node(parser_ctx, AST_OP_LE, $1, $3);
632 | relational_expression GE_OP inclusive_or_expression
634 $$ = make_op_node(parser_ctx, AST_OP_GE, $1, $3);
/* Equality level, built on top of the relational level. */
639 : relational_expression
641 | equality_expression EQ_OP relational_expression
643 $$ = make_op_node(parser_ctx, AST_OP_EQ, $1, $3);
645 | equality_expression NE_OP relational_expression
647 $$ = make_op_node(parser_ctx, AST_OP_NE, $1, $3);
/*
 * Logical operators: AND binds tighter than OR, and both are
 * left-associative. They sit directly above the equality level.
 */
651 logical_and_expression
652 : equality_expression
654 | logical_and_expression AND_OP equality_expression
656 $$ = make_op_node(parser_ctx, AST_OP_AND, $1, $3);
660 logical_or_expression
661 : logical_and_expression
663 | logical_or_expression OR_OP logical_and_expression
665 $$ = make_op_node(parser_ctx, AST_OP_OR, $1, $3);
/* The loosest-binding level is a logical_or_expression. */
670 : logical_or_expression
/* Hook the parsed expression under the AST's root node. */
677 parser_ctx->ast->root.u.root.child = $1;