From 7d6a248f2c68d70f58387afc69e73e695c3d940c Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 4 May 2020 23:20:08 -0400 Subject: [PATCH] rc: move free carat handling into parser This fixes at least one shell script (printfont) that expected 'x'`{y}'z' to mean 'x'^`{y}^'z' as it now does. Before it meant: 'x'^`{y} 'z' One surprise is that adjacent lists get a free carat: (x y z)(1 2 3) is (x1 y2 z3) This doesn't affect any rc script in Plan 9 or plan9port. --- man/man1/rc.1 | 26 +------- src/cmd/rc/lex.c | 6 +- src/cmd/rc/parse.c | 162 +++++++++++++++++++++++++-------------------- src/cmd/rc/syn.y | 2 +- src/cmd/rc/test.rc | 11 +++ 5 files changed, 108 insertions(+), 99 deletions(-) diff --git a/man/man1/rc.1 b/man/man1/rc.1 index 7ea8998a..df7af05b 100644 --- a/man/man1/rc.1 +++ b/man/man1/rc.1 @@ -290,28 +290,10 @@ then one operand must have one component, and the other must be non-empty, and concatenation is distributive. .PD .SS Free Carets -In most circumstances, -.I rc +.I Rc will insert the .B ^ operator automatically between words that are not separated by white space. -Whenever one of -.B $ -.B ' -.B ` -follows a quoted or unquoted word or an unquoted word follows a quoted word -with no intervening blanks or tabs, -a -.B ^ -is inserted between the two. -If an unquoted word immediately follows a -.BR $ -and contains a character other than an alphanumeric, underscore, -or -.BR * , -a -.B ^ -is inserted before the first such character. Thus .IP .B cc -$flags $stem.c @@ -367,7 +349,7 @@ or .I Fd1 is a previously opened file descriptor and .I fd0 -becomes a new copy (in the sense of +becomes a new copy (in the sense of .IR dup (3)) of it. A file descriptor may be closed by writing @@ -477,7 +459,7 @@ is executed. The .I command is executed once for each -.IR argument +.IR argument with that argument assigned to .IR name . If the argument list is omitted, @@ -982,8 +964,6 @@ changes .PP Functions that use here documents don't work. .PP -Free carets don't get inserted next to keywords. -.PP The .BI <{ command } syntax depends on the underlying operating system diff --git a/src/cmd/rc/lex.c b/src/cmd/rc/lex.c index 58338479..48bd70de 100644 --- a/src/cmd/rc/lex.c +++ b/src/cmd/rc/lex.c @@ -202,7 +202,7 @@ yylex(void) * if the next character is the first character of a simple or compound word, * we insert a `^' before it. */ - if(lastword){ + if(lastword && flag['Y']){ lastword = 0; if(d=='('){ advance(); @@ -215,8 +215,8 @@ yylex(void) } } inquote = 0; - if(skipwhite() && flag['Z']) - return SP; + if(skipwhite() && !flag['Y']) + return ' '; switch(c = advance()){ case EOF: lastdol = 0; diff --git a/src/cmd/rc/parse.c b/src/cmd/rc/parse.c index 466b7da2..11be951b 100644 --- a/src/cmd/rc/parse.c +++ b/src/cmd/rc/parse.c @@ -23,7 +23,15 @@ static jmp_buf yyjmp; static int dropnl(int tok) { - while(tok == '\n') + while(tok == ' ' || tok == '\n') + tok = yylex(); + return tok; +} + +static int +dropsp(int tok) +{ + while(tok == ' ') tok = yylex(); return tok; } @@ -49,7 +57,7 @@ parse(void) // rc: { return 1;} // | line '\n' {return !compile($1);} - tok = yylex(); + tok = dropsp(yylex()); if(tok == EOF) return 1; t = line(tok, &tok); @@ -117,6 +125,7 @@ brace(int tok) // brace: '{' body '}' {$$=tree1(BRACE, $2);} + tok = dropsp(tok); if(tok != '{') syntax(tok); t = body(yylex(), &tok); @@ -132,6 +141,7 @@ paren(int tok) // paren: '(' body ')' {$$=tree1(PCMD, $2);} + tok = dropsp(tok); if(tok != '(') syntax(tok); t = body(yylex(), &tok); @@ -172,11 +182,12 @@ yyredir(int tok, int *ptok) syntax(tok); case DUP: r = yylval.tree; - *ptok = yylex(); + *ptok = dropsp(yylex()); break; case REDIR: r = yylval.tree; - w = yyword(yylex(), ptok); + w = yyword(yylex(), &tok); + *ptok = dropsp(tok); r = mung1(r, r->rtype==HERE?heredoc(w):w); break; } @@ -186,69 +197,11 @@ yyredir(int tok, int *ptok) static tree* cmd(int tok, int *ptok) { - tree *t1, *t2, *t3, *t4; - + tok = dropsp(tok); switch(tok) { default: return cmd2(tok, ptok); - case IF: - // | IF paren {skipnl();} cmd {$$=mung2($1, $2, $4);} - // | IF NOT {skipnl();} cmd {$$=mung1($2, $4);} - t1 = yylval.tree; - tok = yylex(); - if(tok == NOT) { - t1 = yylval.tree; - t2 = cmd(dropnl(yylex()), ptok); - return mung1(t1, t2); - } - t2 = paren(tok); - t3 = cmd(dropnl(yylex()), ptok); - return mung2(t1, t2, t3); - - case FOR: - // | FOR '(' word IN words ')' {skipnl();} cmd - // {$$=mung3($1, $3, $5 ? $5 : tree1(PAREN, $5), $8);} - // | FOR '(' word ')' {skipnl();} cmd - // {$$=mung3($1, $3, (tree *)0, $6);} - t1 = yylval.tree; - tok = yylex(); - if(tok != '(') - syntax(tok); - t2 = yyword(yylex(), &tok); - switch(tok) { - default: - syntax(tok); - case ')': - t3 = nil; - break; - case IN: - t3 = words(yylex(), &tok); - if(t3 == nil) - t3 = tree1(PAREN, nil); - if(tok != ')') - syntax(tok); - break; - } - t4 = cmd(dropnl(yylex()), ptok); - return mung3(t1, t2, t3, t4); - - case WHILE: - // | WHILE paren {skipnl();} cmd - // {$$=mung2($1, $2, $4);} - t1 = yylval.tree; - t2 = paren(yylex()); - t3 = cmd(dropnl(yylex()), ptok); - return mung2(t1, t2, t3); - - case SWITCH: - // | SWITCH word {skipnl();} brace - // {$$=tree2(SWITCH, $2, $4);} - t1 = yyword(yylex(), &tok); - tok = dropnl(tok); // doesn't work in yacc grammar but works here! - t2 = brace(tok); - *ptok = yylex(); - return tree2(SWITCH, t1, t2); } } @@ -290,8 +243,9 @@ cmd3(int tok, int *ptok) static tree* cmd4(int tok, int *ptok) { - tree *t1, *t2, *t3; + tree *t1, *t2, *t3, *t4; + tok = dropsp(tok); switch(tok) { case ';': case '&': @@ -300,9 +254,62 @@ cmd4(int tok, int *ptok) return nil; case IF: + // | IF paren {skipnl();} cmd {$$=mung2($1, $2, $4);} + // | IF NOT {skipnl();} cmd {$$=mung1($2, $4);} + t1 = yylval.tree; + tok = dropsp(yylex()); + if(tok == NOT) { + t1 = yylval.tree; + t2 = cmd(dropnl(yylex()), ptok); + return mung1(t1, t2); + } + t2 = paren(tok); + t3 = cmd(dropnl(yylex()), ptok); + return mung2(t1, t2, t3); + case FOR: - case SWITCH: + // | FOR '(' word IN words ')' {skipnl();} cmd + // {$$=mung3($1, $3, $5 ? $5 : tree1(PAREN, $5), $8);} + // | FOR '(' word ')' {skipnl();} cmd + // {$$=mung3($1, $3, (tree *)0, $6);} + t1 = yylval.tree; + tok = dropsp(yylex()); + if(tok != '(') + syntax(tok); + t2 = yyword(yylex(), &tok); + switch(tok) { + default: + syntax(tok); + case ')': + t3 = nil; + break; + case IN: + t3 = words(yylex(), &tok); + if(t3 == nil) + t3 = tree1(PAREN, nil); + if(tok != ')') + syntax(tok); + break; + } + t4 = cmd(dropnl(yylex()), ptok); + return mung3(t1, t2, t3, t4); + case WHILE: + // | WHILE paren {skipnl();} cmd + // {$$=mung2($1, $2, $4);} + t1 = yylval.tree; + t2 = paren(yylex()); + t3 = cmd(dropnl(yylex()), ptok); + return mung2(t1, t2, t3); + + case SWITCH: + // | SWITCH word {skipnl();} brace + // {$$=tree2(SWITCH, $2, $4);} + t1 = yyword(yylex(), &tok); + tok = dropnl(tok); // doesn't work in yacc grammar but works here! + t2 = brace(tok); + *ptok = dropsp(yylex()); + return tree2(SWITCH, t1, t2); // Note: cmd: a && for(x) y && b is a && {for (x) {y && b}}. return cmd(tok, ptok); @@ -315,7 +322,7 @@ cmd4(int tok, int *ptok) return tree1(FN, t1); } t2 = brace(tok); - *ptok = yylex(); + *ptok = dropsp(yylex()); return tree2(FN, t1, t2); case TWIDDLE: @@ -344,7 +351,7 @@ cmd4(int tok, int *ptok) case '{': // | brace epilog {$$=epimung($1, $2);} t1 = brace(tok); - tok = yylex(); + tok = dropsp(yylex()); t2 = epilog(tok, ptok); return epimung(t1, t2); } @@ -396,6 +403,7 @@ words(int tok, int *ptok) // | words word {$$=tree2(WORDS, $1, $2);} t = nil; + tok = dropsp(tok); while(iswordtok(tok)) t = tree2(WORDS, t, yyword(tok, &tok)); *ptok = tok; @@ -428,9 +436,19 @@ yyword(int tok, int *ptok) // word1: keyword | comword t = word1(tok, &tok); - while(tok == '^') - t = tree2('^', t, word1(yylex(), &tok)); - *ptok = tok; + for(;;) { + if(iswordtok(tok)) { + t = tree2('^', t, word1(tok, &tok)); + continue; + } + tok = dropsp(tok); + if(tok == '^') { + t = tree2('^', t, word1(yylex(), &tok)); + continue; + } + break; + } + *ptok = dropsp(tok); return t; } @@ -439,6 +457,7 @@ word1(int tok, int *ptok) { tree *w, *sub, *t; + tok = dropsp(tok); switch(tok) { default: syntax(tok); @@ -458,7 +477,6 @@ word1(int tok, int *ptok) // keyword: FOR|IN|WHILE|IF|NOT|TWIDDLE|BANG|SUBSHELL|SWITCH|FN t = yylval.tree; t->type = WORD; - lastword = 1; *ptok = yylex(); return t; @@ -466,7 +484,7 @@ word1(int tok, int *ptok) // comword: '$' word1 {$$=tree1('$', $2);} // | '$' word1 SUB words ')' {$$=tree2(SUB, $2, $4);} w = word1(yylex(), &tok); - if(tok == SUB) { + if(tok == '(') { sub = words(yylex(), &tok); if(tok != ')') syntax(tok); diff --git a/src/cmd/rc/syn.y b/src/cmd/rc/syn.y index e3decd41..5c98ef80 100644 --- a/src/cmd/rc/syn.y +++ b/src/cmd/rc/syn.y @@ -1,4 +1,4 @@ -%term FOR IN WHILE IF NOT TWIDDLE BANG SUBSHELL SWITCH FN SP +%term FOR IN WHILE IF NOT TWIDDLE BANG SUBSHELL SWITCH FN %term WORD REDIR REDIRW DUP PIPE SUB %term SIMPLE ARGLIST WORDS BRACE PAREN PCMD PIPEFD /* not used in syntax */ /* operator priorities -- lowest first */ diff --git a/src/cmd/rc/test.rc b/src/cmd/rc/test.rc index 5c658132..7a83ad17 100644 --- a/src/cmd/rc/test.rc +++ b/src/cmd/rc/test.rc @@ -1,5 +1,9 @@ # test for parser +a +a b +a|b +a | b {a; b; c} x=y a && b || c x=y a | b | c @@ -63,3 +67,10 @@ x || y x | y +switch x {y} && z +switch x {} | y + +OPTIONS=$OPTIONS' /axescount '^`{echo $1 | sed s/-a//}^' def' + +# bug in old printfont script - expected more free carats +# OPTIONS=$OPTIONS' /axescount '`{echo $1 | sed s/-a//}' def'