diff --git a/analyzers/test/lex.l b/analyzers/test/lex.l deleted file mode 100644 index 5e2182f..0000000 --- a/analyzers/test/lex.l +++ /dev/null @@ -1,151 +0,0 @@ -%{ -#include -#include "y.tab.h" // Заголовочный файл, генерируемый Bison -%} - -%option noyywrap -%option yylineno - -DIGIT [0-9] -LETTER [a-zA-Z_] -HEX [0-9a-fA-F] -OCT [0-7] -BIN [0-1] -EXP [eE][+-]?{DIGIT}+ -FLOAT_SUFF [fF] - -%% - -/* Ключевые слова */ -"break" { return BREAK; } -"case" { return CASE; } -"chan" { return CHAN; } -"const" { return CONST; } -"continue" { return CONTINUE; } -"default" { return DEFAULT; } -"defer" { return DEFER; } -"else" { return ELSE; } -"fallthrough" { return FALLTHROUGH; } -"for" { return FOR; } -"func" { return FUNC; } -"go" { return GO; } -"goto" { return GOTO; } -"if" { return IF; } -"import" { return IMPORT; } -"interface" { return INTERFACE; } -"map" { return MAP; } -"package" { return PACKAGE; } -"range" { return RANGE; } -"return" { return RETURN; } -"select" { return SELECT; } -"struct" { return STRUCT; } -"switch" { return SWITCH; } -"type" { return TYPE; } -"var" { return VAR; } - -/* Предопределенные идентификаторы */ -"true" { yylval.bool_val = true; return BOOL_LIT; } -"false" { yylval.bool_val = false; return BOOL_LIT; } -"nil" { return NIL; } - -/* Идентификаторы */ -{LETTER}({LETTER}|{DIGIT})* { - yylval.str_val = strdup(yytext); - return IDENT; -} - -/* Целочисленные литералы */ -0[xX]{HEX}+ { yylval.int_val = strtol(yytext, NULL, 16); return INT_LIT; } -0[oO]?{OCT}+ { yylval.int_val = strtol(yytext, NULL, 8); return INT_LIT; } -0[bB]{BIN}+ { yylval.int_val = strtol(yytext, NULL, 2); return INT_LIT; } -{DIGIT}+ { yylval.int_val = atoi(yytext); return INT_LIT; } - -/* Вещественные литералы */ -{DIGIT}+{EXP} { yylval.float_val = atof(yytext); return FLOAT_LIT; } -{DIGIT}+"."{DIGIT}*{EXP}? { yylval.float_val = atof(yytext); return FLOAT_LIT; } -{DIGIT}+"."{DIGIT}*{EXP}?{FLOAT_SUFF} { yylval.float_val = atof(yytext); return FLOAT_LIT; } - -/* Строковые литералы */ -\"(\\.|[^\\"])*\" { - yylval.str_val = strdup(yytext); - return STRING_LIT; -} - -`[^`]*` { - yylval.str_val = strdup(yytext); - return RAW_STRING_LIT; -} - -/* Руны */ -'(\\.|[^\\'])' { - yylval.rune_val = yytext[1]; - return RUNE_LIT; -} - -/* Операторы */ -"+" { return PLUS; } -"-" { return MINUS; } -"*" { return STAR; } -"/" { return SLASH; } -"%" { return PERCENT; } -"&" { return AMPERSAND; } -"|" { return PIPE; } -"^" { return CARET; } -"<<" { return LSHIFT; } -">>" { return RSHIFT; } -"&^" { return AMPERSAND_CARET; } -"+=" { return PLUS_EQ; } -"-=" { return MINUS_EQ; } -"*=" { return STAR_EQ; } -"/=" { return SLASH_EQ; } -"%=" { return PERCENT_EQ; } -"&=" { return AMPERSAND_EQ; } -"|=" { return PIPE_EQ; } -"^=" { return CARET_EQ; } -"<<=" { return LSHIFT_EQ; } -">>=" { return RSHIFT_EQ; } -"&^=" { return AMPERSAND_CARET_EQ; } -"&&" { return LAND; } -"||" { return LOR; } -"<-" { return ARROW; } -"++" { return INC; } -"--" { return DEC; } -"==" { return EQ; } -"<" { return LT; } -">" { return GT; } -"=" { return ASSIGN; } -"!" { return NOT; } -"!=" { return NEQ; } -"<=" { return LEQ; } -">=" { return GEQ; } -":=" { return DECLARE_ASSIGN; } -"..." { return ELLIPSIS; } -":" { return COLON; } -";" { return SEMICOLON; } -"," { return COMMA; } -"." { return DOT; } - -/* Скобки и разделители */ -"(" { return LPAREN; } -")" { return RPAREN; } -"[" { return LBRACK; } -"]" { return RBRACK; } -"{" { return LBRACE; } -"}" { return RBRACE; } - -/* Комментарии */ -"//".* { /* Игнорируем однострочные комментарии */ } -"/\*"(.|\n)*?"\*/" { /* Игнорируем многострочные комментарии */ } - -/* Пробельные символы */ -[ \t\n\r]+ { /* Игнорируем пробелы, табы, переводы строк */ } - -/* Ошибка */ -. { fprintf(stderr, "Неизвестный символ: %s\n", yytext); } - -%% - -/* Дополнительные функции */ -void yyerror(const char *s) { - fprintf(stderr, "Ошибка лексического анализа: %s\n", s); -} \ No newline at end of file diff --git a/analyzers/test/test.l b/analyzers/test/test.l index 3cb7338..0ddfe57 100644 --- a/analyzers/test/test.l +++ b/analyzers/test/test.l @@ -1,22 +1,133 @@ %{ -#include "test.tab.h" #include -#include - -int CURRENT_LINE_NUMBER; - +#include +#include "test.tab.h" %} +%option noyywrap +%option yylineno + +DIGIT [0-9] +LETTER [a-zA-Z_] +HEX [0-9a-fA-F] +OCT [0-7] +BIN [0-1] +QUOTE ["'] %% -"{" { return LBRACE; } -"}" { return RBRACE; } -\n { CURRENT_LINE_NUMBER++; } -[^{}]+ { - yylval.str = strdup(yytext); - return TEXT; - } +{QUOTE}{LETTER}*{QUOTE} {return STRING;} +{LETTER}({LETTER}|{DIGIT})* { return IDENT;} +{DIGIT}+ { yylval.num = atoi(yytext); return NUMBER; } +"break" { return BREAK; } +"switch" { return SWITCH; } +"case" { return CASE; } +"chan" { return CHAN; } +"const" { return CONST; } +"continue" { return CONTINUE; } +"default" { return DEFAULT; } +"defer" { return DEFER; } +"else" { return ELSE; } +"fallthrough" { return FALLTHROUGH; } +"for" { return FOR; } +"func" { return FUNC; } +"go" { return GO; } +"goto" { return GOTO; } +"if" { return IF; } +"import" { return IMPORT; } +"interface" { return INTERFACE; } +"map" { return MAP; } +"package" { return PACKAGE; } +"range" { return RANGE; } +"return" { return RETURN; } +"select" { return SELECT; } +"struct" { return STRUCT; } +"type" { return TYPE; } +"var" { return VAR; } + +"uint8" { return UINT8; } +"uint16" { return UINT16; } +"uint32" { return UINT32; } +"uint64" { return UINT64; } + +"int8" { return INT8; } +"int16" { return INT16; } +"int32" { return INT32; } +"int64" { return INT64; } + +"float32" { return FLOAT32; } +"float64" { return FLOAT64; } + +"complex64" { return COMPLEX64; } +"complex128" { return COMPLEX128; } + +"byte" { return BYTE; } +"rune" { return RUNE; } +"uint" { return UINT; } +"int" { return INT; } +"uintptr" { return UINT_PTR; } +"string" { return STRING_LIT;} + +"true" { yylval.bool_val = true; return BOOL_LIT; } +"false" { yylval.bool_val = false; return BOOL_LIT; } +"nil" { return NIL; } + +"+" { return PLUS; } +"-" { return MINUS; } +"*" { return STAR; } +"/" { return DIV; } +"%" { return MOD; } +"&" { return AMPERSAND; } +"|" { return PIPE; } +"^" { return XOR; } +"<<" { return LSHIFT; } +">>" { return RSHIFT; } +"&^" { return AND_NOT; } + +"+=" { return PLUS_EQ; } +"-=" { return MINUS_EQ; } +"*=" { return MUL_EQ; } +"/=" { return DIV_EQ; } +"%=" { return MOD_EQ; } +"&=" { return AMPERSAND_EQ; } +"|=" { return PIPE_EQ; } +"^=" { return XOR_EQ; } +"<<=" { return LSHIFT_EQ; } +">>=" { return RSHIFT_EQ; } +"&^=" { return AND_NOT_EQ; } + +"++" { return INC; } +"--" { return DEC; } + +"==" { return EQ; } +"&&" { return AND; } +"||" { return OR; } +"!" { return NOT; } +"!=" { return NEQ; } + +"<-" { return ARROW; } + +"<" { return LT; } +">" { return GT; } +"=" { return ASSIGN; } +"<=" { return LEQ; } +">=" { return GEQ; } +":=" { return SHORT_ASSIGN; } +"..." { return DOTS; } +":" { return COLON; } +";" { return SEMICOLON; } +"," { return COMMA; } +"." { return DOT; } + +"(" { return LPAREN; } +")" { return RPAREN; } +"[" { return LBRACK; } +"]" { return RBRACK; } +"{" { return LBRACE; } +"}" { return RBRACE; } + +"//".* { } +"/\*"(.|\n)*?"\*/" { } +[ \t\n\r]+ { } + +. { fprintf(stderr, "Unexpected symbol: %s\n", yytext); } %% -int yywrap() { - return 1; -} \ No newline at end of file diff --git a/analyzers/test/test.y b/analyzers/test/test.y index 3c4989c..6c86c6f 100644 --- a/analyzers/test/test.y +++ b/analyzers/test/test.y @@ -2,46 +2,251 @@ #include #include #include +#include -extern int CURRENT_LINE_NUMBER; +extern int yylineno; extern char *yytext; void yyerror(const char *s) { - fprintf(stderr, "\033[91mError at line %i\033[0m", CURRENT_LINE_NUMBER); + fprintf(stderr, "\033[91mError at line %d: %s\033[0m\n", yylineno, s); exit(1); } + extern int yylex(); extern FILE *yyin; %} %union { - char *str; + char *str_val; + int int_val; + float float_val; + char rune_val; + bool bool_val; } -%token TEXT -%token LBRACE RBRACE +%token BREAK CASE CHAN CONST CONTINUE DEFAULT DEFER ELSE FALLTHROUGH FOR FUNC +%token GO GOTO IF IMPORT INTERFACE MAP PACKAGE RANGE RETURN SELECT STRUCT +%token SWITCH TYPE VAR + +%token BOOL_LIT +%token NIL +%token IDENT +%token INT_LIT +%token FLOAT_LIT +%token STRING_LIT +%token RAW_STRING_LIT +%token RUNE_LIT + +%token PLUS MINUS STAR SLASH PERCENT +%token AMPERSAND PIPE CARET LSHIFT RSHIFT AMPERSAND_CARET +%token PLUS_EQ MINUS_EQ STAR_EQ SLASH_EQ PERCENT_EQ +%token AMPERSAND_EQ PIPE_EQ CARET_EQ LSHIFT_EQ RSHIFT_EQ AMPERSAND_CARET_EQ +%token LAND LOR ARROW INC DEC +%token EQ NEQ LT GT LEQ GEQ +%token NOT ASSIGN DECLARE_ASSIGN ELLIPSIS + +%token COLON SEMICOLON COMMA DOT +%token LPAREN RPAREN LBRACK RBRACK LBRACE RBRACE + +%right INC DEC +%right ARROW +%right NOT +%left STAR SLASH PERCENT +%left PLUS MINUS +%left LSHIFT RSHIFT AMPERSAND_CARET +%left AMPERSAND PIPE CARET +%left EQ NEQ LT GT LEQ GEQ +%left LAND +%left LOR +%right ASSIGN DECLARE_ASSIGN +%right PLUS_EQ MINUS_EQ STAR_EQ SLASH_EQ PERCENT_EQ +%right AMPERSAND_EQ PIPE_EQ CARET_EQ LSHIFT_EQ RSHIFT_EQ AMPERSAND_CARET_EQ +%left DOT ELLIPSIS +%left COLON SEMICOLON COMMA %% -block: - LBRACE content RBRACE +program: + | program top_level_stmt ; -content: - | content TEXT { - printf("TOKEN ('%s')\n", $2); - free($2); } - | content block +top_level_stmt: + decl_stmt + | func_decl + | block ; + +decl_stmt: + VAR IDENT type ASSIGN expr + | CONST IDENT type ASSIGN expr + ; + +func_decl: + FUNC IDENT LPAREN params RPAREN block + ; + +params: + | param_list + ; + +param_list: + IDENT type + | param_list COMMA IDENT type + ; + +type: + IDENT + | LBRACK RBRACK type + | STAR type + ; + +expr: + literal + | IDENT + | expr PLUS expr + | expr MINUS expr + | expr STAR expr + | expr SLASH expr + | expr PERCENT expr + | expr LSHIFT expr + | expr RSHIFT expr + | expr AMPERSAND expr + | expr PIPE expr + | expr CARET expr + | expr AMPERSAND_CARET expr + | expr LAND expr + | expr LOR expr + | expr EQ expr + | expr NEQ expr + | expr LT expr + | expr GT expr + | expr LEQ expr + | expr GEQ expr + | NOT expr + | MINUS expr %prec NOT + | LPAREN expr RPAREN + | expr DOT IDENT + | IDENT LPAREN args RPAREN + ; + +args: + | expr + | args COMMA expr + ; + +type: + basic_type + | array + | slice + | pointer + | struct_type + | interface + | function + | map + | channel + +basic_type: + uint8 + | uint16 + | uint32 + | uint64 + | int8 + | "int16" + | "int32" + | "int64" + | "float32" + | "float64" + | "complex64" + | "complex128" + | "uint" + | "int" + | "uintptr" + | "bool" + | "string" + +array: + LBRACK INT RBRACK type + +slice: + LBRACK RBRACK type + +pointer: + STAR type + +struct_type: + STRUCT "{" (field_decl ";")* "}" + +interface: + INTERFACE "{" (method_spec ";")* "}" + +function_type: + FUNC signature + +map: + MAP "[" key_type "]" type + +channel: + CHAN type + | CHAN ARROW type + | ARROW CHAN type + + +field_decl: + identifier_list type + +method_spec: + method_name signature + +signature: + parameters [ result ] + +parameters: + "(" [ parameter_list ] ")" + +result: + type + | "(" type_list ")" + +key_type: + type // для map ключом может быть только comparable тип + +literal: + INT_LIT + | FLOAT_LIT + | STRING_LIT + | RAW_STRING_LIT + | RUNE_LIT + | BOOL_LIT + | NIL + ; + +block: + LBRACE stmt_list RBRACE + ; + +stmt_list: + | stmt_list stmt + ; + +stmt: + expr SEMICOLON + | decl_stmt SEMICOLON + | block + | RETURN expr SEMICOLON + | IF expr block + | IF expr block ELSE block + | FOR block + | FOR expr SEMICOLON expr SEMICOLON expr block + | FOR IDENT ASSIGN expr SEMICOLON expr SEMICOLON expr block + ; + %% int main(int argc, char **argv) { - CURRENT_LINE_NUMBER = 1; - if (argc > 1) { FILE *f = fopen(argv[1], "r"); if (!f) { - perror("\033[91mFail open file\033[0m"); + perror("\033[91mFailed to open file\033[0m"); return 1; } yyin = f; diff --git a/analyzers/test/мусор.txt b/analyzers/test/мусор.txt new file mode 100644 index 0000000..e69de29 diff --git a/main.py b/main.py index 08aba1c..c31213a 100644 --- a/main.py +++ b/main.py @@ -3,9 +3,9 @@ import subprocess # ЭТИ ПУТИ НАДО ЗАДАТЬ ВРУЧНУЮ # *.l и *.y файлы из директории ANALYZERS_DIR ДОЛЖНЫ НАЗЫВАТЬСЯ как basename этой директории!!! -ANALYZERS_DIR = r'C:\Users\user\Desktop\УЧЕБА\6_СЕМ\КОМПИЛЯТОРЫ\go-analyzer\analyzers\test' -FLEX_EXE_PATH = r"C:\tools\win_flex_bison\win_flex.exe" -BISON_EXE_PATH = r"C:\tools\win_flex_bison\win_bison.exe" +ANALYZERS_DIR = r'C:\Users\Илья\Desktop\6sem\Компиляторы\Курсач\go-analyzer\analyzers\test' +FLEX_EXE_PATH = r"C:\Users\Илья\Desktop\win_flex_bison-latest\win_flex.exe" +BISON_EXE_PATH = r"C:\Users\Илья\Desktop\win_flex_bison-latest\win_bison.exe" def main(): # Подготовка путей diff --git a/tests/test_blocks.txt b/tests/test_blocks.txt index 57acaa9..df52076 100644 --- a/tests/test_blocks.txt +++ b/tests/test_blocks.txt @@ -1,5 +1,5 @@ { - { 1 231233 + { {1}{} block1 { block2 { block3 } diff --git a/tests/test_var.txt b/tests/test_var.txt new file mode 100644 index 0000000..5f6bec2 --- /dev/null +++ b/tests/test_var.txt @@ -0,0 +1,7 @@ +func main() { + println("Hello world"); +} + +func add(a int, b int) int { + return a + b +} \ No newline at end of file