diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 075411c..3381f61 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,9 +3,9 @@ name: CI on: push: - branches: [ main, dev ] pull_request: - branches: [ main, dev ] + branches: + - '**' jobs: build-and-test: @@ -19,7 +19,7 @@ jobs: - name: fmt check run: cargo fmt -- --check - name: lint - run: cargo clippy -- -D warnings + run: cargo clippy - name: build run: cargo build --verbose - name: run tests diff --git a/.gitignore b/.gitignore index 577f287..465b5ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target **/*.rs.bk -.idea/ \ No newline at end of file +.idea/ +.DS_Store \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 5e86c1c..2553954 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,5 +3,5 @@ version = 4 [[package]] -name = "rustc-tape4" +name = "rustc_tape4" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index fdb7ae8..f0424b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "rustc-tape4" +name = "rustc_tape4" version = "0.1.0" edition = "2024" diff --git a/README.md b/README.md index 2680f1a..0a6b52f 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ A learning‐by‐doing project - **Basic types** - `int` (32‑bit signed) - `char` (8‑bit signed) + - `void` - **Derived types** - Single‑level pointers (`int*`, `char*`) @@ -29,7 +30,7 @@ A learning‐by‐doing project - Arithmetic: `+`, `-`, `*`, `/`, `%` - Comparison: `==`, `!=`, `<`, `>`, `<=`, `>=` - Logical: `!`, `&&`, `||` - - Bitwise: `&`, `|` + - Bitwise: `&`, `|`, `^` - Compound assignment: `+=`, `-=` - Increment / decrement: `++`, `--` - Assignment: `=` diff --git a/grammer.md b/grammer.md new file mode 100644 index 0000000..ec39546 --- /dev/null +++ b/grammer.md @@ -0,0 +1,79 @@ +# Grammar + +```bnf +program ::= function* + +function ::= function_declaration + | function_definition + +function_declaration ::= type_specifier identifier "(" ( "void" | parameter_list )? ")" ";" +function_definition ::= type_specifier identifier "(" ( "void" | parameter_list )? ")" block + +parameter_list ::= parameter ( "," parameter )* +parameter ::= type_specifier identifier ( "[" int_literal? "]" )? + +type_specifier ::= ( "int" | "char" | "void" ) "*"* + +block ::= "{" statement* "}" + +statement ::= block + | if_statement + | while_statement + | for_statement + | return_statement + | break_statement + | continue_statement + | declaration_statement + | expression_statement + +declaration_statement ::= type_specifier init_declarator_list ";" +init_declarator_list ::= init_declarator ( "," init_declarator )* +init_declarator ::= declarator ( "=" initializer )? +declarator ::= identifier ( "[" int_literal "]" )? + +initializer ::= expression + | "{" initializer_list? "}" +initializer_list ::= initializer ( "," initializer )* ","? + +expression_statement ::= expression? ";" + +if_statement ::= "if" "(" expression ")" statement ( "else" statement )? +while_statement ::= "while" "(" expression ")" statement +for_statement ::= "for" "(" expression? ";" expression? ";" expression? ")" statement +return_statement ::= "return" expression? ";" +break_statement ::= "break" ";" +continue_statement ::= "continue" ";" + +expression ::= assignment +assignment ::= logical_or ( ( "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" ) assignment )? + +logical_or ::= logical_and ( "||" logical_and )* +logical_and ::= bitwise_or ( "&&" bitwise_or )* +bitwise_or ::= bitwise_xor ( "|" bitwise_xor )* +bitwise_xor ::= bitwise_and ( "^" bitwise_and )* +bitwise_and ::= equality ( "&" equality )* + +equality ::= relational ( ( "==" | "!=" ) relational )* +relational ::= additive ( ( "<" | "<=" | ">" | ">=" ) additive )* +additive ::= multiplicative ( ( "+" | "-" ) multiplicative )* +multiplicative ::= unary ( ( "*" | "/" | "%" ) unary )* + +unary ::= ( "!" | "-" | "&" | "*" | "++" | "--" ) unary + | postfix +postfix ::= primary postfix_op* +postfix_op ::= "(" argument_list? ")" + | "[" expression "]" + | "++" + | "--" + +primary ::= identifier + | int_literal + | char_literal + | "(" expression ")" + | "{" initializer_list? "}" + +argument_list ::= expression ( "," expression )* + +identifier ::= /* Ident(String) */ +int_literal ::= /* IntLiteral(i64) */ +char_literal ::= /* CharLiteral(char) */ \ No newline at end of file diff --git a/src/ast/expr.rs b/src/ast/expr.rs new file mode 100644 index 0000000..52be178 --- /dev/null +++ b/src/ast/expr.rs @@ -0,0 +1,86 @@ +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + Ident(String), // variable or function name + IntLiteral(i64), + CharLiteral(char), + + // 단항연산자 + UnaryPrefixOp { + op: PrefixOp, + rhs: Box, + }, + UnaryPostfixOp { + lhs: Box, + op: PostfixOp, + }, + + BinaryOp { + lhs: Box, + op: BinaryOp, + rhs: Box, + }, // 이항연산자 + Call { + func: Box, + args: Vec, + }, // 함수 호출 + ArrayIndex { + array: Box, + index: Box, + }, // 인덱싱 + InitializerList(Vec), // 배열 초기화 ex) {1, 2, 3} + + Assignment { + left: Box, + op: AssignOp, + right: Box, + }, // 할당 x = y, x += 1. +} + +#[derive(Debug, Clone, PartialEq)] +pub enum PrefixOp { + Address, // & + Deref, // * + Neg, // - + Not, // ! + PreInc, // ++x + PreDec, // --x +} + +#[derive(Debug, Clone, PartialEq)] +pub enum PostfixOp { + PostInc, // x++ + PostDec, // x-- +} + +#[derive(Debug, Clone, PartialEq)] +pub enum BinaryOp { + Add, // + + Sub, // - + Mul, // * + Div, // / + Rem, // % + Eq, // == + Ne, // != + Lt, // < + Le, // <= + Gt, // > + Ge, // >= + And, // && + Or, // || + BitAnd, // & + BitOr, // | + BitXor, // ^ +} + +#[derive(Clone, Debug, PartialEq)] +pub enum AssignOp { + Assign, // = + PlusAssign, // += + MinusAssign, // -= + MulAssign, // *= + DivAssign, // /= + RemAssign, // %= + BitAndAssign, // &= + BitOrAssign, // |= + BitXorAssign, // ^= +} diff --git a/src/ast/functions.rs b/src/ast/functions.rs new file mode 100644 index 0000000..3113c4b --- /dev/null +++ b/src/ast/functions.rs @@ -0,0 +1,24 @@ +use crate::ast::stmt::Block; +use crate::ast::ty::TypeSpecifier; + +#[derive(Debug, Clone)] +pub struct Parameter { + pub name: String, + pub ty: TypeSpecifier, +} + +#[derive(Debug, Clone)] +pub struct Function { + pub name: String, + pub return_ty: TypeSpecifier, + pub params: Vec, + pub body: Block, +} + +// 함수 정의: 반환 타입, 함수 이름, 매개변수 목록, 함수 본문 +// function_definition ::= type_specifier identifier "(" parameter_list? ")" block + +// 매개변수 목록: 첫 매개변수 + 쉼표로 구분된 추가 매개변수 0개 이상 +// parameter_list ::= parameter ( "," parameter )* +// 단일 매개변수: 타입 + 이름 +// parameter ::= type_specifier identifier diff --git a/src/ast/mod.rs b/src/ast/mod.rs new file mode 100644 index 0000000..6b6547c --- /dev/null +++ b/src/ast/mod.rs @@ -0,0 +1,11 @@ +pub mod expr; +pub mod functions; +pub mod program; +pub mod stmt; +pub mod ty; + +pub use expr::Expr; +pub use functions::Function; +pub use program::Program; +pub use stmt::Stmt; +pub use ty::TypeSpecifier; diff --git a/src/ast/program.rs b/src/ast/program.rs new file mode 100644 index 0000000..279e41e --- /dev/null +++ b/src/ast/program.rs @@ -0,0 +1,9 @@ +use crate::ast::Function; + +#[derive(Debug, Clone)] +pub struct Program { + pub functions: Vec, +} + +// 프로그램 전체: 0개 이상의 함수 정의 +// program ::= function_definition* diff --git a/src/ast/stmt.rs b/src/ast/stmt.rs new file mode 100644 index 0000000..c08ca8d --- /dev/null +++ b/src/ast/stmt.rs @@ -0,0 +1,53 @@ +use crate::ast::TypeSpecifier; +use crate::ast::expr::Expr; + +#[derive(Debug, Clone, PartialEq)] +pub struct Block { + pub statements: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Stmt { + Block(Block), + If { + cond: Expr, + then_branch: Box, + else_branch: Option>, + }, + While { + cond: Expr, + body: Box, + }, + For { + init: Option>, + cond: Option, + step: Option, + body: Box, + }, + Return(Option), + Break, + Continue, + Declaration { + ty: TypeSpecifier, + declarators: Vec, + }, + ExprStmt(Option), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Declarator { + pub name: String, + pub array_size: Option, + pub init: Option, +} + +// 문장: 구문들 +// statement ::= block +// | if_statement +// | while_statement +// | for_statement +// | return_statement +// | break_statement +// | continue_statement +// | declaration_statement +// | expression_statement diff --git a/src/ast/ty.rs b/src/ast/ty.rs new file mode 100644 index 0000000..3b68b4a --- /dev/null +++ b/src/ast/ty.rs @@ -0,0 +1,10 @@ +#[derive(Debug, Clone, PartialEq)] +pub enum TypeSpecifier { + Int, + Char, + Void, + Pointer(Box), +} + +// 타입 지정자: 기본 타입(int|char|void) + 0개 이상 포인터 +// type_specifier ::= ( "int" | "char" | "void" ) "*"* diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs new file mode 100644 index 0000000..b9f04e6 --- /dev/null +++ b/src/lexer/lexer.rs @@ -0,0 +1,352 @@ +use super::token::{LexError, SpannedToken, Token, lookup_ident}; + +/// 렉서 구조 +pub struct Lexer { + input: Vec, // full codes + pos: usize, // current position + read_pos: usize, // next reading position + ch: Option, // current char + line: usize, // current line + column: usize, // current column +} + +impl Lexer { + pub fn new(input: &str) -> Self { + let mut l = Lexer { + input: input.chars().collect(), + pos: 0, + read_pos: 0, + ch: None, + line: 1, + column: 0, + }; + l.read_char(); + l + } + + pub fn collect_spanned_tokens(&mut self) -> Vec { + let mut tokens = vec![]; + loop { + let token = self.next_token(); + tokens.push(token.clone()); + if token.kind == Token::EOF { + break; + } + } + tokens + } + + /// 다음 문자로 넘어가기 + fn read_char(&mut self) { + if let Some('\n') = self.ch { + self.line += 1; + self.column = 0; + } + + self.ch = if self.read_pos >= self.input.len() { + None + } else { + Some(self.input[self.read_pos]) + }; + self.pos = self.read_pos; + self.read_pos += 1; + self.column += 1; + } + + /// 다음 문자 보기 (Consume) 없이 + fn peek_char(&self) -> Option { + if self.read_pos >= self.input.len() { + None + } else { + Some(self.input[self.read_pos]) + } + } + + /// 주석, 빈칸 건너뛰기 + fn skip_trivia(&mut self) { + loop { + match self.ch { + // Skip plain whitespace + Some(c) if c.is_whitespace() => { + self.read_char(); + } + + // 단일 줄 주석: // ... + Some('/') if self.peek_char() == Some('/') => { + self.read_char(); + self.read_char(); + + while let Some(ch) = self.ch { + if ch == '\n' { + break; + } + self.read_char(); + } + } + + // 여러 줄 주석: /* ... */ + Some('/') if self.peek_char() == Some('*') => { + self.read_char(); + self.read_char(); + + while let Some(ch) = self.ch { + if ch == '*' && self.peek_char() == Some('/') { + // consume '*/' + self.read_char(); + self.read_char(); + break; + } + self.read_char(); + } + } + + _ => break, + } + } + } + + /// 다음 토큰 얻기 + pub fn next_token(&mut self) -> SpannedToken { + self.skip_trivia(); + let line = self.line; + let column = self.column; + + let tok = match self.ch { + Some('+') => { + // ++ + if self.peek_char() == Some('+') { + self.read_char(); + Token::Increment + // += + } else if self.peek_char() == Some('=') { + self.read_char(); + Token::PlusAssign + // + + } else { + Token::Plus + } + } + + Some('-') => { + // -- + if self.peek_char() == Some('-') { + self.read_char(); + Token::Decrement + // -= + } else if self.peek_char() == Some('=') { + self.read_char(); + Token::MinusAssign + // - + } else { + Token::Minus + } + } + + Some('*') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::AsteriskAssign + } else { + Token::Asterisk + } + } + + Some('/') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::SlashAssign + } else { + Token::Slash + } + } + Some('%') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::ModuloAssign + } else { + Token::Percent + } + } + + Some('=') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::Equal + } else { + Token::Assign + } + } + + Some('!') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::NotEqual + } else { + Token::Not + } + } + Some('<') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::Le + } else { + Token::Lt + } + } + Some('>') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::Ge + } else { + Token::Gt + } + } + + Some('&') => { + if self.peek_char() == Some('&') { + self.read_char(); + Token::And + } else if self.peek_char() == Some('=') { + self.read_char(); + Token::BitAndAssign + } else { + Token::Ampersand + } + } + Some('|') => { + if self.peek_char() == Some('|') { + self.read_char(); + Token::Or + } else if self.peek_char() == Some('=') { + self.read_char(); + Token::BitOrAssign + } else { + Token::BitOr + } + } + Some('^') => { + if self.peek_char() == Some('=') { + self.read_char(); + Token::BitXorAssign + } else { + Token::BitXor + } + } + + Some(';') => Token::Semicolon, + Some(',') => Token::Comma, + Some('(') => Token::LParen, + Some(')') => Token::RParen, + Some('{') => Token::LBrace, + Some('}') => Token::RBrace, + Some('[') => Token::LBracket, + Some(']') => Token::RBracket, + + Some('\'') => { + return match self.read_char_literal() { + Ok(ch) => SpannedToken { + kind: Token::CharLiteral(ch), + line, + column, + }, + Err(e) => SpannedToken { + kind: Token::Error(e), + line, + column, + }, + }; + } + + Some(c) if c.is_alphabetic() || c == '_' => { + let ident = self.read_identifier(); + return SpannedToken { + kind: lookup_ident(&ident), + line, + column, + }; + } + + Some(c) if c.is_numeric() => { + return match self.read_number() { + Ok(num) => SpannedToken { + kind: Token::IntLiteral(num), + line, + column, + }, + Err(e) => SpannedToken { + kind: Token::Error(e), + line, + column, + }, + }; + } + + None => Token::EOF, + Some(c) => Token::Illegal(c), + }; + + self.read_char(); + SpannedToken { + kind: tok, + line: self.line, + column: self.column, + } + } + + fn read_identifier(&mut self) -> String { + let start = self.pos; + while let Some(c) = self.ch { + if !c.is_alphanumeric() && c != '_' { + break; + } + self.read_char() + } + self.input[start..self.pos].iter().collect() + } + + fn read_number(&mut self) -> Result { + let start = self.pos; + while let Some(c) = self.ch { + if !c.is_numeric() { + break; + } + self.read_char() + } + + let lit: String = self.input[start..self.pos].iter().collect(); + let value = lit + .parse::() + .map_err(|_| LexError::InvalidNumericLiteral(lit))?; + Ok(value) + } + + fn read_char_literal(&mut self) -> Result { + // current ch == Some(') 일때, + self.read_char(); + + let lit = match self.ch { + Some('\\') => { + self.read_char(); + match self.ch { + Some('0') => '\0', + Some('n') => '\n', + Some('t') => '\t', + Some('\\') => '\\', + Some('\'') => '\'', + _ => self.ch.unwrap_or('\0'), + } + } + Some(c) => c, + None => '\0', + }; + // 리터럴 문자 소비 + self.read_char(); + + if self.ch != Some('\'') { + return Err(LexError::UnterminatedCharLiteral); + } + // 닫는 따옴표 소비 + self.read_char(); + Ok(lit) + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..4f3489f --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,6 @@ +pub mod lexer; +pub mod token; + +pub use lexer::Lexer; +pub use token::LexError; +pub use token::Token; diff --git a/src/lexer/token.rs b/src/lexer/token.rs new file mode 100644 index 0000000..24d96b6 --- /dev/null +++ b/src/lexer/token.rs @@ -0,0 +1,105 @@ +/// Lex Errors +#[derive(Debug, PartialEq, Clone)] +pub enum LexError { + // 닫는 따옴표 없이 끝난 문자 리터럴 + UnterminatedCharLiteral, + // 잘못된 정수 리터럴 (파싱 실패) + InvalidNumericLiteral(String), +} + +#[derive(Debug, PartialEq, Clone)] +pub struct SpannedToken { + pub kind: Token, + pub line: usize, + pub column: usize, +} + +/// Token types +#[derive(Debug, PartialEq, Clone)] +pub enum Token { + // Special + EOF, + Error(LexError), // 에러용 + Illegal(char), + + // Identifiers + Ident(String), + + // literals + IntLiteral(i64), // 123 + CharLiteral(char), // 'a', '\n', '\0' + + // Operators + Plus, // + + Minus, // - + Asterisk, // * + Slash, // / + Percent, // % + + Equal, // == + NotEqual, // != + Lt, // < + Gt, // > + Le, // <= + Ge, // >= + + Assign, // = + And, // && + Or, // || + Not, // ! + Ampersand, // & + BitOr, // | + BitXor, // ^ + + Semicolon, // ; + Comma, // , + + LParen, // ( + RParen, // ) + LBrace, // { + RBrace, // } + LBracket, // [ + RBracket, // ] + + // Keywords + If, + Else, + While, + For, + Return, + Break, + Continue, + + // Types + Int, + Char, + Void, + + Increment, // ++ + Decrement, // -- + PlusAssign, // += + MinusAssign, // -= + AsteriskAssign, // *= + SlashAssign, // /= + ModuloAssign, // %= + BitOrAssign, // |= + BitAndAssign, // &= + BitXorAssign, // ^= +} + +/// Lookup identifier keyword +pub fn lookup_ident(ident: &str) -> Token { + match ident { + "int" => Token::Int, + "char" => Token::Char, + "void" => Token::Void, + "if" => Token::If, + "else" => Token::Else, + "while" => Token::While, + "for" => Token::For, + "return" => Token::Return, + "break" => Token::Break, + "continue" => Token::Continue, + _ => Token::Ident(ident.to_string()), + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..483656f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod lexer; +pub mod parser; diff --git a/src/main.rs b/src/main.rs index e7a11a9..214dd0c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +mod lexer; + fn main() { println!("Hello, world!"); } diff --git a/src/parser/error.rs b/src/parser/error.rs new file mode 100644 index 0000000..57d7c4f --- /dev/null +++ b/src/parser/error.rs @@ -0,0 +1,21 @@ +use crate::lexer::Token; + +#[derive(Debug, Clone, PartialEq)] +pub enum ParserError { + UnexpectedToken { + expected: Token, + found: Token, + line: usize, + col: usize, + }, + UnexpectedEOF { + expected: String, + line: usize, + col: usize, + }, + UnsupportedToken { + found: Token, + line: usize, + col: usize, + }, +} diff --git a/src/parser/error_helpers.rs b/src/parser/error_helpers.rs new file mode 100644 index 0000000..5630f33 --- /dev/null +++ b/src/parser/error_helpers.rs @@ -0,0 +1,56 @@ +use crate::lexer::Token; +use crate::parser::Parser; +use crate::parser::error::ParserError; +use crate::parser::parser::ParseResult; + +// #[macro_export] +// macro_rules! expect { +// ($p:expr, $pat:pat_param) => {{ +// let tok = $p.current_token().clone(); +// match tok { +// Token::EOF => { +// return $p.unexpected_eof(concat!("expected ", stringify!($pat))); +// } +// $pat => { +// $p.next_token(); +// Ok(()) +// } +// other => { +// return $p.unexpected_token(other); +// } +// } +// }}; +// } + +impl Parser { + /// 더 읽을 토큰이 없어서 EOF 만난 경우 + pub fn unexpected_eof(&self, expected: impl Into) -> ParseResult { + let (line, col) = self.current_span(); + Err(ParserError::UnexpectedEOF { + expected: expected.into(), + line, + col, + }) + } + + /// 기대한 토큰이 오지 않았을 때 + pub fn unexpected_token(&self, expected: Token) -> ParseResult { + let (line, col) = self.current_span(); + Err(ParserError::UnexpectedToken { + expected, + found: self.current_token().clone(), + line, + col, + }) + } + + /// 아예 지원하지 않는 토큰을 만난 경우 + pub fn unsupported_token(&self) -> ParseResult { + let (line, col) = self.current_span(); + Err(ParserError::UnsupportedToken { + found: self.current_token().clone(), + line, + col, + }) + } +} diff --git a/src/parser/expression.rs b/src/parser/expression.rs new file mode 100644 index 0000000..481d86f --- /dev/null +++ b/src/parser/expression.rs @@ -0,0 +1,382 @@ +use crate::ast::Expr; +use crate::ast::expr::{AssignOp, BinaryOp, PostfixOp, PrefixOp}; +use crate::lexer::Token; +use crate::parser::Parser; +use crate::parser::parser::ParseResult; + +impl Parser { + /// expression ::= assignment + pub fn parse_expr(&mut self) -> ParseResult { + self.parse_assignment() + } + + /// assignment ::= logical_or ( ( "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" ) assignment )? + fn parse_assignment(&mut self) -> ParseResult { + let mut lhs = self.parse_logical_or()?; + let op = match self.current_token() { + Token::Assign => AssignOp::Assign, + Token::PlusAssign => AssignOp::PlusAssign, + Token::MinusAssign => AssignOp::MinusAssign, + Token::AsteriskAssign => AssignOp::MulAssign, + Token::SlashAssign => AssignOp::DivAssign, + Token::ModuloAssign => AssignOp::RemAssign, + Token::BitAndAssign => AssignOp::BitAndAssign, + Token::BitOrAssign => AssignOp::BitOrAssign, + Token::BitXorAssign => AssignOp::BitXorAssign, + _ => { + // 할당 연산자가 아니면 그대로 lhs 리턴 + return Ok(lhs); + } + }; + + // 할당 연산자 소비 + self.next_token(); + match &lhs { + Expr::Ident(_) + | Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + .. + } + | Expr::ArrayIndex { .. } => {} + _ => return self.unsupported_token(), + } + + let rhs = self.parse_assignment()?; + + lhs = Expr::Assignment { + left: Box::new(lhs), + op, + right: Box::new(rhs), + }; + Ok(lhs) + } + + /// unary ::= ( "!" | "-" | "&" | "*" | "++" | "--" ) unary | postfix + fn parse_unary(&mut self) -> ParseResult { + let op = match self.current_token() { + Token::Not => PrefixOp::Not, + Token::Minus => PrefixOp::Neg, + Token::Ampersand => PrefixOp::Address, + Token::Asterisk => PrefixOp::Deref, + Token::Increment => PrefixOp::PreInc, + Token::Decrement => PrefixOp::PreDec, + _ => return self.parse_postfix(), + }; + self.next_token(); + let rhs = self.parse_unary()?; + Ok(Expr::UnaryPrefixOp { + op, + rhs: Box::new(rhs), + }) + } + + /// multiplicative ::= unary ( ( "*" | "/" | "%" ) unary )* + fn parse_multiplicative(&mut self) -> ParseResult { + let mut expr = self.parse_unary()?; + + loop { + let op = match self.current_token() { + Token::Asterisk => BinaryOp::Mul, + Token::Slash => BinaryOp::Div, + Token::Percent => BinaryOp::Rem, + _ => break, + }; + self.next_token(); // 연산자소비 + let rhs = self.parse_unary()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op, + rhs: Box::new(rhs), + }; + } + + Ok(expr) + } + + /// additive ::= multiplicative ( ( "+" | "-" ) multiplicative )* + fn parse_additive(&mut self) -> ParseResult { + let mut expr = self.parse_multiplicative()?; + + loop { + let op = match self.current_token() { + Token::Plus => BinaryOp::Add, + Token::Minus => BinaryOp::Sub, + _ => break, + }; + self.next_token(); // 연산자소비 + let rhs = self.parse_multiplicative()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op, + rhs: Box::new(rhs), + }; + } + + Ok(expr) + } + + /// relational ::= additive ( ( "<" | "<=" | ">" | ">=" ) additive )* + fn parse_relational(&mut self) -> ParseResult { + let mut expr = self.parse_additive()?; + + loop { + let op = match self.current_token() { + Token::Lt => BinaryOp::Lt, + Token::Le => BinaryOp::Le, + Token::Gt => BinaryOp::Gt, + Token::Ge => BinaryOp::Ge, + _ => break, + }; + self.next_token(); // 연산자소비 + let rhs = self.parse_additive()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op, + rhs: Box::new(rhs), + }; + } + + Ok(expr) + } + + /// equality ::= relational ( ( "==" | "!=" ) relational )* + fn parse_equality(&mut self) -> ParseResult { + let mut expr = self.parse_relational()?; + loop { + let op = match self.current_token() { + Token::Equal => BinaryOp::Eq, + Token::NotEqual => BinaryOp::Ne, + _ => break, + }; + self.next_token(); // 연산자 소비 + let rhs = self.parse_relational()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op, + rhs: Box::new(rhs), + }; + } + + Ok(expr) + } + + /// logical_and ::= bitwise_or ( "&&" bitwise_or )* + fn parse_logical_and(&mut self) -> ParseResult { + let mut expr = self.parse_bitwise_or()?; + + while self.current_token() == &Token::And { + self.next_token(); // '&&' 소비 + let rhs = self.parse_bitwise_or()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op: BinaryOp::And, + rhs: Box::new(rhs), + }; + } + + Ok(expr) + } + + /// bitwise_or ::= bitwise_xor ( "|" bitwise_xor )* + fn parse_bitwise_or(&mut self) -> ParseResult { + let mut expr = self.parse_bitwise_xor()?; + while self.current_token() == &Token::BitOr { + self.next_token(); + let rhs = self.parse_bitwise_xor()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op: BinaryOp::BitOr, + rhs: Box::new(rhs), + }; + } + Ok(expr) + } + + /// bitwise_xor ::= bitwise_and ( "^" bitwise_and )* + fn parse_bitwise_xor(&mut self) -> ParseResult { + let mut expr = self.parse_bitwise_and()?; + while self.current_token() == &Token::BitXor { + self.next_token(); + let rhs = self.parse_bitwise_and()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op: BinaryOp::BitXor, + rhs: Box::new(rhs), + }; + } + Ok(expr) + } + + /// bitwise_and ::= equality ( "&" equality )* + fn parse_bitwise_and(&mut self) -> ParseResult { + let mut expr = self.parse_equality()?; + while self.current_token() == &Token::Ampersand { + self.next_token(); + let rhs = self.parse_equality()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op: BinaryOp::BitAnd, + rhs: Box::new(rhs), + }; + } + Ok(expr) + } + + /// logical_or ::= logical_and ( "||" logical_and )* + fn parse_logical_or(&mut self) -> ParseResult { + let mut expr = self.parse_logical_and()?; + + while self.current_token() == &Token::Or { + self.next_token(); // '||' 소비 + let rhs = self.parse_logical_and()?; + expr = Expr::BinaryOp { + lhs: Box::new(expr), + op: BinaryOp::Or, + rhs: Box::new(rhs), + }; + } + + Ok(expr) + } + + /// postfix ::= primary postfix_op* + /// postfix_op ::= "(" argument_list? ")" | "[" expression "]" | "++" | "--" + fn parse_postfix(&mut self) -> ParseResult { + let mut expr = self.parse_primary()?; + + loop { + expr = match self.current_token() { + Token::LParen => { + self.next_token(); // '(' + + // 첫 토큰이 쉼표 UnexpectedToken + if self.current_token() == &Token::Comma { + return self.unexpected_token(Token::Ident(String::from("expression"))); + } + + let args = if self.current_token() != &Token::RParen { + let mut v = Vec::new(); + loop { + v.push(self.parse_expr()?); + if self.current_token() == &Token::Comma { + self.next_token(); + continue; + } + break; + } + v + } else { + Vec::new() + }; + self.expect(Token::RParen)?; + Expr::Call { + func: Box::new(expr), + args, + } + } + + Token::LBracket => { + self.next_token(); // '[' + let idx = self.parse_expr()?; + self.expect(Token::RBracket)?; // ']' + Expr::ArrayIndex { + array: Box::new(expr), + index: Box::new(idx), + } + } + + Token::Increment => { + self.next_token(); // 후위 ++ + Expr::UnaryPostfixOp { + lhs: Box::new(expr), + op: PostfixOp::PostInc, + } + } + + Token::Decrement => { + self.next_token(); // 후위 -- + Expr::UnaryPostfixOp { + lhs: Box::new(expr), + op: PostfixOp::PostDec, + } + } + _ => break, + }; + } + + Ok(expr) + } + + /// primary ::= identifier | int_literal | char_literal | "(" expression ")" | "{" initializer_list? "}" + fn parse_primary(&mut self) -> ParseResult { + let expr = match self.current_token() { + Token::Ident(_) => self.parse_identifier()?, + Token::IntLiteral(_) => self.parse_int_literal()?, + Token::CharLiteral(_) => self.parse_char_literal()?, + Token::LParen => { + self.next_token(); // '(' + let e = self.parse_expr()?; + self.expect(Token::RParen)?; + e + } + Token::LBrace => self.parse_initializer_list()?, + Token::EOF => return self.unexpected_eof("primary expression"), + _ => return self.unsupported_token(), + }; + Ok(expr) + } + + /// initializer ::= expression | "{" initializer_list? "}" + pub fn parse_initializer(&mut self) -> ParseResult { + if self.current_token() == &Token::LBrace { + self.parse_initializer_list() + } else { + self.parse_assignment() + } + } + + /// initializer_list ::= initializer ( "," initializer )* ","? + fn parse_initializer_list(&mut self) -> ParseResult { + self.expect(Token::LBrace)?; // '{' 소비 + let mut exprs = Vec::new(); + + if self.current_token() != &Token::RBrace { + loop { + exprs.push(self.parse_initializer()?); + + // ',' 가 있다면 소비 + if self.current_token() == &Token::Comma { + self.next_token(); + + // ',' 다음에 '}' 이면 종료 + if self.current_token() == &Token::RBrace { + break; + } + continue; + } + // ',' 가 없다면 종료 + break; + } + } + + self.expect(Token::RBrace)?; // '}' 소비 + Ok(Expr::InitializerList(exprs)) + } + /// char_literal ::= /* CharLiteral(char) */ + fn parse_char_literal(&mut self) -> ParseResult { + let value = self.expect_char_literal()?; + Ok(Expr::CharLiteral(value)) + } + + /// int_literal ::= /* IntLiteral(i64) */ + fn parse_int_literal(&mut self) -> ParseResult { + let value = self.expect_int_literal()?; + Ok(Expr::IntLiteral(value)) + } + + /// identifier ::= /* Ident(String) */ + fn parse_identifier(&mut self) -> ParseResult { + let string = self.expect_ident()?; + Ok(Expr::Ident(string)) + } +} diff --git a/src/parser/function.rs b/src/parser/function.rs new file mode 100644 index 0000000..fd34536 --- /dev/null +++ b/src/parser/function.rs @@ -0,0 +1,134 @@ +use crate::ast::functions::Parameter; +use crate::ast::stmt::Block; +use crate::ast::{Function, TypeSpecifier}; +use crate::lexer::Token; +use crate::parser::Parser; +use crate::parser::error::ParserError; +use crate::parser::parser::ParseResult; + +impl Parser { + /// 함수 헤더만 파싱 + fn parse_function_header(&mut self) -> ParseResult<(TypeSpecifier, String, Vec)> { + let return_ty = self.parse_type_specifier()?; + let name = self.expect_ident()?; + self.expect(Token::LParen)?; + let params = self.parse_parameters()?; + self.expect(Token::RParen)?; + Ok((return_ty, name, params)) + } + + /// function ::= function_declaration | function_definition + pub fn parse_function(&mut self) -> ParseResult { + let (return_ty, name, params) = self.parse_function_header()?; + + // 선언/정의 분기 + let body = match self.current_token() { + Token::Semicolon => { + // 선언: 세미콜론만 소비하고 빈 블록(body) 생성 + self.next_token(); + Block { + statements: Vec::new(), + } + } + Token::LBrace => { + // 정의: 실제 블록 파싱 + self.parse_block_statement()? + } + other => { + return Err(ParserError::UnexpectedToken { + expected: Token::Semicolon, // 또는 Token::LBrace + found: other.clone(), + line: self.current_span().0, + col: self.current_span().1, + }); + } + }; + + Ok(Function { + name, + return_ty, + params, + body, + }) + } + + fn parse_parameters(&mut self) -> ParseResult> { + // void만 있고 바로 ')' 이면 파라미터 없음 + if self.current_token() == &Token::Void && self.peek_token() == &Token::RParen { + self.next_token(); // void 소비 + return Ok(Vec::new()); + } + // 아무것도 없으면 빈 벡터 + if self.current_token() == &Token::RParen { + return Ok(Vec::new()); + } + // 진짜 파라미터 목록 + self.parse_parameter_list() + } + + /// type_specifier ::= ( "int" | "char" | "void" ) "*"* + pub fn parse_type_specifier(&mut self) -> ParseResult { + // 기본 타입(int|char|void) 확인 + let base_ty = match self.current_token() { + Token::Int => TypeSpecifier::Int, + Token::Char => TypeSpecifier::Char, + Token::Void => TypeSpecifier::Void, + _ => return self.unsupported_token(), + }; + self.next_token(); + + // 뒤따르는 "*" 만큼 포인터 레벨 올리기 + let mut ty = base_ty; + while self.current_token() == &Token::Asterisk { + self.next_token(); // '*' 소비 + ty = TypeSpecifier::Pointer(Box::new(ty)); + } + + Ok(ty) + } + + /// parameter_list ::= parameter ( "," parameter )* + fn parse_parameter_list(&mut self) -> ParseResult> { + let mut params = Vec::new(); + + // ')' + if self.current_token() == &Token::RParen { + return Ok(params); + } + + loop { + // 변수명, 타입 + let base_ty = self.parse_type_specifier()?; + let name = self.expect_ident()?; + let ty = if self.current_token() == &Token::LBracket { + // '[' + self.next_token(); + // 크기 명시(optional) + if self.current_token() != &Token::RBracket { + // ex) int arr[10] + let _sz = self.expect_int_literal()?; + } + // ']' 소비 + self.expect(Token::RBracket)?; + // 배열 파라미터를 포인터로 + TypeSpecifier::Pointer(Box::new(base_ty)) + } else { + base_ty + }; + + params.push(Parameter { name, ty }); + + // ',' 혹은 ')' + match self.current_token() { + Token::Comma => { + self.next_token(); + continue; + } + Token::RParen => break, + _ => return self.unsupported_token(), + } + } + + Ok(params) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..0ee851d --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,11 @@ +#[macro_use] +mod error_helpers; +pub mod error; +pub mod utils; + +pub mod expression; +pub mod function; +pub mod parser; +pub mod statements; + +pub use parser::Parser; diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..bf8d98c --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,28 @@ +use crate::ast::Program; +use crate::lexer::Token; +use crate::lexer::token::SpannedToken; +use crate::parser::error::ParserError; + +pub type ParseResult = Result; + +pub struct Parser { + pub tokens: Vec, + pub pos: usize, +} + +impl Parser { + pub fn new(tokens: Vec) -> Self { + Parser { tokens, pos: 0 } + } + + /// program ::= function_definition* + pub fn parse_program(&mut self) -> ParseResult { + let mut functions = Vec::new(); + while self.current_token() != &Token::EOF { + // 토큰이 'int'|'char'|'void' 시작이면 함수 정의 + let func = self.parse_function()?; + functions.push(func); + } + Ok(Program { functions }) + } +} diff --git a/src/parser/statements.rs b/src/parser/statements.rs new file mode 100644 index 0000000..d0a8d82 --- /dev/null +++ b/src/parser/statements.rs @@ -0,0 +1,220 @@ +use crate::ast::Stmt; +use crate::ast::Stmt::{Break, For, Return, While}; +use crate::ast::stmt::{Block, Declarator}; +use crate::lexer::Token; +use crate::parser::Parser; +use crate::parser::parser::ParseResult; + +impl Parser { + /// statement ::= block + /// | if_statement + /// | while_statement + /// | for_statement + /// | return_statement + /// | break_statement + /// | continue_statement + /// | declaration_statement + /// | expression_statement + pub fn parse_statement(&mut self) -> ParseResult { + let stmt = match self.current_token() { + Token::LBrace => { + let block = self.parse_block_statement()?; + Stmt::Block(block) + } + Token::If => self.parse_if_statement()?, + Token::While => self.parse_while_statement()?, + Token::For => self.parse_for_statement()?, + Token::Return => self.parse_return_statement()?, + Token::Break => self.parse_break_statement()?, + Token::Continue => self.parse_continue_statement()?, + Token::Int | Token::Char => self.parse_declaration_statement()?, + _ => self.parse_expression_statement()?, + }; + + Ok(stmt) + } + + /// expression_statement ::= expression? ";" + fn parse_expression_statement(&mut self) -> ParseResult { + let expr = match self.current_token() { + &Token::Semicolon => None, + _ => Some(self.parse_expr()?), + }; + self.expect(Token::Semicolon)?; + Ok(Stmt::ExprStmt(expr)) + } + + /// declaration_statement ::= type_specifier init_declarator_list ";" + fn parse_declaration_statement(&mut self) -> ParseResult { + let ty = self.parse_type_specifier()?; + let declarators = self.parse_init_declarator_list()?; + self.expect(Token::Semicolon)?; + Ok(Stmt::Declaration { ty, declarators }) + } + + /// init_declarator_list ::= init_declarator ( "," init_declarator )* + fn parse_init_declarator_list(&mut self) -> ParseResult> { + let mut list = Vec::new(); + // 첫 번째 선언자는 반드시 있어야 함 + list.push(self.parse_init_declarator()?); + + // 콤마로 이어지는 추가 선언자들 + while self.current_token() == &Token::Comma { + self.next_token(); // ',' + list.push(self.parse_init_declarator()?); + } + Ok(list) + } + + /// init_declarator ::= declarator ( "=" initializer )? + fn parse_init_declarator(&mut self) -> ParseResult { + // 1) 선언자 파싱 + let mut decl = self.parse_declarator()?; + // 2) 선택적 초기화 + if self.current_token() == &Token::Assign { + self.next_token(); // '=' + let init_expr = self.parse_initializer()?; + decl.init = Some(init_expr); + } + Ok(decl) + } + + /// declarator ::= identifier ( "[" int_literal "]" )? + fn parse_declarator(&mut self) -> ParseResult { + // 1) 이름 + let name = self.expect_ident()?; + // 2) 선택적 배열 첨자 + let array_size = if self.current_token() == &Token::LBracket { + self.next_token(); // '[' + let sz = self.expect_int_literal()?; + self.expect(Token::RBracket)?; // ']' + Some(sz) + } else { + None + }; + Ok(Declarator { + name, + array_size, + init: None, + }) + } + + /// if_statement ::= "if" "(" expression ")" statement ( "else" statement )? + fn parse_if_statement(&mut self) -> ParseResult { + self.expect(Token::If)?; // 'if' 소비 + self.expect(Token::LParen)?; // '(' 소비 + let cond = self.parse_expr()?; + self.expect(Token::RParen)?; // ')' 소비 + + let then_branch = Box::new(self.parse_statement()?); + + let else_branch = match self.current_token() { + Token::Else => { + self.expect(Token::Else)?; // 'else' 소비 + Some(Box::new(self.parse_statement()?)) + } + _ => None, + }; + + Ok(Stmt::If { + cond, + then_branch, + else_branch, + }) + } + + /// continue_statement ::= "continue" ";" + fn parse_continue_statement(&mut self) -> ParseResult { + self.expect(Token::Continue)?; // continue 소비 + self.expect(Token::Semicolon)?; // ';' 소비 + Ok(Stmt::Continue) + } + + /// break_statement ::= "break" ";" + fn parse_break_statement(&mut self) -> ParseResult { + self.expect(Token::Break)?; // break 소비 + self.expect(Token::Semicolon)?; // ';' 소비 + Ok(Break) + } + + /// block ::= "{" statement* "}" + pub fn parse_block_statement(&mut self) -> ParseResult { + self.expect(Token::LBrace)?; // '{' 소비 + + let mut statements = Vec::new(); + while self.current_token() != &Token::RBrace { + if self.current_token() == &Token::EOF { + return self.unexpected_eof(concat!("`", "}", "`")); + } + let stmt = self.parse_statement()?; + statements.push(stmt); + } + + self.expect(Token::RBrace)?; // '}' 소비 + Ok(Block { statements }) + } + + /// while_statement ::= "while" "(" expression ")" statement + fn parse_while_statement(&mut self) -> ParseResult { + self.expect(Token::While)?; + self.expect(Token::LParen)?; + let cond = self.parse_expr()?; + self.expect(Token::RParen)?; + let body = Box::new(self.parse_statement()?); + Ok(While { cond, body }) + } + + /// for_statement ::= "for" "(" expression? ";" expression? ";" expression? ")" statement + fn parse_for_statement(&mut self) -> ParseResult { + self.expect(Token::For)?; // 'for' 소비 + self.expect(Token::LParen)?; // '(' 소비 + + // init + let init = match self.current_token() { + Token::Semicolon => { + self.expect(Token::Semicolon)?; // ';' 소비 + None + } + Token::Void | Token::Int | Token::Char => { + Some(Box::new(self.parse_declaration_statement()?)) + } + _ => { + let e = self.parse_expr()?; + self.expect(Token::Semicolon)?; + Some(Box::new(Stmt::ExprStmt(Some(e)))) + } + }; + + // condition + let cond = match self.current_token() { + Token::Semicolon => None, + _ => Some(self.parse_expr()?), + }; + self.expect(Token::Semicolon)?; // ';' 소비 + + let step = match self.current_token() { + Token::RParen => None, + _ => Some(self.parse_expr()?), + }; + self.expect(Token::RParen)?; // ')' 소비 + let body = Box::new(self.parse_statement()?); + + Ok(For { + init, + cond, + step, + body, + }) + } + + /// return_statement ::= "return" expression? ";" + fn parse_return_statement(&mut self) -> ParseResult { + self.expect(Token::Return)?; // 'return' 소비 + let expr = match self.current_token() { + Token::Semicolon => None, + _ => Some(self.parse_expr()?), + }; + self.expect(Token::Semicolon)?; // ';' 소비 + Ok(Return(expr)) + } +} diff --git a/src/parser/utils.rs b/src/parser/utils.rs new file mode 100644 index 0000000..26c4185 --- /dev/null +++ b/src/parser/utils.rs @@ -0,0 +1,82 @@ +use crate::lexer::Token; +use crate::parser::Parser; +use crate::parser::parser::ParseResult; + +impl Parser { + pub fn expect_ident(&mut self) -> ParseResult { + let tok = self.current_token().clone(); + match tok { + Token::EOF => self.unexpected_eof("identifier"), + Token::Ident(name) => { + self.next_token(); + Ok(name) + } + other => self.unexpected_token(other), + } + } + + pub fn expect_int_literal(&mut self) -> ParseResult { + let tok = self.current_token().clone(); + match tok { + Token::EOF => self.unexpected_eof("integer literal"), + Token::IntLiteral(value) => { + self.next_token(); + Ok(value) + } + other => self.unexpected_token(other), + } + } + + pub fn expect_char_literal(&mut self) -> ParseResult { + let tok = self.current_token().clone(); + match tok { + Token::EOF => self.unexpected_eof("char literal"), + Token::CharLiteral(value) => { + self.next_token(); + Ok(value) + } + other => self.unexpected_token(other), + } + } + + pub fn current_token(&self) -> &Token { + &self + .tokens + .get(self.pos) + .map(|st| &st.kind) + .unwrap_or(&Token::EOF) + } + + pub fn current_span(&self) -> (usize, usize) { + if let Some(st) = self.tokens.get(self.pos) { + (st.line, st.column) + } else { + (0, 0) + } + } + + pub fn peek_token(&self) -> &Token { + &self + .tokens + .get(self.pos + 1) + .map(|st| &st.kind) + .unwrap_or(&Token::EOF) + } + + pub fn next_token(&mut self) { + if self.pos < self.tokens.len() { + self.pos += 1; + } + } + + pub fn expect(&mut self, expected: Token) -> ParseResult<()> { + if self.current_token() == &expected { + self.next_token(); + Ok(()) + } else if self.current_token() == &Token::EOF { + self.unexpected_eof(format!("`{:?}`", expected)) + } else { + self.unexpected_token(expected) + } + } +} diff --git a/tests/.tomorrow b/tests/.tomorrow new file mode 100644 index 0000000..5a8fc48 --- /dev/null +++ b/tests/.tomorrow @@ -0,0 +1,119 @@ +// fn collect_tokens(input: &str) -> Vec { +// let mut l = Lexer::new(input); +// let mut tokens = Vec::new(); +// loop { +// let tok = l.next_token(); +// tokens.push(tok.clone()); +// if tok == Token::EOF { +// break; +// } +// } +// tokens +// } + +// #[test] +// fn simple_tokens() { +// let input = "+ - * / % = == != < <= > >= & && | || ; , ( ) { } [ ]"; +// let expected = vec![ +// Token::Plus, +// Token::Minus, +// Token::Asterisk, +// Token::Slash, +// Token::Percent, +// Token::Assign, +// Token::Equal, +// Token::NotEqual, +// Token::Lt, +// Token::Le, +// Token::Gt, +// Token::Ge, +// Token::Ampersand, +// Token::And, +// Token::BitOr, +// Token::Or, +// Token::Semicolon, +// Token::Comma, +// Token::LParen, +// Token::RParen, +// Token::LBrace, +// Token::RBrace, +// Token::LBracket, +// Token::RBracket, +// Token::EOF, +// ]; +// assert_eq!(collect_tokens(input), expected); +// } +// +// #[test] +// fn identifiers_and_numbers() { +// let input = "foo _bar Baz123 42 007"; +// let expected = vec![ +// Token::Ident("foo".into()), +// Token::Ident("_bar".into()), +// Token::Ident("Baz123".into()), +// Token::IntLiteral(42), +// Token::IntLiteral(7), // 앞 0은 무시 +// Token::EOF, +// ]; +// assert_eq!(collect_tokens(input), expected); +// } +// +// #[test] +// fn char_literals_and_errors() { +// let input = r"'a' '\n' '\0' 'x"; +// let expected = vec![ +// Token::CharLiteral('a'), +// Token::CharLiteral('\n'), +// Token::CharLiteral('\0'), +// Token::Error(LexError::UnterminatedCharLiteral), +// Token::EOF, +// ]; +// assert_eq!(collect_tokens(input), expected); +// } +// +// #[test] +// fn comments_and_whitespace() { +// let input = " +// // single line comment +// foo /* multi +// line */ 123 +// "; +// let expected = vec![ +// Token::Ident("foo".into()), +// Token::IntLiteral(123), +// Token::EOF, +// ]; +// assert_eq!(collect_tokens(input), expected); +// } +// +// #[test] +// fn test_increment_decrement_tokens() { +// let input = "i++ j-- ++k --l"; +// let expected = vec![ +// Token::Ident("i".into()), +// Token::Increment, +// Token::Ident("j".into()), +// Token::Decrement, +// Token::Increment, +// Token::Ident("k".into()), +// Token::Decrement, +// Token::Ident("l".into()), +// Token::EOF, +// ]; +// assert_eq!(collect_tokens(input), expected); +// } +// +// #[test] +// fn test_plus_minus_assign_tokens() { +// let input = "a+=1 b-=2"; +// let expected = vec![ +// Token::Ident("a".into()), +// Token::PlusAssign, +// Token::IntLiteral(1), +// Token::Ident("b".into()), +// Token::MinusAssign, +// Token::IntLiteral(2), +// Token::EOF, +// ]; +// assert_eq!(collect_tokens(input), expected); +// } \ No newline at end of file diff --git a/tests/fixtures/sample.c b/tests/fixtures/sample.c new file mode 100644 index 0000000..7fa0d93 --- /dev/null +++ b/tests/fixtures/sample.c @@ -0,0 +1,69 @@ +int add(int a, int b) { + return a + b; +} + +int factorial(int n) { + int result = 1; + for (int i = 1; i <= n; ++i) { + result *= i; + } + return result; +} + +char to_uppercase(char c) { + // 'a'..'z' 범위이면 대문자로 변환 + if (c >= 'a' && c <= 'z') { + return c - ('a' - 'A'); + } + return c; +} + +int main() { + // 변수 초기화 + int x = 42; + int oct = 007; // 8진수 리터럴 + int *p = &x; // 주소 연산자 + char ch1 = 'A'; + char ch2 = '\n'; // 이스케이프 시퀀스 + char ch3 = '\0'; + char buf[5] = {'h','e','l','l','o'}; // 고정 크기 배열 + int arr2[3]; + for (int i = 0; i < 3; i++) { + arr2[i] = i * 2; + } + + // 포인터 산술 및 복합 할당, 증감 연산 + *p += 10; + x -= 5; + x++; + ++x; + --x; + x--; + + // 논리 연산자, 비교 연산자 + if (x > 10 && x < 100) { + x = add(x, oct); + } else if (x == 0 || x == -1) { + x = factorial(5); + } + + /* 다중 줄 주석입니다. + 비트 연산자 테스트 */ + int b_and = x & oct; + int b_or = x | oct; + + // while 루프와 continue/break + int count = 0; + while (count < 5) { + if (count == 2) { + count++; + continue; + } + if (count == 4) { + break; + } + count += 1; + } + + return 0; +} \ No newline at end of file diff --git a/tests/lexer/basic_tests.rs b/tests/lexer/basic_tests.rs new file mode 100644 index 0000000..9624899 --- /dev/null +++ b/tests/lexer/basic_tests.rs @@ -0,0 +1,77 @@ +use crate::utils::collect_tokens; +use rustc_tape4::lexer::{LexError, Token}; + +#[test] +fn simple_tokens() { + let input = "+ - * / % = == != < <= > >= & && | || ; , ( ) { } [ ]"; + let expected = vec![ + Token::Plus, + Token::Minus, + Token::Asterisk, + Token::Slash, + Token::Percent, + Token::Assign, + Token::Equal, + Token::NotEqual, + Token::Lt, + Token::Le, + Token::Gt, + Token::Ge, + Token::Ampersand, + Token::And, + Token::BitOr, + Token::Or, + Token::Semicolon, + Token::Comma, + Token::LParen, + Token::RParen, + Token::LBrace, + Token::RBrace, + Token::LBracket, + Token::RBracket, + Token::EOF, + ]; + assert_eq!(collect_tokens(input), expected); +} + +#[test] +fn identifiers_and_numbers() { + let input = "foo _bar Baz123 42 007"; + let expected = vec![ + Token::Ident("foo".into()), + Token::Ident("_bar".into()), + Token::Ident("Baz123".into()), + Token::IntLiteral(42), + Token::IntLiteral(7), // 앞 0은 무시 + Token::EOF, + ]; + assert_eq!(collect_tokens(input), expected); +} + +#[test] +fn char_literals_and_errors() { + let input = r"'a' '\n' '\0' 'x"; + let expected = vec![ + Token::CharLiteral('a'), + Token::CharLiteral('\n'), + Token::CharLiteral('\0'), + Token::Error(LexError::UnterminatedCharLiteral), + Token::EOF, + ]; + assert_eq!(collect_tokens(input), expected); +} + +#[test] +fn comments_and_whitespace() { + let input = " + // single line comment + foo /* multi + line */ 123 + "; + let expected = vec![ + Token::Ident("foo".into()), + Token::IntLiteral(123), + Token::EOF, + ]; + assert_eq!(collect_tokens(input), expected); +} diff --git a/tests/lexer/fixture_test.rs b/tests/lexer/fixture_test.rs new file mode 100644 index 0000000..63e9925 --- /dev/null +++ b/tests/lexer/fixture_test.rs @@ -0,0 +1,405 @@ +use rustc_tape4::lexer::{Lexer, Token}; + +#[test] +fn test_sample_c_tokens() { + let src = include_str!("../fixtures/sample.c"); + let mut l = Lexer::new(src); + let mut next_token = || l.next_token().kind; + + // int add(int a, int b) { + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("add".into())); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("a".into())); + assert_eq!(next_token(), Token::Comma); + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("b".into())); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // return a + b; + assert_eq!(next_token(), Token::Return); + assert_eq!(next_token(), Token::Ident("a".into())); + assert_eq!(next_token(), Token::Plus); + assert_eq!(next_token(), Token::Ident("b".into())); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // int factorial(int n) { + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("factorial".into())); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("n".into())); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // int result = 1; + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("result".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::IntLiteral(1)); + assert_eq!(next_token(), Token::Semicolon); + + // for (int i = 1; i <= n; ++i) { + assert_eq!(next_token(), Token::For); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::IntLiteral(1)); + assert_eq!(next_token(), Token::Semicolon); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Le); + assert_eq!(next_token(), Token::Ident("n".into())); + assert_eq!(next_token(), Token::Semicolon); + assert_eq!(next_token(), Token::Increment); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // result *= i; + assert_eq!(next_token(), Token::Ident("result".into())); + assert_eq!(next_token(), Token::AsteriskAssign); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // return result; + assert_eq!(next_token(), Token::Return); + assert_eq!(next_token(), Token::Ident("result".into())); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // char to_uppercase(char c) { + assert_eq!(next_token(), Token::Char); + assert_eq!(next_token(), Token::Ident("to_uppercase".into())); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Char); + assert_eq!(next_token(), Token::Ident("c".into())); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // if (c >= 'a' && c <= 'z') { + assert_eq!(next_token(), Token::If); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("c".into())); + assert_eq!(next_token(), Token::Ge); + assert_eq!(next_token(), Token::CharLiteral('a')); + assert_eq!(next_token(), Token::And); + assert_eq!(next_token(), Token::Ident("c".into())); + assert_eq!(next_token(), Token::Le); + assert_eq!(next_token(), Token::CharLiteral('z')); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // return c - ('a' - 'A'); + assert_eq!(next_token(), Token::Return); + assert_eq!(next_token(), Token::Ident("c".into())); + assert_eq!(next_token(), Token::Minus); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::CharLiteral('a')); + assert_eq!(next_token(), Token::Minus); + assert_eq!(next_token(), Token::CharLiteral('A')); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // return c; + assert_eq!(next_token(), Token::Return); + assert_eq!(next_token(), Token::Ident("c".into())); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // int main() { + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("main".into())); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // int x = 42; + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::IntLiteral(42)); + assert_eq!(next_token(), Token::Semicolon); + + // int oct = 007; // 8진수 리터럴 + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("oct".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::IntLiteral(7)); + assert_eq!(next_token(), Token::Semicolon); + + // int *p = &x; // 주소 연산자 + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Asterisk); + assert_eq!(next_token(), Token::Ident("p".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::Ampersand); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Semicolon); + + // char ch1 = 'A'; + assert_eq!(next_token(), Token::Char); + assert_eq!(next_token(), Token::Ident("ch1".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::CharLiteral('A')); + assert_eq!(next_token(), Token::Semicolon); + + // char ch2 = '\n'; // 이스케이프 시퀀스 + assert_eq!(next_token(), Token::Char); + assert_eq!(next_token(), Token::Ident("ch2".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::CharLiteral('\n')); + assert_eq!(next_token(), Token::Semicolon); + + // char ch3 = '\0'; + assert_eq!(next_token(), Token::Char); + assert_eq!(next_token(), Token::Ident("ch3".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::CharLiteral('\0')); + assert_eq!(next_token(), Token::Semicolon); + + // char buf[5] = {'h','e','l','l','o'}; // 고정 크기 배열 + assert_eq!(next_token(), Token::Char); + assert_eq!(next_token(), Token::Ident("buf".into())); + assert_eq!(next_token(), Token::LBracket); + assert_eq!(next_token(), Token::IntLiteral(5)); + assert_eq!(next_token(), Token::RBracket); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::LBrace); + assert_eq!(next_token(), Token::CharLiteral('h')); + assert_eq!(next_token(), Token::Comma); + assert_eq!(next_token(), Token::CharLiteral('e')); + assert_eq!(next_token(), Token::Comma); + assert_eq!(next_token(), Token::CharLiteral('l')); + assert_eq!(next_token(), Token::Comma); + assert_eq!(next_token(), Token::CharLiteral('l')); + assert_eq!(next_token(), Token::Comma); + assert_eq!(next_token(), Token::CharLiteral('o')); + assert_eq!(next_token(), Token::RBrace); + assert_eq!(next_token(), Token::Semicolon); + + // int arr2[3]; + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("arr2".into())); + assert_eq!(next_token(), Token::LBracket); + assert_eq!(next_token(), Token::IntLiteral(3)); + assert_eq!(next_token(), Token::RBracket); + assert_eq!(next_token(), Token::Semicolon); + + // for (int i = 0; i < 3; i++) { + assert_eq!(next_token(), Token::For); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::IntLiteral(0)); + assert_eq!(next_token(), Token::Semicolon); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Lt); + assert_eq!(next_token(), Token::IntLiteral(3)); + assert_eq!(next_token(), Token::Semicolon); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Increment); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // arr2[i] = i * 2; + assert_eq!(next_token(), Token::Ident("arr2".into())); + assert_eq!(next_token(), Token::LBracket); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::RBracket); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::Ident("i".into())); + assert_eq!(next_token(), Token::Asterisk); + assert_eq!(next_token(), Token::IntLiteral(2)); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // *p += 10; + assert_eq!(next_token(), Token::Asterisk); + assert_eq!(next_token(), Token::Ident("p".into())); + assert_eq!(next_token(), Token::PlusAssign); + assert_eq!(next_token(), Token::IntLiteral(10)); + assert_eq!(next_token(), Token::Semicolon); + + // x -= 5; + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::MinusAssign); + assert_eq!(next_token(), Token::IntLiteral(5)); + assert_eq!(next_token(), Token::Semicolon); + + // x++; + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Increment); + assert_eq!(next_token(), Token::Semicolon); + // ++x; + assert_eq!(next_token(), Token::Increment); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Semicolon); + // --x; + assert_eq!(next_token(), Token::Decrement); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Semicolon); + // x-- + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Decrement); + assert_eq!(next_token(), Token::Semicolon); + + // if (x > 10 && x < 100) { + assert_eq!(next_token(), Token::If); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Gt); + assert_eq!(next_token(), Token::IntLiteral(10)); + assert_eq!(next_token(), Token::And); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Lt); + assert_eq!(next_token(), Token::IntLiteral(100)); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // x = add(x, oct); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::Ident("add".into())); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Comma); + assert_eq!(next_token(), Token::Ident("oct".into())); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::Semicolon); + + // } else if (x == 0 || x == -1) { + assert_eq!(next_token(), Token::RBrace); + assert_eq!(next_token(), Token::Else); + assert_eq!(next_token(), Token::If); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Equal); + assert_eq!(next_token(), Token::IntLiteral(0)); + assert_eq!(next_token(), Token::Or); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Equal); + assert_eq!(next_token(), Token::Minus); + assert_eq!(next_token(), Token::IntLiteral(1)); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // x = factorial(5); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::Ident("factorial".into())); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::IntLiteral(5)); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // int b_and = x & oct; + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("b_and".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::Ampersand); + assert_eq!(next_token(), Token::Ident("oct".into())); + assert_eq!(next_token(), Token::Semicolon); + + // int b_or = x | oct; + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("b_or".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::Ident("x".into())); + assert_eq!(next_token(), Token::BitOr); + assert_eq!(next_token(), Token::Ident("oct".into())); + assert_eq!(next_token(), Token::Semicolon); + + // int count = 0; + assert_eq!(next_token(), Token::Int); + assert_eq!(next_token(), Token::Ident("count".into())); + assert_eq!(next_token(), Token::Assign); + assert_eq!(next_token(), Token::IntLiteral(0)); + assert_eq!(next_token(), Token::Semicolon); + + // while (count < 5) { + assert_eq!(next_token(), Token::While); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("count".into())); + assert_eq!(next_token(), Token::Lt); + assert_eq!(next_token(), Token::IntLiteral(5)); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // if (count == 2) { + assert_eq!(next_token(), Token::If); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("count".into())); + assert_eq!(next_token(), Token::Equal); + assert_eq!(next_token(), Token::IntLiteral(2)); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // count++; + assert_eq!(next_token(), Token::Ident("count".into())); + assert_eq!(next_token(), Token::Increment); + assert_eq!(next_token(), Token::Semicolon); + + // continue; + assert_eq!(next_token(), Token::Continue); + assert_eq!(next_token(), Token::Semicolon); + // } + assert_eq!(next_token(), Token::RBrace); + + // if (count == 4) { + assert_eq!(next_token(), Token::If); + assert_eq!(next_token(), Token::LParen); + assert_eq!(next_token(), Token::Ident("count".into())); + assert_eq!(next_token(), Token::Equal); + assert_eq!(next_token(), Token::IntLiteral(4)); + assert_eq!(next_token(), Token::RParen); + assert_eq!(next_token(), Token::LBrace); + + // break; + assert_eq!(next_token(), Token::Break); + assert_eq!(next_token(), Token::Semicolon); + + assert_eq!(next_token(), Token::RBrace); + + // count += 1; + assert_eq!(next_token(), Token::Ident("count".into())); + assert_eq!(next_token(), Token::PlusAssign); + assert_eq!(next_token(), Token::IntLiteral(1)); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // return 0; + assert_eq!(next_token(), Token::Return); + assert_eq!(next_token(), Token::IntLiteral(0)); + assert_eq!(next_token(), Token::Semicolon); + + // } + assert_eq!(next_token(), Token::RBrace); + + // End with EOF + assert_eq!(next_token(), Token::EOF); +} diff --git a/tests/lexer/mod.rs b/tests/lexer/mod.rs new file mode 100644 index 0000000..55e23e6 --- /dev/null +++ b/tests/lexer/mod.rs @@ -0,0 +1,2 @@ +mod basic_tests; +mod fixture_test; diff --git a/tests/mod.rs b/tests/mod.rs new file mode 100644 index 0000000..2e5ff18 --- /dev/null +++ b/tests/mod.rs @@ -0,0 +1,3 @@ +pub mod lexer; +pub mod parser; +pub mod utils; diff --git a/tests/parser/integration_tests.rs b/tests/parser/integration_tests.rs new file mode 100644 index 0000000..496c9b6 --- /dev/null +++ b/tests/parser/integration_tests.rs @@ -0,0 +1,689 @@ +use crate::utils::parse_program; +use rustc_tape4::ast::Expr; +use rustc_tape4::ast::Stmt; +use rustc_tape4::ast::expr::{BinaryOp, PrefixOp}; +use rustc_tape4::ast::{Function, Program, TypeSpecifier}; +use std::fs; + +/// fixture 파일의 내용을 읽어오는 함수 +fn read_fixture(filename: &str) -> String { + fs::read_to_string(format!("tests/fixtures/{}", filename)) + .unwrap_or_else(|_| panic!("Failed to read fixture: {}", filename)) +} + +#[test] +fn test_parse_sample_c() { + // 기존 sample.c 파일 파싱 테스트 + let source = read_fixture("sample.c"); + let program = parse_program(&source).unwrap(); + + // 최소한의 기본 검증 + assert!(!program.functions.is_empty()); + + // main 함수 찾기 + let main_func = program + .functions + .iter() + .find(|f| f.name == "main") + .expect("main function not found"); + + assert_eq!(main_func.return_ty, TypeSpecifier::Int); + + // add, factorial, to_uppercase 함수 존재 확인 + assert!(program.functions.iter().any(|f| f.name == "add")); + assert!(program.functions.iter().any(|f| f.name == "factorial")); + assert!(program.functions.iter().any(|f| f.name == "to_uppercase")); +} + +#[test] +fn test_expressions_fixture() { + // 다양한 표현식을 포함하는 테스트 + let source = r#" + int test_expressions() { + // 산술 표현식 + int a = 10; + int b = 20; + int c = a + b * (30 - 5) / 5; + + // 비교 및 논리 표현식 + if (a > 0 && b <= 30) { + c = 1; + } else if (a == 0 || b != 20) { + c = 2; + } else { + c = 3; + } + + // 비트 연산 표현식 + int d = a & b; + int e = a | b; + int f = a ^ b; + + // 복합 할당 표현식 + c += 5; + c -= 3; + c *= 2; + c /= 4; + c %= 3; + + // 증감 연산자 + c++; + ++c; + c--; + --c; + + // 포인터 연산 + int *p = &c; + *p = 100; + + // 함수 호출 + int result = add(10, 20); + + // 배열 접근 + int arr[5] = {1, 2, 3, 4, 5}; + arr[2] = arr[0] + arr[1]; + + return c; + } + "#; + + let program = parse_program(source).unwrap(); + assert_eq!(program.functions.len(), 1); + assert_eq!(program.functions[0].name, "test_expressions"); + + // 함수 본문 내 문장 개수 확인 + assert!(program.functions[0].body.statements.len() > 10); +} + +#[test] +fn test_statements_fixture() { + // 다양한 문장을 포함하는 테스트 + let source = r#" + int test_statements() { + // 변수 선언 문장 + int x; + int y = 10; + int arr[5]; + char c = 'A'; + + // if-else 문장 + if (x > 0) { + y = 1; + } else if (x < 0) { + y = -1; + } else { + y = 0; + } + + // while 루프 + int i = 0; + while (i < 5) { + arr[i] = i * i; + i++; + } + + // for 루프 + int sum = 0; + for (int j = 0; j < 5; j++) { + sum += arr[j]; + + if (sum > 10) { + break; + } + + if (j % 2 == 0) { + continue; + } + } + + // 중첩 블록 + { + int temp = x; + x = y; + y = temp; + + { + int z = x + y; + } + } + + // return 문장 + return sum; + } + "#; + + let program = parse_program(source).unwrap(); + assert_eq!(program.functions.len(), 1); + assert_eq!(program.functions[0].name, "test_statements"); + + // 전체 AST 구조 탐색으로 특정 문장 유형 확인 + let has_if = contains_stmt_type(&program, |s| matches!(s, Stmt::If { .. })); + let has_while = contains_stmt_type(&program, |s| matches!(s, Stmt::While { .. })); + let has_for = contains_stmt_type(&program, |s| matches!(s, Stmt::For { .. })); + let has_break = contains_stmt_type(&program, |s| matches!(s, Stmt::Break)); + let has_continue = contains_stmt_type(&program, |s| matches!(s, Stmt::Continue)); + let has_return = contains_stmt_type(&program, |s| matches!(s, Stmt::Return(..))); + + assert!(has_if, "프로그램에 if 문이 없습니다"); + assert!(has_while, "프로그램에 while 문이 없습니다"); + assert!(has_for, "프로그램에 for 문이 없습니다"); + assert!(has_break, "프로그램에 break 문이 없습니다"); + assert!(has_continue, "프로그램에 continue 문이 없습니다"); + assert!(has_return, "프로그램에 return 문이 없습니다"); +} + +#[test] +fn test_pointers_and_arrays_fixture() { + // 포인터와 배열 관련 테스트 + let source = r#" + void swap(int *a, int *b) { + int temp = *a; + *a = *b; + *b = temp; + } + + int* createArray(int size) { + // 실제로는 메모리 할당을 해야 하지만 파서 테스트이므로 0을 리턴 + return 0; + } + + void arrayOperations() { + int arr[10]; + int *ptr = arr; + + // 배열 초기화 + for (int i = 0; i < 10; i++) { + arr[i] = i * 10; + } + + // 포인터 산술 + ptr = ptr + 5; // arr[5]를 가리킴 + *ptr = 100; // arr[5] = 100 + + ptr -= 2; // arr[3]를 가리킴 + *(ptr + 1) = 50; // arr[4] = 50 + + // 1차원 배열 + int matrix[9]; + for (int k = 0; k < 9; k++) { + matrix[k] = k; // 0,1,2,…,8 + } + + // 포인터 배열 + int *ptrArray[5]; + for (int i = 0; i < 5; i++) { + ptrArray[i] = &arr[i * 2]; + } + } + "#; + + let program = parse_program(source).unwrap(); + assert_eq!(program.functions.len(), 3); + + // swap 함수 검증 + let swap_func = program + .functions + .iter() + .find(|f| f.name == "swap") + .expect("swap 함수를 찾을 수 없음"); + + // swap 함수의 매개변수가 포인터 타입인지 확인 + assert_eq!(swap_func.params.len(), 2); + for param in &swap_func.params { + match ¶m.ty { + TypeSpecifier::Pointer(inner) => { + assert_eq!(**inner, TypeSpecifier::Int); + } + _ => panic!("swap 함수의 매개변수가 포인터 타입이 아님"), + } + } + + // createArray 함수의 반환 타입이 int* 인지 확인 + let create_array_func = program + .functions + .iter() + .find(|f| f.name == "createArray") + .expect("createArray 함수를 찾을 수 없음"); + + match &create_array_func.return_ty { + TypeSpecifier::Pointer(inner) => { + assert_eq!(**inner, TypeSpecifier::Int); + } + _ => panic!("createArray 함수의 반환 타입이 int* 가 아님"), + } + + // 포인터 연산이 AST에 존재하는지 확인 + let array_ops_func = program + .functions + .iter() + .find(|f| f.name == "arrayOperations") + .expect("arrayOperations 함수를 찾을 수 없음"); + + let has_pointer_arith = contains_expr_type(array_ops_func, |e| { + matches!( + e, + Expr::BinaryOp { + op: BinaryOp::Add, + .. + } | Expr::BinaryOp { + op: BinaryOp::Sub, + .. + } + ) + }); + + let has_dereference = contains_expr_type(array_ops_func, |e| { + matches!( + e, + Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + .. + } + ) + }); + + assert!(has_pointer_arith, "포인터 산술 연산을 찾을 수 없음"); + assert!(has_dereference, "포인터 역참조 연산을 찾을 수 없음"); +} + +#[test] +fn test_complex_program_fixture() { + // 복잡한 프로그램 예제 + let source = r#" + // 간단한 버블 정렬 구현 + void bubbleSort(int arr[], int n) { + for (int i = 0; i < n-1; i++) { + for (int j = 0; j < n-i-1; j++) { + if (arr[j] > arr[j+1]) { + // 두 원소 교환 + int temp = arr[j]; + arr[j] = arr[j+1]; + arr[j+1] = temp; + } + } + } + } + + // 이진 검색 구현 + int binarySearch(int arr[], int left, int right, int x) { + if (right >= left) { + int mid = left + (right - left) / 2; + + // 중간 원소인 경우 + if (arr[mid] == x) + return mid; + + // 중간 원소보다 작은 경우: 왼쪽 하위 배열 검색 + if (arr[mid] > x) + return binarySearch(arr, left, mid-1, x); + + // 중간 원소보다 큰 경우: 오른쪽 하위 배열 검색 + return binarySearch(arr, mid+1, right, x); + } + + // 원소가 배열에 없음 + return -1; + } + + // 팩토리얼 계산 (재귀) + int factorial(int n) { + if (n <= 1) + return 1; + return n * factorial(n-1); + } + + // 피보나치 수열 계산 (반복) + int fibonacci(int n) { + int a = 0, b = 1, c; + if (n == 0) return a; + + for (int i = 2; i <= n; i++) { + c = a + b; + a = b; + b = c; + } + return b; + } + + int main() { + int arr[10] = {64, 34, 25, 12, 22, 11, 90, 87, 56, 45}; + int n = 10; + + bubbleSort(arr, n); + + // 정렬된 배열에서 22 검색 + int result = binarySearch(arr, 0, n-1, 22); + + int fact5 = factorial(5); // 5! = 120 + int fib10 = fibonacci(10); // fib(10) = 55 + + return 0; + } + "#; + + let program = parse_program(source).unwrap(); + assert_eq!(program.functions.len(), 5); + + // 모든 함수가 존재하는지 확인 + let function_names = vec![ + "bubbleSort", + "binarySearch", + "factorial", + "fibonacci", + "main", + ]; + for name in function_names { + assert!( + program.functions.iter().any(|f| f.name == name), + "함수 {}를 찾을 수 없음", + name + ); + } + + // 재귀 호출이 있는지 확인 + let has_recursion = contains_recursive_call(&program); + assert!(has_recursion, "재귀 함수 호출을 찾을 수 없음"); + + // 중첩된 반복문이 있는지 확인 + let has_nested_loops = contains_nested_loops(&program); + assert!(has_nested_loops, "중첩된 반복문을 찾을 수 없음"); +} + +/// 프로그램에서 주어진 조건을 만족하는 문장이 있는지 검사 +fn contains_stmt_type(program: &Program, predicate: F) -> bool +where + F: Fn(&Stmt) -> bool, +{ + for function in &program.functions { + if contains_stmt_in_block(&function.body.statements, &predicate) { + return true; + } + } + false +} + +/// 블록 내 문장 중 조건을 만족하는 문장이 있는지 재귀적으로 검사 +fn contains_stmt_in_block(statements: &[Stmt], predicate: &F) -> bool +where + F: Fn(&Stmt) -> bool, +{ + for stmt in statements { + if predicate(stmt) { + return true; + } + + // 블록, if, while, for 등 내부에 문장을 포함하는 경우 재귀 검사 + match stmt { + Stmt::Block(block) => { + if contains_stmt_in_block(&block.statements, predicate) { + return true; + } + } + Stmt::If { + then_branch, + else_branch, + .. + } => { + if let Stmt::Block(block) = &**then_branch { + if contains_stmt_in_block(&block.statements, predicate) { + return true; + } + } else if predicate(then_branch) { + return true; + } + + if let Some(else_stmt) = else_branch { + if let Stmt::Block(block) = &**else_stmt { + if contains_stmt_in_block(&block.statements, predicate) { + return true; + } + } else if predicate(else_stmt) { + return true; + } + } + } + Stmt::While { body, .. } => { + if let Stmt::Block(block) = &**body { + if contains_stmt_in_block(&block.statements, predicate) { + return true; + } + } else if predicate(body) { + return true; + } + } + Stmt::For { body, .. } => { + if let Stmt::Block(block) = &**body { + if contains_stmt_in_block(&block.statements, predicate) { + return true; + } + } else if predicate(body) { + return true; + } + } + _ => {} + } + } + false +} + +/// 함수에서 주어진 조건을 만족하는 표현식이 있는지 검사 +fn contains_expr_type(function: &Function, predicate: F) -> bool +where + F: Fn(&Expr) -> bool, +{ + contains_expr_in_block(&function.body.statements, &predicate) +} + +/// 블록 내 문장 중 조건을 만족하는 표현식이 있는지 재귀적으로 검사 +fn contains_expr_in_block(statements: &[Stmt], predicate: &F) -> bool +where + F: Fn(&Expr) -> bool, +{ + for stmt in statements { + match stmt { + Stmt::ExprStmt(Some(expr)) => { + if contains_expr_in_expr(expr, predicate) { + return true; + } + } + Stmt::Declaration { declarators, .. } => { + for decl in declarators { + if let Some(expr) = &decl.init { + if contains_expr_in_expr(expr, predicate) { + return true; + } + } + } + } + Stmt::If { + cond, + then_branch, + else_branch, + } => { + if contains_expr_in_expr(cond, predicate) { + return true; + } + + if let Stmt::Block(block) = &**then_branch { + if contains_expr_in_block(&block.statements, predicate) { + return true; + } + } + + if let Some(else_stmt) = else_branch { + if let Stmt::Block(block) = &**else_stmt { + if contains_expr_in_block(&block.statements, predicate) { + return true; + } + } + } + } + Stmt::While { cond, body } => { + if contains_expr_in_expr(cond, predicate) { + return true; + } + + if let Stmt::Block(block) = &**body { + if contains_expr_in_block(&block.statements, predicate) { + return true; + } + } + } + Stmt::For { + init, + cond, + step, + body, + } => { + if let Some(expr) = init { + match &**expr { + // 선언문일 수도 있지만, ExprStmt(Some(e)) 일 때만 살펴보고 + Stmt::ExprStmt(Some(expr)) => { + if contains_expr_in_expr(expr, predicate) { + return true; + } + } + _ => {} + } + } + + if let Some(expr) = cond { + if contains_expr_in_expr(expr, predicate) { + return true; + } + } + + if let Some(expr) = step { + if contains_expr_in_expr(expr, predicate) { + return true; + } + } + + if let Stmt::Block(block) = &**body { + if contains_expr_in_block(&block.statements, predicate) { + return true; + } + } + } + Stmt::Return(Some(expr)) => { + if contains_expr_in_expr(expr, predicate) { + return true; + } + } + Stmt::Block(block) => { + if contains_expr_in_block(&block.statements, predicate) { + return true; + } + } + _ => {} + } + } + false +} + +/// 표현식 내에서 조건을 만족하는 표현식이 있는지 재귀적으로 검사 +fn contains_expr_in_expr(expr: &Expr, predicate: &F) -> bool +where + F: Fn(&Expr) -> bool, +{ + if predicate(expr) { + return true; + } + + match expr { + Expr::BinaryOp { lhs, rhs, .. } => { + if contains_expr_in_expr(lhs, predicate) || contains_expr_in_expr(rhs, predicate) { + return true; + } + } + Expr::UnaryPrefixOp { rhs, .. } => { + if contains_expr_in_expr(rhs, predicate) { + return true; + } + } + Expr::UnaryPostfixOp { lhs, .. } => { + if contains_expr_in_expr(lhs, predicate) { + return true; + } + } + Expr::Call { func, args } => { + if contains_expr_in_expr(func, predicate) { + return true; + } + + for arg in args { + if contains_expr_in_expr(arg, predicate) { + return true; + } + } + } + Expr::ArrayIndex { array, index } => { + if contains_expr_in_expr(array, predicate) || contains_expr_in_expr(index, predicate) { + return true; + } + } + Expr::Assignment { left, right, .. } => { + if contains_expr_in_expr(left, predicate) || contains_expr_in_expr(right, predicate) { + return true; + } + } + Expr::InitializerList(items) => { + for item in items { + if contains_expr_in_expr(item, predicate) { + return true; + } + } + } + _ => {} + } + + false +} + +/// 프로그램에 재귀 함수 호출이 있는지 검사 +fn contains_recursive_call(program: &Program) -> bool { + for function in &program.functions { + let func_name = &function.name; + + let has_recursive = contains_expr_in_block(&function.body.statements, &|expr| { + if let Expr::Call { func, .. } = expr { + if let Expr::Ident(name) = &**func { + return name == func_name; + } + } + false + }); + + if has_recursive { + return true; + } + } + false +} + +/// 프로그램에 중첩 반복문이 있는지 검사 +fn contains_nested_loops(program: &Program) -> bool { + for function in &program.functions { + // 반복문 내에 반복문이 있는지 검사 + let has_nested = contains_stmt_in_block(&function.body.statements, &|stmt| { + match stmt { + Stmt::While { body, .. } | Stmt::For { body, .. } => { + // 반복문 본문에 또 다른 반복문이 있는지 확인 + if let Stmt::Block(block) = &**body { + contains_stmt_in_block(&block.statements, &|inner_stmt| { + matches!(inner_stmt, Stmt::While { .. } | Stmt::For { .. }) + }) + } else { + matches!(**body, Stmt::While { .. } | Stmt::For { .. }) + } + } + _ => false, + } + }); + + if has_nested { + return true; + } + } + false +} diff --git a/tests/parser/mod.rs b/tests/parser/mod.rs new file mode 100644 index 0000000..8266a8f --- /dev/null +++ b/tests/parser/mod.rs @@ -0,0 +1,2 @@ +mod integration_tests; +pub mod unit_tests; diff --git a/tests/parser/unit_tests/expression_tests.rs b/tests/parser/unit_tests/expression_tests.rs new file mode 100644 index 0000000..0320358 --- /dev/null +++ b/tests/parser/unit_tests/expression_tests.rs @@ -0,0 +1,743 @@ +use crate::utils::parse_expression; +use rustc_tape4::ast::Expr; +use rustc_tape4::ast::expr::{AssignOp, BinaryOp, PostfixOp, PrefixOp}; + +#[test] +fn test_integer_literal() { + // 정수 리터럴 테스트 + let expr = parse_expression("42"); + assert_eq!(expr, Expr::IntLiteral(42)); +} + +#[test] +fn test_char_literal() { + // 문자 리터럴 테스트 + let expr = parse_expression("'a'"); + assert_eq!(expr, Expr::CharLiteral('a')); + + // 이스케이프 문자 테스트 + let expr = parse_expression("'\\n'"); + assert_eq!(expr, Expr::CharLiteral('\n')); + + let expr = parse_expression("'\\0'"); + assert_eq!(expr, Expr::CharLiteral('\0')); +} + +#[test] +fn test_identifier() { + // 식별자 테스트 + let expr = parse_expression("variable"); + assert_eq!(expr, Expr::Ident("variable".to_string())); +} + +#[test] +fn test_binary_arithmetic_ops() { + // 이항 산술 연산자 테스트 + let expr = parse_expression("1 + 2"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Add, + .. + } + )); + + let expr = parse_expression("3 - 4"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Sub, + .. + } + )); + + let expr = parse_expression("5 * 6"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Mul, + .. + } + )); + + let expr = parse_expression("8 / 2"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Div, + .. + } + )); + + let expr = parse_expression("10 % 3"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Rem, + .. + } + )); +} + +#[test] +fn test_comparison_ops() { + // 비교 연산자 테스트 + let expr = parse_expression("a == b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Eq, + .. + } + )); + + let expr = parse_expression("a != b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Ne, + .. + } + )); + + let expr = parse_expression("a < b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Lt, + .. + } + )); + + let expr = parse_expression("a <= b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Le, + .. + } + )); + + let expr = parse_expression("a > b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Gt, + .. + } + )); + + let expr = parse_expression("a >= b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Ge, + .. + } + )); +} + +#[test] +fn test_logical_ops() { + // 논리 연산자 테스트 + let expr = parse_expression("a && b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::And, + .. + } + )); + + let expr = parse_expression("a || b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Or, + .. + } + )); + + let expr = parse_expression("!a"); + assert!(matches!( + expr, + Expr::UnaryPrefixOp { + op: PrefixOp::Not, + .. + } + )); +} + +#[test] +fn test_bitwise_ops() { + // 비트 연산자 테스트 + let expr = parse_expression("a & b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::BitAnd, + .. + } + )); + + let expr = parse_expression("a | b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::BitOr, + .. + } + )); + + let expr = parse_expression("a ^ b"); + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::BitXor, + .. + } + )); +} + +#[test] +fn test_assignment_ops() { + // 할당 연산자 테스트 + let expr = parse_expression("x = 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::Assign, + .. + } + )); + + let expr = parse_expression("x += 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::PlusAssign, + .. + } + )); + + let expr = parse_expression("x -= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::MinusAssign, + .. + } + )); + + let expr = parse_expression("x *= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::MulAssign, + .. + } + )); + + let expr = parse_expression("x /= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::DivAssign, + .. + } + )); + + let expr = parse_expression("x %= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::RemAssign, + .. + } + )); + + let expr = parse_expression("x &= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::BitAndAssign, + .. + } + )); + + let expr = parse_expression("x |= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::BitOrAssign, + .. + } + )); + + let expr = parse_expression("x ^= 5"); + assert!(matches!( + expr, + Expr::Assignment { + op: AssignOp::BitXorAssign, + .. + } + )); +} + +#[test] +fn test_increment_decrement() { + // 증감 연산자 테스트 + let expr = parse_expression("++x"); + assert!(matches!( + expr, + Expr::UnaryPrefixOp { + op: PrefixOp::PreInc, + .. + } + )); + + let expr = parse_expression("--x"); + assert!(matches!( + expr, + Expr::UnaryPrefixOp { + op: PrefixOp::PreDec, + .. + } + )); + + let expr = parse_expression("x++"); + assert!(matches!( + expr, + Expr::UnaryPostfixOp { + op: PostfixOp::PostInc, + .. + } + )); + + let expr = parse_expression("x--"); + assert!(matches!( + expr, + Expr::UnaryPostfixOp { + op: PostfixOp::PostDec, + .. + } + )); +} + +#[test] +fn test_function_call() { + // 함수 호출 테스트 + let expr = parse_expression("foo()"); + assert!(matches!(expr, Expr::Call { .. })); + + let expr = parse_expression("bar(1, 2, 3)"); + if let Expr::Call { args, .. } = expr { + assert_eq!(args.len(), 3); + } else { + panic!("Expected function call"); + } +} + +#[test] +fn test_array_indexing() { + // 배열 인덱싱 테스트 + let expr = parse_expression("arr[0]"); + assert!(matches!(expr, Expr::ArrayIndex { .. })); + + let expr = parse_expression("matrix[i][j]"); + if let Expr::ArrayIndex { array, .. } = expr { + assert!(matches!(*array, Expr::ArrayIndex { .. })); + } else { + panic!("Expected nested array index"); + } +} + +#[test] +fn test_pointer_ops() { + // 포인터 연산자 테스트 + let expr = parse_expression("&x"); + assert!(matches!( + expr, + Expr::UnaryPrefixOp { + op: PrefixOp::Address, + .. + } + )); + + let expr = parse_expression("*p"); + assert!(matches!( + expr, + Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + .. + } + )); +} + +#[test] +fn test_function_call_and_precedence() { + // 함수 호출 및 연산자 우선순위 테스트 + let expr = parse_expression("foo(1, 2+3)"); + + if let Expr::Call { func, args } = expr { + match *func { + Expr::Ident(name) => assert_eq!(name, "foo"), + other => panic!("expected Ident, got {:?}", other), + } + assert_eq!(args.len(), 2); + // 두 번째 인자는 2+3 + match &args[1] { + Expr::BinaryOp { + op: BinaryOp::Add, + lhs, + rhs, + } => { + assert_eq!(**lhs, Expr::IntLiteral(2)); + assert_eq!(**rhs, Expr::IntLiteral(3)); + } + other => panic!("expected 2+3, got {:?}", other), + } + } else { + panic!("expected call expr"); + } +} + +#[test] +fn test_array_indexing_and_assignment() { + // 배열 인덱싱과 대입 표현식 테스트 + let expr = parse_expression("arr[5] = b[2]"); + + if let Expr::Assignment { left, op, right } = expr { + assert_eq!(op, AssignOp::Assign); + // arr[5] + match *left { + Expr::ArrayIndex { + ref array, + ref index, + } => { + match **array { + Expr::Ident(ref name) => assert_eq!(name, "arr"), + ref o => panic!("expected arr, got {:?}", o), + } + assert_eq!(**index, Expr::IntLiteral(5)); + } + ref o => panic!("expected array index lhs, got {:?}", o), + } + // b[2] + match *right { + Expr::ArrayIndex { + ref array, + ref index, + } => { + match **array { + Expr::Ident(ref name) => assert_eq!(name, "b"), + ref o => panic!("expected b, got {:?}", o), + } + assert_eq!(**index, Expr::IntLiteral(2)); + } + ref o => panic!("expected array index rhs, got {:?}", o), + } + } else { + panic!("expected assignment expr"); + } +} + +#[test] +fn test_complex_expression() { + // 복합 표현식과 우선순위 테스트 + let expr = parse_expression("(a + b) * (c - d) / 2"); + + if let Expr::BinaryOp { + op: BinaryOp::Div, + lhs, + rhs, + } = expr + { + assert_eq!(*rhs, Expr::IntLiteral(2)); + + if let Expr::BinaryOp { + op: BinaryOp::Mul, + lhs: mul_lhs, + rhs: mul_rhs, + } = *lhs + { + // (a + b) + if let Expr::BinaryOp { + op: BinaryOp::Add, + lhs: add_lhs, + rhs: add_rhs, + } = *mul_lhs + { + assert_eq!(*add_lhs, Expr::Ident("a".to_string())); + assert_eq!(*add_rhs, Expr::Ident("b".to_string())); + } else { + panic!("Expected (a + b)"); + } + + // (c - d) + if let Expr::BinaryOp { + op: BinaryOp::Sub, + lhs: sub_lhs, + rhs: sub_rhs, + } = *mul_rhs + { + assert_eq!(*sub_lhs, Expr::Ident("c".to_string())); + assert_eq!(*sub_rhs, Expr::Ident("d".to_string())); + } else { + panic!("Expected (c - d)"); + } + } else { + panic!("Expected (a + b) * (c - d)"); + } + } else { + panic!("Expected ((a + b) * (c - d)) / 2"); + } +} + +#[test] +fn test_initializer_list() { + // 배열 초기화 리스트 테스트 + let expr = parse_expression("{1, 2, 3}"); + + if let Expr::InitializerList(items) = expr { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Expr::IntLiteral(1)); + assert_eq!(items[1], Expr::IntLiteral(2)); + assert_eq!(items[2], Expr::IntLiteral(3)); + } else { + panic!("Expected initializer list"); + } + + // 빈 초기화 리스트 + let expr = parse_expression("{}"); + assert!(matches!(expr, Expr::InitializerList(items) if items.is_empty())); + + // 중첩 초기화 리스트 + let expr = parse_expression("{{1, 2}, {3, 4}}"); + if let Expr::InitializerList(items) = expr { + assert_eq!(items.len(), 2); + assert!(matches!(&items[0], Expr::InitializerList(inner) if inner.len() == 2)); + assert!(matches!(&items[1], Expr::InitializerList(inner) if inner.len() == 2)); + } else { + panic!("Expected nested initializer list"); + } +} + +#[test] +fn test_multiple_assignment() { + // 다중 할당 표현식 테스트 (a = b = c = 5) + let expr = parse_expression("a = b = c = 5"); + + if let Expr::Assignment { + left: left1, + op: op1, + right: right1, + } = expr + { + assert_eq!(*left1, Expr::Ident("a".to_string())); + assert_eq!(op1, AssignOp::Assign); + + if let Expr::Assignment { + left: left2, + op: op2, + right: right2, + } = *right1 + { + assert_eq!(*left2, Expr::Ident("b".to_string())); + assert_eq!(op2, AssignOp::Assign); + + if let Expr::Assignment { + left: left3, + op: op3, + right: right3, + } = *right2 + { + assert_eq!(*left3, Expr::Ident("c".to_string())); + assert_eq!(op3, AssignOp::Assign); + assert_eq!(*right3, Expr::IntLiteral(5)); + } else { + panic!("Expected c = 5"); + } + } else { + panic!("Expected b = c = 5"); + } + } else { + panic!("Expected a = b = c = 5"); + } +} + +#[test] +fn test_nested_pointer_ops() { + // 중첩된 포인터 연산자 테스트 + let expr = parse_expression("**ptr"); + + if let Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + rhs, + } = &expr + { + assert!(matches!( + **rhs, + Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + .. + } + )); + } else { + panic!("Expected nested pointer dereference"); + } + + let expr = parse_expression("&*p"); + + if let Expr::UnaryPrefixOp { + op: PrefixOp::Address, + rhs, + } = &expr + { + assert!(matches!( + **rhs, + Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + .. + } + )); + } else { + panic!("Expected address-of dereference"); + } +} + +#[test] +fn test_complex_pointer_arithmetic() { + // 포인터 산술 및 배열 인덱싱 조합 + let expr = parse_expression("*(arr + i)"); + + if let Expr::UnaryPrefixOp { + op: PrefixOp::Deref, + rhs, + } = &expr + { + assert!(matches!( + **rhs, + Expr::BinaryOp { + op: BinaryOp::Add, + .. + } + )); + } else { + panic!("Expected dereference of addition"); + } +} + +#[test] +fn test_op_precedence_mix() { + // 다양한 연산자 우선순위 혼합 테스트 + let expr = parse_expression("a + b * c || d && e == f"); + + // 우선순위: || < && < == < + < * + // 따라서 구조는 ((a + (b * c)) || (d && (e == f))) + assert!(matches!( + expr, + Expr::BinaryOp { + op: BinaryOp::Or, + .. + } + )); +} + +#[test] +fn test_array_with_expr_index() { + // 복잡한 표현식을 배열 인덱스로 사용 + let expr = parse_expression("arr[i * 2 + 1]"); + + if let Expr::ArrayIndex { array: _, index } = expr { + assert!(matches!( + *index, + Expr::BinaryOp { + op: BinaryOp::Add, + .. + } + )); + } else { + panic!("Expected array index"); + } +} + +#[test] +fn test_nested_function_call() { + // 중첩된 함수 호출 + let expr = parse_expression("foo(bar(1, 2), 3)"); + + if let Expr::Call { args, .. } = expr { + assert_eq!(args.len(), 2); + assert!(matches!(args[0], Expr::Call { .. })); + } else { + panic!("Expected function call"); + } +} +#[test] +#[should_panic(expected = "UnexpectedEOF")] +fn test_unbalanced_parentheses() { + // 괄호 짝이 맞지 않는 표현식 + parse_expression("(a + b"); +} + +#[test] +#[should_panic(expected = "UnexpectedEOF")] +fn test_missing_operand() { + // 피연산자가 없는 이항 연산자 + parse_expression("a + "); +} + +#[test] +fn test_consecutive_operators() { + // 연속된 연산자 + parse_expression("a ++ + b"); +} + +#[test] +#[should_panic] +fn test_invalid_assignment_target() { + // 유효하지 않은 대입 대상 (좌변에는 lvalue가 와야 함) + parse_expression("a + b = c"); +} + +#[test] +#[should_panic(expected = "UnexpectedEOF")] +fn test_invalid_array_index() { + // 배열 인덱스로 닫는 괄호가 없음 + parse_expression("arr[5"); +} + +#[test] +#[should_panic(expected = "UnexpectedEOF")] +fn test_function_call_without_closing_paren() { + // 함수 호출 닫는 괄호 없음 + parse_expression("foo(1, 2"); +} + +#[test] +#[should_panic(expected = "UnexpectedEOF")] +fn test_invalid_initializer_list() { + // 초기화 리스트에 닫는 괄호 없음 + parse_expression("{1, 2, 3"); +} + +#[test] +#[should_panic(expected = "UnexpectedToken")] +fn test_comma_without_args() { + // 인자 없이 쉼표만 있는 함수 호출 + parse_expression("foo(,)"); +} + +#[test] +fn test_binary_op_without_lhs() { + // 좌측 피연산자 없는 이항 연산자 + parse_expression("* b"); +} + +#[test] +#[should_panic(expected = "UnexpectedEOF")] +fn test_empty_expression() { + // 빈 표현식 + parse_expression(""); +} diff --git a/tests/parser/unit_tests/function_tests.rs b/tests/parser/unit_tests/function_tests.rs new file mode 100644 index 0000000..bbc1562 --- /dev/null +++ b/tests/parser/unit_tests/function_tests.rs @@ -0,0 +1,281 @@ +use crate::utils::parse_program; +use rustc_tape4::ast::expr::{AssignOp, BinaryOp}; +use rustc_tape4::ast::{Expr, Stmt, TypeSpecifier}; + +#[test] +fn test_empty_program() { + // 함수 정의가 하나도 없으면 빈 Program 반환 + let input = ""; + let program = parse_program(input).unwrap(); + assert_eq!(program.functions.len(), 0); +} + +#[test] +fn test_empty_body() { + // 문장 없는 함수 + let program = parse_program("int f() {}").unwrap(); + assert!(program.functions[0].body.statements.is_empty()); +} + +#[test] +fn test_void_return_type() { + // void 리턴 타입 함수 정의 테스트 (매개변수 없음, 빈 본문) + let input = "void do_nothing() { }"; + let program = parse_program(input).unwrap(); + assert_eq!(program.functions.len(), 1); + let func = &program.functions[0]; + assert_eq!(func.name, "do_nothing"); + assert_eq!(func.return_ty, TypeSpecifier::Void); + assert_eq!(func.params.len(), 0); + assert_eq!(func.body.statements.len(), 0); +} + +#[test] +fn test_simple_function() { + // 기본 함수 정의 테스트 (매개변수 없음) + let input = "int main() { return 0; }"; + let program = parse_program(input).unwrap(); + + assert_eq!(program.functions.len(), 1); + let func = &program.functions[0]; + + // 함수 이름과 반환 타입 검증 + assert_eq!(func.name, "main"); + assert_eq!(func.return_ty, TypeSpecifier::Int); + + // 매개변수 없음 검증 + assert_eq!(func.params.len(), 0); + + // 함수 본문이 한 개의 문장을 가지고 있는지 검증 + assert_eq!(func.body.statements.len(), 1); +} + +#[test] +fn test_function_with_parameters() { + // 매개변수가 있는 함수 정의 테스트 + let input = "int add(int a, int b) { return a + b; }"; + let program = parse_program(input).unwrap(); + + assert_eq!(program.functions.len(), 1); + let func = &program.functions[0]; + + // 함수 이름과 반환 타입 검증 + assert_eq!(func.name, "add"); + assert_eq!(func.return_ty, TypeSpecifier::Int); + + // 매개변수 검증 + assert_eq!(func.params.len(), 2); + + assert_eq!(func.params[0].name, "a"); + assert_eq!(func.params[0].ty, TypeSpecifier::Int); + + assert_eq!(func.params[1].name, "b"); + assert_eq!(func.params[1].ty, TypeSpecifier::Int); + + // 함수 본문이 한 개의 문장을 가지고 있는지 검증 + assert_eq!(func.body.statements.len(), 1); +} + +#[test] +fn test_function_with_pointer_type() { + // 포인터 타입을 사용하는 함수 정의 테스트 + let input = "int* get_array(int size) { return 0; }"; + let program = parse_program(input).unwrap(); + + assert_eq!(program.functions.len(), 1); + let func = &program.functions[0]; + + // 함수 이름 검증 + assert_eq!(func.name, "get_array"); + + // 포인터 반환 타입 검증 + match &func.return_ty { + TypeSpecifier::Pointer(inner_ty) => { + match &**inner_ty { + TypeSpecifier::Int => {} // 정상 + _ => panic!("Expected int pointer return type"), + } + } + _ => panic!("Expected pointer return type"), + } + + // 매개변수 검증 + assert_eq!(func.params.len(), 1); + assert_eq!(func.params[0].name, "size"); + assert_eq!(func.params[0].ty, TypeSpecifier::Int); +} + +#[test] +fn test_function_with_pointer_parameter() { + // 포인터 매개변수 함수 테스트 + let input = "void update(int* ptr) { *ptr = 42; }"; + let program = parse_program(input).unwrap(); + + assert_eq!(program.functions.len(), 1); + let func = &program.functions[0]; + + // 함수 반환 타입과 이름 검증 + assert_eq!(func.name, "update"); + + // 포인터 매개변수 검증 + assert_eq!(func.params.len(), 1); + assert_eq!(func.params[0].name, "ptr"); + + match &func.params[0].ty { + TypeSpecifier::Pointer(inner_ty) => { + match &**inner_ty { + TypeSpecifier::Int => {} // 정상 + _ => panic!("Expected int pointer parameter type"), + } + } + _ => panic!("Expected pointer parameter type"), + } +} + +#[test] +fn test_multiple_functions() { + // 여러 함수 정의 테스트 + let input = " + int foo() { return 1; } + int bar() { return 2; } + "; + let program = parse_program(input).unwrap(); + + assert_eq!(program.functions.len(), 2); + + assert_eq!(program.functions[0].name, "foo"); + assert_eq!(program.functions[0].return_ty, TypeSpecifier::Int); + assert_eq!(program.functions[0].params.len(), 0); + + assert_eq!(program.functions[1].name, "bar"); + assert_eq!(program.functions[1].return_ty, TypeSpecifier::Int); + assert_eq!(program.functions[1].params.len(), 0); +} + +#[test] +fn test_char_return_type() { + // char 리턴 타입 함수 정의 테스트 + let input = "char get_char() { return 'a'; }"; + let program = parse_program(input).unwrap(); + assert_eq!(program.functions.len(), 1); + let func = &program.functions[0]; + + // 함수 이름과 리턴 타입 검증 + assert_eq!(func.name, "get_char"); + assert_eq!(func.return_ty, TypeSpecifier::Char); + assert_eq!(func.params.len(), 0); + + // 본문에 하나의 return 'a'; 문장이 있는지 + let stmts = &func.body.statements; + assert_eq!(stmts.len(), 1); + match &stmts[0] { + Stmt::Return(Some(Expr::CharLiteral(c))) if *c == 'a' => {} + other => panic!("expected `return 'a';`, got {:?}", other), + } +} +#[test] +fn test_multiple_pointer_return() { + // 여러 단계 포인터 리턴 타입 함수 정의 테스트 + let input = "int*** foo() {}"; + let program = parse_program(input).unwrap(); + let func = &program.functions[0]; + + // Return 타입이 Pointer>> 인지 검증 + let mut ty = &func.return_ty; + for _ in 0..3 { + match ty { + TypeSpecifier::Pointer(inner) => ty = inner, + _ => panic!("expected an extra pointer level, got {:?}", ty), + } + } + assert_eq!(*ty, TypeSpecifier::Int); +} + +#[test] +fn test_name_with_underscores_and_digits() { + // 함수 이름에 밑줄과 숫자가 포함된 경우 테스트 + let input = "int _init42_() {}"; + let program = parse_program(input).unwrap(); + let func = &program.functions[0]; + + // 함수 이름 검증 + assert_eq!(func.name, "_init42_"); +} + +#[test] +fn test_pointer_parameters_with_spaces() { + // 매개변수 타입에 포인터와 공백이 섞인 경우 테스트 + let input = "int f( char * a ,int* b) {}"; + let program = parse_program(input).unwrap(); + let func = &program.functions[0]; + assert_eq!(func.params.len(), 2); + + // 첫 번째 파라미터는 char* + match &func.params[0].ty { + TypeSpecifier::Pointer(inner) => assert_eq!(**inner, TypeSpecifier::Char), + other => panic!("expected pointer-to-char, got {:?}", other), + } + // 두 번째 파라미터는 int* + match &func.params[1].ty { + TypeSpecifier::Pointer(inner) => assert_eq!(**inner, TypeSpecifier::Int), + other => panic!("expected pointer-to-int, got {:?}", other), + } +} + +#[test] +fn test_recursive_function() { + let input = "int factorial(int n) { if (n <= 1) return 1; else return n * factorial(n-1); }"; + let program = parse_program(input).unwrap(); + assert_eq!(program.functions.len(), 1); + assert_eq!(program.functions[0].name, "factorial"); + // 함수 내부에서 자기 자신을 호출하는지 확인 + if let Stmt::If { + cond: _, + then_branch, + else_branch, + } = &program.functions[0].body.statements[0] + { + if let Some(else_branch) = else_branch { + if let Stmt::Return(Some(Expr::BinaryOp { + op: BinaryOp::Mul, + lhs, + rhs, + })) = &**else_branch + { + if let Expr::Call { func, args } = &**rhs { + if let Expr::Ident(name) = &**func { + assert_eq!(name, "factorial"); + } else { + panic!("Expected recursive call to factorial"); + } + } else { + panic!("Expected recursive call in multiplication"); + } + } else { + panic!("Expected return n * factorial(n-1)"); + } + } else { + panic!("Expected else branch"); + } + } else { + panic!("Expected if statement"); + } +} + +#[test] +fn test_function_prototype() { + let input = "int prototype(int a, char b);"; + let program = parse_program(input).unwrap(); + assert_eq!(program.functions.len(), 1); + assert!(program.functions[0].body.statements.is_empty()); + assert_eq!(program.functions[0].name, "prototype"); + assert_eq!(program.functions[0].params.len(), 2); +} + +#[test] +fn test_function_with_void_parameter() { + let input = "int f(void) { return 1; }"; + let program = parse_program(input).unwrap(); + assert_eq!(program.functions.len(), 1); + assert_eq!(program.functions[0].params.len(), 0); +} diff --git a/tests/parser/unit_tests/mod.rs b/tests/parser/unit_tests/mod.rs new file mode 100644 index 0000000..e3f719f --- /dev/null +++ b/tests/parser/unit_tests/mod.rs @@ -0,0 +1,3 @@ +mod expression_tests; +mod function_tests; +mod statement_tests; diff --git a/tests/parser/unit_tests/statement_tests.rs b/tests/parser/unit_tests/statement_tests.rs new file mode 100644 index 0000000..19da0f7 --- /dev/null +++ b/tests/parser/unit_tests/statement_tests.rs @@ -0,0 +1,491 @@ +use crate::utils::parse_statement; +use rustc_tape4::ast::expr::BinaryOp; +use rustc_tape4::ast::{Expr, Stmt, TypeSpecifier}; + +#[test] +fn test_empty_statement() { + // 세미콜론만 있는 빈 문장 테스트 + let stmt = parse_statement(";"); + assert!(matches!(stmt, Stmt::ExprStmt(None))); +} + +#[test] +fn test_expression_statement() { + // 표현식 문장 테스트 + let stmt = parse_statement("x = 42;"); + + if let Stmt::ExprStmt(Some(expr)) = stmt { + assert!(matches!(expr, Expr::Assignment { .. })); + } else { + panic!("Expected expression statement"); + } + + // 함수 호출 문장 + let stmt = parse_statement("foo(1, 2);"); + + if let Stmt::ExprStmt(Some(Expr::Call { .. })) = stmt { + // OK + } else { + panic!("Expected function call statement"); + } +} + +#[test] +fn test_declaration_statement() { + // 기본 변수 선언 + let stmt = parse_statement("int x = 10;"); + + if let Stmt::Declaration { ty, declarators } = stmt { + assert_eq!(ty, TypeSpecifier::Int); + assert_eq!(declarators.len(), 1); + assert_eq!(declarators[0].name, "x"); + assert!(matches!(&declarators[0].init, Some(Expr::IntLiteral(10)))); + } else { + panic!("Expected declaration statement"); + } + + // 초기화 없는 선언 + let stmt = parse_statement("char c;"); + + if let Stmt::Declaration { ty, declarators } = stmt { + assert_eq!(ty, TypeSpecifier::Char); + assert_eq!(declarators[0].name, "c"); + assert!(declarators[0].init.is_none()); + } else { + panic!("Expected declaration statement"); + } + + // 포인터 변수 선언 + let stmt = parse_statement("int* ptr;"); + + if let Stmt::Declaration { ty, .. } = stmt { + if let TypeSpecifier::Pointer(inner) = ty { + assert_eq!(*inner, TypeSpecifier::Int); + } else { + panic!("Expected pointer type"); + } + } else { + panic!("Expected declaration statement"); + } +} + +#[test] +fn test_multiple_declarators_in_declaration() { + // 한 선언문에 여러 변수 선언 테스트 + let stmt = parse_statement("int x = 1, y, z = 3;"); + + if let Stmt::Declaration { ty, declarators } = stmt { + assert_eq!(ty, TypeSpecifier::Int); + assert_eq!(declarators.len(), 3); + + // x = 1 + assert_eq!(declarators[0].name, "x"); + assert!(matches!(&declarators[0].init, Some(Expr::IntLiteral(1)))); + + // y (초기화 없음) + assert_eq!(declarators[1].name, "y"); + assert!(declarators[1].init.is_none()); + + // z = 3 + assert_eq!(declarators[2].name, "z"); + assert!(matches!(&declarators[2].init, Some(Expr::IntLiteral(3)))); + } else { + panic!("Expected declaration with multiple declarators"); + } +} + +#[test] +fn test_array_declaration() { + // 배열 선언 테스트 + let stmt = parse_statement("int arr[5];"); + + if let Stmt::Declaration { ty, declarators } = stmt { + assert_eq!(ty, TypeSpecifier::Int); + assert_eq!(declarators[0].name, "arr"); + assert_eq!(declarators[0].array_size, Some(5)); + } else { + panic!("Expected array declaration"); + } + + // 배열 초기화 테스트 + let stmt = parse_statement("int nums[3] = {1, 2, 3};"); + + if let Stmt::Declaration { ty, declarators } = stmt { + assert_eq!(ty, TypeSpecifier::Int); + assert_eq!(declarators[0].name, "nums"); + assert_eq!(declarators[0].array_size, Some(3)); + + if let Some(Expr::InitializerList(items)) = &declarators[0].init { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Expr::IntLiteral(1)); + assert_eq!(items[1], Expr::IntLiteral(2)); + assert_eq!(items[2], Expr::IntLiteral(3)); + } else { + panic!("Expected initializer list"); + } + } else { + panic!("Expected array declaration with initialization"); + } +} + +#[test] +fn test_initializer_list_trailing_comma() { + // 배열 초기화 리스트 후행 콤마 테스트 + let stmt = parse_statement("int a[3] = {1, 2, 3,};"); + + if let Stmt::Declaration { declarators, .. } = stmt { + let d = &declarators[0]; + assert_eq!(d.name, "a"); + assert_eq!(d.array_size, Some(3)); + + if let Some(Expr::InitializerList(v)) = &d.init { + assert_eq!( + v, + &vec![ + Expr::IntLiteral(1), + Expr::IntLiteral(2), + Expr::IntLiteral(3), + ] + ); + } else { + panic!("Expected initializer list"); + } + } else { + panic!("Expected declaration statement"); + } +} + +#[test] +fn test_if_statement() { + // 기본 if 문 테스트 + let stmt = parse_statement("if (x > 0) { return x; }"); + + if let Stmt::If { + cond, + then_branch, + else_branch, + } = stmt + { + assert!(matches!( + cond, + Expr::BinaryOp { + op: BinaryOp::Gt, + .. + } + )); + assert!(else_branch.is_none()); + + if let Stmt::Block(block) = *then_branch { + assert_eq!(block.statements.len(), 1); + assert!(matches!(block.statements[0], Stmt::Return(..))); + } else { + panic!("Expected block statement"); + } + } else { + panic!("Expected if statement"); + } +} + +#[test] +fn test_if_else_statement() { + // if-else 문 테스트 + let stmt = parse_statement("if (x > 0) return x; else return -x;"); + + if let Stmt::If { + cond, + then_branch, + else_branch, + } = stmt + { + assert!(matches!( + cond, + Expr::BinaryOp { + op: BinaryOp::Gt, + .. + } + )); + + assert!(matches!(*then_branch, Stmt::Return(..))); + + assert!(else_branch.is_some()); + if let Some(else_stmt) = else_branch { + assert!(matches!(*else_stmt, Stmt::Return(..))); + } + } else { + panic!("Expected if-else statement"); + } +} + +#[test] +fn test_if_else_if_chain() { + // if-else if-else 체인 테스트 + let stmt = parse_statement("if (x > 0) return 1; else if (x < 0) return -1; else return 0;"); + + if let Stmt::If { + cond: _, + then_branch: _, + else_branch: Some(else_stmt), + } = &stmt + { + if let Stmt::If { + cond: _, + then_branch: _, + else_branch: Some(inner_else), + } = &**else_stmt + { + assert!(matches!(**inner_else, Stmt::Return(..))); + } else { + panic!("Expected else-if"); + } + } else { + panic!("Expected if-else if-else chain"); + } +} + +#[test] +fn test_while_statement() { + // while 문 테스트 + let stmt = parse_statement("while (i < 10) { i = i + 1; }"); + + if let Stmt::While { cond, body } = stmt { + assert!(matches!( + cond, + Expr::BinaryOp { + op: BinaryOp::Lt, + .. + } + )); + + if let Stmt::Block(block) = *body { + assert_eq!(block.statements.len(), 1); + assert!(matches!(block.statements[0], Stmt::ExprStmt(..))); + } else { + panic!("Expected block statement"); + } + } else { + panic!("Expected while statement"); + } +} + +#[test] +fn test_for_statement() { + // 기본 for 문 테스트 + let stmt = parse_statement("for (i = 0; i < 10; i++) sum += i;"); + + if let Stmt::For { + init, + cond, + step, + body, + } = stmt + { + assert!(init.is_some()); + assert!(cond.is_some()); + assert!(step.is_some()); + + if let Some(init_expr) = init { + assert!(matches!( + *init_expr, + Stmt::ExprStmt(Some(Expr::Assignment { .. })) + )); + } + + if let Some(cond_expr) = cond { + assert!(matches!( + cond_expr, + Expr::BinaryOp { + op: BinaryOp::Lt, + .. + } + )); + } + + if let Some(step_expr) = step { + assert!(matches!(step_expr, Expr::UnaryPostfixOp { .. })); + } + + assert!(matches!(*body, Stmt::ExprStmt(..))); + } else { + panic!("Expected for statement"); + } +} + +#[test] +fn test_various_for_forms() { + // 다양한 for 문 형태 테스트 + + // 초기화 없는 for + let stmt = parse_statement("for (; i < 10; i++) { }"); + if let Stmt::For { init, .. } = stmt { + assert!(init.is_none()); + } else { + panic!("Expected for without init"); + } + + // 조건 없는 for + let stmt = parse_statement("for (i = 0;; i++) { }"); + if let Stmt::For { cond, .. } = stmt { + assert!(cond.is_none()); + } else { + panic!("Expected for without condition"); + } + + // 증감 없는 for + let stmt = parse_statement("for (i = 0; i < 10;) { }"); + if let Stmt::For { step, .. } = stmt { + assert!(step.is_none()); + } else { + panic!("Expected for without step"); + } + + // 모두 없는 for (무한 루프) + let stmt = parse_statement("for (;;) { }"); + if let Stmt::For { + init, cond, step, .. + } = stmt + { + assert!(init.is_none()); + assert!(cond.is_none()); + assert!(step.is_none()); + } else { + panic!("Expected infinite for loop"); + } +} + +#[test] +fn test_return_statement() { + // 값 없는 return 문 + let stmt = parse_statement("return;"); + + if let Stmt::Return(expr) = stmt { + assert!(expr.is_none()); + } else { + panic!("Expected return statement without value"); + } + + // 값 있는 return 문 + let stmt = parse_statement("return 42;"); + + if let Stmt::Return(Some(expr)) = stmt { + assert_eq!(expr, Expr::IntLiteral(42)); + } else { + panic!("Expected return statement with value"); + } + + // 표현식 return 문 + let stmt = parse_statement("return a + b * c;"); + + if let Stmt::Return(Some(expr)) = stmt { + assert!(matches!(expr, Expr::BinaryOp { .. })); + } else { + panic!("Expected return statement with expression"); + } +} + +#[test] +fn test_break_and_continue() { + // break 문 + let stmt = parse_statement("break;"); + assert!(matches!(stmt, Stmt::Break)); + + // continue 문 + let stmt = parse_statement("continue;"); + assert!(matches!(stmt, Stmt::Continue)); +} + +#[test] +fn test_block_statement() { + // 단일 문장 블록 + let stmt = parse_statement("{ return 0; }"); + + if let Stmt::Block(block) = stmt { + assert_eq!(block.statements.len(), 1); + assert!(matches!(block.statements[0], Stmt::Return(..))); + } else { + panic!("Expected block statement"); + } + + // 다중 문장 블록 + let stmt = parse_statement("{ int x = 10; x = x + 1; return x; }"); + + if let Stmt::Block(block) = stmt { + assert_eq!(block.statements.len(), 3); + assert!(matches!(block.statements[0], Stmt::Declaration { .. })); + assert!(matches!(block.statements[1], Stmt::ExprStmt(..))); + assert!(matches!(block.statements[2], Stmt::Return(..))); + } else { + panic!("Expected multi-statement block"); + } + + // 빈 블록 + let stmt = parse_statement("{ }"); + + if let Stmt::Block(block) = stmt { + assert_eq!(block.statements.len(), 0); + } else { + panic!("Expected empty block statement"); + } +} + +#[test] +fn test_nested_blocks() { + // 중첩 블록 테스트 + let stmt = parse_statement("{ { int x = 1; } { int y = 2; } }"); + + if let Stmt::Block(block) = stmt { + assert_eq!(block.statements.len(), 2); + assert!(matches!(block.statements[0], Stmt::Block(..))); + assert!(matches!(block.statements[1], Stmt::Block(..))); + } else { + panic!("Expected nested blocks"); + } +} + +#[test] +fn test_complex_nested_statements() { + // 여러 문장 타입이 중첩된 복잡한 예제 + let stmt = parse_statement( + r#" + { + int i = 0; + int sum = 0; + while (i < 10) { + if (i % 2 == 0) { + sum += i; + } else { + continue; + } + i++; + } + return sum; + } + "#, + ); + + if let Stmt::Block(block) = stmt { + assert_eq!(block.statements.len(), 4); // 선언 2개, while 문, return 문 + + // while 문 체크 + if let Stmt::While { body, .. } = &block.statements[2] { + if let Stmt::Block(while_block) = &**body { + assert_eq!(while_block.statements.len(), 2); // if 문, i++ 문 + + // if 문 체크 + if let Stmt::If { else_branch, .. } = &while_block.statements[0] { + assert!(else_branch.is_some()); + } else { + panic!("Expected if statement in while block"); + } + } else { + panic!("Expected block as while body"); + } + } else { + panic!("Expected while statement"); + } + + // return 문 체크 + assert!(matches!(block.statements[3], Stmt::Return(..))); + } else { + panic!("Expected complex nested block"); + } +} diff --git a/tests/utils.rs b/tests/utils.rs new file mode 100644 index 0000000..a0094d3 --- /dev/null +++ b/tests/utils.rs @@ -0,0 +1,76 @@ +use rustc_tape4::ast::{Expr, Program, Stmt}; +use rustc_tape4::lexer::{Lexer, Token}; +use rustc_tape4::parser::Parser; +use rustc_tape4::parser::parser::ParseResult; + +/// 입력 전체를 순환하며 토큰을 수집 +pub fn collect_tokens(input: &str) -> Vec { + let mut l = Lexer::new(input); + let tokens = l + .collect_spanned_tokens() + .iter() + .map(|tok| tok.kind.clone()) + .collect(); + + tokens +} + +pub fn parse_program(input: &str) -> ParseResult { + let mut lexer = Lexer::new(input); + let tokens = lexer.collect_spanned_tokens(); + let mut parser = Parser::new(tokens); + let result = parser.parse_program(); + + if result.is_err() { + println!("파싱 오류: {:?}", result.as_ref().err()); + } + + result +} + +/// 표현식 문자열을 파싱하여 Expr AST 반환 +pub fn parse_expression(input: &str) -> Expr { + let mut lexer = Lexer::new(input); + let tokens = lexer.collect_spanned_tokens(); + + // 디버깅용: 토큰 출력 + println!( + "파싱할 토큰: {:?}", + tokens.iter().map(|t| &t.kind).collect::>() + ); + + let mut parser = Parser::new(tokens); + match parser.parse_expr() { + Ok(expr) => expr, + Err(e) => { + panic!("표현식 파싱 실패: {:?}\n입력: '{}'", e, input); + } + } +} + +/// 문장 문자열을 파싱하여 Stmt AST 반환 +pub fn parse_statement(input: &str) -> Stmt { + // 문장은 세미콜론으로 끝나야 하므로 없으면 추가 + let input = if !input.trim().ends_with(';') { + format!("{};", input) + } else { + input.to_string() + }; + + let mut lexer = Lexer::new(&input); + let tokens = lexer.collect_spanned_tokens(); + + // 디버깅용: 토큰 출력 + println!( + "파싱할 토큰: {:?}", + tokens.iter().map(|t| &t.kind).collect::>() + ); + + let mut parser = Parser::new(tokens); + match parser.parse_statement() { + Ok(stmt) => stmt, + Err(e) => { + panic!("문장 파싱 실패: {:?}\n입력: '{}'", e, input); + } + } +}