From a084c304db821557792175f77e3717c099ef108d Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 17:32:25 +0000 Subject: [PATCH 01/88] Updated rebench config --- rebench.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rebench.conf b/rebench.conf index 24394210..34fe0c15 100644 --- a/rebench.conf +++ b/rebench.conf @@ -7,8 +7,8 @@ reporting: # Benchmark results will be reported to ReBenchDB rebenchdb: # this url needs to point to the API endpoint - db_url: https://rebench.polomack.eu/rebenchdb/results - repo_url: https://github.com/Hirevo/som-rs + db_url: http://localhost:33333/rebenchdb + repo_url: https://github.com/OctaveLarose/som-rs record_all: true # make sure everything is recorded project_name: som-rs From 63d2d6cf0c2c7b60ccfd2ff359880162834f1acc Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 17:33:46 +0000 Subject: [PATCH 02/88] adding bytecodes send1,2,3 --- Cargo.toml | 3 + som-core/src/bytecode.rs | 34 ++++-- som-interpreter-bc/src/compiler.rs | 15 ++- som-interpreter-bc/src/interpreter.rs | 161 +++++++++++++++----------- som-interpreter-bc/src/method.rs | 5 +- 5 files changed, 137 insertions(+), 81 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dc7c85ed..d6188107 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,6 @@ members = [ "som-parser-symbols", "som-parser-text", ] + +[profile.release] +debug=true diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 1d5a111f..0ee7dbfa 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -15,7 +15,10 @@ pub enum Bytecode { PopLocal(u8, u8), PopArgument(u8, u8), PopField(u8), - Send(u8), + Send1(u8), + Send2(u8), + Send3(u8), + SendN(u8), SuperSend(u8), ReturnLocal, ReturnNonLocal, @@ -39,7 +42,10 @@ impl Bytecode { Self::PopLocal(_, _) => "POP_LOCAL", Self::PopArgument(_, _) => "POP_ARGUMENT", Self::PopField(_) => "POP_FIELD", - Self::Send(_) => "SEND", + Self::Send1(_) => "SEND 1", + Self::Send2(_) => "SEND 2", + Self::Send3(_) => "SEND 3", + Self::SendN(_) => "SEND N", Self::SuperSend(_) => "SUPER_SEND", Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", @@ -63,7 +69,10 @@ impl Bytecode { Self::PopLocal(_, _) => "POP_LOCAL ", Self::PopArgument(_, _) => "POP_ARGUMENT ", Self::PopField(_) => "POP_FIELD ", - Self::Send(_) => "SEND ", + Self::Send1(_) => "SEND 1 ", + Self::Send2(_) => "SEND 2 ", + Self::Send3(_) => "SEND 3 ", + Self::SendN(_) => "SEND N ", Self::SuperSend(_) => "SUPER_SEND ", Self::ReturnLocal => "RETURN_LOCAL ", Self::ReturnNonLocal => "RETURN_NON_LOCAL", @@ -71,7 +80,7 @@ impl Bytecode { } } -pub static NAMES: [&str; 16] = [ +pub static NAMES: [&str; 19] = [ "HALT", "DUP", "PUSH_LOCAL", @@ -84,13 +93,16 @@ pub static NAMES: [&str; 16] = [ "POP_LOCAL", "POP_ARGUMENT", "POP_FIELD", - "SEND", + "SEND_1", + "SEND_2", + "SEND_3", + "SEND_N", "SUPER_SEND", "RETURN_LOCAL", "RETURN_NON_LOCAL", ]; -pub static PADDED_NAMES: [&str; 16] = [ +pub static PADDED_NAMES: [&str; 19] = [ "HALT ", "DUP ", "PUSH_LOCAL ", @@ -103,7 +115,10 @@ pub static PADDED_NAMES: [&str; 16] = [ "POP_LOCAL ", "POP_ARGUMENT ", "POP_FIELD ", - "SEND ", + "SEND 1 ", + "SEND 2 ", + "SEND 3 ", + "SEND N ", "SUPER_SEND ", "RETURN_LOCAL ", "RETURN_NON_LOCAL", @@ -125,7 +140,10 @@ impl fmt::Display for Bytecode { Self::PopLocal(up_idx, idx) => write!(f, "POP_LOCAL {}, {}", up_idx, idx), Self::PopArgument(up_idx, idx) => write!(f, "POP_ARGUMENT {}, {}", up_idx, idx), Self::PopField(idx) => write!(f, "POP_FIELD {}", idx), - Self::Send(idx) => write!(f, "SEND {}", idx), + Self::Send1(idx) => write!(f, "SEND 1 {}", idx), + Self::Send2(idx) => write!(f, "SEND 2 {}", idx), + Self::Send3(idx) => write!(f, "SEND 3 {}", idx), + Self::SendN(idx) => write!(f, "SEND N {}", idx), Self::SuperSend(idx) => write!(f, "SUPER_SEND {}", idx), Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index d2600dc0..ebc7805b 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -263,12 +263,23 @@ impl MethodCodegen for ast::Expression { .values .iter() .try_for_each(|value| value.codegen(ctxt))?; + + let nb_params = match message.signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => message.signature.chars().filter(|ch| *ch == ':').count(), + }; + let sym = ctxt.intern_symbol(message.signature.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { ctxt.push_instr(Bytecode::SuperSend(idx as u8)); } else { - ctxt.push_instr(Bytecode::Send(idx as u8)); + match nb_params { + 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), + 2 => ctxt.push_instr(Bytecode::Send2(idx as u8)), + 3 => ctxt.push_instr(Bytecode::Send3(idx as u8)), + _ => ctxt.push_instr(Bytecode::SendN(idx as u8)) + } } Some(()) } @@ -284,7 +295,7 @@ impl MethodCodegen for ast::Expression { if super_send { ctxt.push_instr(Bytecode::SuperSend(idx as u8)); } else { - ctxt.push_instr(Bytecode::Send(idx as u8)); + ctxt.push_instr(Bytecode::SendN(idx as u8)); } Some(()) } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 311a511f..0c450f26 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -1,6 +1,7 @@ use std::cell::RefCell; use std::rc::Rc; use std::time::Instant; +use std::usize; use som_core::bytecode::Bytecode; @@ -44,6 +45,85 @@ impl Interpreter { self.frames.last() } + fn send(&mut self, idx: u8, mut nb_params: usize, frame: SOMRef, universe: &mut Universe) -> Option { + let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let symbol = match literal { + Literal::Symbol(sym) => sym, + _ => { + return None; + } + }; + let signature = universe.lookup_symbol(symbol); + if nb_params == usize::MAX { + nb_params = match signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => signature.chars().filter(|ch| *ch == ':').count(), + }; + } + + let method = self + .stack + .iter() + .nth_back(nb_params) + .unwrap() + .lookup_method(universe, symbol); + + if let Some(method) = method { + match method.kind() { + MethodKind::Defined(_) => { + let mut args = Vec::with_capacity(nb_params + 1); + + for _ in 0..nb_params { + let arg = self.stack.pop().unwrap(); + args.push(arg); + } + let self_value = self.stack.pop().unwrap(); + args.push(self_value.clone()); + + args.reverse(); + + let holder = method.holder.upgrade().unwrap(); + self.push_frame(FrameKind::Method { + self_value, + method, + holder, + }); + + let frame = self.current_frame().unwrap(); + frame.borrow_mut().args = args; + } + MethodKind::Primitive(func) => { + func(self, universe); + } + MethodKind::NotImplemented(err) => { + let self_value = self.stack.iter().nth_back(nb_params).unwrap(); + println!( + "{}>>#{}", + self_value.class(&universe).borrow().name(), + method.signature() + ); + panic!("Primitive `#{}` not implemented", err) + } + } + } else { + let mut args = Vec::with_capacity(nb_params + 1); + + for _ in 0..nb_params { + let arg = self.stack.pop().unwrap(); + args.push(arg); + } + let self_value = self.stack.pop().unwrap(); + + args.reverse(); + + universe.does_not_understand(self, self_value, symbol, args) + .expect( + "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" + ); + } + None + } + pub fn run(&mut self, universe: &mut Universe) -> Option { loop { let frame = match self.current_frame() { @@ -185,76 +265,17 @@ impl Interpreter { self_value.assign_local(idx as usize, value).unwrap(); } } - Bytecode::Send(idx) => { - let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); - let symbol = match literal { - Literal::Symbol(sym) => sym, - _ => { - return None; - } - }; - let signature = universe.lookup_symbol(symbol); - let nb_params = nb_params(signature); - let method = self - .stack - .iter() - .nth_back(nb_params) - .unwrap() - .lookup_method(universe, symbol); - - if let Some(method) = method { - match method.kind() { - MethodKind::Defined(_) => { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - args.push(self_value.clone()); - - args.reverse(); - - let holder = method.holder.upgrade().unwrap(); - self.push_frame(FrameKind::Method { - self_value, - method, - holder, - }); - - let frame = self.current_frame().unwrap(); - frame.borrow_mut().args = args; - } - MethodKind::Primitive(func) => { - func(self, universe); - } - MethodKind::NotImplemented(err) => { - let self_value = self.stack.iter().nth_back(nb_params).unwrap(); - println!( - "{}>>#{}", - self_value.class(&universe).borrow().name(), - method.signature() - ); - panic!("Primitive `#{}` not implemented", err) - } - } - } else { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - - args.reverse(); - - universe.does_not_understand(self, self_value, symbol, args) - .expect( - "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" - ); - } + Bytecode::Send1(idx) => { + self.send(idx, 1, frame.clone(), universe); + } + Bytecode::Send2(idx) => { + self.send(idx, 2, frame.clone(), universe); + } + Bytecode::Send3(idx) => { + self.send(idx, 3, frame.clone(), universe); + } + Bytecode::SendN(idx) => { + self.send(idx, usize::MAX, frame.clone(), universe); } Bytecode::SuperSend(idx) => { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index a54d8e81..5ba51b73 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -159,7 +159,10 @@ impl fmt::Display for Method { Bytecode::PopField(idx) => { write!(f, "index: {}", idx)?; } - Bytecode::Send(idx) => { + Bytecode::Send1(idx) | + Bytecode::Send2(idx) | + Bytecode::Send3(idx) | + Bytecode::SendN(idx) => { write!(f, "index: {}", idx)?; } Bytecode::SuperSend(idx) => { From bf0e77e0d26db4361c0944d8bda82f0c6a91d5a7 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 18:36:34 +0000 Subject: [PATCH 03/88] supersend1-2-3 bytecodes --- .gitignore | 4 + som-core/src/bytecode.rs | 50 +++++--- som-interpreter-bc/src/compiler.rs | 9 +- som-interpreter-bc/src/interpreter.rs | 164 ++++++++++++++------------ som-interpreter-bc/src/method.rs | 5 +- 5 files changed, 136 insertions(+), 96 deletions(-) diff --git a/.gitignore b/.gitignore index 597ab203..cc2f393e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,7 @@ /.vscode /.nova /.idea + +# Rebench stuff +payload.json +rebench.data \ No newline at end of file diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 0ee7dbfa..5fb0d3c9 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -19,7 +19,10 @@ pub enum Bytecode { Send2(u8), Send3(u8), SendN(u8), - SuperSend(u8), + SuperSend1(u8), + SuperSend2(u8), + SuperSend3(u8), + SuperSendN(u8), ReturnLocal, ReturnNonLocal, } @@ -46,7 +49,10 @@ impl Bytecode { Self::Send2(_) => "SEND 2", Self::Send3(_) => "SEND 3", Self::SendN(_) => "SEND N", - Self::SuperSend(_) => "SUPER_SEND", + Self::SuperSend1(_) => "SUPER_SEND 1", + Self::SuperSend2(_) => "SUPER_SEND 2", + Self::SuperSend3(_) => "SUPER_SEND 3", + Self::SuperSendN(_) => "SUPER_SEND N", Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", } @@ -69,18 +75,21 @@ impl Bytecode { Self::PopLocal(_, _) => "POP_LOCAL ", Self::PopArgument(_, _) => "POP_ARGUMENT ", Self::PopField(_) => "POP_FIELD ", - Self::Send1(_) => "SEND 1 ", - Self::Send2(_) => "SEND 2 ", - Self::Send3(_) => "SEND 3 ", - Self::SendN(_) => "SEND N ", - Self::SuperSend(_) => "SUPER_SEND ", + Self::Send1(_) => "SEND 1 ", + Self::Send2(_) => "SEND 2 ", + Self::Send3(_) => "SEND 3 ", + Self::SendN(_) => "SEND N ", + Self::SuperSend1(_) => "SUPER_SEND 1 ", + Self::SuperSend2(_) => "SUPER_SEND 2 ", + Self::SuperSend3(_) => "SUPER_SEND 3 ", + Self::SuperSendN(_) => "SUPER_SEND N ", Self::ReturnLocal => "RETURN_LOCAL ", Self::ReturnNonLocal => "RETURN_NON_LOCAL", } } } -pub static NAMES: [&str; 19] = [ +pub static NAMES: [&str; 22] = [ "HALT", "DUP", "PUSH_LOCAL", @@ -97,12 +106,15 @@ pub static NAMES: [&str; 19] = [ "SEND_2", "SEND_3", "SEND_N", - "SUPER_SEND", + "SUPER_SEND_1", + "SUPER_SEND_2", + "SUPER_SEND_3", + "SUPER_SEND_N", "RETURN_LOCAL", "RETURN_NON_LOCAL", ]; -pub static PADDED_NAMES: [&str; 19] = [ +pub static PADDED_NAMES: [&str; 22] = [ "HALT ", "DUP ", "PUSH_LOCAL ", @@ -115,11 +127,14 @@ pub static PADDED_NAMES: [&str; 19] = [ "POP_LOCAL ", "POP_ARGUMENT ", "POP_FIELD ", - "SEND 1 ", - "SEND 2 ", - "SEND 3 ", - "SEND N ", - "SUPER_SEND ", + "SEND_1 ", + "SEND_2 ", + "SEND_3 ", + "SEND_N ", + "SUPER_SEND 1 ", + "SUPER_SEND 2 ", + "SUPER_SEND 3 ", + "SUPER_SEND N ", "RETURN_LOCAL ", "RETURN_NON_LOCAL", ]; @@ -144,7 +159,10 @@ impl fmt::Display for Bytecode { Self::Send2(idx) => write!(f, "SEND 2 {}", idx), Self::Send3(idx) => write!(f, "SEND 3 {}", idx), Self::SendN(idx) => write!(f, "SEND N {}", idx), - Self::SuperSend(idx) => write!(f, "SUPER_SEND {}", idx), + Self::SuperSend1(idx) => write!(f, "SUPER_SEND 1 {}", idx), + Self::SuperSend2(idx) => write!(f, "SUPER_SEND 2 {}", idx), + Self::SuperSend3(idx) => write!(f, "SUPER_SEND 3 {}", idx), + Self::SuperSendN(idx) => write!(f, "SUPER_SEND N {}", idx), Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index ebc7805b..70a05b5b 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -272,7 +272,12 @@ impl MethodCodegen for ast::Expression { let sym = ctxt.intern_symbol(message.signature.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { - ctxt.push_instr(Bytecode::SuperSend(idx as u8)); + match nb_params { + 1 => ctxt.push_instr(Bytecode::SuperSend1(idx as u8)), + 2 => ctxt.push_instr(Bytecode::SuperSend2(idx as u8)), + 3 => ctxt.push_instr(Bytecode::SuperSend3(idx as u8)), + _ => ctxt.push_instr(Bytecode::SuperSendN(idx as u8)) + } } else { match nb_params { 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), @@ -293,7 +298,7 @@ impl MethodCodegen for ast::Expression { let sym = ctxt.intern_symbol(message.op.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { - ctxt.push_instr(Bytecode::SuperSend(idx as u8)); + ctxt.push_instr(Bytecode::SuperSendN(idx as u8)); } else { ctxt.push_instr(Bytecode::SendN(idx as u8)); } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 0c450f26..3feeb182 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -45,12 +45,12 @@ impl Interpreter { self.frames.last() } - fn send(&mut self, idx: u8, mut nb_params: usize, frame: SOMRef, universe: &mut Universe) -> Option { + fn send(&mut self, idx: u8, mut nb_params: usize, frame: SOMRef, universe: &mut Universe) { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); let symbol = match literal { Literal::Symbol(sym) => sym, _ => { - return None; + return; } }; let signature = universe.lookup_symbol(symbol); @@ -121,7 +121,80 @@ impl Interpreter { "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" ); } - None + } + + fn super_send(&mut self, idx: u8, mut nb_params: usize, frame: SOMRef, universe: &mut Universe) { + let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let symbol = match literal { + Literal::Symbol(sym) => sym, + _ => { + return; + } + }; + let signature = universe.lookup_symbol(symbol); + if nb_params == usize::MAX { + nb_params = match signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => signature.chars().filter(|ch| *ch == ':').count(), + }; + } + + let method = frame + .borrow() + .get_method_holder() + .borrow() + .super_class() + .unwrap() + .borrow() + .lookup_method(symbol); + + if let Some(method) = method { + match method.kind() { + MethodKind::Defined(_) => { + let mut args = Vec::with_capacity(nb_params + 1); + + for _ in 0..nb_params { + let arg = self.stack.pop().unwrap(); + args.push(arg); + } + let self_value = self.stack.pop().unwrap(); + args.push(self_value.clone()); + + args.reverse(); + + let holder = method.holder.upgrade().unwrap(); + self.push_frame(FrameKind::Method { + self_value, + method, + holder, + }); + + let frame = self.current_frame().unwrap(); + frame.borrow_mut().args = args; + } + MethodKind::Primitive(func) => { + func(self, universe); + } + MethodKind::NotImplemented(err) => { + panic!("Primitive `#{}` not implemented", err) + } + } + } else { + let mut args = Vec::with_capacity(nb_params + 1); + + for _ in 0..nb_params { + let arg = self.stack.pop().unwrap(); + args.push(arg); + } + let self_value = self.stack.pop().unwrap(); + + args.reverse(); + + universe.does_not_understand(self, self_value, symbol, args) + .expect( + "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" + ); + } } pub fn run(&mut self, universe: &mut Universe) -> Option { @@ -277,73 +350,17 @@ impl Interpreter { Bytecode::SendN(idx) => { self.send(idx, usize::MAX, frame.clone(), universe); } - Bytecode::SuperSend(idx) => { - let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); - let symbol = match literal { - Literal::Symbol(sym) => sym, - _ => { - return None; - } - }; - let signature = universe.lookup_symbol(symbol); - let nb_params = nb_params(signature); - - let method = frame - .borrow() - .get_method_holder() - .borrow() - .super_class() - .unwrap() - .borrow() - .lookup_method(symbol); - - if let Some(method) = method { - match method.kind() { - MethodKind::Defined(_) => { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - args.push(self_value.clone()); - - args.reverse(); - - let holder = method.holder.upgrade().unwrap(); - self.push_frame(FrameKind::Method { - self_value, - method, - holder, - }); - - let frame = self.current_frame().unwrap(); - frame.borrow_mut().args = args; - } - MethodKind::Primitive(func) => { - func(self, universe); - } - MethodKind::NotImplemented(err) => { - panic!("Primitive `#{}` not implemented", err) - } - } - } else { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - - args.reverse(); - - universe.does_not_understand(self, self_value, symbol, args) - .expect( - "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" - ); - } + Bytecode::SuperSend1(idx) => { + self.super_send(idx, 1, frame.clone(), universe); + } + Bytecode::SuperSend2(idx) => { + self.super_send(idx, 2, frame.clone(), universe); + } + Bytecode::SuperSend3(idx) => { + self.super_send(idx, 3, frame.clone(), universe); + } + Bytecode::SuperSendN(idx) => { + self.super_send(idx, usize::MAX, frame.clone(), universe); } Bytecode::ReturnLocal => { let value = self.stack.pop().unwrap(); @@ -408,12 +425,5 @@ impl Interpreter { }; Some(value) } - - fn nb_params(signature: &str) -> usize { - match signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => signature.chars().filter(|ch| *ch == ':').count(), - } - } } } diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index 5ba51b73..31938975 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -165,7 +165,10 @@ impl fmt::Display for Method { Bytecode::SendN(idx) => { write!(f, "index: {}", idx)?; } - Bytecode::SuperSend(idx) => { + Bytecode::SuperSend1(idx) | + Bytecode::SuperSend2(idx) | + Bytecode::SuperSend3(idx) | + Bytecode::SuperSendN(idx) => { write!(f, "index: {}", idx)?; } Bytecode::ReturnLocal => {} From 9c3eb96394c287d977b5e864ab6b2549c3542ad4 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 18:39:55 +0000 Subject: [PATCH 04/88] Option type instead of relying like u8::MAX like a maniac --- som-interpreter-bc/src/interpreter.rs | 50 +++++++++++++++------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 3feeb182..1634151f 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -45,7 +45,7 @@ impl Interpreter { self.frames.last() } - fn send(&mut self, idx: u8, mut nb_params: usize, frame: SOMRef, universe: &mut Universe) { + fn send(&mut self, idx: u8, nb_params_opt: Option, frame: SOMRef, universe: &mut Universe) { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); let symbol = match literal { Literal::Symbol(sym) => sym, @@ -54,12 +54,15 @@ impl Interpreter { } }; let signature = universe.lookup_symbol(symbol); - if nb_params == usize::MAX { - nb_params = match signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => signature.chars().filter(|ch| *ch == ':').count(), - }; - } + let nb_params = match nb_params_opt { + Some(x) => x, + None => { + match signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => signature.chars().filter(|ch| *ch == ':').count(), + } + } + }; let method = self .stack @@ -123,7 +126,7 @@ impl Interpreter { } } - fn super_send(&mut self, idx: u8, mut nb_params: usize, frame: SOMRef, universe: &mut Universe) { + fn super_send(&mut self, idx: u8, nb_params_opt: Option, frame: SOMRef, universe: &mut Universe) { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); let symbol = match literal { Literal::Symbol(sym) => sym, @@ -132,12 +135,15 @@ impl Interpreter { } }; let signature = universe.lookup_symbol(symbol); - if nb_params == usize::MAX { - nb_params = match signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => signature.chars().filter(|ch| *ch == ':').count(), - }; - } + let nb_params = match nb_params_opt { + Some(x) => x, + None => { + match signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => signature.chars().filter(|ch| *ch == ':').count(), + } + } + }; let method = frame .borrow() @@ -339,28 +345,28 @@ impl Interpreter { } } Bytecode::Send1(idx) => { - self.send(idx, 1, frame.clone(), universe); + self.send(idx, Some(1), frame.clone(), universe); } Bytecode::Send2(idx) => { - self.send(idx, 2, frame.clone(), universe); + self.send(idx, Some(2), frame.clone(), universe); } Bytecode::Send3(idx) => { - self.send(idx, 3, frame.clone(), universe); + self.send(idx, Some(3), frame.clone(), universe); } Bytecode::SendN(idx) => { - self.send(idx, usize::MAX, frame.clone(), universe); + self.send(idx, None, frame.clone(), universe); } Bytecode::SuperSend1(idx) => { - self.super_send(idx, 1, frame.clone(), universe); + self.super_send(idx, Some(1), frame.clone(), universe); } Bytecode::SuperSend2(idx) => { - self.super_send(idx, 2, frame.clone(), universe); + self.super_send(idx, Some(2), frame.clone(), universe); } Bytecode::SuperSend3(idx) => { - self.super_send(idx, 3, frame.clone(), universe); + self.super_send(idx, Some(3), frame.clone(), universe); } Bytecode::SuperSendN(idx) => { - self.super_send(idx, usize::MAX, frame.clone(), universe); + self.super_send(idx, None, frame.clone(), universe); } Bytecode::ReturnLocal => { let value = self.stack.pop().unwrap(); From bb335ad2e344814a21a2d75e2001c371f7c85a4e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 19:15:54 +0000 Subject: [PATCH 05/88] push 0, 1, nil --- som-core/src/bytecode.rs | 22 ++++++++++++++++++++-- som-interpreter-bc/src/compiler.rs | 26 +++++++++++++++++++++----- som-interpreter-bc/src/interpreter.rs | 12 ++++++++++++ som-interpreter-bc/src/method.rs | 3 +++ 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 5fb0d3c9..f30bf9a1 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -11,6 +11,9 @@ pub enum Bytecode { PushBlock(u8), PushConstant(u8), PushGlobal(u8), + Push0, + Push1, + PushNil, Pop, PopLocal(u8, u8), PopArgument(u8, u8), @@ -41,6 +44,9 @@ impl Bytecode { Self::PushBlock(_) => "PUSH_BLOCK", Self::PushConstant(_) => "PUSH_CONSTANT", Self::PushGlobal(_) => "PUSH_GLOBAL", + Self::Push0 => "PUSH_0", + Self::Push1 => "PUSH_1", + Self::PushNil => "PUSH_NIL", Self::Pop => "POP", Self::PopLocal(_, _) => "POP_LOCAL", Self::PopArgument(_, _) => "POP_ARGUMENT", @@ -71,6 +77,9 @@ impl Bytecode { Self::PushBlock(_) => "PUSH_BLOCK ", Self::PushConstant(_) => "PUSH_CONSTANT ", Self::PushGlobal(_) => "PUSH_GLOBAL ", + Self::Push0 => "PUSH_0 ", + Self::Push1 => "PUSH_1 ", + Self::PushNil => "PUSH_NIL ", Self::Pop => "POP ", Self::PopLocal(_, _) => "POP_LOCAL ", Self::PopArgument(_, _) => "POP_ARGUMENT ", @@ -89,7 +98,7 @@ impl Bytecode { } } -pub static NAMES: [&str; 22] = [ +pub static NAMES: [&str; 25] = [ "HALT", "DUP", "PUSH_LOCAL", @@ -98,6 +107,9 @@ pub static NAMES: [&str; 22] = [ "PUSH_BLOCK", "PUSH_CONSTANT", "PUSH_GLOBAL", + "PUSH_0", + "PUSH_1", + "PUSH_NIL", "POP", "POP_LOCAL", "POP_ARGUMENT", @@ -114,7 +126,7 @@ pub static NAMES: [&str; 22] = [ "RETURN_NON_LOCAL", ]; -pub static PADDED_NAMES: [&str; 22] = [ +pub static PADDED_NAMES: [&str; 25] = [ "HALT ", "DUP ", "PUSH_LOCAL ", @@ -123,6 +135,9 @@ pub static PADDED_NAMES: [&str; 22] = [ "PUSH_BLOCK ", "PUSH_CONSTANT ", "PUSH_GLOBAL ", + "PUSH_0 ", + "PUSH_1 ", + "PUSH_NIL ", "POP ", "POP_LOCAL ", "POP_ARGUMENT ", @@ -151,6 +166,9 @@ impl fmt::Display for Bytecode { Self::PushBlock(idx) => write!(f, "PUSH_BLOCK {}", idx), Self::PushConstant(idx) => write!(f, "PUSH_CONSTANT {}", idx), Self::PushGlobal(idx) => write!(f, "PUSH_GLOBAL {}", idx), + Self::Push0 => write!(f, "PUSH_0"), + Self::Push1 => write!(f, "PUSH_1"), + Self::PushNil => write!(f, "PUSH_NIL"), Self::Pop => write!(f, "POP"), Self::PopLocal(up_idx, idx) => write!(f, "POP_LOCAL {}, {}", up_idx, idx), Self::PopArgument(up_idx, idx) => write!(f, "POP_ARGUMENT {}, {}", up_idx, idx), diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 70a05b5b..5c78ad69 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -232,9 +232,17 @@ impl MethodCodegen for ast::Expression { } Some(FoundVar::Field(idx)) => ctxt.push_instr(Bytecode::PushField(idx)), None => { - let name = ctxt.intern_symbol(name); - let idx = ctxt.push_literal(Literal::Symbol(name)); - ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); + match name.as_str() { + "nil" => ctxt.push_instr(Bytecode::PushNil), + // TODO should cache those false and true, although pushing 0 and 1 isn't functional + // "false" => ctxt.push_instr(Bytecode::Push0), + // "true" => ctxt.push_instr(Bytecode::Push1), + _ => { + let name = ctxt.intern_symbol(name); + let idx = ctxt.push_literal(Literal::Symbol(name)); + ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); + } + } } } Some(()) @@ -333,8 +341,16 @@ impl MethodCodegen for ast::Expression { } let literal = convert_literal(ctxt, literal); - let idx = ctxt.push_literal(literal); - ctxt.push_instr(Bytecode::PushConstant(idx as u8)); + + match literal { + Literal::Integer(0) => ctxt.push_instr(Bytecode::Push0), + Literal::Integer(1) => ctxt.push_instr(Bytecode::Push1), + _ => { + let idx = ctxt.push_literal(literal); + ctxt.push_instr(Bytecode::PushConstant(idx as u8)); + } + } + Some(()) } ast::Expression::Block(val) => { diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 1634151f..0f242d52 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -13,6 +13,9 @@ use crate::universe::Universe; use crate::value::Value; use crate::SOMRef; +const INT_0: Value = Value::Integer(0); +const INT_1: Value = Value::Integer(1); + pub struct Interpreter { /// The interpreter's stack frames. pub frames: Vec>, @@ -295,6 +298,15 @@ impl Interpreter { universe.unknown_global(self, self_value, symbol).unwrap(); } } + Bytecode::Push0 => { + self.stack.push(INT_0); + } + Bytecode::Push1 => { + self.stack.push(INT_1); + } + Bytecode::PushNil => { + self.stack.push(Value::Nil); + } Bytecode::Pop => { self.stack.pop(); } diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index 31938975..a6d1b99b 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -149,6 +149,9 @@ impl fmt::Display for Method { Bytecode::PushGlobal(idx) => { write!(f, "index: {}", idx)?; } + Bytecode::Push0 => {} + Bytecode::Push1 => {} + Bytecode::PushNil => {} Bytecode::Pop => {} Bytecode::PopLocal(up_idx, idx) => { write!(f, "local: {}, context: {}", idx, up_idx)?; From 3389a7b6016f57389a54627a133fa4690704d82c Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 19:50:08 +0000 Subject: [PATCH 06/88] push constant 0, 1, 2 --- som-core/src/bytecode.rs | 54 ++++++++++++++++++--------- som-interpreter-bc/src/compiler.rs | 7 +++- som-interpreter-bc/src/interpreter.rs | 15 ++++++++ som-interpreter-bc/src/method.rs | 1 + 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index f30bf9a1..31a4efd1 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -10,6 +10,9 @@ pub enum Bytecode { PushField(u8), PushBlock(u8), PushConstant(u8), + PushConstant0, + PushConstant1, + PushConstant2, PushGlobal(u8), Push0, Push1, @@ -43,6 +46,9 @@ impl Bytecode { Self::PushField(_) => "PUSH_FIELD", Self::PushBlock(_) => "PUSH_BLOCK", Self::PushConstant(_) => "PUSH_CONSTANT", + Self::PushConstant0 => "PUSH_CONSTANT 0", + Self::PushConstant1 => "PUSH_CONSTANT 1", + Self::PushConstant2 => "PUSH_CONSTANT 2", Self::PushGlobal(_) => "PUSH_GLOBAL", Self::Push0 => "PUSH_0", Self::Push1 => "PUSH_1", @@ -76,6 +82,9 @@ impl Bytecode { Self::PushField(_) => "PUSH_FIELD ", Self::PushBlock(_) => "PUSH_BLOCK ", Self::PushConstant(_) => "PUSH_CONSTANT ", + Self::PushConstant0 => "PUSH_CONSTANT 0 ", + Self::PushConstant1 => "PUSH_CONSTANT 1 ", + Self::PushConstant2 => "PUSH_CONSTANT 2 ", Self::PushGlobal(_) => "PUSH_GLOBAL ", Self::Push0 => "PUSH_0 ", Self::Push1 => "PUSH_1 ", @@ -98,7 +107,7 @@ impl Bytecode { } } -pub static NAMES: [&str; 25] = [ +pub static NAMES: [&str; 28] = [ "HALT", "DUP", "PUSH_LOCAL", @@ -106,6 +115,9 @@ pub static NAMES: [&str; 25] = [ "PUSH_FIELD", "PUSH_BLOCK", "PUSH_CONSTANT", + "PUSH_CONSTANT_0", + "PUSH_CONSTANT_1", + "PUSH_CONSTANT_2", "PUSH_GLOBAL", "PUSH_0", "PUSH_1", @@ -126,7 +138,7 @@ pub static NAMES: [&str; 25] = [ "RETURN_NON_LOCAL", ]; -pub static PADDED_NAMES: [&str; 25] = [ +pub static PADDED_NAMES: [&str; 28] = [ "HALT ", "DUP ", "PUSH_LOCAL ", @@ -134,6 +146,9 @@ pub static PADDED_NAMES: [&str; 25] = [ "PUSH_FIELD ", "PUSH_BLOCK ", "PUSH_CONSTANT ", + "PUSH_CONSTANT_0 ", + "PUSH_CONSTANT_1 ", + "PUSH_CONSTANT_2 ", "PUSH_GLOBAL ", "PUSH_0 ", "PUSH_1 ", @@ -146,10 +161,10 @@ pub static PADDED_NAMES: [&str; 25] = [ "SEND_2 ", "SEND_3 ", "SEND_N ", - "SUPER_SEND 1 ", - "SUPER_SEND 2 ", - "SUPER_SEND 3 ", - "SUPER_SEND N ", + "SUPER_SEND_1 ", + "SUPER_SEND_2 ", + "SUPER_SEND_3 ", + "SUPER_SEND_N ", "RETURN_LOCAL ", "RETURN_NON_LOCAL", ]; @@ -165,22 +180,25 @@ impl fmt::Display for Bytecode { Self::PushField(idx) => write!(f, "PUSH_FIELD {}", idx), Self::PushBlock(idx) => write!(f, "PUSH_BLOCK {}", idx), Self::PushConstant(idx) => write!(f, "PUSH_CONSTANT {}", idx), - Self::PushGlobal(idx) => write!(f, "PUSH_GLOBAL {}", idx), - Self::Push0 => write!(f, "PUSH_0"), - Self::Push1 => write!(f, "PUSH_1"), + Self::PushConstant0 => write!(f, "PUSH_CONSTANT_0"), + Self::PushConstant1 => write!(f, "PUSH_CONSTANT_1"), + Self::PushConstant2 => write!(f, "PUSH_CONSTANT_2"), + Self::PushGlobal(idx) => write!(f, "PUSH_GLOBAL {}", idx), + Self::Push0 => write!(f, "PUSH_0"), + Self::Push1 => write!(f, "PUSH_1"), Self::PushNil => write!(f, "PUSH_NIL"), - Self::Pop => write!(f, "POP"), + Self::Pop => write!(f, "POP"), Self::PopLocal(up_idx, idx) => write!(f, "POP_LOCAL {}, {}", up_idx, idx), Self::PopArgument(up_idx, idx) => write!(f, "POP_ARGUMENT {}, {}", up_idx, idx), Self::PopField(idx) => write!(f, "POP_FIELD {}", idx), - Self::Send1(idx) => write!(f, "SEND 1 {}", idx), - Self::Send2(idx) => write!(f, "SEND 2 {}", idx), - Self::Send3(idx) => write!(f, "SEND 3 {}", idx), - Self::SendN(idx) => write!(f, "SEND N {}", idx), - Self::SuperSend1(idx) => write!(f, "SUPER_SEND 1 {}", idx), - Self::SuperSend2(idx) => write!(f, "SUPER_SEND 2 {}", idx), - Self::SuperSend3(idx) => write!(f, "SUPER_SEND 3 {}", idx), - Self::SuperSendN(idx) => write!(f, "SUPER_SEND N {}", idx), + Self::Send1(idx) => write!(f, "SEND_1 {}", idx), + Self::Send2(idx) => write!(f, "SEND_2 {}", idx), + Self::Send3(idx) => write!(f, "SEND_3 {}", idx), + Self::SendN(idx) => write!(f, "SEND_N {}", idx), + Self::SuperSend1(idx) => write!(f, "SUPER_SEND_1 {}", idx), + Self::SuperSend2(idx) => write!(f, "SUPER_SEND_2 {}", idx), + Self::SuperSend3(idx) => write!(f, "SUPER_SEND_3 {}", idx), + Self::SuperSendN(idx) => write!(f, "SUPER_SEND_N {}", idx), Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 5c78ad69..2825c9a0 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -347,7 +347,12 @@ impl MethodCodegen for ast::Expression { Literal::Integer(1) => ctxt.push_instr(Bytecode::Push1), _ => { let idx = ctxt.push_literal(literal); - ctxt.push_instr(Bytecode::PushConstant(idx as u8)); + match idx { + 0 => ctxt.push_instr(Bytecode::PushConstant0), + 1 => ctxt.push_instr(Bytecode::PushConstant1), + 2 => ctxt.push_instr(Bytecode::PushConstant2), + _ => ctxt.push_instr(Bytecode::PushConstant(idx as u8)) + } } } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 0f242d52..bc89c3ba 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -282,6 +282,21 @@ impl Interpreter { } Bytecode::PushConstant(idx) => { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let value = convert_literal(frame, literal).unwrap(); // TODO there may be a way to avoid converting the literal to a value? + self.stack.push(value); + } + Bytecode::PushConstant0 => { + let literal = frame.borrow().lookup_constant(0).unwrap(); + let value = convert_literal(frame, literal).unwrap(); // duplication removable but see above to do, which may need to be handled first + self.stack.push(value); + } + Bytecode::PushConstant1 => { + let literal = frame.borrow().lookup_constant(1).unwrap(); + let value = convert_literal(frame, literal).unwrap(); + self.stack.push(value); + } + Bytecode::PushConstant2 => { + let literal = frame.borrow().lookup_constant(2).unwrap(); let value = convert_literal(frame, literal).unwrap(); self.stack.push(value); } diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index a6d1b99b..11894e67 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -131,6 +131,7 @@ impl fmt::Display for Method { Bytecode::PushBlock(idx) => { write!(f, "index: {}", idx)?; } + Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => {} Bytecode::PushConstant(idx) => { write!(f, "index: {}, ", idx)?; let constant = &env.literals[*idx as usize]; From 5eb4f4d5c0af84ce80c1f5d1d22c3f8d5250c162 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 10 Dec 2022 23:25:18 +0000 Subject: [PATCH 07/88] non functional ifTrue inlining, with new JumpOnFalseTopNil opcode --- som-core/src/bytecode.rs | 10 ++- som-interpreter-bc/src/compiler.rs | 92 ++++++++++++++++++++------- som-interpreter-bc/src/interpreter.rs | 18 ++++++ som-interpreter-bc/src/method.rs | 3 + 4 files changed, 97 insertions(+), 26 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 31a4efd1..4fde43cf 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -31,6 +31,7 @@ pub enum Bytecode { SuperSendN(u8), ReturnLocal, ReturnNonLocal, + JumpOnFalseTopNil(usize) } impl Bytecode { @@ -67,6 +68,7 @@ impl Bytecode { Self::SuperSendN(_) => "SUPER_SEND N", Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", + Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", } } @@ -103,11 +105,12 @@ impl Bytecode { Self::SuperSendN(_) => "SUPER_SEND N ", Self::ReturnLocal => "RETURN_LOCAL ", Self::ReturnNonLocal => "RETURN_NON_LOCAL", + Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", } } } -pub static NAMES: [&str; 28] = [ +pub static NAMES: [&str; 29] = [ "HALT", "DUP", "PUSH_LOCAL", @@ -136,9 +139,10 @@ pub static NAMES: [&str; 28] = [ "SUPER_SEND_N", "RETURN_LOCAL", "RETURN_NON_LOCAL", + "JUMP_ON_FALSE_TOP_NIL" ]; -pub static PADDED_NAMES: [&str; 28] = [ +pub static PADDED_NAMES: [&str; 29] = [ "HALT ", "DUP ", "PUSH_LOCAL ", @@ -167,6 +171,7 @@ pub static PADDED_NAMES: [&str; 28] = [ "SUPER_SEND_N ", "RETURN_LOCAL ", "RETURN_NON_LOCAL", + "JUMP_ON_FALSE_TOP_NIL" ]; impl fmt::Display for Bytecode { @@ -201,6 +206,7 @@ impl fmt::Display for Bytecode { Self::SuperSendN(idx) => write!(f, "SUPER_SEND_N {}", idx), Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), + Self::JumpOnFalseTopNil(idx) => write!(f, "JUMP_ON_FALSE_TOP_NIL {}", idx), } } } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 2825c9a0..7380127a 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -100,6 +100,8 @@ trait InnerGenCtxt: GenCtxt { fn push_arg(&mut self, name: String) -> usize; fn push_local(&mut self, name: String) -> usize; fn push_literal(&mut self, literal: Literal) -> usize; + fn get_instr_idx(&mut self) -> usize; + fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode); } struct BlockGenCtxt<'a> { @@ -161,6 +163,15 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { let (idx, _) = self.literals.insert_full(literal); idx } + + fn get_instr_idx(&mut self) -> usize { + return self.body.as_ref().unwrap().iter().len(); + } + + fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { + let mut bytecode_to_patch = self.body.as_mut().unwrap().get_mut(idx_to_backpatch).unwrap(); + bytecode_to_patch = &mut bytecode_with_new_val.clone(); + } } struct MethodGenCtxt<'a> { @@ -204,6 +215,14 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { fn push_literal(&mut self, literal: Literal) -> usize { self.inner.push_literal(literal) } + + fn get_instr_idx(&mut self) -> usize { + return self.inner.get_instr_idx(); + } + + fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { + self.inner.backpatch(idx_to_backpatch, bytecode_with_new_val); + } } trait MethodCodegen { @@ -266,35 +285,59 @@ impl MethodCodegen for ast::Expression { ast::Expression::Reference(value) if value == "super" => true, _ => false, }; + message.receiver.codegen(ctxt)?; - message - .values - .iter() - .try_for_each(|value| value.codegen(ctxt))?; - - let nb_params = match message.signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => message.signature.chars().filter(|ch| *ch == ':').count(), - }; - let sym = ctxt.intern_symbol(message.signature.as_str()); - let idx = ctxt.push_literal(Literal::Symbol(sym)); - if super_send { - match nb_params { - 1 => ctxt.push_instr(Bytecode::SuperSend1(idx as u8)), - 2 => ctxt.push_instr(Bytecode::SuperSend2(idx as u8)), - 3 => ctxt.push_instr(Bytecode::SuperSend3(idx as u8)), - _ => ctxt.push_instr(Bytecode::SuperSendN(idx as u8)) + // We inline ifTrue: + if message.signature == "ifTrue:" { + assert_eq!(message.values.len(), 1); + let block = message.values.get(0).unwrap(); + let val = match block { + ast::Expression::Block(val) => val , + _ => panic!("Invalid argument supplied to ifTrue:") // TODO, not the best error handling! Will do for now though. + }; + + let jump_idx = ctxt.get_instr_idx(); + ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); + for x in &val.body.exprs { + x.codegen(ctxt); } + let new_val = ctxt.get_instr_idx() + 1; // that + 1 feels shady, what if there's nothing after + ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(new_val)); + + Some(()) } else { - match nb_params { - 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), - 2 => ctxt.push_instr(Bytecode::Send2(idx as u8)), - 3 => ctxt.push_instr(Bytecode::Send3(idx as u8)), - _ => ctxt.push_instr(Bytecode::SendN(idx as u8)) + message + .values + .iter() + .try_for_each(|value| + value.codegen(ctxt) + )?; + + let nb_params = match message.signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => message.signature.chars().filter(|ch| *ch == ':').count(), + }; + + let sym = ctxt.intern_symbol(message.signature.as_str()); + let idx = ctxt.push_literal(Literal::Symbol(sym)); + if super_send { + match nb_params { + 1 => ctxt.push_instr(Bytecode::SuperSend1(idx as u8)), + 2 => ctxt.push_instr(Bytecode::SuperSend2(idx as u8)), + 3 => ctxt.push_instr(Bytecode::SuperSend3(idx as u8)), + _ => ctxt.push_instr(Bytecode::SuperSendN(idx as u8)) + } + } else { + match nb_params { + 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), + 2 => ctxt.push_instr(Bytecode::Send2(idx as u8)), + 3 => ctxt.push_instr(Bytecode::Send3(idx as u8)), + _ => ctxt.push_instr(Bytecode::SendN(idx as u8)) + } } + Some(()) } - Some(()) } ast::Expression::BinaryOp(message) => { let super_send = match message.lhs.as_ref() { @@ -306,7 +349,7 @@ impl MethodCodegen for ast::Expression { let sym = ctxt.intern_symbol(message.op.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { - ctxt.push_instr(Bytecode::SuperSendN(idx as u8)); + ctxt.push_instr(Bytecode::SendN(idx as u8)); // TODO why doesn't send2 work? } else { ctxt.push_instr(Bytecode::SendN(idx as u8)); } @@ -446,6 +489,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option MethodKind::NotImplemented(defn.signature.clone()), ast::MethodBody::Body { .. } => { let env = MethodEnv { diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index bc89c3ba..a7a8c19c 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -57,6 +57,11 @@ impl Interpreter { } }; let signature = universe.lookup_symbol(symbol); + match signature { + "if" => println!("KNEW IT"), + _ => {} + } + let nb_params = match nb_params_opt { Some(x) => x, None => { @@ -431,6 +436,19 @@ impl Interpreter { ); } } + Bytecode::JumpOnFalseTopNil(offset) => { + let condition_result = self.stack.pop().unwrap(); + + match condition_result { + Value::Boolean(true) => { + let frame = self.current_frame().unwrap(); + frame.clone().borrow_mut().bytecode_idx += offset; + self.stack.push(Value::Nil); + }, + Value::Boolean(false) => {}, + _ => panic!() + } + } } } diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index 11894e67..a963bd80 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -177,6 +177,9 @@ impl fmt::Display for Method { } Bytecode::ReturnLocal => {} Bytecode::ReturnNonLocal => {} + Bytecode::JumpOnFalseTopNil(idx) => { + write!(f, "index: {}", idx)?; + } } } Ok(()) From d67c0c057a2cc67e30964ac51001dcf30c40f448 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 14 Dec 2022 15:42:10 +0000 Subject: [PATCH 08/88] progress with ifTrue inlining, fixed some bugs but still not functional --- som-interpreter-bc/src/compiler.rs | 5 ++--- som-interpreter-bc/src/interpreter.rs | 24 ++++++++++++++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 7380127a..59758f39 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -169,8 +169,7 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { - let mut bytecode_to_patch = self.body.as_mut().unwrap().get_mut(idx_to_backpatch).unwrap(); - bytecode_to_patch = &mut bytecode_with_new_val.clone(); + self.body.as_mut().unwrap()[idx_to_backpatch] = bytecode_with_new_val; } } @@ -302,7 +301,7 @@ impl MethodCodegen for ast::Expression { for x in &val.body.exprs { x.codegen(ctxt); } - let new_val = ctxt.get_instr_idx() + 1; // that + 1 feels shady, what if there's nothing after + let new_val = ctxt.get_instr_idx(); ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(new_val)); Some(()) diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index a7a8c19c..fced3a80 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -57,10 +57,6 @@ impl Interpreter { } }; let signature = universe.lookup_symbol(symbol); - match signature { - "if" => println!("KNEW IT"), - _ => {} - } let nb_params = match nb_params_opt { Some(x) => x, @@ -228,6 +224,22 @@ impl Interpreter { } }; + // TODO remove this debug + // if &frame.borrow().get_method().signature == "initialize:" { + // println!("bp"); + // // match &frame.borrow().kind { + // // FrameKind::Method { holder, method, .. } => match method.kind() { + // // MethodKind::Defined(env) => { + // // dbg!(&holder); + // // dbg!(&env.body); + // // }, + // // MethodKind::Primitive(_) => {}, + // // MethodKind::NotImplemented(_) => {}, + // // }, + // // FrameKind::Block { block, .. } => {}, + // // }; + // } + frame.borrow_mut().bytecode_idx += 1; match bytecode { @@ -440,12 +452,12 @@ impl Interpreter { let condition_result = self.stack.pop().unwrap(); match condition_result { - Value::Boolean(true) => { + Value::Boolean(false) => { let frame = self.current_frame().unwrap(); frame.clone().borrow_mut().bytecode_idx += offset; self.stack.push(Value::Nil); }, - Value::Boolean(false) => {}, + Value::Boolean(true) => {}, _ => panic!() } } From 30cbb0b4caf2f9f45b021e2f5146d4dc3e103272 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 16 Dec 2022 15:12:52 +0000 Subject: [PATCH 09/88] very WIP commit but getting closer to successful inlining --- som-interpreter-bc/src/compiler.rs | 58 ++++++++++++++++++++++----- som-interpreter-bc/src/frame.rs | 5 +++ som-interpreter-bc/src/interpreter.rs | 37 ++++++++++++++++- 3 files changed, 88 insertions(+), 12 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 59758f39..51f46c65 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -3,6 +3,7 @@ //! use std::cell::RefCell; use std::hash::{Hash, Hasher}; +use std::process::exit; use std::rc::{Rc, Weak}; use indexmap::{IndexMap, IndexSet}; @@ -19,6 +20,9 @@ use crate::primitives; use crate::value::Value; use crate::SOMRef; +static mut NBR_INLINING: usize = 0; + + #[derive(Debug, Clone)] pub enum Literal { Symbol(Interned), @@ -169,6 +173,7 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { + // dbg!(&self.outer.class_name()); self.body.as_mut().unwrap()[idx_to_backpatch] = bytecode_with_new_val; } } @@ -279,7 +284,7 @@ impl MethodCodegen for ast::Expression { } Some(()) } - ast::Expression::Message(message) => { + ast::Expression::Message(message) => unsafe { let super_send = match message.receiver.as_ref() { ast::Expression::Reference(value) if value == "super" => true, _ => false, @@ -288,21 +293,52 @@ impl MethodCodegen for ast::Expression { message.receiver.codegen(ctxt)?; // We inline ifTrue: - if message.signature == "ifTrue:" { + if message.signature == "ifTrue:" && NBR_INLINING < 10000 { assert_eq!(message.values.len(), 1); + + let jump_idx = ctxt.get_instr_idx(); + ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); + let block = message.values.get(0).unwrap(); - let val = match block { - ast::Expression::Block(val) => val , + match block { + ast::Expression::Block(val) => { + let splitted = val.body.exprs.split_last(); + + if let Some((last, rest)) = splitted { + for expr in rest { + // dbg!(&expr); + expr.codegen(ctxt); + ctxt.push_instr(Bytecode::Pop); + } + + // match last { + // ast::Expression::Exit(expr) => { + // expr.codegen(ctxt)?; + // ctxt.push_instr(Bytecode::ReturnNonLocal); + // Some(()) + // } + // } + match last { + ast::Expression::Exit(expr) => { + expr.codegen(ctxt)?; + ctxt.push_instr(Bytecode::ReturnLocal); + }, + _ => {} + } + // last.codegen(ctxt)?; + // dbg!(&last); + } + // exit(1); + } , _ => panic!("Invalid argument supplied to ifTrue:") // TODO, not the best error handling! Will do for now though. + // arg => {arg.codegen(ctxt);} // TODO is this supposed to be possible? }; - let jump_idx = ctxt.get_instr_idx(); - ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); - for x in &val.body.exprs { - x.codegen(ctxt); - } - let new_val = ctxt.get_instr_idx(); - ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(new_val)); + let new_idx = ctxt.get_instr_idx(); + let jump_by = new_idx - jump_idx; + ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)); + + NBR_INLINING += 1; Some(()) } else { diff --git a/som-interpreter-bc/src/frame.rs b/som-interpreter-bc/src/frame.rs index cf8ddf92..47a9b197 100644 --- a/som-interpreter-bc/src/frame.rs +++ b/som-interpreter-bc/src/frame.rs @@ -122,6 +122,11 @@ impl Frame { self.get_bytecode(self.bytecode_idx) } + // todo remove me + pub fn get_bytecode_idx(&self) -> usize { + self.bytecode_idx + } + pub fn lookup_constant(&self, idx: usize) -> Option { match self.kind() { FrameKind::Block { block } => block.literals.get(idx).cloned(), diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index fced3a80..71634579 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -1,4 +1,5 @@ use std::cell::RefCell; +use std::process::exit; use std::rc::Rc; use std::time::Instant; use std::usize; @@ -58,6 +59,20 @@ impl Interpreter { }; let signature = universe.lookup_symbol(symbol); + // if signature == "verify:inner:" { + // print!("bp"); + // // match &frame.borrow().kind { + // // FrameKind::Method { holder, method, .. } => match method.kind() { + // // MethodKind::Defined(env) => { + // // dbg!(&holder); + // // dbg!(&env.body); + // // }, + // // _ => {} + // // }, + // // _ => {}, + // // }; + // } + let nb_params = match nb_params_opt { Some(x) => x, None => { @@ -214,6 +229,22 @@ impl Interpreter { None => return Some(self.stack.pop().unwrap_or(Value::Nil)), }; + if frame.borrow().get_bytecode_idx() == 0 { + if &frame.borrow().get_method().signature == "initialize:" { + match &frame.borrow().kind { + FrameKind::Method { holder, method, .. } => match method.kind() { + MethodKind::Defined(env) => { + // dbg!(&holder); + // dbg!(&env.body); + // exit(1); + }, + _ => {}, + }, + _ => {}, + }; + } + } + let opt_bytecode = frame.borrow().get_current_bytecode(); let bytecode = match opt_bytecode { Some(bytecode) => bytecode, @@ -241,6 +272,10 @@ impl Interpreter { // } frame.borrow_mut().bytecode_idx += 1; + dbg!(&frame.borrow().get_method().signature); + // if &frame.borrow().get_method().signature == "mandelbrot:" { + // println!("{}", &bytecode); + // } match bytecode { Bytecode::Halt => { @@ -454,7 +489,7 @@ impl Interpreter { match condition_result { Value::Boolean(false) => { let frame = self.current_frame().unwrap(); - frame.clone().borrow_mut().bytecode_idx += offset; + frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop self.stack.push(Value::Nil); }, Value::Boolean(true) => {}, From 3536084d8a7cc2af82e15ccd3f46759d561d7c1e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 16 Dec 2022 17:19:12 +0000 Subject: [PATCH 10/88] functional ifTrue inlining at least for running Mandelbrot! --- som-interpreter-bc/src/compiler.rs | 19 +------------------ som-interpreter-bc/src/interpreter.rs | 24 +++--------------------- 2 files changed, 4 insertions(+), 39 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 51f46c65..ac3c91df 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -306,29 +306,12 @@ impl MethodCodegen for ast::Expression { if let Some((last, rest)) = splitted { for expr in rest { - // dbg!(&expr); expr.codegen(ctxt); ctxt.push_instr(Bytecode::Pop); } - // match last { - // ast::Expression::Exit(expr) => { - // expr.codegen(ctxt)?; - // ctxt.push_instr(Bytecode::ReturnNonLocal); - // Some(()) - // } - // } - match last { - ast::Expression::Exit(expr) => { - expr.codegen(ctxt)?; - ctxt.push_instr(Bytecode::ReturnLocal); - }, - _ => {} - } - // last.codegen(ctxt)?; - // dbg!(&last); + last.codegen(ctxt)?; } - // exit(1); } , _ => panic!("Invalid argument supplied to ifTrue:") // TODO, not the best error handling! Will do for now though. // arg => {arg.codegen(ctxt);} // TODO is this supposed to be possible? diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 71634579..d51d0cb4 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -229,22 +229,6 @@ impl Interpreter { None => return Some(self.stack.pop().unwrap_or(Value::Nil)), }; - if frame.borrow().get_bytecode_idx() == 0 { - if &frame.borrow().get_method().signature == "initialize:" { - match &frame.borrow().kind { - FrameKind::Method { holder, method, .. } => match method.kind() { - MethodKind::Defined(env) => { - // dbg!(&holder); - // dbg!(&env.body); - // exit(1); - }, - _ => {}, - }, - _ => {}, - }; - } - } - let opt_bytecode = frame.borrow().get_current_bytecode(); let bytecode = match opt_bytecode { Some(bytecode) => bytecode, @@ -272,10 +256,7 @@ impl Interpreter { // } frame.borrow_mut().bytecode_idx += 1; - dbg!(&frame.borrow().get_method().signature); - // if &frame.borrow().get_method().signature == "mandelbrot:" { - // println!("{}", &bytecode); - // } + // dbg!(&frame.borrow().get_method().signature); match bytecode { Bytecode::Halt => { @@ -492,7 +473,8 @@ impl Interpreter { frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop self.stack.push(Value::Nil); }, - Value::Boolean(true) => {}, + Value::Boolean(true) => { + }, _ => panic!() } } From 01bdda9867351d9349abdf8f0d081f4004fd9fc5 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 19 Dec 2022 15:44:55 +0000 Subject: [PATCH 11/88] Minor cleanups and fixed it for Bounce at least --- som-interpreter-bc/src/compiler.rs | 72 ++++++++++++--------------- som-interpreter-bc/src/interpreter.rs | 1 - 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index ac3c91df..cdd22aef 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -3,7 +3,6 @@ //! use std::cell::RefCell; use std::hash::{Hash, Hasher}; -use std::process::exit; use std::rc::{Rc, Weak}; use indexmap::{IndexMap, IndexSet}; @@ -20,9 +19,6 @@ use crate::primitives; use crate::value::Value; use crate::SOMRef; -static mut NBR_INLINING: usize = 0; - - #[derive(Debug, Clone)] pub enum Literal { Symbol(Interned), @@ -284,7 +280,7 @@ impl MethodCodegen for ast::Expression { } Some(()) } - ast::Expression::Message(message) => unsafe { + ast::Expression::Message(message) => { let super_send = match message.receiver.as_ref() { ast::Expression::Reference(value) if value == "super" => true, _ => false, @@ -293,9 +289,7 @@ impl MethodCodegen for ast::Expression { message.receiver.codegen(ctxt)?; // We inline ifTrue: - if message.signature == "ifTrue:" && NBR_INLINING < 10000 { - assert_eq!(message.values.len(), 1); - + if message.signature == "ifTrue:" && message.values.len() == 1 && matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { let jump_idx = ctxt.get_instr_idx(); ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); @@ -313,7 +307,7 @@ impl MethodCodegen for ast::Expression { last.codegen(ctxt)?; } } , - _ => panic!("Invalid argument supplied to ifTrue:") // TODO, not the best error handling! Will do for now though. + val => panic!("Invalid argument supplied to ifTrue: {:?}", val) // TODO, not the best error handling! Will do for now though. // arg => {arg.codegen(ctxt);} // TODO is this supposed to be possible? }; @@ -321,41 +315,39 @@ impl MethodCodegen for ast::Expression { let jump_by = new_idx - jump_idx; ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)); - NBR_INLINING += 1; + return Some(()); + } - Some(()) - } else { - message - .values - .iter() - .try_for_each(|value| - value.codegen(ctxt) - )?; - - let nb_params = match message.signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => message.signature.chars().filter(|ch| *ch == ':').count(), - }; + message + .values + .iter() + .try_for_each(|value| + value.codegen(ctxt) + )?; - let sym = ctxt.intern_symbol(message.signature.as_str()); - let idx = ctxt.push_literal(Literal::Symbol(sym)); - if super_send { - match nb_params { - 1 => ctxt.push_instr(Bytecode::SuperSend1(idx as u8)), - 2 => ctxt.push_instr(Bytecode::SuperSend2(idx as u8)), - 3 => ctxt.push_instr(Bytecode::SuperSend3(idx as u8)), - _ => ctxt.push_instr(Bytecode::SuperSendN(idx as u8)) - } - } else { - match nb_params { - 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), - 2 => ctxt.push_instr(Bytecode::Send2(idx as u8)), - 3 => ctxt.push_instr(Bytecode::Send3(idx as u8)), - _ => ctxt.push_instr(Bytecode::SendN(idx as u8)) - } + let nb_params = match message.signature.chars().nth(0) { + Some(ch) if !ch.is_alphabetic() => 1, + _ => message.signature.chars().filter(|ch| *ch == ':').count(), + }; + + let sym = ctxt.intern_symbol(message.signature.as_str()); + let idx = ctxt.push_literal(Literal::Symbol(sym)); + if super_send { + match nb_params { + 1 => ctxt.push_instr(Bytecode::SuperSend1(idx as u8)), + 2 => ctxt.push_instr(Bytecode::SuperSend2(idx as u8)), + 3 => ctxt.push_instr(Bytecode::SuperSend3(idx as u8)), + _ => ctxt.push_instr(Bytecode::SuperSendN(idx as u8)) + } + } else { + match nb_params { + 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), + 2 => ctxt.push_instr(Bytecode::Send2(idx as u8)), + 3 => ctxt.push_instr(Bytecode::Send3(idx as u8)), + _ => ctxt.push_instr(Bytecode::SendN(idx as u8)) } - Some(()) } + Some(()) } ast::Expression::BinaryOp(message) => { let super_send = match message.lhs.as_ref() { diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index d51d0cb4..aef0fea1 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -1,5 +1,4 @@ use std::cell::RefCell; -use std::process::exit; use std::rc::Rc; use std::time::Instant; use std::usize; From 38a331e88fe90858b09038752a05a1dfd558d671 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 19 Dec 2022 16:54:25 +0000 Subject: [PATCH 12/88] Basic bash script for running benchmarks --- run_benchmarks.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100755 run_benchmarks.sh diff --git a/run_benchmarks.sh b/run_benchmarks.sh new file mode 100755 index 00000000..28739b40 --- /dev/null +++ b/run_benchmarks.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +BENCHMARKS=("Bounce" "Mandelbrot" "List" "Permute" "Queens" "QuickSort" "Sieve" "Fannkuch" "Json" "DeltaBlue") + +for bench in "${BENCHMARKS[@]}" +do + cargo run --bin som-interpreter-bc -- -c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/Json -- core-lib/Examples/Benchmarks/BenchmarkHarness.som $bench 1 0 7 + echo -ne "\n" +done + + From 811877350c0be53da2d5c593ddac4bbffd17f4bc Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 19 Dec 2022 17:07:03 +0000 Subject: [PATCH 13/88] Fixed other benchmarks. Known issue: breaks variable shadowing in inlined blocks, not sure how to fix that yet --- som-interpreter-bc/src/compiler.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index cdd22aef..0c46cb73 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -298,6 +298,10 @@ impl MethodCodegen for ast::Expression { ast::Expression::Block(val) => { let splitted = val.body.exprs.split_last(); + for block_local in &val.locals { + ctxt.push_local(String::from(block_local)); + } + if let Some((last, rest)) = splitted { for expr in rest { expr.codegen(ctxt); From 008909291e1998adde44f4aee4f03d2a5d0d54b2 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 15:44:51 +0000 Subject: [PATCH 14/88] Inlining ifFalse:, and the associate opcode JumpOnTrueTopNil --- som-core/src/bytecode.rs | 16 +++-- som-interpreter-bc/src/compiler.rs | 89 +++++++++++++++++---------- som-interpreter-bc/src/interpreter.rs | 14 +++++ som-interpreter-bc/src/method.rs | 3 + 4 files changed, 86 insertions(+), 36 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 4fde43cf..16e43dba 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -31,7 +31,8 @@ pub enum Bytecode { SuperSendN(u8), ReturnLocal, ReturnNonLocal, - JumpOnFalseTopNil(usize) + JumpOnFalseTopNil(usize), + JumpOnTrueTopNil(usize) } impl Bytecode { @@ -69,6 +70,7 @@ impl Bytecode { Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", + Self::JumpOnTrueTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", } } @@ -106,11 +108,12 @@ impl Bytecode { Self::ReturnLocal => "RETURN_LOCAL ", Self::ReturnNonLocal => "RETURN_NON_LOCAL", Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", + Self::JumpOnTrueTopNil(_) => "JUMP_ON_TRUE_TOP_NIL", } } } -pub static NAMES: [&str; 29] = [ +pub static NAMES: [&str; 30] = [ "HALT", "DUP", "PUSH_LOCAL", @@ -139,10 +142,11 @@ pub static NAMES: [&str; 29] = [ "SUPER_SEND_N", "RETURN_LOCAL", "RETURN_NON_LOCAL", - "JUMP_ON_FALSE_TOP_NIL" + "JUMP_ON_FALSE_TOP_NIL", + "JUMP_ON_TRUE_TOP_NIL", ]; -pub static PADDED_NAMES: [&str; 29] = [ +pub static PADDED_NAMES: [&str; 30] = [ "HALT ", "DUP ", "PUSH_LOCAL ", @@ -171,7 +175,8 @@ pub static PADDED_NAMES: [&str; 29] = [ "SUPER_SEND_N ", "RETURN_LOCAL ", "RETURN_NON_LOCAL", - "JUMP_ON_FALSE_TOP_NIL" + "JUMP_ON_FALSE_TOP_NIL", // those two are so long they broke the padding, oops. TODO fix + "JUMP_ON_TRUE_TOP_NIL", ]; impl fmt::Display for Bytecode { @@ -207,6 +212,7 @@ impl fmt::Display for Bytecode { Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), Self::JumpOnFalseTopNil(idx) => write!(f, "JUMP_ON_FALSE_TOP_NIL {}", idx), + Self::JumpOnTrueTopNil(idx) => write!(f, "JUMP_ON_TRUE_TOP_NIL {}", idx), } } } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 0c46cb73..22d3c12b 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -9,6 +9,7 @@ use indexmap::{IndexMap, IndexSet}; use num_bigint::BigInt; use som_core::ast; +use som_core::ast::Message; use som_core::bytecode::Bytecode; use crate::block::Block; @@ -229,6 +230,10 @@ trait MethodCodegen { fn codegen(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; } +trait PrimMessageInliner { + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &Message) -> Option<()>; +} + impl MethodCodegen for ast::Body { fn codegen(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()> { for expr in &self.exprs { @@ -288,37 +293,7 @@ impl MethodCodegen for ast::Expression { message.receiver.codegen(ctxt)?; - // We inline ifTrue: - if message.signature == "ifTrue:" && message.values.len() == 1 && matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { - let jump_idx = ctxt.get_instr_idx(); - ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); - - let block = message.values.get(0).unwrap(); - match block { - ast::Expression::Block(val) => { - let splitted = val.body.exprs.split_last(); - - for block_local in &val.locals { - ctxt.push_local(String::from(block_local)); - } - - if let Some((last, rest)) = splitted { - for expr in rest { - expr.codegen(ctxt); - ctxt.push_instr(Bytecode::Pop); - } - - last.codegen(ctxt)?; - } - } , - val => panic!("Invalid argument supplied to ifTrue: {:?}", val) // TODO, not the best error handling! Will do for now though. - // arg => {arg.codegen(ctxt);} // TODO is this supposed to be possible? - }; - - let new_idx = ctxt.get_instr_idx(); - let jump_by = new_idx - jump_idx; - ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)); - + if self.inline_if_possible(ctxt, message).is_some() { return Some(()); } @@ -432,6 +407,58 @@ impl MethodCodegen for ast::Expression { } } +impl PrimMessageInliner for ast::Expression { + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &Message) -> Option<()> { + if message.signature == "ifTrue:" || message.signature == "ifFalse" { + if message.values.len() != 1 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { + return Some(()); + } + + let jump_idx = ctxt.get_instr_idx(); + + match message.signature.as_str() { + "ifTrue:" => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), + "ifFalse:" => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), + _ => panic!("Unreachable") + } + + let block = message.values.get(0).unwrap(); + match block { + ast::Expression::Block(val) => { + let splitted = val.body.exprs.split_last(); + + for block_local in &val.locals { + ctxt.push_local(String::from(block_local)); + } + + if let Some((last, rest)) = splitted { + for expr in rest { + expr.codegen(ctxt); + ctxt.push_instr(Bytecode::Pop); + } + + last.codegen(ctxt)?; + } + } , + val => panic!("Invalid argument supplied to ifTrue: {:?}", val) // not the best error handling! Will do for now though. + }; + + let new_idx = ctxt.get_instr_idx(); + let jump_by = new_idx - jump_idx; + + match message.signature.as_str() { + "ifTrue:" => ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), + "ifFalse:" => ctxt.backpatch(jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), + _ => panic!("Unreachable") + } + + return Some(()); + } + return None; + } +} + + struct ClassGenCtxt<'a> { pub name: String, pub fields: IndexSet, diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index aef0fea1..c3b44aef 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -476,6 +476,20 @@ impl Interpreter { }, _ => panic!() } + }, + Bytecode::JumpOnTrueTopNil(offset) => { + let condition_result = self.stack.pop().unwrap(); + + match condition_result { + Value::Boolean(true) => { + let frame = self.current_frame().unwrap(); + frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop + self.stack.push(Value::Nil); + }, + Value::Boolean(false) => { + }, + _ => panic!() + } } } } diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index a963bd80..81fd4b95 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -180,6 +180,9 @@ impl fmt::Display for Method { Bytecode::JumpOnFalseTopNil(idx) => { write!(f, "index: {}", idx)?; } + Bytecode::JumpOnTrueTopNil(idx) => { + write!(f, "index: {}", idx)?; + } } } Ok(()) From 64769c127a85cc180ef488a523afc777e4977f26 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 15:56:00 +0000 Subject: [PATCH 15/88] Removed bytecodes NAMES and PADDED_NAMES, since they were annoying and unused. I wish they were usable, but afaik it's not doable in Rust to replace matches with more clever static array lookups (at least not with enums that have optional arguments) --- som-core/src/bytecode.rs | 66 ---------------------------------------- 1 file changed, 66 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 16e43dba..c141c1e7 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -113,72 +113,6 @@ impl Bytecode { } } -pub static NAMES: [&str; 30] = [ - "HALT", - "DUP", - "PUSH_LOCAL", - "PUSH_ARGUMENT", - "PUSH_FIELD", - "PUSH_BLOCK", - "PUSH_CONSTANT", - "PUSH_CONSTANT_0", - "PUSH_CONSTANT_1", - "PUSH_CONSTANT_2", - "PUSH_GLOBAL", - "PUSH_0", - "PUSH_1", - "PUSH_NIL", - "POP", - "POP_LOCAL", - "POP_ARGUMENT", - "POP_FIELD", - "SEND_1", - "SEND_2", - "SEND_3", - "SEND_N", - "SUPER_SEND_1", - "SUPER_SEND_2", - "SUPER_SEND_3", - "SUPER_SEND_N", - "RETURN_LOCAL", - "RETURN_NON_LOCAL", - "JUMP_ON_FALSE_TOP_NIL", - "JUMP_ON_TRUE_TOP_NIL", -]; - -pub static PADDED_NAMES: [&str; 30] = [ - "HALT ", - "DUP ", - "PUSH_LOCAL ", - "PUSH_ARGUMENT ", - "PUSH_FIELD ", - "PUSH_BLOCK ", - "PUSH_CONSTANT ", - "PUSH_CONSTANT_0 ", - "PUSH_CONSTANT_1 ", - "PUSH_CONSTANT_2 ", - "PUSH_GLOBAL ", - "PUSH_0 ", - "PUSH_1 ", - "PUSH_NIL ", - "POP ", - "POP_LOCAL ", - "POP_ARGUMENT ", - "POP_FIELD ", - "SEND_1 ", - "SEND_2 ", - "SEND_3 ", - "SEND_N ", - "SUPER_SEND_1 ", - "SUPER_SEND_2 ", - "SUPER_SEND_3 ", - "SUPER_SEND_N ", - "RETURN_LOCAL ", - "RETURN_NON_LOCAL", - "JUMP_ON_FALSE_TOP_NIL", // those two are so long they broke the padding, oops. TODO fix - "JUMP_ON_TRUE_TOP_NIL", -]; - impl fmt::Display for Bytecode { #[rustfmt::skip] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { From 06fabe52f8fc53f5adbf092dce6592c8f836d53f Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 17:06:24 +0000 Subject: [PATCH 16/88] start of ifTrue:ifFalse: inlining, and some slight refactoring to make my life easier --- som-core/src/bytecode.rs | 28 +++--- som-interpreter-bc/src/compiler.rs | 127 ++++++++++++++++++++------ som-interpreter-bc/src/interpreter.rs | 4 + som-interpreter-bc/src/method.rs | 1 + 4 files changed, 117 insertions(+), 43 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index c141c1e7..204b98d7 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -31,6 +31,7 @@ pub enum Bytecode { SuperSendN(u8), ReturnLocal, ReturnNonLocal, + Jump(usize), JumpOnFalseTopNil(usize), JumpOnTrueTopNil(usize) } @@ -59,18 +60,18 @@ impl Bytecode { Self::PopLocal(_, _) => "POP_LOCAL", Self::PopArgument(_, _) => "POP_ARGUMENT", Self::PopField(_) => "POP_FIELD", - Self::Send1(_) => "SEND 1", - Self::Send2(_) => "SEND 2", - Self::Send3(_) => "SEND 3", - Self::SendN(_) => "SEND N", - Self::SuperSend1(_) => "SUPER_SEND 1", - Self::SuperSend2(_) => "SUPER_SEND 2", - Self::SuperSend3(_) => "SUPER_SEND 3", - Self::SuperSendN(_) => "SUPER_SEND N", + Self::Send1(_) => "SEND 1", + Self::Send2(_) => "SEND 2", + Self::Send3(_) => "SEND 3", + Self::SendN(_) => "SEND N", + Self::SuperSend1(_) => "SUPER_SEND 1", + Self::SuperSend2(_) => "SUPER_SEND 2", + Self::SuperSend3(_) => "SUPER_SEND 3", + Self::SuperSendN(_) => "SUPER_SEND N", Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", - Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", - Self::JumpOnTrueTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", + Self::Jump(_) => "JUMP", + _ => "NO NAME, TODO" } } @@ -107,8 +108,8 @@ impl Bytecode { Self::SuperSendN(_) => "SUPER_SEND N ", Self::ReturnLocal => "RETURN_LOCAL ", Self::ReturnNonLocal => "RETURN_NON_LOCAL", - Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", - Self::JumpOnTrueTopNil(_) => "JUMP_ON_TRUE_TOP_NIL", + Self::Jump(_) => "JUMP ", + _ => "NO NAME, TODO" } } } @@ -145,8 +146,9 @@ impl fmt::Display for Bytecode { Self::SuperSendN(idx) => write!(f, "SUPER_SEND_N {}", idx), Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), + Self::Jump(idx) => write!(f, "JUMP {}", idx), Self::JumpOnFalseTopNil(idx) => write!(f, "JUMP_ON_FALSE_TOP_NIL {}", idx), - Self::JumpOnTrueTopNil(idx) => write!(f, "JUMP_ON_TRUE_TOP_NIL {}", idx), + _ => write!(f, "No display for this bytecode, TODO.") // I am lazy } } } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 22d3c12b..a3437990 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -9,7 +9,6 @@ use indexmap::{IndexMap, IndexSet}; use num_bigint::BigInt; use som_core::ast; -use som_core::ast::Message; use som_core::bytecode::Bytecode; use crate::block::Block; @@ -231,7 +230,8 @@ trait MethodCodegen { } trait PrimMessageInliner { - fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &Message) -> Option<()>; + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; } impl MethodCodegen for ast::Body { @@ -408,54 +408,121 @@ impl MethodCodegen for ast::Expression { } impl PrimMessageInliner for ast::Expression { - fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &Message) -> Option<()> { + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { if message.signature == "ifTrue:" || message.signature == "ifFalse" { if message.values.len() != 1 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { return Some(()); } + // avoids some panic! match arms, there's only two possibilities + let is_if_true = message.signature == "ifTrue:"; + let jump_idx = ctxt.get_instr_idx(); - match message.signature.as_str() { - "ifTrue:" => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), - "ifFalse:" => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), - _ => panic!("Unreachable") + match is_if_true { + true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), + false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) } - let block = message.values.get(0).unwrap(); - match block { - ast::Expression::Block(val) => { - let splitted = val.body.exprs.split_last(); + self.inline_block(ctxt, message.values.get(0).unwrap()); - for block_local in &val.locals { - ctxt.push_local(String::from(block_local)); - } + let jump_by = ctxt.get_instr_idx() - jump_idx; + match is_if_true { + true => ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), + false => ctxt.backpatch(jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), + } - if let Some((last, rest)) = splitted { - for expr in rest { - expr.codegen(ctxt); - ctxt.push_instr(Bytecode::Pop); - } + return Some(()); + } + + /*if message.signature == "ifTrue:ifFalse:" { // || message.signature == "ifFalse:ifTrue:" + if message.values.len() != 2 + || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) + || !matches!(message.values.get(1).unwrap(), ast::Expression::Block(_)) { + return Some(()); + } + + // avoids some panic! match arms, there's only two possibilities + let is_if_true_if_false = message.signature == "ifTrue:ifFalse"; - last.codegen(ctxt)?; + let start_jump_idx = ctxt.get_instr_idx(); + + match is_if_true_if_false { + true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), + false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) + } + + if let ast::Expression::Block(block1) = message.values.get(0).unwrap() { + for block_local in &block1.locals { + ctxt.push_local(String::from(block_local)); + } + + if let Some((last, rest)) = block1.body.exprs.split_last() { + for expr in rest { + expr.codegen(ctxt); + ctxt.push_instr(Bytecode::Pop); } - } , - val => panic!("Invalid argument supplied to ifTrue: {:?}", val) // not the best error handling! Will do for now though. - }; + last.codegen(ctxt)?; + } + } - let new_idx = ctxt.get_instr_idx(); - let jump_by = new_idx - jump_idx; + let end_first_jump_idx = ctxt.get_instr_idx(); // the JUMP we've just emitted. + ctxt.push_instr(Bytecode::Jump(0)); - match message.signature.as_str() { - "ifTrue:" => ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), - "ifFalse:" => ctxt.backpatch(jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), - _ => panic!("Unreachable") + let jump_by = ctxt.get_instr_idx() - start_jump_idx; + match is_if_true_if_false { + true => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), + false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), + } + + if let ast::Expression::Block(block2) = message.values.get(1).unwrap() { + for block_local in &block2.locals { + ctxt.push_local(String::from(block_local)); + } + + if let Some((last, rest)) = block2.body.exprs.split_last() { + for expr in rest { + expr.codegen(ctxt); + ctxt.push_instr(Bytecode::Pop); + } + last.codegen(ctxt)?; + } + } + + let jump_by = ctxt.get_instr_idx() - end_first_jump_idx; + match is_if_true_if_false { + true => ctxt.backpatch(end_first_jump_idx, Bytecode::Jump(jump_by)), + false => ctxt.backpatch(end_first_jump_idx, Bytecode::Jump(jump_by)), } return Some(()); - } + }*/ + + // TODO: [whileTrue, whileFalse], [or, and] return None; } + + fn inline_block(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { + match block_expr { + ast::Expression::Block(block) => { + for block_local in &block.locals { + ctxt.push_local(String::from(block_local)); + } + + // TODO i suspect we can reuse compile_block() instead, but a quick attempt failed. + // TODO also, need remove those POPs somehow. + if let Some((last, rest)) = block.body.exprs.split_last() { + for expr in rest { + expr.codegen(ctxt); + ctxt.push_instr(Bytecode::Pop); + } + last.codegen(ctxt)?; + } + Some(()) + }, + _ => panic!("Expression was not a block") + } + } } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index c3b44aef..97412d1e 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -463,6 +463,10 @@ impl Interpreter { ); } } + Bytecode::Jump(offset) => { + let frame = self.current_frame().unwrap(); + frame.clone().borrow_mut().bytecode_idx += offset - 1; + }, Bytecode::JumpOnFalseTopNil(offset) => { let condition_result = self.stack.pop().unwrap(); diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index 81fd4b95..5b3d0a80 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -183,6 +183,7 @@ impl fmt::Display for Method { Bytecode::JumpOnTrueTopNil(idx) => { write!(f, "index: {}", idx)?; } + _ => write!(f, "No display for this bytecode, TODO.")? // I am lazy } } Ok(()) From 016e151fbf4b1dbda653773e03e1701041348e91 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 17:42:10 +0000 Subject: [PATCH 17/88] successful ifTrue:ifFalse: and ifFalse:ifTrue: inlining. Known issue, though: "self" in inlined blocks no longer points to the right value, so Bounce fails. --- som-core/src/bytecode.rs | 6 ++- som-interpreter-bc/src/compiler.rs | 53 ++++++--------------------- som-interpreter-bc/src/interpreter.rs | 34 ++++++++++++++--- 3 files changed, 45 insertions(+), 48 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 204b98d7..dbb49279 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -32,8 +32,10 @@ pub enum Bytecode { ReturnLocal, ReturnNonLocal, Jump(usize), + JumpOnTrueTopNil(usize), JumpOnFalseTopNil(usize), - JumpOnTrueTopNil(usize) + JumpOnTruePop(usize), + JumpOnFalsePop(usize) } impl Bytecode { @@ -71,7 +73,7 @@ impl Bytecode { Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", Self::Jump(_) => "JUMP", - _ => "NO NAME, TODO" + _ => "NO NAME, TODO" // laziness } } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index a3437990..70a81cbf 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -435,68 +435,39 @@ impl PrimMessageInliner for ast::Expression { return Some(()); } - /*if message.signature == "ifTrue:ifFalse:" { // || message.signature == "ifFalse:ifTrue:" + if message.signature == "ifTrue:ifFalse:" || message.signature == "ifFalse:ifTrue:" { if message.values.len() != 2 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) || !matches!(message.values.get(1).unwrap(), ast::Expression::Block(_)) { return Some(()); } - // avoids some panic! match arms, there's only two possibilities - let is_if_true_if_false = message.signature == "ifTrue:ifFalse"; + let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; let start_jump_idx = ctxt.get_instr_idx(); - match is_if_true_if_false { - true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), - false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) + true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), } - if let ast::Expression::Block(block1) = message.values.get(0).unwrap() { - for block_local in &block1.locals { - ctxt.push_local(String::from(block_local)); - } - - if let Some((last, rest)) = block1.body.exprs.split_last() { - for expr in rest { - expr.codegen(ctxt); - ctxt.push_instr(Bytecode::Pop); - } - last.codegen(ctxt)?; - } - } + self.inline_block(ctxt, message.values.get(0).unwrap()); - let end_first_jump_idx = ctxt.get_instr_idx(); // the JUMP we've just emitted. + let middle_jump_idx = ctxt.get_instr_idx(); ctxt.push_instr(Bytecode::Jump(0)); let jump_by = ctxt.get_instr_idx() - start_jump_idx; match is_if_true_if_false { - true => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), - false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), + true => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnFalsePop(jump_by)), + false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTruePop(jump_by)), } - if let ast::Expression::Block(block2) = message.values.get(1).unwrap() { - for block_local in &block2.locals { - ctxt.push_local(String::from(block_local)); - } - - if let Some((last, rest)) = block2.body.exprs.split_last() { - for expr in rest { - expr.codegen(ctxt); - ctxt.push_instr(Bytecode::Pop); - } - last.codegen(ctxt)?; - } - } + self.inline_block(ctxt, message.values.get(1).unwrap()); - let jump_by = ctxt.get_instr_idx() - end_first_jump_idx; - match is_if_true_if_false { - true => ctxt.backpatch(end_first_jump_idx, Bytecode::Jump(jump_by)), - false => ctxt.backpatch(end_first_jump_idx, Bytecode::Jump(jump_by)), - } + let jump_by = ctxt.get_instr_idx() - middle_jump_idx; + ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); return Some(()); - }*/ + } // TODO: [whileTrue, whileFalse], [or, and] return None; diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 97412d1e..9f678103 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -467,13 +467,27 @@ impl Interpreter { let frame = self.current_frame().unwrap(); frame.clone().borrow_mut().bytecode_idx += offset - 1; }, + Bytecode::JumpOnTrueTopNil(offset) => { + let condition_result = self.stack.pop().unwrap(); + + match condition_result { + Value::Boolean(true) => { + let frame = self.current_frame().unwrap(); + frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop + self.stack.push(Value::Nil); // TODO read and rewrite OR pop, instead of pop then optional push + }, + Value::Boolean(false) => { + }, + _ => panic!() + } + }, Bytecode::JumpOnFalseTopNil(offset) => { let condition_result = self.stack.pop().unwrap(); match condition_result { Value::Boolean(false) => { let frame = self.current_frame().unwrap(); - frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop + frame.clone().borrow_mut().bytecode_idx += offset - 1; self.stack.push(Value::Nil); }, Value::Boolean(true) => { @@ -481,20 +495,30 @@ impl Interpreter { _ => panic!() } }, - Bytecode::JumpOnTrueTopNil(offset) => { + Bytecode::JumpOnTruePop(offset) => { let condition_result = self.stack.pop().unwrap(); match condition_result { Value::Boolean(true) => { let frame = self.current_frame().unwrap(); - frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop - self.stack.push(Value::Nil); + frame.clone().borrow_mut().bytecode_idx += offset - 1; }, + Value::Boolean(false) => {}, + _ => panic!() + } + }, + Bytecode::JumpOnFalsePop(offset) => { + let condition_result = self.stack.pop().unwrap(); + + match condition_result { Value::Boolean(false) => { + let frame = self.current_frame().unwrap(); + frame.clone().borrow_mut().bytecode_idx += offset - 1; }, + Value::Boolean(true) => {}, _ => panic!() } - } + }, } } From 0504ad682b3a55965077ccad9f646276fd228027 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 18:03:59 +0000 Subject: [PATCH 18/88] Fixed the self bug, was an oversight --- som-interpreter-bc/src/compiler.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 70a81cbf..bfd136b8 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -411,7 +411,7 @@ impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { if message.signature == "ifTrue:" || message.signature == "ifFalse" { if message.values.len() != 1 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { - return Some(()); + return None; } // avoids some panic! match arms, there's only two possibilities @@ -435,11 +435,11 @@ impl PrimMessageInliner for ast::Expression { return Some(()); } - if message.signature == "ifTrue:ifFalse:" || message.signature == "ifFalse:ifTrue:" { + if message.signature == "ifTrue:ifFalse:" {// || message.signature == "ifFalse:ifTrue:" { if message.values.len() != 2 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) || !matches!(message.values.get(1).unwrap(), ast::Expression::Block(_)) { - return Some(()); + return None; } let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; From ad99e9946c686e7d545682dcb6a9412257389750 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 18:18:45 +0000 Subject: [PATCH 19/88] Improved some panic messages and left a TODO --- som-interpreter-bc/src/compiler.rs | 1 + som-interpreter-bc/src/interpreter.rs | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index bfd136b8..cbf84a24 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -410,6 +410,7 @@ impl MethodCodegen for ast::Expression { impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { if message.signature == "ifTrue:" || message.signature == "ifFalse" { + // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. if message.values.len() != 1 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { return None; } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 9f678103..9c8c2eeb 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -478,7 +478,7 @@ impl Interpreter { }, Value::Boolean(false) => { }, - _ => panic!() + _ => panic!("Jump condition did not evaluate to boolean") } }, Bytecode::JumpOnFalseTopNil(offset) => { @@ -492,7 +492,7 @@ impl Interpreter { }, Value::Boolean(true) => { }, - _ => panic!() + _ => panic!("Jump condition did not evaluate to boolean") } }, Bytecode::JumpOnTruePop(offset) => { @@ -504,7 +504,7 @@ impl Interpreter { frame.clone().borrow_mut().bytecode_idx += offset - 1; }, Value::Boolean(false) => {}, - _ => panic!() + _ => panic!("Jump condition did not evaluate to boolean") } }, Bytecode::JumpOnFalsePop(offset) => { @@ -516,7 +516,7 @@ impl Interpreter { frame.clone().borrow_mut().bytecode_idx += offset - 1; }, Value::Boolean(true) => {}, - _ => panic!() + _ => panic!("Jump condition did not evaluate to boolean") } }, } From 9840c015fdcc3c5c717d5d4b30fd6a29c1795f6e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 18:24:08 +0000 Subject: [PATCH 20/88] Removed an old TODO and slightly optimized binary ops --- som-interpreter-bc/src/compiler.rs | 4 ++-- som-interpreter-bc/src/frame.rs | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index cbf84a24..ec3d5125 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -338,9 +338,9 @@ impl MethodCodegen for ast::Expression { let sym = ctxt.intern_symbol(message.op.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { - ctxt.push_instr(Bytecode::SendN(idx as u8)); // TODO why doesn't send2 work? + ctxt.push_instr(Bytecode::Send1(idx as u8)); } else { - ctxt.push_instr(Bytecode::SendN(idx as u8)); + ctxt.push_instr(Bytecode::Send1(idx as u8)); } Some(()) } diff --git a/som-interpreter-bc/src/frame.rs b/som-interpreter-bc/src/frame.rs index 47a9b197..cf8ddf92 100644 --- a/som-interpreter-bc/src/frame.rs +++ b/som-interpreter-bc/src/frame.rs @@ -122,11 +122,6 @@ impl Frame { self.get_bytecode(self.bytecode_idx) } - // todo remove me - pub fn get_bytecode_idx(&self) -> usize { - self.bytecode_idx - } - pub fn lookup_constant(&self, idx: usize) -> Option { match self.kind() { FrameKind::Block { block } => block.literals.get(idx).cloned(), From fdbb7b5cea3c34975e93562f064bb62857995263 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 20 Dec 2022 19:05:54 +0000 Subject: [PATCH 21/88] Had forgotten to inline ifFalse:ifTrue:, oops. --- som-interpreter-bc/src/compiler.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index ec3d5125..db253183 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -434,9 +434,7 @@ impl PrimMessageInliner for ast::Expression { } return Some(()); - } - - if message.signature == "ifTrue:ifFalse:" {// || message.signature == "ifFalse:ifTrue:" { + } else if message.signature == "ifTrue:ifFalse:" || message.signature == "ifFalse:ifTrue:" { if message.values.len() != 2 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) || !matches!(message.values.get(1).unwrap(), ast::Expression::Block(_)) { From 0e45b1abc4bba60f1ab9a4aad7eb3763ef0d8b09 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 21 Dec 2022 21:30:00 +0000 Subject: [PATCH 22/88] Working towards inlining whileTrue: --- som-core/src/bytecode.rs | 4 +- som-interpreter-bc/src/compiler.rs | 137 +++++++++++++++++++++++--- som-interpreter-bc/src/interpreter.rs | 10 +- 3 files changed, 137 insertions(+), 14 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index dbb49279..6becc24e 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -32,6 +32,7 @@ pub enum Bytecode { ReturnLocal, ReturnNonLocal, Jump(usize), + JumpBackward(usize), JumpOnTrueTopNil(usize), JumpOnFalseTopNil(usize), JumpOnTruePop(usize), @@ -149,8 +150,9 @@ impl fmt::Display for Bytecode { Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), Self::Jump(idx) => write!(f, "JUMP {}", idx), + Self::JumpBackward(idx) => write!(f, "JUMP_BACKWARD {}", idx), Self::JumpOnFalseTopNil(idx) => write!(f, "JUMP_ON_FALSE_TOP_NIL {}", idx), - _ => write!(f, "No display for this bytecode, TODO.") // I am lazy + _ => write!(f, "No display for this bytecode, TODO") } } } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index db253183..2985f8bd 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -97,10 +97,14 @@ trait GenCtxt { trait InnerGenCtxt: GenCtxt { fn as_gen_ctxt(&mut self) -> &mut dyn GenCtxt; fn push_instr(&mut self, instr: Bytecode); + fn pop_instr(&mut self); + fn get_instructions(&self) -> &Vec; fn push_arg(&mut self, name: String) -> usize; fn push_local(&mut self, name: String) -> usize; + fn get_literal(&self, idx: usize) -> Option<&Literal>; // is this needed? fn push_literal(&mut self, literal: Literal) -> usize; - fn get_instr_idx(&mut self) -> usize; + fn remove_literal(&mut self, idx: usize) -> Option; + fn get_instr_idx(&self) -> usize; fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode); } @@ -149,6 +153,14 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { body.push(instr); } + fn pop_instr(&mut self) { + self.body.as_mut().unwrap().pop(); + } + + fn get_instructions(&self) -> &Vec { + self.body.as_ref().unwrap() + } + fn push_arg(&mut self, name: String) -> usize { let (idx, _) = self.args.insert_full(name); idx @@ -159,12 +171,20 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { idx } + fn get_literal(&self, idx: usize) -> Option<&Literal> { + self.literals.get_index(idx) + } + fn push_literal(&mut self, literal: Literal) -> usize { let (idx, _) = self.literals.insert_full(literal); idx } - fn get_instr_idx(&mut self) -> usize { + fn remove_literal(&mut self, idx: usize) -> Option { + self.literals.shift_remove_index(idx) + } + + fn get_instr_idx(&self) -> usize { return self.body.as_ref().unwrap().iter().len(); } @@ -204,6 +224,14 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { self.inner.push_instr(instr) } + fn pop_instr(&mut self) { + self.inner.pop_instr(); + } + + fn get_instructions(&self) -> &Vec { + self.inner.get_instructions() + } + fn push_arg(&mut self, name: String) -> usize { self.inner.push_arg(name) } @@ -216,7 +244,15 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { self.inner.push_literal(literal) } - fn get_instr_idx(&mut self) -> usize { + fn get_literal(&self, idx: usize) -> Option<&Literal> { + self.inner.get_literal(idx) + } + + fn remove_literal(&mut self, idx: usize) -> Option { + self.inner.remove_literal(idx) + } + + fn get_instr_idx(&self) -> usize { return self.inner.get_instr_idx(); } @@ -231,7 +267,8 @@ trait MethodCodegen { trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; - fn inline_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; + fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; + fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; } impl MethodCodegen for ast::Body { @@ -411,7 +448,7 @@ impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { if message.signature == "ifTrue:" || message.signature == "ifFalse" { // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. - if message.values.len() != 1 || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { return None; } @@ -425,7 +462,7 @@ impl PrimMessageInliner for ast::Expression { false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) } - self.inline_block(ctxt, message.values.get(0).unwrap()); + self.inline_block_expr(ctxt, message.values.get(0)?); let jump_by = ctxt.get_instr_idx() - jump_idx; match is_if_true { @@ -436,8 +473,8 @@ impl PrimMessageInliner for ast::Expression { return Some(()); } else if message.signature == "ifTrue:ifFalse:" || message.signature == "ifFalse:ifTrue:" { if message.values.len() != 2 - || !matches!(message.values.get(0).unwrap(), ast::Expression::Block(_)) - || !matches!(message.values.get(1).unwrap(), ast::Expression::Block(_)) { + || !matches!(message.values.get(0)?, ast::Expression::Block(_)) + || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { return None; } @@ -449,7 +486,7 @@ impl PrimMessageInliner for ast::Expression { false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), } - self.inline_block(ctxt, message.values.get(0).unwrap()); + self.inline_block_expr(ctxt, message.values.get(0)?); let middle_jump_idx = ctxt.get_instr_idx(); ctxt.push_instr(Bytecode::Jump(0)); @@ -460,19 +497,59 @@ impl PrimMessageInliner for ast::Expression { false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTruePop(jump_by)), } - self.inline_block(ctxt, message.values.get(1).unwrap()); + self.inline_block_expr(ctxt, message.values.get(1)?); let jump_by = ctxt.get_instr_idx() - middle_jump_idx; ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); + return Some(()); + } else if message.signature == "whileTrueMARKED:" { // TODO whileFalse: + let block_idx = match ctxt.get_instructions().last()? { + Bytecode::PushBlock(val) => val, + _ => return None + }; + + let block_ref = match ctxt.remove_literal(*block_idx as usize)? { + Literal::Block(val) => val.clone(), + _ => return None + }; + + if message.values.len() != 1 + || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + return None; + } + + ctxt.pop_instr(); // we remove the PUSH_BLOCK + + let cond_idx = ctxt.get_instr_idx(); + + self.inline_compiled_block(ctxt, block_ref.as_ref()); + + let loop_start_idx = ctxt.get_instr_idx(); + + ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); + + self.inline_block_expr(ctxt, message.values.get(0).unwrap()); + let jump_to_cond_val = ctxt.get_instr_idx() - cond_idx; + ctxt.push_instr(Bytecode::JumpBackward(jump_to_cond_val)); + + let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; + ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalseTopNil(loop_jump_by)); + + println!("BYTECODES:"); + for instr in ctxt.get_instructions() { + println!("{}", instr); + } + println!(""); + return Some(()); } - // TODO: [whileTrue, whileFalse], [or, and] + // TODO: [or, and] return None; } - fn inline_block(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { + fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { match block_expr { ast::Expression::Block(block) => { for block_local in &block.locals { @@ -493,6 +570,42 @@ impl PrimMessageInliner for ast::Expression { _ => panic!("Expression was not a block") } } + + // not great to have two versions of the same method for structures that contain the same info. eh + fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { + for block_local in &block.locals { + dbg!(block_local); + todo!() + // TODO actually push locals. + // ctxt.push_local(String::from(block_local)); + } + + let literals_offset = block.literals.len(); + for block_lit in &block.literals { + match block_lit { + Literal::Symbol(interned) => {ctxt.push_literal(Literal::Symbol(*interned))} + _ => { todo!() } + }; + // ctxt.push_literal(Literal::from(block_lit)); + } + + if let Some((last, body)) = block.body.split_last() { + for block_bc in body { + match block_bc { + Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr( Bytecode::PushLocal(*up_idx - 1, *idx)), + Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr( Bytecode::PushArgument(*up_idx - 1, *idx)), + Bytecode::Send1(lit_idx) => ctxt.push_instr( Bytecode::Send1(lit_idx + literals_offset as u8)), + _ => ctxt.push_instr(*block_bc) + } + } + match last { + Bytecode::ReturnLocal => {}, + _ => ctxt.push_instr(*last) // afaik it's always the case, so maybe should always be popped. + } + } + + Some(()) + } } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 9c8c2eeb..d98ea765 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -254,6 +254,10 @@ impl Interpreter { // // }; // } + if &frame.borrow().get_method().signature == "innerBenchmarkLoop:" { + println!("cur bc: {}", bytecode); + print!(""); + } frame.borrow_mut().bytecode_idx += 1; // dbg!(&frame.borrow().get_method().signature); @@ -307,7 +311,7 @@ impl Interpreter { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); let mut block = match literal { Literal::Block(blk) => Block::clone(&blk), - _ => return None, + _ => panic!("PushBlock expected a block, but got another invalid literal"), }; block.frame.replace(Rc::clone(frame)); self.stack.push(Value::Block(Rc::new(block))); @@ -467,6 +471,10 @@ impl Interpreter { let frame = self.current_frame().unwrap(); frame.clone().borrow_mut().bytecode_idx += offset - 1; }, + Bytecode::JumpBackward(offset) => { + let frame = self.current_frame().unwrap(); + frame.clone().borrow_mut().bytecode_idx -= offset + 1; + }, Bytecode::JumpOnTrueTopNil(offset) => { let condition_result = self.stack.pop().unwrap(); From aab089d47bc95d081a64534392bcb8b93b865908 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 21 Dec 2022 21:39:45 +0000 Subject: [PATCH 23/88] Successfully inlining one whileTrue in Bounce --- som-interpreter-bc/src/compiler.rs | 22 +++++++++++----------- som-interpreter-bc/src/interpreter.rs | 9 +++++---- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 2985f8bd..d32d2da8 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -536,11 +536,11 @@ impl PrimMessageInliner for ast::Expression { let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalseTopNil(loop_jump_by)); - println!("BYTECODES:"); - for instr in ctxt.get_instructions() { - println!("{}", instr); - } - println!(""); + // println!("BYTECODES:"); + // for instr in ctxt.get_instructions() { + // println!("{}", instr); + // } + // println!(""); return Some(()); } @@ -553,10 +553,11 @@ impl PrimMessageInliner for ast::Expression { match block_expr { ast::Expression::Block(block) => { for block_local in &block.locals { - ctxt.push_local(String::from(block_local)); + ctxt.push_local(String::from(block_local)); // breaks shadowing } // TODO i suspect we can reuse compile_block() instead, but a quick attempt failed. + // i suspect we can reuse the other inline function (inlines a compiled block) when it's done, since turning a block expr into a block is trivial. // TODO also, need remove those POPs somehow. if let Some((last, rest)) = block.body.exprs.split_last() { for expr in rest { @@ -575,18 +576,17 @@ impl PrimMessageInliner for ast::Expression { fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { for block_local in &block.locals { dbg!(block_local); - todo!() - // TODO actually push locals. + todo!("actually pushing locals would be nice") // ctxt.push_local(String::from(block_local)); } - let literals_offset = block.literals.len(); + let literals_offset = block.literals.len() - 1; for block_lit in &block.literals { match block_lit { - Literal::Symbol(interned) => {ctxt.push_literal(Literal::Symbol(*interned))} + Literal::Symbol(interned) => { + ctxt.push_literal(Literal::Symbol(*interned))} _ => { todo!() } }; - // ctxt.push_literal(Literal::from(block_lit)); } if let Some((last, body)) = block.body.split_last() { diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index d98ea765..504c356b 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -254,10 +254,11 @@ impl Interpreter { // // }; // } - if &frame.borrow().get_method().signature == "innerBenchmarkLoop:" { - println!("cur bc: {}", bytecode); - print!(""); - } + // if &frame.borrow().get_method().signature == "innerBenchmarkLoop:" { + // println!("cur bc: {}", bytecode); + // print!(""); + // } + frame.borrow_mut().bytecode_idx += 1; // dbg!(&frame.borrow().get_method().signature); From e3a1a310b631d3757b12747cee85752d3a77299d Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 21 Dec 2022 23:15:30 +0100 Subject: [PATCH 24/88] Inlining every ifTrue makes Bounce not crash at least --- som-interpreter-bc/src/compiler.rs | 10 ++++++---- som-interpreter-bc/src/interpreter.rs | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index d32d2da8..285a6ce2 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -503,7 +503,7 @@ impl PrimMessageInliner for ast::Expression { ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); return Some(()); - } else if message.signature == "whileTrueMARKED:" { // TODO whileFalse: + } else if message.signature == "whileTrue:" { // TODO whileFalse: let block_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => val, _ => return None @@ -580,11 +580,12 @@ impl PrimMessageInliner for ast::Expression { // ctxt.push_local(String::from(block_local)); } - let literals_offset = block.literals.len() - 1; + // let literals_offset = block.literals.len()- 1; for block_lit in &block.literals { match block_lit { Literal::Symbol(interned) => { - ctxt.push_literal(Literal::Symbol(*interned))} + ctxt.push_literal(Literal::Symbol(*interned)); + } _ => { todo!() } }; } @@ -593,8 +594,9 @@ impl PrimMessageInliner for ast::Expression { for block_bc in body { match block_bc { Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr( Bytecode::PushLocal(*up_idx - 1, *idx)), + Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr( Bytecode::PopLocal(*up_idx - 1, *idx)), Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr( Bytecode::PushArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) => ctxt.push_instr( Bytecode::Send1(lit_idx + literals_offset as u8)), + Bytecode::Send1(lit_idx) => ctxt.push_instr( Bytecode::Send1(*lit_idx)), _ => ctxt.push_instr(*block_bc) } } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 504c356b..2387cae3 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -254,7 +254,7 @@ impl Interpreter { // // }; // } - // if &frame.borrow().get_method().signature == "innerBenchmarkLoop:" { + // if &frame.borrow().get_method().signature == "resolve:" { // println!("cur bc: {}", bytecode); // print!(""); // } From bda7fd7680c9a05e49ff2161c92c6921413bc192 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 26 Dec 2022 17:59:11 +0100 Subject: [PATCH 25/88] Progress with whileTrue, seems to occasionally make infinite loops --- som-core/src/bytecode.rs | 1 + som-interpreter-bc/src/compiler.rs | 131 +++++++++++++++++++------- som-interpreter-bc/src/interpreter.rs | 1 + 3 files changed, 100 insertions(+), 33 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 6becc24e..5a16fea1 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -152,6 +152,7 @@ impl fmt::Display for Bytecode { Self::Jump(idx) => write!(f, "JUMP {}", idx), Self::JumpBackward(idx) => write!(f, "JUMP_BACKWARD {}", idx), Self::JumpOnFalseTopNil(idx) => write!(f, "JUMP_ON_FALSE_TOP_NIL {}", idx), + Self::JumpOnFalsePop(idx) => write!(f, "JUMP_ON_FALSE_POP {}", idx), _ => write!(f, "No display for this bytecode, TODO") } } diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 285a6ce2..fc7e20a6 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -106,6 +106,8 @@ trait InnerGenCtxt: GenCtxt { fn remove_literal(&mut self, idx: usize) -> Option; fn get_instr_idx(&self) -> usize; fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode); + fn get_body_debug(&self) -> Option<&Vec>; + fn get_literals_debug(&self) -> IndexSet; } struct BlockGenCtxt<'a> { @@ -144,6 +146,14 @@ impl GenCtxt for BlockGenCtxt<'_> { } impl InnerGenCtxt for BlockGenCtxt<'_> { + fn get_body_debug(&self) -> Option<&Vec> { + self.body.as_ref() + } + + fn get_literals_debug(&self) -> IndexSet { + self.literals.clone() + } + fn as_gen_ctxt(&mut self) -> &mut dyn GenCtxt { self } @@ -216,6 +226,7 @@ impl GenCtxt for MethodGenCtxt<'_> { } impl InnerGenCtxt for MethodGenCtxt<'_> { + fn as_gen_ctxt(&mut self) -> &mut dyn GenCtxt { self } @@ -259,6 +270,14 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { self.inner.backpatch(idx_to_backpatch, bytecode_with_new_val); } + + fn get_body_debug(&self) -> Option<&Vec> { + self.inner.get_body_debug() + } + + fn get_literals_debug(&self) -> IndexSet { + self.inner.get_literals_debug() + } } trait MethodCodegen { @@ -295,9 +314,6 @@ impl MethodCodegen for ast::Expression { None => { match name.as_str() { "nil" => ctxt.push_instr(Bytecode::PushNil), - // TODO should cache those false and true, although pushing 0 and 1 isn't functional - // "false" => ctxt.push_instr(Bytecode::Push0), - // "true" => ctxt.push_instr(Bytecode::Push1), _ => { let name = ctxt.intern_symbol(name); let idx = ctxt.push_literal(Literal::Symbol(name)); @@ -509,7 +525,8 @@ impl PrimMessageInliner for ast::Expression { _ => return None }; - let block_ref = match ctxt.remove_literal(*block_idx as usize)? { + // todo pop the literal + let block_ref = match ctxt.get_literal(*block_idx as usize)? { Literal::Block(val) => val.clone(), _ => return None }; @@ -525,22 +542,42 @@ impl PrimMessageInliner for ast::Expression { self.inline_compiled_block(ctxt, block_ref.as_ref()); + // println!("BYTECODES AFTER FIRST BLOCK:"); + // for instr in ctxt.get_instructions() { + // println!("{}", instr); + // } + // dbg!(ctxt.get_literals_debug()); + + // println!("BYTECODES IN FIRST BLOCK:"); + // for instr in &block_ref.as_ref().body { + // println!("{}", instr); + // } + // println!(); + // println!("Block lits:"); + // dbg!(&block_ref.as_ref().literals); + let loop_start_idx = ctxt.get_instr_idx(); - ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)); + ctxt.push_instr(Bytecode::JumpOnFalsePop(0)); self.inline_block_expr(ctxt, message.values.get(0).unwrap()); + + ctxt.push_instr(Bytecode::Pop); + let jump_to_cond_val = ctxt.get_instr_idx() - cond_idx; ctxt.push_instr(Bytecode::JumpBackward(jump_to_cond_val)); + let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; - ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalseTopNil(loop_jump_by)); + ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalsePop(loop_jump_by)); + + ctxt.push_instr(Bytecode::PushNil); // println!("BYTECODES:"); // for instr in ctxt.get_instructions() { // println!("{}", instr); // } - // println!(""); + // println!(); return Some(()); } @@ -580,29 +617,69 @@ impl PrimMessageInliner for ast::Expression { // ctxt.push_local(String::from(block_local)); } - // let literals_offset = block.literals.len()- 1; - for block_lit in &block.literals { - match block_lit { - Literal::Symbol(interned) => { - ctxt.push_literal(Literal::Symbol(*interned)); - } - _ => { todo!() } - }; - } + + // let literals_offset = block.literals.len(); + // for block_lit in &block.literals { + // match block_lit { + // Literal::Symbol(interned) => { + // ctxt.push_literal(Literal::Symbol(*interned)); + // } + // _ => { todo!() } + // }; + // } if let Some((last, body)) = block.body.split_last() { for block_bc in body { match block_bc { - Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr( Bytecode::PushLocal(*up_idx - 1, *idx)), - Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr( Bytecode::PopLocal(*up_idx - 1, *idx)), - Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr( Bytecode::PushArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) => ctxt.push_instr( Bytecode::Send1(*lit_idx)), + Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), + Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), + Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), + Bytecode::Send1(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::Send1(idx as u8)); + }, + _ => todo!() + } + }, + Bytecode::Send2(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::Send2(idx as u8)); + }, + _ => todo!() + } + }, + Bytecode::Send3(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::Send3(idx as u8)); + }, + _ => todo!() + } + }, + Bytecode::SendN(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::SendN(idx as u8)); + }, + _ => todo!() + } + }, _ => ctxt.push_instr(*block_bc) } } + match last { Bytecode::ReturnLocal => {}, - _ => ctxt.push_instr(*last) // afaik it's always the case, so maybe should always be popped. + _ => { + panic!("wait, this can happen?"); + // ctxt.push_instr(*last); + } } } @@ -697,14 +774,10 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option { - // println!("(system) compiling block ..."); - let mut ctxt = BlockGenCtxt { outer, args: defn.parameters.iter().cloned().collect(), @@ -731,8 +804,6 @@ fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { nb_params: ctxt.args.len(), }; - // println!("(system) compiled block !"); - Some(block) } @@ -901,11 +972,5 @@ pub fn compile_class( instance_class_mut.methods = instance_class_ctxt.methods; drop(instance_class_mut); - // for method in instance_class.borrow().methods.values() { - // println!("{}", method); - // } - - // println!("compiled '{}' !", defn.name); - Some(instance_class) } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 2387cae3..c95faea3 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -254,6 +254,7 @@ impl Interpreter { // // }; // } + // dbg!(&frame.borrow().get_method().signature); // if &frame.borrow().get_method().signature == "resolve:" { // println!("cur bc: {}", bytecode); // print!(""); From 9c3ed08b8498dac56599d47441200a3a667f17e5 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 26 Dec 2022 18:41:08 +0100 Subject: [PATCH 26/88] added whileFalse: but still haven't fixed some issues with whileTrue: (Mandelbrot infinite loop) --- som-interpreter-bc/src/compiler.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index fc7e20a6..1a5f9ec5 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -462,7 +462,7 @@ impl MethodCodegen for ast::Expression { impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { - if message.signature == "ifTrue:" || message.signature == "ifFalse" { + if message.signature == "ifTrue:" || message.signature == "ifFalse:" { // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { return None; @@ -487,7 +487,7 @@ impl PrimMessageInliner for ast::Expression { } return Some(()); - } else if message.signature == "ifTrue:ifFalse:" || message.signature == "ifFalse:ifTrue:" { + } else if message.signature == "ifTrueXDDD:ifFalse:" || message.signature == "ifFalseDDX:ifTrue:" { if message.values.len() != 2 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { @@ -519,7 +519,7 @@ impl PrimMessageInliner for ast::Expression { ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); return Some(()); - } else if message.signature == "whileTrue:" { // TODO whileFalse: + } else if message.signature == "whileTrue:" || message.signature == "whileFalse:" { let block_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => val, _ => return None @@ -538,6 +538,8 @@ impl PrimMessageInliner for ast::Expression { ctxt.pop_instr(); // we remove the PUSH_BLOCK + let is_while_true = message.signature == "whileTrue:"; + let cond_idx = ctxt.get_instr_idx(); self.inline_compiled_block(ctxt, block_ref.as_ref()); @@ -558,7 +560,10 @@ impl PrimMessageInliner for ast::Expression { let loop_start_idx = ctxt.get_instr_idx(); - ctxt.push_instr(Bytecode::JumpOnFalsePop(0)); + match is_while_true { + true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) + } self.inline_block_expr(ctxt, message.values.get(0).unwrap()); @@ -567,9 +572,12 @@ impl PrimMessageInliner for ast::Expression { let jump_to_cond_val = ctxt.get_instr_idx() - cond_idx; ctxt.push_instr(Bytecode::JumpBackward(jump_to_cond_val)); - let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; - ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalsePop(loop_jump_by)); + + match is_while_true { + true => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalsePop(loop_jump_by)), + false => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnTruePop(loop_jump_by)) + } ctxt.push_instr(Bytecode::PushNil); @@ -634,6 +642,7 @@ impl PrimMessageInliner for ast::Expression { Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), + Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), Bytecode::Send1(lit_idx) => { match block.literals.get(*lit_idx as usize)? { Literal::Symbol(interned) => { From 23f61329f41cda28947cc17fc368d8ffe25e5264 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 26 Dec 2022 19:36:59 +0100 Subject: [PATCH 27/88] Wrote an inliner module because inlining code was getting too sizeable (had to make some compiler.rs data structures public) --- som-interpreter-bc/src/compiler.rs | 252 +--------------------------- som-interpreter-bc/src/inliner.rs | 257 +++++++++++++++++++++++++++++ som-interpreter-bc/src/lib.rs | 3 + 3 files changed, 265 insertions(+), 247 deletions(-) create mode 100644 som-interpreter-bc/src/inliner.rs diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 1a5f9ec5..9ee9700f 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -18,6 +18,7 @@ use crate::method::{Method, MethodEnv, MethodKind}; use crate::primitives; use crate::value::Value; use crate::SOMRef; +use crate::inliner::PrimMessageInliner; #[derive(Debug, Clone)] pub enum Literal { @@ -82,19 +83,19 @@ impl Hash for Literal { } } -enum FoundVar { +pub enum FoundVar { Local(u8, u8), Argument(u8, u8), Field(u8), } -trait GenCtxt { +pub trait GenCtxt { fn find_var(&mut self, name: &str) -> Option; fn intern_symbol(&mut self, name: &str) -> Interned; fn class_name(&self) -> &str; } -trait InnerGenCtxt: GenCtxt { +pub trait InnerGenCtxt: GenCtxt { fn as_gen_ctxt(&mut self) -> &mut dyn GenCtxt; fn push_instr(&mut self, instr: Bytecode); fn pop_instr(&mut self); @@ -280,16 +281,10 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { } } -trait MethodCodegen { +pub trait MethodCodegen { fn codegen(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; } -trait PrimMessageInliner { - fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; - fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; - fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; -} - impl MethodCodegen for ast::Body { fn codegen(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()> { for expr in &self.exprs { @@ -460,243 +455,6 @@ impl MethodCodegen for ast::Expression { } } -impl PrimMessageInliner for ast::Expression { - fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { - if message.signature == "ifTrue:" || message.signature == "ifFalse:" { - // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { - return None; - } - - // avoids some panic! match arms, there's only two possibilities - let is_if_true = message.signature == "ifTrue:"; - - let jump_idx = ctxt.get_instr_idx(); - - match is_if_true { - true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), - false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) - } - - self.inline_block_expr(ctxt, message.values.get(0)?); - - let jump_by = ctxt.get_instr_idx() - jump_idx; - match is_if_true { - true => ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), - false => ctxt.backpatch(jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), - } - - return Some(()); - } else if message.signature == "ifTrueXDDD:ifFalse:" || message.signature == "ifFalseDDX:ifTrue:" { - if message.values.len() != 2 - || !matches!(message.values.get(0)?, ast::Expression::Block(_)) - || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { - return None; - } - - let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; - - let start_jump_idx = ctxt.get_instr_idx(); - match is_if_true_if_false { - true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), - } - - self.inline_block_expr(ctxt, message.values.get(0)?); - - let middle_jump_idx = ctxt.get_instr_idx(); - ctxt.push_instr(Bytecode::Jump(0)); - - let jump_by = ctxt.get_instr_idx() - start_jump_idx; - match is_if_true_if_false { - true => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnFalsePop(jump_by)), - false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTruePop(jump_by)), - } - - self.inline_block_expr(ctxt, message.values.get(1)?); - - let jump_by = ctxt.get_instr_idx() - middle_jump_idx; - ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); - - return Some(()); - } else if message.signature == "whileTrue:" || message.signature == "whileFalse:" { - let block_idx = match ctxt.get_instructions().last()? { - Bytecode::PushBlock(val) => val, - _ => return None - }; - - // todo pop the literal - let block_ref = match ctxt.get_literal(*block_idx as usize)? { - Literal::Block(val) => val.clone(), - _ => return None - }; - - if message.values.len() != 1 - || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { - return None; - } - - ctxt.pop_instr(); // we remove the PUSH_BLOCK - - let is_while_true = message.signature == "whileTrue:"; - - let cond_idx = ctxt.get_instr_idx(); - - self.inline_compiled_block(ctxt, block_ref.as_ref()); - - // println!("BYTECODES AFTER FIRST BLOCK:"); - // for instr in ctxt.get_instructions() { - // println!("{}", instr); - // } - // dbg!(ctxt.get_literals_debug()); - - // println!("BYTECODES IN FIRST BLOCK:"); - // for instr in &block_ref.as_ref().body { - // println!("{}", instr); - // } - // println!(); - // println!("Block lits:"); - // dbg!(&block_ref.as_ref().literals); - - let loop_start_idx = ctxt.get_instr_idx(); - - match is_while_true { - true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) - } - - self.inline_block_expr(ctxt, message.values.get(0).unwrap()); - - ctxt.push_instr(Bytecode::Pop); - - let jump_to_cond_val = ctxt.get_instr_idx() - cond_idx; - ctxt.push_instr(Bytecode::JumpBackward(jump_to_cond_val)); - - let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; - - match is_while_true { - true => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalsePop(loop_jump_by)), - false => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnTruePop(loop_jump_by)) - } - - ctxt.push_instr(Bytecode::PushNil); - - // println!("BYTECODES:"); - // for instr in ctxt.get_instructions() { - // println!("{}", instr); - // } - // println!(); - - return Some(()); - } - - // TODO: [or, and] - return None; - } - - fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { - match block_expr { - ast::Expression::Block(block) => { - for block_local in &block.locals { - ctxt.push_local(String::from(block_local)); // breaks shadowing - } - - // TODO i suspect we can reuse compile_block() instead, but a quick attempt failed. - // i suspect we can reuse the other inline function (inlines a compiled block) when it's done, since turning a block expr into a block is trivial. - // TODO also, need remove those POPs somehow. - if let Some((last, rest)) = block.body.exprs.split_last() { - for expr in rest { - expr.codegen(ctxt); - ctxt.push_instr(Bytecode::Pop); - } - last.codegen(ctxt)?; - } - Some(()) - }, - _ => panic!("Expression was not a block") - } - } - - // not great to have two versions of the same method for structures that contain the same info. eh - fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { - for block_local in &block.locals { - dbg!(block_local); - todo!("actually pushing locals would be nice") - // ctxt.push_local(String::from(block_local)); - } - - - // let literals_offset = block.literals.len(); - // for block_lit in &block.literals { - // match block_lit { - // Literal::Symbol(interned) => { - // ctxt.push_literal(Literal::Symbol(*interned)); - // } - // _ => { todo!() } - // }; - // } - - if let Some((last, body)) = block.body.split_last() { - for block_bc in body { - match block_bc { - Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), - Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), - Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), - Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::Send1(idx as u8)); - }, - _ => todo!() - } - }, - Bytecode::Send2(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::Send2(idx as u8)); - }, - _ => todo!() - } - }, - Bytecode::Send3(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::Send3(idx as u8)); - }, - _ => todo!() - } - }, - Bytecode::SendN(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::SendN(idx as u8)); - }, - _ => todo!() - } - }, - _ => ctxt.push_instr(*block_bc) - } - } - - match last { - Bytecode::ReturnLocal => {}, - _ => { - panic!("wait, this can happen?"); - // ctxt.push_instr(*last); - } - } - } - - Some(()) - } -} - - struct ClassGenCtxt<'a> { pub name: String, pub fields: IndexSet, diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs new file mode 100644 index 00000000..40d8e69a --- /dev/null +++ b/som-interpreter-bc/src/inliner.rs @@ -0,0 +1,257 @@ +use som_core::ast; +use som_core::bytecode::Bytecode; +use crate::block::Block; +use crate::compiler::{InnerGenCtxt, Literal}; +use crate::compiler::MethodCodegen; + +pub trait PrimMessageInliner { + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; + fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; + + fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; +} + +impl PrimMessageInliner for ast::Expression { + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + match message.signature.as_str() { + "ifTrue:" | "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message), + "ifTrue:ifFalse:" | "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message), + // "whileTrue:" | "whileFalse:" => self.inline_while(ctxt, message), + // TODO: [or, and] + _ => None + } + } + + fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { + match block_expr { + ast::Expression::Block(block) => { + for block_local in &block.locals { + ctxt.push_local(String::from(block_local)); // breaks shadowing + } + + // TODO i suspect we can reuse the other inline function (inlines a compiled block) when it's done, since turning a block expr into a block is trivial. + // TODO also, need remove those POPs somehow. + if let Some((last, rest)) = block.body.exprs.split_last() { + for expr in rest { + expr.codegen(ctxt); + ctxt.push_instr(Bytecode::Pop); + } + last.codegen(ctxt)?; + } + Some(()) + }, + _ => panic!("Expression was not a block") + } + } + + fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { + for block_local in &block.locals { + dbg!(block_local); + todo!("actually pushing locals would be nice") + // ctxt.push_local(String::from(block_local)); + } + + + // let literals_offset = block.literals.len(); + // for block_lit in &block.literals { + // match block_lit { + // Literal::Symbol(interned) => { + // ctxt.push_literal(Literal::Symbol(*interned)); + // } + // _ => { todo!() } + // }; + // } + + if let Some((last, body)) = block.body.split_last() { + for block_bc in body { + match block_bc { + Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), + Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), + Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), + Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), + Bytecode::Send1(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::Send1(idx as u8)); + }, + _ => todo!() + } + }, + Bytecode::Send2(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::Send2(idx as u8)); + }, + _ => todo!() + } + }, + Bytecode::Send3(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::Send3(idx as u8)); + }, + _ => todo!() + } + }, + Bytecode::SendN(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + ctxt.push_instr(Bytecode::SendN(idx as u8)); + }, + _ => todo!() + } + }, + _ => ctxt.push_instr(*block_bc) + } + } + + match last { + Bytecode::ReturnLocal => {}, + _ => { + panic!("wait, this can happen?"); + // ctxt.push_instr(*last); + } + } + } + + Some(()) + } + + fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + return None; + } + + let is_if_true = message.signature == "ifTrue:"; + + let jump_idx = ctxt.get_instr_idx(); + + match is_if_true { + true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), + false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) + } + + self.inline_block_expr(ctxt, message.values.get(0)?); + + let jump_by = ctxt.get_instr_idx() - jump_idx; + match is_if_true { + true => ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), + false => ctxt.backpatch(jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), + } + + return Some(()); + } + + fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + if message.values.len() != 2 + || !matches!(message.values.get(0)?, ast::Expression::Block(_)) + || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { + return None; + } + + let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; + + let start_jump_idx = ctxt.get_instr_idx(); + match is_if_true_if_false { + true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), + } + + self.inline_block_expr(ctxt, message.values.get(0)?); + + let middle_jump_idx = ctxt.get_instr_idx(); + ctxt.push_instr(Bytecode::Jump(0)); + + let jump_by = ctxt.get_instr_idx() - start_jump_idx; + match is_if_true_if_false { + true => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnFalsePop(jump_by)), + false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTruePop(jump_by)), + } + + self.inline_block_expr(ctxt, message.values.get(1)?); + + let jump_by = ctxt.get_instr_idx() - middle_jump_idx; + ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); + + return Some(()); + } + + fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + let block_idx = match ctxt.get_instructions().last()? { + Bytecode::PushBlock(val) => val, + _ => return None + }; + + // todo pop the literal + let block_ref = match ctxt.get_literal(*block_idx as usize)? { + Literal::Block(val) => val.clone(), + _ => return None + }; + + if message.values.len() != 1 + || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + return None; + } + + ctxt.pop_instr(); // we remove the PUSH_BLOCK + + let is_while_true = message.signature == "whileTrue:"; + + let cond_idx = ctxt.get_instr_idx(); + + self.inline_compiled_block(ctxt, block_ref.as_ref()); + + // println!("BYTECODES AFTER FIRST BLOCK:"); + // for instr in ctxt.get_instructions() { + // println!("{}", instr); + // } + // dbg!(ctxt.get_literals_debug()); + + // println!("BYTECODES IN FIRST BLOCK:"); + // for instr in &block_ref.as_ref().body { + // println!("{}", instr); + // } + // println!(); + // println!("Block lits:"); + // dbg!(&block_ref.as_ref().literals); + + let loop_start_idx = ctxt.get_instr_idx(); + + match is_while_true { + true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) + } + + self.inline_block_expr(ctxt, message.values.get(0).unwrap()); + + ctxt.push_instr(Bytecode::Pop); + + let jump_to_cond_val = ctxt.get_instr_idx() - cond_idx; + ctxt.push_instr(Bytecode::JumpBackward(jump_to_cond_val)); + + let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; + + match is_while_true { + true => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalsePop(loop_jump_by)), + false => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnTruePop(loop_jump_by)) + } + + ctxt.push_instr(Bytecode::PushNil); + + // println!("BYTECODES:"); + // for instr in ctxt.get_instructions() { + // println!("{}", instr); + // } + // println!(); + + return Some(()); + } +} \ No newline at end of file diff --git a/som-interpreter-bc/src/lib.rs b/som-interpreter-bc/src/lib.rs index c4917230..016f9c0e 100644 --- a/som-interpreter-bc/src/lib.rs +++ b/som-interpreter-bc/src/lib.rs @@ -30,6 +30,9 @@ pub mod universe; /// Facilities for manipulating values. pub mod value; +/// Inlining some calls to a select few builtin functions for sizeable perf gains. +pub mod inliner; + /// A strong and owning reference to an object. pub type SOMRef = Rc>; /// A weak reference to an object. From 3e297152c239f75674a23badb3382cb48fe10587 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 26 Dec 2022 20:39:39 +0100 Subject: [PATCH 28/88] Setting the stage for patching inner blocks during inlining --- som-interpreter-bc/src/inliner.rs | 83 ++++++++++++------------------- 1 file changed, 32 insertions(+), 51 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 40d8e69a..ab85e23b 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -4,10 +4,12 @@ use crate::block::Block; use crate::compiler::{InnerGenCtxt, Literal}; use crate::compiler::MethodCodegen; +// TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; + fn patch_inner_block_during_inlining(&self, block: &Block) -> Block; fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; @@ -19,7 +21,7 @@ impl PrimMessageInliner for ast::Expression { match message.signature.as_str() { "ifTrue:" | "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message), "ifTrue:ifFalse:" | "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message), - // "whileTrue:" | "whileFalse:" => self.inline_while(ctxt, message), + "whileTrue:" | "whileFalse:" => self.inline_while(ctxt, message), // TODO: [or, and] _ => None } @@ -50,80 +52,59 @@ impl PrimMessageInliner for ast::Expression { fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { for block_local in &block.locals { dbg!(block_local); - todo!("actually pushing locals would be nice") + todo!("actually pushing locals would be nice!") // ctxt.push_local(String::from(block_local)); } - - // let literals_offset = block.literals.len(); - // for block_lit in &block.literals { - // match block_lit { - // Literal::Symbol(interned) => { - // ctxt.push_literal(Literal::Symbol(*interned)); - // } - // _ => { todo!() } - // }; - // } - - if let Some((last, body)) = block.body.split_last() { + // last is always ReturnLocal, so it gets ignored + if let Some((_, body)) = block.body.split_last() { for block_bc in body { match block_bc { Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::Send1(idx as u8)); - }, - _ => todo!() - } - }, - Bytecode::Send2(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::Send2(idx as u8)); - }, - _ => todo!() - } - }, - Bytecode::Send3(lit_idx) => { + Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | + Bytecode::Send3(lit_idx) | Bytecode::SendN(lit_idx) => { match block.literals.get(*lit_idx as usize)? { Literal::Symbol(interned) => { + // does this push duplicate literals? I think it doesn't? let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::Send3(idx as u8)); + match block_bc { + Bytecode::Send1(_) => ctxt.push_instr(Bytecode::Send1(idx as u8)), + Bytecode::Send2(_) => ctxt.push_instr(Bytecode::Send2(idx as u8)), + Bytecode::Send3(_) => ctxt.push_instr(Bytecode::Send3(idx as u8)), + Bytecode::SendN(_) => ctxt.push_instr(Bytecode::SendN(idx as u8)), + _ => panic!("Unreachable branch") + } }, - _ => todo!() + _ => panic!("Unexpected block literal type, not yet implemented") } }, - Bytecode::SendN(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - ctxt.push_instr(Bytecode::SendN(idx as u8)); - }, - _ => todo!() - } + Bytecode::PushBlock(block_idx) => { + dbg!(block_idx); + match block.literals.get(*block_idx as usize)? { + Literal::Block(inner_block) => self.patch_inner_block_during_inlining(inner_block.as_ref()), + _ => panic!("PushBlock not actually pushing a block somehow") + }; + todo!("then we push the new literal in place of the old one (needs the same index) and we make sure no block literal is pushed by the following send") + // ctxt.push_literal(block); }, _ => ctxt.push_instr(*block_bc) } } - - match last { - Bytecode::ReturnLocal => {}, - _ => { - panic!("wait, this can happen?"); - // ctxt.push_instr(*last); - } - } } Some(()) } + fn patch_inner_block_during_inlining(&self, block: &Block) -> Block { + for bc in &block.body { + dbg!(&bc); + } + todo!("A block is used in the compiled block, and needs to be modified to account for the moved vars in its outer context. It'll need to return a new block since Rc means immutable"); + } + fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { From 6f040622ca7d60bd8b1056bf7d465fde3a6854c7 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 27 Dec 2022 15:23:38 +0100 Subject: [PATCH 29/88] fixed mandelbrot infinite loop, but not Json --- som-interpreter-bc/src/block.rs | 2 ++ som-interpreter-bc/src/compiler.rs | 3 ++- som-interpreter-bc/src/inliner.rs | 35 +++++++++--------------------- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/som-interpreter-bc/src/block.rs b/som-interpreter-bc/src/block.rs index 6adfeb24..61254d51 100644 --- a/som-interpreter-bc/src/block.rs +++ b/som-interpreter-bc/src/block.rs @@ -1,4 +1,5 @@ use std::fmt; +use som_core::ast; use som_core::bytecode::Bytecode; @@ -18,6 +19,7 @@ pub struct Block { pub literals: Vec, pub body: Vec, pub nb_params: usize, + pub ast_body: ast::Block // TODO really not a fan of this, it's only needed during parsing... } impl Block { diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 9ee9700f..d781483a 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -544,7 +544,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option { +pub fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { let mut ctxt = BlockGenCtxt { outer, args: defn.parameters.iter().cloned().collect(), @@ -569,6 +569,7 @@ fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { literals: ctxt.literals.into_iter().collect(), body: ctxt.body.unwrap_or_default(), nb_params: ctxt.args.len(), + ast_body: defn.clone() }; Some(block) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index ab85e23b..c0147734 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,7 +1,8 @@ +use std::rc::Rc; use som_core::ast; use som_core::bytecode::Bytecode; use crate::block::Block; -use crate::compiler::{InnerGenCtxt, Literal}; +use crate::compiler::{compile_block, InnerGenCtxt, Literal}; use crate::compiler::MethodCodegen; // TODO some of those should return Result types and throw errors instead, most likely. @@ -9,7 +10,7 @@ pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; - fn patch_inner_block_during_inlining(&self, block: &Block) -> Block; + fn patch_inner_block_during_inlining(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Block; fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; @@ -82,13 +83,14 @@ impl PrimMessageInliner for ast::Expression { } }, Bytecode::PushBlock(block_idx) => { - dbg!(block_idx); match block.literals.get(*block_idx as usize)? { - Literal::Block(inner_block) => self.patch_inner_block_during_inlining(inner_block.as_ref()), + Literal::Block(inner_block) => { + let new_block = self.patch_inner_block_during_inlining(ctxt, inner_block.as_ref()); + let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); + ctxt.push_instr(Bytecode::PushBlock(idx as u8)); + }, _ => panic!("PushBlock not actually pushing a block somehow") }; - todo!("then we push the new literal in place of the old one (needs the same index) and we make sure no block literal is pushed by the following send") - // ctxt.push_literal(block); }, _ => ctxt.push_instr(*block_bc) } @@ -98,11 +100,8 @@ impl PrimMessageInliner for ast::Expression { Some(()) } - fn patch_inner_block_during_inlining(&self, block: &Block) -> Block { - for bc in &block.body { - dbg!(&bc); - } - todo!("A block is used in the compiled block, and needs to be modified to account for the moved vars in its outer context. It'll need to return a new block since Rc means immutable"); + fn patch_inner_block_during_inlining(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Block { + compile_block(ctxt.as_gen_ctxt(), &block.ast_body).unwrap() // ...is it really that simple? } fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { @@ -190,20 +189,6 @@ impl PrimMessageInliner for ast::Expression { self.inline_compiled_block(ctxt, block_ref.as_ref()); - // println!("BYTECODES AFTER FIRST BLOCK:"); - // for instr in ctxt.get_instructions() { - // println!("{}", instr); - // } - // dbg!(ctxt.get_literals_debug()); - - // println!("BYTECODES IN FIRST BLOCK:"); - // for instr in &block_ref.as_ref().body { - // println!("{}", instr); - // } - // println!(); - // println!("Block lits:"); - // dbg!(&block_ref.as_ref().literals); - let loop_start_idx = ctxt.get_instr_idx(); match is_while_true { From 052c8319149da1ad8ed203e3cadbd03d904908dc Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 27 Dec 2022 16:01:13 +0100 Subject: [PATCH 30/88] Minor refactoring in inlining code for slightly more clarity (included moving some more logic to backpatch_jump() ) --- som-interpreter-bc/src/compiler.rs | 30 +++++++++----- som-interpreter-bc/src/inliner.rs | 66 +++++++++--------------------- 2 files changed, 39 insertions(+), 57 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index d781483a..65848d3a 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -105,8 +105,8 @@ pub trait InnerGenCtxt: GenCtxt { fn get_literal(&self, idx: usize) -> Option<&Literal>; // is this needed? fn push_literal(&mut self, literal: Literal) -> usize; fn remove_literal(&mut self, idx: usize) -> Option; - fn get_instr_idx(&self) -> usize; - fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode); + fn get_cur_instr_idx(&self) -> usize; + fn backpatch_jump(&mut self, idx_to_backpatch: usize); fn get_body_debug(&self) -> Option<&Vec>; fn get_literals_debug(&self) -> IndexSet; } @@ -195,13 +195,23 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { self.literals.shift_remove_index(idx) } - fn get_instr_idx(&self) -> usize { + fn get_cur_instr_idx(&self) -> usize { return self.body.as_ref().unwrap().iter().len(); } - fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { - // dbg!(&self.outer.class_name()); - self.body.as_mut().unwrap()[idx_to_backpatch] = bytecode_with_new_val; + fn backpatch_jump(&mut self, idx_to_backpatch: usize) { + let jump_offset = self.get_cur_instr_idx() - idx_to_backpatch; + + self.body.as_mut().unwrap()[idx_to_backpatch] = + match self.body.as_ref().unwrap()[idx_to_backpatch] { + Bytecode::Jump(_) => Bytecode::Jump(jump_offset), + Bytecode::JumpBackward(_) => Bytecode::JumpBackward(jump_offset), + Bytecode::JumpOnTrueTopNil(_) => Bytecode::JumpOnTrueTopNil(jump_offset), + Bytecode::JumpOnFalseTopNil(_) => Bytecode::JumpOnFalseTopNil(jump_offset), + Bytecode::JumpOnTruePop(_) => Bytecode::JumpOnTruePop(jump_offset), + Bytecode::JumpOnFalsePop(_) => Bytecode::JumpOnFalsePop(jump_offset), + _ => panic!("Attempting to backpatch a bytecode non jump") + }; } } @@ -264,12 +274,12 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { self.inner.remove_literal(idx) } - fn get_instr_idx(&self) -> usize { - return self.inner.get_instr_idx(); + fn get_cur_instr_idx(&self) -> usize { + return self.inner.get_cur_instr_idx(); } - fn backpatch(&mut self, idx_to_backpatch: usize, bytecode_with_new_val: Bytecode) { - self.inner.backpatch(idx_to_backpatch, bytecode_with_new_val); + fn backpatch_jump(&mut self, idx_to_backpatch: usize) { + self.inner.backpatch_jump(idx_to_backpatch); } fn get_body_debug(&self) -> Option<&Vec> { diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index c0147734..68c0e11d 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -105,15 +105,14 @@ impl PrimMessageInliner for ast::Expression { } fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { - // TODO we can inline more than blocks if we rely on the existing codegen methods. However, that's a pain for some reason. + let is_if_true = message.signature == "ifTrue:"; + + // TODO we can inline more than blocks if we rely on the existing codegen methods. if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { return None; } - let is_if_true = message.signature == "ifTrue:"; - - let jump_idx = ctxt.get_instr_idx(); - + let jump_idx = ctxt.get_cur_instr_idx(); match is_if_true { true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) @@ -121,25 +120,21 @@ impl PrimMessageInliner for ast::Expression { self.inline_block_expr(ctxt, message.values.get(0)?); - let jump_by = ctxt.get_instr_idx() - jump_idx; - match is_if_true { - true => ctxt.backpatch(jump_idx, Bytecode::JumpOnFalseTopNil(jump_by)), - false => ctxt.backpatch(jump_idx, Bytecode::JumpOnTrueTopNil(jump_by)), - } + ctxt.backpatch_jump(jump_idx); return Some(()); } fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; + if message.values.len() != 2 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { return None; } - let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; - - let start_jump_idx = ctxt.get_instr_idx(); + let start_jump_idx = ctxt.get_cur_instr_idx(); match is_if_true_if_false { true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), @@ -147,31 +142,26 @@ impl PrimMessageInliner for ast::Expression { self.inline_block_expr(ctxt, message.values.get(0)?); - let middle_jump_idx = ctxt.get_instr_idx(); + let middle_jump_idx = ctxt.get_cur_instr_idx(); ctxt.push_instr(Bytecode::Jump(0)); - let jump_by = ctxt.get_instr_idx() - start_jump_idx; - match is_if_true_if_false { - true => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnFalsePop(jump_by)), - false => ctxt.backpatch(start_jump_idx, Bytecode::JumpOnTruePop(jump_by)), - } - + ctxt.backpatch_jump(start_jump_idx); self.inline_block_expr(ctxt, message.values.get(1)?); - - let jump_by = ctxt.get_instr_idx() - middle_jump_idx; - ctxt.backpatch(middle_jump_idx, Bytecode::Jump(jump_by)); + ctxt.backpatch_jump(middle_jump_idx); return Some(()); } fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + let is_while_true = message.signature == "whileTrue:"; + let block_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => val, _ => return None }; // todo pop the literal - let block_ref = match ctxt.get_literal(*block_idx as usize)? { + let cond_block_ref = match ctxt.get_literal(*block_idx as usize)? { Literal::Block(val) => val.clone(), _ => return None }; @@ -183,14 +173,11 @@ impl PrimMessageInliner for ast::Expression { ctxt.pop_instr(); // we remove the PUSH_BLOCK - let is_while_true = message.signature == "whileTrue:"; + let idx_before_condition = ctxt.get_cur_instr_idx(); - let cond_idx = ctxt.get_instr_idx(); - - self.inline_compiled_block(ctxt, block_ref.as_ref()); - - let loop_start_idx = ctxt.get_instr_idx(); + self.inline_compiled_block(ctxt, cond_block_ref.as_ref()); + let cond_jump_idx = ctxt.get_cur_instr_idx(); match is_while_true { true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) @@ -199,25 +186,10 @@ impl PrimMessageInliner for ast::Expression { self.inline_block_expr(ctxt, message.values.get(0).unwrap()); ctxt.push_instr(Bytecode::Pop); - - let jump_to_cond_val = ctxt.get_instr_idx() - cond_idx; - ctxt.push_instr(Bytecode::JumpBackward(jump_to_cond_val)); - - let loop_jump_by = ctxt.get_instr_idx() - loop_start_idx; - - match is_while_true { - true => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnFalsePop(loop_jump_by)), - false => ctxt.backpatch(loop_start_idx, Bytecode::JumpOnTruePop(loop_jump_by)) - } - + ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); + ctxt.backpatch_jump(cond_jump_idx); ctxt.push_instr(Bytecode::PushNil); - // println!("BYTECODES:"); - // for instr in ctxt.get_instructions() { - // println!("{}", instr); - // } - // println!(); - return Some(()); } } \ No newline at end of file From d02f5930f97256b29bd73c604f91a6fd6920bce0 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 11:55:52 +0100 Subject: [PATCH 31/88] Fixed an infinite loop with whileFalse in Json --- som-interpreter-bc/src/inliner.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 68c0e11d..e078c3b2 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -92,6 +92,29 @@ impl PrimMessageInliner for ast::Expression { _ => panic!("PushBlock not actually pushing a block somehow") }; }, + Bytecode::PushConstant(constant_idx) => { + match block.literals.get(*constant_idx as usize)? { + lit => { + let lit_idx = ctxt.push_literal(lit.clone()); + ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)); + } + }; + }, + Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => { + let constant_idx: usize = match block_bc { + Bytecode::PushConstant0 => 0, + Bytecode::PushConstant1 => 1, + Bytecode::PushConstant2 => 2, + _ => panic!("Unreachable") + }; + + match block.literals.get(constant_idx)? { + lit => { + let lit_idx = ctxt.push_literal(lit.clone()); + ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)); + } + }; + }, _ => ctxt.push_instr(*block_bc) } } @@ -166,8 +189,7 @@ impl PrimMessageInliner for ast::Expression { _ => return None }; - if message.values.len() != 1 - || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { return None; } From 6446b4fac414d6378d6ed5f0b941b0e85a6fb0e9 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 16:35:49 +0100 Subject: [PATCH 32/88] Removed unused bytecode halt (pysom also has it and never uses it, iirc) --- som-core/src/bytecode.rs | 7 ++----- som-interpreter-bc/src/interpreter.rs | 5 +---- som-interpreter-bc/src/method.rs | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 5a16fea1..5a187c64 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -3,7 +3,6 @@ use std::fmt; #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Bytecode { - Halt, Dup, PushLocal(u8, u8), PushArgument(u8, u8), @@ -45,7 +44,6 @@ impl Bytecode { pub fn name(self) -> &'static str { // NAMES[self as usize] match self { - Self::Halt => "HALT", Self::Dup => "DUP", Self::PushLocal(_, _) => "PUSH_LOCAL", Self::PushArgument(_, _) => "PUSH_ARGUMENT", @@ -83,7 +81,6 @@ impl Bytecode { pub fn padded_name(self) -> &'static str { // PADDED_NAMES[self as usize] match self { - Self::Halt => "HALT ", Self::Dup => "DUP ", Self::PushLocal(_, _) => "PUSH_LOCAL ", Self::PushArgument(_, _) => "PUSH_ARGUMENT ", @@ -121,7 +118,6 @@ impl fmt::Display for Bytecode { #[rustfmt::skip] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::Halt => write!(f, "HALT"), Self::Dup => write!(f, "DUP"), Self::PushLocal(up_idx, idx) => write!(f, "PUSH_LOCAL {}, {}", up_idx, idx), Self::PushArgument(up_idx, idx) => write!(f, "PUSH_ARGUMENT {}, {}", up_idx, idx), @@ -153,7 +149,8 @@ impl fmt::Display for Bytecode { Self::JumpBackward(idx) => write!(f, "JUMP_BACKWARD {}", idx), Self::JumpOnFalseTopNil(idx) => write!(f, "JUMP_ON_FALSE_TOP_NIL {}", idx), Self::JumpOnFalsePop(idx) => write!(f, "JUMP_ON_FALSE_POP {}", idx), - _ => write!(f, "No display for this bytecode, TODO") + Self::JumpOnTrueTopNil(idx) => write!(f, "JUMP_ON_TRUE_TOP_NIL {}", idx), + Self::JumpOnTruePop(idx) => write!(f, "JUMP_ON_TRUE_POP {}", idx), } } } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index c95faea3..2f455daa 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -264,9 +264,6 @@ impl Interpreter { // dbg!(&frame.borrow().get_method().signature); match bytecode { - Bytecode::Halt => { - return Some(Value::Nil); - } Bytecode::Dup => { let value = self.stack.last().cloned().unwrap(); self.stack.push(value); @@ -342,7 +339,7 @@ impl Interpreter { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); let symbol = match literal { Literal::Symbol(sym) => sym, - _ => return None, + _ => panic!("Global is not a symbol."), }; if let Some(value) = universe.lookup_global(symbol) { self.stack.push(value); diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index 5b3d0a80..2f3ceaa7 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -117,7 +117,6 @@ impl fmt::Display for Method { writeln!(f)?; write!(f, " {} ", bytecode.padded_name())?; match bytecode { - Bytecode::Halt => {} Bytecode::Dup => {} Bytecode::PushLocal(up_idx, idx) => { write!(f, "local: {}, context: {}", idx, up_idx)?; From 39251da0aca2895a8ed1fb9a2dc469c5a138c034 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 16:38:22 +0100 Subject: [PATCH 33/88] Fixed Richards and added it to run_benchmarks.sh --- run_benchmarks.sh | 4 ++-- som-interpreter-bc/src/inliner.rs | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/run_benchmarks.sh b/run_benchmarks.sh index 28739b40..419fc29a 100755 --- a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -1,10 +1,10 @@ #!/bin/bash -BENCHMARKS=("Bounce" "Mandelbrot" "List" "Permute" "Queens" "QuickSort" "Sieve" "Fannkuch" "Json" "DeltaBlue") +BENCHMARKS=("Bounce" "Mandelbrot" "List" "Permute" "Queens" "QuickSort" "Sieve" "Fannkuch" "JsonSmall" "Richards" "DeltaBlue") for bench in "${BENCHMARKS[@]}" do - cargo run --bin som-interpreter-bc -- -c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/Json -- core-lib/Examples/Benchmarks/BenchmarkHarness.som $bench 1 0 7 + cargo run --bin som-interpreter-bc -- -c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/Json core-lib/Examples/Benchmarks/Richards core-lib/Examples/Benchmarks/DeltaBlue -- core-lib/Examples/Benchmarks/BenchmarkHarness.som $bench 1 0 7 echo -ne "\n" done diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index e078c3b2..4d4d9aba 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -100,6 +100,14 @@ impl PrimMessageInliner for ast::Expression { } }; }, + Bytecode::PushGlobal(global_idx) => { + match block.literals.get(*global_idx as usize)? { + lit => { + let lit_idx = ctxt.push_literal(lit.clone()); + ctxt.push_instr(Bytecode::PushGlobal(lit_idx as u8)); + } + }; + }, Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => { let constant_idx: usize = match block_bc { Bytecode::PushConstant0 => 0, @@ -115,6 +123,7 @@ impl PrimMessageInliner for ast::Expression { } }; }, + Bytecode::ReturnNonLocal => panic!("There shouldn't be a return here"), _ => ctxt.push_instr(*block_bc) } } From 58e9a8b531cd74f701f2ee4e740daeaf30202490 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 16:44:17 +0100 Subject: [PATCH 34/88] Added a halt primitive for debugging --- som-interpreter-bc/src/primitives/object.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/som-interpreter-bc/src/primitives/object.rs b/som-interpreter-bc/src/primitives/object.rs index c1f5c2e5..c471cd01 100644 --- a/som-interpreter-bc/src/primitives/object.rs +++ b/som-interpreter-bc/src/primitives/object.rs @@ -22,6 +22,7 @@ pub static INSTANCE_PRIMITIVES: &[(&str, PrimitiveFn, bool)] = &[ ), ("instVarAt:", self::inst_var_at, true), ("instVarAt:put:", self::inst_var_at_put, true), + ("halt", self::halt, true), ("==", self::eq, true), ]; pub static CLASS_PRIMITIVES: &[(&str, PrimitiveFn, bool)] = &[]; @@ -248,6 +249,11 @@ fn inst_var_at_put(interpreter: &mut Interpreter, _: &mut Universe) { interpreter.stack.push(local); } +fn halt(_interpreter: &mut Interpreter, _: &mut Universe) { + const _: &'static str = "Object>>#halt"; + println!("HALT"); // so a breakpoint can be put +} + /// Search for an instance primitive matching the given signature. pub fn get_instance_primitive(signature: &str) -> Option { INSTANCE_PRIMITIVES From b044400769f849df797403dba94dae5d8f46b208 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 17:19:32 +0100 Subject: [PATCH 35/88] while inlining functional in Json, therefore in every benchmark tested! --- som-interpreter-bc/src/inliner.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 4d4d9aba..c42136f4 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -36,7 +36,7 @@ impl PrimMessageInliner for ast::Expression { } // TODO i suspect we can reuse the other inline function (inlines a compiled block) when it's done, since turning a block expr into a block is trivial. - // TODO also, need remove those POPs somehow. + // also, need remove those POPs somehow. if let Some((last, rest)) = block.body.exprs.split_last() { for expr in rest { expr.codegen(ctxt); @@ -216,7 +216,16 @@ impl PrimMessageInliner for ast::Expression { self.inline_block_expr(ctxt, message.values.get(0).unwrap()); - ctxt.push_instr(Bytecode::Pop); + // we push a POP, unless the body of the loop is empty. + match message.values.get(0).unwrap() { + ast::Expression::Block(block) => { + if block.body.exprs.len() != 0 { + ctxt.push_instr(Bytecode::Pop); + } + }, + _ => panic!("unreachable") + }; + ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); ctxt.backpatch_jump(cond_jump_idx); ctxt.push_instr(Bytecode::PushNil); From 9cecc8811f695f2ef22f7682c8fe60e0c0756bbd Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 17:35:23 +0100 Subject: [PATCH 36/88] minor jump bytecode code cleanup --- som-interpreter-bc/src/interpreter.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 2f455daa..f2138184 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -467,47 +467,49 @@ impl Interpreter { } } Bytecode::Jump(offset) => { - let frame = self.current_frame().unwrap(); + let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; }, Bytecode::JumpBackward(offset) => { - let frame = self.current_frame().unwrap(); + let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx -= offset + 1; }, Bytecode::JumpOnTrueTopNil(offset) => { - let condition_result = self.stack.pop().unwrap(); + let condition_result = self.stack.last()?; match condition_result { Value::Boolean(true) => { - let frame = self.current_frame().unwrap(); + let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop - self.stack.push(Value::Nil); // TODO read and rewrite OR pop, instead of pop then optional push + *self.stack.last_mut()? = Value::Nil; }, Value::Boolean(false) => { + self.stack.pop(); }, _ => panic!("Jump condition did not evaluate to boolean") } }, Bytecode::JumpOnFalseTopNil(offset) => { - let condition_result = self.stack.pop().unwrap(); + let condition_result = self.stack.last()?; match condition_result { Value::Boolean(false) => { - let frame = self.current_frame().unwrap(); + let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; - self.stack.push(Value::Nil); + *self.stack.last_mut()? = Value::Nil; }, Value::Boolean(true) => { + self.stack.pop(); }, _ => panic!("Jump condition did not evaluate to boolean") } }, Bytecode::JumpOnTruePop(offset) => { - let condition_result = self.stack.pop().unwrap(); + let condition_result = self.stack.pop()?; match condition_result { Value::Boolean(true) => { - let frame = self.current_frame().unwrap(); + let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; }, Value::Boolean(false) => {}, @@ -515,11 +517,11 @@ impl Interpreter { } }, Bytecode::JumpOnFalsePop(offset) => { - let condition_result = self.stack.pop().unwrap(); + let condition_result = self.stack.pop()?; match condition_result { Value::Boolean(false) => { - let frame = self.current_frame().unwrap(); + let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; }, Value::Boolean(true) => {}, From a63319cd4e7a6877248b681648d2bac6aace130c Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 17:49:21 +0100 Subject: [PATCH 37/88] now inlining more than block expressions --- som-interpreter-bc/src/inliner.rs | 36 +++++++++++-------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index c42136f4..07d41711 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -8,9 +8,8 @@ use crate::compiler::MethodCodegen; // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; - fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; + fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; - fn patch_inner_block_during_inlining(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Block; fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; @@ -28,15 +27,14 @@ impl PrimMessageInliner for ast::Expression { } } - fn inline_block_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { + fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { match block_expr { ast::Expression::Block(block) => { for block_local in &block.locals { ctxt.push_local(String::from(block_local)); // breaks shadowing } - // TODO i suspect we can reuse the other inline function (inlines a compiled block) when it's done, since turning a block expr into a block is trivial. - // also, need remove those POPs somehow. + // TODO need remove those POPs somehow. if let Some((last, rest)) = block.body.exprs.split_last() { for expr in rest { expr.codegen(ctxt); @@ -46,7 +44,7 @@ impl PrimMessageInliner for ast::Expression { } Some(()) }, - _ => panic!("Expression was not a block") + expr => expr.codegen(ctxt) } } @@ -85,7 +83,7 @@ impl PrimMessageInliner for ast::Expression { Bytecode::PushBlock(block_idx) => { match block.literals.get(*block_idx as usize)? { Literal::Block(inner_block) => { - let new_block = self.patch_inner_block_during_inlining(ctxt, inner_block.as_ref()); + let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); ctxt.push_instr(Bytecode::PushBlock(idx as u8)); }, @@ -132,15 +130,10 @@ impl PrimMessageInliner for ast::Expression { Some(()) } - fn patch_inner_block_during_inlining(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Block { - compile_block(ctxt.as_gen_ctxt(), &block.ast_body).unwrap() // ...is it really that simple? - } - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { let is_if_true = message.signature == "ifTrue:"; - // TODO we can inline more than blocks if we rely on the existing codegen methods. - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + if message.values.len() != 1 { return None; } @@ -150,8 +143,7 @@ impl PrimMessageInliner for ast::Expression { false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) } - self.inline_block_expr(ctxt, message.values.get(0)?); - + self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump(jump_idx); return Some(()); @@ -160,9 +152,7 @@ impl PrimMessageInliner for ast::Expression { fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; - if message.values.len() != 2 - || !matches!(message.values.get(0)?, ast::Expression::Block(_)) - || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { + if message.values.len() != 2 { return None; } @@ -172,13 +162,13 @@ impl PrimMessageInliner for ast::Expression { false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), } - self.inline_block_expr(ctxt, message.values.get(0)?); + self.inline_expr(ctxt, message.values.get(0)?); let middle_jump_idx = ctxt.get_cur_instr_idx(); ctxt.push_instr(Bytecode::Jump(0)); ctxt.backpatch_jump(start_jump_idx); - self.inline_block_expr(ctxt, message.values.get(1)?); + self.inline_expr(ctxt, message.values.get(1)?); ctxt.backpatch_jump(middle_jump_idx); return Some(()); @@ -198,7 +188,7 @@ impl PrimMessageInliner for ast::Expression { _ => return None }; - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + if message.values.len() != 1 { return None; } @@ -214,7 +204,7 @@ impl PrimMessageInliner for ast::Expression { false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) } - self.inline_block_expr(ctxt, message.values.get(0).unwrap()); + self.inline_expr(ctxt, message.values.get(0).unwrap()); // we push a POP, unless the body of the loop is empty. match message.values.get(0).unwrap() { @@ -223,7 +213,7 @@ impl PrimMessageInliner for ast::Expression { ctxt.push_instr(Bytecode::Pop); } }, - _ => panic!("unreachable") + _ => {} }; ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); From 89eff54be1494cda22d3cd3ad4af1853f75d3d0d Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 30 Dec 2022 18:04:51 +0100 Subject: [PATCH 38/88] JumpType enum for more clarity --- som-interpreter-bc/src/inliner.rs | 54 +++++++++++++++++-------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 07d41711..c0048989 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -4,6 +4,12 @@ use som_core::bytecode::Bytecode; use crate::block::Block; use crate::compiler::{compile_block, InnerGenCtxt, Literal}; use crate::compiler::MethodCodegen; +use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; + +pub enum JumpType { + JumpOnFalse, + JumpOnTrue +} // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { @@ -11,17 +17,20 @@ pub trait PrimMessageInliner { fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; - fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; - fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; + fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; + fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; } impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { match message.signature.as_str() { - "ifTrue:" | "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message), - "ifTrue:ifFalse:" | "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message), - "whileTrue:" | "whileFalse:" => self.inline_while(ctxt, message), + "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), + "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), + "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), + "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), + "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), + "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), // TODO: [or, and] _ => None } @@ -34,7 +43,7 @@ impl PrimMessageInliner for ast::Expression { ctxt.push_local(String::from(block_local)); // breaks shadowing } - // TODO need remove those POPs somehow. + // TODO need to remove those POPs somehow. if let Some((last, rest)) = block.body.exprs.split_last() { for expr in rest { expr.codegen(ctxt); @@ -130,36 +139,33 @@ impl PrimMessageInliner for ast::Expression { Some(()) } - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { - let is_if_true = message.signature == "ifTrue:"; - + fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { if message.values.len() != 1 { return None; } let jump_idx = ctxt.get_cur_instr_idx(); - match is_if_true { - true => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), - false => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) + match jump_type { + JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), + JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) } + // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump(jump_idx); return Some(()); } - fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { - let is_if_true_if_false = message.signature == "ifTrue:ifFalse:"; - + fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { if message.values.len() != 2 { return None; } let start_jump_idx = ctxt.get_cur_instr_idx(); - match is_if_true_if_false { - true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), + match jump_type { + JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), } self.inline_expr(ctxt, message.values.get(0)?); @@ -174,9 +180,7 @@ impl PrimMessageInliner for ast::Expression { return Some(()); } - fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { - let is_while_true = message.signature == "whileTrue:"; - + fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { let block_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => val, _ => return None @@ -199,9 +203,9 @@ impl PrimMessageInliner for ast::Expression { self.inline_compiled_block(ctxt, cond_block_ref.as_ref()); let cond_jump_idx = ctxt.get_cur_instr_idx(); - match is_while_true { - true => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - false => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) + match jump_type { + JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) } self.inline_expr(ctxt, message.values.get(0).unwrap()); From bc6e8b9de9d88d31685365a640383042291e90f0 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 31 Dec 2022 15:48:08 +0100 Subject: [PATCH 39/88] block inlining now pushes specialzied pushconstant bytecodes when possible --- som-interpreter-bc/src/inliner.rs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index c0048989..ef6ff67f 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -99,19 +99,24 @@ impl PrimMessageInliner for ast::Expression { _ => panic!("PushBlock not actually pushing a block somehow") }; }, - Bytecode::PushConstant(constant_idx) => { - match block.literals.get(*constant_idx as usize)? { + Bytecode::PushGlobal(global_idx) => { + match block.literals.get(*global_idx as usize)? { lit => { let lit_idx = ctxt.push_literal(lit.clone()); - ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)); + ctxt.push_instr(Bytecode::PushGlobal(lit_idx as u8)); } }; }, - Bytecode::PushGlobal(global_idx) => { - match block.literals.get(*global_idx as usize)? { + Bytecode::PushConstant(constant_idx) => { + match block.literals.get(*constant_idx as usize)? { lit => { let lit_idx = ctxt.push_literal(lit.clone()); - ctxt.push_instr(Bytecode::PushGlobal(lit_idx as u8)); + match lit_idx { + 0 => ctxt.push_instr(Bytecode::PushConstant0), + 1 => ctxt.push_instr(Bytecode::PushConstant1), + 2 => ctxt.push_instr(Bytecode::PushConstant2), + _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) + } } }; }, @@ -126,7 +131,12 @@ impl PrimMessageInliner for ast::Expression { match block.literals.get(constant_idx)? { lit => { let lit_idx = ctxt.push_literal(lit.clone()); - ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)); + match lit_idx { + 0 => ctxt.push_instr(Bytecode::PushConstant0), + 1 => ctxt.push_instr(Bytecode::PushConstant1), + 2 => ctxt.push_instr(Bytecode::PushConstant2), + _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) + } } }; }, From fda6bd96e9e46b1529e25f55b7b22338c2095aeb Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Sat, 31 Dec 2022 15:48:29 +0100 Subject: [PATCH 40/88] first attempt at a bc optimizing pass (non functional) --- som-interpreter-bc/src/compiler.rs | 59 +++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 65848d3a..cea41a65 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -107,8 +107,7 @@ pub trait InnerGenCtxt: GenCtxt { fn remove_literal(&mut self, idx: usize) -> Option; fn get_cur_instr_idx(&self) -> usize; fn backpatch_jump(&mut self, idx_to_backpatch: usize); - fn get_body_debug(&self) -> Option<&Vec>; - fn get_literals_debug(&self) -> IndexSet; + fn do_optimizing_pass(&mut self); } struct BlockGenCtxt<'a> { @@ -147,14 +146,6 @@ impl GenCtxt for BlockGenCtxt<'_> { } impl InnerGenCtxt for BlockGenCtxt<'_> { - fn get_body_debug(&self) -> Option<&Vec> { - self.body.as_ref() - } - - fn get_literals_debug(&self) -> IndexSet { - self.literals.clone() - } - fn as_gen_ctxt(&mut self) -> &mut dyn GenCtxt { self } @@ -213,6 +204,34 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { _ => panic!("Attempting to backpatch a bytecode non jump") }; } + + fn do_optimizing_pass(&mut self) { + if self.body.is_none() || self.body.as_ref().unwrap().len() == 0 { + return; + } + + let mut idx = 0; + + // removing DUP POP_X POP combinations + while idx < self.body.as_ref().unwrap().len() { + let bc = &self.body.as_ref().unwrap()[idx]; + match bc { + Bytecode::PopField(_) | Bytecode::PopLocal(_, _) | Bytecode::PopArgument(_, _) => { + let next_bc = self.body.as_ref().unwrap().get(idx + 1); + let prev_bc = &self.body.as_ref().unwrap()[idx - 1]; + + if next_bc.is_some() && matches!(next_bc.unwrap(), Bytecode::Pop) && matches!(prev_bc, Bytecode::Dup) { + self.body.as_mut().unwrap().remove(idx - 1); + self.body.as_mut().unwrap().remove(idx); // so idx + 1 but we removed an elem + idx -= 1; + } else { + idx += 1; + } + }, + _ => idx += 1 + } + } + } } struct MethodGenCtxt<'a> { @@ -282,12 +301,8 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { self.inner.backpatch_jump(idx_to_backpatch); } - fn get_body_debug(&self) -> Option<&Vec> { - self.inner.get_body_debug() - } - - fn get_literals_debug(&self) -> IndexSet { - self.inner.get_literals_debug() + fn do_optimizing_pass(&mut self) { + self.inner.do_optimizing_pass(); } } @@ -530,9 +545,18 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option Date: Sat, 31 Dec 2022 16:19:22 +0100 Subject: [PATCH 41/88] better solution for the dup pop_x pop optimization, but doesn't work yet still --- som-interpreter-bc/src/compiler.rs | 50 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index cea41a65..257f64f0 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -206,31 +206,30 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } fn do_optimizing_pass(&mut self) { - if self.body.is_none() || self.body.as_ref().unwrap().len() == 0 { + if self.body.is_none() || self.body.as_ref().unwrap().len() < 3 { return; } - let mut idx = 0; - // removing DUP POP_X POP combinations - while idx < self.body.as_ref().unwrap().len() { - let bc = &self.body.as_ref().unwrap()[idx]; - match bc { - Bytecode::PopField(_) | Bytecode::PopLocal(_, _) | Bytecode::PopArgument(_, _) => { - let next_bc = self.body.as_ref().unwrap().get(idx + 1); - let prev_bc = &self.body.as_ref().unwrap()[idx - 1]; - - if next_bc.is_some() && matches!(next_bc.unwrap(), Bytecode::Pop) && matches!(prev_bc, Bytecode::Dup) { - self.body.as_mut().unwrap().remove(idx - 1); - self.body.as_mut().unwrap().remove(idx); // so idx + 1 but we removed an elem - idx -= 1; - } else { - idx += 1; - } - }, - _ => idx += 1 + let mut indices_to_remove: Vec = vec![]; + + for (idx, bytecode_win) in self.body.as_ref().unwrap().windows(3).enumerate() { + if matches!(bytecode_win[0], Bytecode::Dup) && + matches!(bytecode_win[1], Bytecode::PopField(..) | Bytecode::PopLocal(..) | Bytecode::PopArgument(..)) && + matches!(bytecode_win[2], Bytecode::Pop) { + indices_to_remove.push(idx); + indices_to_remove.push(idx + 2); } } + + self.body = Some(self.body.as_ref().unwrap().iter().enumerate() + .filter_map(|(idx, bc)| + if indices_to_remove.contains(&idx) { + None + } else { + Some(bc.clone()) + } + ).collect::>()); } } @@ -548,9 +547,18 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option Date: Mon, 2 Jan 2023 13:12:35 +0100 Subject: [PATCH 42/88] fixed removing the dup popx pop sequences partly, as it still breaks in many cases --- som-interpreter-bc/src/compiler.rs | 89 +++++++++++++++++++++--------- som-interpreter-bc/src/inliner.rs | 12 ++-- 2 files changed, 68 insertions(+), 33 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 257f64f0..b79f8e7c 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -106,8 +106,9 @@ pub trait InnerGenCtxt: GenCtxt { fn push_literal(&mut self, literal: Literal) -> usize; fn remove_literal(&mut self, idx: usize) -> Option; fn get_cur_instr_idx(&self) -> usize; - fn backpatch_jump(&mut self, idx_to_backpatch: usize); - fn do_optimizing_pass(&mut self); + fn patch_jump(&mut self, idx_to_backpatch: usize, new_val: usize); + fn backpatch_jump_to_current(&mut self, idx_to_backpatch: usize); + fn remove_dup_popx_pop_sequences(&mut self); } struct BlockGenCtxt<'a> { @@ -190,7 +191,7 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { return self.body.as_ref().unwrap().iter().len(); } - fn backpatch_jump(&mut self, idx_to_backpatch: usize) { + fn backpatch_jump_to_current(&mut self, idx_to_backpatch: usize) { let jump_offset = self.get_cur_instr_idx() - idx_to_backpatch; self.body.as_mut().unwrap()[idx_to_backpatch] = @@ -205,7 +206,20 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { }; } - fn do_optimizing_pass(&mut self) { + fn patch_jump(&mut self, idx_to_backpatch: usize, new_val: usize) { + self.body.as_mut().unwrap()[idx_to_backpatch] = + match self.body.as_ref().unwrap()[idx_to_backpatch] { + Bytecode::Jump(_) => Bytecode::Jump(new_val), + Bytecode::JumpBackward(_) => Bytecode::JumpBackward(new_val), + Bytecode::JumpOnTrueTopNil(_) => Bytecode::JumpOnTrueTopNil(new_val), + Bytecode::JumpOnFalseTopNil(_) => Bytecode::JumpOnFalseTopNil(new_val), + Bytecode::JumpOnTruePop(_) => Bytecode::JumpOnTruePop(new_val), + Bytecode::JumpOnFalsePop(_) => Bytecode::JumpOnFalsePop(new_val), + _ => panic!("Attempting to patch a bytecode non jump") + }; + } + + fn remove_dup_popx_pop_sequences(&mut self) { if self.body.is_none() || self.body.as_ref().unwrap().len() < 3 { return; } @@ -222,6 +236,34 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } } + if indices_to_remove.is_empty() { + return; + } + + let mut jumps_to_patch: Vec<(usize, usize)> = vec![]; + for (cur_idx, bc) in self.body.as_ref().unwrap().iter().enumerate() { + match bc { + Bytecode::Jump(jump_offset) | Bytecode::JumpOnTrueTopNil(jump_offset) | Bytecode::JumpOnFalseTopNil(jump_offset) + | Bytecode::JumpOnFalsePop(jump_offset) | Bytecode::JumpOnTruePop(jump_offset) => { + let nbr_offset_adjust = indices_to_remove.iter().filter(|&&idx_to_remove| cur_idx < idx_to_remove && idx_to_remove < cur_idx + jump_offset).count(); + if nbr_offset_adjust != 0 { + jumps_to_patch.push((cur_idx, jump_offset - nbr_offset_adjust)); + } + }, + Bytecode::JumpBackward(jump_offset) => { + let nbr_offset_adjust = indices_to_remove.iter().filter(|&&idx_to_remove| cur_idx - jump_offset < idx_to_remove && idx_to_remove < cur_idx).count(); + if nbr_offset_adjust != 0 { + jumps_to_patch.push((cur_idx, jump_offset - nbr_offset_adjust)); + } + }, + _ => {} + } + } + + for (jump_idx, new_val) in jumps_to_patch { + self.patch_jump(jump_idx, new_val) + } + self.body = Some(self.body.as_ref().unwrap().iter().enumerate() .filter_map(|(idx, bc)| if indices_to_remove.contains(&idx) { @@ -230,6 +272,12 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { Some(bc.clone()) } ).collect::>()); + + // eprintln!("BYTECODES AFTER:"); + // for bc in self.body.as_ref().unwrap() { + // eprintln!("{}", bc); + // } + // eprintln!(); } } @@ -296,12 +344,16 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { return self.inner.get_cur_instr_idx(); } - fn backpatch_jump(&mut self, idx_to_backpatch: usize) { - self.inner.backpatch_jump(idx_to_backpatch); + fn patch_jump(&mut self, idx_to_backpatch: usize, new_val: usize) { + self.inner.patch_jump(idx_to_backpatch, new_val) + } + + fn backpatch_jump_to_current(&mut self, idx_to_backpatch: usize) { + self.inner.backpatch_jump_to_current(idx_to_backpatch); } - fn do_optimizing_pass(&mut self) { - self.inner.do_optimizing_pass(); + fn remove_dup_popx_pop_sequences(&mut self) { + self.inner.remove_dup_popx_pop_sequences(); } } @@ -547,24 +599,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option Option<()> { match message.signature.as_str() { - "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), - "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), + // "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), + // "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), @@ -162,7 +162,7 @@ impl PrimMessageInliner for ast::Expression { // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating self.inline_expr(ctxt, message.values.get(0)?); - ctxt.backpatch_jump(jump_idx); + ctxt.backpatch_jump_to_current(jump_idx); return Some(()); } @@ -183,9 +183,9 @@ impl PrimMessageInliner for ast::Expression { let middle_jump_idx = ctxt.get_cur_instr_idx(); ctxt.push_instr(Bytecode::Jump(0)); - ctxt.backpatch_jump(start_jump_idx); + ctxt.backpatch_jump_to_current(start_jump_idx); self.inline_expr(ctxt, message.values.get(1)?); - ctxt.backpatch_jump(middle_jump_idx); + ctxt.backpatch_jump_to_current(middle_jump_idx); return Some(()); } @@ -231,7 +231,7 @@ impl PrimMessageInliner for ast::Expression { }; ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); - ctxt.backpatch_jump(cond_jump_idx); + ctxt.backpatch_jump_to_current(cond_jump_idx); ctxt.push_instr(Bytecode::PushNil); return Some(()); From 41c5a6b04fed2916d5127272bdd60967390fd7d1 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 4 Jan 2023 15:30:27 +0100 Subject: [PATCH 43/88] Removed the ugly ast::Block field in the Block struct --- run_benchmarks.sh | 2 +- som-interpreter-bc/src/block.rs | 4 +- som-interpreter-bc/src/compiler.rs | 75 +++++++----------------------- som-interpreter-bc/src/inliner.rs | 45 ++++++++---------- 4 files changed, 38 insertions(+), 88 deletions(-) diff --git a/run_benchmarks.sh b/run_benchmarks.sh index 419fc29a..06be9776 100755 --- a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -1,6 +1,6 @@ #!/bin/bash -BENCHMARKS=("Bounce" "Mandelbrot" "List" "Permute" "Queens" "QuickSort" "Sieve" "Fannkuch" "JsonSmall" "Richards" "DeltaBlue") +BENCHMARKS=("Bounce" "Mandelbrot" "List" "Permute" "Queens" "QuickSort" "Sieve" "Fannkuch" "JsonSmall" "DeltaBlue" "Richards") for bench in "${BENCHMARKS[@]}" do diff --git a/som-interpreter-bc/src/block.rs b/som-interpreter-bc/src/block.rs index 61254d51..2701bf3d 100644 --- a/som-interpreter-bc/src/block.rs +++ b/som-interpreter-bc/src/block.rs @@ -1,5 +1,4 @@ use std::fmt; -use som_core::ast; use som_core::bytecode::Bytecode; @@ -18,8 +17,7 @@ pub struct Block { pub locals: Vec, pub literals: Vec, pub body: Vec, - pub nb_params: usize, - pub ast_body: ast::Block // TODO really not a fan of this, it's only needed during parsing... + pub nb_params: usize } impl Block { diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index b79f8e7c..925d18f6 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -224,60 +224,19 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { return; } - // removing DUP POP_X POP combinations - let mut indices_to_remove: Vec = vec![]; - - for (idx, bytecode_win) in self.body.as_ref().unwrap().windows(3).enumerate() { - if matches!(bytecode_win[0], Bytecode::Dup) && - matches!(bytecode_win[1], Bytecode::PopField(..) | Bytecode::PopLocal(..) | Bytecode::PopArgument(..)) && - matches!(bytecode_win[2], Bytecode::Pop) { - indices_to_remove.push(idx); - indices_to_remove.push(idx + 2); - } - } - - if indices_to_remove.is_empty() { - return; + let body = self.body.as_mut().unwrap(); + + if matches!(body[body.len() - 3], Bytecode::Dup) && + matches!(body[body.len() - 2], Bytecode::PopField(..) | Bytecode::PopLocal(..) | Bytecode::PopArgument(..)) && + matches!(body.last().unwrap(), Bytecode::Pop) + { + dbg!(&body); + let (_, tokeep, _) = (body.pop(), body.pop().unwrap(), body.pop()); + println!("lol.lmao."); + dbg!(&tokeep); + self.body.as_mut().unwrap().push(tokeep); + dbg!(self.body.as_ref().unwrap()); } - - let mut jumps_to_patch: Vec<(usize, usize)> = vec![]; - for (cur_idx, bc) in self.body.as_ref().unwrap().iter().enumerate() { - match bc { - Bytecode::Jump(jump_offset) | Bytecode::JumpOnTrueTopNil(jump_offset) | Bytecode::JumpOnFalseTopNil(jump_offset) - | Bytecode::JumpOnFalsePop(jump_offset) | Bytecode::JumpOnTruePop(jump_offset) => { - let nbr_offset_adjust = indices_to_remove.iter().filter(|&&idx_to_remove| cur_idx < idx_to_remove && idx_to_remove < cur_idx + jump_offset).count(); - if nbr_offset_adjust != 0 { - jumps_to_patch.push((cur_idx, jump_offset - nbr_offset_adjust)); - } - }, - Bytecode::JumpBackward(jump_offset) => { - let nbr_offset_adjust = indices_to_remove.iter().filter(|&&idx_to_remove| cur_idx - jump_offset < idx_to_remove && idx_to_remove < cur_idx).count(); - if nbr_offset_adjust != 0 { - jumps_to_patch.push((cur_idx, jump_offset - nbr_offset_adjust)); - } - }, - _ => {} - } - } - - for (jump_idx, new_val) in jumps_to_patch { - self.patch_jump(jump_idx, new_val) - } - - self.body = Some(self.body.as_ref().unwrap().iter().enumerate() - .filter_map(|(idx, bc)| - if indices_to_remove.contains(&idx) { - None - } else { - Some(bc.clone()) - } - ).collect::>()); - - // eprintln!("BYTECODES AFTER:"); - // for bc in self.body.as_ref().unwrap() { - // eprintln!("{}", bc); - // } - // eprintln!(); } } @@ -599,7 +558,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option Option { +fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { let mut ctxt = BlockGenCtxt { outer, args: defn.parameters.iter().cloned().collect(), @@ -635,19 +594,19 @@ pub fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option Option<()> { match message.signature.as_str() { - // "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), - // "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), + "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), + "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), @@ -57,6 +56,7 @@ impl PrimMessageInliner for ast::Expression { } } + #[allow(dead_code)] // Unused for now, I implemented it but it was unnecessary oops fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { for block_local in &block.locals { dbg!(block_local); @@ -89,16 +89,16 @@ impl PrimMessageInliner for ast::Expression { _ => panic!("Unexpected block literal type, not yet implemented") } }, - Bytecode::PushBlock(block_idx) => { - match block.literals.get(*block_idx as usize)? { - Literal::Block(inner_block) => { - let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; - let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); - ctxt.push_instr(Bytecode::PushBlock(idx as u8)); - }, - _ => panic!("PushBlock not actually pushing a block somehow") - }; - }, + // Bytecode::PushBlock(block_idx) => { + // match block.literals.get(*block_idx as usize)? { + // Literal::Block(inner_block) => { + // let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; + // let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); + // ctxt.push_instr(Bytecode::PushBlock(idx as u8)); + // }, + // _ => panic!("PushBlock not actually pushing a block somehow") + // }; + // }, Bytecode::PushGlobal(global_idx) => { match block.literals.get(*global_idx as usize)? { lit => { @@ -191,26 +191,19 @@ impl PrimMessageInliner for ast::Expression { } fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - let block_idx = match ctxt.get_instructions().last()? { - Bytecode::PushBlock(val) => val, - _ => return None - }; - - // todo pop the literal - let cond_block_ref = match ctxt.get_literal(*block_idx as usize)? { - Literal::Block(val) => val.clone(), - _ => return None - }; - if message.values.len() != 1 { return None; } + if matches!(message.receiver.as_ref(), ast::Expression::Block(_)) { + return None; + } + ctxt.pop_instr(); // we remove the PUSH_BLOCK let idx_before_condition = ctxt.get_cur_instr_idx(); - self.inline_compiled_block(ctxt, cond_block_ref.as_ref()); + self.inline_expr(ctxt, message.receiver.as_ref()); let cond_jump_idx = ctxt.get_cur_instr_idx(); match jump_type { From 7089dfed96446e7b16979181dca478f1aa7de0b0 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 4 Jan 2023 17:11:26 +0100 Subject: [PATCH 44/88] functional instructions replacement, as far as i can tell --- som-interpreter-bc/src/compiler.rs | 67 +++++++++++++++----- som-interpreter-bc/src/primitives/integer.rs | 2 +- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 925d18f6..683e1e62 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -224,19 +224,55 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { return; } - let body = self.body.as_mut().unwrap(); - - if matches!(body[body.len() - 3], Bytecode::Dup) && - matches!(body[body.len() - 2], Bytecode::PopField(..) | Bytecode::PopLocal(..) | Bytecode::PopArgument(..)) && - matches!(body.last().unwrap(), Bytecode::Pop) - { - dbg!(&body); - let (_, tokeep, _) = (body.pop(), body.pop().unwrap(), body.pop()); - println!("lol.lmao."); - dbg!(&tokeep); - self.body.as_mut().unwrap().push(tokeep); - dbg!(self.body.as_ref().unwrap()); + let mut indices_to_remove: Vec = vec![]; + + for (idx, bytecode_win) in self.body.as_ref().unwrap().windows(3).enumerate() { + if matches!(bytecode_win[0], Bytecode::Dup) && + matches!(bytecode_win[1], Bytecode::PopField(..) | Bytecode::PopLocal(..) | Bytecode::PopArgument(..)) && + matches!(bytecode_win[2], Bytecode::Pop) { + indices_to_remove.push(idx); + indices_to_remove.push(idx + 2); + } + } + + if indices_to_remove.is_empty() { + return; } + + let mut jumps_to_patch = vec![]; + for (cur_idx, bc) in self.body.as_ref().unwrap().iter().enumerate() { + match bc { + Bytecode::Jump(jump_offset) | Bytecode::JumpOnTrueTopNil(jump_offset) | Bytecode::JumpOnFalseTopNil(jump_offset) | + Bytecode::JumpOnTruePop(jump_offset) | Bytecode::JumpOnFalsePop(jump_offset) => { + if indices_to_remove.contains(&(cur_idx + jump_offset)) { + let lol = indices_to_remove.iter().position(|&v| v == cur_idx + jump_offset).unwrap(); + indices_to_remove.remove(lol); + indices_to_remove.remove(lol - 1); + } + + let nbr_to_adjust = indices_to_remove.iter().filter(|&&v| cur_idx < v && v <= cur_idx + jump_offset).count(); + jumps_to_patch.push((cur_idx, jump_offset - nbr_to_adjust)); + }, + Bytecode::JumpBackward(jump_offset) => { + let nbr_to_adjust = indices_to_remove.iter().filter(|&&v| cur_idx > v && v > cur_idx - jump_offset).count(); + jumps_to_patch.push((cur_idx, jump_offset - nbr_to_adjust)); + }, + _ => {} + } + } + + for (jump_idx, jump_val) in jumps_to_patch { + self.patch_jump(jump_idx, jump_val); + } + + self.body = Some(self.body.as_ref().unwrap().iter().enumerate() + .filter_map(|(idx, bc)| + if indices_to_remove.contains(&idx) { + None + } else { + Some(bc.clone()) + } + ).collect::>()); } } @@ -515,8 +551,6 @@ impl GenCtxt for ClassGenCtxt<'_> { } fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option { - // println!("(method) compiling '{}' ...", defn.signature); - let mut ctxt = MethodGenCtxt { signature: defn.signature.clone(), inner: BlockGenCtxt { @@ -558,7 +592,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option { for expr in rest { expr.codegen(&mut ctxt)?; ctxt.push_instr(Bytecode::Pop); - // ctxt.remove_dup_popx_pop_sequences(); } last.codegen(&mut ctxt)?; ctxt.push_instr(Bytecode::ReturnLocal); } - // ctxt.remove_dup_popx_pop_sequences(); + ctxt.remove_dup_popx_pop_sequences(); let block = Block { frame: None, diff --git a/som-interpreter-bc/src/primitives/integer.rs b/som-interpreter-bc/src/primitives/integer.rs index 89765955..a708677b 100644 --- a/som-interpreter-bc/src/primitives/integer.rs +++ b/som-interpreter-bc/src/primitives/integer.rs @@ -492,7 +492,7 @@ fn lt(interpreter: &mut Interpreter, _: &mut Universe) { (Value::Double(a), Value::Integer(b)) => Value::Boolean(a < (b as f64)), (Value::BigInteger(a), Value::Integer(b)) => Value::Boolean(a < BigInt::from(b)), (Value::Integer(a), Value::BigInteger(b)) => Value::Boolean(BigInt::from(a) < b), - _ => panic!("'{}': wrong types", SIGNATURE), + (t1, t2) => panic!("'{}': wrong types: {:?} and {:?}", SIGNATURE, t1, t2), }; interpreter.stack.push(value); From 4676292d085f05f6b1dcc39fc7b97d61efc9e641 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 4 Jan 2023 17:49:50 +0100 Subject: [PATCH 45/88] deactivating dup_popx_pop temporarily --- som-interpreter-bc/src/compiler.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 683e1e62..e38b1def 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -592,7 +592,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option { last.codegen(&mut ctxt)?; ctxt.push_instr(Bytecode::ReturnLocal); } - ctxt.remove_dup_popx_pop_sequences(); + // ctxt.remove_dup_popx_pop_sequences(); let block = Block { frame: None, From 271875d104ab8019b4f78097de8429ab0b76f1c2 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 4 Jan 2023 17:54:23 +0100 Subject: [PATCH 46/88] only inlining blocks again --- som-interpreter-bc/src/inliner.rs | 107 ++---------------------------- 1 file changed, 7 insertions(+), 100 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 71d0fe3a..95438141 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,7 +1,6 @@ use som_core::ast; use som_core::bytecode::Bytecode; -use crate::block::Block; -use crate::compiler::{InnerGenCtxt, Literal}; +use crate::compiler::{InnerGenCtxt}; use crate::compiler::MethodCodegen; use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; @@ -14,7 +13,6 @@ pub enum JumpType { pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; - fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()>; fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; @@ -56,101 +54,8 @@ impl PrimMessageInliner for ast::Expression { } } - #[allow(dead_code)] // Unused for now, I implemented it but it was unnecessary oops - fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &Block) -> Option<()> { - for block_local in &block.locals { - dbg!(block_local); - todo!("actually pushing locals would be nice!") - // ctxt.push_local(String::from(block_local)); - } - - // last is always ReturnLocal, so it gets ignored - if let Some((_, body)) = block.body.split_last() { - for block_bc in body { - match block_bc { - Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), - Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), - Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), - Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | - Bytecode::Send3(lit_idx) | Bytecode::SendN(lit_idx) => { - match block.literals.get(*lit_idx as usize)? { - Literal::Symbol(interned) => { - // does this push duplicate literals? I think it doesn't? - let idx = ctxt.push_literal(Literal::Symbol(*interned)); - match block_bc { - Bytecode::Send1(_) => ctxt.push_instr(Bytecode::Send1(idx as u8)), - Bytecode::Send2(_) => ctxt.push_instr(Bytecode::Send2(idx as u8)), - Bytecode::Send3(_) => ctxt.push_instr(Bytecode::Send3(idx as u8)), - Bytecode::SendN(_) => ctxt.push_instr(Bytecode::SendN(idx as u8)), - _ => panic!("Unreachable branch") - } - }, - _ => panic!("Unexpected block literal type, not yet implemented") - } - }, - // Bytecode::PushBlock(block_idx) => { - // match block.literals.get(*block_idx as usize)? { - // Literal::Block(inner_block) => { - // let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; - // let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); - // ctxt.push_instr(Bytecode::PushBlock(idx as u8)); - // }, - // _ => panic!("PushBlock not actually pushing a block somehow") - // }; - // }, - Bytecode::PushGlobal(global_idx) => { - match block.literals.get(*global_idx as usize)? { - lit => { - let lit_idx = ctxt.push_literal(lit.clone()); - ctxt.push_instr(Bytecode::PushGlobal(lit_idx as u8)); - } - }; - }, - Bytecode::PushConstant(constant_idx) => { - match block.literals.get(*constant_idx as usize)? { - lit => { - let lit_idx = ctxt.push_literal(lit.clone()); - match lit_idx { - 0 => ctxt.push_instr(Bytecode::PushConstant0), - 1 => ctxt.push_instr(Bytecode::PushConstant1), - 2 => ctxt.push_instr(Bytecode::PushConstant2), - _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) - } - } - }; - }, - Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => { - let constant_idx: usize = match block_bc { - Bytecode::PushConstant0 => 0, - Bytecode::PushConstant1 => 1, - Bytecode::PushConstant2 => 2, - _ => panic!("Unreachable") - }; - - match block.literals.get(constant_idx)? { - lit => { - let lit_idx = ctxt.push_literal(lit.clone()); - match lit_idx { - 0 => ctxt.push_instr(Bytecode::PushConstant0), - 1 => ctxt.push_instr(Bytecode::PushConstant1), - 2 => ctxt.push_instr(Bytecode::PushConstant2), - _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) - } - } - }; - }, - Bytecode::ReturnNonLocal => panic!("There shouldn't be a return here"), - _ => ctxt.push_instr(*block_bc) - } - } - } - - Some(()) - } - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - if message.values.len() != 1 { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { return None; } @@ -168,7 +73,9 @@ impl PrimMessageInliner for ast::Expression { } fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - if message.values.len() != 2 { + if message.values.len() != 2 + || !matches!(message.values.get(0)?, ast::Expression::Block(_)) + || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { return None; } @@ -191,11 +98,11 @@ impl PrimMessageInliner for ast::Expression { } fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - if message.values.len() != 1 { + if message.values.len() != 1 { return None; } - if matches!(message.receiver.as_ref(), ast::Expression::Block(_)) { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { return None; } From cc3ce2d20c762ccbb537c6872efe20dd5dd8236a Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 13 Jan 2023 14:09:23 +0000 Subject: [PATCH 47/88] sticking with this core-lib version for now --- core-lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-lib b/core-lib index afd5a63b..b0c4abfa 160000 --- a/core-lib +++ b/core-lib @@ -1 +1 @@ -Subproject commit afd5a63b662eca78de80b37653daf7f0a0ece958 +Subproject commit b0c4abfa9096ba845b2113ad872b599883cfe624 From b63f40f664d10a2f3f178d554add002a50c159af Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 13 Jan 2023 15:20:53 +0000 Subject: [PATCH 48/88] Added a comment to explain how the dup_popx_pop removal function is unused --- som-interpreter-bc/src/compiler.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index e38b1def..4ec8fa40 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -219,6 +219,7 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { }; } + /// NEVER CALLED. TODO should be made faster and actually invoked, most likely by storing the last four bytecodes for faster checking. fn remove_dup_popx_pop_sequences(&mut self) { if self.body.is_none() || self.body.as_ref().unwrap().len() < 3 { return; From c7a7355126b94c815e653f9f20930b86b5aab26e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 17 Jan 2024 14:12:16 +0000 Subject: [PATCH 49/88] temporary fix for disassembler --- som-interpreter-bc/src/disassembler.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index 312ca2e2..263e38d3 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -23,9 +23,6 @@ fn disassemble_body( print!("{padding} {0}", bytecode.padded_name()); match bytecode { - Bytecode::Halt => { - println!(); - } Bytecode::Dup => { println!(); } @@ -97,7 +94,7 @@ fn disassemble_body( // }; // println!(" (`{0}`)", universe.lookup_symbol(argument)); } - Bytecode::Send(idx) | Bytecode::SuperSend(idx) => { + Bytecode::SendN(idx) | Bytecode::SuperSendN(idx) => { print!(" {idx}"); let Some(Literal::Symbol(signature)) = current.resolve_literal(idx) else { println!(" (invalid signature)"); @@ -110,7 +107,8 @@ fn disassemble_body( } Bytecode::ReturnNonLocal => { println!(); - } + }, + _ => todo!() // evil, I know. I'm lazy } } } From 6405349f7d3108a1dc0ba221c7193393a594cc3c Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 23 Jan 2024 16:30:23 +0000 Subject: [PATCH 50/88] adding the new BCs to the disassembler --- som-core/src/bytecode.rs | 124 +++++++++++++------------ som-interpreter-bc/src/disassembler.rs | 18 +++- 2 files changed, 79 insertions(+), 63 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 5a187c64..0d1376c0 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -44,35 +44,39 @@ impl Bytecode { pub fn name(self) -> &'static str { // NAMES[self as usize] match self { - Self::Dup => "DUP", - Self::PushLocal(_, _) => "PUSH_LOCAL", - Self::PushArgument(_, _) => "PUSH_ARGUMENT", - Self::PushField(_) => "PUSH_FIELD", - Self::PushBlock(_) => "PUSH_BLOCK", - Self::PushConstant(_) => "PUSH_CONSTANT", - Self::PushConstant0 => "PUSH_CONSTANT 0", - Self::PushConstant1 => "PUSH_CONSTANT 1", - Self::PushConstant2 => "PUSH_CONSTANT 2", - Self::PushGlobal(_) => "PUSH_GLOBAL", - Self::Push0 => "PUSH_0", - Self::Push1 => "PUSH_1", - Self::PushNil => "PUSH_NIL", - Self::Pop => "POP", - Self::PopLocal(_, _) => "POP_LOCAL", - Self::PopArgument(_, _) => "POP_ARGUMENT", - Self::PopField(_) => "POP_FIELD", - Self::Send1(_) => "SEND 1", - Self::Send2(_) => "SEND 2", - Self::Send3(_) => "SEND 3", - Self::SendN(_) => "SEND N", - Self::SuperSend1(_) => "SUPER_SEND 1", - Self::SuperSend2(_) => "SUPER_SEND 2", - Self::SuperSend3(_) => "SUPER_SEND 3", - Self::SuperSendN(_) => "SUPER_SEND N", - Self::ReturnLocal => "RETURN_LOCAL", - Self::ReturnNonLocal => "RETURN_NON_LOCAL", - Self::Jump(_) => "JUMP", - _ => "NO NAME, TODO" // laziness + Self::Dup => "DUP", + Self::PushLocal(_, _) => "PUSH_LOCAL", + Self::PushArgument(_, _) => "PUSH_ARGUMENT", + Self::PushField(_) => "PUSH_FIELD", + Self::PushBlock(_) => "PUSH_BLOCK", + Self::PushConstant(_) => "PUSH_CONSTANT", + Self::PushConstant0 => "PUSH_CONSTANT 0", + Self::PushConstant1 => "PUSH_CONSTANT 1", + Self::PushConstant2 => "PUSH_CONSTANT 2", + Self::PushGlobal(_) => "PUSH_GLOBAL", + Self::Push0 => "PUSH_0", + Self::Push1 => "PUSH_1", + Self::PushNil => "PUSH_NIL", + Self::Pop => "POP", + Self::PopLocal(_, _) => "POP_LOCAL", + Self::PopArgument(_, _) => "POP_ARGUMENT", + Self::PopField(_) => "POP_FIELD", + Self::Send1(_) => "SEND 1", + Self::Send2(_) => "SEND 2", + Self::Send3(_) => "SEND 3", + Self::SendN(_) => "SEND N", + Self::SuperSend1(_) => "SUPER_SEND 1", + Self::SuperSend2(_) => "SUPER_SEND 2", + Self::SuperSend3(_) => "SUPER_SEND 3", + Self::SuperSendN(_) => "SUPER_SEND N", + Self::ReturnLocal => "RETURN_LOCAL", + Self::ReturnNonLocal => "RETURN_NON_LOCAL", + Self::Jump(_) => "JUMP", + Bytecode::JumpBackward(_) => "JUMP_BACKWARD", + Bytecode::JumpOnTrueTopNil(_) => "JUMP_ON_TRUE_TOP_NIL", + Bytecode::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL", + Bytecode::JumpOnTruePop(_) => "JUMP_ON_TRUE_POP", + Bytecode::JumpOnFalsePop(_) => "JUMP_ON_FALSE_POP" } } @@ -81,35 +85,39 @@ impl Bytecode { pub fn padded_name(self) -> &'static str { // PADDED_NAMES[self as usize] match self { - Self::Dup => "DUP ", - Self::PushLocal(_, _) => "PUSH_LOCAL ", - Self::PushArgument(_, _) => "PUSH_ARGUMENT ", - Self::PushField(_) => "PUSH_FIELD ", - Self::PushBlock(_) => "PUSH_BLOCK ", - Self::PushConstant(_) => "PUSH_CONSTANT ", - Self::PushConstant0 => "PUSH_CONSTANT 0 ", - Self::PushConstant1 => "PUSH_CONSTANT 1 ", - Self::PushConstant2 => "PUSH_CONSTANT 2 ", - Self::PushGlobal(_) => "PUSH_GLOBAL ", - Self::Push0 => "PUSH_0 ", - Self::Push1 => "PUSH_1 ", - Self::PushNil => "PUSH_NIL ", - Self::Pop => "POP ", - Self::PopLocal(_, _) => "POP_LOCAL ", - Self::PopArgument(_, _) => "POP_ARGUMENT ", - Self::PopField(_) => "POP_FIELD ", - Self::Send1(_) => "SEND 1 ", - Self::Send2(_) => "SEND 2 ", - Self::Send3(_) => "SEND 3 ", - Self::SendN(_) => "SEND N ", - Self::SuperSend1(_) => "SUPER_SEND 1 ", - Self::SuperSend2(_) => "SUPER_SEND 2 ", - Self::SuperSend3(_) => "SUPER_SEND 3 ", - Self::SuperSendN(_) => "SUPER_SEND N ", - Self::ReturnLocal => "RETURN_LOCAL ", - Self::ReturnNonLocal => "RETURN_NON_LOCAL", - Self::Jump(_) => "JUMP ", - _ => "NO NAME, TODO" + Self::Dup => "DUP ", + Self::PushLocal(_, _) => "PUSH_LOCAL ", + Self::PushArgument(_, _) => "PUSH_ARGUMENT ", + Self::PushField(_) => "PUSH_FIELD ", + Self::PushBlock(_) => "PUSH_BLOCK ", + Self::PushConstant(_) => "PUSH_CONSTANT ", + Self::PushConstant0 => "PUSH_CONSTANT 0 ", + Self::PushConstant1 => "PUSH_CONSTANT 1 ", + Self::PushConstant2 => "PUSH_CONSTANT 2 ", + Self::PushGlobal(_) => "PUSH_GLOBAL ", + Self::Push0 => "PUSH_0 ", + Self::Push1 => "PUSH_1 ", + Self::PushNil => "PUSH_NIL ", + Self::Pop => "POP ", + Self::PopLocal(_, _) => "POP_LOCAL ", + Self::PopArgument(_, _) => "POP_ARGUMENT ", + Self::PopField(_) => "POP_FIELD ", + Self::Send1(_) => "SEND 1 ", + Self::Send2(_) => "SEND 2 ", + Self::Send3(_) => "SEND 3 ", + Self::SendN(_) => "SEND N ", + Self::SuperSend1(_) => "SUPER_SEND 1 ", + Self::SuperSend2(_) => "SUPER_SEND 2 ", + Self::SuperSend3(_) => "SUPER_SEND 3 ", + Self::SuperSendN(_) => "SUPER_SEND N ", + Self::ReturnLocal => "RETURN_LOCAL ", + Self::ReturnNonLocal => "RETURN_NON_LOCAL ", + Self::Jump(_) => "JUMP ", + Self::JumpBackward(_) => "JUMP_BACKWARD ", + Self::JumpOnTrueTopNil(_) => "JUMP_ON_TRUE_TOP_NIL ", + Self::JumpOnFalseTopNil(_) => "JUMP_ON_FALSE_TOP_NIL ", + Self::JumpOnTruePop(_) => "JUMP_ON_TRUE_POP ", + Self::JumpOnFalsePop(_) => "JUMP_ON_FALSE_POP " } } } diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index 263e38d3..36706bad 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -19,8 +19,9 @@ fn disassemble_body( ) { let padding = " |".repeat(level); let current = env.last().copied().unwrap(); - for bytecode in current.get_body().into_iter().copied() { - print!("{padding} {0}", bytecode.padded_name()); + for (idx, bytecode) in current.get_body().into_iter().copied().enumerate() { + print!("{idx} {padding} {0}", bytecode.padded_name()); + // print!("{padding} {0}", bytecode.padded_name()); match bytecode { Bytecode::Dup => { @@ -94,21 +95,28 @@ fn disassemble_body( // }; // println!(" (`{0}`)", universe.lookup_symbol(argument)); } - Bytecode::SendN(idx) | Bytecode::SuperSendN(idx) => { + Bytecode::Send1(idx) | Bytecode::Send2(idx) | Bytecode::Send3(idx) | Bytecode::SendN(idx) | + Bytecode::SuperSend1(idx) | Bytecode::SuperSend2(idx) | Bytecode::SuperSend3(idx) | Bytecode::SuperSendN(idx) => { print!(" {idx}"); let Some(Literal::Symbol(signature)) = current.resolve_literal(idx) else { println!(" (invalid signature)"); continue; }; println!(" (#{0})", universe.lookup_symbol(*signature)); - } + }, Bytecode::ReturnLocal => { println!(); } Bytecode::ReturnNonLocal => { println!(); }, - _ => todo!() // evil, I know. I'm lazy + Bytecode::Jump(idx) | Bytecode::JumpBackward(idx) | + Bytecode::JumpOnFalsePop(idx) | Bytecode::JumpOnTruePop(idx) | + Bytecode::JumpOnFalseTopNil(idx) | Bytecode::JumpOnTrueTopNil(idx) => { + println!("{}", idx); + } + Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil => {println!();} + Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2=> {println!(" (TODO)")} } } } From 6671aa65d44e15d04a13df7a0bac1677738857b6 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 23 Jan 2024 16:43:00 +0000 Subject: [PATCH 51/88] disas: properly handling all the new BCs --- som-interpreter-bc/src/disassembler.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index 36706bad..2bd55347 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -58,7 +58,15 @@ fn disassemble_body( disassemble_body(universe, class, level + 1, env); env.pop(); } - Bytecode::PushConstant(idx) => { + Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 | Bytecode::PushConstant(_) => { + let idx = match bytecode { + Bytecode::PushConstant0 => 0, + Bytecode::PushConstant1 => 1, + Bytecode::PushConstant2 => 2, + Bytecode::PushConstant(idx) => idx, + _ => panic!("Unreachable.") + }; + print!(" {idx}"); let Some(literal) = current.resolve_literal(idx) else { println!(" (invalid constant)"); @@ -116,7 +124,6 @@ fn disassemble_body( println!("{}", idx); } Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil => {println!();} - Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2=> {println!(" (TODO)")} } } } From 112709dccbe4ce65a6dd1d0df6de4e1d9c171b52 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 23 Jan 2024 16:49:18 +0000 Subject: [PATCH 52/88] making the inlining work properly with shadowing, slowly but surely. step one: working ifTrue inlining for Bounce --- som-interpreter-bc/src/inliner.rs | 153 +++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 24 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 95438141..9a1a199e 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,6 +1,7 @@ use som_core::ast; use som_core::bytecode::Bytecode; -use crate::compiler::{InnerGenCtxt}; +use crate::block::BlockInfo; +use crate::compiler::{InnerGenCtxt, Literal}; use crate::compiler::MethodCodegen; use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; @@ -9,9 +10,12 @@ pub enum JumpType { JumpOnTrue } +static mut has_been_accessed: bool = false; + // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()>; fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; @@ -23,35 +27,115 @@ impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { match message.signature.as_str() { "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), - "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), - "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), - "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), - "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), - "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), + // "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), + // "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), + // "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), + // "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), + // "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), // TODO: [or, and] _ => None } } - fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { - match block_expr { - ast::Expression::Block(block) => { - for block_local in &block.locals { - ctxt.push_local(String::from(block_local)); // breaks shadowing - } + fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()> { + // TODO uncomment + for block_local_intern_id in &block.locals { + panic!("we don't handle block locals yet!"); + // ctxt.push_local(ctxt.lookup_symbol(block_local_intern_id)); + } - // TODO need to remove those POPs somehow. - if let Some((last, rest)) = block.body.exprs.split_last() { - for expr in rest { - expr.codegen(ctxt); - ctxt.push_instr(Bytecode::Pop); - } - last.codegen(ctxt)?; + // last is always ReturnLocal, so it gets ignored + if let Some((_, body)) = block.body.split_last() { + for block_bc in body { + match block_bc { + Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), + Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), + Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), + Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), + Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | + Bytecode::Send3(lit_idx) | Bytecode::SendN(lit_idx) => { + match block.literals.get(*lit_idx as usize)? { + Literal::Symbol(interned) => { + // does this push duplicate literals? I think it doesn't? + let idx = ctxt.push_literal(Literal::Symbol(*interned)); + match block_bc { + Bytecode::Send1(_) => ctxt.push_instr(Bytecode::Send1(idx as u8)), + Bytecode::Send2(_) => ctxt.push_instr(Bytecode::Send2(idx as u8)), + Bytecode::Send3(_) => ctxt.push_instr(Bytecode::Send3(idx as u8)), + Bytecode::SendN(_) => ctxt.push_instr(Bytecode::SendN(idx as u8)), + _ => panic!("Unreachable branch") + } + }, + _ => panic!("Unexpected block literal type, not yet implemented") + } + }, + // Bytecode::PushBlock(block_idx) => { + // match block.literals.get(*block_idx as usize)? { + // Literal::Block(inner_block) => { + // let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; + // let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); + // ctxt.push_instr(Bytecode::PushBlock(idx as u8)); + // }, + // _ => panic!("PushBlock not actually pushing a block somehow") + // }; + // }, + Bytecode::PushGlobal(global_idx) => { + match block.literals.get(*global_idx as usize)? { + lit => { + let lit_idx = ctxt.push_literal(lit.clone()); + ctxt.push_instr(Bytecode::PushGlobal(lit_idx as u8)); + } + }; + }, + Bytecode::PushConstant(constant_idx) => { + match block.literals.get(*constant_idx as usize)? { + lit => { + let lit_idx = ctxt.push_literal(lit.clone()); + match lit_idx { + 0 => ctxt.push_instr(Bytecode::PushConstant0), + 1 => ctxt.push_instr(Bytecode::PushConstant1), + 2 => ctxt.push_instr(Bytecode::PushConstant2), + _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) + } + } + }; + }, + Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => { + let constant_idx: usize = match block_bc { + Bytecode::PushConstant0 => 0, + Bytecode::PushConstant1 => 1, + Bytecode::PushConstant2 => 2, + _ => panic!("Unreachable") + }; + + match block.literals.get(constant_idx)? { + lit => { + let lit_idx = ctxt.push_literal(lit.clone()); + match lit_idx { + 0 => ctxt.push_instr(Bytecode::PushConstant0), + 1 => ctxt.push_instr(Bytecode::PushConstant1), + 2 => ctxt.push_instr(Bytecode::PushConstant2), + _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) + } + } + }; + }, + Bytecode::ReturnNonLocal => {ctxt.push_instr(Bytecode::ReturnLocal)}, + _ => ctxt.push_instr(*block_bc) } - Some(()) - }, - expr => expr.codegen(ctxt) + } } + + Some(()) + } + + #[allow(dead_code)] + fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { + todo!(); + // match block_expr { + // ast::Expression::Block(block) => self.inline_compiled_block(ctxt, block), + // _ => todo!("does that show up in the tests")//expr.codegen(ctxt) + // } } fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { @@ -59,14 +143,35 @@ impl PrimMessageInliner for ast::Expression { return None; } + // we need to compile the block before inlining it, and we haven't encountered/compiled it yet + message.values.get(0)?.codegen(ctxt)?; + + let block_idx = match ctxt.get_instructions().last()? { + Bytecode::PushBlock(val) => *val, + _ => panic!("should be impossible: we've just compiled a block.") + }; + ctxt.pop_instr(); // we remove the PUSH_BLOCK + let jump_idx = ctxt.get_cur_instr_idx(); match jump_type { JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) } - // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating - self.inline_expr(ctxt, message.values.get(0)?); + let cond_block_ref = match ctxt.get_literal(block_idx as usize)? { + Literal::Block(val) => val.clone(), + _ => return None + }; + // shouldn't break anything, probably +// ctxt.remove_literal(block_idx as usize); +// dbg!(&cond_block_ref.as_ref().blk_info.body); + + self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()); + + // dbg!(ctxt.get_instructions()); + +// // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating +// self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump_to_current(jump_idx); return Some(()); From bcf3aef867bdcc3c8b779a34566738e409829004 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 23 Jan 2024 19:01:12 +0000 Subject: [PATCH 53/88] inlining is getting there, but odd bugs are present --- som-interpreter-bc/src/block.rs | 5 ++- som-interpreter-bc/src/compiler.rs | 3 +- som-interpreter-bc/src/inliner.rs | 55 ++++++++++++++++----------- som-interpreter-bc/src/interpreter.rs | 7 +++- 4 files changed, 44 insertions(+), 26 deletions(-) diff --git a/som-interpreter-bc/src/block.rs b/som-interpreter-bc/src/block.rs index d4719cc9..299b11fd 100644 --- a/som-interpreter-bc/src/block.rs +++ b/som-interpreter-bc/src/block.rs @@ -1,6 +1,7 @@ use std::cell::RefCell; use std::fmt; use std::rc::Rc; +use som_core::ast; use som_core::bytecode::Bytecode; @@ -27,7 +28,9 @@ pub struct Block { /// Reference to the captured stack frame. pub frame: Option>, pub blk_info: Rc, -} + // OLarose: not a fan... but it's needed when inlining to be able to recreate a working version of the block from the original AST + // (see PushBlock in inliner) + pub ast_body: ast::Block, } impl Block { /// Get the block's class. diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 6a1ad35e..d7f21d7b 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -631,7 +631,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option { +pub(crate) fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { // println!("(system) compiling block ..."); let mut ctxt = BlockGenCtxt { @@ -675,6 +675,7 @@ fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { nb_params, inline_cache, }), + ast_body: defn.clone() // not a fan of this, only needed during parsing and cloning feels needless... TODO }; // println!("(system) compiled block !"); diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 9a1a199e..81df26ed 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,7 +1,8 @@ +use std::rc::Rc; use som_core::ast; use som_core::bytecode::Bytecode; -use crate::block::BlockInfo; -use crate::compiler::{InnerGenCtxt, Literal}; +use crate::block::{BlockInfo}; +use crate::compiler::{compile_block, InnerGenCtxt, Literal}; use crate::compiler::MethodCodegen; use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; @@ -10,8 +11,6 @@ pub enum JumpType { JumpOnTrue } -static mut has_been_accessed: bool = false; - // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; @@ -26,8 +25,8 @@ pub trait PrimMessageInliner { impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { match message.signature.as_str() { - "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), - // "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), + // "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), + "ifFalse2:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), // "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), // "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), // "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), @@ -38,8 +37,7 @@ impl PrimMessageInliner for ast::Expression { } fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()> { - // TODO uncomment - for block_local_intern_id in &block.locals { + for _block_local_intern_id in &block.locals { panic!("we don't handle block locals yet!"); // ctxt.push_local(ctxt.lookup_symbol(block_local_intern_id)); } @@ -50,7 +48,7 @@ impl PrimMessageInliner for ast::Expression { match block_bc { Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), - Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), + Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), // not 100% sure i need to adjust the up_idx there and for pop Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | Bytecode::Send3(lit_idx) | Bytecode::SendN(lit_idx) => { @@ -69,16 +67,16 @@ impl PrimMessageInliner for ast::Expression { _ => panic!("Unexpected block literal type, not yet implemented") } }, - // Bytecode::PushBlock(block_idx) => { - // match block.literals.get(*block_idx as usize)? { - // Literal::Block(inner_block) => { - // let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; - // let idx = ctxt.push_literal(Literal::Block(Rc::new(new_block))); - // ctxt.push_instr(Bytecode::PushBlock(idx as u8)); - // }, - // _ => panic!("PushBlock not actually pushing a block somehow") - // }; - // }, + Bytecode::PushBlock(block_idx) => { + match block.literals.get(*block_idx as usize)? { + Literal::Block(inner_block) => { + let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; + let idx = ctxt.push_literal(Literal::Block(Rc::from(new_block))); + ctxt.push_instr(Bytecode::PushBlock(idx as u8)); + }, + _ => panic!("PushBlock not actually pushing a block somehow") + }; + }, Bytecode::PushGlobal(global_idx) => { match block.literals.get(*global_idx as usize)? { lit => { @@ -120,8 +118,18 @@ impl PrimMessageInliner for ast::Expression { } }; }, - Bytecode::ReturnNonLocal => {ctxt.push_instr(Bytecode::ReturnLocal)}, - _ => ctxt.push_instr(*block_bc) + Bytecode::ReturnNonLocal => ctxt.push_instr(Bytecode::ReturnLocal), + Bytecode::ReturnLocal => panic!("Uh, that's a thing?"), + // For jumps, we just need to adjust their offsets based on when we started inlining the block. probably. + Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + ctxt.get_cur_instr_idx())), + Bytecode::JumpBackward(idx) => ctxt.push_instr(Bytecode::JumpBackward(idx + ctxt.get_cur_instr_idx())), + Bytecode::JumpOnTruePop(idx) => ctxt.push_instr(Bytecode::JumpOnTruePop(idx + ctxt.get_cur_instr_idx())), + Bytecode::JumpOnFalsePop(idx) => ctxt.push_instr(Bytecode::JumpOnFalsePop(idx + ctxt.get_cur_instr_idx())), + Bytecode::JumpOnTrueTopNil(idx) => { + ctxt.push_instr(Bytecode::JumpOnTrueTopNil(idx + ctxt.get_cur_instr_idx())) + }, + Bytecode::JumpOnFalseTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(idx + ctxt.get_cur_instr_idx())), + _ => ctxt.push_instr(*block_bc) // I *think* the rest are all fine.. } } } @@ -130,7 +138,7 @@ impl PrimMessageInliner for ast::Expression { } #[allow(dead_code)] - fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block_expr: &ast::Expression) -> Option<()> { + fn inline_expr(&self, _ctxt: &mut dyn InnerGenCtxt, _block_expr: &ast::Expression) -> Option<()> { todo!(); // match block_expr { // ast::Expression::Block(block) => self.inline_compiled_block(ctxt, block), @@ -165,7 +173,7 @@ impl PrimMessageInliner for ast::Expression { // shouldn't break anything, probably // ctxt.remove_literal(block_idx as usize); // dbg!(&cond_block_ref.as_ref().blk_info.body); - + self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()); // dbg!(ctxt.get_instructions()); @@ -173,6 +181,7 @@ impl PrimMessageInliner for ast::Expression { // // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating // self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump_to_current(jump_idx); + // ctxt.backpatch_jump_to_current(jump_idx + nbr_instrs_inlined); return Some(()); } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 85a01b5c..cd4a2796 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -230,7 +230,6 @@ impl Interpreter { None => return Some(self.stack.pop().unwrap_or(Value::Nil)), }; - let bytecode_idx = frame.borrow().bytecode_idx; let opt_bytecode = frame.borrow().get_current_bytecode(); let bytecode = match opt_bytecode { Some(bytecode) => bytecode, @@ -511,6 +510,8 @@ impl Interpreter { } } + // TODO: check why this is unused + #[allow(dead_code)] fn do_send( interpreter: &mut Interpreter, universe: &mut Universe, @@ -573,6 +574,8 @@ impl Interpreter { } } + // TODO: check why this is unused + #[allow(dead_code)] fn resolve_method( frame: &SOMRef, class: &SOMRef, @@ -656,6 +659,8 @@ impl Interpreter { Some(value) } + // TODO: check why this is unused + #[allow(dead_code)] fn nb_params(signature: &str) -> usize { match signature.chars().nth(0) { Some(ch) if !ch.is_alphabetic() => 1, From aaf2166c6101dbcdaa9b5bcaa02061b88b22a079 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 24 Jan 2024 19:23:31 +0000 Subject: [PATCH 54/88] more inlining tweaks. it still doesn't work some of the time though --- som-interpreter-bc/src/inliner.rs | 192 +++++++++++++++++------------- 1 file changed, 109 insertions(+), 83 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 81df26ed..f7c16e26 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -15,7 +15,8 @@ pub enum JumpType { pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()>; - fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; + fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; + // fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; @@ -25,9 +26,9 @@ pub trait PrimMessageInliner { impl PrimMessageInliner for ast::Expression { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { match message.signature.as_str() { - // "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), - "ifFalse2:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), - // "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), + "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), + "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), + "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), // "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), // "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), // "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), @@ -42,12 +43,24 @@ impl PrimMessageInliner for ast::Expression { // ctxt.push_local(ctxt.lookup_symbol(block_local_intern_id)); } + let idx_start_inlining = ctxt.get_cur_instr_idx(); + // last is always ReturnLocal, so it gets ignored if let Some((_, body)) = block.body.split_last() { for block_bc in body { match block_bc { - Bytecode::PushLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), - Bytecode::PopLocal(up_idx, idx) => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), + Bytecode::PushLocal(up_idx, idx) => { + match up_idx { // todo: is there more logic to put there? + 0 => ctxt.push_instr(Bytecode::PushLocal(*up_idx, *idx)), + 1.. => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)) + } + }, + Bytecode::PopLocal(up_idx, idx) => { + match up_idx { + 0 => ctxt.push_instr(Bytecode::PopLocal(*up_idx, *idx)), + 1.. => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)) + } + }, Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), // not 100% sure i need to adjust the up_idx there and for pop Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | @@ -118,17 +131,25 @@ impl PrimMessageInliner for ast::Expression { } }; }, - Bytecode::ReturnNonLocal => ctxt.push_instr(Bytecode::ReturnLocal), - Bytecode::ReturnLocal => panic!("Uh, that's a thing?"), - // For jumps, we just need to adjust their offsets based on when we started inlining the block. probably. - Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + ctxt.get_cur_instr_idx())), - Bytecode::JumpBackward(idx) => ctxt.push_instr(Bytecode::JumpBackward(idx + ctxt.get_cur_instr_idx())), - Bytecode::JumpOnTruePop(idx) => ctxt.push_instr(Bytecode::JumpOnTruePop(idx + ctxt.get_cur_instr_idx())), - Bytecode::JumpOnFalsePop(idx) => ctxt.push_instr(Bytecode::JumpOnFalsePop(idx + ctxt.get_cur_instr_idx())), - Bytecode::JumpOnTrueTopNil(idx) => { - ctxt.push_instr(Bytecode::JumpOnTrueTopNil(idx + ctxt.get_cur_instr_idx())) + Bytecode::ReturnNonLocal => { + // TODO; if the new context level is 0 (check prev bytecode emitted?), gotta emit a RETURNLOCAL instead! + // as far as i understand... this still works? and is just slower? TODO fix though obviously + // dbg!("wow"); + // dbg!(&ctxt.get_instructions().last()); + // match ctxt.get_instructions().last().unwrap() { + // Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnLocal), + // _ => ctxt.push_instr(Bytecode::ReturnNonLocal) + // } + ctxt.push_instr(Bytecode::ReturnNonLocal) }, - Bytecode::JumpOnFalseTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(idx + ctxt.get_cur_instr_idx())), + Bytecode::ReturnLocal => {},//panic!("Is that a thing? If so, just ignore it."), + // For jumps, we just need to adjust their offsets based on when we started inlining the block. probably. + Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + idx_start_inlining)), + Bytecode::JumpBackward(idx) => ctxt.push_instr(Bytecode::JumpBackward(idx + idx_start_inlining)), + Bytecode::JumpOnTruePop(idx) => ctxt.push_instr(Bytecode::JumpOnTruePop(idx + idx_start_inlining)), + Bytecode::JumpOnFalsePop(idx) => ctxt.push_instr(Bytecode::JumpOnFalsePop(idx + idx_start_inlining)), + Bytecode::JumpOnTrueTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(idx + idx_start_inlining)), + Bytecode::JumpOnFalseTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(idx + idx_start_inlining)), _ => ctxt.push_instr(*block_bc) // I *think* the rest are all fine.. } } @@ -137,13 +158,21 @@ impl PrimMessageInliner for ast::Expression { Some(()) } - #[allow(dead_code)] - fn inline_expr(&self, _ctxt: &mut dyn InnerGenCtxt, _block_expr: &ast::Expression) -> Option<()> { - todo!(); - // match block_expr { - // ast::Expression::Block(block) => self.inline_compiled_block(ctxt, block), - // _ => todo!("does that show up in the tests")//expr.codegen(ctxt) - // } + fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()> { + let block1_idx = match ctxt.get_instructions().last()? { + Bytecode::PushBlock(val) => *val, + _ => panic!("function expects last bytecode to be a block.") + }; + ctxt.pop_instr(); // removing the PUSH_BLOCK + + let cond_block_ref = match ctxt.get_literal(block1_idx as usize)? { + Literal::Block(val) => val.clone(), + _ => return None + }; + // shouldn't break anything, probably + // ctxt.remove_literal(block_idx as usize); + + self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()) } fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { @@ -151,37 +180,24 @@ impl PrimMessageInliner for ast::Expression { return None; } - // we need to compile the block before inlining it, and we haven't encountered/compiled it yet - message.values.get(0)?.codegen(ctxt)?; - - let block_idx = match ctxt.get_instructions().last()? { - Bytecode::PushBlock(val) => *val, - _ => panic!("should be impossible: we've just compiled a block.") - }; - ctxt.pop_instr(); // we remove the PUSH_BLOCK - let jump_idx = ctxt.get_cur_instr_idx(); match jump_type { JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) } - let cond_block_ref = match ctxt.get_literal(block_idx as usize)? { - Literal::Block(val) => val.clone(), - _ => return None - }; - // shouldn't break anything, probably -// ctxt.remove_literal(block_idx as usize); -// dbg!(&cond_block_ref.as_ref().blk_info.body); - - self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()); + // we need to compile the block before inlining it, and we haven't encountered/compiled it yet + message.values.get(0)?.codegen(ctxt)?; + + self.inline_last_push_block_bc(ctxt); // dbg!(ctxt.get_instructions()); -// // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating + // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating + // wrt previous todo comment: likely super outdated. but until proven, i'm keeping it as a reminder. + // self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump_to_current(jump_idx); - // ctxt.backpatch_jump_to_current(jump_idx + nbr_instrs_inlined); return Some(()); } @@ -199,55 +215,65 @@ impl PrimMessageInliner for ast::Expression { JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), } - self.inline_expr(ctxt, message.values.get(0)?); + message.values.get(0)?.codegen(ctxt)?; + + self.inline_last_push_block_bc(ctxt); + // self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()); let middle_jump_idx = ctxt.get_cur_instr_idx(); ctxt.push_instr(Bytecode::Jump(0)); ctxt.backpatch_jump_to_current(start_jump_idx); - self.inline_expr(ctxt, message.values.get(1)?); - ctxt.backpatch_jump_to_current(middle_jump_idx); - - return Some(()); - } - - fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - if message.values.len() != 1 { - return None; - } - - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { - return None; - } - - ctxt.pop_instr(); // we remove the PUSH_BLOCK - let idx_before_condition = ctxt.get_cur_instr_idx(); + message.values.get(1)?.codegen(ctxt)?; - self.inline_expr(ctxt, message.receiver.as_ref()); + // self.inline_expr(ctxt, message.values.get(1)?); + // self.inline_compiled_block(ctxt, cond_block2_ref.as_ref().blk_info.as_ref()); + self.inline_last_push_block_bc(ctxt); - let cond_jump_idx = ctxt.get_cur_instr_idx(); - match jump_type { - JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) - } - - self.inline_expr(ctxt, message.values.get(0).unwrap()); - - // we push a POP, unless the body of the loop is empty. - match message.values.get(0).unwrap() { - ast::Expression::Block(block) => { - if block.body.exprs.len() != 0 { - ctxt.push_instr(Bytecode::Pop); - } - }, - _ => {} - }; - - ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); - ctxt.backpatch_jump_to_current(cond_jump_idx); - ctxt.push_instr(Bytecode::PushNil); + ctxt.backpatch_jump_to_current(middle_jump_idx); return Some(()); } + + fn inline_while(&self, _ctxt: &mut dyn InnerGenCtxt, _message: &ast::Message, _jump_type: JumpType) -> Option<()> { + todo!("make it use the new inlining function"); + // if message.values.len() != 1 { + // return None; + // } + // + // if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + // return None; + // } + // + // ctxt.pop_instr(); // we remove the PUSH_BLOCK + // + // let idx_before_condition = ctxt.get_cur_instr_idx(); + // + // self.inline_expr(ctxt, message.receiver.as_ref()); + // + // let cond_jump_idx = ctxt.get_cur_instr_idx(); + // match jump_type { + // JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + // JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) + // } + // + // self.inline_expr(ctxt, message.values.get(0).unwrap()); + // + // // we push a POP, unless the body of the loop is empty. + // match message.values.get(0).unwrap() { + // ast::Expression::Block(block) => { + // if block.body.exprs.len() != 0 { + // ctxt.push_instr(Bytecode::Pop); + // } + // }, + // _ => {} + // }; + // + // ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); + // ctxt.backpatch_jump_to_current(cond_jump_idx); + // ctxt.push_instr(Bytecode::PushNil); + // + // return Some(()); + } } \ No newline at end of file From 137730068359514542db60ebfd3b3d1ecc4c1a77 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Thu, 25 Jan 2024 11:48:04 +0000 Subject: [PATCH 55/88] inlining locals as well, in an ugly way! also added back inline_while but it's not functional (not that inlining even is, fully, in the first place) --- som-interpreter-bc/src/compiler.rs | 22 ++++++ som-interpreter-bc/src/disassembler.rs | 3 +- som-interpreter-bc/src/inliner.rs | 97 ++++++++++++++------------ 3 files changed, 75 insertions(+), 47 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index d7f21d7b..c4b92b40 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -92,6 +92,7 @@ pub enum FoundVar { pub trait GenCtxt { fn find_var(&mut self, name: &str) -> Option; fn intern_symbol(&mut self, name: &str) -> Interned; + fn lookup_symbol(&self, id: Interned) -> &str; fn class_name(&self) -> &str; } @@ -102,6 +103,7 @@ pub trait InnerGenCtxt: GenCtxt { fn get_instructions(&self) -> &Vec; fn push_arg(&mut self, name: String) -> usize; fn push_local(&mut self, name: String) -> usize; + fn get_nbr_locals(&self) -> usize; fn get_literal(&self, idx: usize) -> Option<&Literal>; // is this needed? fn push_literal(&mut self, literal: Literal) -> usize; fn remove_literal(&mut self, idx: usize) -> Option; @@ -141,6 +143,10 @@ impl GenCtxt for BlockGenCtxt<'_> { self.outer.intern_symbol(name) } + fn lookup_symbol(&self, id: Interned) -> &str { + self.outer.lookup_symbol(id) + } + fn class_name(&self) -> &str { self.outer.class_name() } @@ -275,6 +281,10 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } ).collect::>()); } + + fn get_nbr_locals(&self) -> usize { + self.locals.len() + } } struct MethodGenCtxt<'a> { @@ -293,6 +303,10 @@ impl GenCtxt for MethodGenCtxt<'_> { self.inner.intern_symbol(name) } + fn lookup_symbol(&self, id: Interned) -> &str { + self.inner.lookup_symbol(id) + } + fn class_name(&self) -> &str { self.inner.class_name() } @@ -351,6 +365,10 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { fn remove_dup_popx_pop_sequences(&mut self) { self.inner.remove_dup_popx_pop_sequences(); } + + fn get_nbr_locals(&self) -> usize { + self.inner.get_nbr_locals() + } } pub trait MethodCodegen { @@ -547,6 +565,10 @@ impl GenCtxt for ClassGenCtxt<'_> { self.interner.intern(name) } + fn lookup_symbol(&self, id: Interned) -> &str { + self.interner.lookup(id) + } + fn class_name(&self) -> &str { self.name.as_str() } diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index 2bd55347..d13630c8 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -20,7 +20,8 @@ fn disassemble_body( let padding = " |".repeat(level); let current = env.last().copied().unwrap(); for (idx, bytecode) in current.get_body().into_iter().copied().enumerate() { - print!("{idx} {padding} {0}", bytecode.padded_name()); + let extra_spaces_nbr = if idx >= 100 { 0 } else if (10..=99).contains(&idx) { 1 } else { 2 }; + print!("{idx} {0} {padding} {1}", " ".repeat(extra_spaces_nbr), bytecode.padded_name()); // print!("{padding} {0}", bytecode.padded_name()); match bytecode { diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index f7c16e26..8dce4bdc 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,4 +1,6 @@ use std::rc::Rc; +use rand::distributions::Alphanumeric; +use rand::Rng; use som_core::ast; use som_core::bytecode::Bytecode; use crate::block::{BlockInfo}; @@ -28,7 +30,7 @@ impl PrimMessageInliner for ast::Expression { match message.signature.as_str() { "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), - "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), + // "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), // "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), // "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), // "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), @@ -38,9 +40,18 @@ impl PrimMessageInliner for ast::Expression { } fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()> { - for _block_local_intern_id in &block.locals { - panic!("we don't handle block locals yet!"); - // ctxt.push_local(ctxt.lookup_symbol(block_local_intern_id)); + let nbr_locals_pre_inlining = ctxt.get_nbr_locals(); + + for block_local_intern_id in &block.locals { + let symbol_str= ctxt.lookup_symbol(*block_local_intern_id); + // TODO this is a very ugly, very temporary thing + // TODO but it is also hilarious though. I can't believe this works + let random_string: String = rand::thread_rng() + .sample_iter(&Alphanumeric) + .take(3) + .map(char::from) + .collect(); + ctxt.push_local(String::from(symbol_str) + random_string.as_str()); } let idx_start_inlining = ctxt.get_cur_instr_idx(); @@ -50,14 +61,14 @@ impl PrimMessageInliner for ast::Expression { for block_bc in body { match block_bc { Bytecode::PushLocal(up_idx, idx) => { - match up_idx { // todo: is there more logic to put there? - 0 => ctxt.push_instr(Bytecode::PushLocal(*up_idx, *idx)), + match up_idx { + 0 => ctxt.push_instr(Bytecode::PushLocal(*up_idx, nbr_locals_pre_inlining as u8 + *idx)), 1.. => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)) } }, Bytecode::PopLocal(up_idx, idx) => { match up_idx { - 0 => ctxt.push_instr(Bytecode::PopLocal(*up_idx, *idx)), + 0 => ctxt.push_instr(Bytecode::PopLocal(*up_idx, nbr_locals_pre_inlining as u8 + *idx)), 1.. => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)) } }, @@ -236,44 +247,38 @@ impl PrimMessageInliner for ast::Expression { return Some(()); } - fn inline_while(&self, _ctxt: &mut dyn InnerGenCtxt, _message: &ast::Message, _jump_type: JumpType) -> Option<()> { - todo!("make it use the new inlining function"); - // if message.values.len() != 1 { - // return None; - // } - // - // if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { - // return None; - // } - // - // ctxt.pop_instr(); // we remove the PUSH_BLOCK - // - // let idx_before_condition = ctxt.get_cur_instr_idx(); - // - // self.inline_expr(ctxt, message.receiver.as_ref()); - // - // let cond_jump_idx = ctxt.get_cur_instr_idx(); - // match jump_type { - // JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - // JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) - // } - // - // self.inline_expr(ctxt, message.values.get(0).unwrap()); - // - // // we push a POP, unless the body of the loop is empty. - // match message.values.get(0).unwrap() { - // ast::Expression::Block(block) => { - // if block.body.exprs.len() != 0 { - // ctxt.push_instr(Bytecode::Pop); - // } - // }, - // _ => {} - // }; - // - // ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); - // ctxt.backpatch_jump_to_current(cond_jump_idx); - // ctxt.push_instr(Bytecode::PushNil); - // - // return Some(()); + fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) || !matches!(ctxt.get_instructions().last() , Some(Bytecode::PushBlock(_))) { + return None; + } + + let idx_before_condition = ctxt.get_cur_instr_idx(); + + self.inline_last_push_block_bc(ctxt); + + let cond_jump_idx = ctxt.get_cur_instr_idx(); + match jump_type { + JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), + JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) + } + + message.values.get(0)?.codegen(ctxt)?; + self.inline_last_push_block_bc(ctxt); + + // we push a POP, unless the body of the loop is empty. + match message.values.get(0).unwrap() { + ast::Expression::Block(block) => { + if block.body.exprs.len() != 0 { + ctxt.push_instr(Bytecode::Pop); + } + }, + _ => {} + }; + + ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); + ctxt.backpatch_jump_to_current(cond_jump_idx); + ctxt.push_instr(Bytecode::PushNil); + + return Some(()); } } \ No newline at end of file From f87dbb41184d575c93fe43afebb9c0a4c54a5412 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 29 Jan 2024 11:38:23 +0000 Subject: [PATCH 56/88] Proper solution for shadowing: now storing the original scope when inlining variables. Doesn't solve the open issue of compiled blocks --- som-interpreter-bc/src/compiler.rs | 47 +++++++++++++++++++++++------- som-interpreter-bc/src/inliner.rs | 46 ++++++++++++++++------------- 2 files changed, 61 insertions(+), 32 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index c4b92b40..92e501b3 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -7,6 +7,7 @@ use std::rc::{Rc, Weak}; use indexmap::{IndexMap, IndexSet}; use num_bigint::BigInt; +use rand::Rng; use som_core::ast; use som_core::bytecode::Bytecode; @@ -94,6 +95,7 @@ pub trait GenCtxt { fn intern_symbol(&mut self, name: &str) -> Interned; fn lookup_symbol(&self, id: Interned) -> &str; fn class_name(&self) -> &str; + fn current_scope(&self) -> usize; } pub trait InnerGenCtxt: GenCtxt { @@ -102,7 +104,7 @@ pub trait InnerGenCtxt: GenCtxt { fn pop_instr(&mut self); fn get_instructions(&self) -> &Vec; fn push_arg(&mut self, name: String) -> usize; - fn push_local(&mut self, name: String) -> usize; + fn push_local(&mut self, name: String, original_scope: usize) -> usize; fn get_nbr_locals(&self) -> usize; fn get_literal(&self, idx: usize) -> Option<&Literal>; // is this needed? fn push_literal(&mut self, literal: Literal) -> usize; @@ -116,9 +118,10 @@ pub trait InnerGenCtxt: GenCtxt { struct BlockGenCtxt<'a> { pub outer: &'a mut dyn GenCtxt, pub args: IndexSet, - pub locals: IndexSet, + pub locals: IndexSet<(String, usize)>, pub literals: IndexSet, pub body: Option>, + pub scope: usize, } impl GenCtxt for BlockGenCtxt<'_> { @@ -127,8 +130,13 @@ impl GenCtxt for BlockGenCtxt<'_> { "super" => "self", name => name, }; - (self.locals.get_index_of(name)) + + // first check the locals in this scope, then check the locals that were inlined into the scope (i.e. have a different original scope) + // needed because when you inline a block, it can contain some PUSH_BLOCKs where we recompile the block, therefore scope info gets out of whack + // it's not a great solution, pretty slow. a better one would be that when we recompile the blocks, we adjust their bytecode directly which -should- circumvent the issue? + (self.locals.iter().position(|(local_name, local_scope)| { local_name == name && (*local_scope == self.current_scope()) })) .map(|idx| FoundVar::Local(0, idx as u8)) + .or_else(|| self.locals.iter().position(|(local_name, _)| local_name == name).map(|idx| FoundVar::Local(0, idx as u8))) .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) .or_else(|| { self.outer.find_var(name).map(|found| match found { @@ -150,6 +158,10 @@ impl GenCtxt for BlockGenCtxt<'_> { fn class_name(&self) -> &str { self.outer.class_name() } + + fn current_scope(&self) -> usize { + self.scope + } } impl InnerGenCtxt for BlockGenCtxt<'_> { @@ -175,8 +187,8 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { idx } - fn push_local(&mut self, name: String) -> usize { - let (idx, _) = self.locals.insert_full(name); + fn push_local(&mut self, name: String, original_scope: usize) -> usize { + let (idx, _) = self.locals.insert_full((name, original_scope)); idx } @@ -310,6 +322,10 @@ impl GenCtxt for MethodGenCtxt<'_> { fn class_name(&self) -> &str { self.inner.class_name() } + + fn current_scope(&self) -> usize { + self.inner.current_scope() + } } impl InnerGenCtxt for MethodGenCtxt<'_> { @@ -334,8 +350,8 @@ impl InnerGenCtxt for MethodGenCtxt<'_> { self.inner.push_arg(name) } - fn push_local(&mut self, name: String) -> usize { - self.inner.push_local(name) + fn push_local(&mut self, name: String, original_scope: usize) -> usize { + self.inner.push_local(name, original_scope) } fn push_literal(&mut self, literal: Literal) -> usize { @@ -572,6 +588,10 @@ impl GenCtxt for ClassGenCtxt<'_> { fn class_name(&self) -> &str { self.name.as_str() } + + fn current_scope(&self) -> usize { + panic!("Asking for the current scope of a class, and not a block/method, makes little sense.") + } } fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option { @@ -588,10 +608,11 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option IndexSet::new(), - ast::MethodBody::Body { locals, .. } => locals.iter().cloned().collect(), + ast::MethodBody::Body { locals, .. } => locals.iter().cloned().map(|s| (s, 0)).collect(), }, literals: IndexSet::new(), body: None, + scope: 0 }, }; @@ -629,7 +650,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option Option { // println!("(system) compiling block ..."); + let mut rand_thread = rand::thread_rng(); + // let block_scope = outer.current_scope() + 1; + let block_scope = rand_thread.gen(); let mut ctxt = BlockGenCtxt { outer, args: defn.parameters.iter().cloned().collect(), - locals: defn.locals.iter().cloned().collect(), + locals: defn.locals.iter().cloned().map(|s| (s, block_scope)).collect(), literals: IndexSet::new(), body: None, + scope: block_scope }; let splitted = defn.body.exprs.split_last(); @@ -680,7 +705,7 @@ pub(crate) fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Optio let locals = std::mem::take(&mut ctxt.locals); locals .into_iter() - .map(|name| ctxt.intern_symbol(&name)) + .map(|(name, _)| ctxt.intern_symbol(&name)) .collect() }; let literals = ctxt.literals.into_iter().collect(); diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 8dce4bdc..e423c363 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,5 +1,4 @@ use std::rc::Rc; -use rand::distributions::Alphanumeric; use rand::Rng; use som_core::ast; use som_core::bytecode::Bytecode; @@ -42,19 +41,17 @@ impl PrimMessageInliner for ast::Expression { fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()> { let nbr_locals_pre_inlining = ctxt.get_nbr_locals(); + let mut rand_thread = rand::thread_rng(); + let og_scope = rand_thread.gen(); // does this matter? should it be the exact same as the original compiled block? i'm thinking it's fine for block_local_intern_id in &block.locals { - let symbol_str= ctxt.lookup_symbol(*block_local_intern_id); - // TODO this is a very ugly, very temporary thing - // TODO but it is also hilarious though. I can't believe this works - let random_string: String = rand::thread_rng() - .sample_iter(&Alphanumeric) - .take(3) - .map(char::from) - .collect(); - ctxt.push_local(String::from(symbol_str) + random_string.as_str()); + let symbol_str = ctxt.lookup_symbol(*block_local_intern_id); + // ctxt.push_local(String::from(symbol_str), ctxt.current_scope() + 1); + ctxt.push_local(String::from(symbol_str), og_scope); } - let idx_start_inlining = ctxt.get_cur_instr_idx(); + // dbg!(&block.body); + + // let idx_start_inlining = ctxt.get_cur_instr_idx(); // last is always ReturnLocal, so it gets ignored if let Some((_, body)) = block.body.split_last() { @@ -94,8 +91,11 @@ impl PrimMessageInliner for ast::Expression { Bytecode::PushBlock(block_idx) => { match block.literals.get(*block_idx as usize)? { Literal::Block(inner_block) => { + // dbg!(&inner_block.ast_body); + // dbg!(&inner_block.blk_info.body); let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; let idx = ctxt.push_literal(Literal::Block(Rc::from(new_block))); + // dbg!(idx); ctxt.push_instr(Bytecode::PushBlock(idx as u8)); }, _ => panic!("PushBlock not actually pushing a block somehow") @@ -153,14 +153,15 @@ impl PrimMessageInliner for ast::Expression { // } ctxt.push_instr(Bytecode::ReturnNonLocal) }, - Bytecode::ReturnLocal => {},//panic!("Is that a thing? If so, just ignore it."), - // For jumps, we just need to adjust their offsets based on when we started inlining the block. probably. - Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + idx_start_inlining)), - Bytecode::JumpBackward(idx) => ctxt.push_instr(Bytecode::JumpBackward(idx + idx_start_inlining)), - Bytecode::JumpOnTruePop(idx) => ctxt.push_instr(Bytecode::JumpOnTruePop(idx + idx_start_inlining)), - Bytecode::JumpOnFalsePop(idx) => ctxt.push_instr(Bytecode::JumpOnFalsePop(idx + idx_start_inlining)), - Bytecode::JumpOnTrueTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(idx + idx_start_inlining)), - Bytecode::JumpOnFalseTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(idx + idx_start_inlining)), + Bytecode::ReturnLocal => {}, //panic!("Is that a thing? If so, just ignore it."), + // todo: hmm... do we? if so, add these to the _ case i guess. + // Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + idx_start_inlining)), + Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(*idx)), + Bytecode::JumpBackward(idx) => ctxt.push_instr(Bytecode::JumpBackward(*idx)), + Bytecode::JumpOnTruePop(idx) => ctxt.push_instr(Bytecode::JumpOnTruePop(*idx)), + Bytecode::JumpOnFalsePop(idx) => ctxt.push_instr(Bytecode::JumpOnFalsePop(*idx)), + Bytecode::JumpOnTrueTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(*idx)), + Bytecode::JumpOnFalseTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(*idx)), _ => ctxt.push_instr(*block_bc) // I *think* the rest are all fine.. } } @@ -172,7 +173,7 @@ impl PrimMessageInliner for ast::Expression { fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()> { let block1_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => *val, - _ => panic!("function expects last bytecode to be a block.") + _ => panic!("function expects last bytecode to be a PUSH_BLOCK.") }; ctxt.pop_instr(); // removing the PUSH_BLOCK @@ -183,7 +184,10 @@ impl PrimMessageInliner for ast::Expression { // shouldn't break anything, probably // ctxt.remove_literal(block_idx as usize); - self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()) + match self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()) { + None => panic!("Inlining a compiled block failed!"), + _ => Some(()) + } } fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { From aeb48a5317de8eae91d7c041a72860eba6bc4c82 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 29 Jan 2024 11:41:00 +0000 Subject: [PATCH 57/88] functional inlining for ifTrue:ifFalse too! --- som-interpreter-bc/src/inliner.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index e423c363..42dc285c 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -29,8 +29,8 @@ impl PrimMessageInliner for ast::Expression { match message.signature.as_str() { "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), - // "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), - // "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), + "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), + "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), // "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), // "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), // TODO: [or, and] @@ -42,7 +42,7 @@ impl PrimMessageInliner for ast::Expression { let nbr_locals_pre_inlining = ctxt.get_nbr_locals(); let mut rand_thread = rand::thread_rng(); - let og_scope = rand_thread.gen(); // does this matter? should it be the exact same as the original compiled block? i'm thinking it's fine + let og_scope = rand_thread.gen(); // does this matter? should it be the exact same as the original compiled block? i'm thinking it's fine like this? for block_local_intern_id in &block.locals { let symbol_str = ctxt.lookup_symbol(*block_local_intern_id); // ctxt.push_local(String::from(symbol_str), ctxt.current_scope() + 1); From fdaf601e057fb703585b44b54af78ab77bb49562 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 29 Jan 2024 11:51:00 +0000 Subject: [PATCH 58/88] whileTrue/whileFalse inlining! works on the benchmarks (all running 1 0 7) and basic interpreter tests. --- som-interpreter-bc/src/inliner.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 42dc285c..85c0c4f0 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -17,8 +17,6 @@ pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()>; fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; - // fn inline_expr(&self, ctxt: &mut dyn InnerGenCtxt, block: &ast::Expression) -> Option<()>; - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; @@ -31,8 +29,8 @@ impl PrimMessageInliner for ast::Expression { "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), - // "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), - // "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), + "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), + "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), // TODO: [or, and] _ => None } @@ -279,7 +277,7 @@ impl PrimMessageInliner for ast::Expression { _ => {} }; - ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition)); + ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition + 1)); ctxt.backpatch_jump_to_current(cond_jump_idx); ctxt.push_instr(Bytecode::PushNil); From 89605f4dc8165afc57c94c229d222b267dd57b0e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 29 Jan 2024 13:45:47 +0000 Subject: [PATCH 59/88] removing an unneeded literal --- som-interpreter-bc/src/inliner.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 85c0c4f0..8553176b 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -169,18 +169,17 @@ impl PrimMessageInliner for ast::Expression { } fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()> { - let block1_idx = match ctxt.get_instructions().last()? { + let block_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => *val, _ => panic!("function expects last bytecode to be a PUSH_BLOCK.") }; ctxt.pop_instr(); // removing the PUSH_BLOCK - let cond_block_ref = match ctxt.get_literal(block1_idx as usize)? { + let cond_block_ref = match ctxt.get_literal(block_idx as usize)? { Literal::Block(val) => val.clone(), _ => return None }; - // shouldn't break anything, probably - // ctxt.remove_literal(block_idx as usize); + ctxt.remove_literal(block_idx as usize); match self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()) { None => panic!("Inlining a compiled block failed!"), From 31ff265a4fd43bd062e9857e614f1cdfca295c75 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 29 Jan 2024 13:49:18 +0000 Subject: [PATCH 60/88] reverting gitignore/rebench.conf for the PR --- .gitignore | 4 ---- rebench.conf | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 0c1df386..d59ac5c0 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,3 @@ # Folder to put files not intended to be pushed (like temporary files) /ignored - -# Rebench stuff -payload.json -rebench.data \ No newline at end of file diff --git a/rebench.conf b/rebench.conf index 2433d391..ccbf1377 100644 --- a/rebench.conf +++ b/rebench.conf @@ -7,8 +7,8 @@ reporting: # Benchmark results will be reported to ReBenchDB rebenchdb: # this url needs to point to the API endpoint - db_url: http://localhost:33333/rebenchdb - repo_url: https://github.com/OctaveLarose/som-rs + db_url: https://rebench.polomack.eu/rebenchdb + repo_url: https://github.com/Hirevo/som-rs record_all: true # make sure everything is recorded project_name: som-rs From 78e46a7dc41cd6298e47293b016317a398d84f93 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 30 Jan 2024 11:05:05 +0000 Subject: [PATCH 61/88] Fixed some bad merge in the interpreter causing some old send function to be invoked instead of the newer one --- som-interpreter-bc/src/interpreter.rs | 239 ++++++-------------------- 1 file changed, 50 insertions(+), 189 deletions(-) diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index cd4a2796..d16946c6 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -50,179 +50,6 @@ impl Interpreter { self.frames.last() } - fn send(&mut self, idx: u8, nb_params_opt: Option, frame: SOMRef, universe: &mut Universe) { - let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); - let symbol = match literal { - Literal::Symbol(sym) => sym, - _ => { - return; - } - }; - let signature = universe.lookup_symbol(symbol); - - // if signature == "verify:inner:" { - // print!("bp"); - // // match &frame.borrow().kind { - // // FrameKind::Method { holder, method, .. } => match method.kind() { - // // MethodKind::Defined(env) => { - // // dbg!(&holder); - // // dbg!(&env.body); - // // }, - // // _ => {} - // // }, - // // _ => {}, - // // }; - // } - - let nb_params = match nb_params_opt { - Some(x) => x, - None => { - match signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => signature.chars().filter(|ch| *ch == ':').count(), - } - } - }; - - let method = self - .stack - .iter() - .nth_back(nb_params) - .unwrap() - .lookup_method(universe, symbol); - - if let Some(method) = method { - match method.kind() { - MethodKind::Defined(_) => { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - args.push(self_value.clone()); - - args.reverse(); - - let holder = method.holder.upgrade().unwrap(); - self.push_frame(FrameKind::Method { - self_value, - method, - holder, - }); - - let frame = self.current_frame().unwrap(); - frame.borrow_mut().args = args; - } - MethodKind::Primitive(func) => { - func(self, universe); - } - MethodKind::NotImplemented(err) => { - let self_value = self.stack.iter().nth_back(nb_params).unwrap(); - println!( - "{}>>#{}", - self_value.class(&universe).borrow().name(), - method.signature() - ); - panic!("Primitive `#{}` not implemented", err) - } - } - } else { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - - args.reverse(); - - universe.does_not_understand(self, self_value, symbol, args) - .expect( - "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" - ); - } - } - - fn super_send(&mut self, idx: u8, nb_params_opt: Option, frame: SOMRef, universe: &mut Universe) { - let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); - let symbol = match literal { - Literal::Symbol(sym) => sym, - _ => { - return; - } - }; - let signature = universe.lookup_symbol(symbol); - let nb_params = match nb_params_opt { - Some(x) => x, - None => { - match signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => signature.chars().filter(|ch| *ch == ':').count(), - } - } - }; - - let method = frame - .borrow() - .get_method_holder() - .borrow() - .super_class() - .unwrap() - .borrow() - .lookup_method(symbol); - - if let Some(method) = method { - match method.kind() { - MethodKind::Defined(_) => { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - args.push(self_value.clone()); - - args.reverse(); - - let holder = method.holder.upgrade().unwrap(); - self.push_frame(FrameKind::Method { - self_value, - method, - holder, - }); - - let frame = self.current_frame().unwrap(); - frame.borrow_mut().args = args; - } - MethodKind::Primitive(func) => { - func(self, universe); - } - MethodKind::NotImplemented(err) => { - panic!("Primitive `#{}` not implemented", err) - } - } - } else { - let mut args = Vec::with_capacity(nb_params + 1); - - for _ in 0..nb_params { - let arg = self.stack.pop().unwrap(); - args.push(arg); - } - let self_value = self.stack.pop().unwrap(); - - args.reverse(); - - universe.does_not_understand(self, self_value, symbol, args) - .expect( - "A message cannot be handled and `doesNotUnderstand:arguments:` is not defined on receiver" - ); - } - } - pub fn run(&mut self, universe: &mut Universe) -> Option { loop { let frame = match self.current_frame() { @@ -230,6 +57,7 @@ impl Interpreter { None => return Some(self.stack.pop().unwrap_or(Value::Nil)), }; + let bytecode_idx = frame.borrow().bytecode_idx; let opt_bytecode = frame.borrow().get_current_bytecode(); let bytecode = match opt_bytecode { Some(bytecode) => bytecode, @@ -386,29 +214,29 @@ impl Interpreter { } } Bytecode::Send1(idx) => { - self.send(idx, Some(1), frame.clone(), universe); + send(self, idx, Some(1), bytecode_idx, frame.clone(), universe); } Bytecode::Send2(idx) => { - self.send(idx, Some(2), frame.clone(), universe); + send(self, idx, Some(2), bytecode_idx, frame.clone(), universe); } Bytecode::Send3(idx) => { - self.send(idx, Some(3), frame.clone(), universe); + send(self, idx, Some(3), bytecode_idx, frame.clone(), universe); } Bytecode::SendN(idx) => { - self.send(idx, None, frame.clone(), universe); + send(self, idx, None, bytecode_idx, frame.clone(), universe); } Bytecode::SuperSend1(idx) => { - self.super_send(idx, Some(1), frame.clone(), universe); + super_send(self, idx, Some(1), bytecode_idx, frame.clone(), universe); } Bytecode::SuperSend2(idx) => { - self.super_send(idx, Some(2), frame.clone(), universe); + super_send(self, idx, Some(2), bytecode_idx, frame.clone(), universe); } Bytecode::SuperSend3(idx) => { - self.super_send(idx, Some(3), frame.clone(), universe); + super_send(self, idx, Some(3), bytecode_idx, frame.clone(), universe); } Bytecode::SuperSendN(idx) => { - self.super_send(idx, None, frame.clone(), universe); - } + super_send(self, idx, None, bytecode_idx, frame.clone(), universe); + }, Bytecode::ReturnLocal => { let value = self.stack.pop().unwrap(); self.pop_frame(); @@ -510,9 +338,46 @@ impl Interpreter { } } - // TODO: check why this is unused - #[allow(dead_code)] - fn do_send( + fn send(interpreter: &mut Interpreter, idx: u8, nb_params_opt: Option, bytecode_idx: usize, frame: Rc>, universe: &mut Universe) { + let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let Literal::Symbol(symbol) = literal else { + panic!("Not a symbol"); + }; + let signature = universe.lookup_symbol(symbol); + let nb_params = match nb_params_opt { + Some(x) => x, + None => nb_params(signature) + }; + + let method = { + let receiver = interpreter.stack.iter().nth_back(nb_params).unwrap(); + let receiver_class = receiver.class(universe); + resolve_method(&frame, &receiver_class, symbol, bytecode_idx) + }; + + do_send(interpreter, universe, method, symbol, nb_params); + } + + fn super_send(interpreter: &mut Interpreter, idx: u8, nb_params_opt: Option, bytecode_idx: usize, frame: Rc>, universe: &mut Universe) { + let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let Literal::Symbol(symbol) = literal else { + panic!("Not a symbol") + }; + let signature = universe.lookup_symbol(symbol); + let nb_params = match nb_params_opt { + Some(x) => x, + None => nb_params(signature) + }; + let method = { + let holder = frame.borrow().get_method_holder(); + let super_class = holder.borrow().super_class().unwrap(); + resolve_method(&frame, &super_class, symbol, bytecode_idx) + }; + + do_send(interpreter, universe, method, symbol, nb_params); + } + + pub fn do_send( interpreter: &mut Interpreter, universe: &mut Universe, method: Option>, @@ -574,8 +439,6 @@ impl Interpreter { } } - // TODO: check why this is unused - #[allow(dead_code)] fn resolve_method( frame: &SOMRef, class: &SOMRef, @@ -659,8 +522,6 @@ impl Interpreter { Some(value) } - // TODO: check why this is unused - #[allow(dead_code)] fn nb_params(signature: &str) -> usize { match signature.chars().nth(0) { Some(ch) if !ch.is_alphabetic() => 1, From 3fad7db05617eb78525ee51ef2e07f242b6fd34e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 30 Jan 2024 11:06:51 +0000 Subject: [PATCH 62/88] Revert "reverting gitignore/rebench.conf for the PR" (for debugging) This reverts commit 31ff265a4fd43bd062e9857e614f1cdfca295c75. --- .gitignore | 4 ++++ rebench.conf | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index d59ac5c0..0c1df386 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,7 @@ # Folder to put files not intended to be pushed (like temporary files) /ignored + +# Rebench stuff +payload.json +rebench.data \ No newline at end of file diff --git a/rebench.conf b/rebench.conf index ccbf1377..2433d391 100644 --- a/rebench.conf +++ b/rebench.conf @@ -7,8 +7,8 @@ reporting: # Benchmark results will be reported to ReBenchDB rebenchdb: # this url needs to point to the API endpoint - db_url: https://rebench.polomack.eu/rebenchdb - repo_url: https://github.com/Hirevo/som-rs + db_url: http://localhost:33333/rebenchdb + repo_url: https://github.com/OctaveLarose/som-rs record_all: true # make sure everything is recorded project_name: som-rs From c1b4a38ae57726edf8763ab1b0b6531257a19dd6 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 2 Feb 2024 14:44:34 +0000 Subject: [PATCH 63/88] moved all the specialized bytecode to their own branch (expecting more slowdown now) --- som-core/src/bytecode.rs | 68 +++----------- som-interpreter-bc/src/compiler.rs | 51 ++--------- som-interpreter-bc/src/disassembler.rs | 17 ++-- som-interpreter-bc/src/inliner.rs | 44 ++------- som-interpreter-bc/src/interpreter.rs | 119 +++++++------------------ som-interpreter-bc/src/method.rs | 14 +-- 6 files changed, 67 insertions(+), 246 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 0d1376c0..26faf403 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -3,31 +3,20 @@ use std::fmt; #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Bytecode { + Halt, Dup, PushLocal(u8, u8), PushArgument(u8, u8), PushField(u8), PushBlock(u8), PushConstant(u8), - PushConstant0, - PushConstant1, - PushConstant2, PushGlobal(u8), - Push0, - Push1, - PushNil, Pop, PopLocal(u8, u8), PopArgument(u8, u8), PopField(u8), - Send1(u8), - Send2(u8), - Send3(u8), - SendN(u8), - SuperSend1(u8), - SuperSend2(u8), - SuperSend3(u8), - SuperSendN(u8), + Send(u8), + SuperSend(u8), ReturnLocal, ReturnNonLocal, Jump(usize), @@ -44,31 +33,20 @@ impl Bytecode { pub fn name(self) -> &'static str { // NAMES[self as usize] match self { + Self::Halt => "HALT", Self::Dup => "DUP", Self::PushLocal(_, _) => "PUSH_LOCAL", Self::PushArgument(_, _) => "PUSH_ARGUMENT", Self::PushField(_) => "PUSH_FIELD", Self::PushBlock(_) => "PUSH_BLOCK", Self::PushConstant(_) => "PUSH_CONSTANT", - Self::PushConstant0 => "PUSH_CONSTANT 0", - Self::PushConstant1 => "PUSH_CONSTANT 1", - Self::PushConstant2 => "PUSH_CONSTANT 2", Self::PushGlobal(_) => "PUSH_GLOBAL", - Self::Push0 => "PUSH_0", - Self::Push1 => "PUSH_1", - Self::PushNil => "PUSH_NIL", Self::Pop => "POP", Self::PopLocal(_, _) => "POP_LOCAL", Self::PopArgument(_, _) => "POP_ARGUMENT", Self::PopField(_) => "POP_FIELD", - Self::Send1(_) => "SEND 1", - Self::Send2(_) => "SEND 2", - Self::Send3(_) => "SEND 3", - Self::SendN(_) => "SEND N", - Self::SuperSend1(_) => "SUPER_SEND 1", - Self::SuperSend2(_) => "SUPER_SEND 2", - Self::SuperSend3(_) => "SUPER_SEND 3", - Self::SuperSendN(_) => "SUPER_SEND N", + Self::Send(_) => "SEND", + Self::SuperSend(_) => "SUPER_SEND", Self::ReturnLocal => "RETURN_LOCAL", Self::ReturnNonLocal => "RETURN_NON_LOCAL", Self::Jump(_) => "JUMP", @@ -85,31 +63,20 @@ impl Bytecode { pub fn padded_name(self) -> &'static str { // PADDED_NAMES[self as usize] match self { + Self::Halt => "HALT ", Self::Dup => "DUP ", Self::PushLocal(_, _) => "PUSH_LOCAL ", Self::PushArgument(_, _) => "PUSH_ARGUMENT ", Self::PushField(_) => "PUSH_FIELD ", Self::PushBlock(_) => "PUSH_BLOCK ", Self::PushConstant(_) => "PUSH_CONSTANT ", - Self::PushConstant0 => "PUSH_CONSTANT 0 ", - Self::PushConstant1 => "PUSH_CONSTANT 1 ", - Self::PushConstant2 => "PUSH_CONSTANT 2 ", Self::PushGlobal(_) => "PUSH_GLOBAL ", - Self::Push0 => "PUSH_0 ", - Self::Push1 => "PUSH_1 ", - Self::PushNil => "PUSH_NIL ", Self::Pop => "POP ", Self::PopLocal(_, _) => "POP_LOCAL ", Self::PopArgument(_, _) => "POP_ARGUMENT ", Self::PopField(_) => "POP_FIELD ", - Self::Send1(_) => "SEND 1 ", - Self::Send2(_) => "SEND 2 ", - Self::Send3(_) => "SEND 3 ", - Self::SendN(_) => "SEND N ", - Self::SuperSend1(_) => "SUPER_SEND 1 ", - Self::SuperSend2(_) => "SUPER_SEND 2 ", - Self::SuperSend3(_) => "SUPER_SEND 3 ", - Self::SuperSendN(_) => "SUPER_SEND N ", + Self::Send(_) => "SEND ", + Self::SuperSend(_) => "SUPER_SEND ", Self::ReturnLocal => "RETURN_LOCAL ", Self::ReturnNonLocal => "RETURN_NON_LOCAL ", Self::Jump(_) => "JUMP ", @@ -126,31 +93,20 @@ impl fmt::Display for Bytecode { #[rustfmt::skip] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Self::Halt => write!(f, "HALT"), Self::Dup => write!(f, "DUP"), Self::PushLocal(up_idx, idx) => write!(f, "PUSH_LOCAL {}, {}", up_idx, idx), Self::PushArgument(up_idx, idx) => write!(f, "PUSH_ARGUMENT {}, {}", up_idx, idx), Self::PushField(idx) => write!(f, "PUSH_FIELD {}", idx), Self::PushBlock(idx) => write!(f, "PUSH_BLOCK {}", idx), Self::PushConstant(idx) => write!(f, "PUSH_CONSTANT {}", idx), - Self::PushConstant0 => write!(f, "PUSH_CONSTANT_0"), - Self::PushConstant1 => write!(f, "PUSH_CONSTANT_1"), - Self::PushConstant2 => write!(f, "PUSH_CONSTANT_2"), Self::PushGlobal(idx) => write!(f, "PUSH_GLOBAL {}", idx), - Self::Push0 => write!(f, "PUSH_0"), - Self::Push1 => write!(f, "PUSH_1"), - Self::PushNil => write!(f, "PUSH_NIL"), Self::Pop => write!(f, "POP"), Self::PopLocal(up_idx, idx) => write!(f, "POP_LOCAL {}, {}", up_idx, idx), Self::PopArgument(up_idx, idx) => write!(f, "POP_ARGUMENT {}, {}", up_idx, idx), Self::PopField(idx) => write!(f, "POP_FIELD {}", idx), - Self::Send1(idx) => write!(f, "SEND_1 {}", idx), - Self::Send2(idx) => write!(f, "SEND_2 {}", idx), - Self::Send3(idx) => write!(f, "SEND_3 {}", idx), - Self::SendN(idx) => write!(f, "SEND_N {}", idx), - Self::SuperSend1(idx) => write!(f, "SUPER_SEND_1 {}", idx), - Self::SuperSend2(idx) => write!(f, "SUPER_SEND_2 {}", idx), - Self::SuperSend3(idx) => write!(f, "SUPER_SEND_3 {}", idx), - Self::SuperSendN(idx) => write!(f, "SUPER_SEND_N {}", idx), + Self::Send(idx) => write!(f, "SEND {}", idx), + Self::SuperSend(idx) => write!(f, "SUPER_SEND {}", idx), Self::ReturnLocal => write!(f, "RETURN_LOCAL", ), Self::ReturnNonLocal => write!(f, "RETURN_NON_LOCAL", ), Self::Jump(idx) => write!(f, "JUMP {}", idx), diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 92e501b3..8811014b 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -413,14 +413,9 @@ impl MethodCodegen for ast::Expression { } Some(FoundVar::Field(idx)) => ctxt.push_instr(Bytecode::PushField(idx)), None => { - match name.as_str() { - "nil" => ctxt.push_instr(Bytecode::PushNil), - _ => { - let name = ctxt.intern_symbol(name); - let idx = ctxt.push_literal(Literal::Symbol(name)); - ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); - } - } + let name = ctxt.intern_symbol(name); + let idx = ctxt.push_literal(Literal::Symbol(name)); + ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); } } Some(()) @@ -457,28 +452,12 @@ impl MethodCodegen for ast::Expression { .try_for_each(|value| value.codegen(ctxt) )?; - - let nb_params = match message.signature.chars().nth(0) { - Some(ch) if !ch.is_alphabetic() => 1, - _ => message.signature.chars().filter(|ch| *ch == ':').count(), - }; - let sym = ctxt.intern_symbol(message.signature.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { - match nb_params { - 1 => ctxt.push_instr(Bytecode::SuperSend1(idx as u8)), - 2 => ctxt.push_instr(Bytecode::SuperSend2(idx as u8)), - 3 => ctxt.push_instr(Bytecode::SuperSend3(idx as u8)), - _ => ctxt.push_instr(Bytecode::SuperSendN(idx as u8)) - } + ctxt.push_instr(Bytecode::SuperSend(idx as u8)) } else { - match nb_params { - 1 => ctxt.push_instr(Bytecode::Send1(idx as u8)), - 2 => ctxt.push_instr(Bytecode::Send2(idx as u8)), - 3 => ctxt.push_instr(Bytecode::Send3(idx as u8)), - _ => ctxt.push_instr(Bytecode::SendN(idx as u8)) - } + ctxt.push_instr(Bytecode::Send(idx as u8)) } Some(()) } @@ -492,9 +471,9 @@ impl MethodCodegen for ast::Expression { let sym = ctxt.intern_symbol(message.op.as_str()); let idx = ctxt.push_literal(Literal::Symbol(sym)); if super_send { - ctxt.push_instr(Bytecode::Send1(idx as u8)); + ctxt.push_instr(Bytecode::Send(idx as u8)); } else { - ctxt.push_instr(Bytecode::Send1(idx as u8)); + ctxt.push_instr(Bytecode::Send(idx as u8)); } Some(()) } @@ -527,20 +506,8 @@ impl MethodCodegen for ast::Expression { } let literal = convert_literal(ctxt, literal); - - match literal { - Literal::Integer(0) => ctxt.push_instr(Bytecode::Push0), - Literal::Integer(1) => ctxt.push_instr(Bytecode::Push1), - _ => { - let idx = ctxt.push_literal(literal); - match idx { - 0 => ctxt.push_instr(Bytecode::PushConstant0), - 1 => ctxt.push_instr(Bytecode::PushConstant1), - 2 => ctxt.push_instr(Bytecode::PushConstant2), - _ => ctxt.push_instr(Bytecode::PushConstant(idx as u8)) - } - } - } + let idx = ctxt.push_literal(literal); + ctxt.push_instr(Bytecode::PushConstant(idx as u8)); Some(()) } diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index d13630c8..6797781d 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -25,6 +25,9 @@ fn disassemble_body( // print!("{padding} {0}", bytecode.padded_name()); match bytecode { + Bytecode::Halt => { + println!(); + } Bytecode::Dup => { println!(); } @@ -59,15 +62,7 @@ fn disassemble_body( disassemble_body(universe, class, level + 1, env); env.pop(); } - Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 | Bytecode::PushConstant(_) => { - let idx = match bytecode { - Bytecode::PushConstant0 => 0, - Bytecode::PushConstant1 => 1, - Bytecode::PushConstant2 => 2, - Bytecode::PushConstant(idx) => idx, - _ => panic!("Unreachable.") - }; - + Bytecode::PushConstant(idx) => { print!(" {idx}"); let Some(literal) = current.resolve_literal(idx) else { println!(" (invalid constant)"); @@ -104,8 +99,7 @@ fn disassemble_body( // }; // println!(" (`{0}`)", universe.lookup_symbol(argument)); } - Bytecode::Send1(idx) | Bytecode::Send2(idx) | Bytecode::Send3(idx) | Bytecode::SendN(idx) | - Bytecode::SuperSend1(idx) | Bytecode::SuperSend2(idx) | Bytecode::SuperSend3(idx) | Bytecode::SuperSendN(idx) => { + Bytecode::Send(idx) | Bytecode::SuperSend(idx) => { print!(" {idx}"); let Some(Literal::Symbol(signature)) = current.resolve_literal(idx) else { println!(" (invalid signature)"); @@ -124,7 +118,6 @@ fn disassemble_body( Bytecode::JumpOnFalseTopNil(idx) | Bytecode::JumpOnTrueTopNil(idx) => { println!("{}", idx); } - Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil => {println!();} } } } diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 8553176b..7d2e350a 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -69,19 +69,12 @@ impl PrimMessageInliner for ast::Expression { }, Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), // not 100% sure i need to adjust the up_idx there and for pop Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | - Bytecode::Send3(lit_idx) | Bytecode::SendN(lit_idx) => { + Bytecode::Send(lit_idx) => { match block.literals.get(*lit_idx as usize)? { Literal::Symbol(interned) => { // does this push duplicate literals? I think it doesn't? let idx = ctxt.push_literal(Literal::Symbol(*interned)); - match block_bc { - Bytecode::Send1(_) => ctxt.push_instr(Bytecode::Send1(idx as u8)), - Bytecode::Send2(_) => ctxt.push_instr(Bytecode::Send2(idx as u8)), - Bytecode::Send3(_) => ctxt.push_instr(Bytecode::Send3(idx as u8)), - Bytecode::SendN(_) => ctxt.push_instr(Bytecode::SendN(idx as u8)), - _ => panic!("Unreachable branch") - } + ctxt.push_instr(Bytecode::Send(idx as u8)); }, _ => panic!("Unexpected block literal type, not yet implemented") } @@ -111,32 +104,7 @@ impl PrimMessageInliner for ast::Expression { match block.literals.get(*constant_idx as usize)? { lit => { let lit_idx = ctxt.push_literal(lit.clone()); - match lit_idx { - 0 => ctxt.push_instr(Bytecode::PushConstant0), - 1 => ctxt.push_instr(Bytecode::PushConstant1), - 2 => ctxt.push_instr(Bytecode::PushConstant2), - _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) - } - } - }; - }, - Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => { - let constant_idx: usize = match block_bc { - Bytecode::PushConstant0 => 0, - Bytecode::PushConstant1 => 1, - Bytecode::PushConstant2 => 2, - _ => panic!("Unreachable") - }; - - match block.literals.get(constant_idx)? { - lit => { - let lit_idx = ctxt.push_literal(lit.clone()); - match lit_idx { - 0 => ctxt.push_instr(Bytecode::PushConstant0), - 1 => ctxt.push_instr(Bytecode::PushConstant1), - 2 => ctxt.push_instr(Bytecode::PushConstant2), - _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)) - } + ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)); } }; }, @@ -278,7 +246,11 @@ impl PrimMessageInliner for ast::Expression { ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition + 1)); ctxt.backpatch_jump_to_current(cond_jump_idx); - ctxt.push_instr(Bytecode::PushNil); + + // that's a PushNil with the specialized bytecode, which is prettier. + let name = ctxt.intern_symbol("nil"); + let idx = ctxt.push_literal(Literal::Symbol(name)); + ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); return Some(()); } diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index d16946c6..468532d4 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -15,9 +15,6 @@ use crate::universe::Universe; use crate::value::Value; use crate::SOMRef; -const INT_0: Value = Value::Integer(0); -const INT_1: Value = Value::Integer(1); - pub struct Interpreter { /// The interpreter's stack frames. pub frames: Vec>, @@ -71,6 +68,9 @@ impl Interpreter { frame.borrow_mut().bytecode_idx += 1; match bytecode { + Bytecode::Halt => { + return Some(Value::Nil); + } Bytecode::Dup => { let value = self.stack.last().cloned().unwrap(); self.stack.push(value); @@ -127,21 +127,6 @@ impl Interpreter { let value = convert_literal(frame, literal).unwrap(); // TODO there may be a way to avoid converting the literal to a value? self.stack.push(value); } - Bytecode::PushConstant0 => { - let literal = frame.borrow().lookup_constant(0).unwrap(); - let value = convert_literal(frame, literal).unwrap(); // duplication removable but see above to do, which may need to be handled first - self.stack.push(value); - } - Bytecode::PushConstant1 => { - let literal = frame.borrow().lookup_constant(1).unwrap(); - let value = convert_literal(frame, literal).unwrap(); - self.stack.push(value); - } - Bytecode::PushConstant2 => { - let literal = frame.borrow().lookup_constant(2).unwrap(); - let value = convert_literal(frame, literal).unwrap(); - self.stack.push(value); - } Bytecode::PushGlobal(idx) => { let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); let symbol = match literal { @@ -155,15 +140,6 @@ impl Interpreter { universe.unknown_global(self, self_value, symbol).unwrap(); } } - Bytecode::Push0 => { - self.stack.push(INT_0); - } - Bytecode::Push1 => { - self.stack.push(INT_1); - } - Bytecode::PushNil => { - self.stack.push(Value::Nil); - } Bytecode::Pop => { self.stack.pop(); } @@ -213,29 +189,35 @@ impl Interpreter { self_value.assign_local(idx as usize, value).unwrap(); } } - Bytecode::Send1(idx) => { - send(self, idx, Some(1), bytecode_idx, frame.clone(), universe); - } - Bytecode::Send2(idx) => { - send(self, idx, Some(2), bytecode_idx, frame.clone(), universe); - } - Bytecode::Send3(idx) => { - send(self, idx, Some(3), bytecode_idx, frame.clone(), universe); - } - Bytecode::SendN(idx) => { - send(self, idx, None, bytecode_idx, frame.clone(), universe); - } - Bytecode::SuperSend1(idx) => { - super_send(self, idx, Some(1), bytecode_idx, frame.clone(), universe); - } - Bytecode::SuperSend2(idx) => { - super_send(self, idx, Some(2), bytecode_idx, frame.clone(), universe); - } - Bytecode::SuperSend3(idx) => { - super_send(self, idx, Some(3), bytecode_idx, frame.clone(), universe); + Bytecode::Send(idx) => { + let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let Literal::Symbol(symbol) = literal else { + return None; + }; + let signature = universe.lookup_symbol(symbol); + let nb_params = nb_params(signature); + let method = { + let receiver = self.stack.iter().nth_back(nb_params)?; + let receiver_class = receiver.class(universe); + resolve_method(frame, &receiver_class, symbol, bytecode_idx) + }; + + do_send(self, universe, method, symbol, nb_params); } - Bytecode::SuperSendN(idx) => { - super_send(self, idx, None, bytecode_idx, frame.clone(), universe); + Bytecode::SuperSend(idx) => { + let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); + let Literal::Symbol(symbol) = literal else { + return None; + }; + let signature = universe.lookup_symbol(symbol); + let nb_params = nb_params(signature); + let method = { + let holder = frame.borrow().get_method_holder(); + let super_class = holder.borrow().super_class()?; + resolve_method(frame, &super_class, symbol, bytecode_idx) + }; + + do_send(self, universe, method, symbol, nb_params); }, Bytecode::ReturnLocal => { let value = self.stack.pop().unwrap(); @@ -338,45 +320,6 @@ impl Interpreter { } } - fn send(interpreter: &mut Interpreter, idx: u8, nb_params_opt: Option, bytecode_idx: usize, frame: Rc>, universe: &mut Universe) { - let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); - let Literal::Symbol(symbol) = literal else { - panic!("Not a symbol"); - }; - let signature = universe.lookup_symbol(symbol); - let nb_params = match nb_params_opt { - Some(x) => x, - None => nb_params(signature) - }; - - let method = { - let receiver = interpreter.stack.iter().nth_back(nb_params).unwrap(); - let receiver_class = receiver.class(universe); - resolve_method(&frame, &receiver_class, symbol, bytecode_idx) - }; - - do_send(interpreter, universe, method, symbol, nb_params); - } - - fn super_send(interpreter: &mut Interpreter, idx: u8, nb_params_opt: Option, bytecode_idx: usize, frame: Rc>, universe: &mut Universe) { - let literal = frame.borrow().lookup_constant(idx as usize).unwrap(); - let Literal::Symbol(symbol) = literal else { - panic!("Not a symbol") - }; - let signature = universe.lookup_symbol(symbol); - let nb_params = match nb_params_opt { - Some(x) => x, - None => nb_params(signature) - }; - let method = { - let holder = frame.borrow().get_method_holder(); - let super_class = holder.borrow().super_class().unwrap(); - resolve_method(&frame, &super_class, symbol, bytecode_idx) - }; - - do_send(interpreter, universe, method, symbol, nb_params); - } - pub fn do_send( interpreter: &mut Interpreter, universe: &mut Universe, diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index 532678af..9256f4f6 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -133,7 +133,6 @@ impl fmt::Display for Method { Bytecode::PushBlock(idx) => { write!(f, "index: {}", idx)?; } - Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => {} Bytecode::PushConstant(idx) => { write!(f, "index: {}, ", idx)?; let constant = &env.literals[*idx as usize]; @@ -152,9 +151,6 @@ impl fmt::Display for Method { Bytecode::PushGlobal(idx) => { write!(f, "index: {}", idx)?; } - Bytecode::Push0 => {} - Bytecode::Push1 => {} - Bytecode::PushNil => {} Bytecode::Pop => {} Bytecode::PopLocal(up_idx, idx) => { write!(f, "local: {}, context: {}", idx, up_idx)?; @@ -165,16 +161,10 @@ impl fmt::Display for Method { Bytecode::PopField(idx) => { write!(f, "index: {}", idx)?; } - Bytecode::Send1(idx) | - Bytecode::Send2(idx) | - Bytecode::Send3(idx) | - Bytecode::SendN(idx) => { + Bytecode::Send(idx) => { write!(f, "index: {}", idx)?; } - Bytecode::SuperSend1(idx) | - Bytecode::SuperSend2(idx) | - Bytecode::SuperSend3(idx) | - Bytecode::SuperSendN(idx) => { + Bytecode::SuperSend(idx) => { write!(f, "index: {}", idx)?; } Bytecode::ReturnLocal => {} From 189630e097dfcea5d23debaa263f1155082aad3c Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 2 Feb 2024 15:55:04 +0000 Subject: [PATCH 64/88] minor disassembler tweak for better reasoning about jumps --- som-interpreter-bc/src/disassembler.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index 6797781d..d1a65fce 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -19,9 +19,9 @@ fn disassemble_body( ) { let padding = " |".repeat(level); let current = env.last().copied().unwrap(); - for (idx, bytecode) in current.get_body().into_iter().copied().enumerate() { - let extra_spaces_nbr = if idx >= 100 { 0 } else if (10..=99).contains(&idx) { 1 } else { 2 }; - print!("{idx} {0} {padding} {1}", " ".repeat(extra_spaces_nbr), bytecode.padded_name()); + for (cur_idx, bytecode) in current.get_body().into_iter().copied().enumerate() { + let extra_spaces_nbr = if cur_idx >= 100 { 0 } else if (10..=99).contains(&cur_idx) { 1 } else { 2 }; + print!("{cur_idx} {0} {padding} {1}", " ".repeat(extra_spaces_nbr), bytecode.padded_name()); // print!("{padding} {0}", bytecode.padded_name()); match bytecode { @@ -113,10 +113,13 @@ fn disassemble_body( Bytecode::ReturnNonLocal => { println!(); }, - Bytecode::Jump(idx) | Bytecode::JumpBackward(idx) | + Bytecode::Jump(idx) | Bytecode::JumpOnFalsePop(idx) | Bytecode::JumpOnTruePop(idx) | Bytecode::JumpOnFalseTopNil(idx) | Bytecode::JumpOnTrueTopNil(idx) => { - println!("{}", idx); + println!(" {} (jump to bytecode index {})", idx, cur_idx + idx); + }, + Bytecode::JumpBackward(idx) => { + println!(" {} (jump to bytecode index {})", idx, cur_idx - idx); } } } From 10670c6bde6fa1c7b487d5fb9ca6edea5452ec56 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 2 Feb 2024 15:55:50 +0000 Subject: [PATCH 65/88] config for running benchmarks on yuria1 --- .gitlab-ci.yml | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ rebench.conf | 2 +- 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..a260f2b8 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,55 @@ +stages: + - build-test + - benchmark + - benchmark-completion + +#variables: + +before_script: + - git submodule update --init + +build-and-test-interpreters: + stage: build-test + tags: [yuria] + script: + # Setup + - cargo clean + - cargo build --release + + # Unit Tests +# - PYTHONPATH=src python3 -m pytest +# - ./som.sh -cp Smalltalk TestSuite/TestHarness.som + + # Interpreter +# - $RPYTHON --batch src/main_rpython.py +# - ./som-bc-interp -cp Smalltalk TestSuite/TestHarness.som + + # Package and Upload + - lz4 ./target/release/som-interpreter-ast som-interpreter-ast.lz4 + - lz4 ./target/release/som-interpreter-bc som-interpreter-bc.lz4 + + - | + sftp tmp-artifacts << EOF + -mkdir incoming/${CI_PIPELINE_ID}/ + put ${PROJECT_FOLDER}/som-interpreter-ast.lz4 incoming/${CI_PIPELINE_ID}/ + put ${PROJECT_FOLDER}/som-interpreter-bc.lz4 incoming/${CI_PIPELINE_ID}/ + EOF + +benchmark-y1: + stage: benchmark + tags: [yuria] + script: + - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-ast.lz4 + - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-bc.lz4 + + - lz4 -d som-interpreter-ast.lz4 som-interpreter-ast + - lz4 -d som-interpreter-bc.lz4 som-interpreter-bc + + # Run Benchmarks + - rebench --experiment="CI ID $CI_PIPELINE_ID" --branch="$CI_COMMIT_REF_NAME" -c rebench.conf m:yuria + +report-completion: + stage: benchmark-completion + tags: [yuria] + script: + - rebench --experiment="CI ID $CI_PIPELINE_ID" --report-completion rebench.conf diff --git a/rebench.conf b/rebench.conf index 2433d391..97d2f0dd 100644 --- a/rebench.conf +++ b/rebench.conf @@ -7,7 +7,7 @@ reporting: # Benchmark results will be reported to ReBenchDB rebenchdb: # this url needs to point to the API endpoint - db_url: http://localhost:33333/rebenchdb + db_url: https://rebench.stefan-marr.de/rebenchdb repo_url: https://github.com/OctaveLarose/som-rs record_all: true # make sure everything is recorded project_name: som-rs From 3c9a85608ad52b1c56800184865e8a28a48f43c0 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 2 Feb 2024 16:02:49 +0000 Subject: [PATCH 66/88] fixed interpreter sftp --- .gitlab-ci.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a260f2b8..82ffbf16 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -31,8 +31,8 @@ build-and-test-interpreters: - | sftp tmp-artifacts << EOF -mkdir incoming/${CI_PIPELINE_ID}/ - put ${PROJECT_FOLDER}/som-interpreter-ast.lz4 incoming/${CI_PIPELINE_ID}/ - put ${PROJECT_FOLDER}/som-interpreter-bc.lz4 incoming/${CI_PIPELINE_ID}/ + put som-interpreter-ast.lz4 incoming/${CI_PIPELINE_ID}/ + put som-interpreter-bc.lz4 incoming/${CI_PIPELINE_ID}/ EOF benchmark-y1: @@ -42,8 +42,9 @@ benchmark-y1: - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-ast.lz4 - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-bc.lz4 - - lz4 -d som-interpreter-ast.lz4 som-interpreter-ast - - lz4 -d som-interpreter-bc.lz4 som-interpreter-bc + # force because it already exists. which means this is possibly a very useless operation. + - lz4 -df som-interpreter-ast.lz4 som-interpreter-ast + - lz4 -df som-interpreter-bc.lz4 som-interpreter-bc # Run Benchmarks - rebench --experiment="CI ID $CI_PIPELINE_ID" --branch="$CI_COMMIT_REF_NAME" -c rebench.conf m:yuria From a90242bf28e5a52c46fdf3222741b22de48ca08e Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 2 Feb 2024 16:14:46 +0000 Subject: [PATCH 67/88] fixed gitlab-ci, maybe --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 82ffbf16..2510fb83 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,9 +42,9 @@ benchmark-y1: - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-ast.lz4 - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-bc.lz4 - # force because it already exists. which means this is possibly a very useless operation. - - lz4 -df som-interpreter-ast.lz4 som-interpreter-ast - - lz4 -df som-interpreter-bc.lz4 som-interpreter-bc + - mkdir -p ./target/release + - lz4 -d som-interpreter-ast.lz4 ./target/release/som-interpreter-ast + - lz4 -d som-interpreter-bc.lz4 ./target/release/som-interpreter-bc # Run Benchmarks - rebench --experiment="CI ID $CI_PIPELINE_ID" --branch="$CI_COMMIT_REF_NAME" -c rebench.conf m:yuria From 0357b31ef10cb597a3449d58a62e16163bb74999 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 2 Feb 2024 16:18:41 +0000 Subject: [PATCH 68/88] fixed gitlab-ci, probably! --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2510fb83..8d0b66c2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -47,7 +47,7 @@ benchmark-y1: - lz4 -d som-interpreter-bc.lz4 ./target/release/som-interpreter-bc # Run Benchmarks - - rebench --experiment="CI ID $CI_PIPELINE_ID" --branch="$CI_COMMIT_REF_NAME" -c rebench.conf m:yuria + - rebench --experiment="CI ID $CI_PIPELINE_ID" --branch="$CI_COMMIT_REF_NAME" -c rebench.conf report-completion: stage: benchmark-completion From df26e6025873fc37bbf5976455caef786ebe5b5b Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 5 Feb 2024 10:24:00 +0000 Subject: [PATCH 69/88] inlining: changing core-lib version to the same as master --- core-lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-lib b/core-lib index b0c4abfa..afd5a63b 160000 --- a/core-lib +++ b/core-lib @@ -1 +1 @@ -Subproject commit b0c4abfa9096ba845b2113ad872b599883cfe624 +Subproject commit afd5a63b662eca78de80b37653daf7f0a0ece958 From 1fe112355f380eae2eeea9cdb55af2fec48b94b0 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 5 Feb 2024 16:34:59 +0000 Subject: [PATCH 70/88] inlining or and and, too --- som-interpreter-bc/src/inliner.rs | 60 +++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 7d2e350a..9c207f76 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -12,6 +12,11 @@ pub enum JumpType { JumpOnTrue } +pub enum OrAndChoice { + Or, + And +} + // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; @@ -20,6 +25,7 @@ pub trait PrimMessageInliner { fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; + fn inline_or_and(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, or_and_choice: OrAndChoice) -> Option<()>; } impl PrimMessageInliner for ast::Expression { @@ -31,7 +37,9 @@ impl PrimMessageInliner for ast::Expression { "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), - // TODO: [or, and] + "or:" => self.inline_or_and(ctxt, message, OrAndChoice::Or), + "and:" => self.inline_or_and(ctxt, message, OrAndChoice::And), + // TODO: to:do _ => None } } @@ -43,14 +51,9 @@ impl PrimMessageInliner for ast::Expression { let og_scope = rand_thread.gen(); // does this matter? should it be the exact same as the original compiled block? i'm thinking it's fine like this? for block_local_intern_id in &block.locals { let symbol_str = ctxt.lookup_symbol(*block_local_intern_id); - // ctxt.push_local(String::from(symbol_str), ctxt.current_scope() + 1); ctxt.push_local(String::from(symbol_str), og_scope); } - // dbg!(&block.body); - - // let idx_start_inlining = ctxt.get_cur_instr_idx(); - // last is always ReturnLocal, so it gets ignored if let Some((_, body)) = block.body.split_last() { for block_bc in body { @@ -82,11 +85,8 @@ impl PrimMessageInliner for ast::Expression { Bytecode::PushBlock(block_idx) => { match block.literals.get(*block_idx as usize)? { Literal::Block(inner_block) => { - // dbg!(&inner_block.ast_body); - // dbg!(&inner_block.blk_info.body); let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; let idx = ctxt.push_literal(Literal::Block(Rc::from(new_block))); - // dbg!(idx); ctxt.push_instr(Bytecode::PushBlock(idx as u8)); }, _ => panic!("PushBlock not actually pushing a block somehow") @@ -111,7 +111,7 @@ impl PrimMessageInliner for ast::Expression { Bytecode::ReturnNonLocal => { // TODO; if the new context level is 0 (check prev bytecode emitted?), gotta emit a RETURNLOCAL instead! // as far as i understand... this still works? and is just slower? TODO fix though obviously - // dbg!("wow"); + // dbg!(&ctxt.get_instructions().last()); // match ctxt.get_instructions().last().unwrap() { // Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnLocal), @@ -119,7 +119,7 @@ impl PrimMessageInliner for ast::Expression { // } ctxt.push_instr(Bytecode::ReturnNonLocal) }, - Bytecode::ReturnLocal => {}, //panic!("Is that a thing? If so, just ignore it."), + Bytecode::ReturnLocal => {}, // todo: hmm... do we? if so, add these to the _ case i guess. // Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + idx_start_inlining)), Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(*idx)), @@ -179,7 +179,7 @@ impl PrimMessageInliner for ast::Expression { // self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump_to_current(jump_idx); - return Some(()); + Some(()) } fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { @@ -213,7 +213,7 @@ impl PrimMessageInliner for ast::Expression { ctxt.backpatch_jump_to_current(middle_jump_idx); - return Some(()); + Some(()) } fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { @@ -252,6 +252,38 @@ impl PrimMessageInliner for ast::Expression { let idx = ctxt.push_literal(Literal::Symbol(name)); ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); - return Some(()); + Some(()) + } + + fn inline_or_and(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, or_and_choice: OrAndChoice) -> Option<()> { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + return None; + } + + let skip_cond_jump_idx = ctxt.get_cur_instr_idx(); + + match or_and_choice { + OrAndChoice::Or => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), + OrAndChoice::And => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)) + } + + message.values.get(0)?.codegen(ctxt)?; + self.inline_last_push_block_bc(ctxt); + + let skip_return_true_idx = ctxt.get_cur_instr_idx(); + ctxt.push_instr(Bytecode::Jump(0)); + + ctxt.backpatch_jump_to_current(skip_cond_jump_idx); + + let name = match or_and_choice { + OrAndChoice::Or => ctxt.intern_symbol("true"), + OrAndChoice::And => ctxt.intern_symbol("false") + }; + let idx = ctxt.push_literal(Literal::Symbol(name)); + ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); + + ctxt.backpatch_jump_to_current(skip_return_true_idx); + + Some(()) } } \ No newline at end of file From 5f4d3db33770c00bb54167bf55f5e809fbea3663 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 5 Feb 2024 16:34:59 +0000 Subject: [PATCH 71/88] inlining or and and, too --- som-interpreter-bc/src/inliner.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 9c207f76..3930f09a 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -6,6 +6,7 @@ use crate::block::{BlockInfo}; use crate::compiler::{compile_block, InnerGenCtxt, Literal}; use crate::compiler::MethodCodegen; use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; +use crate::inliner::OrAndChoice::{Or, And}; pub enum JumpType { JumpOnFalse, @@ -37,9 +38,9 @@ impl PrimMessageInliner for ast::Expression { "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), - "or:" => self.inline_or_and(ctxt, message, OrAndChoice::Or), - "and:" => self.inline_or_and(ctxt, message, OrAndChoice::And), - // TODO: to:do + "or:" => self.inline_or_and(ctxt, message, Or), + "and:" => self.inline_or_and(ctxt, message, And), + // TODO: to:do, maybe others i'm forgetting _ => None } } From 7310443bb5b7a99746f70b8c5b1335527d4de089 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 6 Feb 2024 09:48:11 +0000 Subject: [PATCH 72/88] only inlining or, to see if only and breaks --- som-interpreter-bc/src/inliner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 3930f09a..8953b493 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -39,7 +39,7 @@ impl PrimMessageInliner for ast::Expression { "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), "or:" => self.inline_or_and(ctxt, message, Or), - "and:" => self.inline_or_and(ctxt, message, And), + // "and:" => self.inline_or_and(ctxt, message, And), // TODO: to:do, maybe others i'm forgetting _ => None } From ce750a0ae1580d372c639fae14e725d36fb5c6fd Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Tue, 6 Feb 2024 13:42:35 +0000 Subject: [PATCH 73/88] basic inlining tests --- som-interpreter-bc/src/inliner.rs | 10 +- som-interpreter-bc/tests/inlining_tests.rs | 173 +++++++++++++++++++++ 2 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 som-interpreter-bc/tests/inlining_tests.rs diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 8953b493..e49914fd 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -38,7 +38,7 @@ impl PrimMessageInliner for ast::Expression { "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), - "or:" => self.inline_or_and(ctxt, message, Or), + // "or:" => self.inline_or_and(ctxt, message, Or), // "and:" => self.inline_or_and(ctxt, message, And), // TODO: to:do, maybe others i'm forgetting _ => None @@ -264,8 +264,8 @@ impl PrimMessageInliner for ast::Expression { let skip_cond_jump_idx = ctxt.get_cur_instr_idx(); match or_and_choice { - OrAndChoice::Or => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), - OrAndChoice::And => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)) + Or => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), + And => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)) } message.values.get(0)?.codegen(ctxt)?; @@ -277,8 +277,8 @@ impl PrimMessageInliner for ast::Expression { ctxt.backpatch_jump_to_current(skip_cond_jump_idx); let name = match or_and_choice { - OrAndChoice::Or => ctxt.intern_symbol("true"), - OrAndChoice::And => ctxt.intern_symbol("false") + Or => ctxt.intern_symbol("true"), + And => ctxt.intern_symbol("false") }; let idx = ctxt.push_literal(Literal::Symbol(name)); ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); diff --git a/som-interpreter-bc/tests/inlining_tests.rs b/som-interpreter-bc/tests/inlining_tests.rs new file mode 100644 index 00000000..6ba8c0d6 --- /dev/null +++ b/som-interpreter-bc/tests/inlining_tests.rs @@ -0,0 +1,173 @@ +use std::path::PathBuf; +use som_core::bytecode::Bytecode; + +use som_interpreter_bc::compiler; +use som_interpreter_bc::method::MethodKind; +use som_interpreter_bc::universe::Universe; +use som_lexer::{Lexer, Token}; +use som_parser::lang; + +fn setup_universe() -> Universe { + let classpath = vec![ + PathBuf::from("../core-lib/Smalltalk"), + PathBuf::from("../core-lib/TestSuite/BasicInterpreterTests"), + ]; + Universe::with_classpath(classpath).expect("could not setup test universe") +} + +fn get_bytecodes_from_method(class_txt: &str, method_name: &str) -> Vec { + let mut universe = setup_universe(); + + let method_name_interned = universe.intern_symbol(method_name); + + let mut lexer = Lexer::new(class_txt).skip_comments(true).skip_whitespace(true); + let tokens: Vec = lexer.by_ref().collect(); + assert!( + lexer.text().is_empty(), + "could not fully tokenize test expression" + ); + + let class_def = som_parser::apply(lang::class_def(), tokens.as_slice()).unwrap(); + + let object_class = universe.object_class(); + let class = compiler::compile_class(&mut universe.interner, &class_def, Some(&object_class)); + assert!(class.is_some(), "could not compile test expression"); + + let class = class.unwrap(); + let method = class + .borrow() + .lookup_method(method_name_interned) + .expect("method not found ??"); + + match &method.as_ref().kind { + MethodKind::Defined(m) => m.body.clone(), + _ => unreachable!() + } +} + +fn expect_bytecode_sequence(bytecodes: &Vec, expected_bc_sequence: &[Bytecode]) { + assert!(bytecodes.windows(expected_bc_sequence.len()).any(|window| window == expected_bc_sequence)) +} + +#[test] +fn if_true_or_false_inlining_ok() { + let class_txt = "Foo = ( run = ( + true ifTrue: [ ^true ]. + ^ false + )) + "; + + let bytecodes = get_bytecodes_from_method(class_txt, "run"); + + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::PushGlobal(0), + Bytecode::JumpOnFalseTopNil(3), + Bytecode::PushGlobal(0), + Bytecode::ReturnNonLocal, + Bytecode::Pop, + Bytecode::PushGlobal(1), + Bytecode::ReturnNonLocal, + Bytecode::Pop, + Bytecode::PushArgument(0, 0), + Bytecode::ReturnLocal + ]); + + let class_txt2 = "Foo = ( run = ( + false ifFalse: [ ^false ]. + ^ true + )) + "; + + let bytecodes = get_bytecodes_from_method(class_txt2, "run"); + + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::PushGlobal(0), + Bytecode::JumpOnTrueTopNil(3), + Bytecode::PushGlobal(0), + Bytecode::ReturnNonLocal, + Bytecode::Pop, + Bytecode::PushGlobal(1), + Bytecode::ReturnNonLocal, + Bytecode::Pop, + Bytecode::PushArgument(0, 0), + Bytecode::ReturnLocal + ]); +} + +#[test] +fn if_true_if_false_inlining_ok() { + let class_txt = "Foo = ( run = ( true ifTrue: [ ^true ] ifFalse: [ ^false]. ))"; + + let bytecodes = get_bytecodes_from_method(class_txt, "run"); + + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::PushGlobal(0), + Bytecode::JumpOnFalsePop(4), + Bytecode::PushGlobal(0), + Bytecode::ReturnNonLocal, + Bytecode::Jump(3), + Bytecode::PushGlobal(1), + Bytecode::ReturnNonLocal, + Bytecode::Pop, + Bytecode::PushArgument(0, 0), + Bytecode::ReturnLocal, + ]); + + let class_txt2 = "Foo = ( run = ( true ifFalse: [ ^false ] ifTrue: [ ^ true]. ))"; + + let bytecodes = get_bytecodes_from_method(class_txt2, "run"); + + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::PushGlobal(0), + Bytecode::JumpOnTruePop(4), + Bytecode::PushGlobal(1), + Bytecode::ReturnNonLocal, + Bytecode::Jump(3), + Bytecode::PushGlobal(0), + Bytecode::ReturnNonLocal, + Bytecode::Pop, + Bytecode::PushArgument(0, 0), + Bytecode::ReturnLocal, + ]); +} + +#[test] +fn while_true_false_inlining_ok() { + let class_txt = "Foo = ( run = ( + | cnt | + cnt := 0. + [ cnt < 1000000 ] whileTrue: [ + cnt := cnt + 1. + ] + )) + "; + + let bytecodes = get_bytecodes_from_method(class_txt, "run"); + + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::JumpOnFalsePop(8), + Bytecode::PushLocal(0, 0), + Bytecode::PushConstant(3), + Bytecode::Send(4), + Bytecode::Dup, + Bytecode::PopLocal(0, 0), + Bytecode::Pop, + Bytecode::JumpBackward(10) + ]); + + let class_txt_2 = class_txt.replace("whileTrue", "whileFalse"); + let bytecodes = get_bytecodes_from_method(class_txt_2.as_str(), "run"); + + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::JumpOnTruePop(8), + Bytecode::PushLocal(0, 0), + Bytecode::PushConstant(3), + Bytecode::Send(4), + Bytecode::Dup, + Bytecode::PopLocal(0, 0), + Bytecode::Pop, + Bytecode::JumpBackward(10) + ]) +} + +// TODO or, and \ No newline at end of file From 5efc26a6f4b12db8afcb80a318a8df6505050dd0 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 7 Feb 2024 12:27:23 +0000 Subject: [PATCH 74/88] or/and inlining + tests for that --- .gitlab-ci.yml | 2 ++ som-interpreter-bc/src/inliner.rs | 8 +++--- som-interpreter-bc/tests/inlining_tests.rs | 33 +++++++++++++++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8d0b66c2..3651ca24 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,6 +16,8 @@ build-and-test-interpreters: - cargo clean - cargo build --release + - cargo test + # Unit Tests # - PYTHONPATH=src python3 -m pytest # - ./som.sh -cp Smalltalk TestSuite/TestHarness.som diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index e49914fd..2bd5c668 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -38,8 +38,8 @@ impl PrimMessageInliner for ast::Expression { "ifFalse:ifTrue:" => self.inline_if_true_if_false(ctxt, message, JumpOnTrue), "whileTrue:" => self.inline_while(ctxt, message, JumpOnFalse), "whileFalse:" => self.inline_while(ctxt, message, JumpOnTrue), - // "or:" => self.inline_or_and(ctxt, message, Or), - // "and:" => self.inline_or_and(ctxt, message, And), + "or:" => self.inline_or_and(ctxt, message, Or), + "and:" => self.inline_or_and(ctxt, message, And), // TODO: to:do, maybe others i'm forgetting _ => None } @@ -264,8 +264,8 @@ impl PrimMessageInliner for ast::Expression { let skip_cond_jump_idx = ctxt.get_cur_instr_idx(); match or_and_choice { - Or => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), - And => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)) + Or => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), + And => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)) } message.values.get(0)?.codegen(ctxt)?; diff --git a/som-interpreter-bc/tests/inlining_tests.rs b/som-interpreter-bc/tests/inlining_tests.rs index 6ba8c0d6..52f088e6 100644 --- a/som-interpreter-bc/tests/inlining_tests.rs +++ b/som-interpreter-bc/tests/inlining_tests.rs @@ -170,4 +170,35 @@ fn while_true_false_inlining_ok() { ]) } -// TODO or, and \ No newline at end of file +#[test] +fn or_and_inlining_ok() { + let class_txt = "Foo = ( run = ( + ^ (true or: [ false ]) + )) + "; + + let bytecodes = get_bytecodes_from_method(class_txt, "run"); + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::PushGlobal(0), + Bytecode::JumpOnTruePop(3), + Bytecode::PushGlobal(1), + Bytecode::Jump(2), + Bytecode::PushGlobal(0), + Bytecode::ReturnNonLocal + ]); + + let class_txt2 = "Foo = ( run = ( + ^ (true and: [ false ]) + )) + "; + + let bytecodes = get_bytecodes_from_method(class_txt2, "run"); + expect_bytecode_sequence(&bytecodes, &[ + Bytecode::PushGlobal(0), + Bytecode::JumpOnFalsePop(3), + Bytecode::PushGlobal(1), + Bytecode::Jump(2), + Bytecode::PushGlobal(1), + Bytecode::ReturnNonLocal + ]); +} \ No newline at end of file From dd6b338baeba9f1b91732f5030760a3b25716e2d Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 7 Feb 2024 15:42:39 +0000 Subject: [PATCH 75/88] some more advanced inlining tests --- som-interpreter-bc/tests/inlining_tests.rs | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/som-interpreter-bc/tests/inlining_tests.rs b/som-interpreter-bc/tests/inlining_tests.rs index 52f088e6..22fb2ac9 100644 --- a/som-interpreter-bc/tests/inlining_tests.rs +++ b/som-interpreter-bc/tests/inlining_tests.rs @@ -201,4 +201,42 @@ fn or_and_inlining_ok() { Bytecode::PushGlobal(1), Bytecode::ReturnNonLocal ]); +} + +#[test] +fn inlining_pyramid() { + let class_txt = "Foo = ( run = ( + | a b c d e f g | + ^ (a ifTrue: [b ifTrue: [c ifTrue: [d ifTrue: [e ifTrue: [f ifTrue: [g]]]]]]) + )) + "; + + let class_txt2 = "Foo = ( run = ( + | a | + ^ (a ifTrue: [| b | b ifTrue: [| c | c ifTrue: [| d | d ifTrue: [| e | e ifTrue: [| f | f ifTrue: [| g | g]]]]]]) + )) + "; + + let bytecodes = get_bytecodes_from_method(class_txt, "run"); + let bytecodes2 = get_bytecodes_from_method(class_txt2, "run"); + + let expected_bc = &[ + Bytecode::PushLocal(0, 0), + Bytecode::JumpOnFalseTopNil(12), + Bytecode::PushLocal(0, 1), + Bytecode::JumpOnFalseTopNil(10), + Bytecode::PushLocal(0, 2), + Bytecode::JumpOnFalseTopNil(8), + Bytecode::PushLocal(0, 3), + Bytecode::JumpOnFalseTopNil(6), + Bytecode::PushLocal(0, 4), + Bytecode::JumpOnFalseTopNil(4), + Bytecode::PushLocal(0, 5), + Bytecode::JumpOnFalseTopNil(2), + Bytecode::PushLocal(0, 6), + Bytecode::ReturnNonLocal + ]; + + expect_bytecode_sequence(&bytecodes, expected_bc); + expect_bytecode_sequence(&bytecodes2, expected_bc); } \ No newline at end of file From b452df927002f41be4de0cd20a453eb4658ca116 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 12 Feb 2024 20:54:18 +0000 Subject: [PATCH 76/88] yuria2'ing --- .gitlab-ci.yml | 8 ++++---- rebench.conf | 52 +++++++++++++++++++++++++------------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3651ca24..1392144b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,7 +10,7 @@ before_script: build-and-test-interpreters: stage: build-test - tags: [yuria] + tags: [yuria2] script: # Setup - cargo clean @@ -37,9 +37,9 @@ build-and-test-interpreters: put som-interpreter-bc.lz4 incoming/${CI_PIPELINE_ID}/ EOF -benchmark-y1: +benchmark-y2: stage: benchmark - tags: [yuria] + tags: [yuria2] script: - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-ast.lz4 - sftp tmp-artifacts:incoming/${CI_PIPELINE_ID}/som-interpreter-bc.lz4 @@ -53,6 +53,6 @@ benchmark-y1: report-completion: stage: benchmark-completion - tags: [yuria] + tags: [yuria2] script: - rebench --experiment="CI ID $CI_PIPELINE_ID" --report-completion rebench.conf diff --git a/rebench.conf b/rebench.conf index 97d2f0dd..1af79853 100644 --- a/rebench.conf +++ b/rebench.conf @@ -21,38 +21,38 @@ benchmark_suites: command: &MACRO_CMD "-c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/Richards core-lib/Examples/Benchmarks/DeltaBlue core-lib/Examples/Benchmarks/NBody core-lib/Examples/Benchmarks/Json core-lib/Examples/Benchmarks/GraphSearch -- BenchmarkHarness %(benchmark)s %(iterations)s " iterations: 10 benchmarks: - - Richards: {extra_args: 1} - - DeltaBlue: {extra_args: 50} - - NBody: {extra_args: 500} - - JsonSmall: {extra_args: 1} - - GraphSearch: {extra_args: 4} - - PageRank: {extra_args: 40} + - Richards: {extra_args: 1, machines: [yuria2]} + - DeltaBlue: {extra_args: 50, machines: [yuria2]} + - NBody: {extra_args: 500, machines: [yuria2]} + - JsonSmall: {extra_args: 1, machines: [yuria2]} + - GraphSearch: {extra_args: 4, machines: [yuria2]} + - PageRank: {extra_args: 40, machines: [yuria2]} micro: gauge_adapter: RebenchLog command: "-c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/LanguageFeatures -- BenchmarkHarness %(benchmark)s %(iterations)s " iterations: 10 benchmarks: - - Fannkuch: {extra_args: 6} - - Fibonacci: {extra_args: "3"} - - Dispatch: {extra_args: 2} - - Bounce: {extra_args: "2"} - - Loop: {extra_args: 5} - - Permute: {extra_args: "3"} - - Queens: {extra_args: "2"} - - List: {extra_args: "2"} - - Recurse: {extra_args: "3"} - - Storage: {extra_args: 1} - - Sieve: {extra_args: 4} - - BubbleSort: {extra_args: "3"} - - QuickSort: {extra_args: 1} - - Sum: {extra_args: 2} - - Towers: {extra_args: "2"} - - TreeSort: {extra_args: "1"} - - IntegerLoop: {extra_args: 2} - - FieldLoop: {extra_args: 1} - - WhileLoop: {extra_args: 10} - - Mandelbrot: {extra_args: 30} + - Fannkuch: {extra_args: 6, machines: [yuria2]} + - Fibonacci: {extra_args: "3", machines: [yuria2]} + - Dispatch: {extra_args: 2, machines: [yuria2]} + - Bounce: {extra_args: "2", machines: [yuria2]} + - Loop: {extra_args: 5, machines: [yuria2]} + - Permute: {extra_args: "3", machines: [yuria2]} + - Queens: {extra_args: "2", machines: [yuria2]} + - List: {extra_args: "2", machines: [yuria2]} + - Recurse: {extra_args: "3", machines: [yuria2]} + - Storage: {extra_args: 1, machines: [yuria2]} + - Sieve: {extra_args: 4, machines: [yuria2]} + - BubbleSort: {extra_args: "3", machines: [yuria2]} + - QuickSort: {extra_args: 1, machines: [yuria2]} + - Sum: {extra_args: 2, machines: [yuria2]} + - Towers: {extra_args: "2", machines: [yuria2]} + - TreeSort: {extra_args: "1", machines: [yuria2]} + - IntegerLoop: {extra_args: 2, machines: [yuria2]} + - FieldLoop: {extra_args: 1, machines: [yuria2]} + - WhileLoop: {extra_args: 10, machines: [yuria2]} + - Mandelbrot: {extra_args: 30, machines: [yuria2]} executors: som-rs-ast: From fb7ea67fdb894e523d116438ea32987edba98a20 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Thu, 22 Feb 2024 16:18:56 +0000 Subject: [PATCH 77/88] BROKEN: ongoing merge with specialized bc branch. Lacking dup_popx_pop etc logic --- som-core/src/bytecode.rs | 2 +- som-interpreter-bc/src/block.rs | 5 +- som-interpreter-bc/src/compiler.rs | 58 +++-- som-interpreter-bc/src/disassembler.rs | 28 ++- som-interpreter-bc/src/inliner.rs | 240 ++++++++++++++------- som-interpreter-bc/src/interpreter.rs | 36 ++-- som-interpreter-bc/src/method.rs | 2 +- som-interpreter-bc/tests/inlining_tests.rs | 203 +++++++++-------- 8 files changed, 356 insertions(+), 218 deletions(-) diff --git a/som-core/src/bytecode.rs b/som-core/src/bytecode.rs index 4986d879..4b849a28 100644 --- a/som-core/src/bytecode.rs +++ b/som-core/src/bytecode.rs @@ -36,7 +36,7 @@ pub enum Bytecode { JumpOnTrueTopNil(usize), JumpOnFalseTopNil(usize), JumpOnTruePop(usize), - JumpOnFalsePop(usize) + JumpOnFalsePop(usize), } // TODO check case, padding, for pull request diff --git a/som-interpreter-bc/src/block.rs b/som-interpreter-bc/src/block.rs index 299b11fd..a93c99c6 100644 --- a/som-interpreter-bc/src/block.rs +++ b/som-interpreter-bc/src/block.rs @@ -1,7 +1,7 @@ +use som_core::ast; use std::cell::RefCell; use std::fmt; use std::rc::Rc; -use som_core::ast; use som_core::bytecode::Bytecode; @@ -30,7 +30,8 @@ pub struct Block { pub blk_info: Rc, // OLarose: not a fan... but it's needed when inlining to be able to recreate a working version of the block from the original AST // (see PushBlock in inliner) - pub ast_body: ast::Block, } + pub ast_body: ast::Block, +} impl Block { /// Get the block's class. diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 1fd9ec1e..0e6c639f 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -14,12 +14,12 @@ use som_core::bytecode::Bytecode; use crate::block::{Block, BlockInfo}; use crate::class::{Class, MaybeWeak}; +use crate::inliner::PrimMessageInliner; use crate::interner::{Interned, Interner}; use crate::method::{Method, MethodEnv, MethodKind}; use crate::primitives; use crate::value::Value; use crate::SOMRef; -use crate::inliner::PrimMessageInliner; #[derive(Debug, Clone)] pub enum Literal { @@ -134,17 +134,24 @@ impl GenCtxt for BlockGenCtxt<'_> { // first check the locals in this scope, then check the locals that were inlined into the scope (i.e. have a different original scope) // needed because when you inline a block, it can contain some PUSH_BLOCKs where we recompile the block, therefore scope info gets out of whack // it's not a great solution, pretty slow. a better one would be that when we recompile the blocks, we adjust their bytecode directly which -should- circumvent the issue? - (self.locals.iter().position(|(local_name, local_scope)| { local_name == name && (*local_scope == self.current_scope()) })) - .map(|idx| FoundVar::Local(0, idx as u8)) - .or_else(|| self.locals.iter().position(|(local_name, _)| local_name == name).map(|idx| FoundVar::Local(0, idx as u8))) - .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) - .or_else(|| { - self.outer.find_var(name).map(|found| match found { - FoundVar::Local(up_idx, idx) => FoundVar::Local(up_idx + 1, idx), - FoundVar::Argument(up_idx, idx) => FoundVar::Argument(up_idx + 1, idx), - FoundVar::Field(idx) => FoundVar::Field(idx), - }) + (self.locals.iter().position(|(local_name, local_scope)| { + local_name == name && (*local_scope == self.current_scope()) + })) + .map(|idx| FoundVar::Local(0, idx as u8)) + .or_else(|| { + self.locals + .iter() + .position(|(local_name, _)| local_name == name) + .map(|idx| FoundVar::Local(0, idx as u8)) + }) + .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) + .or_else(|| { + self.outer.find_var(name).map(|found| match found { + FoundVar::Local(up_idx, idx) => FoundVar::Local(up_idx + 1, idx), + FoundVar::Argument(up_idx, idx) => FoundVar::Argument(up_idx + 1, idx), + FoundVar::Field(idx) => FoundVar::Field(idx), }) + }) } fn intern_symbol(&mut self, name: &str) -> Interned { @@ -220,8 +227,8 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { Bytecode::JumpOnFalseTopNil(_) => Bytecode::JumpOnFalseTopNil(jump_offset), Bytecode::JumpOnTruePop(_) => Bytecode::JumpOnTruePop(jump_offset), Bytecode::JumpOnFalsePop(_) => Bytecode::JumpOnFalsePop(jump_offset), - _ => panic!("Attempting to backpatch a bytecode non jump") - }; + _ => panic!("Attempting to backpatch a bytecode non jump"), + }; } fn patch_jump(&mut self, idx_to_backpatch: usize, new_val: usize) { @@ -233,7 +240,7 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { Bytecode::JumpOnFalseTopNil(_) => Bytecode::JumpOnFalseTopNil(new_val), Bytecode::JumpOnTruePop(_) => Bytecode::JumpOnTruePop(new_val), Bytecode::JumpOnFalsePop(_) => Bytecode::JumpOnFalsePop(new_val), - _ => panic!("Attempting to patch a bytecode non jump") + _ => panic!("Attempting to patch a bytecode non jump"), }; } @@ -309,7 +316,6 @@ impl GenCtxt for MethodGenCtxt<'_> { } impl InnerGenCtxt for MethodGenCtxt<'_> { - fn as_gen_ctxt(&mut self) -> &mut dyn GenCtxt { self } @@ -560,7 +566,9 @@ impl GenCtxt for ClassGenCtxt<'_> { } fn current_scope(&self) -> usize { - panic!("Asking for the current scope of a class, and not a block/method, makes little sense.") + panic!( + "Asking for the current scope of a class, and not a block/method, makes little sense." + ) } } @@ -578,11 +586,13 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option IndexSet::new(), - ast::MethodBody::Body { locals, .. } => locals.iter().cloned().map(|s| (s, 0)).collect(), + ast::MethodBody::Body { locals, .. } => { + locals.iter().cloned().map(|s| (s, 0)).collect() + } }, literals: IndexSet::new(), body: None, - scope: 0 + scope: 0, }, }; @@ -613,7 +623,6 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option MethodKind::NotImplemented(defn.signature.clone()), ast::MethodBody::Body { .. } => { let locals = { @@ -653,10 +662,15 @@ pub(crate) fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Optio let mut ctxt = BlockGenCtxt { outer, args: defn.parameters.iter().cloned().collect(), - locals: defn.locals.iter().cloned().map(|s| (s, block_scope)).collect(), + locals: defn + .locals + .iter() + .cloned() + .map(|s| (s, block_scope)) + .collect(), literals: IndexSet::new(), body: None, - scope: block_scope + scope: block_scope, }; let splitted = defn.body.exprs.split_last(); @@ -692,7 +706,7 @@ pub(crate) fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Optio nb_params, inline_cache, }), - ast_body: defn.clone() // not a fan of this, only needed during parsing and cloning feels needless... TODO + ast_body: defn.clone(), // not a fan of this, only needed during parsing and cloning feels needless... TODO }; // println!("(system) compiled block !"); diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index 3d552d58..c7667c2e 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -20,8 +20,18 @@ fn disassemble_body( let padding = " |".repeat(level); let current = env.last().copied().unwrap(); for (cur_idx, bytecode) in current.get_body().into_iter().copied().enumerate() { - let extra_spaces_nbr = if cur_idx >= 100 { 0 } else if (10..=99).contains(&cur_idx) { 1 } else { 2 }; - print!("{cur_idx} {0} {padding} {1}", " ".repeat(extra_spaces_nbr), bytecode.padded_name()); + let extra_spaces_nbr = if cur_idx >= 100 { + 0 + } else if (10..=99).contains(&cur_idx) { + 1 + } else { + 2 + }; + print!( + "{cur_idx} {0} {padding} {1}", + " ".repeat(extra_spaces_nbr), + bytecode.padded_name() + ); // print!("{padding} {0}", bytecode.padded_name()); match bytecode { @@ -124,18 +134,20 @@ fn disassemble_body( continue; }; println!(" (#{0})", universe.lookup_symbol(*signature)); - }, + } Bytecode::ReturnLocal => { println!(); } Bytecode::ReturnNonLocal => { println!(); - }, - Bytecode::Jump(idx) | - Bytecode::JumpOnFalsePop(idx) | Bytecode::JumpOnTruePop(idx) | - Bytecode::JumpOnFalseTopNil(idx) | Bytecode::JumpOnTrueTopNil(idx) => { + } + Bytecode::Jump(idx) + | Bytecode::JumpOnFalsePop(idx) + | Bytecode::JumpOnTruePop(idx) + | Bytecode::JumpOnFalseTopNil(idx) + | Bytecode::JumpOnTrueTopNil(idx) => { println!(" {} (jump to bytecode index {})", idx, cur_idx + idx); - }, + } Bytecode::JumpBackward(idx) => { println!(" {} (jump to bytecode index {})", idx, cur_idx - idx); } diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 7cbf3e18..eb2b0652 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,36 +1,61 @@ -use std::rc::Rc; +use crate::block::BlockInfo; +use crate::compiler::MethodCodegen; +use crate::compiler::{compile_block, InnerGenCtxt, Literal}; +use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; +use crate::inliner::OrAndChoice::{And, Or}; use rand::Rng; use som_core::ast; use som_core::bytecode::Bytecode; -use crate::block::{BlockInfo}; -use crate::compiler::{compile_block, InnerGenCtxt, Literal}; -use crate::compiler::MethodCodegen; -use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; -use crate::inliner::OrAndChoice::{Or, And}; +use std::rc::Rc; pub enum JumpType { JumpOnFalse, - JumpOnTrue + JumpOnTrue, } pub enum OrAndChoice { Or, - And + And, } // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { - fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()>; + fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) + -> Option<()>; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()>; fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; - fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; - fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()>; - fn inline_or_and(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, or_and_choice: OrAndChoice) -> Option<()>; + fn inline_if_true_or_if_false( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + jump_type: JumpType, + ) -> Option<()>; + fn inline_if_true_if_false( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + jump_type: JumpType, + ) -> Option<()>; + fn inline_while( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + jump_type: JumpType, + ) -> Option<()>; + fn inline_or_and( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + or_and_choice: OrAndChoice, + ) -> Option<()>; } impl PrimMessageInliner for ast::Expression { - fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) -> Option<()> { + fn inline_if_possible( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + ) -> Option<()> { match message.signature.as_str() { "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), @@ -41,7 +66,7 @@ impl PrimMessageInliner for ast::Expression { "or:" => self.inline_or_and(ctxt, message, Or), "and:" => self.inline_or_and(ctxt, message, And), // TODO: to:do, maybe others i'm forgetting - _ => None + _ => None, } } @@ -59,47 +84,65 @@ impl PrimMessageInliner for ast::Expression { if let Some((_, body)) = block.body.split_last() { for block_bc in body { match block_bc { - Bytecode::PushLocal(up_idx, idx) => { - match up_idx { - 0 => ctxt.push_instr(Bytecode::PushLocal(*up_idx, nbr_locals_pre_inlining as u8 + *idx)), - 1.. => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)) - } + Bytecode::PushLocal(up_idx, idx) => match up_idx { + 0 => ctxt.push_instr(Bytecode::PushLocal( + *up_idx, + nbr_locals_pre_inlining as u8 + *idx, + )), + 1.. => ctxt.push_instr(Bytecode::PushLocal(*up_idx - 1, *idx)), }, - Bytecode::PopLocal(up_idx, idx) => { - match up_idx { - 0 => ctxt.push_instr(Bytecode::PopLocal(*up_idx, nbr_locals_pre_inlining as u8 + *idx)), - 1.. => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)) - } + Bytecode::PopLocal(up_idx, idx) => match up_idx { + 0 => ctxt.push_instr(Bytecode::PopLocal( + *up_idx, + nbr_locals_pre_inlining as u8 + *idx, + )), + 1.. => ctxt.push_instr(Bytecode::PopLocal(*up_idx - 1, *idx)), }, - Bytecode::PushArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)), // not 100% sure i need to adjust the up_idx there and for pop - Bytecode::PopArgument(up_idx, idx) => ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)), - Bytecode::Send1(lit_idx) | Bytecode::Send2(lit_idx) | Bytecode::Send3(lit_idx) | Bytecode::SendN(lit_idx) => { + Bytecode::PushArgument(up_idx, idx) => { + ctxt.push_instr(Bytecode::PushArgument(*up_idx - 1, *idx)) + } // not 100% sure i need to adjust the up_idx there and for pop + Bytecode::PopArgument(up_idx, idx) => { + ctxt.push_instr(Bytecode::PopArgument(*up_idx - 1, *idx)) + } + Bytecode::Send1(lit_idx) + | Bytecode::Send2(lit_idx) + | Bytecode::Send3(lit_idx) + | Bytecode::SendN(lit_idx) => { match block.literals.get(*lit_idx as usize)? { Literal::Symbol(interned) => { // TODO does this push duplicate literals? I think it doesn't? let idx = ctxt.push_literal(Literal::Symbol(*interned)); match block_bc { - Bytecode::Send1(_) => ctxt.push_instr(Bytecode::Send1(idx as u8)), - Bytecode::Send2(_) => ctxt.push_instr(Bytecode::Send2(idx as u8)), - Bytecode::Send3(_) => ctxt.push_instr(Bytecode::Send3(idx as u8)), - Bytecode::SendN(_) => ctxt.push_instr(Bytecode::SendN(idx as u8)), - _ => unreachable!() + Bytecode::Send1(_) => { + ctxt.push_instr(Bytecode::Send1(idx as u8)) + } + Bytecode::Send2(_) => { + ctxt.push_instr(Bytecode::Send2(idx as u8)) + } + Bytecode::Send3(_) => { + ctxt.push_instr(Bytecode::Send3(idx as u8)) + } + Bytecode::SendN(_) => { + ctxt.push_instr(Bytecode::SendN(idx as u8)) + } + _ => unreachable!(), } - }, - _ => panic!("Unexpected block literal type, not yet implemented") + } + _ => panic!("Unexpected block literal type, not yet implemented"), } - }, + } Bytecode::PushBlock(block_idx) => { match block.literals.get(*block_idx as usize)? { Literal::Block(inner_block) => { - let new_block = compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; + let new_block = + compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; let idx = ctxt.push_literal(Literal::Block(Rc::from(new_block))); ctxt.push_instr(Bytecode::PushBlock(idx as u8)); - }, - _ => panic!("PushBlock not actually pushing a block somehow") + } + _ => panic!("PushBlock not actually pushing a block somehow"), }; - }, + } Bytecode::PushGlobal(global_idx) => { match block.literals.get(*global_idx as usize)? { lit => { @@ -107,8 +150,11 @@ impl PrimMessageInliner for ast::Expression { ctxt.push_instr(Bytecode::PushGlobal(lit_idx as u8)); } }; - }, - Bytecode::PushConstant(_) | Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => { + } + Bytecode::PushConstant(_) + | Bytecode::PushConstant0 + | Bytecode::PushConstant1 + | Bytecode::PushConstant2 => { let constant_idx = match block_bc { Bytecode::PushConstant(idx) => *idx, Bytecode::PushConstant0 => 0, @@ -120,10 +166,15 @@ impl PrimMessageInliner for ast::Expression { match block.literals.get(constant_idx as usize)? { lit => { let lit_idx = ctxt.push_literal(lit.clone()); - ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)); // TODO: if 0/1/2, push specialized one + match lit_idx { // maybe create a function just for translating "constant_id (usize) <-> Bytecode" that to avoid duplication + 0 => ctxt.push_instr(Bytecode::PushConstant0), + 1 => ctxt.push_instr(Bytecode::PushConstant1), + 2 => ctxt.push_instr(Bytecode::PushConstant2), + _ => ctxt.push_instr(Bytecode::PushConstant(lit_idx as u8)), + } } }; - }, + } Bytecode::ReturnNonLocal => { // TODO; if the new context level is 0 (check prev bytecode emitted?), gotta emit a RETURNLOCAL instead! // as far as i understand... this still works? and is just slower? TODO fix though obviously @@ -134,19 +185,36 @@ impl PrimMessageInliner for ast::Expression { // _ => ctxt.push_instr(Bytecode::ReturnNonLocal) // } ctxt.push_instr(Bytecode::ReturnNonLocal) - }, - Bytecode::ReturnLocal => {}, + } + Bytecode::ReturnLocal => {} // todo: hmm... do we? if so, add these to the _ case i guess. // Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(idx + idx_start_inlining)), Bytecode::Jump(idx) => ctxt.push_instr(Bytecode::Jump(*idx)), Bytecode::JumpBackward(idx) => ctxt.push_instr(Bytecode::JumpBackward(*idx)), Bytecode::JumpOnTruePop(idx) => ctxt.push_instr(Bytecode::JumpOnTruePop(*idx)), - Bytecode::JumpOnFalsePop(idx) => ctxt.push_instr(Bytecode::JumpOnFalsePop(*idx)), - Bytecode::JumpOnTrueTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(*idx)), - Bytecode::JumpOnFalseTopNil(idx) => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(*idx)), - Bytecode::Halt | Bytecode::Dup | Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil | - Bytecode::Pop | Bytecode::PushField(_) | Bytecode::PopField(_) | - Bytecode::SuperSend1(_) | Bytecode::SuperSend2(_) | Bytecode::SuperSend3(_) | Bytecode::SuperSendN(_) => {} // explicitly listing them out to account for the fact that new BC could be introduced and mess things up if we handled it with a _ case + Bytecode::JumpOnFalsePop(idx) => { + ctxt.push_instr(Bytecode::JumpOnFalsePop(*idx)) + } + Bytecode::JumpOnTrueTopNil(idx) => { + ctxt.push_instr(Bytecode::JumpOnTrueTopNil(*idx)) + } + Bytecode::JumpOnFalseTopNil(idx) => { + ctxt.push_instr(Bytecode::JumpOnFalseTopNil(*idx)) + } + Bytecode::Halt + | Bytecode::Dup + | Bytecode::Push0 + | Bytecode::Push1 + | Bytecode::PushNil + | Bytecode::Pop + | Bytecode::PushField(_) + | Bytecode::PopField(_) + | Bytecode::SuperSend1(_) + | Bytecode::SuperSend2(_) + | Bytecode::SuperSend3(_) + | Bytecode::SuperSendN(_) => { + ctxt.push_instr(*block_bc) // explicitly listing them out to account for the fact that new BC could be introduced and mess things up if we handled it with a _ case + } } } } @@ -157,31 +225,37 @@ impl PrimMessageInliner for ast::Expression { fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()> { let block_idx = match ctxt.get_instructions().last()? { Bytecode::PushBlock(val) => *val, - _ => panic!("function expects last bytecode to be a PUSH_BLOCK.") + _ => panic!("function expects last bytecode to be a PUSH_BLOCK."), }; ctxt.pop_instr(); // removing the PUSH_BLOCK let cond_block_ref = match ctxt.get_literal(block_idx as usize)? { Literal::Block(val) => val.clone(), - _ => return None + _ => return None, }; ctxt.remove_literal(block_idx as usize); match self.inline_compiled_block(ctxt, cond_block_ref.as_ref().blk_info.as_ref()) { None => panic!("Inlining a compiled block failed!"), - _ => Some(()) + _ => Some(()), } } - fn inline_if_true_or_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + fn inline_if_true_or_if_false( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + jump_type: JumpType, + ) -> Option<()> { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) + { return None; } let jump_idx = ctxt.get_cur_instr_idx(); match jump_type { JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalseTopNil(0)), - JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)) + JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTrueTopNil(0)), } // we need to compile the block before inlining it, and we haven't encountered/compiled it yet @@ -194,16 +268,22 @@ impl PrimMessageInliner for ast::Expression { // todo i think Recurse took a big hit when i started inlining any expression instead of just blocks. needs investigating // wrt previous todo comment: likely super outdated. but until proven, i'm keeping it as a reminder. -// self.inline_expr(ctxt, message.values.get(0)?); + // self.inline_expr(ctxt, message.values.get(0)?); ctxt.backpatch_jump_to_current(jump_idx); Some(()) } - fn inline_if_true_if_false(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { + fn inline_if_true_if_false( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + jump_type: JumpType, + ) -> Option<()> { if message.values.len() != 2 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) - || !matches!(message.values.get(1)?, ast::Expression::Block(_)) { + || !matches!(message.values.get(1)?, ast::Expression::Block(_)) + { return None; } @@ -234,8 +314,16 @@ impl PrimMessageInliner for ast::Expression { Some(()) } - fn inline_while(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, jump_type: JumpType) -> Option<()> { - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) || !matches!(ctxt.get_instructions().last() , Some(Bytecode::PushBlock(_))) { + fn inline_while( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + jump_type: JumpType, + ) -> Option<()> { + if message.values.len() != 1 + || !matches!(message.values.get(0)?, ast::Expression::Block(_)) + || !matches!(ctxt.get_instructions().last(), Some(Bytecode::PushBlock(_))) + { return None; } @@ -246,7 +334,7 @@ impl PrimMessageInliner for ast::Expression { let cond_jump_idx = ctxt.get_cur_instr_idx(); match jump_type { JumpOnFalse => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), - JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)) + JumpOnTrue => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), } message.values.get(0)?.codegen(ctxt)?; @@ -254,15 +342,17 @@ impl PrimMessageInliner for ast::Expression { // we push a POP, unless the body of the loop is empty. match message.values.get(0).unwrap() { - ast::Expression::Block(block) => { + ast::Expression::Block(block) => { if block.body.exprs.len() != 0 { ctxt.push_instr(Bytecode::Pop); } - }, + } _ => {} }; - ctxt.push_instr(Bytecode::JumpBackward(ctxt.get_cur_instr_idx() - idx_before_condition + 1)); + ctxt.push_instr(Bytecode::JumpBackward( + ctxt.get_cur_instr_idx() - idx_before_condition + 1, + )); ctxt.backpatch_jump_to_current(cond_jump_idx); // that's a PushNil with the specialized bytecode, which is prettier. @@ -273,8 +363,14 @@ impl PrimMessageInliner for ast::Expression { Some(()) } - fn inline_or_and(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message, or_and_choice: OrAndChoice) -> Option<()> { - if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) { + fn inline_or_and( + &self, + ctxt: &mut dyn InnerGenCtxt, + message: &ast::Message, + or_and_choice: OrAndChoice, + ) -> Option<()> { + if message.values.len() != 1 || !matches!(message.values.get(0)?, ast::Expression::Block(_)) + { return None; } @@ -282,7 +378,7 @@ impl PrimMessageInliner for ast::Expression { match or_and_choice { Or => ctxt.push_instr(Bytecode::JumpOnTruePop(0)), - And => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)) + And => ctxt.push_instr(Bytecode::JumpOnFalsePop(0)), } message.values.get(0)?.codegen(ctxt)?; @@ -295,7 +391,7 @@ impl PrimMessageInliner for ast::Expression { let name = match or_and_choice { Or => ctxt.intern_symbol("true"), - And => ctxt.intern_symbol("false") + And => ctxt.intern_symbol("false"), }; let idx = ctxt.push_literal(Literal::Symbol(name)); ctxt.push_instr(Bytecode::PushGlobal(idx as u8)); @@ -304,4 +400,4 @@ impl PrimMessageInliner for ast::Expression { Some(()) } -} \ No newline at end of file +} diff --git a/som-interpreter-bc/src/interpreter.rs b/som-interpreter-bc/src/interpreter.rs index 257a7f27..d799dec0 100644 --- a/som-interpreter-bc/src/interpreter.rs +++ b/som-interpreter-bc/src/interpreter.rs @@ -325,11 +325,11 @@ impl Interpreter { Bytecode::Jump(offset) => { let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; - }, + } Bytecode::JumpBackward(offset) => { let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx -= offset + 1; - }, + } Bytecode::JumpOnTrueTopNil(offset) => { let condition_result = self.stack.last()?; @@ -338,13 +338,13 @@ impl Interpreter { let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; // minus one because it gets incremented by one already every loop *self.stack.last_mut()? = Value::Nil; - }, + } Value::Boolean(false) => { self.stack.pop(); - }, - _ => panic!("Jump condition did not evaluate to boolean") + } + _ => panic!("Jump condition did not evaluate to boolean"), } - }, + } Bytecode::JumpOnFalseTopNil(offset) => { let condition_result = self.stack.last()?; @@ -353,13 +353,13 @@ impl Interpreter { let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; *self.stack.last_mut()? = Value::Nil; - }, + } Value::Boolean(true) => { self.stack.pop(); - }, - _ => panic!("Jump condition did not evaluate to boolean") + } + _ => panic!("Jump condition did not evaluate to boolean"), } - }, + } Bytecode::JumpOnTruePop(offset) => { let condition_result = self.stack.pop()?; @@ -367,11 +367,11 @@ impl Interpreter { Value::Boolean(true) => { let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; - }, - Value::Boolean(false) => {}, - _ => panic!("Jump condition did not evaluate to boolean") + } + Value::Boolean(false) => {} + _ => panic!("Jump condition did not evaluate to boolean"), } - }, + } Bytecode::JumpOnFalsePop(offset) => { let condition_result = self.stack.pop()?; @@ -379,11 +379,11 @@ impl Interpreter { Value::Boolean(false) => { let frame = self.current_frame()?; frame.clone().borrow_mut().bytecode_idx += offset - 1; - }, - Value::Boolean(true) => {}, - _ => panic!("Jump condition did not evaluate to boolean") + } + Value::Boolean(true) => {} + _ => panic!("Jump condition did not evaluate to boolean"), } - }, + } } } diff --git a/som-interpreter-bc/src/method.rs b/som-interpreter-bc/src/method.rs index f8aa1881..315db71e 100644 --- a/som-interpreter-bc/src/method.rs +++ b/som-interpreter-bc/src/method.rs @@ -187,7 +187,7 @@ impl fmt::Display for Method { Bytecode::JumpOnTrueTopNil(idx) => { write!(f, "index: {}", idx)?; } - _ => write!(f, "No display for this bytecode, TODO.")? // I am lazy + _ => write!(f, "No display for this bytecode, TODO.")?, // I am lazy } } Ok(()) diff --git a/som-interpreter-bc/tests/inlining_tests.rs b/som-interpreter-bc/tests/inlining_tests.rs index 05dd5007..624d9b61 100644 --- a/som-interpreter-bc/tests/inlining_tests.rs +++ b/som-interpreter-bc/tests/inlining_tests.rs @@ -1,6 +1,6 @@ -use std::path::PathBuf; use som_core::bytecode::Bytecode; use som_core::bytecode::Bytecode::*; +use std::path::PathBuf; use som_interpreter_bc::compiler; use som_interpreter_bc::method::MethodKind; @@ -21,7 +21,9 @@ fn get_bytecodes_from_method(class_txt: &str, method_name: &str) -> Vec = lexer.by_ref().collect(); assert!( lexer.text().is_empty(), @@ -42,12 +44,14 @@ fn get_bytecodes_from_method(class_txt: &str, method_name: &str) -> Vec m.body.clone(), - _ => unreachable!() + _ => unreachable!(), } } fn expect_bytecode_sequence(bytecodes: &Vec, expected_bc_sequence: &[Bytecode]) { - assert!(bytecodes.windows(expected_bc_sequence.len()).any(|window| window == expected_bc_sequence)) + assert!(bytecodes + .windows(expected_bc_sequence.len()) + .any(|window| window == expected_bc_sequence)) } #[test] @@ -60,18 +64,21 @@ fn if_true_or_false_inlining_ok() { let bytecodes = get_bytecodes_from_method(class_txt, "run"); - expect_bytecode_sequence(&bytecodes, &[ - PushGlobal(0), - JumpOnFalseTopNil(3), - PushGlobal(0), - ReturnNonLocal, - Pop, - PushGlobal(1), - ReturnNonLocal, - Pop, - PushArgument(0, 0), - ReturnLocal - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushGlobal(0), + JumpOnFalseTopNil(3), + PushGlobal(0), + ReturnNonLocal, + Pop, + PushGlobal(1), + ReturnNonLocal, + Pop, + PushArgument(0, 0), + ReturnLocal, + ], + ); let class_txt2 = "Foo = ( run = ( false ifFalse: [ ^false ]. @@ -81,18 +88,21 @@ fn if_true_or_false_inlining_ok() { let bytecodes = get_bytecodes_from_method(class_txt2, "run"); - expect_bytecode_sequence(&bytecodes, &[ - PushGlobal(0), - JumpOnTrueTopNil(3), - PushGlobal(0), - ReturnNonLocal, - Pop, - PushGlobal(1), - ReturnNonLocal, - Pop, - PushArgument(0, 0), - ReturnLocal - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushGlobal(0), + JumpOnTrueTopNil(3), + PushGlobal(0), + ReturnNonLocal, + Pop, + PushGlobal(1), + ReturnNonLocal, + Pop, + PushArgument(0, 0), + ReturnLocal, + ], + ); } #[test] @@ -101,42 +111,48 @@ fn if_true_if_false_inlining_ok() { let bytecodes = get_bytecodes_from_method(class_txt, "run"); - expect_bytecode_sequence(&bytecodes, &[ - PushGlobal(0), - JumpOnFalsePop(4), - PushGlobal(0), - ReturnNonLocal, - Jump(3), - PushGlobal(1), - ReturnNonLocal, - Pop, - PushArgument(0, 0), - ReturnLocal, - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushGlobal(0), + JumpOnFalsePop(4), + PushGlobal(0), + ReturnNonLocal, + Jump(3), + PushGlobal(1), + ReturnNonLocal, + Pop, + PushArgument(0, 0), + ReturnLocal, + ], + ); let class_txt2 = "Foo = ( run = ( true ifFalse: [ ^false ] ifTrue: [ ^ true]. ))"; let bytecodes = get_bytecodes_from_method(class_txt2, "run"); - expect_bytecode_sequence(&bytecodes, &[ - PushGlobal(0), - JumpOnTruePop(4), - PushGlobal(1), - ReturnNonLocal, - Jump(3), - PushGlobal(0), - ReturnNonLocal, - Pop, - PushArgument(0, 0), - ReturnLocal, - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushGlobal(0), + JumpOnTruePop(4), + PushGlobal(1), + ReturnNonLocal, + Jump(3), + PushGlobal(0), + ReturnNonLocal, + Pop, + PushArgument(0, 0), + ReturnLocal, + ], + ); } #[test] fn while_true_false_inlining_ok() { let class_txt = "Foo = ( run = ( | cnt | - cnt := 0. + cnt := 42. [ cnt < 1000000 ] whileTrue: [ cnt := cnt + 1. ] @@ -145,29 +161,22 @@ fn while_true_false_inlining_ok() { let bytecodes = get_bytecodes_from_method(class_txt, "run"); - dbg!(&bytecodes); - expect_bytecode_sequence(&bytecodes, &[ - JumpOnFalsePop(8), - PushLocal(0, 0), - PushConstant(3), - Send2(4), - Dup, - PopLocal(0, 0), - Pop, - JumpBackward(10) - ]); - - expect_bytecode_sequence(&bytecodes, &[ - PushLocal(0, 0), - PushConstant(0), - Send2(1), - JumpOnFalsePop(6), - PushLocal(0, 0), - Send2(2), - PopLocal(0, 0), - Pop, - JumpBackward(8), - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushLocal(0, 0), + PushConstant(1), + Send2(2), + JumpOnFalsePop(8), + PushLocal(0, 0), + Push1, + Send2(3), + Dup, + PopLocal(0, 0), + Pop, + JumpBackward(10), + ], + ); // let class_txt_2 = class_txt.replace("whileTrue", "whileFalse"); // let bytecodes = get_bytecodes_from_method(class_txt_2.as_str(), "run"); @@ -192,14 +201,17 @@ fn or_and_inlining_ok() { "; let bytecodes = get_bytecodes_from_method(class_txt, "run"); - expect_bytecode_sequence(&bytecodes, &[ - PushGlobal(0), - JumpOnTruePop(3), - PushGlobal(1), - Jump(2), - PushGlobal(0), - ReturnNonLocal - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushGlobal(0), + JumpOnTruePop(3), + PushGlobal(1), + Jump(2), + PushGlobal(0), + ReturnNonLocal, + ], + ); let class_txt2 = "Foo = ( run = ( ^ (true and: [ false ]) @@ -207,14 +219,17 @@ fn or_and_inlining_ok() { "; let bytecodes = get_bytecodes_from_method(class_txt2, "run"); - expect_bytecode_sequence(&bytecodes, &[ - PushGlobal(0), - JumpOnFalsePop(3), - PushGlobal(1), - Jump(2), - PushGlobal(1), - ReturnNonLocal - ]); + expect_bytecode_sequence( + &bytecodes, + &[ + PushGlobal(0), + JumpOnFalsePop(3), + PushGlobal(1), + Jump(2), + PushGlobal(1), + ReturnNonLocal, + ], + ); } #[test] @@ -248,9 +263,9 @@ fn inlining_pyramid() { PushLocal(0, 5), JumpOnFalseTopNil(2), PushLocal(0, 6), - ReturnNonLocal + ReturnNonLocal, ]; expect_bytecode_sequence(&bytecodes, expected_bc); expect_bytecode_sequence(&bytecodes2, expected_bc); -} \ No newline at end of file +} From 7133fd97e4ed31a21075e9ee1e689b103f09f353 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Thu, 22 Feb 2024 17:05:05 +0000 Subject: [PATCH 78/88] BROKEN: ongoing merge with specialized bc branch. Implemented dup_popx_pop etc logic for jumps, breaks in some cases --- som-interpreter-bc/src/compiler.rs | 47 +++++++++++++++++----- som-interpreter-bc/tests/inlining_tests.rs | 11 +++-- som-interpreter-bc/tests/specialized_bc.rs | 40 ++++++++++++++++++ 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 0e6c639f..34748fd0 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -249,18 +249,13 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } fn remove_dup_popx_pop_sequences(&mut self) { - todo!("not functional yet with inlining"); - let Some(body) = self.body.as_mut() else { - return; - }; - - if body.len() < 3 { + if self.body.is_none() || self.body.as_ref().unwrap().len() < 3 { // TODO once behavior is fixed, change to only one mutable borrow at the start like in the old code return; } let mut indices_to_remove: Vec = vec![]; - for (idx, bytecode_win) in body.windows(3).enumerate() { + for (idx, bytecode_win) in self.body.as_ref().unwrap().windows(3).enumerate() { if matches!(bytecode_win[0], Bytecode::Dup) && matches!( bytecode_win[1], @@ -277,12 +272,43 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { return; } + let mut jumps_to_patch = vec![]; + for (cur_idx, bc) in self.body.as_ref().unwrap().iter().enumerate() { + match bc { + Bytecode::Jump(jump_offset) | Bytecode::JumpOnTrueTopNil(jump_offset) | Bytecode::JumpOnFalseTopNil(jump_offset) | + Bytecode::JumpOnTruePop(jump_offset) | Bytecode::JumpOnFalsePop(jump_offset) => { + if indices_to_remove.contains(&(cur_idx + jump_offset)) { + let idx = indices_to_remove.iter().position(|&v| v == cur_idx + jump_offset).unwrap(); + indices_to_remove.remove(idx); + indices_to_remove.remove(idx - 1); + } + + let nbr_to_adjust = indices_to_remove.iter().filter(|&&v| cur_idx < v && v <= cur_idx + jump_offset).count(); + jumps_to_patch.push((cur_idx, jump_offset - nbr_to_adjust)); + }, + Bytecode::JumpBackward(jump_offset) => { + let nbr_to_adjust = indices_to_remove.iter().filter(|&&v| cur_idx > v && v > cur_idx - jump_offset).count(); + jumps_to_patch.push((cur_idx, jump_offset - nbr_to_adjust)); + }, + _ => {} + } + } + + for (jump_idx, jump_val) in jumps_to_patch { + self.patch_jump(jump_idx, jump_val); + } + + // dbg!("Before:"); + // dbg!(self.body.as_ref().unwrap()); let mut index = 0; - body.retain(|_| { + self.body.as_mut().unwrap().retain(|_| { let is_kept = !indices_to_remove.contains(&index); index += 1; is_kept }); + // dbg!("After:"); + // dbg!(self.body.as_ref().unwrap()); + // dbg!("---"); } } @@ -617,9 +643,10 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Optio last.codegen(&mut ctxt)?; ctxt.push_instr(Bytecode::ReturnLocal); } - // ctxt.remove_dup_popx_pop_sequences(); // todo check no redundant calls for this guy + ctxt.remove_dup_popx_pop_sequences(); let frame = None; let locals = { diff --git a/som-interpreter-bc/tests/inlining_tests.rs b/som-interpreter-bc/tests/inlining_tests.rs index 624d9b61..077756a1 100644 --- a/som-interpreter-bc/tests/inlining_tests.rs +++ b/som-interpreter-bc/tests/inlining_tests.rs @@ -161,20 +161,19 @@ fn while_true_false_inlining_ok() { let bytecodes = get_bytecodes_from_method(class_txt, "run"); + dbg!(&bytecodes); expect_bytecode_sequence( &bytecodes, &[ PushLocal(0, 0), - PushConstant(1), + PushConstant1, Send2(2), - JumpOnFalsePop(8), + JumpOnFalsePop(6), PushLocal(0, 0), Push1, Send2(3), - Dup, PopLocal(0, 0), - Pop, - JumpBackward(10), + JumpBackward(8), ], ); @@ -268,4 +267,4 @@ fn inlining_pyramid() { expect_bytecode_sequence(&bytecodes, expected_bc); expect_bytecode_sequence(&bytecodes2, expected_bc); -} +} \ No newline at end of file diff --git a/som-interpreter-bc/tests/specialized_bc.rs b/som-interpreter-bc/tests/specialized_bc.rs index 8092b4f5..46850829 100644 --- a/som-interpreter-bc/tests/specialized_bc.rs +++ b/som-interpreter-bc/tests/specialized_bc.rs @@ -172,3 +172,43 @@ fn super_send_bytecodes() { &[PushArgument(0, 0), Push1, Push1, Push1, SuperSendN(3)], ); } + +#[test] +fn tmp_popx_pop_whatever_bug() { + let class_txt = "Foo = ( + resolve: a = ( + (a == nil) ifFalse: [ ^ a ]. + ) + + run = ( ^ [:a | a - 1] value: 43 ) + ) + "; + + let bytecodes = get_bytecodes_from_method(class_txt, "resolve:"); + dbg!(&bytecodes); + + let actual_wrong_bc = &[ + PushArgument( + 0, + 1, + ), + PushNil, + Send2( + 0, + ), + JumpOnTrueTopNil( + 3, + ), + PushArgument( + 0, + 1, + ), + ReturnNonLocal, + Pop, + PushArgument( + 0, + 0, + ), + ReturnLocal, + ]; +} \ No newline at end of file From 540956b43ad7ac1d4cdb1683db05174d0f4d19bd Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Mon, 26 Feb 2024 11:30:20 +0000 Subject: [PATCH 79/88] functional merge with specialized BC branch --- som-interpreter-bc/src/compiler.rs | 93 ++++++++++++++++------ som-interpreter-bc/src/inliner.rs | 11 ++- som-interpreter-bc/tests/specialized_bc.rs | 67 +++++++++------- 3 files changed, 114 insertions(+), 57 deletions(-) diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 34748fd0..a0088b2a 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -106,7 +106,8 @@ pub trait InnerGenCtxt: GenCtxt { fn push_arg(&mut self, name: String) -> usize; fn push_local(&mut self, name: String, original_scope: usize) -> usize; fn get_nbr_locals(&self) -> usize; - fn get_literal(&self, idx: usize) -> Option<&Literal>; // is this needed? + fn get_literal(&self, idx: usize) -> Option<&Literal>; + // is this needed? fn push_literal(&mut self, literal: Literal) -> usize; fn remove_literal(&mut self, idx: usize) -> Option; fn get_cur_instr_idx(&self) -> usize; @@ -137,21 +138,21 @@ impl GenCtxt for BlockGenCtxt<'_> { (self.locals.iter().position(|(local_name, local_scope)| { local_name == name && (*local_scope == self.current_scope()) })) - .map(|idx| FoundVar::Local(0, idx as u8)) - .or_else(|| { - self.locals - .iter() - .position(|(local_name, _)| local_name == name) - .map(|idx| FoundVar::Local(0, idx as u8)) - }) - .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) - .or_else(|| { - self.outer.find_var(name).map(|found| match found { - FoundVar::Local(up_idx, idx) => FoundVar::Local(up_idx + 1, idx), - FoundVar::Argument(up_idx, idx) => FoundVar::Argument(up_idx + 1, idx), - FoundVar::Field(idx) => FoundVar::Field(idx), + .map(|idx| FoundVar::Local(0, idx as u8)) + .or_else(|| { + self.locals + .iter() + .position(|(local_name, _)| local_name == name) + .map(|idx| FoundVar::Local(0, idx as u8)) + }) + .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) + .or_else(|| { + self.outer.find_var(name).map(|found| match found { + FoundVar::Local(up_idx, idx) => FoundVar::Local(up_idx + 1, idx), + FoundVar::Argument(up_idx, idx) => FoundVar::Argument(up_idx + 1, idx), + FoundVar::Field(idx) => FoundVar::Field(idx), + }) }) - }) } fn intern_symbol(&mut self, name: &str) -> Interned { @@ -249,7 +250,8 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { } fn remove_dup_popx_pop_sequences(&mut self) { - if self.body.is_none() || self.body.as_ref().unwrap().len() < 3 { // TODO once behavior is fixed, change to only one mutable borrow at the start like in the old code + if self.body.is_none() || self.body.as_ref().unwrap().len() < 3 { + // TODO once behavior is fixed, change to only one mutable borrow at the start like in the old code return; } @@ -263,6 +265,22 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { ) && matches!(bytecode_win[2], Bytecode::Pop) { + let are_bc_jump_targets = self.body.as_ref().unwrap().iter().enumerate().any(|(maybe_jump_idx, bc)| match bc { + Bytecode::Jump(jump_offset) + | Bytecode::JumpOnTrueTopNil(jump_offset) + | Bytecode::JumpOnFalseTopNil(jump_offset) + | Bytecode::JumpOnTruePop(jump_offset) + | Bytecode::JumpOnFalsePop(jump_offset) => { + let bc_target_idx = maybe_jump_idx + *jump_offset; + bc_target_idx == idx || bc_target_idx == idx + 2 + }, + _ => {false} + }); + + if are_bc_jump_targets { + continue + } + indices_to_remove.push(idx); indices_to_remove.push(idx + 2); } @@ -275,21 +293,44 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { let mut jumps_to_patch = vec![]; for (cur_idx, bc) in self.body.as_ref().unwrap().iter().enumerate() { match bc { - Bytecode::Jump(jump_offset) | Bytecode::JumpOnTrueTopNil(jump_offset) | Bytecode::JumpOnFalseTopNil(jump_offset) | - Bytecode::JumpOnTruePop(jump_offset) | Bytecode::JumpOnFalsePop(jump_offset) => { + Bytecode::Jump(jump_offset) + | Bytecode::JumpOnTrueTopNil(jump_offset) + | Bytecode::JumpOnFalseTopNil(jump_offset) + | Bytecode::JumpOnTruePop(jump_offset) + | Bytecode::JumpOnFalsePop(jump_offset) => { if indices_to_remove.contains(&(cur_idx + jump_offset)) { - let idx = indices_to_remove.iter().position(|&v| v == cur_idx + jump_offset).unwrap(); - indices_to_remove.remove(idx); - indices_to_remove.remove(idx - 1); + panic!("should be unreachable"); + // let jump_target_in_removes_idx = indices_to_remove + // .iter() + // .position(|&v| v == cur_idx + jump_offset) + // .unwrap(); + // indices_to_remove.remove(jump_target_in_removes_idx); + // // indices_to_remove.remove(jump_target_in_removes_idx - 1); + // let to_remove = (jump_target_in_removes_idx, + // match jump_target_in_removes_idx % 2 { + // 0 => jump_target_in_removes_idx + 1, + // 1 => jump_target_in_removes_idx - 1, + // _ => unreachable!() + // }); + // + // indices_to_remove.retain(|v| *v != to_remove.0 && *v != to_remove.1); + // continue; } - let nbr_to_adjust = indices_to_remove.iter().filter(|&&v| cur_idx < v && v <= cur_idx + jump_offset).count(); + let nbr_to_adjust = indices_to_remove + .iter() + .filter(|&&idx| cur_idx < idx && idx <= cur_idx + jump_offset) + .count(); jumps_to_patch.push((cur_idx, jump_offset - nbr_to_adjust)); - }, + } Bytecode::JumpBackward(jump_offset) => { - let nbr_to_adjust = indices_to_remove.iter().filter(|&&v| cur_idx > v && v > cur_idx - jump_offset).count(); + let nbr_to_adjust = indices_to_remove + .iter() + .filter(|&&idx| cur_idx > idx && idx > cur_idx - jump_offset) + .count(); jumps_to_patch.push((cur_idx, jump_offset - nbr_to_adjust)); - }, + // It's impossible for a JumpBackward to be generated to point to a duplicated dup/pop/pox sequence, as it stands, and as far as I know. + } _ => {} } } @@ -644,7 +685,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option<()> { - match message.signature.as_str() { + let has_inlined = match message.signature.as_str() { "ifTrue:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnFalse), "ifFalse:" => self.inline_if_true_or_if_false(ctxt, message, JumpOnTrue), "ifTrue:ifFalse:" => self.inline_if_true_if_false(ctxt, message, JumpOnFalse), @@ -67,7 +67,11 @@ impl PrimMessageInliner for ast::Expression { "and:" => self.inline_or_and(ctxt, message, And), // TODO: to:do, maybe others i'm forgetting _ => None, - } + }; + // if has_inlined.is_some() { // todo maybe? probably unneeded + // ctxt.remove_dup_popx_pop_sequences(); + // } + has_inlined } fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()> { @@ -166,7 +170,8 @@ impl PrimMessageInliner for ast::Expression { match block.literals.get(constant_idx as usize)? { lit => { let lit_idx = ctxt.push_literal(lit.clone()); - match lit_idx { // maybe create a function just for translating "constant_id (usize) <-> Bytecode" that to avoid duplication + match lit_idx { + // maybe create a function just for translating "constant_id (usize) <-> Bytecode" that to avoid duplication 0 => ctxt.push_instr(Bytecode::PushConstant0), 1 => ctxt.push_instr(Bytecode::PushConstant1), 2 => ctxt.push_instr(Bytecode::PushConstant2), diff --git a/som-interpreter-bc/tests/specialized_bc.rs b/som-interpreter-bc/tests/specialized_bc.rs index 46850829..a07d02e2 100644 --- a/som-interpreter-bc/tests/specialized_bc.rs +++ b/som-interpreter-bc/tests/specialized_bc.rs @@ -173,42 +173,53 @@ fn super_send_bytecodes() { ); } +#[ignore] #[test] -fn tmp_popx_pop_whatever_bug() { +fn something_jump_bug_popx() { + // todo: this test is about jump BC pointing to redundant dup/popx/pop sequences... + // ...therefore breaking when they're optimized and the jump doesn't know what to do. + // this issue is currently being circumvented by straight up not removing the sequence when it's a jump target. + // but this needs to be changed in the future. there's likely an underlying issue that this test right there exemplifies? + let class_txt = "Foo = ( - resolve: a = ( - (a == nil) ifFalse: [ ^ a ]. - ) - - run = ( ^ [:a | a - 1] value: 43 ) + testIfTrueTrueResult = ( + | result | + result := true ifTrue: [ 1 ]. + ^ result class + ) ) "; - let bytecodes = get_bytecodes_from_method(class_txt, "resolve:"); + let bytecodes = get_bytecodes_from_method(class_txt, "testIfTrueTrueResult"); dbg!(&bytecodes); - let actual_wrong_bc = &[ - PushArgument( - 0, - 1, - ), - PushNil, - Send2( - 0, - ), - JumpOnTrueTopNil( - 3, - ), - PushArgument( - 0, - 1, - ), + let _bc_no_removal = &[ + PushGlobal(0), + JumpOnFalseTopNil(2), + Push1, + Dup, + PopLocal(0, 0), + Pop, + PushLocal(0, 0), + Send1(1), + ReturnNonLocal, + Pop, + PushArgument(0, 0), + ReturnLocal, + ]; + + let expected_bytecodes: &[Bytecode] = &[ + PushGlobal(0), + JumpOnFalseTopNil(2), + Push1, + PopLocal(0, 0), + PushLocal(0, 0), + Send1(1), ReturnNonLocal, Pop, - PushArgument( - 0, - 0, - ), + PushArgument(0, 0), ReturnLocal, ]; -} \ No newline at end of file + + expect_bytecode_sequence(&bytecodes, expected_bytecodes); +} From 1868393c663f5749c8b4ea791e8724629b13e2a4 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 6 Mar 2024 10:47:06 +0000 Subject: [PATCH 80/88] ongoing work: adapt_block_after_outer_inlined(). which will remove the need for ast_body in Block --- som-interpreter-bc/src/block.rs | 6 +-- som-interpreter-bc/src/compiler.rs | 5 +-- som-interpreter-bc/src/inliner.rs | 64 +++++++++++++++++++++++++++--- 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/som-interpreter-bc/src/block.rs b/som-interpreter-bc/src/block.rs index a93c99c6..768bbc48 100644 --- a/som-interpreter-bc/src/block.rs +++ b/som-interpreter-bc/src/block.rs @@ -1,4 +1,3 @@ -use som_core::ast; use std::cell::RefCell; use std::fmt; use std::rc::Rc; @@ -27,10 +26,7 @@ pub struct BlockInfo { pub struct Block { /// Reference to the captured stack frame. pub frame: Option>, - pub blk_info: Rc, - // OLarose: not a fan... but it's needed when inlining to be able to recreate a working version of the block from the original AST - // (see PushBlock in inliner) - pub ast_body: ast::Block, + pub blk_info: Rc } impl Block { diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index a0088b2a..0cd84e69 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -721,7 +721,7 @@ fn compile_method(outer: &mut dyn GenCtxt, defn: &ast::MethodDef) -> Option Option { +fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { // println!("(system) compiling block ..."); let mut rand_thread = rand::thread_rng(); @@ -773,8 +773,7 @@ pub(crate) fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Optio body, nb_params, inline_cache, - }), - ast_body: defn.clone(), // not a fan of this, only needed during parsing and cloning feels needless... TODO + }) }; // println!("(system) compiled block !"); diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 2f767007..d3912fe3 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -1,6 +1,6 @@ -use crate::block::BlockInfo; +use crate::block::{Block, BlockInfo}; use crate::compiler::MethodCodegen; -use crate::compiler::{compile_block, InnerGenCtxt, Literal}; +use crate::compiler::{InnerGenCtxt, Literal}; use crate::inliner::JumpType::{JumpOnFalse, JumpOnTrue}; use crate::inliner::OrAndChoice::{And, Or}; use rand::Rng; @@ -21,7 +21,8 @@ pub enum OrAndChoice { // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) - -> Option<()>; + -> Option<()>; + fn adapt_block_after_outer_inlined(&self, ctxt: &mut dyn InnerGenCtxt, block_body: &Block, adjust_scope_by: usize) -> Block; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()>; fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; fn inline_if_true_or_if_false( @@ -139,8 +140,8 @@ impl PrimMessageInliner for ast::Expression { Bytecode::PushBlock(block_idx) => { match block.literals.get(*block_idx as usize)? { Literal::Block(inner_block) => { - let new_block = - compile_block(ctxt.as_gen_ctxt(), &inner_block.ast_body)?; + // let new_block = inner_block.as_ref().clone(); + let new_block = self.adapt_block_after_outer_inlined(ctxt, &inner_block, 1); let idx = ctxt.push_literal(Literal::Block(Rc::from(new_block))); ctxt.push_instr(Bytecode::PushBlock(idx as u8)); } @@ -246,6 +247,59 @@ impl PrimMessageInliner for ast::Expression { } } + fn adapt_block_after_outer_inlined(&self, _ctxt: &mut dyn InnerGenCtxt, orig_block: &Block, adjust_scope_by: usize) -> Block { + let new_body = orig_block.blk_info.body.iter().map(|b| + match b { + Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | + Bytecode::PushArgument(up_idx, _) | Bytecode::PopArgument(up_idx, _) => { + let new_up_idx: u8 = match *up_idx as isize - adjust_scope_by as isize { + diff if diff < 0 => 0, + diff => diff as u8 + }; + match b { + Bytecode::PushLocal(_, idx) => Bytecode::PushLocal(new_up_idx, *idx), + Bytecode::PopLocal(_, idx) => Bytecode::PushLocal(new_up_idx, *idx), + Bytecode::PushArgument(_, idx) => Bytecode::PushArgument(new_up_idx, *idx), + Bytecode::PopArgument(_, idx) => Bytecode::PopArgument(new_up_idx, *idx), + _ => unreachable!() + } + }, + Bytecode::PushBlock(_block_idx) => { + todo!(); + // match orig_block.blk_info.literals.get(*block_idx as usize) { + // Some(Literal::Block(inner_block)) => { + // // can't just clone the inner_block because the body to be modified is behind an Rc (not Rc>), so immutable + // // though if we ever want to do some runtime bytecode rewriting, it'll have to be an Rc> and this code will be refactorable + // // let new_block = inner_block.as_ref().clone(); + // let new_block = self.adapt_block_after_outer_inlined(ctxt, inner_block.clone().as_ref(), adjust_scope_by + 1); + // + // todo!() + // // Bytecode::PushBlock(idx as u8) + // } + // None => panic!("PushBlock is associated with no literal whatsoever?"), + // _ => panic!("PushBlock is not actually pushing a block somehow"), + // }; + }, + + // Bytecode::ReturnNonLocal => Bytecode::ReturnNonLocal, + _ => b.clone() + } + ).collect(); + + // can't just clone the inner_block then modify the body because the body is behind an Rc (not Rc>), so immutable + // though if we ever want to do some runtime bytecode rewriting, it'll have to be an Rc> and this code will be refactorable (not so many individual calls to .clone()) + Block { + frame: orig_block.frame.clone(), + blk_info: Rc::new(BlockInfo { + locals: orig_block.blk_info.locals.clone(), + literals: orig_block.blk_info.literals.clone(), + body: new_body, + nb_params: orig_block.blk_info.nb_params, + inline_cache: orig_block.blk_info.inline_cache.clone(), + }), + } + } + fn inline_if_true_or_if_false( &self, ctxt: &mut dyn InnerGenCtxt, From c06e05ad0a7d04403e7178ed08857ed47204b400 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 6 Mar 2024 15:49:25 +0000 Subject: [PATCH 81/88] fixed disassembler bug related to push1 --- som-interpreter-bc/src/disassembler.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/som-interpreter-bc/src/disassembler.rs b/som-interpreter-bc/src/disassembler.rs index c7667c2e..4098ef6e 100644 --- a/som-interpreter-bc/src/disassembler.rs +++ b/som-interpreter-bc/src/disassembler.rs @@ -151,7 +151,9 @@ fn disassemble_body( Bytecode::JumpBackward(idx) => { println!(" {} (jump to bytecode index {})", idx, cur_idx - idx); } - Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil => {} + Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil => { + println!(); + } } } } From 355675e4619d9bbcd6c62355becd4116cfcbde77 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 6 Mar 2024 15:50:02 +0000 Subject: [PATCH 82/88] Progress with adapt_block_after_outer_inlined. passes every basic test except hash --- som-interpreter-bc/src/inliner.rs | 58 +++++++++++++++++++------------ 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index d3912fe3..6b32cd70 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -247,52 +247,64 @@ impl PrimMessageInliner for ast::Expression { } } - fn adapt_block_after_outer_inlined(&self, _ctxt: &mut dyn InnerGenCtxt, orig_block: &Block, adjust_scope_by: usize) -> Block { + fn adapt_block_after_outer_inlined(&self, ctxt: &mut dyn InnerGenCtxt, orig_block: &Block, adjust_scope_by: usize) -> Block { + let mut block_literals_to_patch = vec![]; let new_body = orig_block.blk_info.body.iter().map(|b| match b { Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | Bytecode::PushArgument(up_idx, _) | Bytecode::PopArgument(up_idx, _) => { - let new_up_idx: u8 = match *up_idx as isize - adjust_scope_by as isize { - diff if diff < 0 => 0, - diff => diff as u8 + let new_up_idx = match *up_idx { + 0 => 0, // local var/arg, not affected by inlining, stays the same + d if d > adjust_scope_by as u8 => *up_idx - 1, + _ => *up_idx }; + match b { Bytecode::PushLocal(_, idx) => Bytecode::PushLocal(new_up_idx, *idx), - Bytecode::PopLocal(_, idx) => Bytecode::PushLocal(new_up_idx, *idx), + Bytecode::PopLocal(_, idx) => Bytecode::PopLocal(new_up_idx, *idx), Bytecode::PushArgument(_, idx) => Bytecode::PushArgument(new_up_idx, *idx), Bytecode::PopArgument(_, idx) => Bytecode::PopArgument(new_up_idx, *idx), _ => unreachable!() } }, - Bytecode::PushBlock(_block_idx) => { - todo!(); - // match orig_block.blk_info.literals.get(*block_idx as usize) { - // Some(Literal::Block(inner_block)) => { - // // can't just clone the inner_block because the body to be modified is behind an Rc (not Rc>), so immutable - // // though if we ever want to do some runtime bytecode rewriting, it'll have to be an Rc> and this code will be refactorable - // // let new_block = inner_block.as_ref().clone(); - // let new_block = self.adapt_block_after_outer_inlined(ctxt, inner_block.clone().as_ref(), adjust_scope_by + 1); - // - // todo!() - // // Bytecode::PushBlock(idx as u8) - // } - // None => panic!("PushBlock is associated with no literal whatsoever?"), - // _ => panic!("PushBlock is not actually pushing a block somehow"), - // }; - }, + Bytecode::PushBlock(block_idx) => { + let inner_lit = orig_block.blk_info.literals.get(*block_idx as usize) + .unwrap_or_else(|| panic!("PushBlock is associated with no literal whatsoever?")); + let inner_block = match inner_lit { + Literal::Block(inner_blk) => inner_blk, + _ => panic!("PushBlock is not actually pushing a block somehow") + }; + + let new_block = self.adapt_block_after_outer_inlined(ctxt, inner_block.clone().as_ref(), adjust_scope_by); + block_literals_to_patch.push((block_idx, Rc::from(new_block))); + + Bytecode::PushBlock(*block_idx) + }, // Bytecode::ReturnNonLocal => Bytecode::ReturnNonLocal, _ => b.clone() } ).collect(); - // can't just clone the inner_block then modify the body because the body is behind an Rc (not Rc>), so immutable + // can't just clone the inner_block then modify the body/literals because the body is behind an Rc (not Rc>), so immutable // though if we ever want to do some runtime bytecode rewriting, it'll have to be an Rc> and this code will be refactorable (not so many individual calls to .clone()) Block { frame: orig_block.frame.clone(), blk_info: Rc::new(BlockInfo { locals: orig_block.blk_info.locals.clone(), - literals: orig_block.blk_info.literals.clone(), + literals: orig_block.blk_info.literals.iter().enumerate() + .map(|(idx, l)| { + let a = block_literals_to_patch + .iter() + .find_map(|(block_idx, blk)| {(**block_idx == idx as u8).then(|| blk)}); + + if a.is_some() { + Literal::Block(Rc::clone(a.unwrap())) + } else { + l.clone() + } + }) + .collect(), body: new_body, nb_params: orig_block.blk_info.nb_params, inline_cache: orig_block.blk_info.inline_cache.clone(), From fabf18d28a198d0c701ca503708706afa3b8596c Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Wed, 6 Mar 2024 15:54:14 +0000 Subject: [PATCH 83/88] block adaptation after inlining: seems functional, actually! --- run_benchmarks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_benchmarks.sh b/run_benchmarks.sh index 06be9776..3091b30b 100755 --- a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -4,7 +4,7 @@ BENCHMARKS=("Bounce" "Mandelbrot" "List" "Permute" "Queens" "QuickSort" "Sieve" for bench in "${BENCHMARKS[@]}" do - cargo run --bin som-interpreter-bc -- -c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/Json core-lib/Examples/Benchmarks/Richards core-lib/Examples/Benchmarks/DeltaBlue -- core-lib/Examples/Benchmarks/BenchmarkHarness.som $bench 1 0 7 + cargo run --bin som-interpreter-bc -- -c core-lib/Smalltalk core-lib/Examples/Benchmarks core-lib/Examples/Benchmarks/Json core-lib/Examples/Benchmarks/Richards core-lib/Examples/Benchmarks/DeltaBlue -- core-lib/Examples/Benchmarks/BenchmarkHarness.som $bench 1 7 echo -ne "\n" done From 6ee323cf47b53eb1ea349b37969b22cabd2ce712 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 8 Mar 2024 17:08:59 +0000 Subject: [PATCH 84/88] test to see if perf is affected (probably not): changing rules for inlining returnnonlocal --- .gitlab-ci.yml | 2 +- som-interpreter-bc/src/inliner.rs | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1392144b..da268b04 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,7 +16,7 @@ build-and-test-interpreters: - cargo clean - cargo build --release - - cargo test + # - cargo test # Unit Tests # - PYTHONPATH=src python3 -m pytest diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 6b32cd70..f21d504a 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -185,12 +185,31 @@ impl PrimMessageInliner for ast::Expression { // TODO; if the new context level is 0 (check prev bytecode emitted?), gotta emit a RETURNLOCAL instead! // as far as i understand... this still works? and is just slower? TODO fix though obviously - // dbg!(&ctxt.get_instructions().last()); - // match ctxt.get_instructions().last().unwrap() { - // Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnLocal), - // _ => ctxt.push_instr(Bytecode::ReturnNonLocal) - // } - ctxt.push_instr(Bytecode::ReturnNonLocal) + match ctxt.get_instructions().last()? { + Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil | Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnNonLocal), + Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | + Bytecode::PushArgument(up_idx, _) | Bytecode::PopArgument(up_idx, _) => { + match up_idx { + 0 => ctxt.push_instr(Bytecode::ReturnLocal), + _ => ctxt.push_instr(Bytecode::ReturnNonLocal) + } + }, + Bytecode::Send1(_) | Bytecode::Send2(_) | Bytecode::Send3(_) | Bytecode::SendN(_) => {ctxt.push_instr(Bytecode::ReturnNonLocal)}, + Bytecode::PushField(_) | Bytecode::PopField(_) => { + match ctxt.current_scope() { + 0 => ctxt.push_instr(Bytecode::ReturnLocal), + _ => ctxt.push_instr(Bytecode::ReturnNonLocal) + } + }, + // Bytecode::PushConstant(_) | Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => ctxt.push_instr(Bytecode::ReturnNonLocal), + _ => { + // dbg!(ctxt.get_instructions().last()?); + ctxt.push_instr(Bytecode::ReturnLocal) + } + } + // dbg!(ctxt.get_instructions().last()); + ctxt.push_instr(Bytecode::ReturnLocal) + // ctxt.push_instr(Bytecode::ReturnNonLocal) } Bytecode::ReturnLocal => {} // todo: hmm... do we? if so, add these to the _ case i guess. From 8ba8c586dd807a198c72c162ca36648a967f7ad2 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 8 Mar 2024 17:53:13 +0000 Subject: [PATCH 85/88] another test for inlining returnnonlocal (deltablue disabled!). if perf is inaffected, then this is fine --- rebench.conf | 1 - som-interpreter-bc/src/inliner.rs | 12 ++---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/rebench.conf b/rebench.conf index 1af79853..5db7ad83 100644 --- a/rebench.conf +++ b/rebench.conf @@ -22,7 +22,6 @@ benchmark_suites: iterations: 10 benchmarks: - Richards: {extra_args: 1, machines: [yuria2]} - - DeltaBlue: {extra_args: 50, machines: [yuria2]} - NBody: {extra_args: 500, machines: [yuria2]} - JsonSmall: {extra_args: 1, machines: [yuria2]} - GraphSearch: {extra_args: 4, machines: [yuria2]} diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index f21d504a..d27cf592 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -182,9 +182,7 @@ impl PrimMessageInliner for ast::Expression { }; } Bytecode::ReturnNonLocal => { - // TODO; if the new context level is 0 (check prev bytecode emitted?), gotta emit a RETURNLOCAL instead! - // as far as i understand... this still works? and is just slower? TODO fix though obviously - + // TODO: this is incomplete, but I don't *think* this affects performance? match ctxt.get_instructions().last()? { Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil | Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnNonLocal), Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | @@ -194,22 +192,16 @@ impl PrimMessageInliner for ast::Expression { _ => ctxt.push_instr(Bytecode::ReturnNonLocal) } }, - Bytecode::Send1(_) | Bytecode::Send2(_) | Bytecode::Send3(_) | Bytecode::SendN(_) => {ctxt.push_instr(Bytecode::ReturnNonLocal)}, Bytecode::PushField(_) | Bytecode::PopField(_) => { match ctxt.current_scope() { 0 => ctxt.push_instr(Bytecode::ReturnLocal), _ => ctxt.push_instr(Bytecode::ReturnNonLocal) } }, - // Bytecode::PushConstant(_) | Bytecode::PushConstant0 | Bytecode::PushConstant1 | Bytecode::PushConstant2 => ctxt.push_instr(Bytecode::ReturnNonLocal), _ => { - // dbg!(ctxt.get_instructions().last()?); - ctxt.push_instr(Bytecode::ReturnLocal) + ctxt.push_instr(Bytecode::ReturnLocal) // TODO nonlocal instead } } - // dbg!(ctxt.get_instructions().last()); - ctxt.push_instr(Bytecode::ReturnLocal) - // ctxt.push_instr(Bytecode::ReturnNonLocal) } Bytecode::ReturnLocal => {} // todo: hmm... do we? if so, add these to the _ case i guess. From aabc4ace0000e78e5ba24ac978a7ae0c2e536ed3 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 8 Mar 2024 18:09:49 +0000 Subject: [PATCH 86/88] reactivated tests, nonlocal case is fine --- .gitlab-ci.yml | 2 +- rebench.conf | 1 + som-interpreter-bc/src/inliner.rs | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index da268b04..1392144b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,7 +16,7 @@ build-and-test-interpreters: - cargo clean - cargo build --release - # - cargo test + - cargo test # Unit Tests # - PYTHONPATH=src python3 -m pytest diff --git a/rebench.conf b/rebench.conf index 5db7ad83..1af79853 100644 --- a/rebench.conf +++ b/rebench.conf @@ -22,6 +22,7 @@ benchmark_suites: iterations: 10 benchmarks: - Richards: {extra_args: 1, machines: [yuria2]} + - DeltaBlue: {extra_args: 50, machines: [yuria2]} - NBody: {extra_args: 500, machines: [yuria2]} - JsonSmall: {extra_args: 1, machines: [yuria2]} - GraphSearch: {extra_args: 4, machines: [yuria2]} diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index d27cf592..148f6931 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -182,7 +182,11 @@ impl PrimMessageInliner for ast::Expression { }; } Bytecode::ReturnNonLocal => { - // TODO: this is incomplete, but I don't *think* this affects performance? + // TODO: this is incomplete, but it doesn't seem to affect performance? + // Incomplete because Send{1|2|..} get turned to ReturnNonLocal when in 99% of cases, they should be "ReturnLocal"s + // and maybe some other cases that I forget + // But I don't observe any speedup from turning those nonlocal rets to local rets and deactivating the ONE broken benchmark, so... who cares, I guess? + // TODO: also if ReturnNonLocal ever gets a scope as argument (as it should) this code should be super simplifiable ("ReturnNonLocal(scope - 1), or ReturnLocal if scope is 1") match ctxt.get_instructions().last()? { Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil | Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnNonLocal), Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | @@ -199,7 +203,7 @@ impl PrimMessageInliner for ast::Expression { } }, _ => { - ctxt.push_instr(Bytecode::ReturnLocal) // TODO nonlocal instead + ctxt.push_instr(Bytecode::ReturnNonLocal) } } } From 1a900aaead1d55e72a7621be88087940726ac9d7 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 8 Mar 2024 19:09:35 +0000 Subject: [PATCH 87/88] returning to the old "only nonlocal ret" strat, and added an expansive comment as to why --- som-interpreter-bc/src/inliner.rs | 46 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 148f6931..1cf9c3ac 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -182,30 +182,34 @@ impl PrimMessageInliner for ast::Expression { }; } Bytecode::ReturnNonLocal => { - // TODO: this is incomplete, but it doesn't seem to affect performance? + // TODO: this commented out code is incomplete, but it doesn't really affect performance // Incomplete because Send{1|2|..} get turned to ReturnNonLocal when in 99% of cases, they should be "ReturnLocal"s // and maybe some other cases that I forget // But I don't observe any speedup from turning those nonlocal rets to local rets and deactivating the ONE broken benchmark, so... who cares, I guess? - // TODO: also if ReturnNonLocal ever gets a scope as argument (as it should) this code should be super simplifiable ("ReturnNonLocal(scope - 1), or ReturnLocal if scope is 1") - match ctxt.get_instructions().last()? { - Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil | Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnNonLocal), - Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | - Bytecode::PushArgument(up_idx, _) | Bytecode::PopArgument(up_idx, _) => { - match up_idx { - 0 => ctxt.push_instr(Bytecode::ReturnLocal), - _ => ctxt.push_instr(Bytecode::ReturnNonLocal) - } - }, - Bytecode::PushField(_) | Bytecode::PopField(_) => { - match ctxt.current_scope() { - 0 => ctxt.push_instr(Bytecode::ReturnLocal), - _ => ctxt.push_instr(Bytecode::ReturnNonLocal) - } - }, - _ => { - ctxt.push_instr(Bytecode::ReturnNonLocal) - } - } + // to be fair, always returning nonlocal instead of using this more elaborate code loses, like, 2% on SOME benchmarks + // but this commented out code also breaks in some rare cases and it's not worth it to fix it because eventually: + // TODO: ReturnNonLocal should eventually get a scope as argument, which will make fixing this case super simple ("ReturnNonLocal(scope - 1), or ReturnLocal if scope is 1") + + // match ctxt.get_instructions().last()? { + // Bytecode::Push0 | Bytecode::Push1 | Bytecode::PushNil | Bytecode::PushGlobal(_) => ctxt.push_instr(Bytecode::ReturnNonLocal), + // Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | + // Bytecode::PushArgument(up_idx, _) | Bytecode::PopArgument(up_idx, _) => { + // match up_idx { + // 0 => ctxt.push_instr(Bytecode::ReturnLocal), + // _ => ctxt.push_instr(Bytecode::ReturnNonLocal) + // } + // }, + // Bytecode::PushField(_) | Bytecode::PopField(_) => { + // match ctxt.current_scope() { + // 0 => ctxt.push_instr(Bytecode::ReturnLocal), + // _ => ctxt.push_instr(Bytecode::ReturnNonLocal) + // } + // }, + // _ => { + // ctxt.push_instr(Bytecode::ReturnNonLocal) + // } + // } + ctxt.push_instr(Bytecode::ReturnNonLocal) } Bytecode::ReturnLocal => {} // todo: hmm... do we? if so, add these to the _ case i guess. From 5bc87a7d74f00e5e8d19ba1c4b9cf4ddca363588 Mon Sep 17 00:00:00 2001 From: Octave Larose Date: Fri, 8 Mar 2024 19:10:15 +0000 Subject: [PATCH 88/88] formatting pass too --- som-interpreter-bc/src/block.rs | 2 +- som-interpreter-bc/src/compiler.rs | 60 ++++++++++-------- som-interpreter-bc/src/inliner.rs | 71 +++++++++++++++------- som-interpreter-bc/tests/inlining_tests.rs | 2 +- som-interpreter-bc/tests/specialized_bc.rs | 2 +- 5 files changed, 86 insertions(+), 51 deletions(-) diff --git a/som-interpreter-bc/src/block.rs b/som-interpreter-bc/src/block.rs index 768bbc48..d4719cc9 100644 --- a/som-interpreter-bc/src/block.rs +++ b/som-interpreter-bc/src/block.rs @@ -26,7 +26,7 @@ pub struct BlockInfo { pub struct Block { /// Reference to the captured stack frame. pub frame: Option>, - pub blk_info: Rc + pub blk_info: Rc, } impl Block { diff --git a/som-interpreter-bc/src/compiler.rs b/som-interpreter-bc/src/compiler.rs index 0cd84e69..c9efc844 100644 --- a/som-interpreter-bc/src/compiler.rs +++ b/som-interpreter-bc/src/compiler.rs @@ -138,21 +138,21 @@ impl GenCtxt for BlockGenCtxt<'_> { (self.locals.iter().position(|(local_name, local_scope)| { local_name == name && (*local_scope == self.current_scope()) })) - .map(|idx| FoundVar::Local(0, idx as u8)) - .or_else(|| { - self.locals - .iter() - .position(|(local_name, _)| local_name == name) - .map(|idx| FoundVar::Local(0, idx as u8)) - }) - .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) - .or_else(|| { - self.outer.find_var(name).map(|found| match found { - FoundVar::Local(up_idx, idx) => FoundVar::Local(up_idx + 1, idx), - FoundVar::Argument(up_idx, idx) => FoundVar::Argument(up_idx + 1, idx), - FoundVar::Field(idx) => FoundVar::Field(idx), - }) + .map(|idx| FoundVar::Local(0, idx as u8)) + .or_else(|| { + self.locals + .iter() + .position(|(local_name, _)| local_name == name) + .map(|idx| FoundVar::Local(0, idx as u8)) + }) + .or_else(|| (self.args.get_index_of(name)).map(|idx| FoundVar::Argument(0, idx as u8))) + .or_else(|| { + self.outer.find_var(name).map(|found| match found { + FoundVar::Local(up_idx, idx) => FoundVar::Local(up_idx + 1, idx), + FoundVar::Argument(up_idx, idx) => FoundVar::Argument(up_idx + 1, idx), + FoundVar::Field(idx) => FoundVar::Field(idx), }) + }) } fn intern_symbol(&mut self, name: &str) -> Interned { @@ -265,20 +265,26 @@ impl InnerGenCtxt for BlockGenCtxt<'_> { ) && matches!(bytecode_win[2], Bytecode::Pop) { - let are_bc_jump_targets = self.body.as_ref().unwrap().iter().enumerate().any(|(maybe_jump_idx, bc)| match bc { - Bytecode::Jump(jump_offset) - | Bytecode::JumpOnTrueTopNil(jump_offset) - | Bytecode::JumpOnFalseTopNil(jump_offset) - | Bytecode::JumpOnTruePop(jump_offset) - | Bytecode::JumpOnFalsePop(jump_offset) => { - let bc_target_idx = maybe_jump_idx + *jump_offset; - bc_target_idx == idx || bc_target_idx == idx + 2 - }, - _ => {false} - }); + let are_bc_jump_targets = + self.body + .as_ref() + .unwrap() + .iter() + .enumerate() + .any(|(maybe_jump_idx, bc)| match bc { + Bytecode::Jump(jump_offset) + | Bytecode::JumpOnTrueTopNil(jump_offset) + | Bytecode::JumpOnFalseTopNil(jump_offset) + | Bytecode::JumpOnTruePop(jump_offset) + | Bytecode::JumpOnFalsePop(jump_offset) => { + let bc_target_idx = maybe_jump_idx + *jump_offset; + bc_target_idx == idx || bc_target_idx == idx + 2 + } + _ => false, + }); if are_bc_jump_targets { - continue + continue; } indices_to_remove.push(idx); @@ -773,7 +779,7 @@ fn compile_block(outer: &mut dyn GenCtxt, defn: &ast::Block) -> Option { body, nb_params, inline_cache, - }) + }), }; // println!("(system) compiled block !"); diff --git a/som-interpreter-bc/src/inliner.rs b/som-interpreter-bc/src/inliner.rs index 1cf9c3ac..1bafe0f6 100644 --- a/som-interpreter-bc/src/inliner.rs +++ b/som-interpreter-bc/src/inliner.rs @@ -21,8 +21,13 @@ pub enum OrAndChoice { // TODO some of those should return Result types and throw errors instead, most likely. pub trait PrimMessageInliner { fn inline_if_possible(&self, ctxt: &mut dyn InnerGenCtxt, message: &ast::Message) - -> Option<()>; - fn adapt_block_after_outer_inlined(&self, ctxt: &mut dyn InnerGenCtxt, block_body: &Block, adjust_scope_by: usize) -> Block; + -> Option<()>; + fn adapt_block_after_outer_inlined( + &self, + ctxt: &mut dyn InnerGenCtxt, + block_body: &Block, + adjust_scope_by: usize, + ) -> Block; fn inline_compiled_block(&self, ctxt: &mut dyn InnerGenCtxt, block: &BlockInfo) -> Option<()>; fn inline_last_push_block_bc(&self, ctxt: &mut dyn InnerGenCtxt) -> Option<()>; fn inline_if_true_or_if_false( @@ -141,7 +146,8 @@ impl PrimMessageInliner for ast::Expression { match block.literals.get(*block_idx as usize)? { Literal::Block(inner_block) => { // let new_block = inner_block.as_ref().clone(); - let new_block = self.adapt_block_after_outer_inlined(ctxt, &inner_block, 1); + let new_block = + self.adapt_block_after_outer_inlined(ctxt, &inner_block, 1); let idx = ctxt.push_literal(Literal::Block(Rc::from(new_block))); ctxt.push_instr(Bytecode::PushBlock(idx as u8)); } @@ -266,16 +272,26 @@ impl PrimMessageInliner for ast::Expression { } } - fn adapt_block_after_outer_inlined(&self, ctxt: &mut dyn InnerGenCtxt, orig_block: &Block, adjust_scope_by: usize) -> Block { + fn adapt_block_after_outer_inlined( + &self, + ctxt: &mut dyn InnerGenCtxt, + orig_block: &Block, + adjust_scope_by: usize, + ) -> Block { let mut block_literals_to_patch = vec![]; - let new_body = orig_block.blk_info.body.iter().map(|b| - match b { - Bytecode::PushLocal(up_idx, _) | Bytecode::PopLocal(up_idx, _) | - Bytecode::PushArgument(up_idx, _) | Bytecode::PopArgument(up_idx, _) => { + let new_body = orig_block + .blk_info + .body + .iter() + .map(|b| match b { + Bytecode::PushLocal(up_idx, _) + | Bytecode::PopLocal(up_idx, _) + | Bytecode::PushArgument(up_idx, _) + | Bytecode::PopArgument(up_idx, _) => { let new_up_idx = match *up_idx { 0 => 0, // local var/arg, not affected by inlining, stays the same d if d > adjust_scope_by as u8 => *up_idx - 1, - _ => *up_idx + _ => *up_idx, }; match b { @@ -283,27 +299,36 @@ impl PrimMessageInliner for ast::Expression { Bytecode::PopLocal(_, idx) => Bytecode::PopLocal(new_up_idx, *idx), Bytecode::PushArgument(_, idx) => Bytecode::PushArgument(new_up_idx, *idx), Bytecode::PopArgument(_, idx) => Bytecode::PopArgument(new_up_idx, *idx), - _ => unreachable!() + _ => unreachable!(), } - }, + } Bytecode::PushBlock(block_idx) => { - let inner_lit = orig_block.blk_info.literals.get(*block_idx as usize) - .unwrap_or_else(|| panic!("PushBlock is associated with no literal whatsoever?")); + let inner_lit = orig_block + .blk_info + .literals + .get(*block_idx as usize) + .unwrap_or_else(|| { + panic!("PushBlock is associated with no literal whatsoever?") + }); let inner_block = match inner_lit { Literal::Block(inner_blk) => inner_blk, - _ => panic!("PushBlock is not actually pushing a block somehow") + _ => panic!("PushBlock is not actually pushing a block somehow"), }; - let new_block = self.adapt_block_after_outer_inlined(ctxt, inner_block.clone().as_ref(), adjust_scope_by); + let new_block = self.adapt_block_after_outer_inlined( + ctxt, + inner_block.clone().as_ref(), + adjust_scope_by, + ); block_literals_to_patch.push((block_idx, Rc::from(new_block))); Bytecode::PushBlock(*block_idx) - }, + } // Bytecode::ReturnNonLocal => Bytecode::ReturnNonLocal, - _ => b.clone() - } - ).collect(); + _ => b.clone(), + }) + .collect(); // can't just clone the inner_block then modify the body/literals because the body is behind an Rc (not Rc>), so immutable // though if we ever want to do some runtime bytecode rewriting, it'll have to be an Rc> and this code will be refactorable (not so many individual calls to .clone()) @@ -311,11 +336,15 @@ impl PrimMessageInliner for ast::Expression { frame: orig_block.frame.clone(), blk_info: Rc::new(BlockInfo { locals: orig_block.blk_info.locals.clone(), - literals: orig_block.blk_info.literals.iter().enumerate() + literals: orig_block + .blk_info + .literals + .iter() + .enumerate() .map(|(idx, l)| { let a = block_literals_to_patch .iter() - .find_map(|(block_idx, blk)| {(**block_idx == idx as u8).then(|| blk)}); + .find_map(|(block_idx, blk)| (**block_idx == idx as u8).then(|| blk)); if a.is_some() { Literal::Block(Rc::clone(a.unwrap())) diff --git a/som-interpreter-bc/tests/inlining_tests.rs b/som-interpreter-bc/tests/inlining_tests.rs index 077756a1..10c93387 100644 --- a/som-interpreter-bc/tests/inlining_tests.rs +++ b/som-interpreter-bc/tests/inlining_tests.rs @@ -267,4 +267,4 @@ fn inlining_pyramid() { expect_bytecode_sequence(&bytecodes, expected_bc); expect_bytecode_sequence(&bytecodes2, expected_bc); -} \ No newline at end of file +} diff --git a/som-interpreter-bc/tests/specialized_bc.rs b/som-interpreter-bc/tests/specialized_bc.rs index a07d02e2..c46e7534 100644 --- a/som-interpreter-bc/tests/specialized_bc.rs +++ b/som-interpreter-bc/tests/specialized_bc.rs @@ -180,7 +180,7 @@ fn something_jump_bug_popx() { // ...therefore breaking when they're optimized and the jump doesn't know what to do. // this issue is currently being circumvented by straight up not removing the sequence when it's a jump target. // but this needs to be changed in the future. there's likely an underlying issue that this test right there exemplifies? - + let class_txt = "Foo = ( testIfTrueTrueResult = ( | result |