refactor Linker & add LinkAddr to allow bytecode with partially resolved symbols / symbol-relative addresses

This commit is contained in:
Michael Sippel 2024-05-08 04:13:40 +02:00
parent b94723f1ba
commit 35760d76ce
Signed by: senvas
GPG key ID: F96CF119C34B64A6
4 changed files with 183 additions and 43 deletions

View file

@ -1,74 +1,103 @@
use {
crate::{VM_Word, VM_Instruction}
crate::{
VM_Word, VM_Instruction,
linker::LinkAddr
}
};
/// A single word of bytecode that has not been fully linked yet.
///
/// A word is either a finished machine word or an address reference
/// that may still name a symbol.
#[derive(Clone, Debug)]
pub enum AssemblyWord {
    /// An address, either absolute or relative to a named symbol.
    Symbol( LinkAddr ),
    /// A literal machine word.
    Lit( VM_Word )
}
impl AssemblyWord {
    /// Wraps the opcode `i` as a literal word (plain `as VM_Word` cast).
    pub fn inst(i: VM_Instruction) -> Self {
        Self::Lit(i as VM_Word)
    }

    /// Wraps the machine word `w` as a literal.
    pub fn lit(w: VM_Word) -> Self {
        Self::Lit(w)
    }

    /// Creates a reference to the symbol `name` with offset 0.
    pub fn symbol(name: &str) -> Self {
        let target = LinkAddr::Relative { symbol: name.into(), offset: 0 };
        Self::Symbol(target)
    }
}
/// Builder that accumulates a sequence of words through chained method calls.
pub struct Assembler {
instructions: Vec< VM_Word >
// words emitted so far, in emission order
words: Vec< AssemblyWord >,
}
impl Assembler {
/// Creates an assembler that has not emitted anything yet.
pub fn new() -> Self {
Assembler {
instructions: Vec::new()
words: Vec::new()
}
}
/// Consumes the assembler, appends a trailing `Ret` instruction and
/// returns everything that was emitted.
pub fn build(mut self) -> Vec< VM_Word > {
self.instructions.push( VM_Instruction::Ret as VM_Word );
self.instructions
pub fn build(mut self) -> Vec< AssemblyWord > {
self.words.push( AssemblyWord::inst(VM_Instruction::Ret) );
self.words
}
/// Emits a `Lit` instruction followed by the immediate word `w`.
pub fn lit(mut self, w: VM_Word) -> Assembler {
self.instructions.push( VM_Instruction::Lit as VM_Word );
self.instructions.push( w );
self.words.push( AssemblyWord::inst(VM_Instruction::Lit) );
self.words.push( AssemblyWord::lit(w) );
self
}
/// Emits a `Lit` instruction followed by the raw bit pattern of the float `wf`.
pub fn litf(mut self, wf: f64) -> Assembler {
// SAFETY: `transmute` only compiles when `VM_Word` has the same size as `f64`;
// presumably `VM_Word` is a 64-bit integer — TODO confirm.
// NOTE(review): `wf.to_bits() as VM_Word` would yield the same bits without `unsafe`.
let w : VM_Word = unsafe{ std::mem::transmute(wf) };
self.instructions.push( VM_Instruction::Lit as VM_Word );
self.instructions.push( w );
self.words.push( AssemblyWord::inst(VM_Instruction::Lit) );
self.words.push( AssemblyWord::lit(w) );
self
}
/// Emits the single instruction `i` without any operand.
pub fn instruction(mut self, i: VM_Instruction) -> Assembler {
self.instructions.push( i as VM_Word );
pub fn inst(mut self, i: VM_Instruction) -> Assembler {
self.words.push( AssemblyWord::inst(i) );
self
}
/// Emits a `Call` instruction followed by one word that identifies the call target.
pub fn call(mut self, addr: VM_Word) -> Assembler {
self.instructions.push(VM_Instruction::Call as VM_Word);
self.instructions.push(addr);
pub fn call(mut self, s: &str) -> Assembler {
self.words.push( AssemblyWord::inst(VM_Instruction::Call) );
// the target is emitted as a symbol reference rather than a fixed word
self.words.push( AssemblyWord::symbol(s) );
self
}
/// Emits a `Lit` instruction whose operand is a reference to the symbol `s`.
pub fn static_ref(mut self, s: &str) -> Assembler {
    // Two words: the opcode, then the symbol reference as its operand.
    self.words.extend([
        AssemblyWord::inst(VM_Instruction::Lit),
        AssemblyWord::symbol(s),
    ]);
    self
}
/// Emits a loop laid out as: `condition`, a `Branch` word pair, `body`,
/// then a `Jmp` word pair with a negative operand.
///
/// Both operands are computed from the word counts of `condition` and `body`.
pub fn while_loop(mut self, mut condition: Assembler, mut body: Assembler) -> Assembler {
let cond_len = condition.instructions.len();
let body_len = body.instructions.len();
self.instructions.append( &mut condition.instructions );
// lengths are taken before `append`, which drains the source vectors
let cond_len = condition.words.len();
let body_len = body.words.len();
self.words.append( &mut condition.words );
// jump to end
// operand = body length + 2, the 2 being the Jmp pair emitted after the body
self.instructions.push( VM_Instruction::Branch as VM_Word );
self.instructions.push( body_len as VM_Word + 2 );
self.words.push( AssemblyWord::inst(VM_Instruction::Branch) );
self.words.push( AssemblyWord::lit(body_len as VM_Word + 2) );
self.instructions.append( &mut body.instructions );
self.words.append( &mut body.words );
// jump back to condition
// operand = -(2 + body length + 2 + condition length): both word pairs plus body and condition
self.instructions.push( VM_Instruction::Jmp as VM_Word );
self.instructions.push( -2-(body_len as VM_Word)-2-(cond_len as VM_Word) );
self.words.push( AssemblyWord::inst(VM_Instruction::Jmp) );
self.words.push( AssemblyWord::lit(-2-(body_len as VM_Word)-2-(cond_len as VM_Word)) );
self
}
/// Emits a two-way conditional laid out as: a `Branch` word pair, `if_branch`,
/// a `Jmp` word pair, then `else_branch`.
pub fn branch(mut self, mut if_branch: Assembler, mut else_branch: Assembler) -> Assembler {
self.instructions.push(VM_Instruction::Branch as VM_Word);
self.instructions.push( if_branch.instructions.len() as VM_Word + 2);
// Branch operand = length of the if-branch + 2, the 2 being the Jmp pair that follows it
self.words.push(AssemblyWord::inst(VM_Instruction::Branch));
self.words.push(AssemblyWord::lit(if_branch.words.len() as VM_Word + 2));
self.instructions.append( &mut if_branch.instructions );
self.instructions.push( VM_Instruction::Jmp as VM_Word );
self.instructions.push( else_branch.instructions.len() as VM_Word);
self.words.append( &mut if_branch.words );
// Jmp operand = length of the else-branch, read before `append` drains it
self.words.push(AssemblyWord::inst(VM_Instruction::Jmp));
self.words.push(AssemblyWord::lit(else_branch.words.len() as VM_Word));
self.instructions.append( &mut else_branch.instructions );
self.words.append( &mut else_branch.words );
self
}

View file

@ -11,6 +11,6 @@ pub mod test;
pub use {
vm::{VM_Instruction, VM_Word, VM},
assembler::Assembler,
linker::Linker
linker::{Linker, LinkAddr}
};

View file

@ -1,27 +1,130 @@
use std::collections::HashMap;
use {
std::collections::HashMap,
crate::{VM_Word, assembler::AssemblyWord}
};
/// An address as seen by the linker.
///
/// `PartialEq`/`Eq` are derived so that addresses can be compared directly,
/// for example in assertions.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum LinkAddr {
    /// A fixed address that needs no further resolution.
    Absolute(VM_Word),
    /// An address given as an offset from the start of a named symbol.
    Relative{
        /// Name of the symbol the address is relative to.
        symbol: String,
        /// Offset in words from the start of that symbol.
        offset: VM_Word
    }
}
/// A run of words together with the address assigned to its first word.
pub struct Section {
// address of the first word of `data`
addr: VM_Word,
// contents of the section; may still contain symbol references
data: Vec< AssemblyWord >
}
/// Collects named sections and hands out consecutive addresses to them.
pub struct Linker {
symbols: HashMap<String, crate::VM_Word>,
current_addr: crate::VM_Word,
// symbol name -> section registered under that name
symbols: HashMap<String, Section>,
// address the next added section will receive
next_addr: VM_Word
}
#[derive(Clone, Debug)]
pub enum LinkError {
UnresolvedSymbol( String ),
RelativeAddrOutOfBounds( String, VM_Word )
}
impl Linker {
/// Creates a linker with an empty symbol table.
pub fn new(start_addr: crate::VM_Word) -> Self {
pub fn new() -> Self {
Linker {
symbols: HashMap::new(),
current_addr: start_addr
// the first section added will be placed at address 0
next_addr: 0
}
}
pub fn resolve_symbol(&self, name: &String) -> Option< crate::VM_Word > {
self.symbols.get(name).cloned()
/// Lays out all sections at their assigned addresses and resolves every
/// symbol reference that can currently be resolved.
///
/// The result has `next_addr` words. Words not written by any section
/// remain `Lit(0)`. References to symbols that are not registered are
/// kept as relative addresses.
///
/// # Errors
///
/// Propagates any error from `resolve_link`, e.g. a relative address
/// whose offset lies outside the section of its (known) symbol.
pub fn link_partial(self) -> Result<Vec<AssemblyWord>, LinkError> {
    let mut bytecode = vec![ AssemblyWord::Lit(0); self.next_addr as usize ];
    // Only the sections are needed here; the symbol names are looked up
    // again inside `resolve_link`.
    for section in self.symbols.values() {
        let base = section.addr as usize;
        for (i, w) in section.data.iter().enumerate() {
            bytecode[base + i] = self.resolve_link(w.clone())?;
        }
    }
    Ok(bytecode)
}
pub fn link(&mut self, vm: &mut crate::VM, symbol: String, bytecode: Vec< crate::VM_Word >) {
self.symbols.insert(symbol, self.current_addr);
for i in 0 .. bytecode.len() {
vm.memory[ self.current_addr as usize ] = bytecode[i];
self.current_addr += 1;
pub fn link_total(self) -> Result<Vec<VM_Word>, LinkError> {
self.link_partial()?
.into_iter()
.map(
|w| match w {
AssemblyWord::Lit(w) => Ok(w),
AssemblyWord::Symbol( LinkAddr::Absolute(w) ) => Ok(w),
AssemblyWord::Symbol( LinkAddr::Relative{ symbol, offset }) => {
Err(LinkError::UnresolvedSymbol(symbol))
}
}
)
.collect()
}
/// Looks up the start address of the section registered under `name`.
///
/// Returns `None` when no such symbol has been added.
pub fn get_link_addr(&self, name: &String) -> Option<VM_Word> {
    self.symbols.get(name).map(|section| section.addr)
}
/// Resolves the address inside a symbol word; literal words pass through unchanged.
///
/// # Errors
///
/// Propagates any error from `resolve_link_addr`.
pub fn resolve_link(&self, w: AssemblyWord) -> Result<AssemblyWord, LinkError> {
    match w {
        AssemblyWord::Symbol(addr) =>
            self.resolve_link_addr(addr).map(AssemblyWord::Symbol),
        literal @ AssemblyWord::Lit(_) => Ok(literal),
    }
}
/// Turns a symbol-relative address into an absolute one when the symbol is known.
///
/// Absolute addresses are returned as they are. Relative addresses whose
/// symbol is not registered are returned unchanged.
///
/// # Errors
///
/// Returns `LinkError::RelativeAddrOutOfBounds` when the symbol is known but
/// `offset` is negative or not smaller than the length of its section.
pub fn resolve_link_addr(&self, addr: LinkAddr) -> Result<LinkAddr, LinkError> {
    let (symbol, offset) = match addr {
        LinkAddr::Absolute(w) => return Ok(LinkAddr::Absolute(w)),
        LinkAddr::Relative{ symbol, offset } => (symbol, offset),
    };

    match self.symbols.get(&symbol) {
        // Unknown symbol: leave the reference as it is.
        None => Ok(LinkAddr::Relative{ symbol, offset }),
        Some(section) => {
            let in_bounds = offset >= 0 && offset < section.data.len() as i64;
            if in_bounds {
                Ok(LinkAddr::Absolute( section.addr as i64 + offset as i64 ))
            } else {
                Err(LinkError::RelativeAddrOutOfBounds(symbol, offset))
            }
        }
    }
}
/// Registers `data` as a section of literal words under the name `symbol`.
///
/// Each word is wrapped as a literal. Address assignment and registration
/// are delegated to `add_procedure`, so both entry points share one
/// layout rule.
pub fn add_static(
    &mut self,
    symbol: &str,
    data: Vec<VM_Word>,
) {
    let words = data.into_iter().map(AssemblyWord::Lit).collect();
    self.add_procedure(symbol, words);
}
/// Registers `bytecode` as a section under the name `symbol`.
///
/// The section is placed at the current `next_addr`, which then advances
/// by the number of words in `bytecode`. An existing entry with the same
/// name is replaced in the symbol table.
pub fn add_procedure(
    &mut self,
    symbol: &str,
    bytecode: Vec< AssemblyWord >
) {
    let addr = self.next_addr;
    self.next_addr += bytecode.len() as i64;

    let section = Section { addr, data: bytecode };
    self.symbols.insert(symbol.into(), section);
}
}

View file

@ -31,6 +31,14 @@ impl VM {
}
}
/// Copies `bytecode` word by word into `memory`, starting at index 0.
pub fn load(&mut self, bytecode: Vec< VM_Word >) {
    for (addr, w) in bytecode.into_iter().enumerate() {
        self.memory[addr] = w;
    }
}
pub fn execute(&mut self, entry: VM_Word) {
self.inst_ptr = entry;
while self.execute_step() {}