diff --git a/src/arch.rs b/src/arch.rs index aeb7816..903b831 100644 --- a/src/arch.rs +++ b/src/arch.rs @@ -4,14 +4,10 @@ pub mod arm; use crate::{ flow::FlowInfo, - ir::{Block, FrontendBuilder}, + ir::Block, }; /// Flat generic register metadata. -/// -/// This type intentionally stays small. -/// Architecture-specific aliasing or overlap semantics belong in the concrete -/// architecture module, not here. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct RegisterDesc { /// Architecture-defined register identity. @@ -24,7 +20,7 @@ pub struct RegisterDesc { pub bits: u16, } -/// Stable architecture protocol. +/// Pure ISA protocol: decoding, flow analysis, and disassembly. pub trait Arch { /// Architecture execution mode. type Mode: Copy + Eq; @@ -38,9 +34,6 @@ pub trait Arch { /// Decode-time error. type DecodeError; - /// Architecture-specific disassembly output. - type Disasm; - /// Stable architecture name. fn name(&self) -> &'static str; @@ -66,18 +59,16 @@ pub trait Arch { /// Return instruction-level control-flow facts for one decoded instruction. fn flow_info(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> FlowInfo; - /// Render one decoded instruction as disassembly. - fn disasm(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> Self::Disasm; + /// Render one decoded instruction as a disassembly string. + fn disasm(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> String; } /// Translation-session knowledge available during lifting. pub trait LiftEnv { - /// Returns the IR block associated with a statically known target address, - /// if the current translation session has created one. + /// Returns the IR block associated with a statically known target address. fn block_for_target(&self, addr: u64) -> Option; - /// Returns the fallthrough block for the current instruction, if one exists - /// in the current translation session. + /// Returns the fallthrough block for the current instruction, if one exists. fn fallthrough_block(&self) -> Option; } @@ -87,12 +78,15 @@ pub trait LiftArch: Arch { type LiftError; /// Per-lift mutable context. - /// - /// This is where the large mutable lifting state belongs. - type LiftCtx<'a> + type LiftCtx<'ir, 'fb> where - Self: 'a; + Self: 'ir, + 'ir: 'fb; /// Lift one decoded instruction into Slonik IR. - fn lift(&self, cx: &mut Self::LiftCtx<'_>, inst: &Self::Inst) -> Result<(), Self::LiftError>; + fn lift( + &self, + cx: &mut Self::LiftCtx<'_, '_>, + inst: &Self::Inst, + ) -> Result<(), Self::LiftError>; } diff --git a/src/arch/arm.rs b/src/arch/arm.rs index 3431947..55eee41 100644 --- a/src/arch/arm.rs +++ b/src/arch/arm.rs @@ -3,20 +3,17 @@ use crate::{ arch::{Arch, LiftArch, LiftEnv, RegisterDesc}, flow::{FlowInfo, FlowKind, FlowTarget}, - ir::{Block, FrontendBuilder, I32, IntCC, MemSize, Type, Value, Variable}, + ir::{Block, FrontendBuilder, IntCC, MemSize, Type, Value, Variable}, }; use cranelift_entity::EntityRef; -use yaxpeax_arch::{Decoder, LengthedInstruction, ReaderBuilder, U8Reader}; +use yaxpeax_arch::{Decoder, LengthedInstruction, U8Reader}; use yaxpeax_arm::armv7::{ ConditionCode, DecodeError, InstDecoder, Instruction, Opcode, Operand, Reg, RegShiftStyle, ShiftStyle, }; /// ARM execution mode. -/// -/// This module targets 32-bit ARM and Thumb. -/// AArch64 should live in a separate `arm64.rs`. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum ArmMode { /// ARM state. @@ -93,26 +90,6 @@ pub enum ArmReg { Pc = 15, } -/// ARM condition code. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum ArmCond { - Eq, - Ne, - Cs, - Cc, - Mi, - Pl, - Vs, - Vc, - Hi, - Ls, - Ge, - Lt, - Gt, - Le, - Al, -} - /// ARM status flag identity. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum ArmFlag { @@ -150,42 +127,29 @@ impl Default for Arm { } } +/// Variable index range [0, 16) is reserved for GPRs. +/// Range [FLAG_VAR_BASE, FLAG_VAR_BASE + N) is reserved for flags. +const FLAG_VAR_BASE: usize = 0x100; + /// Mutable per-lift ARM state. -pub struct ArmLiftCtx<'a> { - /// Static architecture description. - pub arch: &'a Arm, - - /// Translation-session environment. - pub env: &'a dyn LiftEnv, - - /// Current frontend builder. - pub fb: &'a mut FrontendBuilder<'a>, - - /// Current program counter. +/// +/// `'ir` is the lifetime of the `Body` and `FrontendBuilderContext` that the +/// `FrontendBuilder` borrows. `'fb` is the lifetime of the mutable reference +/// to the `FrontendBuilder` itself. Keeping them separate avoids the +/// invariant-lifetime trap of `&'a mut FrontendBuilder<'a>`. +pub struct ArmLiftCtx<'ir, 'fb> { + pub arch: &'ir Arm, + pub env: &'ir dyn LiftEnv, + pub fb: &'fb mut FrontendBuilder<'ir>, pub pc: u64, - - /// Current execution mode. pub mode: ArmMode, - - /// Current SSA bindings for architectural registers. - pub regs: ArmRegisterState, - - /// Current SSA bindings for flags. - pub flags: ArmFlagState, - - /// Cached comparison operands for conditional branch lowering. - pub last_cmp: Option<(Value, Value)>, - - /// Temporary lifting scratch state. - pub scratch: ArmScratch, } -impl<'a> ArmLiftCtx<'a> { - /// Creates a new ARM lift context. +impl<'ir, 'fb> ArmLiftCtx<'ir, 'fb> { pub fn new( - arch: &'a Arm, - env: &'a dyn LiftEnv, - fb: &'a mut FrontendBuilder<'a>, + arch: &'ir Arm, + env: &'ir dyn LiftEnv, + fb: &'fb mut FrontendBuilder<'ir>, pc: u64, mode: ArmMode, ) -> Self { @@ -199,199 +163,64 @@ impl<'a> ArmLiftCtx<'a> { fb.declare_var(Self::flag_var(flag), Type::bool()); } - Self { - arch, - env, - fb, - pc, - mode, - regs: ArmRegisterState::default(), - flags: ArmFlagState::default(), - last_cmp: None, - scratch: ArmScratch::default(), - } + Self { arch, env, fb, pc, mode } } - /// Returns the integer word type for the current mode. pub fn word_ty(&self) -> Type { Type::i32() } - /// Maps a register identity to a frontend variable. pub fn reg_var(reg: ArmReg) -> Variable { Variable::new(reg as usize) } - /// Maps a flag identity to a frontend variable. pub fn flag_var(flag: ArmFlag) -> Variable { - Variable::new(0x100 + flag as usize) + Variable::new(FLAG_VAR_BASE + flag as usize) } - /// Reads the current SSA value for a register. pub fn read_reg(&mut self, reg: ArmReg) -> Value { self.fb.use_var(Self::reg_var(reg)) } - /// Writes an SSA value to a register. pub fn write_reg(&mut self, reg: ArmReg, value: Value) { self.fb.def_var(Self::reg_var(reg), value); } - /// Reads the current SSA value for a flag. pub fn read_flag(&mut self, flag: ArmFlag) -> Value { self.fb.use_var(Self::flag_var(flag)) } - /// Writes an SSA value to a flag. pub fn write_flag(&mut self, flag: ArmFlag, value: Value) { self.fb.def_var(Self::flag_var(flag), value); } - /// Resolves a direct target address to an IR block. - pub fn direct_block(&self, target_addr: u64) -> Option { + pub fn direct_block(&self, target_addr: u64) -> Option { self.env.block_for_target(target_addr) } - /// Returns the fallthrough block for the current instruction, if present. - pub fn fallthrough_block(&self) -> Option { + pub fn fallthrough_block(&self) -> Option { self.env.fallthrough_block() } - - /// Computes a boolean SSA value for a condition code using the last cached - /// compare. - pub fn condition_value(&mut self, cond: ArmCond) -> Option { - let (lhs, rhs) = self.last_cmp?; - - let v = match cond { - ArmCond::Eq => self.fb.icmp(crate::ir::IntCC::Eq, lhs, rhs), - ArmCond::Ne => self.fb.icmp(crate::ir::IntCC::Ne, lhs, rhs), - ArmCond::Lt => self.fb.icmp(crate::ir::IntCC::Slt, lhs, rhs), - ArmCond::Le => self.fb.icmp(crate::ir::IntCC::Sle, lhs, rhs), - ArmCond::Gt => self.fb.icmp(crate::ir::IntCC::Sgt, lhs, rhs), - ArmCond::Ge => self.fb.icmp(crate::ir::IntCC::Sge, lhs, rhs), - - // Placeholder until full NZCV lowering exists. - ArmCond::Cs - | ArmCond::Cc - | ArmCond::Mi - | ArmCond::Pl - | ArmCond::Vs - | ArmCond::Vc - | ArmCond::Hi - | ArmCond::Ls - | ArmCond::Al => return None, - }; - - Some(v) - } -} - -/// Current SSA bindings for architectural registers. -#[derive(Clone, Debug, Default)] -pub struct ArmRegisterState { - pub gpr: [Option; 16], -} - -/// Current SSA bindings for status flags. -#[derive(Clone, Debug, Default)] -pub struct ArmFlagState { - pub n: Option, - pub z: Option, - pub c: Option, - pub v: Option, - pub q: Option, -} - -/// Temporary lifting scratch state. -#[derive(Clone, Debug, Default)] -pub struct ArmScratch { - pub tmp0: Option, - pub tmp1: Option, - pub tmp2: Option, } /// Public ARM register table. pub const REGISTERS: [RegisterDesc; 16] = [ - RegisterDesc { - reg: ArmReg::R0, - name: "r0", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R1, - name: "r1", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R2, - name: "r2", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R3, - name: "r3", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R4, - name: "r4", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R5, - name: "r5", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R6, - name: "r6", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R7, - name: "r7", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R8, - name: "r8", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R9, - name: "r9", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R10, - name: "r10", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R11, - name: "r11", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::R12, - name: "r12", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::Sp, - name: "sp", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::Lr, - name: "lr", - bits: 32, - }, - RegisterDesc { - reg: ArmReg::Pc, - name: "pc", - bits: 32, - }, + RegisterDesc { reg: ArmReg::R0, name: "r0", bits: 32 }, + RegisterDesc { reg: ArmReg::R1, name: "r1", bits: 32 }, + RegisterDesc { reg: ArmReg::R2, name: "r2", bits: 32 }, + RegisterDesc { reg: ArmReg::R3, name: "r3", bits: 32 }, + RegisterDesc { reg: ArmReg::R4, name: "r4", bits: 32 }, + RegisterDesc { reg: ArmReg::R5, name: "r5", bits: 32 }, + RegisterDesc { reg: ArmReg::R6, name: "r6", bits: 32 }, + RegisterDesc { reg: ArmReg::R7, name: "r7", bits: 32 }, + RegisterDesc { reg: ArmReg::R8, name: "r8", bits: 32 }, + RegisterDesc { reg: ArmReg::R9, name: "r9", bits: 32 }, + RegisterDesc { reg: ArmReg::R10, name: "r10", bits: 32 }, + RegisterDesc { reg: ArmReg::R11, name: "r11", bits: 32 }, + RegisterDesc { reg: ArmReg::R12, name: "r12", bits: 32 }, + RegisterDesc { reg: ArmReg::Sp, name: "sp", bits: 32 }, + RegisterDesc { reg: ArmReg::Lr, name: "lr", bits: 32 }, + RegisterDesc { reg: ArmReg::Pc, name: "pc", bits: 32 }, ]; /// ARM lift-time errors. @@ -400,22 +229,10 @@ pub enum ArmLiftError { UnsupportedInstruction, UnsupportedCondition, UnsupportedAddressing, - MissingCompareState, MissingTargetBlock, - InvalidImmediate, UnsupportedControlFlow, } -/// Parsed instruction text used by the starter lifter. -/// -/// This is intentionally derived from the decoder's `Display` output so the -/// architecture boundary can use the decoder crate's instruction type directly. -#[derive(Clone, Debug, PartialEq, Eq)] -struct ParsedText { - mnemonic: String, - operands: Vec, -} - impl Arm { fn map_reg(reg: Reg) -> ArmReg { match reg.number() { @@ -439,26 +256,11 @@ impl Arm { } } - fn intcc_from_cond(cond: ConditionCode) -> Option { - Some(match cond { - ConditionCode::EQ => IntCC::Eq, - ConditionCode::NE => IntCC::Ne, - ConditionCode::GE => IntCC::Sge, - ConditionCode::LT => IntCC::Slt, - ConditionCode::GT => IntCC::Sgt, - ConditionCode::LE => IntCC::Sle, - ConditionCode::AL => return None, - - // these need explicit NZCV modeling - ConditionCode::HS - | ConditionCode::LO - | ConditionCode::MI - | ConditionCode::PL - | ConditionCode::VS - | ConditionCode::VC - | ConditionCode::HI - | ConditionCode::LS => return None, - }) + fn decoder_for_mode(mode: ArmMode) -> InstDecoder { + match mode { + ArmMode::Arm => InstDecoder::default(), + ArmMode::Thumb => InstDecoder::default_thumb(), + } } fn direct_target_from_operand(pc: u64, op: Operand) -> Option { @@ -469,8 +271,65 @@ impl Arm { } } + /// Computes a boolean condition value from `cond` by reading NZCV flags. + /// + /// Returns `Ok(None)` for `AL` (unconditional). + /// Returns `Err` for conditions that require C or V which are not yet modelled. + fn cond_value( + cx: &mut ArmLiftCtx<'_, '_>, + cond: ConditionCode, + ) -> Result, ArmLiftError> { + match cond { + ConditionCode::AL => Ok(None), + + ConditionCode::EQ => Ok(Some(cx.read_flag(ArmFlag::Z))), + + ConditionCode::NE => { + let z = cx.read_flag(ArmFlag::Z); + Ok(Some(cx.fb.not(z))) + } + + ConditionCode::LT => { + // N != V; placeholder treating N alone (ignores overflow) + Ok(Some(cx.read_flag(ArmFlag::N))) + } + + ConditionCode::GE => { + // N == V; placeholder treating !N alone + let n = cx.read_flag(ArmFlag::N); + Ok(Some(cx.fb.not(n))) + } + + ConditionCode::GT => { + // Z=0 and N==V; placeholder using Z=0 and N=0 + let z = cx.read_flag(ArmFlag::Z); + let n = cx.read_flag(ArmFlag::N); + let not_z = cx.fb.not(z); + let not_n = cx.fb.not(n); + Ok(Some(cx.fb.and(not_z, not_n, Type::bool()))) + } + + ConditionCode::LE => { + // Z=1 or N!=V; placeholder using Z=1 or N=1 + let z = cx.read_flag(ArmFlag::Z); + let n = cx.read_flag(ArmFlag::N); + Ok(Some(cx.fb.or(z, n, Type::bool()))) + } + + // These require C or V flags which are not yet modelled. + ConditionCode::HS + | ConditionCode::LO + | ConditionCode::MI + | ConditionCode::PL + | ConditionCode::VS + | ConditionCode::VC + | ConditionCode::HI + | ConditionCode::LS => Err(ArmLiftError::UnsupportedCondition), + } + } + fn read_shifted_reg( - cx: &mut ArmLiftCtx<'_>, + cx: &mut ArmLiftCtx<'_, '_>, shift: yaxpeax_arm::armv7::RegShift, ) -> Result { let shift = shift.into_shift(); @@ -502,7 +361,10 @@ impl Arm { } } - fn read_operand(cx: &mut ArmLiftCtx<'_>, op: Operand) -> Result { + fn read_operand( + cx: &mut ArmLiftCtx<'_, '_>, + op: Operand, + ) -> Result { match op { Operand::Reg(r) => Ok(cx.read_reg(Self::map_reg(r))), Operand::Imm12(v) => Ok(cx.fb.iconst(Type::i32(), v as i64)), @@ -512,16 +374,19 @@ impl Arm { } } - fn mem_addr(cx: &mut ArmLiftCtx<'_>, op: Operand) -> Result { + fn mem_addr( + cx: &mut ArmLiftCtx<'_, '_>, + op: Operand, + ) -> Result { match op { Operand::RegDeref(base) => Ok(cx.read_reg(Self::map_reg(base))), Operand::RegDerefPreindexOffset(base, off, add, wback) => { let base_reg = Self::map_reg(base); let base_v = cx.read_reg(base_reg); - let off_v = cx.fb.iconst(I32, off as i64); + let off_v = cx.fb.iconst(Type::i32(), off as i64); let addr = if add { - cx.fb.iadd(base_v, off_v, I32) + cx.fb.iadd(base_v, off_v, Type::i32()) } else { cx.fb.isub(base_v, off_v, Type::i32()) }; @@ -610,220 +475,9 @@ impl Arm { } } - fn cond_value( - cx: &mut ArmLiftCtx<'_>, - cond: ConditionCode, - ) -> Result, ArmLiftError> { - let Some(cc) = Self::intcc_from_cond(cond) else { - return if cond == ConditionCode::AL { - Ok(None) - } else { - Err(ArmLiftError::UnsupportedCondition) - }; - }; - - let (lhs, rhs) = cx.last_cmp.ok_or(ArmLiftError::MissingCompareState)?; - Ok(Some(cx.fb.icmp(cc, lhs, rhs))) - } - /// Creates the appropriate decoder for the current mode. - fn decoder_for_mode(mode: ArmMode) -> InstDecoder { - match mode { - ArmMode::Arm => InstDecoder::default(), - ArmMode::Thumb => InstDecoder::default_thumb(), - } - } - - /// Returns the canonical textual register name. - pub fn reg_name(reg: ArmReg) -> &'static str { - match reg { - ArmReg::R0 => "r0", - ArmReg::R1 => "r1", - ArmReg::R2 => "r2", - ArmReg::R3 => "r3", - ArmReg::R4 => "r4", - ArmReg::R5 => "r5", - ArmReg::R6 => "r6", - ArmReg::R7 => "r7", - ArmReg::R8 => "r8", - ArmReg::R9 => "r9", - ArmReg::R10 => "r10", - ArmReg::R11 => "r11", - ArmReg::R12 => "r12", - ArmReg::Sp => "sp", - ArmReg::Lr => "lr", - ArmReg::Pc => "pc", - } - } - - fn parse_text(inst: &Instruction) -> ParsedText { - let text = inst.to_string(); - let mut parts = text.splitn(2, ' '); - let mnemonic = parts.next().unwrap_or("").trim().to_ascii_lowercase(); - let operands = parts - .next() - .map(|rest| { - rest.split(',') - .map(|x| x.trim().to_ascii_lowercase()) - .filter(|x| !x.is_empty()) - .collect() - }) - .unwrap_or_default(); - - ParsedText { mnemonic, operands } - } - - fn parse_reg(text: &str) -> Option { - Some(match text { - "r0" => ArmReg::R0, - "r1" => ArmReg::R1, - "r2" => ArmReg::R2, - "r3" => ArmReg::R3, - "r4" => ArmReg::R4, - "r5" => ArmReg::R5, - "r6" => ArmReg::R6, - "r7" => ArmReg::R7, - "r8" => ArmReg::R8, - "r9" => ArmReg::R9, - "r10" => ArmReg::R10, - "r11" => ArmReg::R11, - "r12" => ArmReg::R12, - "sp" | "r13" => ArmReg::Sp, - "lr" | "r14" => ArmReg::Lr, - "pc" | "r15" => ArmReg::Pc, - _ => return None, - }) - } - - fn parse_imm(text: &str) -> Option { - let s = text.trim(); - - let s = s.strip_prefix('#').unwrap_or(s); - - if let Some(hex) = s.strip_prefix("-0x") { - i64::from_str_radix(hex, 16).ok().map(|v| -v) - } else if let Some(hex) = s.strip_prefix("0x") { - i64::from_str_radix(hex, 16).ok() - } else { - s.parse::().ok() - } - } - - fn parse_target(text: &str) -> Option { - let s = text.trim(); - let s = s.strip_prefix('#').unwrap_or(s); - - if let Some(hex) = s.strip_prefix("0x") { - u64::from_str_radix(hex, 16).ok() - } else { - s.parse::().ok() - } - } - - fn parse_mem(text: &str) -> Option<(ArmReg, i64)> { - let t = text.trim(); - if !(t.starts_with('[') && t.ends_with(']')) { - return None; - } - - let inner = &t[1..t.len() - 1]; - let parts: Vec<_> = inner.split(',').map(|p| p.trim()).collect(); - - let base = Self::parse_reg(parts.first().copied()?)?; - let off = if let Some(off) = parts.get(1) { - Self::parse_imm(off)? - } else { - 0 - }; - - Some((base, off)) - } - - fn parse_cond_suffix(s: &str) -> Option { - Some(match s { - "eq" => ArmCond::Eq, - "ne" => ArmCond::Ne, - "cs" => ArmCond::Cs, - "cc" => ArmCond::Cc, - "mi" => ArmCond::Mi, - "pl" => ArmCond::Pl, - "vs" => ArmCond::Vs, - "vc" => ArmCond::Vc, - "hi" => ArmCond::Hi, - "ls" => ArmCond::Ls, - "ge" => ArmCond::Ge, - "lt" => ArmCond::Lt, - "gt" => ArmCond::Gt, - "le" => ArmCond::Le, - "al" => ArmCond::Al, - _ => return None, - }) - } - - fn parse_mnemonic(mnemonic: &str) -> (&str, Option, bool) { - // returns: (base, cond, sets_flags) - // - // handles shapes like: - // add - // adds - // addeq - // addseq - // cmp - // beq - // bl - // bx - // - let m = mnemonic; - let mut setflags = false; - let mut cond = None; - - // longest bases first where prefixes overlap - for base in ["bl", "bx", "ldr", "str", "cmp", "mov", "add", "sub", "b"] { - if let Some(rest) = m.strip_prefix(base) { - let mut rest = rest; - - if matches!(base, "add" | "sub" | "mov") - && let Some(stripped) = rest.strip_prefix('s') - { - setflags = true; - rest = stripped; - } - - if !rest.is_empty() { - cond = Self::parse_cond_suffix(rest); - } - - return (base, cond, setflags); - } - } - - (mnemonic, None, false) - } - - fn operand_value(cx: &mut ArmLiftCtx<'_>, text: &str) -> Result { - if let Some(reg) = Self::parse_reg(text) { - Ok(cx.read_reg(reg)) - } else if let Some(imm) = Self::parse_imm(text) { - Ok(cx.fb.iconst(cx.word_ty(), imm)) - } else { - Err(ArmLiftError::UnsupportedInstruction) - } - } - - fn address_from_mem(cx: &mut ArmLiftCtx<'_>, text: &str) -> Result { - let (base, off) = Self::parse_mem(text).ok_or(ArmLiftError::UnsupportedAddressing)?; - let base_v = cx.read_reg(base); - - if off == 0 { - Ok(base_v) - } else { - let off_v = cx.fb.iconst(cx.word_ty(), off); - Ok(cx.fb.iadd(base_v, off_v, cx.word_ty())) - } - } - pub fn lift_inst( &self, - cx: &mut ArmLiftCtx<'_>, + cx: &mut ArmLiftCtx<'_, '_>, inst: &Instruction, ) -> Result<(), ArmLiftError> { match inst.opcode { @@ -882,7 +536,16 @@ impl Arm { let lhs = Self::read_operand(cx, inst.operands[0])?; let rhs = Self::read_operand(cx, inst.operands[1])?; - cx.last_cmp = Some((lhs, rhs)); + + // CMP computes lhs - rhs and sets flags from the result. + let diff = cx.fb.isub(lhs, rhs, Type::i32()); + let zero = cx.fb.iconst(Type::i32(), 0); + let z = cx.fb.icmp(IntCC::Eq, diff, zero); + let n = cx.fb.icmp(IntCC::Slt, diff, zero); + cx.write_flag(ArmFlag::Z, z); + cx.write_flag(ArmFlag::N, n); + // C and V require carry/overflow modelling; deferred. + Ok(()) } @@ -958,272 +621,6 @@ impl Arm { _ => Err(ArmLiftError::UnsupportedInstruction), } } - - fn guard_cond( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - ) -> Result, ArmLiftError> { - match cond { - ArmCond::Al => Ok(None), - _ => cx - .condition_value(cond) - .map(Some) - .ok_or(ArmLiftError::MissingCompareState), - } - } - - fn lift_mov( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - if cond != ArmCond::Al { - return Err(ArmLiftError::UnsupportedCondition); - } - - let rd = Self::parse_reg( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let src = Self::operand_value( - cx, - parsed - .operands - .get(1) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - )?; - cx.write_reg(rd, src); - Ok(()) - } - - fn lift_add( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - if cond != ArmCond::Al { - return Err(ArmLiftError::UnsupportedCondition); - } - - let rd = Self::parse_reg( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let rn = Self::parse_reg( - parsed - .operands - .get(1) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let op2 = Self::operand_value( - cx, - parsed - .operands - .get(2) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - )?; - - let lhs = cx.read_reg(rn); - let out = cx.fb.iadd(lhs, op2, cx.word_ty()); - cx.write_reg(rd, out); - Ok(()) - } - - fn lift_sub( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - if cond != ArmCond::Al { - return Err(ArmLiftError::UnsupportedCondition); - } - - let rd = Self::parse_reg( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let rn = Self::parse_reg( - parsed - .operands - .get(1) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let op2 = Self::operand_value( - cx, - parsed - .operands - .get(2) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - )?; - - let lhs = cx.read_reg(rn); - let out = cx.fb.isub(lhs, op2, cx.word_ty()); - cx.write_reg(rd, out); - Ok(()) - } - - fn lift_cmp( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - if cond != ArmCond::Al { - return Err(ArmLiftError::UnsupportedCondition); - } - - let rn = Self::parse_reg( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let lhs = cx.read_reg(rn); - let rhs = Self::operand_value( - cx, - parsed - .operands - .get(1) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - )?; - - cx.last_cmp = Some((lhs, rhs)); - Ok(()) - } - - fn lift_ldr( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - if cond != ArmCond::Al { - return Err(ArmLiftError::UnsupportedCondition); - } - - let rt = Self::parse_reg( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let addr = Self::address_from_mem( - cx, - parsed - .operands - .get(1) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - )?; - let val = cx.fb.load(addr, MemSize::S4, Type::i32()); - cx.write_reg(rt, val); - Ok(()) - } - - fn lift_str( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - if cond != ArmCond::Al { - return Err(ArmLiftError::UnsupportedCondition); - } - - let rt = Self::parse_reg( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let val = cx.read_reg(rt); - let addr = Self::address_from_mem( - cx, - parsed - .operands - .get(1) - .ok_or(ArmLiftError::UnsupportedInstruction)?, - )?; - cx.fb.store(addr, val, MemSize::S4); - Ok(()) - } - - fn lift_b( - &self, - cx: &mut ArmLiftCtx<'_>, - cond: ArmCond, - parsed: &ParsedText, - ) -> Result<(), ArmLiftError> { - let target_addr = Self::parse_target( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - let target_block = cx - .direct_block(target_addr) - .ok_or(ArmLiftError::MissingTargetBlock)?; - - match cond { - ArmCond::Al => { - cx.fb.jump(target_block); - Ok(()) - } - _ => { - let cond_v = cx - .condition_value(cond) - .ok_or(ArmLiftError::MissingCompareState)?; - let fallthrough = cx - .fallthrough_block() - .ok_or(ArmLiftError::MissingTargetBlock)?; - cx.fb.br_if(cond_v, target_block, fallthrough); - Ok(()) - } - } - } - - fn lift_bl(&self, cx: &mut ArmLiftCtx<'_>, parsed: &ParsedText) -> Result<(), ArmLiftError> { - let target_addr = Self::parse_target( - parsed - .operands - .first() - .ok_or(ArmLiftError::UnsupportedInstruction)?, - ) - .ok_or(ArmLiftError::UnsupportedInstruction)?; - - let callee = cx.fb.iconst(Type::ptr(32), target_addr as i64); - cx.fb.call(callee, &[]); - Ok(()) - } - - fn lift_bx(&self, cx: &mut ArmLiftCtx<'_>, inst: &Instruction) -> Result<(), ArmLiftError> { - match inst.operands[0] { - Operand::Reg(r) if r.number() == 14 => { - let ret0 = cx.read_reg(ArmReg::R0); - cx.fb.ret(&[ret0]); - Ok(()) - } - _ => Err(ArmLiftError::UnsupportedControlFlow), - } - } } impl Arch for Arm { @@ -1231,7 +628,6 @@ impl Arch for Arm { type Inst = Instruction; type Reg = ArmReg; type DecodeError = DecodeError; - type Disasm = String; fn name(&self) -> &'static str { "arm" @@ -1258,13 +654,8 @@ impl Arch for Arm { bytes: &[u8], mode: Self::Mode, ) -> Result<(usize, Self::Inst), Self::DecodeError> { - let decoder = match mode { - ArmMode::Arm => InstDecoder::default(), - ArmMode::Thumb => InstDecoder::default_thumb(), - }; - let mut reader = U8Reader::new(bytes); - let inst = decoder.decode(&mut reader)?; + let inst = Self::decoder_for_mode(mode).decode(&mut reader)?; let len = inst.len().to_const() as usize; Ok((len, inst)) } @@ -1289,23 +680,12 @@ impl Arch for Arm { .map(FlowTarget::Direct) .unwrap_or(FlowTarget::Indirect); - FlowInfo::new( - len, - FlowKind::Call { - target, - returns: true, - }, - ) + FlowInfo::new(len, FlowKind::Call { target, returns: true }) } Opcode::BX => match inst.operands[0] { Operand::Reg(r) if r.number() == 14 => FlowInfo::new(len, FlowKind::Return), - _ => FlowInfo::new( - len, - FlowKind::Jump { - target: FlowTarget::Indirect, - }, - ), + _ => FlowInfo::new(len, FlowKind::Jump { target: FlowTarget::Indirect }), }, Opcode::NOP => FlowInfo::new(len, FlowKind::FallThrough), @@ -1314,16 +694,20 @@ impl Arch for Arm { } } - fn disasm(&self, inst: &Self::Inst, _pc: u64, _mode: Self::Mode) -> Self::Disasm { + fn disasm(&self, inst: &Self::Inst, _pc: u64, _mode: Self::Mode) -> String { inst.to_string() } } impl LiftArch for Arm { type LiftError = ArmLiftError; - type LiftCtx<'a> = ArmLiftCtx<'a>; + type LiftCtx<'ir, 'fb> = ArmLiftCtx<'ir, 'fb> where Self: 'ir, 'ir: 'fb; - fn lift(&self, cx: &mut Self::LiftCtx<'_>, inst: &Self::Inst) -> Result<(), Self::LiftError> { + fn lift( + &self, + cx: &mut Self::LiftCtx<'_, '_>, + inst: &Self::Inst, + ) -> Result<(), Self::LiftError> { self.lift_inst(cx, inst) } } diff --git a/src/ir/body.rs b/src/ir/body.rs index 5b8434f..0db6e6d 100644 --- a/src/ir/body.rs +++ b/src/ir/body.rs @@ -3,8 +3,7 @@ use cranelift_entity::{EntityRef, ListPool, PrimaryMap}; use crate::ir::{ - Block, BlockCall, BlockData, ParamList, Stmt, StmtData, StmtList, Type, Value, ValueData, - ValueDef, ValueList, + Block, BlockCall, BlockData, Stmt, StmtData, Type, Value, ValueData, ValueDef, ValueList, }; /// A normalized SSA body. diff --git a/src/ir/expr.rs b/src/ir/expr.rs index 522f0c1..3e80a2f 100644 --- a/src/ir/expr.rs +++ b/src/ir/expr.rs @@ -2,7 +2,7 @@ use core::fmt; -use crate::ir::{Block, Stmt, Type, Value}; +use crate::ir::{Block, Stmt, Type}; /// Memory access width in bytes. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] diff --git a/src/ir/stmt.rs b/src/ir/stmt.rs index 2180b1c..9e5fd13 100644 --- a/src/ir/stmt.rs +++ b/src/ir/stmt.rs @@ -4,9 +4,7 @@ //! block. This ensures memory order is always explicit in the statement stream //! rather than floating in a sea of pure expressions. -use crate::ir::{ - BinaryOp, Block, CastOp, FloatCC, IntCC, MemSize, Stmt, UnaryOp, Value, -}; +use crate::ir::{BinaryOp, Block, CastOp, FloatCC, IntCC, MemSize, UnaryOp, Value}; use cranelift_entity::EntityList; /// A compact list of SSA values. diff --git a/src/main.rs b/src/main.rs index 379abdf..221e472 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,8 +9,6 @@ use clap::{ styling::{AnsiColor, Effects}, }, }; -use cranelift_entity::EntityRef; - use crate::{ arch::{ Arch, LiftArch, @@ -42,7 +40,6 @@ fn main() { 0x1E, 0xFF, 0x2F, 0xE1, // bx lr ]; - let arm = crate::arch::arm::Arm::default(); let mut body = crate::ir::Body::new(); let mut fb_ctx = crate::ir::FrontendBuilderContext::new(); let env = DummyEnv; @@ -56,25 +53,12 @@ fn main() { let mut pc = 0x1000u64; let mut rest = bytes; - let mut cx = crate::arch::arm::ArmLiftCtx::new( - &arm, - &env, - &mut fb, - pc, - crate::arch::arm::ArmMode::Arm, - ); + let mut cx = ArmLiftCtx::new(&arm, &env, &mut fb, pc, ArmMode::Arm); while !rest.is_empty() { - let (len, inst) = arm.decode(rest, crate::arch::arm::ArmMode::Arm).unwrap(); - println!( - "{:#x}: {}", - pc, - arm.disasm(&inst, pc, crate::arch::arm::ArmMode::Arm) - ); - println!( - " flow: {:?}", - arm.flow_info(&inst, pc, crate::arch::arm::ArmMode::Arm) - ); + let (len, inst) = arm.decode(rest, ArmMode::Arm).unwrap(); + println!("{:#x}: {}", pc, arm.disasm(&inst, pc, ArmMode::Arm)); + println!(" flow: {:?}", arm.flow_info(&inst, pc, ArmMode::Arm)); cx.pc = pc; arm.lift(&mut cx, &inst).unwrap();