diff --git a/Cargo.lock b/Cargo.lock index 46757dd..2cc93e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,12 +70,53 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "bumpalo" version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "capstone" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f442ae0f2f3f1b923334b4a5386c95c69c1cfa072bafa23d6fae6d9682eb1dd4" +dependencies = [ + "capstone-sys", + "static_assertions", +] + +[[package]] +name = "capstone-sys" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e8087cab6731295f5a2a2bd82989ba4f41d3a428aab2e7c98d8f4db38aac05" +dependencies = [ + "cc", +] + +[[package]] +name = "cc" +version = "1.2.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -188,6 +229,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -206,6 +253,12 @@ version = "0.2.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "hashbrown" version = "0.15.5" @@ -361,6 +414,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "raw-cpuid" version = "11.6.0" @@ -411,15 +470,24 @@ dependencies = [ "syn", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "slonik" version = "0.1.0" dependencies = [ + "capstone", "clap", "cranelift-entity", "egg", "petgraph", "smallvec", + "yaxpeax-arch", + "yaxpeax-arm", ] [[package]] @@ -428,6 +496,12 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -462,6 +536,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "thiserror" version = "1.0.69" @@ -601,3 +681,35 @@ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] + 
+[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "yaxpeax-arch" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36274fcc5403da2a7636ffda4d02eca12a1b2b8267b9d2e04447bd2ccfc72082" +dependencies = [ + "num-traits", + "serde", + "serde_derive", +] + +[[package]] +name = "yaxpeax-arm" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8aa9155f0d727d10e91e5a94f68f415ec24c7a5faab4eac2386a1069e4a02d7" +dependencies = [ + "bitvec", + "serde", + "serde_derive", + "yaxpeax-arch", +] diff --git a/Cargo.toml b/Cargo.toml index 4e8a412..d8d5810 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,8 +4,11 @@ version = "0.1.0" edition = "2024" [dependencies] +capstone = "0.14.0" clap = { version = "4.6.0", features = ["derive"] } cranelift-entity = "0.130.0" egg = "0.11.0" petgraph = "0.8.3" smallvec = "1.15.1" +yaxpeax-arch = "0.3.2" +yaxpeax-arm = "0.4.0" diff --git a/src/arch.rs b/src/arch.rs index 75a29ac..aeb7816 100644 --- a/src/arch.rs +++ b/src/arch.rs @@ -1,30 +1,98 @@ -// pub trait Architecture { -// type Mode: Copy + Eq + Send + Sync + 'static; -// type DecodedInstruction: Send + Sync + 'static; +//! Architecture-facing interfaces. -// fn name(&self) -> &'static str; +pub mod arm; -// fn decode( -// &self, -// bytes: &[u8], -// addr: u64, -// mode: Self::Mode, -// ) -> Result<(usize, Self::DecodedInstruction), DecodeError>; +use crate::{ + flow::FlowInfo, + ir::{Block, FrontendBuilder}, +}; -// fn lift( -// &self, -// insn: &Self::DecodedInstruction, -// addr: u64, -// mode: Self::Mode, -// b: &mut Builder, -// ) -> Result; +/// Flat generic register metadata. +/// +/// This type intentionally stays small. 
+/// Architecture-specific aliasing or overlap semantics belong in the concrete +/// architecture module, not here. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct RegisterDesc { + /// Architecture-defined register identity. + pub reg: R, -// fn registers(&self) -> &'static [RegisterDesc]; -// fn flags(&self) -> &'static [FlagDesc]; -// } + /// Human-readable register name. + pub name: &'static str, -// pub struct InsnInfo { -// pub len: u8, -// pub branches: smallvec::SmallVec<[Branch; 2]>, -// pub terminates_block: bool, -// } + /// Register width in bits. + pub bits: u16, +} + +/// Stable architecture protocol. +pub trait Arch { + /// Architecture execution mode. + type Mode: Copy + Eq; + + /// Decoded instruction type. + type Inst; + + /// Architecture-specific register identity. + type Reg: Copy + Eq; + + /// Decode-time error. + type DecodeError; + + /// Architecture-specific disassembly output. + type Disasm; + + /// Stable architecture name. + fn name(&self) -> &'static str; + + /// Pointer width in bytes for the given mode. + fn address_size(&self, mode: Self::Mode) -> u8; + + /// Maximum instruction length in bytes. + fn max_instruction_len(&self) -> u8; + + /// Architectural register set. + fn registers(&self) -> &'static [RegisterDesc]; + + /// Stack pointer register, if the architecture has one. + fn stack_pointer(&self) -> Option; + + /// Decode one instruction from `bytes`. + fn decode( + &self, + bytes: &[u8], + mode: Self::Mode, + ) -> Result<(usize, Self::Inst), Self::DecodeError>; + + /// Return instruction-level control-flow facts for one decoded instruction. + fn flow_info(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> FlowInfo; + + /// Render one decoded instruction as disassembly. + fn disasm(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> Self::Disasm; +} + +/// Translation-session knowledge available during lifting. 
+pub trait LiftEnv { + /// Returns the IR block associated with a statically known target address, + /// if the current translation session has created one. + fn block_for_target(&self, addr: u64) -> Option; + + /// Returns the fallthrough block for the current instruction, if one exists + /// in the current translation session. + fn fallthrough_block(&self) -> Option; +} + +/// Extension trait for architectures that can lift into Slonik IR. +pub trait LiftArch: Arch { + /// Lift-time error. + type LiftError; + + /// Per-lift mutable context. + /// + /// This is where the large mutable lifting state belongs. + type LiftCtx<'a> + where + Self: 'a; + + /// Lift one decoded instruction into Slonik IR. + fn lift(&self, cx: &mut Self::LiftCtx<'_>, inst: &Self::Inst) -> Result<(), Self::LiftError>; +} diff --git a/src/arch/arm.rs b/src/arch/arm.rs new file mode 100644 index 0000000..312d063 --- /dev/null +++ b/src/arch/arm.rs @@ -0,0 +1,1341 @@ +//! 32-bit ARM architecture support backed by `yaxpeax-arm`. + +use crate::{ + arch::{Arch, LiftArch, LiftEnv, RegisterDesc}, + flow::{FlowInfo, FlowKind, FlowTarget}, + ir::{Block, FrontendBuilder, IntCC, MemSize, Type, Value, Variable}, +}; + +use cranelift_entity::EntityRef; +use yaxpeax_arch::{Decoder, LengthedInstruction, ReaderBuilder, U8Reader}; +use yaxpeax_arm::armv7::{ + ConditionCode, DecodeError, InstDecoder, Instruction, Opcode, Operand, Reg, RegShiftStyle, + ShiftStyle, +}; + +/// ARM execution mode. +/// +/// This module targets 32-bit ARM and Thumb. +/// AArch64 should live in a separate `arm64.rs`. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmMode { + /// ARM state. + Arm, + + /// Thumb / Thumb-2 state. + Thumb, +} + +/// ARM endianness. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmEndian { + Little, + Big, +} + +/// ARM architectural profile. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmProfile { + A, + R, + M, +} + +/// ARM ISA version / feature profile. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ArmFeatures { + pub version: ArmVersion, + pub thumb2: bool, + pub vfp: bool, + pub neon: bool, + pub dsp: bool, +} + +impl Default for ArmFeatures { + fn default() -> Self { + Self { + version: ArmVersion::V7, + thumb2: true, + vfp: false, + neon: false, + dsp: false, + } + } +} + +/// ARM ISA version. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmVersion { + V5, + V6, + V7, +} + +/// Flat ARM architectural register identity. +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmReg { + R0 = 0, + R1 = 1, + R2 = 2, + R3 = 3, + R4 = 4, + R5 = 5, + R6 = 6, + R7 = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + Sp = 13, + Lr = 14, + Pc = 15, +} + +/// ARM condition code. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmCond { + Eq, + Ne, + Cs, + Cc, + Mi, + Pl, + Vs, + Vc, + Hi, + Ls, + Ge, + Lt, + Gt, + Le, + Al, +} + +/// ARM status flag identity. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmFlag { + N, + Z, + C, + V, + Q, +} + +/// Calling-convention placeholder. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ArmCallingConvention { + Aapcs, + AapcsVfp, +} + +/// Reusable ARM architecture object. +#[derive(Clone, Debug)] +pub struct Arm { + pub endian: ArmEndian, + pub profile: ArmProfile, + pub features: ArmFeatures, + pub default_cc: ArmCallingConvention, +} + +impl Default for Arm { + fn default() -> Self { + Self { + endian: ArmEndian::Little, + profile: ArmProfile::A, + features: ArmFeatures::default(), + default_cc: ArmCallingConvention::Aapcs, + } + } +} + +/// Mutable per-lift ARM state. +pub struct ArmLiftCtx<'a> { + /// Static architecture description. + pub arch: &'a Arm, + + /// Translation-session environment. 
+ pub env: &'a dyn LiftEnv, + + /// Current frontend builder. + pub fb: &'a mut FrontendBuilder<'a>, + + /// Current program counter. + pub pc: u64, + + /// Current execution mode. + pub mode: ArmMode, + + /// Current SSA bindings for architectural registers. + pub regs: ArmRegisterState, + + /// Current SSA bindings for flags. + pub flags: ArmFlagState, + + /// Cached comparison operands for conditional branch lowering. + pub last_cmp: Option<(Value, Value)>, + + /// Temporary lifting scratch state. + pub scratch: ArmScratch, +} + +impl<'a> ArmLiftCtx<'a> { + /// Creates a new ARM lift context. + pub fn new( + arch: &'a Arm, + env: &'a dyn LiftEnv, + fb: &'a mut FrontendBuilder<'a>, + pc: u64, + mode: ArmMode, + ) -> Self { + let word_ty = Type::i32(); + + for reg in REGISTERS { + fb.declare_var(Self::reg_var(reg.reg), word_ty); + } + + for flag in [ArmFlag::N, ArmFlag::Z, ArmFlag::C, ArmFlag::V, ArmFlag::Q] { + fb.declare_var(Self::flag_var(flag), Type::bool()); + } + + Self { + arch, + env, + fb, + pc, + mode, + regs: ArmRegisterState::default(), + flags: ArmFlagState::default(), + last_cmp: None, + scratch: ArmScratch::default(), + } + } + + /// Returns the integer word type for the current mode. + pub fn word_ty(&self) -> Type { + Type::i32() + } + + /// Maps a register identity to a frontend variable. + pub fn reg_var(reg: ArmReg) -> Variable { + Variable::new(reg as usize) + } + + /// Maps a flag identity to a frontend variable. + pub fn flag_var(flag: ArmFlag) -> Variable { + Variable::new(0x100 + flag as usize) + } + + /// Reads the current SSA value for a register. + pub fn read_reg(&mut self, reg: ArmReg) -> Value { + self.fb.use_var(Self::reg_var(reg)) + } + + /// Writes an SSA value to a register. + pub fn write_reg(&mut self, reg: ArmReg, value: Value) { + self.fb.def_var(Self::reg_var(reg), value); + } + + /// Reads the current SSA value for a flag. 
+ pub fn read_flag(&mut self, flag: ArmFlag) -> Value { + self.fb.use_var(Self::flag_var(flag)) + } + + /// Writes an SSA value to a flag. + pub fn write_flag(&mut self, flag: ArmFlag, value: Value) { + self.fb.def_var(Self::flag_var(flag), value); + } + + /// Resolves a direct target address to an IR block. + pub fn direct_block(&self, target_addr: u64) -> Option { + self.env.block_for_target(target_addr) + } + + /// Returns the fallthrough block for the current instruction, if present. + pub fn fallthrough_block(&self) -> Option { + self.env.fallthrough_block() + } + + /// Computes a boolean SSA value for a condition code using the last cached + /// compare. + pub fn condition_value(&mut self, cond: ArmCond) -> Option { + let (lhs, rhs) = self.last_cmp?; + + let v = match cond { + ArmCond::Eq => self.fb.icmp(crate::ir::IntCC::Eq, lhs, rhs), + ArmCond::Ne => self.fb.icmp(crate::ir::IntCC::Ne, lhs, rhs), + ArmCond::Lt => self.fb.icmp(crate::ir::IntCC::Slt, lhs, rhs), + ArmCond::Le => self.fb.icmp(crate::ir::IntCC::Sle, lhs, rhs), + ArmCond::Gt => self.fb.icmp(crate::ir::IntCC::Sgt, lhs, rhs), + ArmCond::Ge => self.fb.icmp(crate::ir::IntCC::Sge, lhs, rhs), + + // Placeholder until full NZCV lowering exists. + ArmCond::Cs + | ArmCond::Cc + | ArmCond::Mi + | ArmCond::Pl + | ArmCond::Vs + | ArmCond::Vc + | ArmCond::Hi + | ArmCond::Ls + | ArmCond::Al => return None, + }; + + Some(v) + } +} + +/// Current SSA bindings for architectural registers. +#[derive(Clone, Debug, Default)] +pub struct ArmRegisterState { + pub gpr: [Option; 16], +} + +/// Current SSA bindings for status flags. +#[derive(Clone, Debug, Default)] +pub struct ArmFlagState { + pub n: Option, + pub z: Option, + pub c: Option, + pub v: Option, + pub q: Option, +} + +/// Temporary lifting scratch state. +#[derive(Clone, Debug, Default)] +pub struct ArmScratch { + pub tmp0: Option, + pub tmp1: Option, + pub tmp2: Option, +} + +/// Public ARM register table. 
+pub const REGISTERS: [RegisterDesc; 16] = [ + RegisterDesc { + reg: ArmReg::R0, + name: "r0", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R1, + name: "r1", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R2, + name: "r2", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R3, + name: "r3", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R4, + name: "r4", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R5, + name: "r5", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R6, + name: "r6", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R7, + name: "r7", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R8, + name: "r8", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R9, + name: "r9", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R10, + name: "r10", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R11, + name: "r11", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::R12, + name: "r12", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::Sp, + name: "sp", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::Lr, + name: "lr", + bits: 32, + }, + RegisterDesc { + reg: ArmReg::Pc, + name: "pc", + bits: 32, + }, +]; + +/// ARM lift-time errors. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ArmLiftError { + UnsupportedInstruction, + UnsupportedCondition, + UnsupportedAddressing, + MissingCompareState, + MissingTargetBlock, + InvalidImmediate, + UnsupportedControlFlow, +} + +/// Parsed instruction text used by the starter lifter. +/// +/// This is intentionally derived from the decoder's `Display` output so the +/// architecture boundary can use the decoder crate's instruction type directly. 
+#[derive(Clone, Debug, PartialEq, Eq)] +struct ParsedText { + mnemonic: String, + operands: Vec, +} + +impl Arm { + fn map_reg(reg: Reg) -> ArmReg { + match reg.number() { + 0 => ArmReg::R0, + 1 => ArmReg::R1, + 2 => ArmReg::R2, + 3 => ArmReg::R3, + 4 => ArmReg::R4, + 5 => ArmReg::R5, + 6 => ArmReg::R6, + 7 => ArmReg::R7, + 8 => ArmReg::R8, + 9 => ArmReg::R9, + 10 => ArmReg::R10, + 11 => ArmReg::R11, + 12 => ArmReg::R12, + 13 => ArmReg::Sp, + 14 => ArmReg::Lr, + 15 => ArmReg::Pc, + _ => unreachable!("armv7 Reg::number() is documented as 0..=15"), + } + } + + fn intcc_from_cond(cond: ConditionCode) -> Option { + Some(match cond { + ConditionCode::EQ => IntCC::Eq, + ConditionCode::NE => IntCC::Ne, + ConditionCode::GE => IntCC::Sge, + ConditionCode::LT => IntCC::Slt, + ConditionCode::GT => IntCC::Sgt, + ConditionCode::LE => IntCC::Sle, + ConditionCode::AL => return None, + + // these need explicit NZCV modeling + ConditionCode::HS + | ConditionCode::LO + | ConditionCode::MI + | ConditionCode::PL + | ConditionCode::VS + | ConditionCode::VC + | ConditionCode::HI + | ConditionCode::LS => return None, + }) + } + + fn direct_target_from_operand(pc: u64, op: Operand) -> Option { + match op { + Operand::BranchOffset(off) => Some(pc.wrapping_add_signed((off as i64) << 2)), + Operand::BranchThumbOffset(off) => Some(pc.wrapping_add_signed((off as i64) << 1)), + _ => None, + } + } + + fn read_shifted_reg( + cx: &mut ArmLiftCtx<'_>, + shift: yaxpeax_arm::armv7::RegShift, + ) -> Result { + let shift = shift.into_shift(); + + match shift { + RegShiftStyle::RegImm(s) => { + let base = cx.read_reg(Self::map_reg(s.shiftee())); + let amt = cx.fb.iconst(Type::i32(), s.imm() as i64); + + Ok(match s.stype() { + ShiftStyle::LSL => cx.fb.shl(base, amt, Type::i32()), + ShiftStyle::LSR => cx.fb.lshr(base, amt, Type::i32()), + ShiftStyle::ASR => cx.fb.ashr(base, amt, Type::i32()), + ShiftStyle::ROR => return Err(ArmLiftError::UnsupportedInstruction), + }) + } + + RegShiftStyle::RegReg(s) => { 
+ let base = cx.read_reg(Self::map_reg(s.shiftee())); + let amt = cx.read_reg(Self::map_reg(s.shifter())); + + Ok(match s.stype() { + ShiftStyle::LSL => cx.fb.shl(base, amt, Type::i32()), + ShiftStyle::LSR => cx.fb.lshr(base, amt, Type::i32()), + ShiftStyle::ASR => cx.fb.ashr(base, amt, Type::i32()), + ShiftStyle::ROR => return Err(ArmLiftError::UnsupportedInstruction), + }) + } + } + } + + fn read_operand(cx: &mut ArmLiftCtx<'_>, op: Operand) -> Result { + match op { + Operand::Reg(r) => Ok(cx.read_reg(Self::map_reg(r))), + Operand::Imm12(v) => Ok(cx.fb.iconst(Type::i32(), v as i64)), + Operand::Imm32(v) => Ok(cx.fb.iconst(Type::i32(), v as i64)), + Operand::RegShift(shift) => Self::read_shifted_reg(cx, shift), + _ => Err(ArmLiftError::UnsupportedInstruction), + } + } + + fn mem_addr(cx: &mut ArmLiftCtx<'_>, op: Operand) -> Result { + match op { + Operand::RegDeref(base) => Ok(cx.read_reg(Self::map_reg(base))), + + Operand::RegDerefPreindexOffset(base, off, add, wback) => { + let base_reg = Self::map_reg(base); + let base_v = cx.read_reg(base_reg); + let off_v = cx.fb.iconst(Type::i32(), off as i64); + let addr = if add { + cx.fb.iadd(base_v, off_v, Type::i32()) + } else { + cx.fb.isub(base_v, off_v, Type::i32()) + }; + if wback { + cx.write_reg(base_reg, addr); + } + Ok(addr) + } + + Operand::RegDerefPostindexOffset(base, off, add, wback) => { + let base_reg = Self::map_reg(base); + let base_v = cx.read_reg(base_reg); + let off_v = cx.fb.iconst(Type::i32(), off as i64); + let new_base = if add { + cx.fb.iadd(base_v, off_v, Type::i32()) + } else { + cx.fb.isub(base_v, off_v, Type::i32()) + }; + if wback { + cx.write_reg(base_reg, new_base); + } + Ok(base_v) + } + + Operand::RegDerefPreindexReg(base, idx, add, wback) => { + let base_reg = Self::map_reg(base); + let base_v = cx.read_reg(base_reg); + let idx_v = cx.read_reg(Self::map_reg(idx)); + let addr = if add { + cx.fb.iadd(base_v, idx_v, Type::i32()) + } else { + cx.fb.isub(base_v, idx_v, Type::i32()) + }; + if 
wback { + cx.write_reg(base_reg, addr); + } + Ok(addr) + } + + Operand::RegDerefPostindexReg(base, idx, add, wback) => { + let base_reg = Self::map_reg(base); + let base_v = cx.read_reg(base_reg); + let idx_v = cx.read_reg(Self::map_reg(idx)); + let new_base = if add { + cx.fb.iadd(base_v, idx_v, Type::i32()) + } else { + cx.fb.isub(base_v, idx_v, Type::i32()) + }; + if wback { + cx.write_reg(base_reg, new_base); + } + Ok(base_v) + } + + Operand::RegDerefPreindexRegShift(base, shift, add, wback) => { + let base_reg = Self::map_reg(base); + let base_v = cx.read_reg(base_reg); + let idx_v = Self::read_shifted_reg(cx, shift)?; + let addr = if add { + cx.fb.iadd(base_v, idx_v, Type::i32()) + } else { + cx.fb.isub(base_v, idx_v, Type::i32()) + }; + if wback { + cx.write_reg(base_reg, addr); + } + Ok(addr) + } + + Operand::RegDerefPostindexRegShift(base, shift, add, wback) => { + let base_reg = Self::map_reg(base); + let base_v = cx.read_reg(base_reg); + let idx_v = Self::read_shifted_reg(cx, shift)?; + let new_base = if add { + cx.fb.iadd(base_v, idx_v, Type::i32()) + } else { + cx.fb.isub(base_v, idx_v, Type::i32()) + }; + if wback { + cx.write_reg(base_reg, new_base); + } + Ok(base_v) + } + + _ => Err(ArmLiftError::UnsupportedAddressing), + } + } + + fn cond_value( + cx: &mut ArmLiftCtx<'_>, + cond: ConditionCode, + ) -> Result, ArmLiftError> { + let Some(cc) = Self::intcc_from_cond(cond) else { + return if cond == ConditionCode::AL { + Ok(None) + } else { + Err(ArmLiftError::UnsupportedCondition) + }; + }; + + let (lhs, rhs) = cx.last_cmp.ok_or(ArmLiftError::MissingCompareState)?; + Ok(Some(cx.fb.icmp(cc, lhs, rhs))) + } + /// Creates the appropriate decoder for the current mode. + fn decoder_for_mode(mode: ArmMode) -> InstDecoder { + match mode { + ArmMode::Arm => InstDecoder::default(), + ArmMode::Thumb => InstDecoder::default_thumb(), + } + } + + /// Returns the canonical textual register name. 
/// Parses an immediate operand as it appears in disassembly text.
///
/// Accepted shape, after trimming surrounding whitespace: an optional leading
/// `#`, an optional sign (`+` or `-`), then either decimal digits or `0x`
/// followed by hexadecimal digits.
///
/// Returns `None` for anything else, including an empty digit string, a
/// sign placed after the `0x` prefix, and values that do not fit in `i64`.
fn parse_imm(text: &str) -> Option<i64> {
    let s = text.trim();
    let s = s.strip_prefix('#').unwrap_or(s);

    // Peel the sign off first so `+0x10` and `-0x10` are handled alike.
    let (negative, magnitude) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s.strip_prefix('+').unwrap_or(s)),
    };

    let (radix, digits) = match magnitude.strip_prefix("0x") {
        Some(hex) => (16, hex),
        None => (10, magnitude),
    };

    // `from_str_radix` accepts a sign of its own, which would let malformed
    // text such as `0x-5` or `--5` through; insist on bare digits here.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }

    // Parse wide and negate afterwards so that `i64::MIN` is representable.
    let magnitude = i128::from_str_radix(digits, radix).ok()?;
    let value = if negative { -magnitude } else { magnitude };
    i64::try_from(value).ok()
}
t.ends_with(']')) { + return None; + } + + let inner = &t[1..t.len() - 1]; + let parts: Vec<_> = inner.split(',').map(|p| p.trim()).collect(); + + let base = Self::parse_reg(parts.first().copied()?)?; + let off = if let Some(off) = parts.get(1) { + Self::parse_imm(off)? + } else { + 0 + }; + + Some((base, off)) + } + + fn parse_cond_suffix(s: &str) -> Option { + Some(match s { + "eq" => ArmCond::Eq, + "ne" => ArmCond::Ne, + "cs" => ArmCond::Cs, + "cc" => ArmCond::Cc, + "mi" => ArmCond::Mi, + "pl" => ArmCond::Pl, + "vs" => ArmCond::Vs, + "vc" => ArmCond::Vc, + "hi" => ArmCond::Hi, + "ls" => ArmCond::Ls, + "ge" => ArmCond::Ge, + "lt" => ArmCond::Lt, + "gt" => ArmCond::Gt, + "le" => ArmCond::Le, + "al" => ArmCond::Al, + _ => return None, + }) + } + + fn parse_mnemonic(mnemonic: &str) -> (&str, Option, bool) { + // returns: (base, cond, sets_flags) + // + // handles shapes like: + // add + // adds + // addeq + // addseq + // cmp + // beq + // bl + // bx + // + let m = mnemonic; + let mut setflags = false; + let mut cond = None; + + // longest bases first where prefixes overlap + for base in ["bl", "bx", "ldr", "str", "cmp", "mov", "add", "sub", "b"] { + if let Some(rest) = m.strip_prefix(base) { + let mut rest = rest; + + if matches!(base, "add" | "sub" | "mov") + && let Some(stripped) = rest.strip_prefix('s') + { + setflags = true; + rest = stripped; + } + + if !rest.is_empty() { + cond = Self::parse_cond_suffix(rest); + } + + return (base, cond, setflags); + } + } + + (mnemonic, None, false) + } + + fn operand_value(cx: &mut ArmLiftCtx<'_>, text: &str) -> Result { + if let Some(reg) = Self::parse_reg(text) { + Ok(cx.read_reg(reg)) + } else if let Some(imm) = Self::parse_imm(text) { + Ok(cx.fb.iconst(cx.word_ty(), imm)) + } else { + Err(ArmLiftError::UnsupportedInstruction) + } + } + + fn address_from_mem(cx: &mut ArmLiftCtx<'_>, text: &str) -> Result { + let (base, off) = Self::parse_mem(text).ok_or(ArmLiftError::UnsupportedAddressing)?; + let base_v = 
cx.read_reg(base); + + if off == 0 { + Ok(base_v) + } else { + let off_v = cx.fb.iconst(cx.word_ty(), off); + Ok(cx.fb.iadd(base_v, off_v, cx.word_ty())) + } + } + + pub fn lift_inst( + &self, + cx: &mut ArmLiftCtx<'_>, + inst: &Instruction, + ) -> Result<(), ArmLiftError> { + match inst.opcode { + Opcode::NOP => Ok(()), + + Opcode::MOV => { + if inst.condition != ConditionCode::AL { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rd = match inst.operands[0] { + Operand::Reg(r) => Self::map_reg(r), + _ => return Err(ArmLiftError::UnsupportedInstruction), + }; + let src = Self::read_operand(cx, inst.operands[1])?; + cx.write_reg(rd, src); + Ok(()) + } + + Opcode::ADD => { + if inst.condition != ConditionCode::AL { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rd = match inst.operands[0] { + Operand::Reg(r) => Self::map_reg(r), + _ => return Err(ArmLiftError::UnsupportedInstruction), + }; + let lhs = Self::read_operand(cx, inst.operands[1])?; + let rhs = Self::read_operand(cx, inst.operands[2])?; + let out = cx.fb.iadd(lhs, rhs, Type::i32()); + cx.write_reg(rd, out); + Ok(()) + } + + Opcode::SUB => { + if inst.condition != ConditionCode::AL { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rd = match inst.operands[0] { + Operand::Reg(r) => Self::map_reg(r), + _ => return Err(ArmLiftError::UnsupportedInstruction), + }; + let lhs = Self::read_operand(cx, inst.operands[1])?; + let rhs = Self::read_operand(cx, inst.operands[2])?; + let out = cx.fb.isub(lhs, rhs, Type::i32()); + cx.write_reg(rd, out); + Ok(()) + } + + Opcode::CMP => { + if inst.condition != ConditionCode::AL { + return Err(ArmLiftError::UnsupportedCondition); + } + + let lhs = Self::read_operand(cx, inst.operands[0])?; + let rhs = Self::read_operand(cx, inst.operands[1])?; + cx.last_cmp = Some((lhs, rhs)); + Ok(()) + } + + Opcode::LDR => { + if inst.condition != ConditionCode::AL { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rt = match 
inst.operands[0] { + Operand::Reg(r) => Self::map_reg(r), + _ => return Err(ArmLiftError::UnsupportedInstruction), + }; + let addr = Self::mem_addr(cx, inst.operands[1])?; + let val = cx.fb.load(addr, MemSize::S4, Type::i32()); + cx.write_reg(rt, val); + Ok(()) + } + + Opcode::STR => { + if inst.condition != ConditionCode::AL { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rt = match inst.operands[0] { + Operand::Reg(r) => Self::map_reg(r), + _ => return Err(ArmLiftError::UnsupportedInstruction), + }; + let addr = Self::mem_addr(cx, inst.operands[1])?; + let val = cx.read_reg(rt); + cx.fb.store(addr, val, MemSize::S4); + Ok(()) + } + + Opcode::B => { + let target_addr = Self::direct_target_from_operand(cx.pc, inst.operands[0]) + .ok_or(ArmLiftError::MissingTargetBlock)?; + let target_block = cx + .direct_block(target_addr) + .ok_or(ArmLiftError::MissingTargetBlock)?; + + match Self::cond_value(cx, inst.condition)? { + None => { + cx.fb.jump(target_block); + Ok(()) + } + Some(cond) => { + let fallthrough = cx + .fallthrough_block() + .ok_or(ArmLiftError::MissingTargetBlock)?; + cx.fb.br_if(cond, target_block, fallthrough); + Ok(()) + } + } + } + + Opcode::BL | Opcode::BLX => { + let target_addr = Self::direct_target_from_operand(cx.pc, inst.operands[0]) + .ok_or(ArmLiftError::UnsupportedControlFlow)?; + let callee = cx.fb.iconst(Type::ptr(32), target_addr as i64); + cx.fb.call(callee, &[]); + Ok(()) + } + + Opcode::BX => match inst.operands[0] { + Operand::Reg(r) if r.number() == 14 => { + let ret0 = cx.read_reg(ArmReg::R0); + cx.fb.ret(&[ret0]); + Ok(()) + } + _ => Err(ArmLiftError::UnsupportedControlFlow), + }, + + _ => Err(ArmLiftError::UnsupportedInstruction), + } + } + + fn guard_cond( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + ) -> Result, ArmLiftError> { + match cond { + ArmCond::Al => Ok(None), + _ => cx + .condition_value(cond) + .map(Some) + .ok_or(ArmLiftError::MissingCompareState), + } + } + + fn lift_mov( + &self, + cx: &mut 
ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + if cond != ArmCond::Al { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rd = Self::parse_reg( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let src = Self::operand_value( + cx, + parsed + .operands + .get(1) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + )?; + cx.write_reg(rd, src); + Ok(()) + } + + fn lift_add( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + if cond != ArmCond::Al { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rd = Self::parse_reg( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let rn = Self::parse_reg( + parsed + .operands + .get(1) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let op2 = Self::operand_value( + cx, + parsed + .operands + .get(2) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + )?; + + let lhs = cx.read_reg(rn); + let out = cx.fb.iadd(lhs, op2, cx.word_ty()); + cx.write_reg(rd, out); + Ok(()) + } + + fn lift_sub( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + if cond != ArmCond::Al { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rd = Self::parse_reg( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let rn = Self::parse_reg( + parsed + .operands + .get(1) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let op2 = Self::operand_value( + cx, + parsed + .operands + .get(2) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + )?; + + let lhs = cx.read_reg(rn); + let out = cx.fb.isub(lhs, op2, 
cx.word_ty()); + cx.write_reg(rd, out); + Ok(()) + } + + fn lift_cmp( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + if cond != ArmCond::Al { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rn = Self::parse_reg( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let lhs = cx.read_reg(rn); + let rhs = Self::operand_value( + cx, + parsed + .operands + .get(1) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + )?; + + cx.last_cmp = Some((lhs, rhs)); + Ok(()) + } + + fn lift_ldr( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + if cond != ArmCond::Al { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rt = Self::parse_reg( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let addr = Self::address_from_mem( + cx, + parsed + .operands + .get(1) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + )?; + let val = cx.fb.load(addr, MemSize::S4, Type::i32()); + cx.write_reg(rt, val); + Ok(()) + } + + fn lift_str( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + if cond != ArmCond::Al { + return Err(ArmLiftError::UnsupportedCondition); + } + + let rt = Self::parse_reg( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let val = cx.read_reg(rt); + let addr = Self::address_from_mem( + cx, + parsed + .operands + .get(1) + .ok_or(ArmLiftError::UnsupportedInstruction)?, + )?; + cx.fb.store(addr, val, MemSize::S4); + Ok(()) + } + + fn lift_b( + &self, + cx: &mut ArmLiftCtx<'_>, + cond: ArmCond, + parsed: &ParsedText, + ) -> Result<(), ArmLiftError> { + let target_addr = Self::parse_target( + parsed + .operands + 
.first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + let target_block = cx + .direct_block(target_addr) + .ok_or(ArmLiftError::MissingTargetBlock)?; + + match cond { + ArmCond::Al => { + cx.fb.jump(target_block); + Ok(()) + } + _ => { + let cond_v = cx + .condition_value(cond) + .ok_or(ArmLiftError::MissingCompareState)?; + let fallthrough = cx + .fallthrough_block() + .ok_or(ArmLiftError::MissingTargetBlock)?; + cx.fb.br_if(cond_v, target_block, fallthrough); + Ok(()) + } + } + } + + fn lift_bl(&self, cx: &mut ArmLiftCtx<'_>, parsed: &ParsedText) -> Result<(), ArmLiftError> { + let target_addr = Self::parse_target( + parsed + .operands + .first() + .ok_or(ArmLiftError::UnsupportedInstruction)?, + ) + .ok_or(ArmLiftError::UnsupportedInstruction)?; + + let callee = cx.fb.iconst(Type::ptr(32), target_addr as i64); + cx.fb.call(callee, &[]); + Ok(()) + } + + fn lift_bx(&self, cx: &mut ArmLiftCtx<'_>, inst: &Instruction) -> Result<(), ArmLiftError> { + match inst.operands[0] { + Operand::Reg(r) if r.number() == 14 => { + let ret0 = cx.read_reg(ArmReg::R0); + cx.fb.ret(&[ret0]); + Ok(()) + } + _ => Err(ArmLiftError::UnsupportedControlFlow), + } + } +} + +impl Arch for Arm { + type Mode = ArmMode; + type Inst = Instruction; + type Reg = ArmReg; + type DecodeError = DecodeError; + type Disasm = String; + + fn name(&self) -> &'static str { + "arm" + } + + fn address_size(&self, _mode: Self::Mode) -> u8 { + 4 + } + + fn max_instruction_len(&self) -> u8 { + 4 + } + + fn registers(&self) -> &'static [RegisterDesc] { + ®ISTERS + } + + fn stack_pointer(&self) -> Option { + Some(ArmReg::Sp) + } + + fn decode( + &self, + bytes: &[u8], + mode: Self::Mode, + ) -> Result<(usize, Self::Inst), Self::DecodeError> { + let decoder = match mode { + ArmMode::Arm => InstDecoder::default(), + ArmMode::Thumb => InstDecoder::default_thumb(), + }; + + let mut reader = U8Reader::new(bytes); + let inst = decoder.decode(&mut reader)?; + 
let len = inst.len().to_const() as usize; + Ok((len, inst)) + } + + fn flow_info(&self, inst: &Self::Inst, pc: u64, _mode: Self::Mode) -> FlowInfo { + let len = inst.len().to_const() as u8; + + match inst.opcode { + Opcode::B => { + let target = Self::direct_target_from_operand(pc, inst.operands[0]) + .map(FlowTarget::Direct) + .unwrap_or(FlowTarget::Indirect); + + match inst.condition { + ConditionCode::AL => FlowInfo::new(len, FlowKind::Jump { target }), + _ => FlowInfo::new(len, FlowKind::CondJump { target }), + } + } + + Opcode::BL | Opcode::BLX => { + let target = Self::direct_target_from_operand(pc, inst.operands[0]) + .map(FlowTarget::Direct) + .unwrap_or(FlowTarget::Indirect); + + FlowInfo::new( + len, + FlowKind::Call { + target, + returns: true, + }, + ) + } + + Opcode::BX => match inst.operands[0] { + Operand::Reg(r) if r.number() == 14 => FlowInfo::new(len, FlowKind::Return), + _ => FlowInfo::new( + len, + FlowKind::Jump { + target: FlowTarget::Indirect, + }, + ), + }, + + Opcode::NOP => FlowInfo::new(len, FlowKind::FallThrough), + + _ => FlowInfo::new(len, FlowKind::FallThrough), + } + } + + fn disasm(&self, inst: &Self::Inst, _pc: u64, _mode: Self::Mode) -> Self::Disasm { + inst.to_string() + } +} + +impl LiftArch for Arm { + type LiftError = ArmLiftError; + type LiftCtx<'a> = ArmLiftCtx<'a>; + + fn lift(&self, cx: &mut Self::LiftCtx<'_>, inst: &Self::Inst) -> Result<(), Self::LiftError> { + self.lift_inst(cx, inst) + } +} + +pub struct DummyEnv; + +impl LiftEnv for DummyEnv { + fn block_for_target(&self, _addr: u64) -> Option { + None + } + + fn fallthrough_block(&self) -> Option { + None + } +} diff --git a/src/flow.rs b/src/flow.rs new file mode 100644 index 0000000..6b76854 --- /dev/null +++ b/src/flow.rs @@ -0,0 +1,104 @@ +//! Instruction-level control-flow facts. + +/// A statically described branch or call target. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FlowTarget { + /// A statically known target address. 
+ Direct(u64), + + /// A dynamically computed target. + Indirect, +} + +/// The control-flow behavior of one decoded instruction. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FlowKind { + /// Execution continues at the next instruction. + FallThrough, + + /// Unconditional jump. + Jump { target: FlowTarget }, + + /// Conditional jump. + /// + /// A conditional jump always has a taken edge and a fallthrough path. + CondJump { target: FlowTarget }, + + /// Call instruction. + /// + /// `returns` tells later analyses whether the architecture/lifter believes + /// execution may continue at the next instruction after the call. + Call { target: FlowTarget, returns: bool }, + + /// Return from the current routine. + Return, + + /// Trap, breakpoint, or other terminal fault-like instruction. + Trap, +} + +/// Flow facts for one decoded instruction. +/// +/// This structure is the architecture-facing summary used by later CFG-building +/// or region-building code. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct FlowInfo { + /// How many bytes does this instruction occupy + len: u8, + /// What kind of control-flow behaviour does it have? + kind: FlowKind, +} + +impl FlowInfo { + /// Creates a new flow summary. + pub const fn new(len: u8, kind: FlowKind) -> Self { + Self { len, kind } + } + + /// Returns the instruction length in bytes. + pub const fn len(self) -> u8 { + self.len + } + + /// Returns the control-flow kind. + pub const fn kind(self) -> FlowKind { + self.kind + } + + /// Returns whether this instruction ends the current basic block. + /// + /// This is still an instruction-level fact, not whole-program CFG recovery. + pub const fn terminates_block(self) -> bool { + match self.kind { + FlowKind::FallThrough => false, + FlowKind::Jump { .. } => true, + FlowKind::CondJump { .. } => true, + FlowKind::Call { returns, .. 
} => !returns, + FlowKind::Return => true, + FlowKind::Trap => true, + } + } + + /// Returns whether execution may continue at the next instruction. + pub const fn has_fallthrough(self) -> bool { + match self.kind { + FlowKind::FallThrough => true, + FlowKind::Jump { .. } => false, + FlowKind::CondJump { .. } => true, + FlowKind::Call { returns, .. } => returns, + FlowKind::Return => false, + FlowKind::Trap => false, + } + } + + /// Returns the explicit non-fallthrough target carried by the instruction, + /// if any. + pub const fn target(self) -> Option { + match self.kind { + FlowKind::Jump { target } => Some(target), + FlowKind::CondJump { target } => Some(target), + FlowKind::Call { target, .. } => Some(target), + FlowKind::FallThrough | FlowKind::Return | FlowKind::Trap => None, + } + } +} diff --git a/src/main.rs b/src/main.rs index 5a6110f..379abdf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod arch; +mod flow; mod ir; use clap::{ @@ -10,7 +11,13 @@ use clap::{ }; use cranelift_entity::EntityRef; -use crate::ir::*; +use crate::{ + arch::{ + Arch, LiftArch, + arm::{Arm, ArmLiftCtx, ArmMode, DummyEnv}, + }, + ir::*, +}; const STYLES: Styles = Styles::styled() .header(AnsiColor::Green.on_default().effects(Effects::BOLD)) @@ -26,175 +33,57 @@ const STYLES: Styles = Styles::styled() struct Args {} fn main() { - // let args = Args::parse(); + let arm = Arm::default(); + + let bytes: &[u8] = &[ + 0x01, 0x00, 0xA0, 0xE3, // mov r0, #1 + 0x02, 0x10, 0xA0, 0xE3, // mov r1, #2 + 0x01, 0x00, 0x80, 0xE0, // add r0, r0, r1 + 0x1E, 0xFF, 0x2F, 0xE1, // bx lr + ]; + + let arm = crate::arch::arm::Arm::default(); + let mut body = crate::ir::Body::new(); + let mut fb_ctx = crate::ir::FrontendBuilderContext::new(); + let env = DummyEnv; { - let mut body = Body::new(); - let mut cx = FrontendBuilderContext::new(); - { - let mut b = FrontendBuilder::new(&mut body, &mut cx); + let mut fb = crate::ir::FrontendBuilder::new(&mut body, &mut fb_ctx); + let entry = 
fb.create_block(); + fb.switch_to_block(entry); + fb.seal_block(entry); - let entry = b.create_block(); - let then_block = b.create_block(); - let else_block = b.create_block(); - let merge_block = b.create_block(); + let mut pc = 0x1000u64; + let mut rest = bytes; - let x = Variable::new(0); - let y = Variable::new(1); - let sum = Variable::new(2); - let acc = Variable::new(3); + let mut cx = crate::arch::arm::ArmLiftCtx::new( + &arm, + &env, + &mut fb, + pc, + crate::arch::arm::ArmMode::Arm, + ); - b.declare_var(x, Type::i32()); - b.declare_var(y, Type::i32()); - b.declare_var(sum, Type::i32()); - b.declare_var(acc, Type::i32()); + while !rest.is_empty() { + let (len, inst) = arm.decode(rest, crate::arch::arm::ArmMode::Arm).unwrap(); + println!( + "{:#x}: {}", + pc, + arm.disasm(&inst, pc, crate::arch::arm::ArmMode::Arm) + ); + println!( + " flow: {:?}", + arm.flow_info(&inst, pc, crate::arch::arm::ArmMode::Arm) + ); - // entry: - // - // x = 7 - // y = 5 - // sum = x + y - // if (sum > 10) goto then else goto else - // - b.switch_to_block(entry); - b.seal_block(entry); + cx.pc = pc; + arm.lift(&mut cx, &inst).unwrap(); - let c7 = b.iconst(Type::i32(), 7); - let c5 = b.iconst(Type::i32(), 5); - let c10 = b.iconst(Type::i32(), 10); - - b.def_var(x, c7); - b.def_var(y, c5); - - let lhs = b.use_var(x); - let rhs = b.use_var(y); - let sum_val = b.iadd(lhs, rhs, Type::i32()); - b.def_var(sum, sum_val); - - let lhs1 = b.use_var(sum); - let cond = b.icmp(IntCC::Sgt, lhs1, c10); - b.br_if(cond, then_block, else_block); - - // Both then/else now have their predecessor set. 
- b.seal_block(then_block); - b.seal_block(else_block); - - // then: - // - // acc = x * 2 - // goto merge - // - b.switch_to_block(then_block); - - let c2 = b.iconst(Type::i32(), 2); - let lhs2 = b.use_var(x); - let doubled = b.imul(lhs2, c2, Type::i32()); - b.def_var(acc, doubled); - b.jump(merge_block); - - // else: - // - // acc = 0 - y - // goto merge - // - b.switch_to_block(else_block); - - let c0 = b.iconst(Type::i32(), 0); - let rhs1 = b.use_var(y); - let neg_y = b.isub(c0, rhs1, Type::i32()); - b.def_var(acc, neg_y); - b.jump(merge_block); - - // merge: - // - // acc and sum should come in as block params synthesized by use_var() - // result = acc + sum - // return result - // - b.seal_block(merge_block); - b.switch_to_block(merge_block); - - let merged_acc = b.use_var(acc); - let merged_sum = b.use_var(sum); - let result = b.iadd(merged_acc, merged_sum, Type::i32()); - - b.ret(&[result]); + pc += len as u64; + rest = &rest[len..]; } - - verify(&body).unwrap(); - println!("{body}"); } - { - let mut body = Body::new(); - let mut cx = FrontendBuilderContext::new(); - - { - let mut b = FrontendBuilder::new(&mut body, &mut cx); - - let entry = b.create_block(); - let loop_header = b.create_block(); - let loop_body = b.create_block(); - let exit = b.create_block(); - - let i = Variable::new(0); - b.declare_var(i, Type::i32()); - - // entry: - // i = 0 - // br loop_header - b.switch_to_block(entry); - b.seal_block(entry); - - let c0 = b.iconst(Type::i32(), 0); - b.def_var(i, c0); - b.jump(loop_header); - - // loop_header: - // if (i < 4) goto loop_body else goto exit - // - // IMPORTANT: - // Do not seal this block yet. We still have a backedge coming from - // loop_body, and we want use_var(i) here to synthesize a block param. - b.switch_to_block(loop_header); - - let iv = b.use_var(i); - let c4 = b.iconst(Type::i32(), 4); - let cond = b.icmp(IntCC::Slt, iv, c4); - b.br_if(cond, loop_body, exit); - - // loop_body now has its predecessor. 
- b.seal_block(loop_body); - - // loop_body: - // i = i + 1 - // br loop_header - b.switch_to_block(loop_body); - - let iv = b.use_var(i); - let c1 = b.iconst(Type::i32(), 1); - let next = b.iadd(iv, c1, Type::i32()); - b.def_var(i, next); - b.jump(loop_header); - - // Now loop_header has all of its predecessors: - // - entry - // - loop_body - b.seal_block(loop_header); - - // exit: - // return i - // - // exit already has one predecessor from loop_header's br_if, so using - // `i` here should synthesize a block param and patch that edge. - b.seal_block(exit); - b.switch_to_block(exit); - - let out = b.use_var(i); - b.ret(&[out]); - } - - verify(&body).unwrap(); - println!("{body}"); - } + crate::ir::verify(&body).unwrap(); + println!("{body}"); }