feat: implement arch, initial arm prototype

This commit is contained in:
Igor kehrazy 2026-04-07 13:49:49 +03:00
parent 4a4d49f7de
commit ebf798e22e
6 changed files with 1704 additions and 187 deletions

112
Cargo.lock generated
View file

@ -70,12 +70,53 @@ version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "bumpalo"
version = "3.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "capstone"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f442ae0f2f3f1b923334b4a5386c95c69c1cfa072bafa23d6fae6d9682eb1dd4"
dependencies = [
"capstone-sys",
"static_assertions",
]
[[package]]
name = "capstone-sys"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4e8087cab6731295f5a2a2bd82989ba4f41d3a428aab2e7c98d8f4db38aac05"
dependencies = [
"cc",
]
[[package]]
name = "cc"
version = "1.2.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
@ -188,6 +229,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "fixedbitset"
version = "0.5.7"
@ -206,6 +253,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "hashbrown"
version = "0.15.5"
@ -361,6 +414,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "raw-cpuid"
version = "11.6.0"
@ -411,15 +470,24 @@ dependencies = [
"syn",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "slonik"
version = "0.1.0"
dependencies = [
"capstone",
"clap",
"cranelift-entity",
"egg",
"petgraph",
"smallvec",
"yaxpeax-arch",
"yaxpeax-arm",
]
[[package]]
@ -428,6 +496,12 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.11.1"
@ -462,6 +536,12 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "thiserror"
version = "1.0.69"
@ -601,3 +681,35 @@ checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]
[[package]]
name = "yaxpeax-arch"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36274fcc5403da2a7636ffda4d02eca12a1b2b8267b9d2e04447bd2ccfc72082"
dependencies = [
"num-traits",
"serde",
"serde_derive",
]
[[package]]
name = "yaxpeax-arm"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8aa9155f0d727d10e91e5a94f68f415ec24c7a5faab4eac2386a1069e4a02d7"
dependencies = [
"bitvec",
"serde",
"serde_derive",
"yaxpeax-arch",
]

View file

@ -4,8 +4,11 @@ version = "0.1.0"
edition = "2024"
[dependencies]
capstone = "0.14.0"
clap = { version = "4.6.0", features = ["derive"] }
cranelift-entity = "0.130.0"
egg = "0.11.0"
petgraph = "0.8.3"
smallvec = "1.15.1"
yaxpeax-arch = "0.3.2"
yaxpeax-arm = "0.4.0"

View file

@ -1,30 +1,98 @@
// pub trait Architecture {
// type Mode: Copy + Eq + Send + Sync + 'static;
// type DecodedInstruction: Send + Sync + 'static;
//! Architecture-facing interfaces.
// fn name(&self) -> &'static str;
pub mod arm;
// fn decode(
// &self,
// bytes: &[u8],
// addr: u64,
// mode: Self::Mode,
// ) -> Result<(usize, Self::DecodedInstruction), DecodeError>;
use crate::{
flow::FlowInfo,
ir::{Block, FrontendBuilder},
};
// fn lift(
// &self,
// insn: &Self::DecodedInstruction,
// addr: u64,
// mode: Self::Mode,
// b: &mut Builder,
// ) -> Result<InstructionInfo, LiftError>;
/// Flat, architecture-independent description of a single register.
///
/// The descriptor is deliberately minimal: an identity, a printable name and a
/// width. Aliasing or overlap relationships between registers are not modelled
/// here; they belong in the concrete architecture module.
///
/// `R` is the architecture's own register identity type.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct RegisterDesc<R> {
    /// Architecture-defined register identity.
    pub reg: R,

    /// Human-readable register name.
    pub name: &'static str,

    /// Register width in bits.
    pub bits: u16,
}
/// The stable protocol every architecture backend implements.
///
/// An implementor exposes static facts about the machine (its name, pointer
/// width, instruction length bound and register set) and provides
/// single-instruction decoding, control-flow classification and disassembly.
pub trait Arch {
    /// Execution mode the architecture can be in.
    type Mode: Copy + Eq;

    /// A single decoded instruction.
    type Inst;

    /// Identity of an architectural register.
    type Reg: Copy + Eq;

    /// Error reported when decoding fails.
    type DecodeError;

    /// Architecture-specific type produced by [`Arch::disasm`].
    type Disasm;

    /// Stable name of the architecture.
    fn name(&self) -> &'static str;

    /// Width of a pointer, in bytes, when executing in `mode`.
    fn address_size(&self, mode: Self::Mode) -> u8;

    /// Upper bound, in bytes, on the length of one instruction.
    fn max_instruction_len(&self) -> u8;

    /// Descriptors for the architectural register set.
    fn registers(&self) -> &'static [RegisterDesc<Self::Reg>];

    /// The stack pointer register, or `None` when the architecture has none.
    fn stack_pointer(&self) -> Option<Self::Reg>;

    /// Decodes a single instruction from `bytes` under `mode`.
    ///
    /// NOTE(review): the `usize` in the success value looks like the number of
    /// bytes the instruction occupies — confirm against the implementors.
    fn decode(
        &self,
        bytes: &[u8],
        mode: Self::Mode,
    ) -> Result<(usize, Self::Inst), Self::DecodeError>;

    /// Summarises the instruction-level control-flow facts of `inst`, given
    /// the address `pc` and the execution `mode`.
    fn flow_info(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> FlowInfo;

    /// Renders `inst` as disassembly, given the address `pc` and the execution
    /// `mode`.
    fn disasm(&self, inst: &Self::Inst, pc: u64, mode: Self::Mode) -> Self::Disasm;
}
/// What a lifter may ask of the surrounding translation session.
pub trait LiftEnv {
    /// Looks up the IR block for the statically known target address `addr`.
    ///
    /// Yields `None` when the current translation session has not created a
    /// block for that address.
    fn block_for_target(&self, addr: u64) -> Option<Block>;

    /// Looks up the block execution falls through to after the current
    /// instruction.
    ///
    /// Yields `None` when no such block exists in the current translation
    /// session.
    fn fallthrough_block(&self) -> Option<Block>;
}
/// Optional capability on top of [`Arch`]: lifting decoded instructions into
/// Slonik IR.
pub trait LiftArch: Arch {
    /// Error reported when lifting fails.
    type LiftError;

    /// Mutable context threaded through each lift call.
    ///
    /// The large mutable lifting state is meant to live here rather than on
    /// the architecture value itself.
    type LiftCtx<'a>
    where
        Self: 'a;

    /// Lifts the single decoded instruction `inst` into Slonik IR, using `cx`
    /// as the mutable lifting context.
    fn lift(&self, cx: &mut Self::LiftCtx<'_>, inst: &Self::Inst) -> Result<(), Self::LiftError>;
}

1341
src/arch/arm.rs Normal file

File diff suppressed because it is too large Load diff

104
src/flow.rs Normal file
View file

@ -0,0 +1,104 @@
//! Instruction-level control-flow facts.
/// Where a jump or call goes, as far as static decoding can tell.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FlowTarget {
    /// The destination address is known statically.
    Direct(u64),

    /// The destination is computed dynamically.
    Indirect,
}

/// Classification of a single decoded instruction by its control-flow effect.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FlowKind {
    /// No transfer of control: the next instruction executes.
    FallThrough,

    /// Jump that is always taken.
    Jump { target: FlowTarget },

    /// Jump that is taken only under some condition.
    ///
    /// Besides the taken edge there is always a fallthrough path.
    CondJump { target: FlowTarget },

    /// Call instruction.
    ///
    /// `returns` records whether the architecture/lifter believes execution
    /// may resume at the instruction following the call.
    Call { target: FlowTarget, returns: bool },

    /// Return from the current routine.
    Return,

    /// Trap, breakpoint, or other terminal fault-like instruction.
    Trap,
}

/// Control-flow summary of one decoded instruction.
///
/// Architectures produce this value; later CFG-building or region-building
/// code consumes it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct FlowInfo {
    /// Number of bytes the instruction occupies.
    len: u8,

    /// Control-flow behaviour of the instruction.
    kind: FlowKind,
}

impl FlowInfo {
    /// Builds a flow summary from an instruction length and a flow kind.
    pub const fn new(len: u8, kind: FlowKind) -> Self {
        FlowInfo { len, kind }
    }

    /// Length of the instruction in bytes.
    pub const fn len(self) -> u8 {
        self.len
    }

    /// Control-flow kind of the instruction.
    pub const fn kind(self) -> FlowKind {
        self.kind
    }

    /// Reports whether the instruction ends the current basic block.
    ///
    /// Every kind except a plain fallthrough and a call that returns ends the
    /// block. This remains an instruction-level fact; it is not whole-program
    /// CFG recovery.
    pub const fn terminates_block(self) -> bool {
        match self.kind {
            FlowKind::FallThrough => false,
            FlowKind::Call { returns, .. } => !returns,
            FlowKind::Jump { .. }
            | FlowKind::CondJump { .. }
            | FlowKind::Return
            | FlowKind::Trap => true,
        }
    }

    /// Reports whether execution may continue at the next instruction.
    pub const fn has_fallthrough(self) -> bool {
        match self.kind {
            FlowKind::FallThrough | FlowKind::CondJump { .. } => true,
            FlowKind::Call { returns, .. } => returns,
            FlowKind::Jump { .. } | FlowKind::Return | FlowKind::Trap => false,
        }
    }

    /// The explicit (non-fallthrough) target carried by the instruction.
    ///
    /// Jumps, conditional jumps and calls carry one; every other kind yields
    /// `None`.
    pub const fn target(self) -> Option<FlowTarget> {
        match self.kind {
            FlowKind::Jump { target }
            | FlowKind::CondJump { target }
            | FlowKind::Call { target, .. } => Some(target),
            FlowKind::FallThrough | FlowKind::Return | FlowKind::Trap => None,
        }
    }
}

View file

@ -1,4 +1,5 @@
mod arch;
mod flow;
mod ir;
use clap::{
@ -10,7 +11,13 @@ use clap::{
};
use cranelift_entity::EntityRef;
use crate::ir::*;
use crate::{
arch::{
Arch, LiftArch,
arm::{Arm, ArmLiftCtx, ArmMode, DummyEnv},
},
ir::*,
};
const STYLES: Styles = Styles::styled()
.header(AnsiColor::Green.on_default().effects(Effects::BOLD))
@ -26,175 +33,57 @@ const STYLES: Styles = Styles::styled()
struct Args {}
/// Prototype driver for the IR builder and the ARM front end.
///
/// NOTE(review): as shown here this body interleaves two generations of the
/// function: the hand-built IR demos (a diamond and a counted loop, driven
/// through builder `b`) and the ARM decode/lift loop (driven through `fb` and
/// `cx`). `arm` is bound twice, `body` is mutably borrowed by both `b` and
/// `fb`, and the braces do not balance, so it cannot compile in this form.
/// Confirm against the real `src/main.rs` before relying on any of it.
fn main() {
// let args = Args::parse();
let arm = Arm::default();
// Fixed input: four 4-byte encodings, each annotated with its intended
// disassembly.
let bytes: &[u8] = &[
0x01, 0x00, 0xA0, 0xE3, // mov r0, #1
0x02, 0x10, 0xA0, 0xE3, // mov r1, #2
0x01, 0x00, 0x80, 0xE0, // add r0, r0, r1
0x1E, 0xFF, 0x2F, 0xE1, // bx lr
];
let arm = crate::arch::arm::Arm::default();
let mut body = crate::ir::Body::new();
let mut fb_ctx = crate::ir::FrontendBuilderContext::new();
let env = DummyEnv;
{
let mut body = Body::new();
let mut cx = FrontendBuilderContext::new();
{
let mut b = FrontendBuilder::new(&mut body, &mut cx);
let mut fb = crate::ir::FrontendBuilder::new(&mut body, &mut fb_ctx);
let entry = fb.create_block();
fb.switch_to_block(entry);
fb.seal_block(entry);
let entry = b.create_block();
let then_block = b.create_block();
let else_block = b.create_block();
let merge_block = b.create_block();
// Decode cursor: `pc` starts at 0x1000 and `rest` is the not-yet-decoded
// tail of `bytes`.
let mut pc = 0x1000u64;
let mut rest = bytes;
let x = Variable::new(0);
let y = Variable::new(1);
let sum = Variable::new(2);
let acc = Variable::new(3);
let mut cx = crate::arch::arm::ArmLiftCtx::new(
&arm,
&env,
&mut fb,
pc,
crate::arch::arm::ArmMode::Arm,
);
b.declare_var(x, Type::i32());
b.declare_var(y, Type::i32());
b.declare_var(sum, Type::i32());
b.declare_var(acc, Type::i32());
// Decode, print and lift one instruction per iteration until the input is
// exhausted.
while !rest.is_empty() {
let (len, inst) = arm.decode(rest, crate::arch::arm::ArmMode::Arm).unwrap();
println!(
"{:#x}: {}",
pc,
arm.disasm(&inst, pc, crate::arch::arm::ArmMode::Arm)
);
println!(
" flow: {:?}",
arm.flow_info(&inst, pc, crate::arch::arm::ArmMode::Arm)
);
// entry:
//
// x = 7
// y = 5
// sum = x + y
// if (sum > 10) goto then else goto else
//
b.switch_to_block(entry);
b.seal_block(entry);
// Point the lift context's `pc` field at the instruction about to be lifted.
cx.pc = pc;
arm.lift(&mut cx, &inst).unwrap();
let c7 = b.iconst(Type::i32(), 7);
let c5 = b.iconst(Type::i32(), 5);
let c10 = b.iconst(Type::i32(), 10);
b.def_var(x, c7);
b.def_var(y, c5);
let lhs = b.use_var(x);
let rhs = b.use_var(y);
let sum_val = b.iadd(lhs, rhs, Type::i32());
b.def_var(sum, sum_val);
let lhs1 = b.use_var(sum);
let cond = b.icmp(IntCC::Sgt, lhs1, c10);
b.br_if(cond, then_block, else_block);
// Both then/else now have their predecessor set.
b.seal_block(then_block);
b.seal_block(else_block);
// then:
//
// acc = x * 2
// goto merge
//
b.switch_to_block(then_block);
let c2 = b.iconst(Type::i32(), 2);
let lhs2 = b.use_var(x);
let doubled = b.imul(lhs2, c2, Type::i32());
b.def_var(acc, doubled);
b.jump(merge_block);
// else:
//
// acc = 0 - y
// goto merge
//
b.switch_to_block(else_block);
let c0 = b.iconst(Type::i32(), 0);
let rhs1 = b.use_var(y);
let neg_y = b.isub(c0, rhs1, Type::i32());
b.def_var(acc, neg_y);
b.jump(merge_block);
// merge:
//
// acc and sum should come in as block params synthesized by use_var()
// result = acc + sum
// return result
//
b.seal_block(merge_block);
b.switch_to_block(merge_block);
let merged_acc = b.use_var(acc);
let merged_sum = b.use_var(sum);
let result = b.iadd(merged_acc, merged_sum, Type::i32());
b.ret(&[result]);
// Advance the cursor past the instruction just handled.
pc += len as u64;
rest = &rest[len..];
}
verify(&body).unwrap();
println!("{body}");
}
{
let mut body = Body::new();
let mut cx = FrontendBuilderContext::new();
{
let mut b = FrontendBuilder::new(&mut body, &mut cx);
let entry = b.create_block();
let loop_header = b.create_block();
let loop_body = b.create_block();
let exit = b.create_block();
let i = Variable::new(0);
b.declare_var(i, Type::i32());
// entry:
// i = 0
// br loop_header
b.switch_to_block(entry);
b.seal_block(entry);
let c0 = b.iconst(Type::i32(), 0);
b.def_var(i, c0);
b.jump(loop_header);
// loop_header:
// if (i < 4) goto loop_body else goto exit
//
// IMPORTANT:
// Do not seal this block yet. We still have a backedge coming from
// loop_body, and we want use_var(i) here to synthesize a block param.
b.switch_to_block(loop_header);
let iv = b.use_var(i);
let c4 = b.iconst(Type::i32(), 4);
let cond = b.icmp(IntCC::Slt, iv, c4);
b.br_if(cond, loop_body, exit);
// loop_body now has its predecessor.
b.seal_block(loop_body);
// loop_body:
// i = i + 1
// br loop_header
b.switch_to_block(loop_body);
let iv = b.use_var(i);
let c1 = b.iconst(Type::i32(), 1);
let next = b.iadd(iv, c1, Type::i32());
b.def_var(i, next);
b.jump(loop_header);
// Now loop_header has all of its predecessors:
// - entry
// - loop_body
b.seal_block(loop_header);
// exit:
// return i
//
// exit already has one predecessor from loop_header's br_if, so using
// `i` here should synthesize a block param and patch that edge.
b.seal_block(exit);
b.switch_to_block(exit);
let out = b.use_var(i);
b.ret(&[out]);
}
verify(&body).unwrap();
println!("{body}");
}
// Run the IR verifier over the finished body (panicking on failure), then
// print the body.
crate::ir::verify(&body).unwrap();
println!("{body}");
}