diff --git a/src/ir.rs b/src/ir.rs index 91b3f32..65c9f9c 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,18 +1,25 @@ //! Slonik normalized SSA IR. +mod block; mod body; mod dfg; -mod inst; +mod expr; +mod frontend; mod layout; -mod petgraph; +mod stmt; mod ty; +mod verify; +mod write; +pub use block::*; pub use body::*; -pub use dfg::*; -pub use inst::*; +pub use expr::*; +pub use frontend::*; pub use layout::*; -pub use petgraph::*; +pub use stmt::*; pub use ty::*; +pub use verify::*; +pub use write::*; /// Defines a thin `u32` entity handle. /// @@ -40,9 +47,30 @@ entity! { /// A handle to a basic block in a `Body`. pub struct Block = "block"; + /// A handle to a pure expression node. + /// + /// Expressions compute values and have no direct side effects. + /// They are the primary source of SSA values in the normalized IR. + pub struct Expr = "expr"; + + + /// A handle to an ordered statement node. + /// + /// Statements live inside blocks and represent side effects or control flow, + /// such as stores, branches, calls, and returns. + pub struct Stmt = "stmt"; + /// A handle to an instruction in a `Body`. pub struct Inst = "inst"; /// A handle to an SSA value. + /// + /// Values are either: + /// + /// - the result of an expression + /// - a block parameter pub struct Value = "v"; + + /// A frontend-level source variable. + pub struct Variable = "var"; } diff --git a/src/ir/block.rs b/src/ir/block.rs new file mode 100644 index 0000000..9d33bb2 --- /dev/null +++ b/src/ir/block.rs @@ -0,0 +1,63 @@ +//! Basic-block storage for Slonik IR. +//! +//! A block is the unit of control-flow in the normalized IR. +//! +//! Each block owns: +//! +//! - a list of block parameters +//! - an ordered list of statements +//! +//! # Block parameters +//! +//! Slonik uses block arguments instead of phi nodes. +//! A predecessor transfers control to a block together with the incoming SSA +//! values for that block's parameters. +//! +//! # Statement order +//! +//! 
Statements inside a block are ordered. +//! Terminators, when present, must appear as the final statement in the block. +//! +//! # Scope +//! +//! This module only defines **per-block storage**. +//! It does not define: +//! +//! - global body ownership +//! - SSA value storage +//! - expression storage +//! - verification policy +//! +//! Those belong in other IR modules. + +use cranelift_entity::EntityList; + +use crate::ir::{Stmt, Value}; + +/// A compact list of block parameters, owned by the [`Body`]. +pub type ParamList = EntityList; + +/// A compact ordered list of statements, owned by the [`Body`]. +pub type StmtList = EntityList; + +/// Per-block storage. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct BlockData { + /// The SSA parameters accepted by this block. + pub params: ParamList, + + /// The ordered statements inside this block. + pub stmts: StmtList, +} + +impl BlockData { + /// Creates an empty block. + pub fn new() -> Self { + Self::default() + } + + /// Returns whether the block has no parameters and no statements. + pub fn is_empty(&self) -> bool { + self.params.is_empty() && self.stmts.is_empty() + } +} diff --git a/src/ir/body.rs b/src/ir/body.rs index bd564a9..b29e142 100644 --- a/src/ir/body.rs +++ b/src/ir/body.rs @@ -1,15 +1,42 @@ //! The owning container for a normalized Slonik IR body. -use crate::ir::{Block, DataFlowGraph, Inst, InstructionData, Layout, Type, Value}; +use cranelift_entity::{EntityRef, ListPool, PrimaryMap, SecondaryMap}; + +use crate::ir::{ + Block, BlockCall, BlockData, Expr, ExprData, MemSize, ParamList, Stmt, StmtData, StmtList, + Type, Value, ValueData, ValueDef, ValueList, +}; + +/// Per-expression storage owned by a [`Body`]. +#[derive(Clone, Debug, PartialEq)] +pub struct ExprNode { + /// The semantic payload of the expression. + pub data: ExprData, + + /// The SSA value produced by this expression. + pub value: Value, +} /// A normalized SSA body. 
#[derive(Default)] pub struct Body { - /// Semantic storage for blocks, instructions, values, and value lists. - pub dfg: DataFlowGraph, + /// All blocks in the body. + pub blocks: PrimaryMap, - /// Program order and instruction/block containment. - pub layout: Layout, + /// All expressions in the body. + pub exprs: PrimaryMap, + + /// All statements in the body. + pub stmts: PrimaryMap, + + /// All SSA values in the body. + pub values: PrimaryMap, + + /// Pooled storage for compact value lists. + pub value_lists: ListPool, + + /// Pooled storage for compact statement lists. + pub stmt_lists: ListPool, } impl Body { @@ -18,102 +45,183 @@ impl Body { Self::default() } - /// Returns whether the body contains no blocks. + /// Returns whether the body contains no blocks, expressions, statements, or values. pub fn is_empty(&self) -> bool { - self.dfg.num_blocks() == 0 + self.blocks.is_empty() + && self.exprs.is_empty() + && self.stmts.is_empty() + && self.values.is_empty() } /// Returns the number of blocks in the body. - pub fn block_count(&self) -> usize { - self.dfg.num_blocks() + pub fn num_blocks(&self) -> usize { + self.blocks.len() } - /// Returns the number of instructions in the body. - pub fn inst_count(&self) -> usize { - self.dfg.num_insts() + /// Returns the number of expressions in the body. + pub fn num_exprs(&self) -> usize { + self.exprs.len() + } + + /// Returns the number of statements in the body. + pub fn num_stmts(&self) -> usize { + self.stmts.len() } /// Returns the number of SSA values in the body. - pub fn value_count(&self) -> usize { - self.dfg.num_values() + pub fn num_values(&self) -> usize { + self.values.len() } - /// Creates a new block and appends it to the block layout order. + /// Creates a new empty block. pub fn create_block(&mut self) -> Block { - let block = self.dfg.create_block(); - self.layout.append_block(block); - block + self.blocks.push(BlockData::default()) + } + + /// Returns the data for `block`. 
+ pub fn block_data(&self, block: Block) -> &BlockData { + &self.blocks[block] + } + + /// Returns the mutable data for `block`. + pub fn block_data_mut(&mut self, block: Block) -> &mut BlockData { + &mut self.blocks[block] + } + + /// Returns an iterator over blocks in creation order. + pub fn blocks(&self) -> Blocks { + Blocks { + next: 0, + len: self.blocks.len(), + } } /// Appends a block parameter of type `ty` to `block`. - /// - /// The returned SSA value is defined as a block parameter. pub fn append_block_param(&mut self, block: Block, ty: Type) -> Value { - self.dfg.append_block_param(block, ty) + let index = self.blocks[block].params.len(&self.value_lists) as u16; + + let value = self.values.push(ValueData { + ty, + def: ValueDef::Param(block, index), + }); + + self.blocks[block].params.push(value, &mut self.value_lists); + value } - /// Creates an instruction with `data`, assigns SSA result values for - /// `result_tys`, and appends the instruction to the end of `block`. - pub fn append_inst( - &mut self, - block: Block, - data: InstructionData, - result_tys: &[Type], - ) -> Inst { - let inst = self.dfg.create_inst(data, result_tys); - self.layout.append_inst(block, inst); - inst + /// Returns the parameter list of `block`. + pub fn block_params(&self, block: Block) -> &[Value] { + self.blocks[block].params.as_slice(&self.value_lists) } - /// Returns the first block in layout order, if any. - pub fn first_block(&self) -> Option { - self.layout.first_block() + /// Returns the ordered statement list of `block`. + pub fn block_stmts(&self, block: Block) -> &[Stmt] { + self.blocks[block].stmts.as_slice(&self.stmt_lists) } - /// Returns the last block in layout order, if any. - pub fn last_block(&self) -> Option { - self.layout.last_block() + /// Returns the last statement in `block`, if any. + pub fn last_stmt(&self, block: Block) -> Option { + self.block_stmts(block).last().copied() } - /// Returns the last instruction in `block`, if any. 
- pub fn last_inst(&self, block: Block) -> Option { - self.layout.last_inst(block) + /// Creates a pooled value list from `values`. + pub fn make_value_list(&mut self, values: &[Value]) -> ValueList { + ValueList::from_slice(values, &mut self.value_lists) } - /// Returns the terminator instruction of `block`, if the block ends in one. - pub fn block_terminator(&self, block: Block) -> Option { - let inst = self.layout.last_inst(block)?; - self.dfg.inst_data(inst).is_terminator().then_some(inst) + /// Creates and appends a statement to `block`. + pub fn append_stmt(&mut self, block: Block, data: StmtData) -> Stmt { + let stmt = self.stmts.push(data); + self.blocks[block].stmts.push(stmt, &mut self.stmt_lists); + stmt } - /// Returns the terminator data for `block`, if the block ends in a terminator. - pub fn block_terminator_data(&self, block: Block) -> Option<&InstructionData> { - let inst = self.block_terminator(block)?; - Some(self.dfg.inst_data(inst)) + /// Returns the data for `stmt`. + pub fn stmt_data(&self, stmt: Stmt) -> &StmtData { + &self.stmts[stmt] } - /// Returns an iterator over the successor blocks of `block`. + /// Returns the mutable data for `stmt`. + pub fn stmt_data_mut(&mut self, stmt: Stmt) -> &mut StmtData { + &mut self.stmts[stmt] + } + + /// Creates a new expression producing one SSA value of type `ty`. + pub fn create_expr(&mut self, data: ExprData, ty: Type) -> Expr { + let expr = self.exprs.next_key(); + let value = self.values.push(ValueData { + ty, + def: ValueDef::Expr(expr), + }); + + let expr = self.exprs.push(ExprNode { data, value }); + debug_assert_eq!(self.expr_value(expr), value); + expr + } + + /// Returns the data for `expr`. + pub fn expr_data(&self, expr: Expr) -> &ExprData { + &self.exprs[expr].data + } + + /// Returns the mutable data for `expr`. + pub fn expr_data_mut(&mut self, expr: Expr) -> &mut ExprData { + &mut self.exprs[expr].data + } + + /// Returns the SSA value produced by `expr`. 
+ pub fn expr_value(&self, expr: Expr) -> Value { + self.exprs[expr].value + } + + /// Returns the full value record for `value`. + pub fn value_data(&self, value: Value) -> &ValueData { + &self.values[value] + } + + /// Returns the semantic type of `value`. + pub fn value_type(&self, value: Value) -> Type { + self.values[value].ty + } + + /// Returns the definition site of `value`. + pub fn value_def(&self, value: Value) -> ValueDef { + self.values[value].def + } + + /// Returns the terminator statement of `block`, if the block ends in one. + pub fn block_terminator(&self, block: Block) -> Option { + let stmt = self.last_stmt(block)?; + self.stmt_data(stmt).is_terminator().then_some(stmt) + } + + /// Returns the terminator data for `block`, if present. + pub fn block_terminator_data(&self, block: Block) -> Option<&StmtData> { + let stmt = self.block_terminator(block)?; + Some(self.stmt_data(stmt)) + } + + /// Returns an iterator over the CFG successors of `block`. /// - /// Successors are derived from the block's terminator instruction. - /// Non-terminating blocks and blocks ending in `return` have no successors. + /// Successors are derived from the block's terminator statement. pub fn block_successors(&self, block: Block) -> BlockSuccessors { let Some(term) = self.block_terminator_data(block) else { return BlockSuccessors::Empty; }; match term { - InstructionData::Jump { dst, .. } => BlockSuccessors::One(Some(dst.block)), + StmtData::Jump { dst } => BlockSuccessors::One(Some(dst.block)), - InstructionData::BrIf { + StmtData::BrIf { then_dst, else_dst, .. } => BlockSuccessors::Two { first: Some(then_dst.block), second: Some(else_dst.block), }, - InstructionData::Return { .. } => BlockSuccessors::Empty, + StmtData::Return { .. } => BlockSuccessors::Empty, - _ => BlockSuccessors::Empty, + StmtData::Store { .. } | StmtData::Call { .. 
} => BlockSuccessors::Empty, } } @@ -121,16 +229,38 @@ impl Body { pub fn block_successor_count(&self, block: Block) -> usize { self.block_successors(block).count() } + + /// Creates a block call target with pooled SSA arguments. + pub fn block_call(&mut self, block: Block, args: &[Value]) -> BlockCall { + BlockCall { + block, + args: self.make_value_list(args), + } + } } -/// Iterator over the successor blocks of a basic block. -/// -/// This is intentionally tiny because the normalized IR currently has very -/// simple terminators: -/// -/// - `jump` has one successor -/// - `br_if` has two successors -/// - `return` has zero successors +/// Iterator over blocks in creation order. +#[derive(Clone, Debug)] +pub struct Blocks { + next: usize, + len: usize, +} + +impl Iterator for Blocks { + type Item = Block; + + fn next(&mut self) -> Option { + if self.next == self.len { + return None; + } + + let block = Block::new(self.next); + self.next += 1; + Some(block) + } +} + +/// Iterator over the CFG successors of a block. #[derive(Clone, Debug)] pub enum BlockSuccessors { /// No successors. @@ -152,9 +282,7 @@ impl Iterator for BlockSuccessors { fn next(&mut self) -> Option { match self { Self::Empty => None, - Self::One(slot) => slot.take(), - Self::Two { first, second } => { if let Some(block) = first.take() { Some(block) diff --git a/src/ir/dfg.rs b/src/ir/dfg.rs index 26af998..8b13789 100644 --- a/src/ir/dfg.rs +++ b/src/ir/dfg.rs @@ -1,173 +1 @@ -//! Data-flow storage for Slonik IR. -use cranelift_entity::{ListPool, PrimaryMap}; - -use crate::ir::{Block, Inst, InstructionData, Type, Value, ValueData, ValueDef, ValueList}; - -/// Per-block data owned by the [`DataFlowGraph`]. -#[derive(Clone, Debug, Default)] -pub struct BlockData { - /// The SSA parameters of this block. - pub params: ValueList, -} - -/// Per-instruction data owned by the [`DataFlowGraph`]. -#[derive(Clone, Debug, PartialEq)] -pub struct InstData { - /// The semantic payload of the instruction. 
- pub data: InstructionData, - - /// The SSA results defined by this instruction. - pub results: ValueList, -} - -/// The semantic storage for a Slonik IR body. -/// -/// The DFG owns all blocks, instructions, and SSA values, but does not own -/// their order. Program order is tracked by [`crate::ir::Layout`]. -#[derive(Default)] -pub struct DataFlowGraph { - /// All blocks in the body. - pub blocks: PrimaryMap, - - /// All instructions in the body. - pub insts: PrimaryMap, - - /// All SSA values in the body. - pub values: PrimaryMap, - - /// Pooled storage for compact value lists. - pub value_lists: ListPool, -} - -impl DataFlowGraph { - /// Creates an empty data-flow graph. - pub fn new() -> Self { - Self::default() - } - - /// Returns the number of blocks in the graph. - pub fn num_blocks(&self) -> usize { - self.blocks.len() - } - - /// Returns the number of instructions in the graph. - pub fn num_insts(&self) -> usize { - self.insts.len() - } - - /// Returns the number of SSA values in the graph. - pub fn num_values(&self) -> usize { - self.values.len() - } - - /// Creates a new empty block. - pub fn create_block(&mut self) -> Block { - self.blocks.push(BlockData::default()) - } - - /// Returns the data for `block`. - pub fn block_data(&self, block: Block) -> &BlockData { - &self.blocks[block] - } - - /// Returns the mutable data for `block`. - pub fn block_data_mut(&mut self, block: Block) -> &mut BlockData { - &mut self.blocks[block] - } - - /// Returns the parameter values of `block`. - pub fn block_params(&self, block: Block) -> &[Value] { - self.blocks[block].params.as_slice(&self.value_lists) - } - - /// Appends a block parameter of type `ty` to `block`. - /// - /// The returned SSA value is defined by `ValueDef::Param(block, index)`. 
- pub fn append_block_param(&mut self, block: Block, ty: Type) -> Value { - let index = self.blocks[block].params.len(&self.value_lists) as u16; - - let value = self.values.push(ValueData { - ty, - def: ValueDef::Param(block, index), - }); - - self.blocks[block].params.push(value, &mut self.value_lists); - value - } - - /// Creates a pooled value list from `values`. - /// - /// This is mainly useful when constructing variable-arity instructions such - /// as calls or returns. - pub fn make_value_list(&mut self, values: &[Value]) -> ValueList { - ValueList::from_slice(values, &mut self.value_lists) - } - - /// Creates a new instruction with the given semantic payload and result - /// types. - /// - /// One SSA result value is created for each entry in `result_tys`, in order. - /// Those values are recorded as `ValueDef::Result(inst, index)`. - pub fn create_inst(&mut self, data: InstructionData, result_tys: &[Type]) -> Inst { - let inst = self.insts.push(InstData { - data, - results: ValueList::new(), - }); - - let mut results = ValueList::new(); - for (index, ty) in result_tys.iter().copied().enumerate() { - let value = self.values.push(ValueData { - ty, - def: ValueDef::Result(inst, index as u16), - }); - - results.push(value, &mut self.value_lists); - } - - self.insts[inst].results = results; - inst - } - - /// Returns the data for `inst`. - pub fn inst_data(&self, inst: Inst) -> &InstructionData { - &self.insts[inst].data - } - - /// Returns the mutable data for `inst`. - pub fn inst_data_mut(&mut self, inst: Inst) -> &mut InstructionData { - &mut self.insts[inst].data - } - - /// Replaces the semantic payload of `inst`. - /// - /// This does not change the instruction’s existing result values. - pub fn replace_inst_data(&mut self, inst: Inst, data: InstructionData) { - self.insts[inst].data = data; - } - - /// Returns the SSA results defined by `inst`. 
- pub fn inst_results(&self, inst: Inst) -> &[Value] { - self.insts[inst].results.as_slice(&self.value_lists) - } - - /// Returns the first SSA result of `inst`, if any. - pub fn first_result(&self, inst: Inst) -> Option { - self.insts[inst].results.first(&self.value_lists) - } - - /// Returns the full value record for `value`. - pub fn value_data(&self, value: Value) -> &ValueData { - &self.values[value] - } - - /// Returns the type of `value`. - pub fn value_type(&self, value: Value) -> Type { - self.values[value].ty - } - - /// Returns the definition site of `value`. - pub fn value_def(&self, value: Value) -> ValueDef { - self.values[value].def - } -} diff --git a/src/ir/expr.rs b/src/ir/expr.rs new file mode 100644 index 0000000..0e92406 --- /dev/null +++ b/src/ir/expr.rs @@ -0,0 +1,396 @@ +//! Pure expression nodes and SSA value definitions for Slonik IR. + +use core::fmt; + +use crate::ir::{Block, Expr, Type, Value}; + +/// Memory access width in bytes. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum MemSize { + S1, + S2, + S4, + S8, + S16, +} + +impl MemSize { + /// Returns the access width in bytes. + pub const fn bytes(self) -> u8 { + match self { + Self::S1 => 1, + Self::S2 => 2, + Self::S4 => 4, + Self::S8 => 8, + Self::S16 => 16, + } + } + + /// Returns the access width in bits. + pub const fn bits(self) -> u16 { + (self.bytes() as u16) * 8 + } +} + +impl fmt::Display for MemSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}B", self.bytes()) + } +} + +/// Integer comparison condition codes. +/// +/// These are used by [`ExprData::Icmp`]. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum IntCC { + Eq, + Ne, + + Ult, + Ule, + Ugt, + Uge, + + Slt, + Sle, + Sgt, + Sge, +} + +impl IntCC { + /// Returns the logical inverse of this condition code. 
+ pub const fn invert(self) -> Self { + match self { + Self::Eq => Self::Ne, + Self::Ne => Self::Eq, + + Self::Ult => Self::Uge, + Self::Ule => Self::Ugt, + Self::Ugt => Self::Ule, + Self::Uge => Self::Ult, + + Self::Slt => Self::Sge, + Self::Sle => Self::Sgt, + Self::Sgt => Self::Sle, + Self::Sge => Self::Slt, + } + } + + /// Returns the condition code obtained by swapping the operands. + pub const fn swap_args(self) -> Self { + match self { + Self::Eq => Self::Eq, + Self::Ne => Self::Ne, + + Self::Ult => Self::Ugt, + Self::Ule => Self::Uge, + Self::Ugt => Self::Ult, + Self::Uge => Self::Ule, + + Self::Slt => Self::Sgt, + Self::Sle => Self::Sge, + Self::Sgt => Self::Slt, + Self::Sge => Self::Sle, + } + } +} + +impl fmt::Display for IntCC { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Eq => "eq", + Self::Ne => "ne", + + Self::Ult => "ult", + Self::Ule => "ule", + Self::Ugt => "ugt", + Self::Uge => "uge", + + Self::Slt => "slt", + Self::Sle => "sle", + Self::Sgt => "sgt", + Self::Sge => "sge", + }; + + f.write_str(s) + } +} + +/// Floating-point comparison condition codes. +/// +/// These are used by [`ExprData::Fcmp`]. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FloatCC { + Eq, + Ne, + Lt, + Le, + Gt, + Ge, + Ordered, + Unordered, +} + +impl FloatCC { + /// Returns the logical inverse of this condition code. + pub const fn invert(self) -> Self { + match self { + Self::Eq => Self::Ne, + Self::Ne => Self::Eq, + Self::Lt => Self::Ge, + Self::Le => Self::Gt, + Self::Gt => Self::Le, + Self::Ge => Self::Lt, + Self::Ordered => Self::Unordered, + Self::Unordered => Self::Ordered, + } + } + + /// Returns the condition code obtained by swapping the operands. 
+ pub const fn swap_args(self) -> Self { + match self { + Self::Eq => Self::Eq, + Self::Ne => Self::Ne, + Self::Lt => Self::Gt, + Self::Le => Self::Ge, + Self::Gt => Self::Lt, + Self::Ge => Self::Le, + Self::Ordered => Self::Ordered, + Self::Unordered => Self::Unordered, + } + } +} + +impl fmt::Display for FloatCC { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Eq => "eq", + Self::Ne => "ne", + Self::Lt => "lt", + Self::Le => "le", + Self::Gt => "gt", + Self::Ge => "ge", + Self::Ordered => "ord", + Self::Unordered => "uno", + }; + + f.write_str(s) + } +} + +/// Unary expression operators. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum UnaryOp { + /// Integer or bitvector negation. + Neg, + + /// Bitwise not. + Not, + + /// Floating-point negation. + FNeg, +} + +impl fmt::Display for UnaryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Neg => "neg", + Self::Not => "not", + Self::FNeg => "fneg", + }; + + f.write_str(s) + } +} + +/// Binary expression operators. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum BinaryOp { + IAdd, + ISub, + IMul, + UDiv, + SDiv, + URem, + SRem, + + And, + Or, + Xor, + + Shl, + LShr, + AShr, + + FAdd, + FSub, + FMul, + FDiv, +} + +impl BinaryOp { + /// Returns whether this binary operator is commutative. 
+ pub const fn is_commutative(self) -> bool { + matches!( + self, + Self::IAdd | Self::IMul | Self::And | Self::Or | Self::Xor | Self::FAdd | Self::FMul + ) + } +} + +impl fmt::Display for BinaryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::IAdd => "iadd", + Self::ISub => "isub", + Self::IMul => "imul", + Self::UDiv => "udiv", + Self::SDiv => "sdiv", + Self::URem => "urem", + Self::SRem => "srem", + + Self::And => "and", + Self::Or => "or", + Self::Xor => "xor", + + Self::Shl => "shl", + Self::LShr => "lshr", + Self::AShr => "ashr", + + Self::FAdd => "fadd", + Self::FSub => "fsub", + Self::FMul => "fmul", + Self::FDiv => "fdiv", + }; + + f.write_str(s) + } +} + +/// Cast and conversion operators. +/// +/// The source type is taken from the operand value. +/// The destination type is taken from the result value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum CastOp { + /// Zero-extension. + Zext, + + /// Sign-extension. + Sext, + + /// Truncation. + Trunc, + + /// Bit-preserving reinterpretation. + Bitcast, + + /// Integer-to-pointer conversion. + IntToPtr, + + /// Pointer-to-integer conversion. + PtrToInt, +} + +impl fmt::Display for CastOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Zext => "zext", + Self::Sext => "sext", + Self::Trunc => "trunc", + Self::Bitcast => "bitcast", + Self::IntToPtr => "inttoptr", + Self::PtrToInt => "ptrtoint", + }; + + f.write_str(s) + } +} + +/// A pure value-producing expression node. +/// +/// Every expression defines exactly one SSA value. +/// The type of that value is stored in the corresponding [`ValueData`]. +#[derive(Clone, Debug, PartialEq)] +pub enum ExprData { + /// A signed integer literal. + /// + /// The result type determines the intended width. + IConst { imm: i64 }, + + /// A 32-bit floating-point literal stored as raw IEEE-754 bits. 
+ F32Const { bits: u32 }, + + /// A 64-bit floating-point literal stored as raw IEEE-754 bits. + F64Const { bits: u64 }, + + /// A boolean literal. + BConst { value: bool }, + + /// A unary operation. + Unary { op: UnaryOp, arg: Value }, + + /// A binary operation. + Binary { + op: BinaryOp, + lhs: Value, + rhs: Value, + }, + + /// A cast or conversion. + Cast { op: CastOp, arg: Value }, + + /// An integer comparison. + /// + /// The result type is expected to be `bool`. + Icmp { cc: IntCC, lhs: Value, rhs: Value }, + + /// A floating-point comparison. + /// + /// The result type is expected to be `bool`. + Fcmp { cc: FloatCC, lhs: Value, rhs: Value }, + + /// A conditional select. + /// + /// `cond` must be a boolean value. + Select { + cond: Value, + if_true: Value, + if_false: Value, + }, + + /// A memory load. + Load { addr: Value, size: MemSize }, +} + +impl ExprData { + /// Returns whether this expression may observe memory. + pub const fn may_read_memory(&self) -> bool { + matches!(self, Self::Load { .. }) + } + + /// Returns whether this expression may trap. + pub const fn may_trap(&self) -> bool { + matches!(self, Self::Load { .. }) + } +} + +/// The definition site of an SSA value. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ValueDef { + /// A value defined by an expression. + Expr(Expr), + + /// A block parameter at position `index`. + Param(Block, u16), +} + +/// Metadata attached to an SSA value. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct ValueData { + /// The semantic type of the value. + pub ty: Type, + + /// The definition site of the value. + pub def: ValueDef, +} diff --git a/src/ir/frontend.rs b/src/ir/frontend.rs new file mode 100644 index 0000000..42bb242 --- /dev/null +++ b/src/ir/frontend.rs @@ -0,0 +1,545 @@ +//! A frontend-oriented SSA builder for Slonik IR. 
+ +use std::collections::HashMap; + +use crate::ir::{ + BinaryOp, Block, Body, CastOp, ExprData, FloatCC, IntCC, MemSize, Stmt, StmtData, Type, + UnaryOp, Value, Variable, +}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum IncomingEdgeKind { + Jump, + Then, + Else, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct IncomingEdge { + pred: Block, + stmt: Stmt, + kind: IncomingEdgeKind, +} + +#[derive(Clone, Debug, Default)] +struct BlockState { + /// Whether all predecessors of this block are known. + sealed: bool, + + /// Whether a terminator has already been inserted into this block. + filled: bool, + + /// Incoming CFG edges that target this block. + preds: Vec, + + /// Current SSA definitions visible at the end of this block. + defs: HashMap, + + /// The variables that have synthesized block parameters in this block, + /// in block-parameter order. + param_order: Vec, + + /// Mapping from variable to its synthesized block parameter value. + params: HashMap, +} + +/// Reusable scratch state for [`FrontendBuilder`]. +#[derive(Default)] +pub struct FrontendBuilderContext { + vars: HashMap, + blocks: HashMap, +} + +impl FrontendBuilderContext { + /// Creates an empty reusable frontend-builder context. + pub fn new() -> Self { + Self::default() + } + + /// Clears all stored frontend state. + pub fn clear(&mut self) { + self.vars.clear(); + self.blocks.clear(); + } +} + +/// A frontend-oriented builder for Slonik IR. +pub struct FrontendBuilder<'a> { + body: &'a mut Body, + ctx: &'a mut FrontendBuilderContext, + cur_block: Option, +} + +impl<'a> FrontendBuilder<'a> { + /// Creates a new frontend-oriented builder. + pub fn new(body: &'a mut Body, ctx: &'a mut FrontendBuilderContext) -> Self { + Self { + body, + ctx, + cur_block: None, + } + } + + /// Returns a shared reference to the underlying body. + pub fn body(&self) -> &Body { + self.body + } + + /// Returns a mutable reference to the underlying body. 
+ pub fn body_mut(&mut self) -> &mut Body { + self.body + } + + /// Creates a new block. + pub fn create_block(&mut self) -> Block { + let block = self.body.create_block(); + self.ctx.blocks.entry(block).or_default(); + block + } + + /// Switches insertion to `block`. + pub fn switch_to_block(&mut self, block: Block) { + self.ctx.blocks.entry(block).or_default(); + self.cur_block = Some(block); + } + + /// Returns the current insertion block, if any. + pub fn current_block(&self) -> Option { + self.cur_block + } + + /// Declares a frontend variable with semantic type `ty`. + pub fn declare_var(&mut self, var: Variable, ty: Type) { + self.ctx.vars.insert(var, ty); + } + + /// Returns whether `var` has been declared. + pub fn is_var_declared(&self, var: Variable) -> bool { + self.ctx.vars.contains_key(&var) + } + + /// Returns the declared type of `var`. + /// + /// # Panics + /// + /// Panics if `var` was not declared first. + pub fn var_type(&self, var: Variable) -> Type { + *self + .ctx + .vars + .get(&var) + .expect("attempted to query undeclared variable") + } + + /// Returns whether `block` has been sealed. + pub fn is_sealed(&self, block: Block) -> bool { + self.ctx + .blocks + .get(&block) + .map(|s| s.sealed) + .unwrap_or(false) + } + + /// Returns whether `block` already has a terminator. + pub fn is_filled(&self, block: Block) -> bool { + self.ctx + .blocks + .get(&block) + .map(|s| s.filled) + .unwrap_or(false) + } + + /// Marks `block` as sealed. + /// + /// Sealing a block means all of its predecessors are now known. + pub fn seal_block(&mut self, block: Block) { + self.ctx.blocks.entry(block).or_default().sealed = true; + } + + /// Marks every known block as sealed. + pub fn seal_all_blocks(&mut self) { + for state in self.ctx.blocks.values_mut() { + state.sealed = true; + } + } + + /// Assigns SSA value `value` to frontend variable `var` in the current block. 
+ /// + /// # Panics + /// + /// Panics if: + /// + /// - no current block is selected + /// - `var` is undeclared + /// - the value's type does not match the variable's declared type + pub fn def_var(&mut self, var: Variable, value: Value) { + let block = self + .cur_block + .expect("attempted to define a variable without a current block"); + + let expected = self.var_type(var); + let actual = self.body.value_type(value); + + assert!( + expected == actual, + "type mismatch in def_var: variable declared as {expected}, got value of type {actual}", + ); + + self.ctx + .blocks + .entry(block) + .or_default() + .defs + .insert(var, value); + } + + /// Reads the current SSA value for frontend variable `var`. + /// + /// This may recursively resolve definitions across predecessor blocks and + /// may synthesize block parameters as needed. + /// + /// # Panics + /// + /// Panics if: + /// + /// - no current block is selected + /// - `var` is undeclared + pub fn use_var(&mut self, var: Variable) -> Value { + let block = self + .cur_block + .expect("attempted to use a variable without a current block"); + self.use_var_in_block(block, var) + } + + fn use_var_in_block(&mut self, block: Block, var: Variable) -> Value { + if let Some(value) = self + .ctx + .blocks + .get(&block) + .and_then(|state| state.defs.get(&var).copied()) + { + return value; + } + + let ty = self.var_type(var); + + let preds = self + .ctx + .blocks + .get(&block) + .map(|state| state.preds.clone()) + .unwrap_or_default(); + + // If the block is sealed and has no predecessors, this is a genuine + // unbound use: there is nowhere to get the value from. + if self.is_sealed(block) && preds.is_empty() { + panic!( + "variable {var} used in sealed block {block} with no local definition and no predecessors" + ); + } + + // Any non-local use becomes a block parameter, regardless of predecessor + // count. This keeps cross-block dataflow fully explicit in block arguments, + // even for single-predecessor blocks. 
+ self.ensure_block_param(block, var, ty) + } + + fn ensure_block_param(&mut self, block: Block, var: Variable, ty: Type) -> Value { + if let Some(value) = self + .ctx + .blocks + .get(&block) + .and_then(|state| state.params.get(&var).copied()) + { + return value; + } + + let value = self.body.append_block_param(block, ty); + + { + let state = self.ctx.blocks.entry(block).or_default(); + state.param_order.push(var); + state.params.insert(var, value); + state.defs.insert(var, value); + } + + let preds = self + .ctx + .blocks + .get(&block) + .map(|state| state.preds.clone()) + .unwrap_or_default(); + + for edge in preds { + let arg = self.use_var_in_block(edge.pred, var); + self.append_edge_arg(edge, arg); + } + + value + } + + fn append_edge_arg(&mut self, edge: IncomingEdge, arg: Value) { + let body = self.body_mut(); + + let Body { + stmts, value_lists, .. + } = body; + + match (&mut stmts[edge.stmt], edge.kind) { + (StmtData::Jump { dst }, IncomingEdgeKind::Jump) => { + dst.args.push(arg, value_lists); + } + + (StmtData::BrIf { then_dst, .. }, IncomingEdgeKind::Then) => { + then_dst.args.push(arg, value_lists); + } + + (StmtData::BrIf { else_dst, .. 
}, IncomingEdgeKind::Else) => { + else_dst.args.push(arg, value_lists); + } + + _ => { + panic!("frontend predecessor bookkeeping became inconsistent"); + } + } + } + + fn record_incoming_edge( + &mut self, + dst: Block, + pred: Block, + stmt: Stmt, + kind: IncomingEdgeKind, + ) { + self.ctx + .blocks + .entry(dst) + .or_default() + .preds + .push(IncomingEdge { pred, stmt, kind }); + } + + fn edge_args_to(&mut self, dst: Block) -> Vec { + let vars = self + .ctx + .blocks + .get(&dst) + .map(|state| state.param_order.clone()) + .unwrap_or_default(); + + vars.into_iter().map(|var| self.use_var(var)).collect() + } + + fn append_stmt_in_current_block(&mut self, data: StmtData) -> Stmt { + let block = self + .cur_block + .expect("attempted to append a statement without a current block"); + + assert!( + !self.is_filled(block), + "attempted to append a statement after the block was already terminated" + ); + + let is_terminator = data.is_terminator(); + let stmt = self.body.append_stmt(block, data); + + if is_terminator { + self.ctx.blocks.entry(block).or_default().filled = true; + } + + stmt + } + + fn create_expr_value(&mut self, data: ExprData, ty: Type) -> Value { + let expr = self.body.create_expr(data, ty); + self.body.expr_value(expr) + } + + /// Creates an integer constant expression. + pub fn iconst(&mut self, ty: Type, imm: i64) -> Value { + self.create_expr_value(ExprData::IConst { imm }, ty) + } + + /// Creates a 32-bit floating-point constant expression from raw IEEE-754 bits. + pub fn f32const_bits(&mut self, bits: u32) -> Value { + self.create_expr_value(ExprData::F32Const { bits }, Type::f32()) + } + + /// Creates a 64-bit floating-point constant expression from raw IEEE-754 bits. + pub fn f64const_bits(&mut self, bits: u64) -> Value { + self.create_expr_value(ExprData::F64Const { bits }, Type::f64()) + } + + /// Creates a boolean constant expression. 
+ pub fn bconst(&mut self, value: bool) -> Value { + self.create_expr_value(ExprData::BConst { value }, Type::bool()) + } + + /// Creates a unary expression. + fn unary(&mut self, op: UnaryOp, arg: Value, ty: Type) -> Value { + self.create_expr_value(ExprData::Unary { op, arg }, ty) + } + + /// Creates a binary expression. + fn binary(&mut self, op: BinaryOp, lhs: Value, rhs: Value, ty: Type) -> Value { + self.create_expr_value(ExprData::Binary { op, lhs, rhs }, ty) + } + + /// Creates a cast expression. + pub fn cast(&mut self, op: CastOp, arg: Value, ty: Type) -> Value { + self.create_expr_value(ExprData::Cast { op, arg }, ty) + } + + /// Creates an integer comparison expression with boolean result. + pub fn icmp(&mut self, cc: IntCC, lhs: Value, rhs: Value) -> Value { + self.create_expr_value(ExprData::Icmp { cc, lhs, rhs }, Type::bool()) + } + + /// Creates a floating-point comparison expression with boolean result. + pub fn fcmp(&mut self, cc: FloatCC, lhs: Value, rhs: Value) -> Value { + self.create_expr_value(ExprData::Fcmp { cc, lhs, rhs }, Type::bool()) + } + + /// Creates a `select` expression. + pub fn select(&mut self, cond: Value, if_true: Value, if_false: Value, ty: Type) -> Value { + self.create_expr_value( + ExprData::Select { + cond, + if_true, + if_false, + }, + ty, + ) + } + + /// Creates a memory load expression. + pub fn load(&mut self, addr: Value, size: MemSize, ty: Type) -> Value { + self.create_expr_value(ExprData::Load { addr, size }, ty) + } + + /// Creates an integer add expression. + pub fn iadd(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::IAdd, lhs, rhs, ty) + } + + /// Creates an integer subtract expression. + pub fn isub(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::ISub, lhs, rhs, ty) + } + + /// Creates an integer multiply expression. 
+ pub fn imul(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::IMul, lhs, rhs, ty) + } + + /// Creates a bitwise and expression. + pub fn and(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::And, lhs, rhs, ty) + } + + /// Creates a bitwise or expression. + pub fn or(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::Or, lhs, rhs, ty) + } + + /// Creates a bitwise xor expression. + pub fn xor(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::Xor, lhs, rhs, ty) + } + + /// Creates a logical shift-left expression. + pub fn shl(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::Shl, lhs, rhs, ty) + } + + /// Creates a logical shift-right expression. + pub fn lshr(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::LShr, lhs, rhs, ty) + } + + /// Creates an arithmetic shift-right expression. + pub fn ashr(&mut self, lhs: Value, rhs: Value, ty: Type) -> Value { + self.binary(BinaryOp::AShr, lhs, rhs, ty) + } + + /// Creates a zero-extension expression. + pub fn zext(&mut self, arg: Value, ty: Type) -> Value { + self.cast(CastOp::Zext, arg, ty) + } + + /// Creates a sign-extension expression. + pub fn sext(&mut self, arg: Value, ty: Type) -> Value { + self.cast(CastOp::Sext, arg, ty) + } + + /// Creates a truncation expression. + pub fn trunc(&mut self, arg: Value, ty: Type) -> Value { + self.cast(CastOp::Trunc, arg, ty) + } + + /// Creates a bitcast expression. + pub fn bitcast(&mut self, arg: Value, ty: Type) -> Value { + self.cast(CastOp::Bitcast, arg, ty) + } + + /// Appends a memory store statement. + pub fn store(&mut self, addr: Value, value: Value, size: MemSize) -> Stmt { + self.append_stmt_in_current_block(StmtData::Store { addr, value, size }) + } + + /// Appends an effectful call statement. 
+ pub fn call(&mut self, callee: Value, args: &[Value]) -> Stmt { + let args = self.body.make_value_list(args); + self.append_stmt_in_current_block(StmtData::Call { callee, args }) + } + + /// Appends an unconditional jump to `dst`. + /// + /// The builder automatically supplies block arguments for all synthesized + /// block parameters currently known on `dst`. + pub fn jump(&mut self, dst: Block) -> Stmt { + let pred = self + .cur_block + .expect("attempted to append jump without a current block"); + + let args = self.edge_args_to(dst); + let dst_call = self.body.block_call(dst, &args); + let stmt = self.append_stmt_in_current_block(StmtData::Jump { dst: dst_call }); + + self.record_incoming_edge(dst, pred, stmt, IncomingEdgeKind::Jump); + stmt + } + + /// Appends a conditional branch. + /// + /// The builder automatically supplies block arguments for all synthesized + /// block parameters currently known on each destination block. + pub fn br_if(&mut self, cond: Value, then_block: Block, else_block: Block) -> Stmt { + let pred = self + .cur_block + .expect("attempted to append br_if without a current block"); + + let then_args = self.edge_args_to(then_block); + let else_args = self.edge_args_to(else_block); + + let then_dst = self.body.block_call(then_block, &then_args); + let else_dst = self.body.block_call(else_block, &else_args); + + let stmt = self.append_stmt_in_current_block(StmtData::BrIf { + cond, + then_dst, + else_dst, + }); + + self.record_incoming_edge(then_block, pred, stmt, IncomingEdgeKind::Then); + self.record_incoming_edge(else_block, pred, stmt, IncomingEdgeKind::Else); + + stmt + } + + /// Appends a return statement. + pub fn ret(&mut self, values: &[Value]) -> Stmt { + let values = self.body.make_value_list(values); + self.append_stmt_in_current_block(StmtData::Return { values }) + } +} diff --git a/src/ir/inst.rs b/src/ir/inst.rs deleted file mode 100644 index 63b25a8..0000000 --- a/src/ir/inst.rs +++ /dev/null @@ -1,413 +0,0 @@ -//! 
Instruction opcodes and instruction data formats for Slonik. - -use core::fmt; -use cranelift_entity::EntityList; - -use crate::ir::{Block, Inst, Type, Value}; - -/// A compact list of SSA values. -/// -/// These lists are stored in a `ListPool` owned by the data-flow graph. -pub type ValueList = EntityList; - -/// A compact list of blocks. -/// -/// This is primarily useful for generic control-flow helpers and side tables. -pub type BlockList = EntityList; - -/// A target block together with the SSA arguments passed to that block. -/// -/// Slonik IR uses block arguments instead of phi nodes. A terminator that -/// transfers control to a block also provides the values for that block's -/// parameters. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct BlockCall { - /// The destination block. - pub block: Block, - - /// The arguments passed to the destination block. - pub args: ValueList, -} - -/// Integer comparison condition codes. -/// -/// These are used by `Opcode::Icmp`. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum IntCC { - /// Equal. - Eq, - - /// Not equal. - Ne, - - /// Unsigned less than. - Ult, - - /// Unsigned less than or equal. - Ule, - - /// Unsigned greater than. - Ugt, - - /// Unsigned greater than or equal. - Uge, - - /// Signed less than. - Slt, - - /// Signed less than or equal. - Sle, - - /// Signed greater than. - Sgt, - - /// Signed greater than or equal. - Sge, -} - -impl IntCC { - /// Returns the logical inverse of this condition code. - pub const fn invert(self) -> Self { - match self { - Self::Eq => Self::Ne, - Self::Ne => Self::Eq, - Self::Ult => Self::Uge, - Self::Ule => Self::Ugt, - Self::Ugt => Self::Ule, - Self::Uge => Self::Ult, - Self::Slt => Self::Sge, - Self::Sle => Self::Sgt, - Self::Sgt => Self::Sle, - Self::Sge => Self::Slt, - } - } - - /// Returns the condition code with operands swapped. 
- pub const fn swap_args(self) -> Self { - match self { - Self::Eq => Self::Eq, - Self::Ne => Self::Ne, - Self::Ult => Self::Ugt, - Self::Ule => Self::Uge, - Self::Ugt => Self::Ult, - Self::Uge => Self::Ule, - Self::Slt => Self::Sgt, - Self::Sle => Self::Sge, - Self::Sgt => Self::Slt, - Self::Sge => Self::Sle, - } - } -} - -impl fmt::Display for IntCC { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - Self::Eq => "eq", - Self::Ne => "ne", - Self::Ult => "ult", - Self::Ule => "ule", - Self::Ugt => "ugt", - Self::Uge => "uge", - Self::Slt => "slt", - Self::Sle => "sle", - Self::Sgt => "sgt", - Self::Sge => "sge", - }; - - f.write_str(s) - } -} - -/// The set of operations that generic analysis and decompilation passes -/// are expected to understand. -/// -/// If a frontend encounters something that does not fit naturally here, that is -/// a sign it should stay in the lifted IR longer or be lowered through a helper -/// sequence before entering this IR. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum Opcode { - Iconst, - F32const, - F64const, - Bconst, - - Zext, - Sext, - Trunc, - Bitcast, - - Iadd, - Isub, - Imul, - Udiv, - Sdiv, - Urem, - Srem, - - And, - Or, - Xor, - Not, - - Shl, - Lshr, - Ashr, - - Fadd, - Fsub, - Fmul, - Fdiv, - Fneg, - - Icmp, - Select, - - Load, - Store, - - Jump, - BrIf, - Call, - Return, -} - -impl Opcode { - /// Returns the textual name of this opcode. 
- pub const fn name(&self) -> &'static str { - match self { - Self::Iconst => "iconst", - Self::F32const => "f32const", - Self::F64const => "f64const", - Self::Bconst => "bconst", - - Self::Zext => "zext", - Self::Sext => "sext", - Self::Trunc => "trunc", - Self::Bitcast => "bitcast", - - Self::Iadd => "iadd", - Self::Isub => "isub", - Self::Imul => "imul", - Self::Udiv => "udiv", - Self::Sdiv => "sdiv", - Self::Urem => "urem", - Self::Srem => "srem", - - Self::And => "and", - Self::Or => "or", - Self::Xor => "xor", - Self::Not => "not", - - Self::Shl => "shl", - Self::Lshr => "lshr", - Self::Ashr => "ashr", - - Self::Fadd => "fadd", - Self::Fsub => "fsub", - Self::Fmul => "fmul", - Self::Fdiv => "fdiv", - Self::Fneg => "fneg", - - Self::Icmp => "icmp", - Self::Select => "select", - - Self::Load => "load", - Self::Store => "store", - - Self::Jump => "jump", - Self::BrIf => "br_if", - Self::Call => "call", - Self::Return => "return", - } - } - - /// Returns whether this opcode is a terminator. - pub const fn is_terminator(self) -> bool { - matches!(self, Self::Jump | Self::BrIf | Self::Return) - } - - /// Returns whether this opcode may read memory. - pub const fn may_read_memory(self) -> bool { - matches!(self, Self::Load | Self::Call) - } - - /// Returns whether this opcode may write memory. - pub const fn may_write_memory(self) -> bool { - matches!(self, Self::Store | Self::Call) - } - - /// Returns whether this opcode is side-effecting. - pub const fn has_side_effects(self) -> bool { - matches!( - self, - Self::Store | Self::Jump | Self::BrIf | Self::Call | Self::Return - ) - } -} - -impl fmt::Display for Opcode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.name()) - } -} - -/// A memory access size in bytes. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum MemSize { - S1, - S2, - S4, - S8, - S16, -} - -impl MemSize { - /// Returns the memory access width in bytes. 
- pub const fn bytes(self) -> u8 { - match self { - Self::S1 => 1, - Self::S2 => 2, - Self::S4 => 4, - Self::S8 => 8, - Self::S16 => 16, - } - } - - /// Returns the memory access width in bits. - pub const fn bits(self) -> u16 { - // TODO: Some machines may have different byte sizes. - (self.bytes() as u16) * 8 - } -} - -impl fmt::Display for MemSize { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}B", self.bytes()) - } -} - -/// The payload of an instruction. -#[derive(Clone, Debug, PartialEq)] -pub enum InstructionData { - /// A zero-operand instruction with no embedded immediate. - Nullary { opcode: Opcode }, - - /// A constant instruction holding an `i64` immediate. - Iconst { opcode: Opcode, imm: i64 }, - - /// A constant instruction holding a 32-bit floating-point bit pattern. - /// - /// The value is stored as raw IEEE-754 bits. - F32const { opcode: Opcode, bits: u32 }, - - /// A constant instruction holding a 64-bit floating-point bit pattern. - /// - /// The value is stored as raw IEEE-754 bits. - F64const { opcode: Opcode, bits: u64 }, - - /// A boolean constant. - Bconst { opcode: Opcode, value: bool }, - - /// A one-argument instruction. - Unary { opcode: Opcode, arg: Value }, - - /// A two-argument instruction. - Binary { opcode: Opcode, args: [Value; 2] }, - - /// A three-argument instruction. - /// - /// This is primarily useful for operations like `select`. - Ternary { opcode: Opcode, args: [Value; 3] }, - - /// An integer comparison instruction. - Icmp { - opcode: Opcode, - cc: IntCC, - args: [Value; 2], - }, - - /// A load from memory. - Load { - opcode: Opcode, - addr: Value, - size: MemSize, - }, - - /// A store to memory. - Store { - opcode: Opcode, - addr: Value, - value: Value, - size: MemSize, - }, - - /// An unconditional branch to another block with block arguments. - Jump { opcode: Opcode, dst: BlockCall }, - - /// A conditional branch with explicit true/false targets. 
- // TODO: Introduce direct call references - BrIf { - opcode: Opcode, - cond: Value, - then_dst: BlockCall, - else_dst: BlockCall, - }, - - /// A call through a value. - Call { - opcode: Opcode, - callee: Value, - args: ValueList, - }, - - /// A return from the current body. - Return { opcode: Opcode, values: ValueList }, -} - -impl InstructionData { - /// Returns the opcode of this instruction. - pub const fn opcode(&self) -> Opcode { - match *self { - Self::Nullary { opcode } - | Self::Iconst { opcode, .. } - | Self::F32const { opcode, .. } - | Self::F64const { opcode, .. } - | Self::Bconst { opcode, .. } - | Self::Unary { opcode, .. } - | Self::Binary { opcode, .. } - | Self::Ternary { opcode, .. } - | Self::Icmp { opcode, .. } - | Self::Load { opcode, .. } - | Self::Store { opcode, .. } - | Self::Jump { opcode, .. } - | Self::BrIf { opcode, .. } - | Self::Call { opcode, .. } - | Self::Return { opcode, .. } => opcode, - } - } - - /// Returns whether this instruction is a terminator. - pub const fn is_terminator(&self) -> bool { - self.opcode().is_terminator() - } -} - -/// The definition site of an SSA value. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum ValueDef { - /// A value produced as result `index` of an instruction. - Result(Inst, u16), - - /// A block parameter at position `index`. - Param(Block, u16), -} - -/// Type information attached to a value in the data-flow graph. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct ValueData { - /// The semantic type of the value. - pub ty: Type, - - /// Where the value was defined. - pub def: ValueDef, -} diff --git a/src/ir/petgraph.rs b/src/ir/petgraph.rs deleted file mode 100644 index 5b9bc1f..0000000 --- a/src/ir/petgraph.rs +++ /dev/null @@ -1,216 +0,0 @@ -//! [`petgraph`] adapters for Slonik. 
- -use std::collections::HashSet; - -use cranelift_entity::EntityRef; -use petgraph::{ - Directed, Direction, - visit::{ - GraphBase, GraphProp, GraphRef, IntoNeighbors, IntoNeighborsDirected, IntoNodeIdentifiers, - NodeCompactIndexable, NodeCount, NodeIndexable, Visitable, - }, -}; - -use crate::ir::{Block, BlockSuccessors, Body}; - -/// A borrowed control-flow graph view over a [`Body`]. -#[derive(Copy, Clone)] -pub struct CfgView<'a> { - body: &'a Body, -} - -impl<'a> CfgView<'a> { - /// Creates a borrowed CFG view over `body`. - pub const fn new(body: &'a Body) -> Self { - Self { body } - } - - /// Returns the underlying IR body. - pub const fn body(self) -> &'a Body { - self.body - } -} - -impl Body { - /// Returns a borrowed `petgraph`-compatible CFG view. - pub const fn cfg(&self) -> CfgView<'_> { - CfgView::new(self) - } -} - -/// Iterator over blocks in layout order. -#[derive(Clone)] -pub struct Blocks<'a> { - body: &'a Body, - cur: Option, -} - -impl<'a> Blocks<'a> { - fn new(body: &'a Body) -> Self { - Self { - body, - cur: body.layout.first_block.expand(), - } - } -} - -impl Iterator for Blocks<'_> { - type Item = Block; - - fn next(&mut self) -> Option { - let cur = self.cur?; - self.cur = self.body.layout.blocks[cur].next.expand(); - Some(cur) - } -} - -/// Iterator over outgoing CFG neighbors. -#[derive(Clone)] -pub struct OutNeighbors { - iter: BlockSuccessors, -} - -impl OutNeighbors { - fn new(body: &Body, block: Block) -> Self { - Self { - iter: body.block_successors(block), - } - } -} - -impl Iterator for OutNeighbors { - type Item = Block; - - fn next(&mut self) -> Option { - self.iter.next() - } -} - -// TODO: cached predecessor map? -/// Iterator over incoming CFG neighbors. 
-#[derive(Clone)] -pub struct InNeighbors<'a> { - body: &'a Body, - cur: Option, - target: Block, -} - -impl<'a> InNeighbors<'a> { - fn new(body: &'a Body, target: Block) -> Self { - Self { - body, - cur: body.layout.first_block(), - target, - } - } -} - -impl Iterator for InNeighbors<'_> { - type Item = Block; - - fn next(&mut self) -> Option { - while let Some(block) = self.cur { - self.cur = self.body.layout.next_block(block); - - if self - .body - .block_successors(block) - .any(|succ| succ == self.target) - { - return Some(block); - } - } - - None - } -} - -/// Iterator over neighbors in a requested direction. -#[derive(Clone)] -pub enum Neighbors<'a> { - Out(OutNeighbors), - In(InNeighbors<'a>), -} - -impl Iterator for Neighbors<'_> { - type Item = Block; - - fn next(&mut self) -> Option { - match self { - Self::Out(iter) => iter.next(), - Self::In(iter) => iter.next(), - } - } -} - -impl GraphBase for CfgView<'_> { - type NodeId = Block; - type EdgeId = (Block, Block); -} - -impl GraphRef for CfgView<'_> {} - -impl GraphProp for CfgView<'_> { - type EdgeType = Directed; -} - -impl NodeCount for CfgView<'_> { - fn node_count(&self) -> usize { - self.body.block_count() - } -} - -impl NodeIndexable for CfgView<'_> { - fn node_bound(&self) -> usize { - self.body.block_count() - } - - fn to_index(&self, a: Self::NodeId) -> usize { - a.index() - } - - fn from_index(&self, i: usize) -> Self::NodeId { - Block::new(i) - } -} - -impl NodeCompactIndexable for CfgView<'_> {} - -impl<'a> IntoNodeIdentifiers for CfgView<'a> { - type NodeIdentifiers = Blocks<'a>; - - fn node_identifiers(self) -> Self::NodeIdentifiers { - Blocks::new(self.body) - } -} - -impl<'a> IntoNeighbors for CfgView<'a> { - type Neighbors = OutNeighbors; - - fn neighbors(self, a: Self::NodeId) -> Self::Neighbors { - OutNeighbors::new(self.body, a) - } -} - -impl<'a> IntoNeighborsDirected for CfgView<'a> { - type NeighborsDirected = Neighbors<'a>; - - fn neighbors_directed(self, n: Self::NodeId, d: 
Direction) -> Self::NeighborsDirected { - match d { - Direction::Outgoing => Neighbors::Out(OutNeighbors::new(self.body, n)), - Direction::Incoming => Neighbors::In(InNeighbors::new(self.body, n)), - } - } -} - -impl Visitable for CfgView<'_> { - type Map = HashSet; - - fn visit_map(&self) -> Self::Map { - HashSet::with_capacity(self.body.block_count()) - } - - fn reset_map(&self, map: &mut Self::Map) { - map.clear(); - } -} diff --git a/src/ir/stmt.rs b/src/ir/stmt.rs new file mode 100644 index 0000000..91747d2 --- /dev/null +++ b/src/ir/stmt.rs @@ -0,0 +1,78 @@ +//! Ordered block-local statements for Slonik IR. + +use crate::ir::{Block, Stmt, Value}; +use cranelift_entity::EntityList; + +/// A compact list of SSA values. +pub type ValueList = EntityList; + +/// A target block together with the SSA arguments passed to that block. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct BlockCall { + /// The destination block. + pub block: Block, + + /// The arguments passed to the destination block. + pub args: ValueList, +} + +/// An ordered block-local statement. +#[derive(Clone, Debug, PartialEq)] +pub enum StmtData { + /// Store `value` to memory at `addr`. + /// + /// `size` is the access width in bytes. + Store { + addr: Value, + value: Value, + size: crate::ir::MemSize, + }, + + /// Call a callee value with SSA arguments. + Call { callee: Value, args: ValueList }, + + /// Unconditional transfer of control to another block. + Jump { dst: BlockCall }, + + /// Conditional transfer of control. + BrIf { + cond: Value, + then_dst: BlockCall, + else_dst: BlockCall, + }, + + /// Return from the current body. + Return { values: ValueList }, +} + +impl StmtData { + /// Returns whether this statement is a terminator. + pub const fn is_terminator(&self) -> bool { + matches!( + self, + Self::Jump { .. } | Self::BrIf { .. } | Self::Return { .. } + ) + } + + /// Returns whether this statement may read memory. 
+ pub const fn may_read_memory(&self) -> bool { + matches!(self, Self::Call { .. }) + } + + /// Returns whether this statement may write memory. + pub const fn may_write_memory(&self) -> bool { + matches!(self, Self::Store { .. } | Self::Call { .. }) + } + + /// Returns whether this statement has side effects. + pub const fn has_side_effects(&self) -> bool { + true + } +} + +/// Structural data attached to a statement handle. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct StmtDataRef { + /// The statement handle this record describes. + pub stmt: Stmt, +} diff --git a/src/ir/verify.rs b/src/ir/verify.rs new file mode 100644 index 0000000..72ed384 --- /dev/null +++ b/src/ir/verify.rs @@ -0,0 +1,374 @@ +//! Structural verification for Slonik IR. + +use core::fmt; + +use crate::ir::{Block, BlockCall, Body, Expr, ExprData, Stmt, StmtData, Type, Value, ValueDef}; + +/// A verification error. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct VerifyError { + msg: String, +} + +impl VerifyError { + fn new(msg: impl Into) -> Self { + Self { msg: msg.into() } + } +} + +impl fmt::Display for VerifyError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.msg) + } +} + +impl std::error::Error for VerifyError {} + +/// Verifies the structural correctness of `body`. 
+pub fn verify(body: &Body) -> Result<(), VerifyError> { + Verifier { body }.run() +} + +struct Verifier<'a> { + body: &'a Body, +} + +impl Verifier<'_> { + fn run(self) -> Result<(), VerifyError> { + self.verify_values()?; + self.verify_blocks()?; + self.verify_exprs()?; + self.verify_stmts()?; + Ok(()) + } + + fn verify_values(&self) -> Result<(), VerifyError> { + for (value, data) in self.body.values.iter() { + match data.def { + ValueDef::Expr(expr) => { + self.ensure_valid_expr(expr, format!("value {value}"))?; + + let owner = &self.body.exprs[expr]; + if owner.value != value { + return Err(VerifyError::new(format!( + "value {value} claims to be defined by expression {expr}, \ + but that expression produces {}", + owner.value + ))); + } + } + + ValueDef::Param(block, index) => { + self.ensure_valid_block(block, format!("value {value}"))?; + + let params = self.body.block_params(block); + let Some(actual) = params.get(index as usize).copied() else { + return Err(VerifyError::new(format!( + "value {value} claims to be block parameter #{index} of {block}, \ + but that block has only {} parameter(s)", + params.len() + ))); + }; + + if actual != value { + return Err(VerifyError::new(format!( + "value {value} claims to be block parameter #{index} of {block}, \ + but that slot contains {actual}" + ))); + } + } + } + + if data.ty.is_void() { + return Err(VerifyError::new(format!( + "value {value} has type void, which is not a valid SSA value type" + ))); + } + } + + Ok(()) + } + + fn verify_blocks(&self) -> Result<(), VerifyError> { + for block in self.body.blocks() { + let params = self.body.block_params(block); + for (index, ¶m) in params.iter().enumerate() { + self.ensure_valid_value(param, format!("block {block} parameter list"))?; + + let data = self.body.value_data(param); + let expected = ValueDef::Param(block, index as u16); + if data.def != expected { + return Err(VerifyError::new(format!( + "block {block} parameter #{index} is {param}, \ + but that value is 
recorded as {:?}", + data.def + ))); + } + } + + let stmts = self.body.block_stmts(block); + for (index, &stmt) in stmts.iter().enumerate() { + self.ensure_valid_stmt(stmt, format!("block {block} statement list"))?; + + let is_last = index + 1 == stmts.len(); + let is_term = self.body.stmt_data(stmt).is_terminator(); + + if is_term && !is_last { + return Err(VerifyError::new(format!( + "terminator statement {stmt} in {block} is not the final statement" + ))); + } + + if !is_term && is_last { + // This is allowed: unterminated blocks may exist during construction. + } + } + } + + Ok(()) + } + + fn verify_exprs(&self) -> Result<(), VerifyError> { + for (expr, node) in self.body.exprs.iter() { + let value = node.value; + self.ensure_valid_value(value, format!("expression {expr} result"))?; + + let vdata = self.body.value_data(value); + if vdata.def != ValueDef::Expr(expr) { + return Err(VerifyError::new(format!( + "expression {expr} produces value {value}, \ + but that value is recorded as {:?}", + vdata.def + ))); + } + + match &node.data { + ExprData::IConst { .. } => {} + + ExprData::F32Const { .. } => { + if self.body.value_type(value) != Type::f32() { + return Err(VerifyError::new(format!( + "expression {expr} is an f32 constant but produces type {}", + self.body.value_type(value) + ))); + } + } + + ExprData::F64Const { .. } => { + if self.body.value_type(value) != Type::f64() { + return Err(VerifyError::new(format!( + "expression {expr} is an f64 constant but produces type {}", + self.body.value_type(value) + ))); + } + } + + ExprData::BConst { .. } => { + if self.body.value_type(value) != Type::bool() { + return Err(VerifyError::new(format!( + "expression {expr} is a boolean constant but produces type {}", + self.body.value_type(value) + ))); + } + } + + ExprData::Unary { arg, .. } => { + self.ensure_valid_value(*arg, format!("expression {expr} unary operand"))?; + } + + ExprData::Binary { lhs, rhs, .. 
} => { + self.ensure_valid_value(*lhs, format!("expression {expr} binary lhs"))?; + self.ensure_valid_value(*rhs, format!("expression {expr} binary rhs"))?; + } + + ExprData::Cast { arg, .. } => { + self.ensure_valid_value(*arg, format!("expression {expr} cast operand"))?; + } + + ExprData::Icmp { lhs, rhs, .. } | ExprData::Fcmp { lhs, rhs, .. } => { + self.ensure_valid_value(*lhs, format!("expression {expr} compare lhs"))?; + self.ensure_valid_value(*rhs, format!("expression {expr} compare rhs"))?; + + if self.body.value_type(value) != Type::bool() { + return Err(VerifyError::new(format!( + "comparison expression {expr} must produce bool, got {}", + self.body.value_type(value) + ))); + } + } + + ExprData::Select { + cond, + if_true, + if_false, + } => { + self.ensure_valid_value(*cond, format!("expression {expr} select condition"))?; + self.ensure_valid_value( + *if_true, + format!("expression {expr} select true arm"), + )?; + self.ensure_valid_value( + *if_false, + format!("expression {expr} select false arm"), + )?; + + if self.body.value_type(*cond) != Type::bool() { + return Err(VerifyError::new(format!( + "select expression {expr} condition {} must have type bool, got {}", + cond, + self.body.value_type(*cond) + ))); + } + + let t_ty = self.body.value_type(*if_true); + let f_ty = self.body.value_type(*if_false); + let out_ty = self.body.value_type(value); + + if t_ty != f_ty { + return Err(VerifyError::new(format!( + "select expression {expr} arms have mismatched types: {t_ty} vs {f_ty}" + ))); + } + + if out_ty != t_ty { + return Err(VerifyError::new(format!( + "select expression {expr} result type {out_ty} does not match arm type {t_ty}" + ))); + } + } + + ExprData::Load { addr, .. } => { + self.ensure_valid_value(*addr, format!("expression {expr} load address"))?; + } + } + } + + Ok(()) + } + + fn verify_stmts(&self) -> Result<(), VerifyError> { + for (stmt, data) in self.body.stmts.iter() { + match data { + StmtData::Store { addr, value, .. 
} => { + self.ensure_valid_value(*addr, format!("statement {stmt} store address"))?; + self.ensure_valid_value(*value, format!("statement {stmt} store value"))?; + } + + StmtData::Call { callee, args } => { + self.ensure_valid_value(*callee, format!("statement {stmt} callee"))?; + self.verify_value_list( + args.as_slice(&self.body.value_lists), + format!("statement {stmt} call arguments"), + )?; + } + + StmtData::Jump { dst } => { + self.verify_block_call(*dst, format!("statement {stmt} jump target"))?; + } + + StmtData::BrIf { + cond, + then_dst, + else_dst, + } => { + self.ensure_valid_value(*cond, format!("statement {stmt} branch condition"))?; + + if self.body.value_type(*cond) != Type::bool() { + return Err(VerifyError::new(format!( + "statement {stmt} branch condition {} must have type bool, got {}", + cond, + self.body.value_type(*cond) + ))); + } + + self.verify_block_call(*then_dst, format!("statement {stmt} then-target"))?; + self.verify_block_call(*else_dst, format!("statement {stmt} else-target"))?; + } + + StmtData::Return { values } => { + self.verify_value_list( + values.as_slice(&self.body.value_lists), + format!("statement {stmt} return values"), + )?; + } + } + } + + Ok(()) + } + + fn verify_block_call(&self, call: BlockCall, context: String) -> Result<(), VerifyError> { + self.ensure_valid_block(call.block, context.clone())?; + + let args = call.args.as_slice(&self.body.value_lists); + let params = self.body.block_params(call.block); + + if args.len() != params.len() { + return Err(VerifyError::new(format!( + "{context} passes {} argument(s) to {}, but that block expects {} parameter(s)", + args.len(), + call.block, + params.len() + ))); + } + + for (index, (&arg, ¶m)) in args.iter().zip(params.iter()).enumerate() { + self.ensure_valid_value(arg, format!("{context} argument #{index}"))?; + + let arg_ty = self.body.value_type(arg); + let param_ty = self.body.value_type(param); + + if arg_ty != param_ty { + return Err(VerifyError::new(format!( + 
"{context} argument #{index} to {} has type {}, but destination parameter has type {}", + call.block, arg_ty, param_ty + ))); + } + } + + Ok(()) + } + + fn verify_value_list(&self, values: &[Value], context: String) -> Result<(), VerifyError> { + for (index, &value) in values.iter().enumerate() { + self.ensure_valid_value(value, format!("{context} #{index}"))?; + } + Ok(()) + } + + fn ensure_valid_block(&self, block: Block, context: String) -> Result<(), VerifyError> { + if !self.body.blocks.is_valid(block) { + return Err(VerifyError::new(format!( + "{context} references invalid block {block}" + ))); + } + Ok(()) + } + + fn ensure_valid_expr(&self, expr: Expr, context: String) -> Result<(), VerifyError> { + if !self.body.exprs.is_valid(expr) { + return Err(VerifyError::new(format!( + "{context} references invalid expression {expr}" + ))); + } + Ok(()) + } + + fn ensure_valid_stmt(&self, stmt: Stmt, context: String) -> Result<(), VerifyError> { + if !self.body.stmts.is_valid(stmt) { + return Err(VerifyError::new(format!( + "{context} references invalid statement {stmt}" + ))); + } + Ok(()) + } + + fn ensure_valid_value(&self, value: Value, context: String) -> Result<(), VerifyError> { + if !self.body.values.is_valid(value) { + return Err(VerifyError::new(format!( + "{context} references invalid value {value}" + ))); + } + Ok(()) + } +} diff --git a/src/ir/write.rs b/src/ir/write.rs new file mode 100644 index 0000000..7df621f --- /dev/null +++ b/src/ir/write.rs @@ -0,0 +1,363 @@ +//! Text formatting for Slonik IR. +//! +//! Expressions that are never referenced by any statement are emitted in a +//! trailing `// dead expressions` section. + +use std::collections::HashSet; +use std::fmt; + +use crate::ir::{Block, BlockCall, Body, Expr, ExprData, Stmt, StmtData, Value, ValueDef}; + +/// Writes `body` in textual form. 
+pub fn write_body(f: &mut fmt::Formatter<'_>, body: &Body) -> fmt::Result { + let mut printer = Printer::new(body); + printer.write_body(f) +} + +impl fmt::Display for Body { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write_body(f, self) + } +} + +struct Printer<'a> { + body: &'a Body, + emitted: HashSet, + visiting: HashSet, +} + +impl<'a> Printer<'a> { + fn new(body: &'a Body) -> Self { + Self { + body, + emitted: HashSet::new(), + visiting: HashSet::new(), + } + } + + fn write_body(&mut self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "body {{")?; + + let mut first_block = true; + for block in self.body.blocks() { + if !first_block { + writeln!(f)?; + } + first_block = false; + self.write_block(f, block)?; + } + + let mut wrote_dead_header = false; + for (expr, node) in self.body.exprs.iter() { + let value = node.value; + if !self.emitted.contains(&value) { + if !wrote_dead_header { + if !first_block { + writeln!(f)?; + } + writeln!(f, " // dead expressions")?; + wrote_dead_header = true; + } + + self.emit_expr(f, expr, 1)?; + } + } + + writeln!(f, "}}") + } + + fn write_block(&mut self, f: &mut fmt::Formatter<'_>, block: Block) -> fmt::Result { + self.write_block_header(f, block)?; + + for &stmt in self.body.block_stmts(block) { + self.emit_stmt_deps(f, stmt, 2)?; + self.write_stmt(f, stmt, 2)?; + } + + Ok(()) + } + + fn write_block_header(&mut self, f: &mut fmt::Formatter<'_>, block: Block) -> fmt::Result { + write!(f, " ^{}", block)?; + + let params = self.body.block_params(block); + if !params.is_empty() { + write!(f, "(")?; + for (i, ¶m) in params.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + + let ty = self.body.value_type(param); + write!(f, "%{}: {}", param, ty)?; + } + write!(f, ")")?; + } + + writeln!(f, ":") + } + + fn emit_stmt_deps( + &mut self, + f: &mut fmt::Formatter<'_>, + stmt: Stmt, + indent: usize, + ) -> fmt::Result { + match self.body.stmt_data(stmt) { + StmtData::Store { addr, value, .. 
} => { + self.emit_value(f, *addr, indent)?; + self.emit_value(f, *value, indent)?; + } + + StmtData::Call { callee, args } => { + self.emit_value(f, *callee, indent)?; + for &arg in args.as_slice(&self.body.value_lists) { + self.emit_value(f, arg, indent)?; + } + } + + StmtData::Jump { dst } => { + self.emit_block_call_args(f, *dst, indent)?; + } + + StmtData::BrIf { + cond, + then_dst, + else_dst, + } => { + self.emit_value(f, *cond, indent)?; + self.emit_block_call_args(f, *then_dst, indent)?; + self.emit_block_call_args(f, *else_dst, indent)?; + } + + StmtData::Return { values } => { + for &value in values.as_slice(&self.body.value_lists) { + self.emit_value(f, value, indent)?; + } + } + } + + Ok(()) + } + + fn emit_block_call_args( + &mut self, + f: &mut fmt::Formatter<'_>, + call: BlockCall, + indent: usize, + ) -> fmt::Result { + for &arg in call.args.as_slice(&self.body.value_lists) { + self.emit_value(f, arg, indent)?; + } + Ok(()) + } + + fn emit_value( + &mut self, + f: &mut fmt::Formatter<'_>, + value: Value, + indent: usize, + ) -> fmt::Result { + if self.emitted.contains(&value) { + return Ok(()); + } + + match self.body.value_def(value) { + ValueDef::Param(_, _) => Ok(()), + ValueDef::Expr(expr) => self.emit_expr(f, expr, indent), + } + } + + fn emit_expr(&mut self, f: &mut fmt::Formatter<'_>, expr: Expr, indent: usize) -> fmt::Result { + let value = self.body.expr_value(expr); + if self.emitted.contains(&value) { + return Ok(()); + } + + if !self.visiting.insert(expr) { + return Err(fmt::Error); + } + + match self.body.expr_data(expr) { + ExprData::IConst { .. } + | ExprData::F32Const { .. } + | ExprData::F64Const { .. } + | ExprData::BConst { .. } => {} + + ExprData::Unary { arg, .. } | ExprData::Cast { arg, .. } => { + self.emit_value(f, *arg, indent)?; + } + + ExprData::Binary { lhs, rhs, .. } + | ExprData::Icmp { lhs, rhs, .. } + | ExprData::Fcmp { lhs, rhs, .. 
} => { + self.emit_value(f, *lhs, indent)?; + self.emit_value(f, *rhs, indent)?; + } + + ExprData::Select { + cond, + if_true, + if_false, + } => { + self.emit_value(f, *cond, indent)?; + self.emit_value(f, *if_true, indent)?; + self.emit_value(f, *if_false, indent)?; + } + + ExprData::Load { addr, .. } => { + self.emit_value(f, *addr, indent)?; + } + } + + self.write_indent(f, indent)?; + self.write_expr_binding(f, expr)?; + self.visiting.remove(&expr); + self.emitted.insert(value); + + Ok(()) + } + + fn write_expr_binding(&mut self, f: &mut fmt::Formatter<'_>, expr: Expr) -> fmt::Result { + let value = self.body.expr_value(expr); + let ty = self.body.value_type(value); + + write!(f, "%{} = ", value)?; + + match self.body.expr_data(expr) { + ExprData::IConst { imm } => { + write!(f, "iconst {}", imm)?; + writeln!(f, " : {}", ty) + } + + ExprData::F32Const { bits } => { + write!(f, "f32const 0x{bits:08x}")?; + writeln!(f, " : {}", ty) + } + + ExprData::F64Const { bits } => { + write!(f, "f64const 0x{bits:016x}")?; + writeln!(f, " : {}", ty) + } + + ExprData::BConst { value } => { + write!(f, "bconst {}", value)?; + writeln!(f, " : {}", ty) + } + + ExprData::Unary { op, arg } => { + write!(f, "{} %{}", op, arg)?; + writeln!(f, " : {}", ty) + } + + ExprData::Binary { op, lhs, rhs } => { + write!(f, "{} %{}, %{}", op, lhs, rhs)?; + writeln!(f, " : {}", ty) + } + + ExprData::Cast { op, arg } => { + let src_ty = self.body.value_type(*arg); + writeln!(f, "{} %{} : {} -> {}", op, arg, src_ty, ty) + } + + ExprData::Icmp { cc, lhs, rhs } => { + let operand_ty = self.body.value_type(*lhs); + writeln!(f, "icmp {} %{}, %{} : {}", cc, lhs, rhs, operand_ty) + } + + ExprData::Fcmp { cc, lhs, rhs } => { + let operand_ty = self.body.value_type(*lhs); + writeln!(f, "fcmp {} %{}, %{} : {}", cc, lhs, rhs, operand_ty) + } + + ExprData::Select { + cond, + if_true, + if_false, + } => { + write!(f, "select %{}, %{}, %{}", cond, if_true, if_false)?; + writeln!(f, " : {}", ty) + } + + 
ExprData::Load { addr, size } => { + writeln!(f, "load %{} : {} -> {}", addr, size, ty) + } + } + } + + fn write_stmt(&mut self, f: &mut fmt::Formatter<'_>, stmt: Stmt, indent: usize) -> fmt::Result { + self.write_indent(f, indent)?; + + match self.body.stmt_data(stmt) { + StmtData::Store { addr, value, size } => { + writeln!(f, "store %{}, %{} : {}", value, addr, size) + } + + StmtData::Call { callee, args } => { + write!(f, "call %{}", callee)?; + + if !args.is_empty() { + write!(f, "(")?; + self.write_value_list(f, args.as_slice(&self.body.value_lists))?; + write!(f, ")")?; + } + writeln!(f) // always end the line, even when the call has no arguments + } + + StmtData::Jump { dst } => { + write!(f, "br ")?; + self.write_block_call(f, *dst)?; + writeln!(f) + } + + StmtData::BrIf { + cond, + then_dst, + else_dst, + } => { + write!(f, "cond_br %{}, ", cond)?; + self.write_block_call(f, *then_dst)?; + write!(f, ", ")?; + self.write_block_call(f, *else_dst)?; + writeln!(f) + } + + StmtData::Return { values } => { + write!(f, "return")?; + let values = values.as_slice(&self.body.value_lists); + if !values.is_empty() { + write!(f, " ")?; + self.write_value_list(f, values)?; + } + writeln!(f) + } + } + } + + fn write_block_call(&mut self, f: &mut fmt::Formatter<'_>, call: BlockCall) -> fmt::Result { + write!(f, "^{}", call.block)?; + if !call.args.is_empty() { + write!(f, "(")?; + self.write_value_list(f, call.args.as_slice(&self.body.value_lists))?; + write!(f, ")")?; + } + Ok(()) + } + + fn write_value_list(&mut self, f: &mut fmt::Formatter<'_>, values: &[Value]) -> fmt::Result { + for (i, value) in values.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + write!(f, "%{}", value)?; + } + Ok(()) + } + + fn write_indent(&mut self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result { + for _ in 0..indent { + write!(f, " ")?; + } + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 5f470be..5a6110f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,9 @@ use clap::{ styling::{AnsiColor, Effects}, }, }; +use 
cranelift_entity::EntityRef; + +use crate::ir::*; const STYLES: Styles = Styles::styled() .header(AnsiColor::Green.on_default().effects(Effects::BOLD)) @@ -23,5 +26,175 @@ const STYLES: Styles = Styles::styled() struct Args {} fn main() { - let args = Args::parse(); + // let args = Args::parse(); + + { + let mut body = Body::new(); + let mut cx = FrontendBuilderContext::new(); + { + let mut b = FrontendBuilder::new(&mut body, &mut cx); + + let entry = b.create_block(); + let then_block = b.create_block(); + let else_block = b.create_block(); + let merge_block = b.create_block(); + + let x = Variable::new(0); + let y = Variable::new(1); + let sum = Variable::new(2); + let acc = Variable::new(3); + + b.declare_var(x, Type::i32()); + b.declare_var(y, Type::i32()); + b.declare_var(sum, Type::i32()); + b.declare_var(acc, Type::i32()); + + // entry: + // + // x = 7 + // y = 5 + // sum = x + y + // if (sum > 10) goto then else goto else + // + b.switch_to_block(entry); + b.seal_block(entry); + + let c7 = b.iconst(Type::i32(), 7); + let c5 = b.iconst(Type::i32(), 5); + let c10 = b.iconst(Type::i32(), 10); + + b.def_var(x, c7); + b.def_var(y, c5); + + let lhs = b.use_var(x); + let rhs = b.use_var(y); + let sum_val = b.iadd(lhs, rhs, Type::i32()); + b.def_var(sum, sum_val); + + let lhs1 = b.use_var(sum); + let cond = b.icmp(IntCC::Sgt, lhs1, c10); + b.br_if(cond, then_block, else_block); + + // Both then/else now have their predecessor set. 
+ b.seal_block(then_block); + b.seal_block(else_block); + + // then: + // + // acc = x * 2 + // goto merge + // + b.switch_to_block(then_block); + + let c2 = b.iconst(Type::i32(), 2); + let lhs2 = b.use_var(x); + let doubled = b.imul(lhs2, c2, Type::i32()); + b.def_var(acc, doubled); + b.jump(merge_block); + + // else: + // + // acc = 0 - y + // goto merge + // + b.switch_to_block(else_block); + + let c0 = b.iconst(Type::i32(), 0); + let rhs1 = b.use_var(y); + let neg_y = b.isub(c0, rhs1, Type::i32()); + b.def_var(acc, neg_y); + b.jump(merge_block); + + // merge: + // + // acc and sum should come in as block params synthesized by use_var() + // result = acc + sum + // return result + // + b.seal_block(merge_block); + b.switch_to_block(merge_block); + + let merged_acc = b.use_var(acc); + let merged_sum = b.use_var(sum); + let result = b.iadd(merged_acc, merged_sum, Type::i32()); + + b.ret(&[result]); + } + + verify(&body).unwrap(); + println!("{body}"); + } + + { + let mut body = Body::new(); + let mut cx = FrontendBuilderContext::new(); + + { + let mut b = FrontendBuilder::new(&mut body, &mut cx); + + let entry = b.create_block(); + let loop_header = b.create_block(); + let loop_body = b.create_block(); + let exit = b.create_block(); + + let i = Variable::new(0); + b.declare_var(i, Type::i32()); + + // entry: + // i = 0 + // br loop_header + b.switch_to_block(entry); + b.seal_block(entry); + + let c0 = b.iconst(Type::i32(), 0); + b.def_var(i, c0); + b.jump(loop_header); + + // loop_header: + // if (i < 4) goto loop_body else goto exit + // + // IMPORTANT: + // Do not seal this block yet. We still have a backedge coming from + // loop_body, and we want use_var(i) here to synthesize a block param. + b.switch_to_block(loop_header); + + let iv = b.use_var(i); + let c4 = b.iconst(Type::i32(), 4); + let cond = b.icmp(IntCC::Slt, iv, c4); + b.br_if(cond, loop_body, exit); + + // loop_body now has its predecessor. 
+ b.seal_block(loop_body); + + // loop_body: + // i = i + 1 + // br loop_header + b.switch_to_block(loop_body); + + let iv = b.use_var(i); + let c1 = b.iconst(Type::i32(), 1); + let next = b.iadd(iv, c1, Type::i32()); + b.def_var(i, next); + b.jump(loop_header); + + // Now loop_header has all of its predecessors: + // - entry + // - loop_body + b.seal_block(loop_header); + + // exit: + // return i + // + // exit already has one predecessor from loop_header's br_if, so using + // `i` here should synthesize a block param and patch that edge. + b.seal_block(exit); + b.switch_to_block(exit); + + let out = b.use_var(i); + b.ret(&[out]); + } + + verify(&body).unwrap(); + println!("{body}"); + } }