slonik/build.rs

978 lines
31 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Code generator: reads the ARM ISA XML specs and emits `$OUT_DIR/arm_a32.rs`.
use std::collections::HashMap;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
/// Build-script entry point.
///
/// Collects every A32 encoding from the `specs` directory, orders them, renders
/// the decoder source with `generate_rust`, and writes it to
/// `$OUT_DIR/arm_a32.rs`.
///
/// # Panics
/// Panics if `OUT_DIR` is not set or the output file cannot be written.
fn main() {
    let spec_dir = Path::new("specs");
    println!("cargo:rerun-if-changed={}", spec_dir.display());
    println!("cargo:rerun-if-changed=build.rs");

    let mut entries = collect_encodings(spec_dir);
    // Most-specific (most fixed bits) first, ties broken by id. This fixes the
    // order of the generated enum variants and decoder indices so the output is
    // deterministic from run to run.
    entries.sort_by(|a, b| {
        b.mask
            .count_ones()
            .cmp(&a.mask.count_ones())
            .then_with(|| a.id.cmp(&b.id))
    });

    let code = generate_rust(&entries);
    // `var_os` avoids a spurious failure when the path is not valid Unicode.
    let out_dir = std::env::var_os("OUT_DIR").expect("OUT_DIR must be set by cargo");
    let out = PathBuf::from(out_dir).join("arm_a32.rs");
    std::fs::write(&out, code)
        .unwrap_or_else(|err| panic!("failed to write {}: {err}", out.display()));
}
// ---------------------------------------------------------------------------
// Data types
// ---------------------------------------------------------------------------
/// One `<box>` element of an instruction register diagram, as produced by
/// `parse_box`.
#[derive(Debug, Clone)]
struct BoxInfo {
/// `hibit` attribute: most-significant bit position of the box
/// (0 when the attribute is absent or unparsable).
hibit: i32,
/// `width` attribute: number of bits in the box
/// (1 when the attribute is absent or unparsable).
width: i32,
/// `name` attribute, if present.
name: Option<String>,
/// True when the `usename` attribute equals "1"; only such boxes are turned
/// into fields by `extract_fields`.
usename: bool,
/// One entry per bit position (MSB first). Values: "0", "1", "" or a
/// constraint string like "!= 1111" / "Z" / "N".
values: Vec<String>,
/// `settings` attribute (0 when the attribute is absent or unparsable).
settings: i32,
}
impl BoxInfo {
    /// Least-significant bit position covered by this box, derived from
    /// `hibit` and `width`.
    fn lobit(&self) -> i32 {
        let Self { hibit, width, .. } = self;
        hibit - width + 1
    }
}
/// A named bit-field of an encoding, built by `extract_fields` from a
/// `BoxInfo` whose `usename` flag is set.
#[derive(Debug, Clone)]
struct FieldDef {
name: String, // lowercased Rust identifier
/// Most-significant bit position of the field (copied from the box).
hibit: i32,
/// Least-significant bit position; used as the shift amount by the
/// generated field extractors.
lobit: i32,
/// Width in bits; selects the generated Rust type via `field_rust_type`.
width: i32,
}
/// One element child of an `<asmtemplate>`, collected by `build_entry`.
#[derive(Debug, Clone)]
struct AsmTok {
is_link: bool, // false = literal text, true = <a> symbol
/// Text content of the element (empty string when the element has no text).
text: String,
}
/// Everything the generator needs to know about one `<encoding>` element.
#[derive(Debug)]
struct EncEntry {
/// `name` attribute of the `<encoding>`; turned into the enum variant name.
id: String,
/// Value of the `mnemonic` docvar, or "UNK" when none is found.
mnemonic: String,
/// Bits of the instruction word that are fixed to "0" or "1".
mask: u32,
/// Required value of the bits selected by `mask`.
pattern: u32,
/// Named bit-fields that become the fields of the generated variant.
fields: Vec<FieldDef>,
/// true if the `cond` field has a "!= 1111" constraint
cond_ne_1111: bool,
/// Tokens of the first `<asmtemplate>` found under the encoding.
asm_tokens: Vec<AsmTok>,
/// symbol text → encodedin field name
sym_map: HashMap<String, String>,
}
// ---------------------------------------------------------------------------
// XML collection
// ---------------------------------------------------------------------------
/// Scans `dir` for `*.xml` files and returns one [`EncEntry`] per A32 encoding.
///
/// Files are visited in sorted path order so the result is deterministic. A
/// file contributes only when its root element is `instructionsection` and
/// its first `docvars` element carries `instr-class = general`; within such a
/// file only `iclass` elements with `isa="A32"` are walked.
///
/// Files that cannot be read or parsed are skipped, but a `cargo:warning` is
/// emitted so a broken spec file does not silently drop instructions from the
/// generated decoder.
///
/// # Panics
/// Panics if `dir` itself cannot be listed.
fn collect_encodings(dir: &Path) -> Vec<EncEntry> {
    let mut paths: Vec<PathBuf> = std::fs::read_dir(dir)
        .unwrap_or_else(|err| panic!("cannot list spec directory {}: {err}", dir.display()))
        .filter_map(|e| e.ok())
        .map(|e| e.path())
        .filter(|p| p.extension().map_or(false, |e| e == "xml"))
        .collect();
    paths.sort();

    let mut result = Vec::new();
    for path in paths {
        let text = match std::fs::read_to_string(&path) {
            Ok(t) => t,
            Err(err) => {
                println!("cargo:warning=skipping {}: {err}", path.display());
                continue;
            }
        };
        let opts = roxmltree::ParsingOptions { allow_dtd: true, ..Default::default() };
        let doc = match roxmltree::Document::parse_with_options(&text, opts) {
            Ok(d) => d,
            Err(err) => {
                println!("cargo:warning=skipping {}: {err}", path.display());
                continue;
            }
        };
        let root = doc.root_element();
        if root.tag_name().name() != "instructionsection" {
            continue;
        }

        // Only the "general" instruction class is handled.
        let instr_class = root
            .descendants()
            .find(|n| n.tag_name().name() == "docvars")
            .and_then(|dv| {
                dv.children()
                    .filter(|n| n.is_element())
                    .find(|n| n.attribute("key") == Some("instr-class"))
                    .and_then(|n| n.attribute("value"))
            });
        if instr_class != Some("general") {
            continue;
        }

        // File-level symbol -> encodedin map, shared by every encoding in the file.
        let sym_map = build_sym_map(&root);

        let a32_iclasses = root.descendants().filter(|n| {
            n.is_element()
                && n.tag_name().name() == "iclass"
                && n.attribute("isa") == Some("A32")
        });
        for iclass in a32_iclasses {
            // The iclass diagram is the baseline; each encoding may override boxes.
            let base_boxes = parse_regdiagram_boxes(&iclass);
            let encodings = iclass
                .children()
                .filter(|n| n.is_element() && n.tag_name().name() == "encoding");
            for enc in encodings {
                if let Some(entry) = build_entry(&enc, &base_boxes, &sym_map) {
                    result.push(entry);
                }
            }
        }
    }
    result
}
/// Builds the assembler-symbol → `encodedin` map for one spec file.
///
/// Every `<explanation>` under `root` contributes its `<symbol>` text as key
/// and the `encodedin` attribute of its `<account>` child as value (empty
/// string when missing). The first explanation seen for a symbol wins.
fn build_sym_map(root: &roxmltree::Node) -> HashMap<String, String> {
    let mut map = HashMap::new();
    let explanations = root
        .descendants()
        .filter(|n| n.is_element() && n.tag_name().name() == "explanation");
    for exp in explanations {
        let symbol = exp
            .children()
            .find(|n| n.is_element() && n.tag_name().name() == "symbol")
            .and_then(|n| n.text());
        let symbol = match symbol {
            Some(text) => text.to_string(),
            // Explanations without symbol text contribute nothing.
            None => continue,
        };
        let encodedin = exp
            .children()
            .find(|n| n.is_element() && n.tag_name().name() == "account")
            .and_then(|n| n.attribute("encodedin"))
            .unwrap_or("");
        map.entry(symbol).or_insert_with(|| encodedin.to_string());
    }
    map
}
/// Parses the `<box>` children of the first `<regdiagram>` directly under
/// `iclass`. Returns an empty vector when the iclass has no register diagram.
fn parse_regdiagram_boxes(iclass: &roxmltree::Node) -> Vec<BoxInfo> {
    iclass
        .children()
        .find(|n| n.is_element() && n.tag_name().name() == "regdiagram")
        .map(|diagram| {
            diagram
                .children()
                .filter(|n| n.is_element() && n.tag_name().name() == "box")
                .map(parse_box)
                .collect()
        })
        .unwrap_or_default()
}
/// Converts one `<box>` element into a [`BoxInfo`].
///
/// Integer attributes that are missing or unparsable fall back to defaults
/// (`hibit` = 0, `width` = 1, `settings` = 0). Each `<c>` child contributes
/// its trimmed text once per bit it spans (`colspan`, default 1), so `values`
/// ends up with one entry per bit, MSB first.
fn parse_box(node: roxmltree::Node) -> BoxInfo {
    let int_attr = |key: &str, default: i32| -> i32 {
        node.attribute(key)
            .and_then(|v| v.parse().ok())
            .unwrap_or(default)
    };
    let hibit = int_attr("hibit", 0);
    let width = int_attr("width", 1);
    let settings = int_attr("settings", 0);
    let name = node.attribute("name").map(str::to_string);
    let usename = node.attribute("usename") == Some("1");

    let mut values: Vec<String> = Vec::new();
    for c in node
        .children()
        .filter(|n| n.is_element() && n.tag_name().name() == "c")
    {
        let span: usize = c
            .attribute("colspan")
            .and_then(|v| v.parse().ok())
            .unwrap_or(1);
        let text = c.text().unwrap_or("").trim();
        // Fixed bits ("0"/"1") and multi-bit constraints ("!= 1111" etc.) are
        // both replicated across every bit of the cell.
        values.extend(std::iter::repeat(text).take(span).map(str::to_string));
    }
    BoxInfo { hibit, width, name, usename, values, settings }
}
/// Overlays encoding-specific boxes on the iclass boxes (encoding wins).
///
/// An override replaces the first base box with the same `hibit`; overrides
/// with no matching base box are ignored.
fn merge_boxes(base: &[BoxInfo], overrides: &[BoxInfo]) -> Vec<BoxInfo> {
    let mut merged = base.to_vec();
    for replacement in overrides {
        let slot = merged
            .iter_mut()
            .find(|existing| existing.hibit == replacement.hibit);
        if let Some(slot) = slot {
            *slot = replacement.clone();
        }
    }
    merged
}
fn build_entry(
enc: &roxmltree::Node,
base_boxes: &[BoxInfo],
sym_map: &HashMap<String, String>,
) -> Option<EncEntry> {
let id = enc.attribute("name")?.to_string();
let mnemonic = enc
.descendants()
.find(|n| {
n.is_element()
&& n.tag_name().name() == "docvar"
&& n.attribute("key") == Some("mnemonic")
})
.and_then(|n| n.attribute("value"))
.unwrap_or("UNK")
.to_string();
let enc_boxes: Vec<BoxInfo> = enc
.children()
.filter(|n| n.is_element() && n.tag_name().name() == "box")
.map(parse_box)
.collect();
let all_boxes = merge_boxes(base_boxes, &enc_boxes);
let (mask, pattern, cond_ne_1111) = compute_mask_pattern(&all_boxes);
let fields = extract_fields(&all_boxes);
// ASM template (first one)
let asm_tokens = enc
.descendants()
.find(|n| n.is_element() && n.tag_name().name() == "asmtemplate")
.map(|tmpl| {
tmpl.children()
.filter(|n| n.is_element())
.map(|n| AsmTok {
is_link: n.tag_name().name() == "a",
text: n.text().unwrap_or("").to_string(),
})
.collect()
})
.unwrap_or_default();
Some(EncEntry {
id,
mnemonic,
mask,
pattern,
fields,
cond_ne_1111,
asm_tokens,
sym_map: sym_map.clone(),
})
}
/// Derives the fixed-bit `(mask, pattern)` pair for a set of boxes.
///
/// Returns `(mask, pattern, cond_ne_1111)`: `mask` has a 1 for every bit whose
/// value is "0" or "1", `pattern` holds the required value of those bits, and
/// `cond_ne_1111` is true when the box named `cond` carries a "!= 1111"
/// constraint. Any box containing a "!= 1111" constraint contributes no fixed
/// bits at all.
///
/// Bits whose position falls outside `0..32` (possible only with malformed
/// `hibit`/`width` attributes) are skipped instead of overflowing the shift.
fn compute_mask_pattern(boxes: &[BoxInfo]) -> (u32, u32, bool) {
    let mut mask: u32 = 0;
    let mut pattern: u32 = 0;
    let mut cond_ne_1111 = false;
    for b in boxes {
        if b.values.iter().any(|v| v.contains("!= 1111")) {
            if b.name.as_deref() == Some("cond") {
                cond_ne_1111 = true;
            }
            continue;
        }
        // Never look past the declared width; a negative width means no bits.
        let fixed_len = usize::try_from(b.width).unwrap_or(0);
        for (offset, val) in (0i32..).zip(b.values.iter().take(fixed_len)) {
            // `values` is MSB first, so entry `offset` sits at bit `hibit - offset`.
            let position = b
                .hibit
                .checked_sub(offset)
                .and_then(|p| u32::try_from(p).ok());
            let bit = match position {
                Some(p) if p < u32::BITS => 1u32 << p,
                _ => continue,
            };
            match val.as_str() {
                "0" => mask |= bit,
                "1" => {
                    mask |= bit;
                    pattern |= bit;
                }
                _ => {}
            }
        }
    }
    (mask, pattern, cond_ne_1111)
}
/// Collects the named bit-fields of an encoding.
///
/// A box becomes a field when `usename` is set and its name is neither empty
/// nor "?". Names are normalised with `rust_field_name`; when two boxes map to
/// the same identifier only the first is kept.
fn extract_fields(boxes: &[BoxInfo]) -> Vec<FieldDef> {
    let mut seen = std::collections::HashSet::new();
    boxes
        .iter()
        .filter(|b| b.usename)
        .filter_map(|b| {
            let raw = b
                .name
                .as_deref()
                .filter(|n| !n.is_empty() && *n != "?")?;
            let name = rust_field_name(raw);
            seen.insert(name.clone()).then(|| FieldDef {
                name,
                hibit: b.hibit,
                lobit: b.lobit(),
                width: b.width,
            })
        })
        .collect()
}
// ---------------------------------------------------------------------------
// Name helpers
// ---------------------------------------------------------------------------
/// Turns an encoding id such as `ADC_i_A1` into a CamelCase variant name
/// (`AdcIA1`): the id is split on `_`, each piece gets an upper-cased first
/// character and a lower-cased remainder, and the pieces are concatenated.
fn variant_name(id: &str) -> String {
    let mut camel = String::with_capacity(id.len());
    for piece in id.split('_') {
        let mut rest = piece.chars();
        if let Some(first) = rest.next() {
            camel.extend(first.to_uppercase());
            camel.push_str(&rest.as_str().to_lowercase());
        }
    }
    camel
}
/// Converts an XML field name into a usable Rust field identifier.
///
/// The name is lower-cased, every character that is not ASCII alphanumeric or
/// `_` becomes `_`, runs of `_` are collapsed and leading/trailing `_` are
/// stripped. Well-known renames (`type` → `ty`, `fn` → `fn_reg`,
/// `register_list` → `regs`) are applied to the *sanitised* name, so spellings
/// such as `Type` or `_fn` cannot leak a Rust keyword into the generated code.
/// An empty result becomes `field`.
fn rust_field_name(xml_name: &str) -> String {
    let mut ident = String::with_capacity(xml_name.len());
    for c in xml_name.to_lowercase().chars() {
        let c = if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' };
        // Dropping `_` at the start or right after another `_` both trims the
        // leading edge and collapses runs in a single pass.
        if c == '_' && (ident.is_empty() || ident.ends_with('_')) {
            continue;
        }
        ident.push(c);
    }
    // After collapsing, at most one trailing `_` can remain.
    if ident.ends_with('_') {
        ident.pop();
    }

    match ident.as_str() {
        "" => "field".to_string(),
        "type" => "ty".to_string(),
        "fn" => "fn_reg".to_string(),
        "register_list" => "regs".to_string(),
        _ => ident,
    }
}
/// Picks the Rust type used for a field of the given bit width: `bool` for
/// exactly one bit, otherwise the smallest of `u8`/`u16`/`u32` that fits.
/// Widths below one fall into the `u8` bucket.
fn field_rust_type(width: i32) -> &'static str {
    match width {
        1 => "bool",
        w if w <= 8 => "u8",
        w if w <= 16 => "u16",
        _ => "u32",
    }
}
// ---------------------------------------------------------------------------
// Rust code generation
// ---------------------------------------------------------------------------
/// Renders the complete generated source file for `entries`.
///
/// The output contains, in order: the `A32Inst` enum (one variant per entry),
/// `A32DecodeError`, one `a32_dec_N` field-extractor function per entry, the
/// `A32_DECODERS` function table, the flat `A32_CANDS` candidate table with
/// its 256-entry `A32_SLOTS` index (keyed by bits 27:20 of the word), the
/// `decode_a32` function, the `Display` impl, and finally `HELPERS`.
///
/// # Panics
/// Panics if a table index does not fit in the `u16` columns of the generated
/// tables; the previous `as u16` casts would have silently truncated and
/// produced a decoder that reads the wrong candidates.
fn generate_rust(entries: &[EncEntry]) -> String {
    let mut out = String::new();
    writeln!(out, "// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT.").unwrap();
    writeln!(out, "use core::fmt;").unwrap();
    writeln!(out).unwrap();

    // ---- Enum ---------------------------------------------------------------
    writeln!(out, "#[derive(Clone, Debug)]").unwrap();
    writeln!(out, "pub enum A32Inst {{").unwrap();
    for e in entries {
        let vname = variant_name(&e.id);
        if e.fields.is_empty() {
            writeln!(out, " {vname},").unwrap();
        } else {
            let fields: Vec<String> = e
                .fields
                .iter()
                .map(|f| format!("{}: {}", f.name, field_rust_type(f.width)))
                .collect();
            writeln!(out, " {vname} {{ {} }},", fields.join(", ")).unwrap();
        }
    }
    writeln!(out, "}}").unwrap();
    writeln!(out).unwrap();

    // ---- DecodeError --------------------------------------------------------
    writeln!(out, "#[derive(Clone, Copy, Debug, PartialEq, Eq)]").unwrap();
    writeln!(out, "pub enum A32DecodeError {{ TooShort, Unknown }}").unwrap();
    writeln!(out).unwrap();

    // ---- Per-encoding field-extractor functions ------------------------------
    // One tiny `fn a32_dec_N(w: u32) -> A32Inst` per encoding.
    for (idx, e) in entries.iter().enumerate() {
        let vname = variant_name(&e.id);
        writeln!(out, "fn a32_dec_{idx}(w: u32) -> A32Inst {{").unwrap();
        if e.fields.is_empty() {
            writeln!(out, " let _ = w; A32Inst::{vname}").unwrap();
        } else {
            let extracts: Vec<String> =
                e.fields.iter().map(|f| gen_field_extract_w(f)).collect();
            writeln!(out, " A32Inst::{vname} {{ {} }}", extracts.join(", ")).unwrap();
        }
        writeln!(out, "}}").unwrap();
    }
    writeln!(out).unwrap();

    // ---- Decoder function pointer table -------------------------------------
    let n = entries.len();
    writeln!(out, "type A32DecFn = fn(u32) -> A32Inst;").unwrap();
    write!(out, "static A32_DECODERS: [A32DecFn; {n}] = [").unwrap();
    for idx in 0..n {
        write!(out, "a32_dec_{idx},").unwrap();
    }
    writeln!(out, "];").unwrap();
    writeln!(out).unwrap();

    // ---- Build 256-slot lookup table ----------------------------------------
    // slot index = bits[27:20] of the instruction word.
    // For each encoding, enumerate all hi-byte values it can match.
    let mut slots: Vec<Vec<(usize, u32)>> = vec![vec![]; 256];
    for (enc_idx, e) in entries.iter().enumerate() {
        let hi_mask = ((e.mask >> 20) & 0xFF) as u8;
        let hi_pat = ((e.pattern >> 20) & 0xFF) as u8;
        for x in 0u8..=255 {
            if x & hi_mask == hi_pat {
                slots[x as usize].push((enc_idx, e.mask.count_ones()));
            }
        }
    }
    // Within each slot keep the most-specific (most fixed bits) first.
    for slot in &mut slots {
        slot.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
    }

    // Flatten into one array; record (start, count) per slot.
    // Entry format: (mask, pattern, flags, dec_idx)
    // flags bit 0: 1 = cond must not be 0xF
    let mut all_cands: Vec<(u32, u32, u8, u16)> = Vec::new();
    let mut slot_table: Vec<(u16, u16)> = Vec::new();
    for slot in &slots {
        // Fail loudly instead of truncating: the generated tables store these
        // values as u16.
        let start = u16::try_from(all_cands.len())
            .expect("A32_CANDS start index does not fit in u16");
        let count = u16::try_from(slot.len())
            .expect("A32 slot candidate count does not fit in u16");
        for &(enc_idx, _) in slot {
            let e = &entries[enc_idx];
            let flags = u8::from(e.cond_ne_1111);
            let dec_idx = u16::try_from(enc_idx)
                .expect("A32 decoder index does not fit in u16");
            all_cands.push((e.mask, e.pattern, flags, dec_idx));
        }
        slot_table.push((start, count));
    }

    // Emit the flat candidate array.
    let total = all_cands.len();
    writeln!(
        out,
        "static A32_CANDS: [(u32, u32, u8, u16); {total}] = ["
    )
    .unwrap();
    for (mask, pat, flags, idx) in &all_cands {
        writeln!(
            out,
            " (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx}),"
        )
        .unwrap();
    }
    writeln!(out, "];").unwrap();
    writeln!(out).unwrap();

    // Emit the slot table.
    writeln!(out, "static A32_SLOTS: [(u16, u16); 256] = [").unwrap();
    for (start, count) in &slot_table {
        write!(out, " ({start}, {count}),").unwrap();
    }
    writeln!(out, "\n];").unwrap();
    writeln!(out).unwrap();

    // ---- Main decode function ------------------------------------------------
    writeln!(
        out,
        "pub fn decode_a32(bytes: &[u8]) -> Result<(usize, A32Inst), A32DecodeError> {{"
    )
    .unwrap();
    writeln!(out, " if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}").unwrap();
    writeln!(
        out,
        " let word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);"
    )
    .unwrap();
    writeln!(out, " let cond = ((word >> 28) & 0xF) as u8;").unwrap();
    writeln!(out, " let hi = ((word >> 20) & 0xFF) as usize;").unwrap();
    writeln!(out, " let (start, count) = A32_SLOTS[hi];").unwrap();
    // Widen before adding: `start + count` as u16 could overflow for the last slots.
    writeln!(out, " for i in start as usize..start as usize + count as usize {{").unwrap();
    writeln!(out, " let (mask, pattern, flags, dec_idx) = A32_CANDS[i];").unwrap();
    writeln!(
        out,
        " if word & mask == pattern && (flags == 0 || cond != 0xF) {{"
    )
    .unwrap();
    writeln!(
        out,
        " return Ok((4, A32_DECODERS[dec_idx as usize](word)));"
    )
    .unwrap();
    writeln!(out, " }}").unwrap();
    writeln!(out, " }}").unwrap();
    writeln!(out, " Err(A32DecodeError::Unknown)").unwrap();
    writeln!(out, "}}").unwrap();
    writeln!(out).unwrap();

    // ---- Display ------------------------------------------------------------
    writeln!(out, "impl fmt::Display for A32Inst {{").unwrap();
    writeln!(
        out,
        " fn fmt(&self, __f: &mut fmt::Formatter<'_>) -> fmt::Result {{"
    )
    .unwrap();
    writeln!(out, " match self {{").unwrap();
    for e in entries {
        gen_display_arm(&mut out, e);
    }
    writeln!(out, " }}").unwrap();
    writeln!(out, " }}").unwrap();
    writeln!(out, "}}").unwrap();
    writeln!(out).unwrap();

    // ---- Helpers ------------------------------------------------------------
    out.push_str(HELPERS);
    out
}
/// Renders the `name: <expr>` initialiser that extracts field `f` from the
/// instruction word held in the variable called `var`.
///
/// The word is shifted right by `lobit` and masked to `width` bits; one-bit
/// fields become a `bool` comparison and narrower integer types get an `as`
/// cast matching `field_rust_type`.
fn gen_field_extract_from(f: &FieldDef, var: &str) -> String {
    let lobit = f.lobit as u32;
    let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 };
    let name = &f.name;
    match field_rust_type(f.width) {
        "bool" => format!("{name}: ({var} >> {lobit}) & 1 == 1"),
        ty @ ("u8" | "u16") => format!("{name}: (({var} >> {lobit}) & 0x{mask:X}) as {ty}"),
        _ => format!("{name}: ({var} >> {lobit}) & 0x{mask:X}"),
    }
}

/// Field extractor for code in which the instruction word is named `word`.
fn gen_field_extract(f: &FieldDef) -> String {
    gen_field_extract_from(f, "word")
}

/// Same as `gen_field_extract` but references the variable `w` (used in per-encoding
/// decoder functions where the parameter is named `w`, not `word`).
fn gen_field_extract_w(f: &FieldDef) -> String {
    gen_field_extract_from(f, "w")
}
// ---------------------------------------------------------------------------
// Display generation
// ---------------------------------------------------------------------------
/// Appends the `Display` match arm for one encoding to `out`.
///
/// The arm destructures every field of the variant and then replays the
/// encoding's asm-template tokens as a series of `write!` calls. A literal `{`
/// token opens an optional group, which is handed to `emit_opt_group` as a
/// unit; every other token goes through `emit_token`.
fn gen_display_arm(out: &mut String, e: &EncEntry) {
    let vname = variant_name(&e.id);

    // Destructuring pattern; empty for field-less variants.
    let destruct = if e.fields.is_empty() {
        String::new()
    } else {
        let names: Vec<&str> = e.fields.iter().map(|f| f.name.as_str()).collect();
        format!("{{ {} }}", names.join(", "))
    };
    writeln!(out, " Self::{vname} {destruct} => {{").unwrap();

    // Lookup of this variant's fields, keyed by their Rust identifier.
    let field_lookup: HashMap<String, &FieldDef> = e
        .fields
        .iter()
        .map(|f| (f.name.clone(), f))
        .collect();

    let toks = &e.asm_tokens;
    let mut pos = 0;
    while let Some(tok) = toks.get(pos) {
        let opens_group = !tok.is_link && tok.text == "{";
        if opens_group {
            let (group, close) = collect_opt_group(toks, pos);
            emit_opt_group(out, &group, &e.sym_map, &field_lookup, e);
            // Resume after the matching `}`.
            pos = close + 1;
        } else {
            emit_token(out, tok, &e.sym_map, &field_lookup, e);
            pos += 1;
        }
    }

    writeln!(out, " Ok(())").unwrap();
    writeln!(out, " }}").unwrap();
}
/// Gathers the contents of the `{...}` group that opens at `start`.
///
/// Returns the tokens strictly inside the outermost braces (nested braces are
/// kept as tokens) together with the index of the matching closing `}`. When
/// the group is never closed, everything up to the end is returned and the
/// index is one past the last token.
fn collect_opt_group(toks: &[AsmTok], start: usize) -> (Vec<&AsmTok>, usize) {
    let mut depth = 0;
    let mut group = Vec::new();
    for (idx, tok) in toks.iter().enumerate().skip(start) {
        // Only literal tokens can act as braces; links never do.
        let literal = if tok.is_link { None } else { Some(tok.text.as_str()) };
        match literal {
            Some("{") => {
                depth += 1;
                if depth > 1 {
                    group.push(tok);
                }
            }
            Some("}") => {
                depth -= 1;
                if depth == 0 {
                    return (group, idx);
                }
                group.push(tok);
            }
            _ => {
                if depth > 0 {
                    group.push(tok);
                }
            }
        }
    }
    (group, start.max(toks.len()))
}
/// Emits the `write!` calls for one optional `{...}` group of an asm template.
///
/// Recognised shapes:
/// * a lone `<c>` prints the condition suffix (when a `cond` field exists);
/// * a lone `<q>` or `IA` is dropped;
/// * a lone `!` prints `!` only when the `w` (or else `wback`) field is set;
/// * a group containing `<shift>` prints the shift, and when it also contains
///   `<amount>` it is suppressed for shift type 0 with amount 0;
/// * anything else is emitted token by token via `emit_token`.
///
/// The shift branch uses the name of whichever field was actually found
/// (`stype`/`shift`, `imm5`/`amount`); it previously hard-coded `stype` and
/// `imm5`, which referenced unbound variables in the generated code whenever
/// only the alternative field existed.
fn emit_opt_group(
    out: &mut String,
    group: &[&AsmTok],
    sym_map: &HashMap<String, String>,
    fields: &HashMap<String, &FieldDef>,
    e: &EncEntry,
) {
    // Groups consisting of exactly one symbol get dedicated handling.
    if let [only] = group {
        if only.is_link {
            match only.text.as_str() {
                "<c>" => {
                    // Condition suffix: always print (may be empty).
                    if fields.contains_key("cond") {
                        writeln!(
                            out,
                            " write!(__f,\"{{}}\", a32_cond(*cond))?;"
                        )
                        .unwrap();
                    }
                    return;
                }
                // Qualifier: always skipped.
                "<q>" => return,
                "{!}" | "!" => {
                    // Writeback marker, conditional on the W or writeback field.
                    if fields.contains_key("w") {
                        writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap();
                    } else if fields.contains_key("wback") {
                        writeln!(out, " if *wback {{ write!(__f,\"!\")?; }}").unwrap();
                    }
                    return;
                }
                // Default LDM addressing mode: omitted.
                "{IA}" | "IA" => return,
                _ => {}
            }
        }
    }

    let has_link = |sym: &str| group.iter().any(|t| t.is_link && t.text == sym);

    // {, <shift> #<amount>} or {, <shift>}
    if has_link("<shift>") {
        let stype_field = fields.get("stype").or_else(|| fields.get("shift"));
        let amount_field = fields.get("imm5").or_else(|| fields.get("amount"));
        if has_link("<amount>") {
            // Skip the whole group for the "LSL #0" case.
            if let (Some(st), Some(am)) = (stype_field, amount_field) {
                let (st, am) = (&st.name, &am.name);
                writeln!(
                    out,
                    " if *{st} != 0 || *{am} != 0 {{ write!(__f,\", {{}} #{{}}\", a32_shift(*{st}), *{am})?; }}"
                )
                .unwrap();
            }
        } else if let Some(st) = stype_field {
            // {, <shift>} without amount, likely the RRX variant: always print.
            let st = &st.name;
            writeln!(
                out,
                " write!(__f,\", {{}}\", a32_shift(*{st}))?;"
            )
            .unwrap();
        }
        return;
    }

    // Everything else (including {, #{+/-}<imm>} memory offsets): always emit
    // the contents.
    for tok in group {
        emit_token(out, tok, sym_map, fields, e);
    }
}
/// Emits the `write!` call (if any) for a single asm-template token.
///
/// Literal tokens are written verbatim. Symbol tokens are mapped to the
/// variant field that encodes them: a handful of symbols (`<c>`, `<q>`, `!`,
/// `+/-`, `IA`, `SP,`, `<shift>`, register lists, `<label>`) are special-cased,
/// every other symbol is resolved through `sym_map` and printed as a register
/// name, an expanded modified immediate (`<const>`), or the raw field value.
/// Symbols that cannot be resolved to a field produce no output.
///
/// Literal text is escaped for use inside a `write!` format string, including
/// `{` and `}`: nested optional groups hand literal brace tokens to this
/// function and an unescaped brace would not compile in the generated file.
fn emit_token(
    out: &mut String,
    tok: &AsmTok,
    sym_map: &HashMap<String, String>,
    fields: &HashMap<String, &FieldDef>,
    e: &EncEntry,
) {
    if !tok.is_link {
        // Literal text: escape for a Rust string literal *and* a format string.
        let escaped = tok
            .text
            .replace('\\', "\\\\")
            .replace('"', "\\\"")
            .replace('{', "{{")
            .replace('}', "}}");
        if !escaped.is_empty() {
            writeln!(out, " write!(__f,\"{escaped}\")?;").unwrap();
        }
        return;
    }

    // It's an <a> symbol.
    let sym = tok.text.as_str();
    match sym {
        "<c>" => {
            if fields.contains_key("cond") {
                writeln!(
                    out,
                    " write!(__f,\"{{}}\", a32_cond(*cond))?;"
                )
                .unwrap();
            }
        }
        "<q>" => { /* skip */ }
        "{!}" | "!" => {
            // Same field preference as the single-symbol case in `emit_opt_group`.
            if fields.contains_key("w") {
                writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap();
            } else if fields.contains_key("wback") {
                writeln!(out, " if *wback {{ write!(__f,\"!\")?; }}").unwrap();
            }
        }
        "{+/-}" | "+/-" => {
            if fields.contains_key("u") {
                writeln!(
                    out,
                    " write!(__f,\"{{}}\", if *u {{ '+' }} else {{ '-' }})?;"
                )
                .unwrap();
            }
        }
        // Default addressing mode prints nothing.
        "{IA}" | "IA" => {}
        "SP," => {
            writeln!(out, " write!(__f,\"sp, \")?;").unwrap();
        }
        "<shift>" => {
            if fields.contains_key("stype") {
                writeln!(
                    out,
                    " write!(__f,\"{{}}\", a32_shift(*stype))?;"
                )
                .unwrap();
            }
        }
        "<registers>" | "<registers_with_pc>" | "<registers_without_pc>" => {
            if let Some(fld) = fields.get("regs").or_else(|| fields.get("register_list")) {
                let fname = &fld.name;
                writeln!(
                    out,
                    " write!(__f,\"{{}}\", a32_reglist(*{fname} as u32))?;"
                )
                .unwrap();
            }
        }
        "<label>" => {
            // Raw offset; Display can't compute the absolute address.
            let encodedin = sym_map.get(sym).map(String::as_str).unwrap_or("imm24");
            if let Some(fname) = resolve_encodedin_field(encodedin, fields) {
                writeln!(
                    out,
                    " write!(__f,\"{{:#x}}\", a32_branch_offset(*{fname} as u32))?;"
                )
                .unwrap();
            }
        }
        _ => {
            // Register or immediate symbol: look up the field it is encoded in.
            let encodedin = sym_map.get(sym).map(String::as_str).unwrap_or("");
            if let Some(fname) = resolve_encodedin_field(encodedin, fields) {
                if is_reg_sym(sym) {
                    writeln!(
                        out,
                        " write!(__f,\"{{}}\", a32_reg(*{fname} as u8))?;"
                    )
                    .unwrap();
                } else if sym == "<const>" {
                    // Modified immediate: expand.
                    writeln!(
                        out,
                        " write!(__f,\"{{}}\", a32_expand_imm12(*{fname} as u32))?;"
                    )
                    .unwrap();
                } else {
                    writeln!(
                        out,
                        " write!(__f,\"{{}}\", *{fname})?;"
                    )
                    .unwrap();
                }
            }
            // Unknown symbol / no encodedin: nothing is emitted.
        }
    }
}
/// Reports whether an assembler symbol denotes a general-purpose register
/// operand (and should therefore be printed through `a32_reg`).
fn is_reg_sym(sym: &str) -> bool {
    const REG_SYMS: [&str; 11] = [
        "<Rd>", "<Rn>", "<Rm>", "<Rs>", "<Rt>", "<Ra>",
        "<Rt2>", "<RdHi>", "<RdLo>", "<Rdm>", "<Rdn>",
    ];
    REG_SYMS.contains(&sym)
}
/// Maps an `encodedin` string to the Rust field name of the variant that
/// holds the value.
///
/// The whole string is tried first; for a composite such as "imm4H:imm4L" the
/// part before the first `:` (the MSB chunk) is tried next. The chunks are not
/// reassembled, which is imprecise but acceptable for display purposes.
/// Returns `None` for an empty string or when neither candidate is a field.
fn resolve_encodedin_field<'a>(
    encodedin: &str,
    fields: &'a HashMap<String, &FieldDef>,
) -> Option<String> {
    if encodedin.is_empty() {
        return None;
    }
    let msb_chunk = encodedin.split(':').next().unwrap_or("");
    [encodedin, msb_chunk]
        .iter()
        .map(|candidate| rust_field_name(candidate))
        .find(|ident| fields.contains_key(ident.as_str()))
}
// ---------------------------------------------------------------------------
// Static helpers emitted into the generated file
// ---------------------------------------------------------------------------
/// Rust source appended verbatim to the end of the generated file by
/// `generate_rust`. It defines the runtime helpers that the generated
/// `Display` arms call: `a32_cond`, `a32_reg`, `a32_shift`,
/// `a32_expand_imm12`, `a32_branch_offset` and `a32_reglist`.
///
/// Everything inside the raw string, comments included, is emitted output;
/// editing it changes the generated file.
const HELPERS: &str = r#"
pub const fn a32_cond(cond: u8) -> &'static str {
match cond & 0xF {
0 => "eq", 1 => "ne", 2 => "hs", 3 => "lo",
4 => "mi", 5 => "pl", 6 => "vs", 7 => "vc",
8 => "hi", 9 => "ls", 10 => "ge", 11 => "lt",
12 => "gt", 13 => "le", _ => "",
}
}
pub const fn a32_reg(r: u8) -> &'static str {
match r & 0xF {
0 => "r0", 1 => "r1", 2 => "r2", 3 => "r3",
4 => "r4", 5 => "r5", 6 => "r6", 7 => "r7",
8 => "r8", 9 => "r9", 10 => "r10", 11 => "r11",
12 => "r12", 13 => "sp", 14 => "lr", 15 => "pc",
_ => "??",
}
}
pub const fn a32_shift(stype: u8) -> &'static str {
match stype & 3 {
0 => "lsl", 1 => "lsr", 2 => "asr", 3 => "ror", _ => "??",
}
}
pub const fn a32_expand_imm12(imm12: u32) -> u32 {
let rot = (imm12 >> 8) & 0xF;
let imm8 = imm12 & 0xFF;
imm8.rotate_right(rot * 2)
}
pub const fn a32_branch_offset(imm24: u32) -> i32 {
// Sign-extend the 24-bit field and convert to bytes (<<2)
(imm24 << 8) as i32 >> 6
}
// a32_reglist cannot be const fn because it builds a String at runtime.
pub fn a32_reglist(regs: u32) -> String {
let mut s = String::from("{");
let mut first = true;
for i in 0..16u32 {
if regs & (1 << i) != 0 {
if !first { s.push_str(", "); }
s.push_str(a32_reg(i as u8));
first = false;
}
}
s.push('}');
s
}
"#;