use std::collections::HashMap; use std::fmt::Write as _; use std::path::{Path, PathBuf}; fn main() { let spec_dir = Path::new("specs"); println!("cargo:rerun-if-changed={}", spec_dir.display()); println!("cargo:rerun-if-changed=build.rs"); let mut entries = collect_encodings(spec_dir); // most-specific (most fixed bits) first so the decoder's linear scan is correct entries.sort_by(|a, b| { b.mask .count_ones() .cmp(&a.mask.count_ones()) .then(a.id.cmp(&b.id)) }); let code = generate_rust(&entries); let out = PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("arm_a32.rs"); std::fs::write(out, code).unwrap(); } #[derive(Debug, Clone)] struct BoxInfo { hibit: i32, width: i32, name: Option, usename: bool, /// One entry per bit position (MSB first). Values: "0", "1", "" or a /// constraint string like "!= 1111" / "Z" / "N". values: Vec, settings: i32, } impl BoxInfo { fn lobit(&self) -> i32 { self.hibit - self.width + 1 } } #[derive(Debug, Clone)] struct FieldDef { name: String, hibit: i32, lobit: i32, width: i32, } #[derive(Debug, Clone)] struct AsmTok { is_link: bool, // false = literal text, true = symbol text: String, } #[derive(Debug)] struct EncEntry { id: String, mnemonic: String, mask: u32, pattern: u32, fields: Vec, cond_ne_1111: bool, asm_tokens: Vec, sym_map: HashMap, } fn collect_encodings(dir: &Path) -> Vec { let mut result = Vec::new(); let mut paths: Vec = std::fs::read_dir(dir) .unwrap() .filter_map(|e| e.ok()) .map(|e| e.path()) .filter(|p| p.extension().is_some_and(|e| e == "xml")) .collect(); paths.sort(); for path in paths { let text = match std::fs::read_to_string(&path) { Ok(t) => t, Err(_) => continue, }; let opts = roxmltree::ParsingOptions { allow_dtd: true, ..Default::default() }; let doc = match roxmltree::Document::parse_with_options(&text, opts) { Ok(d) => d, Err(_) => continue, }; let root = doc.root_element(); if root.tag_name().name() != "instructionsection" { continue; } // Only "general" instr-class let instr_class = root .descendants() .find(|n| n.tag_name().name() == "docvars") .and_then(|dv| { dv.children() .filter(|n| n.is_element()) .find(|n| n.attribute("key") == Some("instr-class")) .and_then(|n| n.attribute("value")) }); if instr_class != Some("general") { continue; } // Build file-level symbol → encodedin map from explanations let sym_map = build_sym_map(&root); // Walk iclass elements for iclass in root.descendants().filter(|n| { n.is_element() && n.tag_name().name() == "iclass" && n.attribute("isa") == Some("A32") }) { let base_boxes = parse_regdiagram_boxes(&iclass); for enc in iclass .children() .filter(|n| n.is_element() && n.tag_name().name() == "encoding") { if let Some(entry) = build_entry(&enc, &base_boxes, &sym_map) { result.push(entry); } } } } result } fn build_sym_map(root: &roxmltree::Node) -> HashMap { let mut map = HashMap::new(); for exp in root .descendants() .filter(|n| n.is_element() && n.tag_name().name() == "explanation") { let sym = exp .children() .find(|n| n.is_element() && n.tag_name().name() == "symbol") .and_then(|n| n.text()) .map(str::to_string); let encodedin = exp .children() .find(|n| n.is_element() && n.tag_name().name() == "account") .and_then(|n| n.attribute("encodedin")) .unwrap_or("") .to_string(); if let Some(s) = sym { map.entry(s).or_insert(encodedin); } } map } fn parse_regdiagram_boxes(iclass: &roxmltree::Node) -> Vec { let rd = match iclass .children() .find(|n| n.is_element() && n.tag_name().name() == "regdiagram") { Some(n) => n, None => return Vec::new(), }; rd.children() .filter(|n| n.is_element() && n.tag_name().name() == "box") .map(parse_box) .collect() } fn parse_box(node: roxmltree::Node) -> BoxInfo { let hibit: i32 = node .attribute("hibit") .and_then(|v| v.parse().ok()) .unwrap_or(0); let width: i32 = node .attribute("width") .and_then(|v| v.parse().ok()) .unwrap_or(1); let name = node.attribute("name").map(str::to_string); let usename = node.attribute("usename") == Some("1"); let settings: i32 = node .attribute("settings") .and_then(|v| v.parse().ok()) .unwrap_or(0); let mut values: Vec = Vec::new(); for c in node .children() .filter(|n| n.is_element() && n.tag_name().name() == "c") { let span: usize = c .attribute("colspan") .and_then(|v| v.parse().ok()) .unwrap_or(1); let text = c.text().unwrap_or("").trim().to_string(); if span == 1 || text == "0" || text == "1" { for _ in 0..span { values.push(text.clone()); } } else { // Multi-bit constraint ("!= 1111" etc.) for _ in 0..span { values.push(text.clone()); } } } BoxInfo { hibit, width, name, usename, values, settings, } } /// Merge iclass boxes with encoding-specific overrides (encoding wins). fn merge_boxes(base: &[BoxInfo], overrides: &[BoxInfo]) -> Vec { let mut result = base.to_vec(); for ov in overrides { if let Some(pos) = result.iter().position(|b| b.hibit == ov.hibit) { result[pos] = ov.clone(); } } result } fn build_entry( enc: &roxmltree::Node, base_boxes: &[BoxInfo], sym_map: &HashMap, ) -> Option { let id = enc.attribute("name")?.to_string(); let mnemonic = enc .descendants() .find(|n| { n.is_element() && n.tag_name().name() == "docvar" && n.attribute("key") == Some("mnemonic") }) .and_then(|n| n.attribute("value")) .unwrap_or("UNK") .to_string(); let enc_boxes: Vec = enc .children() .filter(|n| n.is_element() && n.tag_name().name() == "box") .map(parse_box) .collect(); let all_boxes = merge_boxes(base_boxes, &enc_boxes); let (mask, pattern, cond_ne_1111) = compute_mask_pattern(&all_boxes); let fields = extract_fields(&all_boxes); // ASM template (first one) let asm_tokens = enc .descendants() .find(|n| n.is_element() && n.tag_name().name() == "asmtemplate") .map(|tmpl| { tmpl.children() .filter(|n| n.is_element()) .map(|n| AsmTok { is_link: n.tag_name().name() == "a", text: n.text().unwrap_or("").to_string(), }) .collect() }) .unwrap_or_default(); Some(EncEntry { id, mnemonic, mask, pattern, fields, cond_ne_1111, asm_tokens, sym_map: sym_map.clone(), }) } fn compute_mask_pattern(boxes: &[BoxInfo]) -> (u32, u32, bool) { let mut mask: u32 = 0; let mut pattern: u32 = 0; let mut cond_ne_1111 = false; for b in boxes { let is_cond = b.name.as_deref() == Some("cond"); if b.values.iter().any(|v| v.contains("!= 1111")) { if is_cond { cond_ne_1111 = true; } continue; } for (i, val) in b.values.iter().enumerate() { if i as i32 >= b.width { break; } let bit_pos = (b.hibit - i as i32) as u32; match val.as_str() { "0" => { mask |= 1 << bit_pos; } "1" => { mask |= 1 << bit_pos; pattern |= 1 << bit_pos; } _ => {} } } } (mask, pattern, cond_ne_1111) } fn extract_fields(boxes: &[BoxInfo]) -> Vec { let mut seen = std::collections::HashSet::new(); let mut fields = Vec::new(); for b in boxes { if !b.usename { continue; } let raw = match &b.name { Some(n) if !n.is_empty() && n != "?" => n.as_str(), _ => continue, }; let name = rust_field_name(raw); if seen.insert(name.clone()) { fields.push(FieldDef { name, hibit: b.hibit, lobit: b.lobit(), width: b.width, }); } } fields } fn variant_name(id: &str) -> String { id.split('_') .map(|part| { let mut chars = part.chars(); match chars.next() { None => String::new(), Some(c) => c.to_uppercase().to_string() + &chars.as_str().to_lowercase(), } }) .collect() } fn rust_field_name(xml_name: &str) -> String { // First apply well-known renames, then sanitize any remaining invalid chars. let base = match xml_name { "type" => "ty", "fn" => "fn_reg", "register_list" => "regs", other => other, }; // Sanitize: replace anything that isn't alphanumeric or `_` with `_`, // collapse runs of `_`, and strip leading/trailing `_`. let sanitized: String = base .to_lowercase() .chars() .map(|c| { if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' } }) .collect(); // Collapse repeated underscores and trim edges. let mut out = String::with_capacity(sanitized.len()); let mut prev_under = false; for c in sanitized.chars() { if c == '_' { if !prev_under { out.push(c); } prev_under = true; } else { out.push(c); prev_under = false; } } let trimmed = out.trim_matches('_').to_string(); if trimmed.is_empty() { "field".to_string() } else { trimmed } } fn field_rust_type(width: i32) -> &'static str { if width == 1 { "bool" } else if width <= 8 { "u8" } else if width <= 16 { "u16" } else { "u32" } } fn generate_rust(entries: &[EncEntry]) -> String { let mut out = String::new(); writeln!( out, "// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT." ) .unwrap(); writeln!(out, "use core::fmt;").unwrap(); writeln!(out).unwrap(); writeln!(out, "#[derive(Clone, Debug)]").unwrap(); writeln!(out, "pub enum A32Inst {{").unwrap(); for e in entries { let vname = variant_name(&e.id); if e.fields.is_empty() { writeln!(out, " {vname},").unwrap(); } else { let fields: Vec = e .fields .iter() .map(|f| format!("{}: {}", f.name, field_rust_type(f.width))) .collect(); writeln!(out, " {vname} {{ {} }},", fields.join(", ")).unwrap(); } } writeln!(out, "}}").unwrap(); writeln!(out).unwrap(); writeln!(out, "#[derive(Clone, Copy, Debug, PartialEq, Eq)]").unwrap(); writeln!(out, "pub enum A32DecodeError {{ TooShort, Unknown }}").unwrap(); writeln!(out).unwrap(); // One tiny `fn a32_dec_N(w: u32) -> A32Inst` per encoding. for (idx, e) in entries.iter().enumerate() { let vname = variant_name(&e.id); writeln!(out, "const fn a32_dec_{idx}(w: u32) -> A32Inst {{").unwrap(); if e.fields.is_empty() { writeln!(out, " let _ = w; A32Inst::{vname}").unwrap(); } else { let extracts: Vec = e.fields.iter().map(gen_field_extract_w).collect(); writeln!(out, " A32Inst::{vname} {{ {} }}", extracts.join(", ")).unwrap(); } writeln!(out, "}}").unwrap(); } writeln!(out).unwrap(); let n = entries.len(); writeln!(out, "type A32DecFn = fn(u32) -> A32Inst;").unwrap(); write!(out, "static A32_DECODERS: [A32DecFn; {n}] = [").unwrap(); for idx in 0..n { write!(out, "a32_dec_{idx},").unwrap(); } writeln!(out, "];").unwrap(); writeln!(out).unwrap(); // slot index = bits[27:20] of the instruction word. // For each encoding, enumerate all hi-byte values it can match. let mut slots: Vec> = vec![vec![]; 256]; for (enc_idx, e) in entries.iter().enumerate() { let hi_mask = ((e.mask >> 20) & 0xFF) as u8; let hi_pat = ((e.pattern >> 20) & 0xFF) as u8; for x in 0u8..=255 { if x & hi_mask == hi_pat { slots[x as usize].push((enc_idx, e.mask.count_ones())); } } } // Within each slot keep the most-specific (most fixed bits) first. for slot in &mut slots { slot.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); } // Flatten into one array; record (start, count) per slot. // Entry format: (mask, pattern, flags, dec_idx) // flags bit 0: 1 = cond must not be 0xF let mut all_cands: Vec<(u32, u32, u8, u16)> = Vec::new(); let mut slot_table: Vec<(u16, u16)> = Vec::new(); for slot in &slots { let start = all_cands.len() as u16; let count = slot.len() as u16; for &(enc_idx, _) in slot { let e = &entries[enc_idx]; let flags = u8::from(e.cond_ne_1111); all_cands.push((e.mask, e.pattern, flags, enc_idx as u16)); } slot_table.push((start, count)); } // Emit the flat candidate array. let total = all_cands.len(); writeln!(out, "static A32_CANDS: [(u32, u32, u8, u16); {total}] = [").unwrap(); for (mask, pat, flags, idx) in &all_cands { writeln!(out, " (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx}),").unwrap(); } writeln!(out, "];").unwrap(); writeln!(out).unwrap(); // Emit the slot table. writeln!(out, "static A32_SLOTS: [(u16, u16); 256] = [").unwrap(); for (start, count) in &slot_table { write!(out, " ({start}, {count}),").unwrap(); } writeln!(out, "\n];").unwrap(); writeln!(out).unwrap(); writeln!( out, "pub fn decode_a32(bytes: &[u8]) -> Result<(usize, A32Inst), A32DecodeError> {{" ) .unwrap(); writeln!( out, " if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}" ) .unwrap(); writeln!( out, " let word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);" ) .unwrap(); writeln!(out, " let cond = ((word >> 28) & 0xF) as u8;").unwrap(); writeln!(out, " let hi = ((word >> 20) & 0xFF) as usize;").unwrap(); writeln!(out, " let (start, count) = A32_SLOTS[hi];").unwrap(); writeln!( out, " for i in start as usize..(start + count) as usize {{" ) .unwrap(); writeln!( out, " let (mask, pattern, flags, dec_idx) = A32_CANDS[i];" ) .unwrap(); writeln!( out, " if word & mask == pattern && (flags == 0 || cond != 0xF) {{" ) .unwrap(); writeln!( out, " return Ok((4, A32_DECODERS[dec_idx as usize](word)));" ) .unwrap(); writeln!(out, " }}").unwrap(); writeln!(out, " }}").unwrap(); writeln!(out, " Err(A32DecodeError::Unknown)").unwrap(); writeln!(out, "}}").unwrap(); writeln!(out).unwrap(); writeln!(out, "impl fmt::Display for A32Inst {{").unwrap(); writeln!( out, " fn fmt(&self, __f: &mut fmt::Formatter<'_>) -> fmt::Result {{" ) .unwrap(); writeln!(out, " match self {{").unwrap(); for e in entries { gen_display_arm(&mut out, e); } writeln!(out, " }}").unwrap(); writeln!(out, " }}").unwrap(); writeln!(out, "}}").unwrap(); writeln!(out).unwrap(); out.push_str(HELPERS); out } fn gen_field_extract(f: &FieldDef) -> String { let lobit = f.lobit as u32; let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 }; let ty = field_rust_type(f.width); let name = &f.name; if ty == "bool" { format!("{name}: (word >> {lobit}) & 1 == 1") } else if ty == "u8" { format!("{name}: ((word >> {lobit}) & 0x{mask:X}) as u8") } else if ty == "u16" { format!("{name}: ((word >> {lobit}) & 0x{mask:X}) as u16") } else { format!("{name}: (word >> {lobit}) & 0x{mask:X}") } } /// Same as `gen_field_extract` but references the variable `w` (used in per-encoding /// decoder functions where the parameter is named `w`, not `word`). fn gen_field_extract_w(f: &FieldDef) -> String { let lobit = f.lobit as u32; let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 }; let ty = field_rust_type(f.width); let name = &f.name; if ty == "bool" { format!("{name}: (w >> {lobit}) & 1 == 1") } else if ty == "u8" { format!("{name}: ((w >> {lobit}) & 0x{mask:X}) as u8") } else if ty == "u16" { format!("{name}: ((w >> {lobit}) & 0x{mask:X}) as u16") } else { format!("{name}: (w >> {lobit}) & 0x{mask:X}") } } fn gen_display_arm(out: &mut String, e: &EncEntry) { let vname = variant_name(&e.id); // Build field list for destructuring let field_names: Vec<&str> = e.fields.iter().map(|f| f.name.as_str()).collect(); let destruct = if field_names.is_empty() { String::new() } else { format!("{{ {} }}", field_names.join(", ")) }; writeln!(out, " Self::{vname} {destruct} => {{").unwrap(); // Process ASM template tokens into a sequence of write!() calls. // We track optional-group depth; groups get collapsed to their content // with special handling for the few known patterns. let toks = &e.asm_tokens; let sym_map = &e.sym_map; // Find field lookup by Rust name let field_by_xmlname: HashMap = e.fields.iter().map(|f| (f.name.clone(), f)).collect(); let mut i = 0; while i < toks.len() { let tok = &toks[i]; if !tok.is_link && tok.text == "{" { // Peek at optional group content let (group_toks, end) = collect_opt_group(toks, i); i = end + 1; emit_opt_group(out, &group_toks, sym_map, &field_by_xmlname, e); } else { emit_token(out, tok, sym_map, &field_by_xmlname, e); i += 1; } } writeln!(out, " Ok(())").unwrap(); writeln!(out, " }}").unwrap(); } /// Returns the tokens inside the next `{...}` group starting at `start` /// (which must be `{`), and the index of the closing `}`. fn collect_opt_group(toks: &[AsmTok], start: usize) -> (Vec<&AsmTok>, usize) { let mut depth = 0; let mut group = Vec::new(); let mut i = start; while i < toks.len() { let tok = &toks[i]; if !tok.is_link && tok.text == "{" { depth += 1; if depth > 1 { group.push(tok); } } else if !tok.is_link && tok.text == "}" { depth -= 1; if depth == 0 { return (group, i); } else { group.push(tok); } } else if depth > 0 { group.push(tok); } i += 1; } (group, i) } fn emit_opt_group( out: &mut String, group: &[&AsmTok], sym_map: &HashMap, fields: &HashMap, e: &EncEntry, ) { // Identify group type by content let is_only_link = group.len() == 1 && group[0].is_link; if is_only_link { match group[0].text.as_str() { "" => { // condition suffix - always print (may be empty) if fields.contains_key("cond") { writeln!( out, " write!(__f,\"{{}}\", a32_cond(*cond))?;" ) .unwrap(); } return; } "" => { // qualifier - always skip return; } "{!}" | "!" => { // writeback - conditional on W or writeback field if fields.contains_key("w") { writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap(); } else if fields.contains_key("wback") { writeln!(out, " if *wback {{ write!(__f,\"!\")?; }}").unwrap(); } return; } "{IA}" | "IA" => { // default LDM addressing mode - omit return; } _ => {} } } // {,} - optional dest register, always print with following comma let all_texts: Vec<&str> = group.iter().map(|t| t.text.as_str()).collect(); let link_texts: Vec<&str> = group .iter() .filter(|t| t.is_link) .map(|t| t.text.as_str()) .collect(); // {, #} or {, } if link_texts.contains(&"") { // Emit shift conditionally: skip if LSL #0 let has_amount = link_texts.contains(&""); let stype_field = fields.get("stype").or_else(|| fields.get("shift")); let amount_field = fields.get("imm5").or_else(|| fields.get("amount")); if has_amount { if stype_field.is_some() && amount_field.is_some() { writeln!(out, " if *stype != 0 || *imm5 != 0 {{ write!(__f,\", {{}} #{{}}\", a32_shift(*stype), *imm5)?; }}" ).unwrap(); } } else { // {, } without amount - likely RRX variant; always print if stype_field.is_some() { writeln!( out, " write!(__f,\", {{}}\", a32_shift(*stype))?;" ) .unwrap(); } } return; } // {, #{+/-}} - memory offset if link_texts.contains(&"{+/-}") || link_texts.contains(&"+/-") { // Always emit for tok in group { emit_token(out, tok, sym_map, fields, e); } return; } // Everything else: always emit contents for tok in group { emit_token(out, tok, sym_map, fields, e); } } fn emit_token( out: &mut String, tok: &AsmTok, sym_map: &HashMap, fields: &HashMap, e: &EncEntry, ) { if !tok.is_link { // Literal text - escape for Rust string let escaped = tok .text .replace('\\', "\\\\") .replace('"', "\\\"") .to_lowercase(); if !escaped.is_empty() { writeln!(out, " write!(__f,\"{escaped}\")?;").unwrap(); } return; } // It's an symbol let sym = tok.text.as_str(); match sym { "" => { if fields.contains_key("cond") { writeln!( out, " write!(__f,\"{{}}\", a32_cond(*cond))?;" ) .unwrap(); } } "" => { /* skip */ } "{!}" | "!" => { if fields.contains_key("w") { writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap(); } } "{+/-}" | "+/-" => { if fields.contains_key("u") { writeln!( out, " write!(__f,\"{{}}\", if *u {{ '+' }} else {{ '-' }})?;" ) .unwrap(); } } "{IA}" | "IA" | "SP," => { let literal = match sym { "SP," => "sp, ", "IA" | "{IA}" => "", _ => sym, }; if !literal.is_empty() { writeln!(out, " write!(__f,\"{literal}\")?;").unwrap(); } } "" => { if let Some(_f) = fields.get("stype") { writeln!( out, " write!(__f,\"{{}}\", a32_shift(*stype))?;" ) .unwrap(); } } "" | "" | "" => { if let Some(fld) = fields.get("regs").or_else(|| fields.get("register_list")) { let fname = &fld.name; writeln!( out, " write!(__f,\"{{}}\", a32_reglist(*{fname} as u32))?;" ) .unwrap(); } } "