diff --git a/build.rs b/build.rs index 4095c22..af49e54 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,3 @@ -/// Code-generator: reads ARM ISA XML specs and emits `$OUT_DIR/arm_a32.rs`. use std::collections::HashMap; use std::fmt::Write as _; use std::path::{Path, PathBuf}; @@ -11,7 +10,10 @@ fn main() { let mut entries = collect_encodings(spec_dir); // most-specific (most fixed bits) first so the decoder's linear scan is correct entries.sort_by(|a, b| { - b.mask.count_ones().cmp(&a.mask.count_ones()).then(a.id.cmp(&b.id)) + b.mask + .count_ones() + .cmp(&a.mask.count_ones()) + .then(a.id.cmp(&b.id)) }); let code = generate_rust(&entries); @@ -19,10 +21,6 @@ fn main() { std::fs::write(out, code).unwrap(); } -// --------------------------------------------------------------------------- -// Data types -// --------------------------------------------------------------------------- - #[derive(Debug, Clone)] struct BoxInfo { hibit: i32, @@ -43,7 +41,7 @@ impl BoxInfo { #[derive(Debug, Clone)] struct FieldDef { - name: String, // lowercased Rust identifier + name: String, hibit: i32, lobit: i32, width: i32, @@ -62,24 +60,18 @@ struct EncEntry { mask: u32, pattern: u32, fields: Vec, - /// true if the `cond` field has a "!= 1111" constraint cond_ne_1111: bool, asm_tokens: Vec, - /// symbol text → encodedin field name sym_map: HashMap, } -// --------------------------------------------------------------------------- -// XML collection -// --------------------------------------------------------------------------- - fn collect_encodings(dir: &Path) -> Vec { let mut result = Vec::new(); let mut paths: Vec = std::fs::read_dir(dir) .unwrap() .filter_map(|e| e.ok()) .map(|e| e.path()) - .filter(|p| p.extension().map_or(false, |e| e == "xml")) + .filter(|p| p.extension().is_some_and(|e| e == "xml")) .collect(); paths.sort(); for path in paths { @@ -87,7 +79,10 @@ fn collect_encodings(dir: &Path) -> Vec { Ok(t) => t, Err(_) => continue, }; - let opts = roxmltree::ParsingOptions { allow_dtd: true, ..Default::default() }; + let opts = roxmltree::ParsingOptions { + allow_dtd: true, + ..Default::default() + }; let doc = match roxmltree::Document::parse_with_options(&text, opts) { Ok(d) => d, Err(_) => continue, @@ -113,17 +108,14 @@ fn collect_encodings(dir: &Path) -> Vec { let sym_map = build_sym_map(&root); // Walk iclass elements for iclass in root.descendants().filter(|n| { - n.is_element() - && n.tag_name().name() == "iclass" - && n.attribute("isa") == Some("A32") + n.is_element() && n.tag_name().name() == "iclass" && n.attribute("isa") == Some("A32") }) { let base_boxes = parse_regdiagram_boxes(&iclass); - for enc in iclass.children().filter(|n| { - n.is_element() && n.tag_name().name() == "encoding" - }) { - if let Some(entry) = - build_entry(&enc, &base_boxes, &sym_map) - { + for enc in iclass + .children() + .filter(|n| n.is_element() && n.tag_name().name() == "encoding") + { + if let Some(entry) = build_entry(&enc, &base_boxes, &sym_map) { result.push(entry); } } @@ -134,9 +126,10 @@ fn collect_encodings(dir: &Path) -> Vec { fn build_sym_map(root: &roxmltree::Node) -> HashMap { let mut map = HashMap::new(); - for exp in root.descendants().filter(|n| { - n.is_element() && n.tag_name().name() == "explanation" - }) { + for exp in root + .descendants() + .filter(|n| n.is_element() && n.tag_name().name() == "explanation") + { let sym = exp .children() .find(|n| n.is_element() && n.tag_name().name() == "symbol") @@ -206,7 +199,14 @@ fn parse_box(node: roxmltree::Node) -> BoxInfo { } } } - BoxInfo { hibit, width, name, usename, values, settings } + BoxInfo { + hibit, + width, + name, + usename, + values, + settings, + } } /// Merge iclass boxes with encoding-specific overrides (encoding wins). @@ -324,25 +324,24 @@ fn extract_fields(boxes: &[BoxInfo]) -> Vec { }; let name = rust_field_name(raw); if seen.insert(name.clone()) { - fields.push(FieldDef { name, hibit: b.hibit, lobit: b.lobit(), width: b.width }); + fields.push(FieldDef { + name, + hibit: b.hibit, + lobit: b.lobit(), + width: b.width, + }); } } fields } -// --------------------------------------------------------------------------- -// Name helpers -// --------------------------------------------------------------------------- - fn variant_name(id: &str) -> String { id.split('_') .map(|part| { let mut chars = part.chars(); match chars.next() { None => String::new(), - Some(c) => { - c.to_uppercase().to_string() + &chars.as_str().to_lowercase() - } + Some(c) => c.to_uppercase().to_string() + &chars.as_str().to_lowercase(), } }) .collect() @@ -361,7 +360,13 @@ fn rust_field_name(xml_name: &str) -> String { let sanitized: String = base .to_lowercase() .chars() - .map(|c| if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' }) + .map(|c| { + if c.is_ascii_alphanumeric() || c == '_' { + c + } else { + '_' + } + }) .collect(); // Collapse repeated underscores and trim edges. let mut out = String::with_capacity(sanitized.len()); @@ -397,17 +402,16 @@ fn field_rust_type(width: i32) -> &'static str { } } -// --------------------------------------------------------------------------- -// Rust code generation -// --------------------------------------------------------------------------- - fn generate_rust(entries: &[EncEntry]) -> String { let mut out = String::new(); - writeln!(out, "// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT.").unwrap(); + writeln!( + out, + "// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT." + ) + .unwrap(); writeln!(out, "use core::fmt;").unwrap(); writeln!(out).unwrap(); - // ---- Enum --------------------------------------------------------------- writeln!(out, "#[derive(Clone, Debug)]").unwrap(); writeln!(out, "pub enum A32Inst {{").unwrap(); for e in entries { @@ -426,12 +430,10 @@ fn generate_rust(entries: &[EncEntry]) -> String { writeln!(out, "}}").unwrap(); writeln!(out).unwrap(); - // ---- DecodeError -------------------------------------------------------- writeln!(out, "#[derive(Clone, Copy, Debug, PartialEq, Eq)]").unwrap(); writeln!(out, "pub enum A32DecodeError {{ TooShort, Unknown }}").unwrap(); writeln!(out).unwrap(); - // ---- Per-encoding field-extractor functions ------------------------------ // One tiny `fn a32_dec_N(w: u32) -> A32Inst` per encoding. for (idx, e) in entries.iter().enumerate() { let vname = variant_name(&e.id); @@ -439,15 +441,13 @@ fn generate_rust(entries: &[EncEntry]) -> String { if e.fields.is_empty() { writeln!(out, " let _ = w; A32Inst::{vname}").unwrap(); } else { - let extracts: Vec = - e.fields.iter().map(|f| gen_field_extract_w(f)).collect(); + let extracts: Vec = e.fields.iter().map(|f| gen_field_extract_w(f)).collect(); writeln!(out, " A32Inst::{vname} {{ {} }}", extracts.join(", ")).unwrap(); } writeln!(out, "}}").unwrap(); } writeln!(out).unwrap(); - // ---- Decoder function pointer table ------------------------------------- let n = entries.len(); writeln!(out, "type A32DecFn = fn(u32) -> A32Inst;").unwrap(); write!(out, "static A32_DECODERS: [A32DecFn; {n}] = [").unwrap(); @@ -457,13 +457,12 @@ fn generate_rust(entries: &[EncEntry]) -> String { writeln!(out, "];").unwrap(); writeln!(out).unwrap(); - // ---- Build 256-slot lookup table ---------------------------------------- // slot index = bits[27:20] of the instruction word. // For each encoding, enumerate all hi-byte values it can match. let mut slots: Vec> = vec![vec![]; 256]; for (enc_idx, e) in entries.iter().enumerate() { let hi_mask = ((e.mask >> 20) & 0xFF) as u8; - let hi_pat = ((e.pattern >> 20) & 0xFF) as u8; + let hi_pat = ((e.pattern >> 20) & 0xFF) as u8; for x in 0u8..=255 { if x & hi_mask == hi_pat { slots[x as usize].push((enc_idx, e.mask.count_ones())); @@ -493,17 +492,9 @@ fn generate_rust(entries: &[EncEntry]) -> String { // Emit the flat candidate array. let total = all_cands.len(); - writeln!( - out, - "static A32_CANDS: [(u32, u32, u8, u16); {total}] = [" - ) - .unwrap(); + writeln!(out, "static A32_CANDS: [(u32, u32, u8, u16); {total}] = [").unwrap(); for (mask, pat, flags, idx) in &all_cands { - writeln!( - out, - " (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx})," - ) - .unwrap(); + writeln!(out, " (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx}),").unwrap(); } writeln!(out, "];").unwrap(); writeln!(out).unwrap(); @@ -516,13 +507,16 @@ fn generate_rust(entries: &[EncEntry]) -> String { writeln!(out, "\n];").unwrap(); writeln!(out).unwrap(); - // ---- Main decode function ------------------------------------------------ writeln!( out, "pub fn decode_a32(bytes: &[u8]) -> Result<(usize, A32Inst), A32DecodeError> {{" ) .unwrap(); - writeln!(out, " if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}").unwrap(); + writeln!( + out, + " if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}" + ) + .unwrap(); writeln!( out, " let word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);" @@ -531,8 +525,16 @@ fn generate_rust(entries: &[EncEntry]) -> String { writeln!(out, " let cond = ((word >> 28) & 0xF) as u8;").unwrap(); writeln!(out, " let hi = ((word >> 20) & 0xFF) as usize;").unwrap(); writeln!(out, " let (start, count) = A32_SLOTS[hi];").unwrap(); - writeln!(out, " for i in start as usize..(start + count) as usize {{").unwrap(); - writeln!(out, " let (mask, pattern, flags, dec_idx) = A32_CANDS[i];").unwrap(); + writeln!( + out, + " for i in start as usize..(start + count) as usize {{" + ) + .unwrap(); + writeln!( + out, + " let (mask, pattern, flags, dec_idx) = A32_CANDS[i];" + ) + .unwrap(); writeln!( out, " if word & mask == pattern && (flags == 0 || cond != 0xF) {{" @@ -549,7 +551,6 @@ fn generate_rust(entries: &[EncEntry]) -> String { writeln!(out, "}}").unwrap(); writeln!(out).unwrap(); - // ---- Display ------------------------------------------------------------ writeln!(out, "impl fmt::Display for A32Inst {{").unwrap(); writeln!( out, @@ -565,7 +566,6 @@ fn generate_rust(entries: &[EncEntry]) -> String { writeln!(out, "}}").unwrap(); writeln!(out).unwrap(); - // ---- Helpers ------------------------------------------------------------ out.push_str(HELPERS); out @@ -573,7 +573,11 @@ fn generate_rust(entries: &[EncEntry]) -> String { fn gen_field_extract(f: &FieldDef) -> String { let lobit = f.lobit as u32; - let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 }; + let mask = if f.width >= 32 { + u32::MAX + } else { + (1u32 << f.width) - 1 + }; let ty = field_rust_type(f.width); let name = &f.name; if ty == "bool" { @@ -591,7 +595,11 @@ fn gen_field_extract(f: &FieldDef) -> String { /// decoder functions where the parameter is named `w`, not `word`). fn gen_field_extract_w(f: &FieldDef) -> String { let lobit = f.lobit as u32; - let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 }; + let mask = if f.width >= 32 { + u32::MAX + } else { + (1u32 << f.width) - 1 + }; let ty = field_rust_type(f.width); let name = &f.name; if ty == "bool" { @@ -605,10 +613,6 @@ fn gen_field_extract_w(f: &FieldDef) -> String { } } -// --------------------------------------------------------------------------- -// Display generation -// --------------------------------------------------------------------------- - fn gen_display_arm(out: &mut String, e: &EncEntry) { let vname = variant_name(&e.id); @@ -628,11 +632,8 @@ fn gen_display_arm(out: &mut String, e: &EncEntry) { let toks = &e.asm_tokens; let sym_map = &e.sym_map; // Find field lookup by Rust name - let field_by_xmlname: HashMap = e - .fields - .iter() - .map(|f| (f.name.clone(), f)) - .collect(); + let field_by_xmlname: HashMap = + e.fields.iter().map(|f| (f.name.clone(), f)).collect(); let mut i = 0; while i < toks.len() { @@ -693,7 +694,7 @@ fn emit_opt_group( if is_only_link { match group[0].text.as_str() { "" => { - // condition suffix – always print (may be empty) + // condition suffix - always print (may be empty) if fields.contains_key("cond") { writeln!( out, @@ -704,11 +705,11 @@ fn emit_opt_group( return; } "" => { - // qualifier – always skip + // qualifier - always skip return; } "{!}" | "!" => { - // writeback – conditional on W or writeback field + // writeback - conditional on W or writeback field if fields.contains_key("w") { writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap(); } else if fields.contains_key("wback") { @@ -717,14 +718,14 @@ fn emit_opt_group( return; } "{IA}" | "IA" => { - // default LDM addressing mode – omit + // default LDM addressing mode - omit return; } _ => {} } } - // {,} – optional dest register, always print with following comma + // {,} - optional dest register, always print with following comma let all_texts: Vec<&str> = group.iter().map(|t| t.text.as_str()).collect(); let link_texts: Vec<&str> = group .iter() @@ -745,17 +746,19 @@ fn emit_opt_group( ).unwrap(); } } else { - // {, } without amount – likely RRX variant; always print + // {, } without amount - likely RRX variant; always print if stype_field.is_some() { - writeln!(out, + writeln!( + out, " write!(__f,\", {{}}\", a32_shift(*stype))?;" - ).unwrap(); + ) + .unwrap(); } } return; } - // {, #{+/-}} – memory offset + // {, #{+/-}} - memory offset if link_texts.contains(&"{+/-}") || link_texts.contains(&"+/-") { // Always emit for tok in group { @@ -778,7 +781,7 @@ fn emit_token( e: &EncEntry, ) { if !tok.is_link { - // Literal text – escape for Rust string + // Literal text - escape for Rust string let escaped = tok.text.replace('\\', "\\\\").replace('"', "\\\""); if !escaped.is_empty() { writeln!(out, " write!(__f,\"{escaped}\")?;").unwrap(); @@ -855,7 +858,7 @@ fn emit_token( } } _ => { - // Register or immediate symbol – look up encodedin + // Register or immediate symbol - look up encodedin let encodedin = sym_map.get(sym).map(String::as_str).unwrap_or(""); if let Some(fname) = resolve_encodedin_field(encodedin, fields) { if is_reg_sym(sym) { @@ -874,15 +877,11 @@ fn emit_token( ) .unwrap(); } else { - writeln!( - out, - " write!(__f,\"{{}}\", *{fname})?;" - ) - .unwrap(); + writeln!(out, " write!(__f,\"{{}}\", *{fname})?;").unwrap(); } } } else { - // Unknown / no encodedin – skip + // Unknown / no encodedin - skip } } } @@ -891,26 +890,32 @@ fn emit_token( fn is_reg_sym(sym: &str) -> bool { matches!( sym, - "" | "" | "" | "" | "" | "" - | "" | "" | "" | "" | "" + "" + | "" + | "" + | "" + | "" + | "" + | "" + | "" + | "" + | "" + | "" ) } /// Given an `encodedin` string (possibly composite like "imm4H:imm4L"), /// return the Rust field name in the variant that holds the value. -fn resolve_encodedin_field<'a>( - encodedin: &str, - fields: &'a HashMap, -) -> Option { +fn resolve_encodedin_field(encodedin: &str, fields: &HashMap) -> Option { if encodedin.is_empty() { return None; } - // Simple case – direct field name match + // Simple case - direct field name match let lower = rust_field_name(encodedin); if fields.contains_key(lower.as_str()) { return Some(lower); } - // Composite "X:Y" – use first part (MSB chunk) and note we don't reassemble + // Composite "X:Y" - use first part (MSB chunk) and note we don't reassemble // This is imprecise but acceptable for display purposes let first = encodedin.split(':').next().unwrap_or(""); let lower_first = rust_field_name(first); @@ -920,10 +925,6 @@ fn resolve_encodedin_field<'a>( None } -// --------------------------------------------------------------------------- -// Static helpers emitted into the generated file -// --------------------------------------------------------------------------- - const HELPERS: &str = r#" pub const fn a32_cond(cond: u8) -> &'static str { match cond & 0xF {