build: apply clippy fixes

This commit is contained in:
Igor kehrazy 2026-04-07 17:38:37 +03:00
parent ca5681976d
commit e9858a3eb9

213
build.rs
View file

@ -1,4 +1,3 @@
/// Code-generator: reads ARM ISA XML specs and emits `$OUT_DIR/arm_a32.rs`.
use std::collections::HashMap;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
@ -11,7 +10,10 @@ fn main() {
let mut entries = collect_encodings(spec_dir);
// most-specific (most fixed bits) first so the decoder's linear scan is correct
entries.sort_by(|a, b| {
b.mask.count_ones().cmp(&a.mask.count_ones()).then(a.id.cmp(&b.id))
b.mask
.count_ones()
.cmp(&a.mask.count_ones())
.then(a.id.cmp(&b.id))
});
let code = generate_rust(&entries);
@ -19,10 +21,6 @@ fn main() {
std::fs::write(out, code).unwrap();
}
// ---------------------------------------------------------------------------
// Data types
// ---------------------------------------------------------------------------
#[derive(Debug, Clone)]
struct BoxInfo {
hibit: i32,
@ -43,7 +41,7 @@ impl BoxInfo {
#[derive(Debug, Clone)]
struct FieldDef {
name: String, // lowercased Rust identifier
name: String,
hibit: i32,
lobit: i32,
width: i32,
@ -62,24 +60,18 @@ struct EncEntry {
mask: u32,
pattern: u32,
fields: Vec<FieldDef>,
/// true if the `cond` field has a "!= 1111" constraint
cond_ne_1111: bool,
asm_tokens: Vec<AsmTok>,
/// symbol text → encodedin field name
sym_map: HashMap<String, String>,
}
// ---------------------------------------------------------------------------
// XML collection
// ---------------------------------------------------------------------------
fn collect_encodings(dir: &Path) -> Vec<EncEntry> {
let mut result = Vec::new();
let mut paths: Vec<PathBuf> = std::fs::read_dir(dir)
.unwrap()
.filter_map(|e| e.ok())
.map(|e| e.path())
.filter(|p| p.extension().map_or(false, |e| e == "xml"))
.filter(|p| p.extension().is_some_and(|e| e == "xml"))
.collect();
paths.sort();
for path in paths {
@ -87,7 +79,10 @@ fn collect_encodings(dir: &Path) -> Vec<EncEntry> {
Ok(t) => t,
Err(_) => continue,
};
let opts = roxmltree::ParsingOptions { allow_dtd: true, ..Default::default() };
let opts = roxmltree::ParsingOptions {
allow_dtd: true,
..Default::default()
};
let doc = match roxmltree::Document::parse_with_options(&text, opts) {
Ok(d) => d,
Err(_) => continue,
@ -113,17 +108,14 @@ fn collect_encodings(dir: &Path) -> Vec<EncEntry> {
let sym_map = build_sym_map(&root);
// Walk iclass elements
for iclass in root.descendants().filter(|n| {
n.is_element()
&& n.tag_name().name() == "iclass"
&& n.attribute("isa") == Some("A32")
n.is_element() && n.tag_name().name() == "iclass" && n.attribute("isa") == Some("A32")
}) {
let base_boxes = parse_regdiagram_boxes(&iclass);
for enc in iclass.children().filter(|n| {
n.is_element() && n.tag_name().name() == "encoding"
}) {
if let Some(entry) =
build_entry(&enc, &base_boxes, &sym_map)
{
for enc in iclass
.children()
.filter(|n| n.is_element() && n.tag_name().name() == "encoding")
{
if let Some(entry) = build_entry(&enc, &base_boxes, &sym_map) {
result.push(entry);
}
}
@ -134,9 +126,10 @@ fn collect_encodings(dir: &Path) -> Vec<EncEntry> {
fn build_sym_map(root: &roxmltree::Node) -> HashMap<String, String> {
let mut map = HashMap::new();
for exp in root.descendants().filter(|n| {
n.is_element() && n.tag_name().name() == "explanation"
}) {
for exp in root
.descendants()
.filter(|n| n.is_element() && n.tag_name().name() == "explanation")
{
let sym = exp
.children()
.find(|n| n.is_element() && n.tag_name().name() == "symbol")
@ -206,7 +199,14 @@ fn parse_box(node: roxmltree::Node) -> BoxInfo {
}
}
}
BoxInfo { hibit, width, name, usename, values, settings }
BoxInfo {
hibit,
width,
name,
usename,
values,
settings,
}
}
/// Merge iclass boxes with encoding-specific overrides (encoding wins).
@ -324,25 +324,24 @@ fn extract_fields(boxes: &[BoxInfo]) -> Vec<FieldDef> {
};
let name = rust_field_name(raw);
if seen.insert(name.clone()) {
fields.push(FieldDef { name, hibit: b.hibit, lobit: b.lobit(), width: b.width });
fields.push(FieldDef {
name,
hibit: b.hibit,
lobit: b.lobit(),
width: b.width,
});
}
}
fields
}
// ---------------------------------------------------------------------------
// Name helpers
// ---------------------------------------------------------------------------
fn variant_name(id: &str) -> String {
id.split('_')
.map(|part| {
let mut chars = part.chars();
match chars.next() {
None => String::new(),
Some(c) => {
c.to_uppercase().to_string() + &chars.as_str().to_lowercase()
}
Some(c) => c.to_uppercase().to_string() + &chars.as_str().to_lowercase(),
}
})
.collect()
@ -361,7 +360,13 @@ fn rust_field_name(xml_name: &str) -> String {
let sanitized: String = base
.to_lowercase()
.chars()
.map(|c| if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' })
.map(|c| {
if c.is_ascii_alphanumeric() || c == '_' {
c
} else {
'_'
}
})
.collect();
// Collapse repeated underscores and trim edges.
let mut out = String::with_capacity(sanitized.len());
@ -397,17 +402,16 @@ fn field_rust_type(width: i32) -> &'static str {
}
}
// ---------------------------------------------------------------------------
// Rust code generation
// ---------------------------------------------------------------------------
fn generate_rust(entries: &[EncEntry]) -> String {
let mut out = String::new();
writeln!(out, "// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT.").unwrap();
writeln!(
out,
"// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT."
)
.unwrap();
writeln!(out, "use core::fmt;").unwrap();
writeln!(out).unwrap();
// ---- Enum ---------------------------------------------------------------
writeln!(out, "#[derive(Clone, Debug)]").unwrap();
writeln!(out, "pub enum A32Inst {{").unwrap();
for e in entries {
@ -426,12 +430,10 @@ fn generate_rust(entries: &[EncEntry]) -> String {
writeln!(out, "}}").unwrap();
writeln!(out).unwrap();
// ---- DecodeError --------------------------------------------------------
writeln!(out, "#[derive(Clone, Copy, Debug, PartialEq, Eq)]").unwrap();
writeln!(out, "pub enum A32DecodeError {{ TooShort, Unknown }}").unwrap();
writeln!(out).unwrap();
// ---- Per-encoding field-extractor functions ------------------------------
// One tiny `fn a32_dec_N(w: u32) -> A32Inst` per encoding.
for (idx, e) in entries.iter().enumerate() {
let vname = variant_name(&e.id);
@ -439,15 +441,13 @@ fn generate_rust(entries: &[EncEntry]) -> String {
if e.fields.is_empty() {
writeln!(out, " let _ = w; A32Inst::{vname}").unwrap();
} else {
let extracts: Vec<String> =
e.fields.iter().map(|f| gen_field_extract_w(f)).collect();
let extracts: Vec<String> = e.fields.iter().map(|f| gen_field_extract_w(f)).collect();
writeln!(out, " A32Inst::{vname} {{ {} }}", extracts.join(", ")).unwrap();
}
writeln!(out, "}}").unwrap();
}
writeln!(out).unwrap();
// ---- Decoder function pointer table -------------------------------------
let n = entries.len();
writeln!(out, "type A32DecFn = fn(u32) -> A32Inst;").unwrap();
write!(out, "static A32_DECODERS: [A32DecFn; {n}] = [").unwrap();
@ -457,13 +457,12 @@ fn generate_rust(entries: &[EncEntry]) -> String {
writeln!(out, "];").unwrap();
writeln!(out).unwrap();
// ---- Build 256-slot lookup table ----------------------------------------
// slot index = bits[27:20] of the instruction word.
// For each encoding, enumerate all hi-byte values it can match.
let mut slots: Vec<Vec<(usize, u32)>> = vec![vec![]; 256];
for (enc_idx, e) in entries.iter().enumerate() {
let hi_mask = ((e.mask >> 20) & 0xFF) as u8;
let hi_pat = ((e.pattern >> 20) & 0xFF) as u8;
let hi_pat = ((e.pattern >> 20) & 0xFF) as u8;
for x in 0u8..=255 {
if x & hi_mask == hi_pat {
slots[x as usize].push((enc_idx, e.mask.count_ones()));
@ -493,17 +492,9 @@ fn generate_rust(entries: &[EncEntry]) -> String {
// Emit the flat candidate array.
let total = all_cands.len();
writeln!(
out,
"static A32_CANDS: [(u32, u32, u8, u16); {total}] = ["
)
.unwrap();
writeln!(out, "static A32_CANDS: [(u32, u32, u8, u16); {total}] = [").unwrap();
for (mask, pat, flags, idx) in &all_cands {
writeln!(
out,
" (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx}),"
)
.unwrap();
writeln!(out, " (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx}),").unwrap();
}
writeln!(out, "];").unwrap();
writeln!(out).unwrap();
@ -516,13 +507,16 @@ fn generate_rust(entries: &[EncEntry]) -> String {
writeln!(out, "\n];").unwrap();
writeln!(out).unwrap();
// ---- Main decode function ------------------------------------------------
writeln!(
out,
"pub fn decode_a32(bytes: &[u8]) -> Result<(usize, A32Inst), A32DecodeError> {{"
)
.unwrap();
writeln!(out, " if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}").unwrap();
writeln!(
out,
" if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}"
)
.unwrap();
writeln!(
out,
" let word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);"
@ -531,8 +525,16 @@ fn generate_rust(entries: &[EncEntry]) -> String {
writeln!(out, " let cond = ((word >> 28) & 0xF) as u8;").unwrap();
writeln!(out, " let hi = ((word >> 20) & 0xFF) as usize;").unwrap();
writeln!(out, " let (start, count) = A32_SLOTS[hi];").unwrap();
writeln!(out, " for i in start as usize..(start + count) as usize {{").unwrap();
writeln!(out, " let (mask, pattern, flags, dec_idx) = A32_CANDS[i];").unwrap();
writeln!(
out,
" for i in start as usize..(start + count) as usize {{"
)
.unwrap();
writeln!(
out,
" let (mask, pattern, flags, dec_idx) = A32_CANDS[i];"
)
.unwrap();
writeln!(
out,
" if word & mask == pattern && (flags == 0 || cond != 0xF) {{"
@ -549,7 +551,6 @@ fn generate_rust(entries: &[EncEntry]) -> String {
writeln!(out, "}}").unwrap();
writeln!(out).unwrap();
// ---- Display ------------------------------------------------------------
writeln!(out, "impl fmt::Display for A32Inst {{").unwrap();
writeln!(
out,
@ -565,7 +566,6 @@ fn generate_rust(entries: &[EncEntry]) -> String {
writeln!(out, "}}").unwrap();
writeln!(out).unwrap();
// ---- Helpers ------------------------------------------------------------
out.push_str(HELPERS);
out
@ -573,7 +573,11 @@ fn generate_rust(entries: &[EncEntry]) -> String {
fn gen_field_extract(f: &FieldDef) -> String {
let lobit = f.lobit as u32;
let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 };
let mask = if f.width >= 32 {
u32::MAX
} else {
(1u32 << f.width) - 1
};
let ty = field_rust_type(f.width);
let name = &f.name;
if ty == "bool" {
@ -591,7 +595,11 @@ fn gen_field_extract(f: &FieldDef) -> String {
/// decoder functions where the parameter is named `w`, not `word`).
fn gen_field_extract_w(f: &FieldDef) -> String {
let lobit = f.lobit as u32;
let mask = if f.width >= 32 { u32::MAX } else { (1u32 << f.width) - 1 };
let mask = if f.width >= 32 {
u32::MAX
} else {
(1u32 << f.width) - 1
};
let ty = field_rust_type(f.width);
let name = &f.name;
if ty == "bool" {
@ -605,10 +613,6 @@ fn gen_field_extract_w(f: &FieldDef) -> String {
}
}
// ---------------------------------------------------------------------------
// Display generation
// ---------------------------------------------------------------------------
fn gen_display_arm(out: &mut String, e: &EncEntry) {
let vname = variant_name(&e.id);
@ -628,11 +632,8 @@ fn gen_display_arm(out: &mut String, e: &EncEntry) {
let toks = &e.asm_tokens;
let sym_map = &e.sym_map;
// Find field lookup by Rust name
let field_by_xmlname: HashMap<String, &FieldDef> = e
.fields
.iter()
.map(|f| (f.name.clone(), f))
.collect();
let field_by_xmlname: HashMap<String, &FieldDef> =
e.fields.iter().map(|f| (f.name.clone(), f)).collect();
let mut i = 0;
while i < toks.len() {
@ -693,7 +694,7 @@ fn emit_opt_group(
if is_only_link {
match group[0].text.as_str() {
"<c>" => {
// condition suffix always print (may be empty)
// condition suffix - always print (may be empty)
if fields.contains_key("cond") {
writeln!(
out,
@ -704,11 +705,11 @@ fn emit_opt_group(
return;
}
"<q>" => {
// qualifier always skip
// qualifier - always skip
return;
}
"{!}" | "!" => {
// writeback conditional on W or writeback field
// writeback - conditional on W or writeback field
if fields.contains_key("w") {
writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap();
} else if fields.contains_key("wback") {
@ -717,14 +718,14 @@ fn emit_opt_group(
return;
}
"{IA}" | "IA" => {
// default LDM addressing mode omit
// default LDM addressing mode - omit
return;
}
_ => {}
}
}
// {<Rd>,} optional dest register, always print with following comma
// {<Rd>,} - optional dest register, always print with following comma
let all_texts: Vec<&str> = group.iter().map(|t| t.text.as_str()).collect();
let link_texts: Vec<&str> = group
.iter()
@ -745,17 +746,19 @@ fn emit_opt_group(
).unwrap();
}
} else {
// {, <shift>} without amount likely RRX variant; always print
// {, <shift>} without amount - likely RRX variant; always print
if stype_field.is_some() {
writeln!(out,
writeln!(
out,
" write!(__f,\", {{}}\", a32_shift(*stype))?;"
).unwrap();
)
.unwrap();
}
}
return;
}
// {, #{+/-}<imm>} memory offset
// {, #{+/-}<imm>} - memory offset
if link_texts.contains(&"{+/-}") || link_texts.contains(&"+/-") {
// Always emit
for tok in group {
@ -778,7 +781,7 @@ fn emit_token(
e: &EncEntry,
) {
if !tok.is_link {
// Literal text escape for Rust string
// Literal text - escape for Rust string
let escaped = tok.text.replace('\\', "\\\\").replace('"', "\\\"");
if !escaped.is_empty() {
writeln!(out, " write!(__f,\"{escaped}\")?;").unwrap();
@ -855,7 +858,7 @@ fn emit_token(
}
}
_ => {
// Register or immediate symbol look up encodedin
// Register or immediate symbol - look up encodedin
let encodedin = sym_map.get(sym).map(String::as_str).unwrap_or("");
if let Some(fname) = resolve_encodedin_field(encodedin, fields) {
if is_reg_sym(sym) {
@ -874,15 +877,11 @@ fn emit_token(
)
.unwrap();
} else {
writeln!(
out,
" write!(__f,\"{{}}\", *{fname})?;"
)
.unwrap();
writeln!(out, " write!(__f,\"{{}}\", *{fname})?;").unwrap();
}
}
} else {
// Unknown / no encodedin skip
// Unknown / no encodedin - skip
}
}
}
@ -891,26 +890,32 @@ fn emit_token(
/// Reports whether `sym` is exactly one of the register operand symbols
/// enumerated in the `matches!` pattern below: `<Rd>`, `<Rn>`, `<Rm>`, `<Rs>`,
/// `<Rt>`, `<Ra>`, `<Rt2>`, `<RdHi>`, `<RdLo>`, `<Rdm>`, `<Rdn>`.
///
/// The check is an exact, case-sensitive string comparison that includes the
/// surrounding angle brackets; any other text yields `false`.
fn is_reg_sym(sym: &str) -> bool {
matches!(
sym,
// NOTE(review): the next two lines and the eleven lines after them spell out
// the same eleven alternatives, once in compact form and once with one
// alternative per line; this looks like both sides of a formatting-only change.
"<Rd>" | "<Rn>" | "<Rm>" | "<Rs>" | "<Rt>" | "<Ra>"
| "<Rt2>" | "<RdHi>" | "<RdLo>" | "<Rdm>" | "<Rdn>"
"<Rd>"
| "<Rn>"
| "<Rm>"
| "<Rs>"
| "<Rt>"
| "<Ra>"
| "<Rt2>"
| "<RdHi>"
| "<RdLo>"
| "<Rdm>"
| "<Rdn>"
)
}
/// Given an `encodedin` string (possibly composite like "imm4H:imm4L"),
/// return the Rust field name in the variant that holds the value.
fn resolve_encodedin_field<'a>(
encodedin: &str,
fields: &'a HashMap<String, &FieldDef>,
) -> Option<String> {
fn resolve_encodedin_field(encodedin: &str, fields: &HashMap<String, &FieldDef>) -> Option<String> {
if encodedin.is_empty() {
return None;
}
// Simple case direct field name match
// Simple case - direct field name match
let lower = rust_field_name(encodedin);
if fields.contains_key(lower.as_str()) {
return Some(lower);
}
// Composite "X:Y" use first part (MSB chunk) and note we don't reassemble
// Composite "X:Y" - use first part (MSB chunk) and note we don't reassemble
// This is imprecise but acceptable for display purposes
let first = encodedin.split(':').next().unwrap_or("");
let lower_first = rust_field_name(first);
@ -920,10 +925,6 @@ fn resolve_encodedin_field<'a>(
None
}
// ---------------------------------------------------------------------------
// Static helpers emitted into the generated file
// ---------------------------------------------------------------------------
const HELPERS: &str = r#"
pub const fn a32_cond(cond: u8) -> &'static str {
match cond & 0xF {