983 lines
30 KiB
Rust
983 lines
30 KiB
Rust
use std::collections::HashMap;
|
|
use std::fmt::Write as _;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
fn main() {
|
|
let spec_dir = Path::new("specs");
|
|
println!("cargo:rerun-if-changed={}", spec_dir.display());
|
|
println!("cargo:rerun-if-changed=build.rs");
|
|
|
|
let mut entries = collect_encodings(spec_dir);
|
|
// most-specific (most fixed bits) first so the decoder's linear scan is correct
|
|
entries.sort_by(|a, b| {
|
|
b.mask
|
|
.count_ones()
|
|
.cmp(&a.mask.count_ones())
|
|
.then(a.id.cmp(&b.id))
|
|
});
|
|
|
|
let code = generate_rust(&entries);
|
|
let out = PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("arm_a32.rs");
|
|
std::fs::write(out, code).unwrap();
|
|
}
|
|
|
|
/// One `<box>` element of a register diagram, as read by `parse_box`.
#[derive(Clone, Debug)]
struct BoxInfo {
    /// Value of the `hibit` attribute: the most significant bit of the box.
    hibit: i32,
    /// Value of the `width` attribute: number of bits the box covers.
    width: i32,
    /// Value of the `name` attribute, when present.
    name: Option<String>,
    /// `true` when the `usename` attribute equals `"1"`.
    usename: bool,
    /// One entry per bit position (MSB first). Values: "0", "1", "" or a
    /// constraint string like "!= 1111" / "Z" / "N".
    values: Vec<String>,
    /// Value of the `settings` attribute (0 when absent or unparsable).
    settings: i32,
}

impl BoxInfo {
    /// Index of the least significant bit covered by this box.
    fn lobit(&self) -> i32 {
        let below_top = self.hibit - self.width;
        1 + below_top
    }
}
|
|
|
|
/// A named bit-field of an encoding that becomes a field of the generated
/// enum variant.
#[derive(Clone, Debug)]
struct FieldDef {
    /// Rust identifier used for the field in generated code.
    name: String,
    /// Most significant bit of the field within the instruction word.
    hibit: i32,
    /// Least significant bit of the field within the instruction word.
    lobit: i32,
    /// Number of bits in the field.
    width: i32,
}
|
|
|
|
/// One element of an assembler template.
#[derive(Clone, Debug)]
struct AsmTok {
    /// `false` for literal text, `true` for an `<a>` symbol.
    is_link: bool,
    /// Text content of the element (empty when the element has no text).
    text: String,
}
|
|
|
|
#[derive(Debug)]
|
|
struct EncEntry {
|
|
id: String,
|
|
mnemonic: String,
|
|
mask: u32,
|
|
pattern: u32,
|
|
fields: Vec<FieldDef>,
|
|
cond_ne_1111: bool,
|
|
asm_tokens: Vec<AsmTok>,
|
|
sym_map: HashMap<String, String>,
|
|
}
|
|
|
|
fn collect_encodings(dir: &Path) -> Vec<EncEntry> {
|
|
let mut result = Vec::new();
|
|
let mut paths: Vec<PathBuf> = std::fs::read_dir(dir)
|
|
.unwrap()
|
|
.filter_map(|e| e.ok())
|
|
.map(|e| e.path())
|
|
.filter(|p| p.extension().is_some_and(|e| e == "xml"))
|
|
.collect();
|
|
paths.sort();
|
|
for path in paths {
|
|
let text = match std::fs::read_to_string(&path) {
|
|
Ok(t) => t,
|
|
Err(_) => continue,
|
|
};
|
|
let opts = roxmltree::ParsingOptions {
|
|
allow_dtd: true,
|
|
..Default::default()
|
|
};
|
|
let doc = match roxmltree::Document::parse_with_options(&text, opts) {
|
|
Ok(d) => d,
|
|
Err(_) => continue,
|
|
};
|
|
let root = doc.root_element();
|
|
if root.tag_name().name() != "instructionsection" {
|
|
continue;
|
|
}
|
|
// Only "general" instr-class
|
|
let instr_class = root
|
|
.descendants()
|
|
.find(|n| n.tag_name().name() == "docvars")
|
|
.and_then(|dv| {
|
|
dv.children()
|
|
.filter(|n| n.is_element())
|
|
.find(|n| n.attribute("key") == Some("instr-class"))
|
|
.and_then(|n| n.attribute("value"))
|
|
});
|
|
if instr_class != Some("general") {
|
|
continue;
|
|
}
|
|
// Build file-level symbol → encodedin map from explanations
|
|
let sym_map = build_sym_map(&root);
|
|
// Walk iclass elements
|
|
for iclass in root.descendants().filter(|n| {
|
|
n.is_element() && n.tag_name().name() == "iclass" && n.attribute("isa") == Some("A32")
|
|
}) {
|
|
let base_boxes = parse_regdiagram_boxes(&iclass);
|
|
for enc in iclass
|
|
.children()
|
|
.filter(|n| n.is_element() && n.tag_name().name() == "encoding")
|
|
{
|
|
if let Some(entry) = build_entry(&enc, &base_boxes, &sym_map) {
|
|
result.push(entry);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
result
|
|
}
|
|
|
|
fn build_sym_map(root: &roxmltree::Node) -> HashMap<String, String> {
|
|
let mut map = HashMap::new();
|
|
for exp in root
|
|
.descendants()
|
|
.filter(|n| n.is_element() && n.tag_name().name() == "explanation")
|
|
{
|
|
let sym = exp
|
|
.children()
|
|
.find(|n| n.is_element() && n.tag_name().name() == "symbol")
|
|
.and_then(|n| n.text())
|
|
.map(str::to_string);
|
|
let encodedin = exp
|
|
.children()
|
|
.find(|n| n.is_element() && n.tag_name().name() == "account")
|
|
.and_then(|n| n.attribute("encodedin"))
|
|
.unwrap_or("")
|
|
.to_string();
|
|
if let Some(s) = sym {
|
|
map.entry(s).or_insert(encodedin);
|
|
}
|
|
}
|
|
map
|
|
}
|
|
|
|
fn parse_regdiagram_boxes(iclass: &roxmltree::Node) -> Vec<BoxInfo> {
|
|
let rd = match iclass
|
|
.children()
|
|
.find(|n| n.is_element() && n.tag_name().name() == "regdiagram")
|
|
{
|
|
Some(n) => n,
|
|
None => return Vec::new(),
|
|
};
|
|
rd.children()
|
|
.filter(|n| n.is_element() && n.tag_name().name() == "box")
|
|
.map(parse_box)
|
|
.collect()
|
|
}
|
|
|
|
fn parse_box(node: roxmltree::Node) -> BoxInfo {
|
|
let hibit: i32 = node
|
|
.attribute("hibit")
|
|
.and_then(|v| v.parse().ok())
|
|
.unwrap_or(0);
|
|
let width: i32 = node
|
|
.attribute("width")
|
|
.and_then(|v| v.parse().ok())
|
|
.unwrap_or(1);
|
|
let name = node.attribute("name").map(str::to_string);
|
|
let usename = node.attribute("usename") == Some("1");
|
|
let settings: i32 = node
|
|
.attribute("settings")
|
|
.and_then(|v| v.parse().ok())
|
|
.unwrap_or(0);
|
|
|
|
let mut values: Vec<String> = Vec::new();
|
|
for c in node
|
|
.children()
|
|
.filter(|n| n.is_element() && n.tag_name().name() == "c")
|
|
{
|
|
let span: usize = c
|
|
.attribute("colspan")
|
|
.and_then(|v| v.parse().ok())
|
|
.unwrap_or(1);
|
|
let text = c.text().unwrap_or("").trim().to_string();
|
|
if span == 1 || text == "0" || text == "1" {
|
|
for _ in 0..span {
|
|
values.push(text.clone());
|
|
}
|
|
} else {
|
|
// Multi-bit constraint ("!= 1111" etc.)
|
|
for _ in 0..span {
|
|
values.push(text.clone());
|
|
}
|
|
}
|
|
}
|
|
BoxInfo {
|
|
hibit,
|
|
width,
|
|
name,
|
|
usename,
|
|
values,
|
|
settings,
|
|
}
|
|
}
|
|
|
|
/// Merge iclass boxes with encoding-specific overrides (encoding wins).
|
|
fn merge_boxes(base: &[BoxInfo], overrides: &[BoxInfo]) -> Vec<BoxInfo> {
|
|
let mut result = base.to_vec();
|
|
for ov in overrides {
|
|
if let Some(pos) = result.iter().position(|b| b.hibit == ov.hibit) {
|
|
result[pos] = ov.clone();
|
|
}
|
|
}
|
|
result
|
|
}
|
|
|
|
fn build_entry(
|
|
enc: &roxmltree::Node,
|
|
base_boxes: &[BoxInfo],
|
|
sym_map: &HashMap<String, String>,
|
|
) -> Option<EncEntry> {
|
|
let id = enc.attribute("name")?.to_string();
|
|
|
|
let mnemonic = enc
|
|
.descendants()
|
|
.find(|n| {
|
|
n.is_element()
|
|
&& n.tag_name().name() == "docvar"
|
|
&& n.attribute("key") == Some("mnemonic")
|
|
})
|
|
.and_then(|n| n.attribute("value"))
|
|
.unwrap_or("UNK")
|
|
.to_string();
|
|
|
|
let enc_boxes: Vec<BoxInfo> = enc
|
|
.children()
|
|
.filter(|n| n.is_element() && n.tag_name().name() == "box")
|
|
.map(parse_box)
|
|
.collect();
|
|
|
|
let all_boxes = merge_boxes(base_boxes, &enc_boxes);
|
|
|
|
let (mask, pattern, cond_ne_1111) = compute_mask_pattern(&all_boxes);
|
|
let fields = extract_fields(&all_boxes);
|
|
|
|
// ASM template (first one)
|
|
let asm_tokens = enc
|
|
.descendants()
|
|
.find(|n| n.is_element() && n.tag_name().name() == "asmtemplate")
|
|
.map(|tmpl| {
|
|
tmpl.children()
|
|
.filter(|n| n.is_element())
|
|
.map(|n| AsmTok {
|
|
is_link: n.tag_name().name() == "a",
|
|
text: n.text().unwrap_or("").to_string(),
|
|
})
|
|
.collect()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
Some(EncEntry {
|
|
id,
|
|
mnemonic,
|
|
mask,
|
|
pattern,
|
|
fields,
|
|
cond_ne_1111,
|
|
asm_tokens,
|
|
sym_map: sym_map.clone(),
|
|
})
|
|
}
|
|
|
|
fn compute_mask_pattern(boxes: &[BoxInfo]) -> (u32, u32, bool) {
|
|
let mut mask: u32 = 0;
|
|
let mut pattern: u32 = 0;
|
|
let mut cond_ne_1111 = false;
|
|
|
|
for b in boxes {
|
|
let is_cond = b.name.as_deref() == Some("cond");
|
|
if b.values.iter().any(|v| v.contains("!= 1111")) {
|
|
if is_cond {
|
|
cond_ne_1111 = true;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
for (i, val) in b.values.iter().enumerate() {
|
|
if i as i32 >= b.width {
|
|
break;
|
|
}
|
|
let bit_pos = (b.hibit - i as i32) as u32;
|
|
match val.as_str() {
|
|
"0" => {
|
|
mask |= 1 << bit_pos;
|
|
}
|
|
"1" => {
|
|
mask |= 1 << bit_pos;
|
|
pattern |= 1 << bit_pos;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
(mask, pattern, cond_ne_1111)
|
|
}
|
|
|
|
fn extract_fields(boxes: &[BoxInfo]) -> Vec<FieldDef> {
|
|
let mut seen = std::collections::HashSet::new();
|
|
let mut fields = Vec::new();
|
|
for b in boxes {
|
|
if !b.usename {
|
|
continue;
|
|
}
|
|
let raw = match &b.name {
|
|
Some(n) if !n.is_empty() && n != "?" => n.as_str(),
|
|
_ => continue,
|
|
};
|
|
let name = rust_field_name(raw);
|
|
if seen.insert(name.clone()) {
|
|
fields.push(FieldDef {
|
|
name,
|
|
hibit: b.hibit,
|
|
lobit: b.lobit(),
|
|
width: b.width,
|
|
});
|
|
}
|
|
}
|
|
fields
|
|
}
|
|
|
|
/// Converts an encoding id such as `ADD_i_A1` into an enum variant name.
///
/// The id is split on `_`; each non-empty piece gets an upper-case first
/// character and a lower-case remainder, and the pieces are concatenated.
fn variant_name(id: &str) -> String {
    let mut camel = String::with_capacity(id.len());
    for segment in id.split('_') {
        let mut rest = segment.chars();
        if let Some(head) = rest.next() {
            camel.extend(head.to_uppercase());
            camel.push_str(&rest.as_str().to_lowercase());
        }
    }
    camel
}
|
|
|
|
/// Maps an XML field name to an identifier usable as a Rust struct field.
///
/// Steps, in order:
/// 1. lower-case the name, replace every character that is not an ASCII
///    letter or digit with `_`, collapse runs of `_` and strip them from both
///    ends;
/// 2. fall back to `field` when nothing is left;
/// 3. apply the well-known renames (`type` -> `ty`, `fn` -> `fn_reg`,
///    `register_list` -> `regs`);
/// 4. append `_` to any other Rust keyword and prefix `field_` to a name
///    starting with a digit, so the result is always a valid identifier.
///
/// The renames run on the normalised name, so `Type` or `FN` cannot slip
/// through as the keywords `type` / `fn`.
fn rust_field_name(xml_name: &str) -> String {
    // Reserved words other than the ones with a dedicated rename below.
    const KEYWORDS: &[&str] = &[
        "abstract", "as", "async", "await", "become", "box", "break", "const", "continue",
        "crate", "do", "dyn", "else", "enum", "extern", "false", "final", "for", "if", "impl",
        "in", "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv", "pub",
        "ref", "return", "self", "static", "struct", "super", "trait", "true", "try", "typeof",
        "unsafe", "unsized", "use", "virtual", "where", "while", "yield",
    ];

    // Normalise in a single pass: a separator is only emitted between two
    // kept characters, which both collapses runs and trims the front.
    let mut ident = String::with_capacity(xml_name.len());
    for c in xml_name.to_lowercase().chars() {
        if c.is_ascii_alphanumeric() {
            ident.push(c);
        } else if !ident.is_empty() && !ident.ends_with('_') {
            ident.push('_');
        }
    }
    if ident.ends_with('_') {
        ident.pop();
    }

    match ident.as_str() {
        "" => "field".to_string(),
        "type" => "ty".to_string(),
        "fn" => "fn_reg".to_string(),
        "register_list" => "regs".to_string(),
        kw if KEYWORDS.contains(&kw) => format!("{kw}_"),
        other if other.starts_with(|c: char| c.is_ascii_digit()) => format!("field_{other}"),
        _ => ident,
    }
}
|
|
|
|
/// Picks the Rust type used for a field of `width` bits in generated code:
/// `bool` for exactly one bit, otherwise the smallest of `u8`, `u16`, `u32`
/// that holds `width` bits (widths below one also map to `u8`).
fn field_rust_type(width: i32) -> &'static str {
    match width {
        1 => "bool",
        i32::MIN..=8 => "u8",
        9..=16 => "u16",
        _ => "u32",
    }
}
|
|
|
|
fn generate_rust(entries: &[EncEntry]) -> String {
|
|
let mut out = String::new();
|
|
writeln!(
|
|
out,
|
|
"// AUTO-GENERATED from ARM ISA XML (A-profile 2022-12). DO NOT EDIT."
|
|
)
|
|
.unwrap();
|
|
writeln!(out, "use core::fmt;").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
writeln!(out, "#[derive(Clone, Debug)]").unwrap();
|
|
writeln!(out, "pub enum A32Inst {{").unwrap();
|
|
for e in entries {
|
|
let vname = variant_name(&e.id);
|
|
if e.fields.is_empty() {
|
|
writeln!(out, " {vname},").unwrap();
|
|
} else {
|
|
let fields: Vec<String> = e
|
|
.fields
|
|
.iter()
|
|
.map(|f| format!("{}: {}", f.name, field_rust_type(f.width)))
|
|
.collect();
|
|
writeln!(out, " {vname} {{ {} }},", fields.join(", ")).unwrap();
|
|
}
|
|
}
|
|
writeln!(out, "}}").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
writeln!(out, "#[derive(Clone, Copy, Debug, PartialEq, Eq)]").unwrap();
|
|
writeln!(out, "pub enum A32DecodeError {{ TooShort, Unknown }}").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
// One tiny `fn a32_dec_N(w: u32) -> A32Inst` per encoding.
|
|
for (idx, e) in entries.iter().enumerate() {
|
|
let vname = variant_name(&e.id);
|
|
writeln!(out, "const fn a32_dec_{idx}(w: u32) -> A32Inst {{").unwrap();
|
|
if e.fields.is_empty() {
|
|
writeln!(out, " let _ = w; A32Inst::{vname}").unwrap();
|
|
} else {
|
|
let extracts: Vec<String> = e.fields.iter().map(gen_field_extract_w).collect();
|
|
writeln!(out, " A32Inst::{vname} {{ {} }}", extracts.join(", ")).unwrap();
|
|
}
|
|
writeln!(out, "}}").unwrap();
|
|
}
|
|
writeln!(out).unwrap();
|
|
|
|
let n = entries.len();
|
|
writeln!(out, "type A32DecFn = fn(u32) -> A32Inst;").unwrap();
|
|
write!(out, "static A32_DECODERS: [A32DecFn; {n}] = [").unwrap();
|
|
for idx in 0..n {
|
|
write!(out, "a32_dec_{idx},").unwrap();
|
|
}
|
|
writeln!(out, "];").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
// slot index = bits[27:20] of the instruction word.
|
|
// For each encoding, enumerate all hi-byte values it can match.
|
|
let mut slots: Vec<Vec<(usize, u32)>> = vec![vec![]; 256];
|
|
for (enc_idx, e) in entries.iter().enumerate() {
|
|
let hi_mask = ((e.mask >> 20) & 0xFF) as u8;
|
|
let hi_pat = ((e.pattern >> 20) & 0xFF) as u8;
|
|
for x in 0u8..=255 {
|
|
if x & hi_mask == hi_pat {
|
|
slots[x as usize].push((enc_idx, e.mask.count_ones()));
|
|
}
|
|
}
|
|
}
|
|
// Within each slot keep the most-specific (most fixed bits) first.
|
|
for slot in &mut slots {
|
|
slot.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
|
|
}
|
|
|
|
// Flatten into one array; record (start, count) per slot.
|
|
// Entry format: (mask, pattern, flags, dec_idx)
|
|
// flags bit 0: 1 = cond must not be 0xF
|
|
let mut all_cands: Vec<(u32, u32, u8, u16)> = Vec::new();
|
|
let mut slot_table: Vec<(u16, u16)> = Vec::new();
|
|
for slot in &slots {
|
|
let start = all_cands.len() as u16;
|
|
let count = slot.len() as u16;
|
|
for &(enc_idx, _) in slot {
|
|
let e = &entries[enc_idx];
|
|
let flags = u8::from(e.cond_ne_1111);
|
|
all_cands.push((e.mask, e.pattern, flags, enc_idx as u16));
|
|
}
|
|
slot_table.push((start, count));
|
|
}
|
|
|
|
// Emit the flat candidate array.
|
|
let total = all_cands.len();
|
|
writeln!(out, "static A32_CANDS: [(u32, u32, u8, u16); {total}] = [").unwrap();
|
|
for (mask, pat, flags, idx) in &all_cands {
|
|
writeln!(out, " (0x{mask:08X}, 0x{pat:08X}, {flags}, {idx}),").unwrap();
|
|
}
|
|
writeln!(out, "];").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
// Emit the slot table.
|
|
writeln!(out, "static A32_SLOTS: [(u16, u16); 256] = [").unwrap();
|
|
for (start, count) in &slot_table {
|
|
write!(out, " ({start}, {count}),").unwrap();
|
|
}
|
|
writeln!(out, "\n];").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
writeln!(
|
|
out,
|
|
"pub fn decode_a32(bytes: &[u8]) -> Result<(usize, A32Inst), A32DecodeError> {{"
|
|
)
|
|
.unwrap();
|
|
writeln!(
|
|
out,
|
|
" if bytes.len() < 4 {{ return Err(A32DecodeError::TooShort); }}"
|
|
)
|
|
.unwrap();
|
|
writeln!(
|
|
out,
|
|
" let word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);"
|
|
)
|
|
.unwrap();
|
|
writeln!(out, " let cond = ((word >> 28) & 0xF) as u8;").unwrap();
|
|
writeln!(out, " let hi = ((word >> 20) & 0xFF) as usize;").unwrap();
|
|
writeln!(out, " let (start, count) = A32_SLOTS[hi];").unwrap();
|
|
writeln!(
|
|
out,
|
|
" for i in start as usize..(start + count) as usize {{"
|
|
)
|
|
.unwrap();
|
|
writeln!(
|
|
out,
|
|
" let (mask, pattern, flags, dec_idx) = A32_CANDS[i];"
|
|
)
|
|
.unwrap();
|
|
writeln!(
|
|
out,
|
|
" if word & mask == pattern && (flags == 0 || cond != 0xF) {{"
|
|
)
|
|
.unwrap();
|
|
writeln!(
|
|
out,
|
|
" return Ok((4, A32_DECODERS[dec_idx as usize](word)));"
|
|
)
|
|
.unwrap();
|
|
writeln!(out, " }}").unwrap();
|
|
writeln!(out, " }}").unwrap();
|
|
writeln!(out, " Err(A32DecodeError::Unknown)").unwrap();
|
|
writeln!(out, "}}").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
writeln!(out, "impl fmt::Display for A32Inst {{").unwrap();
|
|
writeln!(
|
|
out,
|
|
" fn fmt(&self, __f: &mut fmt::Formatter<'_>) -> fmt::Result {{"
|
|
)
|
|
.unwrap();
|
|
writeln!(out, " match self {{").unwrap();
|
|
for e in entries {
|
|
gen_display_arm(&mut out, e);
|
|
}
|
|
writeln!(out, " }}").unwrap();
|
|
writeln!(out, " }}").unwrap();
|
|
writeln!(out, "}}").unwrap();
|
|
writeln!(out).unwrap();
|
|
|
|
out.push_str(HELPERS);
|
|
|
|
out
|
|
}
|
|
|
|
fn gen_field_extract(f: &FieldDef) -> String {
|
|
let lobit = f.lobit as u32;
|
|
let mask = if f.width >= 32 {
|
|
u32::MAX
|
|
} else {
|
|
(1u32 << f.width) - 1
|
|
};
|
|
let ty = field_rust_type(f.width);
|
|
let name = &f.name;
|
|
if ty == "bool" {
|
|
format!("{name}: (word >> {lobit}) & 1 == 1")
|
|
} else if ty == "u8" {
|
|
format!("{name}: ((word >> {lobit}) & 0x{mask:X}) as u8")
|
|
} else if ty == "u16" {
|
|
format!("{name}: ((word >> {lobit}) & 0x{mask:X}) as u16")
|
|
} else {
|
|
format!("{name}: (word >> {lobit}) & 0x{mask:X}")
|
|
}
|
|
}
|
|
|
|
/// Same as `gen_field_extract` but references the variable `w` (used in per-encoding
|
|
/// decoder functions where the parameter is named `w`, not `word`).
|
|
fn gen_field_extract_w(f: &FieldDef) -> String {
|
|
let lobit = f.lobit as u32;
|
|
let mask = if f.width >= 32 {
|
|
u32::MAX
|
|
} else {
|
|
(1u32 << f.width) - 1
|
|
};
|
|
let ty = field_rust_type(f.width);
|
|
let name = &f.name;
|
|
if ty == "bool" {
|
|
format!("{name}: (w >> {lobit}) & 1 == 1")
|
|
} else if ty == "u8" {
|
|
format!("{name}: ((w >> {lobit}) & 0x{mask:X}) as u8")
|
|
} else if ty == "u16" {
|
|
format!("{name}: ((w >> {lobit}) & 0x{mask:X}) as u16")
|
|
} else {
|
|
format!("{name}: (w >> {lobit}) & 0x{mask:X}")
|
|
}
|
|
}
|
|
|
|
fn gen_display_arm(out: &mut String, e: &EncEntry) {
|
|
let vname = variant_name(&e.id);
|
|
|
|
// Build field list for destructuring
|
|
let field_names: Vec<&str> = e.fields.iter().map(|f| f.name.as_str()).collect();
|
|
let destruct = if field_names.is_empty() {
|
|
String::new()
|
|
} else {
|
|
format!("{{ {} }}", field_names.join(", "))
|
|
};
|
|
|
|
writeln!(out, " Self::{vname} {destruct} => {{").unwrap();
|
|
|
|
// Process ASM template tokens into a sequence of write!() calls.
|
|
// We track optional-group depth; groups get collapsed to their content
|
|
// with special handling for the few known patterns.
|
|
let toks = &e.asm_tokens;
|
|
let sym_map = &e.sym_map;
|
|
// Find field lookup by Rust name
|
|
let field_by_xmlname: HashMap<String, &FieldDef> =
|
|
e.fields.iter().map(|f| (f.name.clone(), f)).collect();
|
|
|
|
let mut i = 0;
|
|
while i < toks.len() {
|
|
let tok = &toks[i];
|
|
|
|
if !tok.is_link && tok.text == "{" {
|
|
// Peek at optional group content
|
|
let (group_toks, end) = collect_opt_group(toks, i);
|
|
i = end + 1;
|
|
emit_opt_group(out, &group_toks, sym_map, &field_by_xmlname, e);
|
|
} else {
|
|
emit_token(out, tok, sym_map, &field_by_xmlname, e);
|
|
i += 1;
|
|
}
|
|
}
|
|
|
|
writeln!(out, " Ok(())").unwrap();
|
|
writeln!(out, " }}").unwrap();
|
|
}
|
|
|
|
/// Returns the tokens inside the next `{...}` group starting at `start`
|
|
/// (which must be `{`), and the index of the closing `}`.
|
|
fn collect_opt_group(toks: &[AsmTok], start: usize) -> (Vec<&AsmTok>, usize) {
|
|
let mut depth = 0;
|
|
let mut group = Vec::new();
|
|
let mut i = start;
|
|
while i < toks.len() {
|
|
let tok = &toks[i];
|
|
if !tok.is_link && tok.text == "{" {
|
|
depth += 1;
|
|
if depth > 1 {
|
|
group.push(tok);
|
|
}
|
|
} else if !tok.is_link && tok.text == "}" {
|
|
depth -= 1;
|
|
if depth == 0 {
|
|
return (group, i);
|
|
} else {
|
|
group.push(tok);
|
|
}
|
|
} else if depth > 0 {
|
|
group.push(tok);
|
|
}
|
|
i += 1;
|
|
}
|
|
(group, i)
|
|
}
|
|
|
|
fn emit_opt_group(
|
|
out: &mut String,
|
|
group: &[&AsmTok],
|
|
sym_map: &HashMap<String, String>,
|
|
fields: &HashMap<String, &FieldDef>,
|
|
e: &EncEntry,
|
|
) {
|
|
// Identify group type by content
|
|
let is_only_link = group.len() == 1 && group[0].is_link;
|
|
if is_only_link {
|
|
match group[0].text.as_str() {
|
|
"<c>" => {
|
|
// condition suffix - always print (may be empty)
|
|
if fields.contains_key("cond") {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", a32_cond(*cond))?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
return;
|
|
}
|
|
"<q>" => {
|
|
// qualifier - always skip
|
|
return;
|
|
}
|
|
"{!}" | "!" => {
|
|
// writeback - conditional on W or writeback field
|
|
if fields.contains_key("w") {
|
|
writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap();
|
|
} else if fields.contains_key("wback") {
|
|
writeln!(out, " if *wback {{ write!(__f,\"!\")?; }}").unwrap();
|
|
}
|
|
return;
|
|
}
|
|
"{IA}" | "IA" => {
|
|
// default LDM addressing mode - omit
|
|
return;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
// {<Rd>,} - optional dest register, always print with following comma
|
|
let all_texts: Vec<&str> = group.iter().map(|t| t.text.as_str()).collect();
|
|
let link_texts: Vec<&str> = group
|
|
.iter()
|
|
.filter(|t| t.is_link)
|
|
.map(|t| t.text.as_str())
|
|
.collect();
|
|
|
|
// {, <shift> #<amount>} or {, <shift>}
|
|
if link_texts.contains(&"<shift>") {
|
|
// Emit shift conditionally: skip if LSL #0
|
|
let has_amount = link_texts.contains(&"<amount>");
|
|
let stype_field = fields.get("stype").or_else(|| fields.get("shift"));
|
|
let amount_field = fields.get("imm5").or_else(|| fields.get("amount"));
|
|
if has_amount {
|
|
if stype_field.is_some() && amount_field.is_some() {
|
|
writeln!(out,
|
|
" if *stype != 0 || *imm5 != 0 {{ write!(__f,\", {{}} #{{}}\", a32_shift(*stype), *imm5)?; }}"
|
|
).unwrap();
|
|
}
|
|
} else {
|
|
// {, <shift>} without amount - likely RRX variant; always print
|
|
if stype_field.is_some() {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\", {{}}\", a32_shift(*stype))?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
// {, #{+/-}<imm>} - memory offset
|
|
if link_texts.contains(&"{+/-}") || link_texts.contains(&"+/-") {
|
|
// Always emit
|
|
for tok in group {
|
|
emit_token(out, tok, sym_map, fields, e);
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Everything else: always emit contents
|
|
for tok in group {
|
|
emit_token(out, tok, sym_map, fields, e);
|
|
}
|
|
}
|
|
|
|
fn emit_token(
|
|
out: &mut String,
|
|
tok: &AsmTok,
|
|
sym_map: &HashMap<String, String>,
|
|
fields: &HashMap<String, &FieldDef>,
|
|
e: &EncEntry,
|
|
) {
|
|
if !tok.is_link {
|
|
// Literal text - escape for Rust string
|
|
let escaped = tok
|
|
.text
|
|
.replace('\\', "\\\\")
|
|
.replace('"', "\\\"")
|
|
.to_lowercase();
|
|
if !escaped.is_empty() {
|
|
writeln!(out, " write!(__f,\"{escaped}\")?;").unwrap();
|
|
}
|
|
return;
|
|
}
|
|
|
|
// It's an <a> symbol
|
|
let sym = tok.text.as_str();
|
|
match sym {
|
|
"<c>" => {
|
|
if fields.contains_key("cond") {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", a32_cond(*cond))?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
"<q>" => { /* skip */ }
|
|
"{!}" | "!" => {
|
|
if fields.contains_key("w") {
|
|
writeln!(out, " if *w {{ write!(__f,\"!\")?; }}").unwrap();
|
|
}
|
|
}
|
|
"{+/-}" | "+/-" => {
|
|
if fields.contains_key("u") {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", if *u {{ '+' }} else {{ '-' }})?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
"{IA}" | "IA" | "SP," => {
|
|
let literal = match sym {
|
|
"SP," => "sp, ",
|
|
"IA" | "{IA}" => "",
|
|
_ => sym,
|
|
};
|
|
if !literal.is_empty() {
|
|
writeln!(out, " write!(__f,\"{literal}\")?;").unwrap();
|
|
}
|
|
}
|
|
"<shift>" => {
|
|
if let Some(_f) = fields.get("stype") {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", a32_shift(*stype))?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
"<registers>" | "<registers_with_pc>" | "<registers_without_pc>" => {
|
|
if let Some(fld) = fields.get("regs").or_else(|| fields.get("register_list")) {
|
|
let fname = &fld.name;
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", a32_reglist(*{fname} as u32))?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
"<label>" => {
|
|
// raw offset; Display can't compute absolute address
|
|
let encodedin = sym_map.get(sym).map(String::as_str).unwrap_or("imm24");
|
|
let fname = resolve_encodedin_field(encodedin, fields);
|
|
if let Some(fname) = fname {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{:#x}}\", a32_branch_offset(*{fname} as u32))?;"
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
_ => {
|
|
// Register or immediate symbol - look up encodedin
|
|
let encodedin = sym_map.get(sym).map(String::as_str).unwrap_or("");
|
|
if let Some(fname) = resolve_encodedin_field(encodedin, fields) {
|
|
if is_reg_sym(sym) {
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", a32_reg(*{fname} as u8))?;"
|
|
)
|
|
.unwrap();
|
|
} else {
|
|
// Immediate
|
|
if sym == "<const>" {
|
|
// Modified immediate: expand
|
|
writeln!(
|
|
out,
|
|
" write!(__f,\"{{}}\", a32_expand_imm12(*{fname} as u32))?;"
|
|
)
|
|
.unwrap();
|
|
} else {
|
|
writeln!(out, " write!(__f,\"{{}}\", *{fname})?;").unwrap();
|
|
}
|
|
}
|
|
} else {
|
|
// Unknown / no encodedin - skip
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns `true` when `sym` is one of the template symbols that stand for a
/// general-purpose register operand.
fn is_reg_sym(sym: &str) -> bool {
    const REGISTER_SYMBOLS: [&str; 11] = [
        "<Rd>", "<Rn>", "<Rm>", "<Rs>", "<Rt>", "<Ra>", "<Rt2>", "<RdHi>", "<RdLo>", "<Rdm>",
        "<Rdn>",
    ];
    REGISTER_SYMBOLS.contains(&sym)
}
|
|
|
|
/// Given an `encodedin` string (possibly composite like "imm4H:imm4L"),
|
|
/// return the Rust field name in the variant that holds the value.
|
|
fn resolve_encodedin_field(encodedin: &str, fields: &HashMap<String, &FieldDef>) -> Option<String> {
|
|
if encodedin.is_empty() {
|
|
return None;
|
|
}
|
|
// Simple case - direct field name match
|
|
let lower = rust_field_name(encodedin);
|
|
if fields.contains_key(lower.as_str()) {
|
|
return Some(lower);
|
|
}
|
|
// Composite "X:Y" - use first part (MSB chunk) and note we don't reassemble
|
|
// This is imprecise but acceptable for display purposes
|
|
let first = encodedin.split(':').next().unwrap_or("");
|
|
let lower_first = rust_field_name(first);
|
|
if fields.contains_key(lower_first.as_str()) {
|
|
return Some(lower_first);
|
|
}
|
|
None
|
|
}
|
|
|
|
const HELPERS: &str = r#"
|
|
pub const fn a32_cond(cond: u8) -> &'static str {
|
|
match cond & 0xF {
|
|
0 => "eq", 1 => "ne", 2 => "hs", 3 => "lo",
|
|
4 => "mi", 5 => "pl", 6 => "vs", 7 => "vc",
|
|
8 => "hi", 9 => "ls", 10 => "ge", 11 => "lt",
|
|
12 => "gt", 13 => "le", _ => "",
|
|
}
|
|
}
|
|
|
|
pub const fn a32_reg(r: u8) -> &'static str {
|
|
match r & 0xF {
|
|
0 => "r0", 1 => "r1", 2 => "r2", 3 => "r3",
|
|
4 => "r4", 5 => "r5", 6 => "r6", 7 => "r7",
|
|
8 => "r8", 9 => "r9", 10 => "r10", 11 => "r11",
|
|
12 => "r12", 13 => "sp", 14 => "lr", 15 => "pc",
|
|
_ => "??",
|
|
}
|
|
}
|
|
|
|
pub const fn a32_shift(stype: u8) -> &'static str {
|
|
match stype & 3 {
|
|
0 => "lsl", 1 => "lsr", 2 => "asr", 3 => "ror", _ => "??",
|
|
}
|
|
}
|
|
|
|
pub const fn a32_expand_imm12(imm12: u32) -> u32 {
|
|
let rot = (imm12 >> 8) & 0xF;
|
|
let imm8 = imm12 & 0xFF;
|
|
imm8.rotate_right(rot * 2)
|
|
}
|
|
|
|
pub const fn a32_branch_offset(imm24: u32) -> i32 {
|
|
// Sign-extend the 24-bit field and convert to bytes (<<2)
|
|
(imm24 << 8) as i32 >> 6
|
|
}
|
|
|
|
// a32_reglist cannot be const fn because it builds a String at runtime.
|
|
pub fn a32_reglist(regs: u32) -> String {
|
|
let mut s = String::from("{");
|
|
let mut first = true;
|
|
for i in 0..16u32 {
|
|
if regs & (1 << i) != 0 {
|
|
if !first { s.push_str(", "); }
|
|
s.push_str(a32_reg(i as u8));
|
|
first = false;
|
|
}
|
|
}
|
|
s.push('}');
|
|
s
|
|
}
|
|
"#;
|