From 2047ef8e9a4f2e4a6c47931e3cdf11fb549d3a3e Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 20 Jan 2026 14:31:58 -0800 Subject: [PATCH 1/4] [Rust] Take TypeBuilder::named_type param by ref So we can pass Ref counted NTR easier --- rust/src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/src/types.rs b/rust/src/types.rs index 4f6f82d2e..4a9c85b5d 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -391,7 +391,7 @@ impl TypeBuilder { } /// Create a named type reference [`TypeBuilder`]. Analogous to [`Type::named_type`]. - pub fn named_type(type_reference: NamedTypeReference) -> Self { + pub fn named_type(type_reference: &NamedTypeReference) -> Self { let mut is_const = Conf::new(false, MIN_CONFIDENCE).into(); let mut is_volatile = Conf::new(false, MIN_CONFIDENCE).into(); unsafe { From 511828c69aa2f08da30bd78bbfc45a92494a2f37 Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 20 Jan 2026 14:32:14 -0800 Subject: [PATCH 2/4] [Rust] Add `Platform::address_size` --- rust/src/platform.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/src/platform.rs b/rust/src/platform.rs index 435480323..46129b81a 100644 --- a/rust/src/platform.rs +++ b/rust/src/platform.rs @@ -165,6 +165,12 @@ impl Platform { unsafe { CoreArchitecture::from_raw(BNGetPlatformArchitecture(self.handle)) } } + /// Get the address size of the platform, this is typically the same as the architecture's address size, + /// but some platforms like Linux x86_64 x32 ABI have differing address sizes from architecture. + pub fn address_size(&self) -> usize { + unsafe { BNGetPlatformAddressSize(self.handle) } + } + pub fn type_container(&self) -> TypeContainer { let type_container_ptr = NonNull::new(unsafe { BNGetPlatformTypeContainer(self.handle) }); // NOTE: I have no idea how this isn't a UAF, see the note in `TypeContainer::from_raw` From d765013349c6c368f5affb20dee6927fddedf7ea Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 20 Jan 2026 14:33:01 -0800 Subject: [PATCH 3/4] [Rust] Add `TypeBuilder::function` and `TypeBuilder::function_with_opts` Temporary, we will likely deprecate in favor of a separate function builder later --- rust/src/types.rs | 122 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/rust/src/types.rs b/rust/src/types.rs index 4a9c85b5d..2eae99876 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -421,7 +421,127 @@ impl TypeBuilder { result } - // TODO : BNCreateFunctionTypeBuilder + // TODO: Deprecate this for a FunctionBuilder (along with the Type variant?) + /// NOTE: This is likely to be deprecated and removed in favor of a function type builder, please + /// use [`Type::function`] where possible. 
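+ ///
+ /// Illustrative usage (a sketch, not part of this patch; it mirrors how the IDB importer
+ /// later in this series builds function types, and assumes the existing `Type::int` and
+ /// `FunctionParameter::new` helpers):
+ ///
+ /// ```ignore
+ /// let ret = Type::int(4, true);
+ /// let params = vec![FunctionParameter::new(Type::int(4, true), "a".to_string(), None)];
+ /// let func_ty = TypeBuilder::function(&ret, params, false).finalize();
+ /// ```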
+ pub fn function<'a, T: Into>>( + return_type: T, + parameters: Vec, + variable_arguments: bool, + ) -> Self { + let mut owned_raw_return_type = Conf::<&Type>::into_raw(return_type.into()); + let mut variable_arguments = Conf::new(variable_arguments, MAX_CONFIDENCE).into(); + let mut can_return = Conf::new(true, MIN_CONFIDENCE).into(); + let mut pure = Conf::new(false, MIN_CONFIDENCE).into(); + + let mut raw_calling_convention: BNCallingConventionWithConfidence = + BNCallingConventionWithConfidence { + convention: std::ptr::null_mut(), + confidence: MIN_CONFIDENCE, + }; + + let mut stack_adjust = Conf::new(0, MIN_CONFIDENCE).into(); + let mut raw_parameters = parameters + .into_iter() + .map(FunctionParameter::into_raw) + .collect::>(); + let reg_stack_adjust_regs = std::ptr::null_mut(); + let reg_stack_adjust_values = std::ptr::null_mut(); + + let mut return_regs: BNRegisterSetWithConfidence = BNRegisterSetWithConfidence { + regs: std::ptr::null_mut(), + count: 0, + confidence: 0, + }; + + let result = unsafe { + Self::from_raw(BNCreateFunctionTypeBuilder( + &mut owned_raw_return_type, + &mut raw_calling_convention, + raw_parameters.as_mut_ptr(), + raw_parameters.len(), + &mut variable_arguments, + &mut can_return, + &mut stack_adjust, + reg_stack_adjust_regs, + reg_stack_adjust_values, + 0, + &mut return_regs, + BNNameType::NoNameType, + &mut pure, + )) + }; + + for raw_param in raw_parameters { + FunctionParameter::free_raw(raw_param); + } + + result + } + + // TODO: Deprecate this for a FunctionBuilder (along with the Type variant?) + /// NOTE: This is likely to be deprecated and removed in favor of a function type builder, please + /// use [`Type::function_with_opts`] where possible. + pub fn function_with_opts< + 'a, + T: Into>, + C: Into>>, + >( + return_type: T, + parameters: &[FunctionParameter], + variable_arguments: bool, + calling_convention: C, + stack_adjust: Conf, + ) -> Self { + let mut owned_raw_return_type = Conf::<&Type>::into_raw(return_type.into()); + let mut variable_arguments = Conf::new(variable_arguments, MAX_CONFIDENCE).into(); + let mut can_return = Conf::new(true, MIN_CONFIDENCE).into(); + let mut pure = Conf::new(false, MIN_CONFIDENCE).into(); + + let mut owned_raw_calling_convention = + Conf::>::into_owned_raw(&calling_convention.into()); + + let mut stack_adjust = stack_adjust.into(); + let mut raw_parameters = parameters + .iter() + .cloned() + .map(FunctionParameter::into_raw) + .collect::>(); + + // TODO: Update type signature and include these (will be a breaking change) + let reg_stack_adjust_regs = std::ptr::null_mut(); + let reg_stack_adjust_values = std::ptr::null_mut(); + + let mut return_regs: BNRegisterSetWithConfidence = BNRegisterSetWithConfidence { + regs: std::ptr::null_mut(), + count: 0, + confidence: 0, + }; + + let result = unsafe { + Self::from_raw(BNCreateFunctionTypeBuilder( + &mut owned_raw_return_type, + &mut owned_raw_calling_convention, + raw_parameters.as_mut_ptr(), + raw_parameters.len(), + &mut variable_arguments, + &mut can_return, + &mut stack_adjust, + reg_stack_adjust_regs, + reg_stack_adjust_values, + 0, + &mut return_regs, + BNNameType::NoNameType, + &mut pure, + )) + }; + + for raw_param in raw_parameters { + FunctionParameter::free_raw(raw_param); + } + + result + } /// Create a pointer [`TypeBuilder`] with the given target type. Analogous to [`Type::pointer`]. 
pub fn pointer<'a, A: Architecture, T: Into>>(arch: &A, ty: T) -> Self { From 4636ce97c8575a41b9ccca87f3afa3a54c6ad93d Mon Sep 17 00:00:00 2001 From: Mason Reed Date: Tue, 20 Jan 2026 14:30:34 -0800 Subject: [PATCH 4/4] WIP: IDB Import refactor --- Cargo.lock | 90 ++- plugins/idb_import/Cargo.toml | 6 +- plugins/idb_import/src/addr_info.rs | 53 -- plugins/idb_import/src/commands.rs | 43 ++ plugins/idb_import/src/commands/create_til.rs | 1 + plugins/idb_import/src/commands/load_file.rs | 50 ++ plugins/idb_import/src/lib.rs | 410 +----------- plugins/idb_import/src/mapper.rs | 354 +++++++++++ plugins/idb_import/src/parse.rs | 441 +++++++++++++ plugins/idb_import/src/translate.rs | 583 ++++++++++++++++++ rust/src/types/structure.rs | 10 + 11 files changed, 1576 insertions(+), 465 deletions(-) delete mode 100644 plugins/idb_import/src/addr_info.rs create mode 100644 plugins/idb_import/src/commands.rs create mode 100644 plugins/idb_import/src/commands/create_til.rs create mode 100644 plugins/idb_import/src/commands/load_file.rs create mode 100644 plugins/idb_import/src/mapper.rs create mode 100644 plugins/idb_import/src/parse.rs create mode 100644 plugins/idb_import/src/translate.rs diff --git a/Cargo.lock b/Cargo.lock index f9401a8a9..0c5d06420 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -189,7 +189,7 @@ dependencies = [ "binaryninjacore-sys", "insta", "rayon", - "rstest", + "rstest 0.24.0", "serde", "serde_derive", "serde_json", @@ -250,6 +250,9 @@ name = "bitflags" version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +dependencies = [ + "serde", +] [[package]] name = "block2" @@ -1075,8 +1078,8 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "idb-rs" -version = "0.1.12" -source = "git+https://github.com/Vector35/idb-rs?tag=0.1.12#54c50ebb35724e1470ac28d259df8c98ad5fa3b0" +version = "0.1.13" +source = "git+https://github.com/Vector35/idb-rs?tag=0.1.13#9cec8cc88d35b23635c3c883f4bdbaa26c4a2f39" dependencies = [ "anyhow", "bincode", @@ -1084,6 +1087,9 @@ dependencies = [ "flate2", "num-traits", "num_enum", + "ron", + "rstest 0.25.0", + "rustc-hash", "serde", "zstd", ] @@ -1096,6 +1102,8 @@ dependencies = [ "binaryninja", "binaryninjacore-sys", "idb-rs", + "serde", + "serde_json", "tracing", ] @@ -1909,6 +1917,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "ron" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "beceb6f7bf81c73e73aeef6dd1356d9a1b2b4909e1f0fc3e59b034f9572d7b7f" +dependencies = [ + "base64", + "bitflags 2.9.1", + "serde", + "serde_derive", + "unicode-ident", +] + [[package]] name = "roxmltree" version = "0.20.0" @@ -1923,7 +1944,19 @@ checksum = "03e905296805ab93e13c1ec3a03f4b6c4f35e9498a3d5fa96dc626d22c03cd89" dependencies = [ "futures-timer", "futures-util", - "rstest_macros", + "rstest_macros 0.24.0", + "rustc_version", +] + +[[package]] +name = "rstest" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fc39292f8613e913f7df8fa892b8944ceb47c247b78e1b1ae2f09e019be789d" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros 0.25.0", "rustc_version", ] @@ -1945,6 +1978,24 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "rstest_macros" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn", + "unicode-ident", +] + [[package]] name = "rustc-demangle" version = "0.1.25" @@ -2088,18 +2139,28 @@ checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -2108,14 +2169,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", + "serde_core", + "zmij", ] [[package]] @@ -3115,6 +3177,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zmij" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2" + [[package]] name = "zstd" version = "0.13.3" diff --git a/plugins/idb_import/Cargo.toml b/plugins/idb_import/Cargo.toml index 9ba241d26..f90044f6a 100644 --- a/plugins/idb_import/Cargo.toml +++ b/plugins/idb_import/Cargo.toml @@ -12,5 +12,7 @@ crate-type = ["cdylib"] anyhow = { version = "1.0.86", features = ["backtrace"] } binaryninja.workspace = true binaryninjacore-sys.workspace = true -idb-rs = { git = "https://github.com/Vector35/idb-rs", tag = "0.1.12" } -tracing = "0.1" \ No newline at end of file +idb-rs = { git = "https://github.com/Vector35/idb-rs", tag = "0.1.13" } +tracing = "0.1" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } \ No newline at end of file diff --git a/plugins/idb_import/src/addr_info.rs b/plugins/idb_import/src/addr_info.rs deleted file mode 100644 index 9a05e6d38..000000000 --- a/plugins/idb_import/src/addr_info.rs +++ /dev/null @@ -1,53 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; - -use anyhow::Result; - -use idb_rs::addr_info::all_address_info; -use idb_rs::id0::{ID0Section, Netdelta}; -use idb_rs::id1::ID1Section; -use idb_rs::id2::ID2Section; -use idb_rs::{til, Address, IDAKind}; - -#[derive(Default)] -pub struct AddrInfo<'a> { - // TODO does binja differentiate comments types on the API? 
- pub comments: Vec>, - pub label: Option>, - // TODO make this a ref - pub ty: Option, -} - -pub fn get_info<'a, K: IDAKind>( - id0: &'a ID0Section, - id1: &ID1Section, - id2: Option<&ID2Section>, - netdelta: Netdelta, -) -> Result, AddrInfo<'a>>> { - let mut addr_info: HashMap, AddrInfo> = HashMap::new(); - - // comments defined on the address information - for (info, _info_size) in all_address_info(id0, id1, id2, netdelta) { - let entry = addr_info.entry(info.address()).or_default(); - if let Some(comment) = info.comment() { - entry.comments.push(comment.to_vec()); - } - if let Some(comment) = info.comment_repeatable() { - entry.comments.push(comment.to_vec()); - } - if let Some(comment) = info.comment_pre() { - entry.comments.extend(comment.map(|line| line.to_vec())); - } - if let Some(comment) = info.comment_post() { - entry.comments.extend(comment.map(|line| line.to_vec())); - } - if let Some(label) = info.label()? { - entry.label = Some(label); - } - if let Some(ty) = info.tinfo()? { - entry.ty = Some(ty); - } - } - - Ok(addr_info) -} diff --git a/plugins/idb_import/src/commands.rs b/plugins/idb_import/src/commands.rs new file mode 100644 index 000000000..a08ee5947 --- /dev/null +++ b/plugins/idb_import/src/commands.rs @@ -0,0 +1,43 @@ +use binaryninja::interaction::{Form, FormInputField}; +use std::path::PathBuf; + +pub mod create_til; +pub mod load_file; + +pub struct LoadFileField { + filter: String, + default: Option, +} + +impl LoadFileField { + #[allow(unused)] + pub fn new(filter: &str) -> Self { + Self { + filter: filter.to_string(), + default: None, + } + } + + pub fn with_default(filter: &str, default: &str) -> Self { + Self { + filter: filter.to_string(), + default: Some(default.to_string()), + } + } + + pub fn field(&self) -> FormInputField { + FormInputField::OpenFileName { + prompt: "File Path".to_string(), + // TODO: This is called extension but is really a filter. 
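+ // The value appears to use the Qt-style file-dialog filter syntax, with multiple
+ // patterns separated by ";;" (e.g. the "*.idb;;*.i64;;*.til" string passed in by the
+ // load command).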
+ extension: Some(self.filter.clone()), + default: self.default.clone(), + value: None, + } + } + + pub fn from_form(form: &Form) -> Option { + let field = form.get_field_with_name("File Path")?; + let field_value = field.try_value_string()?; + Some(PathBuf::from(field_value)) + } +} diff --git a/plugins/idb_import/src/commands/create_til.rs b/plugins/idb_import/src/commands/create_til.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/plugins/idb_import/src/commands/create_til.rs @@ -0,0 +1 @@ + diff --git a/plugins/idb_import/src/commands/load_file.rs b/plugins/idb_import/src/commands/load_file.rs new file mode 100644 index 000000000..c1a9849d8 --- /dev/null +++ b/plugins/idb_import/src/commands/load_file.rs @@ -0,0 +1,50 @@ +use crate::commands::LoadFileField; +use crate::mapper::IDBMapper; +use crate::parse::IDBFileParser; +use binaryninja::binary_view::{BinaryView, BinaryViewExt}; +use binaryninja::command::Command; +use binaryninja::interaction::Form; +use std::fs::File; +use std::io::BufReader; +use std::path::PathBuf; + +pub struct LoadIDBFile; + +impl Command for LoadIDBFile { + fn action(&self, view: &BinaryView) { + let mut form = Form::new("Load SVD File"); + // let mut load_settings = LoadSettings::from_view_settings(view); + let mut default_path = PathBuf::from(&view.file().filename()); + default_path.set_extension("idb"); + let file_field = + LoadFileField::with_default("*.idb;;*.i64;;*.til", &default_path.to_string_lossy()); + form.add_field(file_field.field()); + if !form.prompt() { + return; + } + let Some(file_path) = LoadFileField::from_form(&form) else { + return; + }; + let Ok(file) = File::open(&file_path) else { + tracing::error!("Failed to open file: {}", file_path.display()); + return; + }; + let mut file_reader = BufReader::new(file); + let file_parser = IDBFileParser::new(); + match file_parser.parse(&mut file_reader) { + Ok(idb_info) => { + let idb_str = serde_json::to_string_pretty(&idb_info).unwrap(); + std::fs::write(&file_path.with_extension("json"), idb_str) + .expect("Failed to write IDB info to JSON file"); + IDBMapper::new(idb_info).map_to_view(view); + } + Err(e) => { + tracing::error!("Failed to parse IDB file: {}", e); + } + } + } + + fn valid(&self, _view: &BinaryView) -> bool { + true + } +} diff --git a/plugins/idb_import/src/lib.rs b/plugins/idb_import/src/lib.rs index f285f554d..40f71004f 100644 --- a/plugins/idb_import/src/lib.rs +++ b/plugins/idb_import/src/lib.rs @@ -1,406 +1,18 @@ -mod types; -use std::borrow::Cow; -use std::io::{BufRead, Cursor, Seek}; - -use idb_rs::id1::ID1Section; -use idb_rs::id2::{ID2Section, ID2SectionVariants}; -use idb_rs::{IDAKind, IDAUsize, IDBFormat}; -use types::*; -mod addr_info; -use addr_info::*; - -use binaryninja::binary_view::{BinaryView, BinaryViewBase, BinaryViewExt}; -use binaryninja::debuginfo::{ - CustomDebugInfoParser, DebugFunctionInfo, DebugInfo, DebugInfoParser, -}; - -use idb_rs::id0::{ID0Section, ID0SectionVariants}; -use idb_rs::til::section::TILSection; -use idb_rs::til::TypeVariant as TILTypeVariant; - -use anyhow::{anyhow, Result}; - -struct IDBDebugInfoParser; -impl CustomDebugInfoParser for IDBDebugInfoParser { - fn is_valid(&self, view: &BinaryView) -> bool { - if let Some(project_file) = view.file().project_file() { - project_file.name().as_str().ends_with(".i64") - || project_file.name().as_str().ends_with(".idb") - } else { - view.file().filename().as_str().ends_with(".i64") - || view.file().filename().as_str().ends_with(".idb") - } - } - - fn parse_info( - &self, - 
debug_info: &mut DebugInfo, - bv: &BinaryView, - debug_file: &BinaryView, - progress: Box Result<(), ()>>, - ) -> bool { - match parse_idb_info(debug_info, bv, debug_file, progress) { - Ok(()) => true, - Err(error) => { - tracing::error!("Unable to parse IDB file: {error}"); - false - } - } - } -} - -struct TILDebugInfoParser; -impl CustomDebugInfoParser for TILDebugInfoParser { - fn is_valid(&self, view: &BinaryView) -> bool { - if let Some(project_file) = view.file().project_file() { - project_file.name().as_str().ends_with(".til") - } else { - view.file().filename().as_str().ends_with(".til") - } - } - - fn parse_info( - &self, - debug_info: &mut DebugInfo, - _bv: &BinaryView, - debug_file: &BinaryView, - progress: Box Result<(), ()>>, - ) -> bool { - match parse_til_info(debug_info, debug_file, progress) { - Ok(()) => true, - Err(error) => { - tracing::error!("Unable to parse TIL file: {error}"); - false - } - } - } -} - -struct BinaryViewReader<'a> { - bv: &'a BinaryView, - offset: u64, -} -impl std::io::Read for BinaryViewReader<'_> { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - if !self.bv.offset_valid(self.offset) { - // TODO check if this is truly a EoF hit, `self.bv.len()` is not - // reliable, it's returning a size bigger then the original file. - return Ok(0); - } - let len = BinaryView::read(self.bv, buf, self.offset); - self.offset += u64::try_from(len).unwrap(); - Ok(len) - } -} - -impl std::io::Seek for BinaryViewReader<'_> { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { - let new_offset = match pos { - std::io::SeekFrom::Start(offset) => Some(offset), - std::io::SeekFrom::End(end) => self.bv.len().checked_add_signed(end), - std::io::SeekFrom::Current(next) => self.offset.checked_add_signed(next), - }; - let new_offset = new_offset.ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unable to calculate new offset in BinaryViewReader", - ) - })?; - if !self.bv.offset_valid(new_offset) { - return Err(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Try to set invalid offset in BinaryViewReader", - )); - } - self.offset = new_offset; - Ok(new_offset) - } -} - -fn parse_idb_info( - debug_info: &mut DebugInfo, - bv: &BinaryView, - debug_file: &BinaryView, - progress: Box Result<(), ()>>, -) -> Result<()> { - tracing::trace!("Opening a IDB file"); - let file = BinaryViewReader { - bv: debug_file, - offset: 0, - }; - tracing::trace!("Parsing a IDB file"); - let mut file = std::io::BufReader::new(file); - let idb_kind = idb_rs::identify_idb_file(&mut file)?; - match idb_kind { - idb_rs::IDBFormats::Separated(sep) => { - parse_idb_info_format(debug_info, bv, debug_file, sep, file, progress) - } - idb_rs::IDBFormats::InlineUncompressed(inline) => { - parse_idb_info_format(debug_info, bv, debug_file, inline, file, progress) - } - idb_rs::IDBFormats::InlineCompressed(compressed) => { - let mut buf = vec![]; - let inline = compressed.decompress_into_memory(&mut file, &mut buf)?; - parse_idb_info_format( - debug_info, - bv, - debug_file, - inline, - Cursor::new(&buf[..]), - progress, - ) - } - } -} - -fn parse_idb_info_format( - debug_info: &mut DebugInfo, - bv: &BinaryView, - debug_file: &BinaryView, - format: impl IDBFormat, - mut idb_data: impl BufRead + Seek, - progress: Box Result<(), ()>>, -) -> Result<()> { - let Some(id0_idx) = format.id0_location() else { - return Err(anyhow!("Unable to find the ID0 section")); - }; - let Some(id1_idx) = format.id1_location() else { - return Err(anyhow!("Unable to find the 
ID1 section")); - }; - let id2_idx = format.id2_location(); - - if let Some(til_idx) = format.til_location() { - tracing::trace!("Parsing the TIL section"); - let til = format.read_til(&mut idb_data, til_idx)?; - // progress 0%-50% - import_til_section(debug_info, debug_file, &til, progress)?; - }; - - let id0 = format.read_id0(&mut idb_data, id0_idx)?; - let id1 = format.read_id1(&mut idb_data, id1_idx)?; - let id2 = id2_idx - .map(|id2_idx| format.read_id2(&mut idb_data, id2_idx)) - .transpose()?; - - match (id0, id2) { - (ID0SectionVariants::IDA32(id0), Some(ID2SectionVariants::IDA32(id2))) => { - parse_id0_section_info(debug_info, bv, debug_file, &id0, &id1, Some(&id2))? - } - (ID0SectionVariants::IDA32(id0), None) => { - parse_id0_section_info(debug_info, bv, debug_file, &id0, &id1, None)? - } - (ID0SectionVariants::IDA64(id0), Some(ID2SectionVariants::IDA64(id2))) => { - parse_id0_section_info(debug_info, bv, debug_file, &id0, &id1, Some(&id2))? - } - (ID0SectionVariants::IDA64(id0), None) => { - parse_id0_section_info(debug_info, bv, debug_file, &id0, &id1, None)? - } - _ => unreachable!(), - } - - Ok(()) -} - -fn parse_til_info( - debug_info: &mut DebugInfo, - debug_file: &BinaryView, - progress: Box Result<(), ()>>, -) -> Result<()> { - tracing::trace!("Opening a TIL file"); - let file = BinaryViewReader { - bv: debug_file, - offset: 0, - }; - let mut file = std::io::BufReader::new(file); - tracing::trace!("Parsing the TIL section"); - let til = TILSection::read(&mut file)?; - import_til_section(debug_info, debug_file, &til, progress) -} - -pub fn import_til_section( - debug_info: &mut DebugInfo, - debug_file: &BinaryView, - til: &TILSection, - progress: impl Fn(usize, usize) -> Result<(), ()>, -) -> Result<()> { - let types = types::translate_til_types(debug_file.default_arch().unwrap(), til, progress)?; - - // print any errors - for ty in &types { - match &ty.ty { - TranslateTypeResult::NotYet => { - panic!( - "type could not be processed `{}`: {:#?}", - ty.name.as_utf8_lossy(), - &ty.og_ty - ); - } - TranslateTypeResult::Error(error) => { - tracing::error!( - "Unable to parse type `{}`: {error}", - ty.name.as_utf8_lossy(), - ); - } - TranslateTypeResult::PartiallyTranslated(_, error) => { - if let Some(error) = error { - tracing::error!( - "Unable to parse type `{}` correctly: {error}", - ty.name.as_utf8_lossy(), - ); - } else { - tracing::warn!( - "Type `{}` maybe not be fully translated", - ty.name.as_utf8_lossy(), - ); - } - } - TranslateTypeResult::Translated(_) => {} - }; - } - - // add all type to binary ninja - for ty in &types { - if let TranslateTypeResult::Translated(bn_ty) - | TranslateTypeResult::PartiallyTranslated(bn_ty, _) = &ty.ty - { - if !debug_info.add_type(&ty.name.as_utf8_lossy(), bn_ty, &[/* TODO */]) { - tracing::error!("Unable to add type `{}`", ty.name.as_utf8_lossy()) - } - } - } - - // add a second time to fix the references LOL - for ty in &types { - if let TranslateTypeResult::Translated(bn_ty) - | TranslateTypeResult::PartiallyTranslated(bn_ty, _) = &ty.ty - { - if !debug_info.add_type(&ty.name.as_utf8_lossy(), bn_ty, &[/* TODO */]) { - tracing::error!("Unable to fix type `{}`", ty.name.as_utf8_lossy()) - } - } - } - - Ok(()) -} - -fn parse_id0_section_info( - debug_info: &mut DebugInfo, - bv: &BinaryView, - debug_file: &BinaryView, - id0: &ID0Section, - id1: &ID1Section, - id2: Option<&ID2Section>, -) -> Result<()> { - let ida_info_idx = id0.root_node()?; - let ida_info = id0.ida_info(ida_info_idx)?; - let idb_baseaddr = 
ida_info.addresses.loading_base.into_u64(); - let bv_baseaddr = bv.start(); - let netdelta = ida_info.netdelta(); - // just addr this value to the address to translate from ida to bn - // NOTE this delta could wrap here and while using translating - let addr_delta = bv_baseaddr.wrapping_sub(idb_baseaddr); - - for (idb_addr, info) in get_info(id0, id1, id2, netdelta)? { - let addr = addr_delta.wrapping_add(idb_addr.into_raw().into_u64()); - // just in case we change this struct in the future, this line will for us to review this code - // TODO merge this data with folder locations - let AddrInfo { - comments, - label, - ty, - } = info; - // TODO set comments to address here - for function in &bv.functions_containing(addr) { - function.set_comment_at(addr, &String::from_utf8_lossy(&comments.join(&b"\n"[..]))); - } - - let bnty = ty - .as_ref() - .and_then(|ty| match translate_ephemeral_type(debug_file, ty) { - TranslateTypeResult::Translated(result) => Some(result), - TranslateTypeResult::PartiallyTranslated(result, None) => { - tracing::warn!("Unable to fully translate the type at {addr:#x}"); - Some(result) - } - TranslateTypeResult::NotYet => { - tracing::error!("Unable to translate the type at {addr:#x}"); - None - } - TranslateTypeResult::PartiallyTranslated(_, Some(bn_type_error)) - | TranslateTypeResult::Error(bn_type_error) => { - tracing::error!("Unable to translate the type at {addr:#x}: {bn_type_error}",); - None - } - }); - - let label: Option> = - label.as_ref().map(Cow::as_ref).map(String::from_utf8_lossy); - match (label, &ty, bnty) { - (label, Some(ty), bnty) if matches!(&ty.type_variant, TILTypeVariant::Function(_)) => { - if bnty.is_none() { - tracing::error!("Unable to convert the function type at {addr:#x}",) - } - if !debug_info.add_function(&DebugFunctionInfo::new( - None, - None, - label.map(Cow::into_owned), - bnty, - Some(addr), - None, - vec![], - vec![], - )) { - tracing::error!("Unable to add the function at {addr:#x}") - } - } - (label, Some(_ty), Some(bnty)) => { - if !debug_info.add_data_variable(addr, &bnty, label.as_ref().map(Cow::as_ref), &[]) - { - tracing::error!("Unable to add the type at {addr:#x}") - } - } - (label, Some(_ty), None) => { - // TODO types come from the TIL sections, can we make all types be just NamedTypes? - tracing::error!("Unable to convert type {addr:#x}"); - // TODO how to add a label without a type associated with it? - if let Some(name) = label { - if !debug_info.add_data_variable( - addr, - &binaryninja::types::Type::void(), - Some(&name), - &[], - ) { - tracing::error!("Unable to add the label at {addr:#x}") - } - } - } - (Some(name), None, None) => { - // TODO how to add a label without a type associated with it? 
- if !debug_info.add_data_variable( - addr, - &binaryninja::types::Type::void(), - Some(&name), - &[], - ) { - tracing::error!("Unable to add the label at {addr:#x}") - } - } - - // just comments at this address - (None, None, None) => {} - - (_, None, Some(_)) => unreachable!(), - } - } - - Ok(()) -} +mod commands; +pub mod mapper; +pub mod parse; +pub mod translate; #[allow(non_snake_case)] #[no_mangle] pub extern "C" fn CorePluginInit() -> bool { binaryninja::tracing_init!("IDB Import"); - DebugInfoParser::register("IDB Parser", IDBDebugInfoParser); - DebugInfoParser::register("TIL Parser", TILDebugInfoParser); + + binaryninja::command::register_command( + "Load IDB File", + "Loads an IDB file into the current view.", + commands::load_file::LoadIDBFile, + ); + true } diff --git a/plugins/idb_import/src/mapper.rs b/plugins/idb_import/src/mapper.rs new file mode 100644 index 000000000..6774660b8 --- /dev/null +++ b/plugins/idb_import/src/mapper.rs @@ -0,0 +1,354 @@ +//! Map the IDB data we parsed into the [`BinaryView`]. + +use crate::parse::{CommentInfo, FunctionInfo, IDBInfo, LabelInfo, NameInfo, SegmentInfo}; +use crate::translate::TILTranslator; +use binaryninja::binary_view::{BinaryView, BinaryViewBase, BinaryViewExt}; +use binaryninja::qualified_name::QualifiedName; +use binaryninja::rc::Ref; +use binaryninja::section::{SectionBuilder, Semantics}; +use binaryninja::symbol::{Symbol, SymbolType}; +use idb_rs::id0::SegmentType; +use std::collections::HashSet; + +/// Maps IDB data into a [`BinaryView`]. +/// +/// The mapper can be re-used if mapping into multiple views. +pub struct IDBMapper { + info: IDBInfo, +} + +impl IDBMapper { + pub fn new(info: IDBInfo) -> Self { + Self { info } + } + + pub fn map_to_view(&self, view: &BinaryView) { + let Some(id0) = &self.info.id0 else { + tracing::warn!("No ID0 data found, skipping mapping."); + return; + }; + + // TODO: Actually the below comment belongs in an IDBVerifier that tries to determine if the idb + // TODO: Will process correctly for the given view. + // TODO: Have a shasum check of the file to make sure we are not mapping to bad data? + + // Rebase the address from ida -> binja without this rebased views will fail to map. 
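+ // Worked example: with an IDB loading base of 0x40_0000 and a view starting at
+ // 0x1_4000_0000, the delta is 0x1_3FC0_0000 and rebase(0x40_1000) == 0x1_4000_1000.
+ // The wrapping arithmetic keeps this well-defined even when the view base is below
+ // the IDB base.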
+ let bn_base_address = view.start(); + let ida_base_address = id0.base_address.unwrap_or(bn_base_address); + let base_address_delta = bn_base_address.wrapping_sub(ida_base_address); + let rebase = |addr: u64| -> u64 { addr.wrapping_add(base_address_delta) }; + + for segment in &id0.segments { + let mut rebased_segment = segment.clone(); + rebased_segment.region.start = rebase(segment.region.start); + rebased_segment.region.end = rebase(segment.region.end); + self.map_segment_to_view(view, &rebased_segment); + } + + let platform = view.default_platform().unwrap(); + let til_translator = match &self.info.til { + Some(til) => TILTranslator::new_from_platform(&platform).with_til_info(&til), + None => TILTranslator::new_from_platform(&platform), + }; + + if let Some(til) = &self.info.til { + self.map_types_to_view(view, &til_translator, &til.types); + } + if let Some(dir_tree) = &self.info.dir_tree { + self.map_types_to_view(view, &til_translator, &dir_tree.types); + } + + for func in &self.info.merged_functions() { + let mut rebased_func = func.clone(); + rebased_func.address = rebase(func.address); + self.map_func_to_view(view, &til_translator, &rebased_func); + } + + // TODO: The below undo and ignore is not thread safe, this means that the mapper itself + // TODO: should be the only thing running at the time of the mapping process. + let undo = view.file().begin_undo_actions(true); + for comment in &id0.comments { + let mut rebased_comment = comment.clone(); + rebased_comment.address = rebase(comment.address); + self.map_comment_to_view(view, &rebased_comment); + } + view.file().forget_undo_actions(&undo); + + for label in &id0.labels { + let mut rebased_label = label.clone(); + rebased_label.address = rebase(label.address); + self.map_label_to_view(view, &rebased_label); + } + + if let Some(dir_tree) = &self.info.dir_tree { + // NOTE: We do not map dir tree functions and types, they are already mapped + // in the merged_functions() function. 
+ for name in &dir_tree.names { + let mut rebased_name = name.clone(); + rebased_name.address = rebase(name.address); + self.map_name_to_view(view, &til_translator, &rebased_name); + } + } + + // self.map_used_types_to_view(view, &til_translator); + } + + pub fn map_types_to_view( + &self, + view: &BinaryView, + til_translator: &TILTranslator, + types: &[idb_rs::til::TILTypeInfo], + ) { + for ty in types { + let ty_name = ty.name.to_string(); + if view.type_by_name(&ty_name).is_some() { + tracing::info!("Type already exists in view: {}", ty_name); + continue; + } + match til_translator.translate_type_info(&ty.tinfo) { + Ok(bn_ty) => { + tracing::info!("Mapping type: {:?}", ty); + view.define_auto_type(&ty_name, "IDA", &bn_ty); + } + Err(err) => { + tracing::warn!("Failed to map type {:?}: {}", ty, err) + } + } + } + } + + pub fn map_used_types_to_view(&self, view: &BinaryView, til_translator: &TILTranslator) { + let type_archives: Vec<_> = view + .attached_type_archives() + .iter() + .filter_map(|id| view.type_archive_by_id(&id)) + .collect(); + + let mut til_type_map = std::collections::HashMap::new(); + if let Some(til) = &self.info.til { + til_type_map = til + .types + .iter() + .map(|ty| (ty.name.to_string(), ty)) + .collect(); + } + if let Some(dir_tree) = &self.info.dir_tree { + til_type_map = dir_tree + .types + .iter() + .map(|ty| (ty.name.to_string(), ty)) + .collect(); + } + + let mut used_types = HashSet::new(); + if let Ok(_used_types) = til_translator.used_types.lock() { + used_types = _used_types.clone(); + } + // TODO: Adding types to view after the types have been applied to the functions is not a + // TODO: great idea, I imagine the NTR's will have stale references until the analysis runs again. + 'found: for used_ty in &used_types { + // 0. Make sure the type doesn't already exist in the view + if view.type_by_name(&used_ty.name).is_some() { + tracing::info!("Type already exists in view: {:?}", used_ty.name); + continue 'found; + } + + // 1. Check in BN type libraries. + if let Some(found_ty) = view.import_type_library(&used_ty.name, None) { + tracing::info!("Found type in type library: {:?}", found_ty); + continue 'found; + } + + // 2. Check in type archives + for type_archive in &type_archives { + if let Some(found_ty) = + type_archive.get_type_by_name(QualifiedName::from(&used_ty.name)) + { + tracing::info!("Found type in type archive: {:?}", found_ty); + view.define_auto_type(&used_ty.name, "IDA", &found_ty); + continue 'found; + } + } + + // // 3. Check in the TIL of the IDB info. + if let Some(ty) = til_type_map.get(&used_ty.name) { + if let Ok(bn_ty) = til_translator.translate_type_info(&ty.tinfo) { + tracing::info!("Found type in TIL: {:?}", ty); + view.define_auto_type(&used_ty.name, "IDA", &bn_ty); + continue 'found; + } + } + + tracing::warn!("Failed to find type: {:?}", used_ty); + // 4. TODO: Look through the idb attached tils? + } + } + + pub fn map_segment_to_view(&self, view: &BinaryView, segment: &SegmentInfo) { + let semantics = match segment.ty { + SegmentType::Norm => Semantics::DefaultSection, + SegmentType::Xtrn => { + // IDA definition of extern is an actual section like '.idata' whereas extern in BN + // is a synthetic section, do NOT use [`Semantics::External`]. 
+ Semantics::ReadWriteData + } + SegmentType::Code => Semantics::ReadOnlyCode, + SegmentType::Data => Semantics::ReadWriteData, + SegmentType::Imp => Semantics::DefaultSection, + SegmentType::Grp => Semantics::DefaultSection, + SegmentType::Null => Semantics::DefaultSection, + SegmentType::Undf => { + // Don't map undefined segment i guess? + return; + } + SegmentType::Bss => Semantics::ReadWriteData, + SegmentType::Abssym => Semantics::DefaultSection, + SegmentType::Comm => Semantics::DefaultSection, + SegmentType::Imem => Semantics::DefaultSection, + }; + + // TODO: Is this section already mapped using address range not name. + if view.section_by_name(&segment.name).is_some() { + tracing::info!( + "Section with name '{}' already exists, skipping...", + segment.name + ); + return; + } + + tracing::info!( + "Mapping segment '{}': {:0x} - {:0x} ({:?})", + segment.name, + segment.region.start, + segment.region.end, + segment.ty + ); + + let section = SectionBuilder::new(segment.name.clone(), segment.region.clone()) + .semantics(semantics) + .is_auto(true); + view.add_section(section); + } + + pub fn map_func_to_view( + &self, + view: &BinaryView, + til_translator: &TILTranslator, + func: &FunctionInfo, + ) { + let Some(bn_func) = view.add_auto_function(func.address) else { + tracing::warn!("Failed to add function for {:0x}", func.address); + return; + }; + + if let Some(func_ty) = &func.ty { + match til_translator.translate_type_info(&func_ty) { + Ok(bn_func_ty) => { + tracing::info!("Mapping function with type: {:0x}", func.address); + bn_func.apply_auto_discovered_type(&bn_func_ty); + } + Err(err) => { + tracing::warn!( + "Failed to translate type {:?} for function {:0x}: {}", + func_ty, + func.address, + err + ); + } + } + } + + // TODO: Attach a platform tuple to the FunctionInfo? + if let Some(func_sym) = symbol_from_func(func) { + tracing::info!( + "Mapping function symbol: {:0x} => {}", + func.address, + func_sym + ); + view.define_auto_symbol(&func_sym); + } + } + + pub fn map_label_to_view(&self, view: &BinaryView, label: &LabelInfo) { + let symbol = Symbol::builder(SymbolType::LocalLabel, &label.label, label.address).create(); + tracing::info!("Mapping label: {:0x} => {}", label.address, symbol); + view.define_auto_symbol(&symbol); + } + + pub fn map_name_to_view( + &self, + view: &BinaryView, + til_translator: &TILTranslator, + name: &NameInfo, + ) { + // Currently, we only want to use name info to map data variables, so skip anything in code. 
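+ // "In code" means either the address sits in a section with ReadOnlyCode semantics or
+ // it is already covered by a function (which may live in a differently-marked section),
+ // hence the two separate checks below.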
+ let within_code_section = view + .sections_at(name.address) + .iter() + .find(|s| s.semantics() == Semantics::ReadOnlyCode) + .is_some(); + if within_code_section || !view.functions_containing(name.address).is_empty() { + tracing::debug!("Skipping name contained in code: {:0x}", name.address); + return; + } + + if let Some(label) = &name.label { + let symbol = Symbol::builder(SymbolType::Data, &label, name.address).create(); + tracing::info!("Mapping name label: {:0x} => {}", name.address, symbol); + view.define_auto_symbol(&symbol); + } + + if let Some(data_ty) = &name.ty { + match til_translator.translate_type_info(&data_ty) { + Ok(data_ty) => { + tracing::info!("Mapping name with type: {:0x}", name.address); + view.define_auto_data_var(name.address, &data_ty); + } + Err(err) => { + tracing::warn!( + "Failed to translate type {:?} for name {:0x}: {}", + data_ty, + name.address, + err + ); + } + } + } + } + + pub fn map_comment_to_view(&self, view: &BinaryView, comment: &CommentInfo) { + // NOTE: This (`set_comment`) will generate an undo action. + // First try and attach the comment to the containing functions, if that fails, then + // attach the comment to the view. Attaching to the containing function can help with + // the comments' placement. + let functions = view.functions_containing(comment.address); + for func in &functions { + if func.start() == comment.address { + func.set_comment(&comment.comment); + } else { + func.set_comment_at(comment.address, &comment.comment); + } + } + + // We did not find any functions containing the comment, so attach it to the view. + if functions.is_empty() { + view.set_comment_at(comment.address, &comment.comment); + } + } +} + +fn symbol_from_func(func: &FunctionInfo) -> Option> { + let Some(func_name) = &func.name else { + return None; + }; + let short_func_name = binaryninja::demangle::demangle_llvm(&func_name, true) + .map(|qn| qn.to_string()) + .unwrap_or(func_name.clone()); + let sym_type = match func.is_library { + true => SymbolType::LibraryFunction, + false => SymbolType::Function, + }; + let symbol_builder = + Symbol::builder(sym_type, func_name, func.address).short_name(short_func_name); + Some(symbol_builder.create()) +} diff --git a/plugins/idb_import/src/parse.rs b/plugins/idb_import/src/parse.rs new file mode 100644 index 000000000..e679224fd --- /dev/null +++ b/plugins/idb_import/src/parse.rs @@ -0,0 +1,441 @@ +//! Parse the provided IDB / TIL file and extract information into a struct for further processing. 
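+//!
+//! Illustrative flow (a sketch mirroring `commands::load_file`; the path and `view` are
+//! placeholders):
+//!
+//! ```ignore
+//! let file = std::fs::File::open("sample.idb")?;
+//! let mut reader = std::io::BufReader::new(file);
+//! let idb_info = IDBFileParser::new().parse(&mut reader)?;
+//! crate::mapper::IDBMapper::new(idb_info).map_to_view(&view);
+//! ```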
+ +use idb_rs::addr_info::AddressInfo; +use idb_rs::id0::function::{FuncIdx, FuncordsIdx, IDBFunctionType}; +use idb_rs::id0::{ID0Section, Netdelta, SegmentType}; +use idb_rs::id1::ID1Section; +use idb_rs::id2::ID2Section; +use idb_rs::til::section::TILSection; +use idb_rs::til::TILTypeInfo; +use idb_rs::{identify_idb_file, Address, IDAKind, IDAUsize, IDAVariants, IDBFormat, IDBFormats}; +use serde::Serialize; +use std::ffi::CString; +use std::io::{BufRead, Seek}; +use std::ops::Range; + +#[derive(Debug, Clone, Serialize)] +pub struct SegmentInfo { + pub name: String, + pub region: Range, + pub ty: SegmentType, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FunctionInfo { + pub name: Option, + pub ty: Option, + pub address: u64, + pub is_library: bool, + pub is_no_return: bool, + pub is_entry: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct NameInfo { + pub address: u64, + pub ty: Option, + pub label: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct CommentInfo { + pub address: u64, + pub comment: String, + pub is_repeatable: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct LabelInfo { + pub address: u64, + pub label: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FunctionCordInfo { + comments: Vec, + labels: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ID0Info { + pub base_address: Option, + pub segments: Vec, + pub functions: Vec, + pub comments: Vec, + pub labels: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct DirTreeInfo { + pub functions: Vec, + pub types: Vec, + /// Contains both function and data names (along with their types). + pub names: Vec, +} + +#[derive(Debug, Clone, Serialize, Default)] +pub struct IDBInfo { + pub sha256: Option, + pub id0: Option, + // NOTE: TILSection is self-contained, so we do no pre-processing. + pub til: Option, + pub dir_tree: Option, +} + +impl IDBInfo { + /// Retrieve the functions from both the `id0` and `dir_tree` sections, with function information + /// merged; this is the "sanitized" version of the functions contained in an IDB. + pub fn merged_functions(&self) -> Vec { + let mut id0_functions = self + .id0 + .as_ref() + .map(|id0| id0.functions.clone()) + .unwrap_or_default(); + let dir_tree_functions = self + .dir_tree + .as_ref() + .map(|dir_tree| dir_tree.functions.clone()) + .unwrap_or_default(); + id0_functions.extend(dir_tree_functions); + id0_functions.sort_by_key(|f| f.address); + id0_functions.dedup_by(|a, b| { + if a.address != b.address { + return false; + } + // We need to move data from one function to another, if a name is missing + // in one of the functions, add it to the function we are keeping, if we are missing + // a type, do the same. + if a.name.is_some() { + b.name = a.name.clone(); + } + if a.ty.is_some() { + b.ty = a.ty.clone(); + } + true + }); + id0_functions + } +} + +/// Parsed the IDB data into [`IDBInfo`]. +pub struct IDBFileParser; + +impl IDBFileParser { + pub fn new() -> Self { + Self {} + } + + pub fn parse(&self, data: &mut I) -> anyhow::Result { + match identify_idb_file(data)? 
{ + IDBFormats::Separated(f) => match f { + IDAVariants::IDA32(f_32) => self.parse_format(&mut *data, f_32), + IDAVariants::IDA64(f_64) => self.parse_format(&mut *data, f_64), + }, + IDBFormats::InlineUncompressed(f) => self.parse_format(&mut *data, f), + IDBFormats::InlineCompressed(f) => { + let mut decompressed = Vec::new(); + let uncompressed_format = f.decompress_into_memory(data, &mut decompressed)?; + let mut decompressed_data = std::io::Cursor::new(decompressed); + self.parse_format(&mut decompressed_data, uncompressed_format) + } + } + } + + pub fn parse_format( + &self, + data: &mut I, + format: impl IDBFormat, + ) -> anyhow::Result { + let mut id0 = None; + if let Some(id0_loc) = format.id0_location() { + id0 = Some(format.read_id0(&mut *data, id0_loc)?); + } + + let mut id1 = None; + if let Some(id1_loc) = format.id1_location() { + id1 = Some(format.read_id1(&mut *data, id1_loc)?); + } + + let mut id2 = None; + if let Some(id2_loc) = format.id2_location() { + id2 = Some(format.read_id2(&mut *data, id2_loc)?); + } + + // TODO: Decompress til + let mut til = None; + if let Some(til_loc) = format.til_location() { + til = Some(format.read_til(&mut *data, til_loc)?); + }; + + let dir_tree_info = match (id0.as_ref(), id1.as_ref(), id2.as_ref(), til.as_ref()) { + (Some(id0), Some(id1), id2, til) => Some(self.parse_dir_tree(id0, id1, id2, til)?), + _ => None, + }; + + for entry in id0.as_ref().unwrap().all_entries() { + if let Ok(name_cstr) = CString::new(entry.key.clone()) { + let name = name_cstr.to_str(); + println!("ID0 entry: {:?}", name); + } else { + if let Ok(val_cstr) = CString::new(entry.value.clone()) { + println!("ID0 raw entry: {:?} => {:?}", entry.key, val_cstr.to_str()); + } + } + } + + for entry in id2.as_ref().unwrap().all_bytes() { + println!("ID2 entry: {:?}", entry); + } + + if let Some(nam_idx) = format.nam_location() { + let nam = format.read_nam(&mut *data, nam_idx)?; + } + + let id0_info = id0.as_ref().map(|id0| self.parse_id0(id0)).transpose()?; + + Ok(IDBInfo { + sha256: None, + id0: id0_info, + til, + dir_tree: dir_tree_info, + }) + } + + pub fn parse_id0(&self, id0: &ID0Section) -> anyhow::Result { + let root_info_idx = id0.root_node()?; + let root_info = id0.ida_info(root_info_idx)?; + let netdelta = root_info.netdelta(); + + let mut segments = Vec::new(); + if let Some(seg_idx) = id0.segments_idx()? { + for entry in id0.segments(seg_idx) { + let Ok(segment) = entry else { + tracing::warn!("Failed to read segment entry"); + continue; + }; + let name = id0.segment_name(segment.name)?.map(|s| s.to_string()); + let seg_start = segment.address.start.into_raw().into_u64(); + let seg_end = segment.address.end.into_raw().into_u64(); + segments.push(SegmentInfo { + name: name.unwrap_or_else(|| format!("seg_{:0x}", seg_start)), + region: seg_start..seg_end, + ty: segment.seg_type, + }); + } + } + + let mut functions = Vec::new(); + let mut comments = Vec::new(); + let mut labels = Vec::new(); + if let Some(funcs_idx) = id0.funcs_idx()? { + if let Some(funcords_idx) = id0.funcords_idx()? { + let info = self.parse_func_cord(&id0, netdelta, funcords_idx, funcs_idx)?; + comments.extend(info.comments); + labels.extend(info.labels); + } + + for entry in id0.fchunks(funcs_idx) { + let Ok(func) = entry else { + tracing::warn!("Failed to read function entry"); + continue; + }; + + let func_start = func.address.start.into_raw().into_u64(); + match &func.extra { + IDBFunctionType::Tail(_) => { + tracing::debug!("Skipping tail function... 
{:0x}", func_start); + } + IDBFunctionType::NonTail(func_ext) => { + if func.flags.is_outline() { + tracing::debug!("Skipping outlined function... {:0x}", func_start); + continue; + } + + // TODO: Parse function registers and params + for def_reg in id0.function_defined_registers(netdelta, &func, &func_ext) { + tracing::info!("{:0x} : Function register: {:?}", func_start, def_reg); + let Ok(_def_reg) = def_reg else { + tracing::warn!("Failed to read function register entry"); + continue; + }; + } + + if let Ok(stack_names) = + id0.function_defined_variables(&root_info, &func, &func_ext) + { + tracing::info!( + "{:0x} : Function stack variables: {:#?}", + func_start, + stack_names + ); + } + + functions.push(FunctionInfo { + name: None, + ty: None, + address: func_start, + is_library: func.flags.is_lib(), + is_no_return: func.flags.is_no_return(), + is_entry: false, + }); + } + } + } + } + + if let Ok(entry_points) = id0.entry_points(&root_info) { + for entry in entry_points { + // TODO: What to do with entry.forwarded? + functions.push(FunctionInfo { + name: Some(entry.name), + ty: entry.entry_type, + address: entry.address.into_u64(), + is_library: false, + is_no_return: false, + is_entry: true, + }); + } + } + + tracing::info!("{:#?}", root_info); + + let base_address = match root_info.addresses.loading_base.into_u64() { + // An IDB with zero loading base is possibly not loaded there. + // For example, see the FlawedGrace.idb in the idb-rs resources directory. + 0 => None, + loading_base => Some(loading_base.into_u64()), + }; + + Ok(ID0Info { + base_address, + segments, + functions, + comments, + labels, + }) + } + + pub fn parse_func_cord( + &self, + id0: &ID0Section, + netdelta: Netdelta, + funcords_idx: FuncordsIdx, + funcs_idx: FuncIdx, + ) -> anyhow::Result { + let mut comments = Vec::new(); + let mut labels = Vec::new(); + + for entry in id0.funcords(funcords_idx)? { + let Ok(address) = entry else { + tracing::warn!("Failed to read function address entry"); + continue; + }; + + for (label_addr, label_data) in id0.local_labels(netdelta, address)? { + if let Ok(label_data_cstr) = CString::new(label_data) { + let label_data_str = label_data_cstr.to_string_lossy(); + labels.push(LabelInfo { + address: label_addr.into_raw().into_u64(), + label: label_data_str.to_string(), + }); + } + } + + if let Some(comment) = id0.func_cmt(funcs_idx, netdelta, address)? { + comments.push(CommentInfo { + address: address.into_raw().into_u64(), + comment: comment.to_string(), + is_repeatable: false, + }); + } + + if let Some(comment) = id0.func_repeatable_cmt(funcs_idx, netdelta, address)? 
{ + comments.push(CommentInfo { + address: address.into_raw().into_u64(), + comment: comment.to_string(), + is_repeatable: true, + }); + } + } + + Ok(FunctionCordInfo { comments, labels }) + } + + pub fn parse_dir_tree( + &self, + id0: &ID0Section, + id1: &ID1Section, + id2: Option<&ID2Section>, + til: Option<&TILSection>, + ) -> anyhow::Result { + let root_info_idx = id0.root_node()?; + let root_info = id0.ida_info(root_info_idx)?; + let netdelta = root_info.netdelta(); + + // sha256 + + let func_info_from_addr = + |addr_info: &AddressInfo| -> anyhow::Result> { + let func_name = addr_info.label()?.map(|s| s.to_string()); + let func_ty = addr_info.tinfo(&root_info)?; + let func_addr = addr_info.address().into_raw().into_u64(); + Ok(Some(FunctionInfo { + name: func_name, + ty: func_ty, + address: func_addr, + is_library: false, + is_no_return: false, + is_entry: false, + })) + }; + + let mut functions = Vec::new(); + if let Some(func_dir_tree) = id0.dirtree_function_address()? { + func_dir_tree.visit_leafs(|addr_raw| { + let addr = Address::from_raw(*addr_raw); + if let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, addr) { + // id0.reference_info() + if let Ok(Some(func_info)) = func_info_from_addr(&info) { + functions.push(func_info); + } + } + }); + } + + let mut names = Vec::new(); + if let Some(names_dir_tree) = id0.dirtree_names()? { + names_dir_tree.visit_leafs(|name_raw| { + let addr = Address::from_raw(*name_raw); + if let Some(info) = AddressInfo::new(id0, id1, id2, netdelta, addr) { + names.push(NameInfo { + address: info.address().into_raw().into_u64(), + ty: info.tinfo(&root_info).ok().flatten().map(|t| t.clone()), + label: info.label().ok().flatten().map(|s| s.to_string()), + }); + } + }); + } + + let mut types = Vec::new(); + if let Some(til) = til { + if let Some(type_dir_tree) = id0.dirtree_tinfos()? { + type_dir_tree.visit_leafs(|type_ord_raw| { + if let Some(type_info) = til.get_ord(type_ord_raw.into_u64()) { + tracing::info!( + "Found type: {} {}", + type_ord_raw.into_u64(), + type_info.name.to_string() + ); + types.push(type_info.clone()); + } + }) + } + } + + Ok(DirTreeInfo { + functions, + types, + names, + }) + } +} diff --git a/plugins/idb_import/src/translate.rs b/plugins/idb_import/src/translate.rs new file mode 100644 index 000000000..a99bc394a --- /dev/null +++ b/plugins/idb_import/src/translate.rs @@ -0,0 +1,583 @@ +//! Translate type information from IDB to Binary Ninja, this will not discover type information +//! by which we mean pull type information from outside sources, the mapper does that. + +use binaryninja::architecture::{Architecture, ArchitectureExt, CoreArchitecture}; +use binaryninja::calling_convention::CoreCallingConvention; +use binaryninja::confidence::Conf; +use binaryninja::platform::Platform; +use binaryninja::rc::Ref; +use binaryninja::types::{ + EnumerationBuilder, FunctionParameter, MemberAccess, MemberScope, NamedTypeReference, + NamedTypeReferenceClass, StructureBuilder, StructureMember, StructureType, TypeBuilder, +}; +use idb_rs::til::function::CallingConvention; +use idb_rs::til::r#enum::EnumMembers; +use idb_rs::til::{Basic, TILTypeInfo, TypeVariant, TyperefType, TyperefValue}; +use std::collections::{HashMap, HashSet}; +use std::rc::Rc; +use std::sync::Mutex; + +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct ReferencedType { + pub name: String, + pub ordinal: Option, + /// The width of the type in bytes, if known. + /// + /// This is required to be able to place NTR's in structures and unions. 
+ pub width: Option, +} + +impl ReferencedType { + pub fn new(name: String) -> Self { + Self { + ordinal: None, + name, + width: None, + } + } + + pub fn new_with_ordinal(name: String, ordinal: u64) -> Self { + Self { + ordinal: Some(ordinal), + name, + width: None, + } + } +} + +impl From<&TILTypeInfo> for ReferencedType { + fn from(value: &TILTypeInfo) -> Self { + Self { + ordinal: match value.ordinal { + 0 => None, + ord => Some(ord), + }, + name: value.name.to_string(), + width: None, + } + } +} + +pub struct TILTranslator { + /// Default size of addresses. + pub address_size: usize, + /// Default size of enumerations. + pub enum_size: usize, + /// Reference types, for use with typedefs. + /// + /// This is necessary because ordinals do not have names and can't be made into a [`NamedTypeReference`]. + pub reference_types_by_ord: HashMap, + pub reference_types_by_name: HashMap, + /// The types that have been used in the translation process. + /// + /// For a complete analysis, we will take these used types, attempt to find them in type libraries, + /// then add them to the binary view. + /// + /// NOTE: Not to be confused with `reference_types_by_ord`, which is a map of ordinal to reference types. + pub used_types: Rc>>, + // pub referenced_types: Rc>>, + pub default_calling_convention: Option>, + pub cdecl_calling_convention: Option>, + pub stdcall_calling_convention: Option>, + pub fastcall_calling_convention: Option>, +} + +impl TILTranslator { + pub fn new(address_size: usize) -> Self { + Self { + address_size, + enum_size: address_size / 2, + reference_types_by_ord: HashMap::new(), + reference_types_by_name: HashMap::new(), + used_types: Rc::new(Mutex::new(HashSet::new())), + default_calling_convention: None, + cdecl_calling_convention: None, + stdcall_calling_convention: None, + fastcall_calling_convention: None, + } + } + + pub fn new_from_platform(platform: &Platform) -> Self { + Self { + address_size: platform.address_size(), + enum_size: platform.arch().default_integer_size(), + reference_types_by_ord: HashMap::new(), + reference_types_by_name: HashMap::new(), + used_types: Rc::new(Mutex::new(HashSet::new())), + default_calling_convention: platform.get_default_calling_convention(), + cdecl_calling_convention: platform.get_cdecl_calling_convention(), + stdcall_calling_convention: platform.get_stdcall_calling_convention(), + fastcall_calling_convention: platform.get_fastcall_calling_convention(), + } + } + + pub fn new_from_arch(arch: &CoreArchitecture) -> Self { + Self { + address_size: arch.address_size(), + enum_size: arch.default_integer_size(), + reference_types_by_ord: HashMap::new(), + reference_types_by_name: HashMap::new(), + used_types: Rc::new(Mutex::new(HashSet::new())), + default_calling_convention: arch.get_default_calling_convention(), + cdecl_calling_convention: arch.get_cdecl_calling_convention(), + stdcall_calling_convention: arch.get_stdcall_calling_convention(), + fastcall_calling_convention: arch.get_fastcall_calling_convention(), + } + } + + pub fn with_til_info(mut self, til: &idb_rs::til::section::TILSection) -> Self { + if let Some(size_enum) = til.header.size_enum { + self.enum_size = size_enum.get() as usize; + } + + // Add referencable types so that type def lookups can occur. + self.reference_types_by_ord.reserve(til.types.len()); + for (_idx, ty) in til.types.iter().enumerate() { + self.add_referenced_type_info(ty); + } + + // TODO: Handle address (pointer) size information? + self + } + + /// Add a type that can be referenced by ordinal or name. 
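+ ///
+ /// Illustrative note (a sketch of intent): once `with_til_info` has registered every TIL
+ /// entry here, a typedef that only carries an ordinal can be resolved through
+ /// `reference_types_by_ord` to recover a name (and, when known, a width) for building a
+ /// [`NamedTypeReference`].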
+ pub fn add_referenced_type_info(&mut self, ty: &TILTypeInfo) { + let mut referenced_type = ReferencedType::from(ty); + referenced_type.width = self.width_of_type(&ty.tinfo).ok(); + self.reference_types_by_ord + .insert(ty.ordinal, referenced_type.clone()); + self.reference_types_by_name + .insert(referenced_type.name.clone(), referenced_type); + } + + pub fn translate_type_info( + &self, + til_ty: &idb_rs::til::Type, + ) -> anyhow::Result> { + let builder = match &til_ty.type_variant { + TypeVariant::Basic(v) => self.build_basic_ty(&v)?, + TypeVariant::Pointer(v) => self.build_pointer_ty(&v)?, + TypeVariant::Function(v) => self.build_function_ty(&v)?, + TypeVariant::Array(v) => self.build_array_ty(&v)?, + TypeVariant::Typeref(v) => self.build_type_ref_ty(&v)?, + TypeVariant::Struct(v) => self.build_udt_ty(&v, false)?, + TypeVariant::Union(v) => self.build_udt_ty(&v, true)?, + TypeVariant::Enum(v) => self.build_enum_ty(&v)?, + TypeVariant::Bitfield(v) => self.build_bitfield_ty(&v)?, + }; + + builder.set_const(til_ty.is_const); + builder.set_volatile(til_ty.is_volatile); + Ok(builder.finalize()) + } + + pub fn build_basic_ty(&self, basic_ty: &idb_rs::til::Basic) -> anyhow::Result { + use idb_rs::til::Basic; + // TODO: Grab the sizing information of these types from the TIL instead of hardcoding. + match basic_ty { + Basic::Void => Ok(TypeBuilder::void()), + Basic::Unknown { bytes } => { + // In the samples provided it appears that unknown can be used to represent a byte, + // so we are going to be liberal and allow unknown basic types to be treated as a sized int. + Ok(TypeBuilder::int(*bytes as usize, false)) + } + Basic::Bool => Ok(TypeBuilder::bool()), + Basic::BoolSized { .. } => { + // TODO: This needs to be resized, if that cannot be done, make a NTR to an int named BOOL? + Ok(TypeBuilder::bool()) + } + Basic::Char => Ok(TypeBuilder::char()), + Basic::SegReg => Err(anyhow::anyhow!("SegReg is not supported")), + Basic::Short { is_signed } => Ok(TypeBuilder::int(2, is_signed.unwrap_or(true))), + Basic::Long { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), + Basic::LongLong { is_signed } => Ok(TypeBuilder::int(8, is_signed.unwrap_or(true))), + Basic::Int { is_signed } => Ok(TypeBuilder::int(4, is_signed.unwrap_or(true))), + Basic::IntSized { bytes, is_signed } => { + let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); + Ok(TypeBuilder::int(bytes as usize, is_signed.unwrap_or(true))) + } + Basic::Float { bytes } => { + let bytes: u8 = u8::try_from(*bytes).unwrap_or(4); + Ok(TypeBuilder::float(bytes as usize)) + } + Basic::LongDouble => Ok(TypeBuilder::float(8)), + } + } + + pub fn build_pointer_ty( + &self, + pointer_ty: &idb_rs::til::pointer::Pointer, + ) -> anyhow::Result { + // TODO: Consult pointer_ty.closure (is this how we can get based pointers?) + let inner_ty = self.translate_type_info(&pointer_ty.typ)?; + Ok(TypeBuilder::pointer_of_width( + &inner_ty, + self.address_size, + // NOTE: Set later in `translate_type_info`. + false, + // NOTE: Set later in `translate_type_info`. + false, + None, + )) + } + + pub fn build_function_ty( + &self, + function_ty: &idb_rs::til::function::Function, + ) -> anyhow::Result { + // TODO: Once branch `test_call_layout` lands use function_ty.retloc to recover return location. 
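+        // Translate the return type and parameters first, then pick a calling convention:
+        // conventions the platform/architecture exposes (cdecl, stdcall, fastcall) are mapped
+        // directly below; anything else falls through to a plain function type.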
+ let return_ty = self.translate_type_info(&function_ty.ret)?; + let params: Vec = self.build_function_params(&function_ty.args)?; + let has_variable_args = false; + let stack_adjust = Conf::new(0, 0); + + let builder = match function_ty.calling_convention { + Some(CallingConvention::Cdecl) | Some(CallingConvention::Thiscall) + if self.cdecl_calling_convention.is_some() => + { + let cc = self.cdecl_calling_convention.clone().unwrap(); + TypeBuilder::function_with_opts( + &return_ty, + ¶ms, + has_variable_args, + cc, + stack_adjust, + ) + } + Some(CallingConvention::Stdcall) if self.stdcall_calling_convention.is_some() => { + let cc = self.stdcall_calling_convention.clone().unwrap(); + TypeBuilder::function_with_opts( + &return_ty, + ¶ms, + has_variable_args, + cc, + stack_adjust, + ) + } + Some(CallingConvention::Fastcall) if self.fastcall_calling_convention.is_some() => { + let cc = self.fastcall_calling_convention.clone().unwrap(); + TypeBuilder::function_with_opts( + &return_ty, + ¶ms, + has_variable_args, + cc, + stack_adjust, + ) + } + _ => TypeBuilder::function(&return_ty, params, has_variable_args), + }; + + Ok(builder) + } + + pub fn build_function_params( + &self, + args: &[idb_rs::til::function::FunctionArg], + ) -> anyhow::Result> { + args.iter() + .enumerate() + .map(|(idx, arg)| { + let arg_name = arg + .name + .clone() + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("arg{}", idx)); + self.translate_type_info(&arg.ty) + .map(|ty| FunctionParameter::new(ty, arg_name, None)) + }) + .collect() + } + + pub fn build_array_ty( + &self, + _array_ty: &idb_rs::til::array::Array, + ) -> anyhow::Result { + let elem_ty = self.translate_type_info(&_array_ty.elem_type)?; + // NOTE: IDA seems to allow DST array (optional nelem) we are just going to default zero count + // for those and assume that to be fine, this obviously is a little bit tricky to assume but + // I imagine IDA only allows these at the end of a struct, and makes the structure unsized, + // not exactly sure how to handle this yet. + let count = _array_ty.nelem.map(|n| n.get()).unwrap_or(0); + Ok(TypeBuilder::array(&elem_ty, count as u64)) + } + + pub fn build_type_ref_ty( + &self, + typ_ref_ty: &idb_rs::til::Typeref, + ) -> anyhow::Result { + let type_class = match typ_ref_ty.ref_type { + Some(TyperefType::Struct) => NamedTypeReferenceClass::StructNamedTypeClass, + Some(TyperefType::Union) => NamedTypeReferenceClass::UnionNamedTypeClass, + Some(TyperefType::Enum) => NamedTypeReferenceClass::EnumNamedTypeClass, + None => NamedTypeReferenceClass::UnknownNamedTypeClass, + }; + + // Named type references can be placed directly, otherwise we have to resolve the ordinal + // to get a name for the type reference. Once we get that, we make a NamedTypeReference + // and then place the types ordinal in the list of referenced types, so that we can pull + // them into the binary view later. + match &typ_ref_ty.typeref_value { + TyperefValue::Name(Some(ref_name)) => { + if let Ok(mut used_types) = self.used_types.lock() { + let ty_ref = ReferencedType::new(ref_name.to_string()); + used_types.insert(ty_ref.clone()); + } + let ntr = NamedTypeReference::new(type_class, ref_name.to_string()); + Ok(TypeBuilder::named_type(&ntr)) + } + TyperefValue::Name(None) => { + // IDA will use an unnamed type reference for a struct, union or enum with no definition. 
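+                // Fall back to an empty placeholder structure/union so that members referencing
+                // the incomplete type still translate; enums have no such placeholder and are
+                // rejected below.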
+ match typ_ref_ty.ref_type { + Some(TyperefType::Struct) => { + let empty_struct = StructureBuilder::new().finalize(); + Ok(TypeBuilder::structure(&empty_struct)) + } + Some(TyperefType::Union) => { + let empty_union = StructureBuilder::new() + .structure_type(StructureType::UnionStructureType) + .finalize(); + Ok(TypeBuilder::structure(&empty_union)) + } + None | Some(TyperefType::Enum) => { + Err(anyhow::anyhow!("Unnamed type references are not supported")) + } + } + } + TyperefValue::Ordinal(ref_ord) => { + if let Some(ty_ref) = self.reference_types_by_ord.get(&(*ref_ord as u64)) { + // The ordinal has an associated reference type, use the name and insert this into + // the list of used types. + if let Ok(mut used_types) = self.used_types.lock() { + used_types.insert(ty_ref.clone()); + } + let ntr = NamedTypeReference::new(type_class, &ty_ref.name); + Ok(TypeBuilder::named_type(&ntr)) + } else { + Err(anyhow::anyhow!( + "Type reference ordinal not found: {}", + ref_ord + )) + } + } + } + } + + pub fn build_udt_ty( + &self, + udt_ty: &idb_rs::til::udt::UDT, + is_union: bool, + ) -> anyhow::Result { + let mut builder = StructureBuilder::new(); + if let Some(align) = udt_ty.alignment { + builder.alignment(align.get().into()); + } + builder.packed(udt_ty.is_unaligned && udt_ty.is_unknown_8); + if is_union { + builder.structure_type(StructureType::UnionStructureType); + } + + for mut member in self.build_udt_members(&udt_ty.members)? { + if is_union { + member.offset = 0; + } + builder.insert_member(member, false); + } + + // TODO: Handle udt_ty.extra_padding (is that tail padding?) + Ok(TypeBuilder::structure(&builder.finalize())) + } + + pub fn build_udt_members( + &self, + udt_members: &[idb_rs::til::udt::UDTMember], + ) -> anyhow::Result> { + let mut current_offset = 0; + let mut member_iter = udt_members.iter().peekable(); + let mut structure_members = Vec::new(); + while let Some(member) = member_iter.next() { + let current_byte_offset = current_offset / 8; + let member_name = member + .name + .clone() + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("field_{}", current_byte_offset)); + let member_ty = Conf::new(self.translate_type_info(&member.member_type)?, 255); + let bn_member = match member.member_type.type_variant { + TypeVariant::Bitfield(bf) => StructureMember::new_bitfield( + member_ty, + member_name, + current_offset, + bf.width as u8, + MemberAccess::PublicAccess, + MemberScope::NoScope, + ), + _ => { + let member_align = member_ty.contents.alignment().max(1) as u64; + let member_offset = if current_byte_offset % member_align == 0 { + current_byte_offset + } else { + current_byte_offset + (member_align - (current_byte_offset % member_align)) + }; + // NTR will be zero-sized, we need to handle this by computing the width ourselves. 
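+                    // `width_of_type` resolves named references through the pre-registered
+                    // reference types, so the running offset still advances even though the
+                    // NTR member itself reports no size yet.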
+ let referenced_width = self.width_of_type(&member.member_type)?; + current_offset += referenced_width as u64 * 8; + StructureMember::new( + member_ty, + member_name, + member_offset, + MemberAccess::PublicAccess, + MemberScope::NoScope, + ) + } + }; + + structure_members.push(bn_member); + } + + Ok(structure_members) + } + + pub fn build_enum_ty( + &self, + enum_ty: &idb_rs::til::r#enum::Enum, + ) -> anyhow::Result { + let mut enumeration_builder = EnumerationBuilder::new(); + match &enum_ty.members { + EnumMembers::Regular(members) => { + for (idx, member) in members.iter().enumerate() { + let member_name = member + .name + .as_ref() + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("member_{}", idx)); + enumeration_builder.insert(&member_name, member.value); + } + } + EnumMembers::Groups(groups) => { + for (idx, group) in groups.iter().enumerate() { + // TODO: How does this grouping actually impact the enum besides the name? + let group_name = group + .field + .name + .as_ref() + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("group_{}", idx)); + for (idx, member) in group.sub_fields.iter().enumerate() { + let member_name = member + .name + .as_ref() + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("member_{}", idx)); + let grouped_member_name = format!("{}_{}", group_name, member_name); + enumeration_builder.insert(&grouped_member_name, member.value); + } + } + } + } + + let width = enum_ty + .storage_size + .map(|s| s.get() as usize) + .unwrap_or(self.enum_size); + Ok(TypeBuilder::enumeration( + &enumeration_builder.finalize(), + width.try_into()?, + enum_ty.is_signed, + )) + } + + /// A bitfield is a single member in an udt that plays the role of a bit-aligned integer. + /// + /// NOTE: This does not return the bit-aligned integer, this returns the **byte-aligned** integer, + /// you must constrain the integer yourself when constructing a Binary Ninja structure. + pub fn build_bitfield_ty( + &self, + bitfield_ty: &idb_rs::til::bitfield::Bitfield, + ) -> anyhow::Result { + self.build_basic_ty(&idb_rs::til::Basic::IntSized { + bytes: bitfield_ty.nbytes, + is_signed: Some(!bitfield_ty.unsigned), + }) + } + + /// Computes the width of a type, in bytes. + pub fn width_of_type(&self, ty: &idb_rs::til::Type) -> anyhow::Result { + match &ty.type_variant { + TypeVariant::Basic(basic) => match basic { + Basic::Void => Ok(0), + Basic::Unknown { bytes } => Ok(*bytes as usize), + Basic::Bool => Ok(1), + Basic::BoolSized { bytes } => Ok(bytes.get() as usize), + Basic::Char => Ok(1), + Basic::SegReg => Ok(8), + Basic::Short { .. } => Ok(2), + Basic::Long { .. } => Ok(4), + Basic::LongLong { .. } => Ok(8), + Basic::Int { .. } => Ok(4), + Basic::IntSized { bytes, .. } => Ok(bytes.get() as usize), + Basic::Float { bytes } => Ok(bytes.get() as usize), + Basic::LongDouble => Ok(8), + }, + TypeVariant::Pointer(_) => Ok(self.address_size), + TypeVariant::Function(_) => Err(anyhow::anyhow!("Function types do not have a width")), + TypeVariant::Array(arr) => { + let elem_width = self.width_of_type(&arr.elem_type)?; + // TODO: A DST array is unsized or what? I think we should error IMO. 
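+                // For now a missing element count is treated as zero, matching `build_array_ty`.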
+                let count = arr.nelem.map(|n| n.get()).unwrap_or(0);
+                Ok(elem_width * count as usize)
+            }
+            TypeVariant::Typeref(r) => {
+                let resolved_ty = self.resolve_type_ref(r).ok_or_else(|| {
+                    anyhow::anyhow!(
+                        "Type reference {:?} could not be resolved to a type",
+                        r.typeref_value
+                    )
+                })?;
+                resolved_ty.width.ok_or_else(|| {
+                    anyhow::anyhow!("Type reference has no width: {:?}", resolved_ty)
+                })
+            }
+            TypeVariant::Struct(s) => {
+                let mut total_width = 0;
+                for member in &s.members {
+                    total_width += self.width_of_type(&member.member_type)?;
+                }
+                // TODO: Handle alignment and bitfields.
+                Ok(total_width)
+            }
+            TypeVariant::Union(u) => {
+                // Width of the largest member.
+                let mut max_width = 0;
+                for member in &u.members {
+                    let member_width = self.width_of_type(&member.member_type)?;
+                    max_width = max_width.max(member_width);
+                }
+                // TODO: Handle alignment.
+                Ok(max_width)
+            }
+            TypeVariant::Enum(e) => Ok(e
+                .storage_size
+                .map(|s| s.get() as usize)
+                .unwrap_or(self.enum_size)),
+            TypeVariant::Bitfield(b) => {
+                // NOTE: We return the byte-aligned width here; if this is inside a structure you
+                // must constrain it to the bitfield's storage yourself.
+                Ok(b.nbytes.get() as usize)
+            }
+        }
+    }
+
+    /// Try to find the [`ReferencedType`] for a given type reference.
+    pub fn resolve_type_ref(&self, type_ref: &idb_rs::til::Typeref) -> Option<ReferencedType> {
+        match &type_ref.typeref_value {
+            TyperefValue::Name(Some(ref_name)) => self
+                .reference_types_by_name
+                .get(&ref_name.to_string())
+                .cloned(),
+            TyperefValue::Ordinal(ref_ord) => {
+                self.reference_types_by_ord.get(&(*ref_ord as u64)).cloned()
+            }
+            _ => None,
+        }
+    }
+}
diff --git a/rust/src/types/structure.rs b/rust/src/types/structure.rs
index 7de466a4a..3b609287c 100644
--- a/rust/src/types/structure.rs
+++ b/rust/src/types/structure.rs
@@ -571,6 +571,16 @@ impl StructureMember {
     pub fn bit_offset(&self) -> u64 {
         (self.offset * 8) + self.bit_position.unwrap_or(0) as u64
     }
+
+    /// Member width in bits.
+    ///
+    /// NOTE: This is a helper that calculates the bit width of the member, even for non-bitfield members.
+    /// It is not to be confused with the `bit_width` field, which is only set for bitfield members.
+    pub fn width_in_bits(&self) -> u64 {
+        self.bit_width
+            .map(|w| w as u64)
+            .unwrap_or(self.ty.contents.width() * 8)
+    }
 }
 
 impl CoreArrayProvider for StructureMember {