From f64b5d71209f5b7b1913d7ed96a5f2d62349e4de Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 1 Apr 2026 21:28:11 -0700 Subject: [PATCH] start running global constructors --- .gitignore | 3 +- Cargo.toml | 6 + compile-qemu-for-decoder.sh | 21 +- src/lib.rs | 1258 +++++++++++++++++++++++++++++------ src/main.rs | 4 +- 5 files changed, 1096 insertions(+), 196 deletions(-) diff --git a/.gitignore b/.gitignore index 6623e2b..ae32c89 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,6 @@ # See Notices.txt for copyright information /downloads /qemu-10.2.2* -/libqemu-ppc64-softmmu.* +*.ll +*.bc /target diff --git a/Cargo.toml b/Cargo.toml index 9e06087..039cb81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,12 @@ name = "compile-qemu-for-decoder" version = "0.1.0" edition = "2024" +[features] +verbose-trace = [] + [dependencies] eyre = "0.6.12" inkwell = { version = "0.8.0", features = ["no-libffi-linking", "llvm20-1", "target-powerpc", "llvm20-1-prefer-dynamic"], default-features = false } + +[profile.dev] +opt-level = 1 diff --git a/compile-qemu-for-decoder.sh b/compile-qemu-for-decoder.sh index a3e56ba..2b0c7d9 100755 --- a/compile-qemu-for-decoder.sh +++ b/compile-qemu-for-decoder.sh @@ -28,9 +28,23 @@ function build-qemu() ( --enable-tcg-interpreter \ --without-default-features \ --cross-prefix=powerpc64le-linux-gnu- - QEMU_LIBRARIES_WE_USE=(libqemu-ppc64-softmmu.a libqom.a) - make -j"$(nproc)" "${QEMU_LIBRARIES_WE_USE[@]}" - llvm-link-20 -o ../libqemu-ppc64-softmmu.bc "${QEMU_LIBRARIES_WE_USE[@]}" + target="$(ninja -t query qemu-system-ppc64)" + mapfile -t target_lines <<<"$target" + # add missed libs + link_inputs=(libpage-vary-common.a libqemuutil.a) + # add the rest of the libs and object files + for l in "${target_lines[@]}"; do + if [[ "$l" =~ ^' '([a-z].*)$ ]]; then + link_inputs+=("${BASH_REMATCH[1]}") + elif [[ "$l" == " outputs:" ]]; then + break + fi + done + make -j"$(nproc)" qemu-system-ppc64 + echo "linking bitcode" + llvm-link-20 --only-needed --ignore-non-bitcode -o ../qemu-system-ppc64.bc "${link_inputs[@]}" + echo "disassembling bitcode" + llvm-dis-20 -o ../qemu-system-ppc64.ll ../qemu-system-ppc64.bc ) mkdir -p downloads @@ -43,4 +57,3 @@ download_if_needed "downloads/$QEMU_SOURCE.tar.xz" "$QEMU_SOURCE_HASH" \ "https://download.qemu.org/$QEMU_SOURCE.tar.xz" build-qemu -llvm-dis-20 -o libqemu-ppc64-softmmu.ll libqemu-ppc64-softmmu.bc diff --git a/src/lib.rs b/src/lib.rs index c65b7cf..72b0398 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,13 +3,16 @@ use eyre::{OptionExt, Result, bail, ensure, eyre}; use inkwell::{ - AddressSpace, + AddressSpace, IntPredicate, + basic_block::BasicBlock, context::Context, llvm_sys::{ - LLVMValueKind, + LLVMTypeKind, LLVMValueKind, core::{ - LLVMGetAggregateElement, LLVMGetCalledValue, LLVMGetNumArgOperands, LLVMGetValueKind, - LLVMLookupIntrinsicID, + LLVMGetAggregateElement, LLVMGetCalledValue, LLVMGetConstOpcode, + LLVMGetGEPSourceElementType, LLVMGetNumArgOperands, LLVMGetNumOperands, LLVMGetOperand, + LLVMGetTypeKind, LLVMGetValueKind, LLVMIsAUser, LLVMLookupIntrinsicID, LLVMTypeOf, + LLVMValueAsBasicBlock, }, }, module::Module, @@ -18,7 +21,7 @@ use inkwell::{ types::{AnyType, AnyTypeEnum, BasicType, BasicTypeEnum, FunctionType}, values::{ AnyValue, AnyValueEnum, ArrayValue, AsValueRef, BasicValueEnum, FunctionValue, GlobalValue, - InstructionOpcode, InstructionValue, Operand, PointerValue, + InstructionOpcode, InstructionValue, Operand, PhiValue, PointerValue, }, }; use std::{ @@ -102,6 +105,8 @@ macro_rules! declare_intrinsics { declare_intrinsics! { #[name = "llvm.lifetime.start"] LifetimeStart, + #[name = "llvm.lifetime.end"] + LifetimeEnd, #[name = "llvm.memset"] Memset, } @@ -134,8 +139,14 @@ macro_rules! declare_known_functions { } declare_known_functions! { - #[name = c"type_register_static"] - TypeRegisterStatic, + #[name = c"g_malloc0"] + GMalloc0, + #[name = c"clock_gettime"] + ClockGetTime, + #[name = c"sysconf"] + SysConf, + #[name = c"getauxval"] + GetAuxVal, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -204,7 +215,12 @@ trait TargetType { fn llvm_basic_type<'ctx>(parser: &Parser<'ctx>) -> Option> { BasicTypeEnum::try_from(Self::llvm_type(parser)).ok() } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result; + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result; + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()>; } trait TargetIntType: TargetType + Copy + Ord + fmt::Debug + std::hash::Hash { @@ -231,9 +247,16 @@ macro_rules! impl_target_type_for_int { assert_eq!(TargetLayout::abi_layout(parser, retval), Self::LAYOUT); retval.into() } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result { + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result { state.load_int(ptr) } + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()> { + state.store_int(ptr, value) + } } impl TargetIntType for $ty { type Bytes = [u8; $size]; @@ -335,6 +358,18 @@ impl_target_type_for_int!(TargetIsize, 8, 8); trait TargetPointerType: TargetType { type Pointee; + fn pointee(&self) -> Self::Pointee + where + Self::Pointee: Default, + { + Self::Pointee::default() + } + fn pointee_llvm_type<'ctx>(&self, parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> + where + Self::Pointee: TargetType, + { + Self::Pointee::llvm_type(parser) + } } impl TargetType for *const T { @@ -345,9 +380,25 @@ impl TargetType for *const T { assert_eq!(TargetLayout::abi_layout(parser, retval), Self::LAYOUT); retval.into() } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result { + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result { state.load_ptr(ptr) } + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()> { + state.store( + ptr, + state + .global() + .parser + .context + .ptr_type(AddressSpace::from(0u16)) + .into(), + Value::Pointer(value), + ) + } } impl TargetPointerType for *const T { @@ -360,9 +411,16 @@ impl TargetType for *mut T { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { <*const T as TargetType>::llvm_type(parser) } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result { + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result { state.load_ptr(ptr) } + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()> { + <*const () as TargetType>::store(state, ptr, value) + } } impl TargetPointerType for *mut T { @@ -382,9 +440,16 @@ macro_rules! impl_target_type_for_fn_ptr { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { R::llvm_basic_type(parser).expect("invalid return type").fn_type(&[$($T::llvm_basic_type(parser).expect("invalid argument type").into()),*], false).into() } - fn load<'ctx>(_state: &State<'ctx>, _ptr: Pointer) -> Result { + fn load<'ctx>(_state: &impl StateTrait<'ctx>, _ptr: Pointer) -> Result { panic!("can't load a value of function type. try using function pointers?") } + fn store<'ctx>( + _state: &mut impl StateTrait<'ctx>, + _ptr: Pointer, + _value: Self::Value, + ) -> Result<()> { + panic!("can't store a value of function type. try using function pointers?") + } } impl TargetType for Option R> { const LAYOUT: TargetLayout = <*const () as TargetType>::LAYOUT; @@ -392,9 +457,16 @@ macro_rules! impl_target_type_for_fn_ptr { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { <*const () as TargetType>::llvm_type(parser) } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result { + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result { state.load_ptr(ptr) } + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()> { + <*const () as TargetType>::store(state, ptr, value) + } } impl TargetPointerType for Option R> { type Pointee = TargetFunctionType<($($T,)*), R>; @@ -405,9 +477,16 @@ macro_rules! impl_target_type_for_fn_ptr { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { parser.context.void_type().fn_type(&[$($T::llvm_basic_type(parser).expect("invalid argument type").into()),*], false).into() } - fn load<'ctx>(_state: &State<'ctx>, _ptr: Pointer) -> Result { + fn load<'ctx>(_state: &impl StateTrait<'ctx>, _ptr: Pointer) -> Result { panic!("can't load a value of function type. try using function pointers?") } + fn store<'ctx>( + _state: &mut impl StateTrait<'ctx>, + _ptr: Pointer, + _value: Self::Value, + ) -> Result<()> { + panic!("can't store a value of function type. try using function pointers?") + } } impl<$($T: TargetType),*> TargetType for Option { const LAYOUT: TargetLayout = <*const () as TargetType>::LAYOUT; @@ -415,9 +494,16 @@ macro_rules! impl_target_type_for_fn_ptr { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { <*const () as TargetType>::llvm_type(parser) } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result { + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result { state.load_ptr(ptr) } + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()> { + <*const () as TargetType>::store(state, ptr, value) + } } impl<$($T: TargetType),*> TargetPointerType for Option { type Pointee = TargetFunctionType<($($T,)*), ()>; @@ -439,9 +525,16 @@ impl_target_type_for_fn_ptr!(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9); impl_target_type_for_fn_ptr!(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10); impl_target_type_for_fn_ptr!(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11); -struct TargetField { - field: Field, - _struct: PhantomData, +struct TargetField( + PhantomData<(Struct, Field)>, +); + +impl Default + for TargetField +{ + fn default() -> Self { + Self(PhantomData) + } } impl @@ -450,9 +543,15 @@ impl(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { + fn llvm_type<'ctx>(&self, parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { Field::llvm_type(parser) } + fn pointee_llvm_type<'ctx>(&self, parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> + where + Field: TargetPointerType, + { + Field::Pointee::llvm_type(parser) + } } trait TargetStruct: TargetType { @@ -471,6 +570,7 @@ macro_rules! declare_target_struct { struct $FieldInts { $($field: u64,)* } + #[derive(Default)] struct $Struct { $(#[allow(dead_code)] $field: TargetField,)* @@ -511,11 +611,19 @@ macro_rules! declare_target_struct { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { parser.context.struct_type(&[$(<$field_ty as TargetType>::llvm_basic_type(parser).expect("invalid field type")),*], false).into() } - fn load<'ctx>(state: &State<'ctx>, ptr: Pointer) -> Result { + fn load<'ctx>(state: &impl StateTrait<'ctx>, ptr: Pointer) -> Result { Ok($Value { $($field: <$field_ty as TargetType>::load(state, ptr.add($Struct::OFFSETS.$field))?,)* }) } + fn store<'ctx>( + state: &mut impl StateTrait<'ctx>, + ptr: Pointer, + value: Self::Value, + ) -> Result<()> { + $(<$field_ty as TargetType>::store(state, ptr.add($Struct::OFFSETS.$field), value.$field)?;)* + Ok(()) + } } }; } @@ -529,9 +637,16 @@ macro_rules! declare_opaque_target_struct { fn llvm_type<'ctx>(parser: &Parser<'ctx>) -> AnyTypeEnum<'ctx> { parser.context.void_type().into() } - fn load<'ctx>(_state: &State<'ctx>, _ptr: Pointer) -> Result { + fn load<'ctx>(_state: &impl StateTrait<'ctx>, _ptr: Pointer) -> Result { panic!("can't load a value of opaque struct type") } + fn store<'ctx>( + _state: &mut impl StateTrait<'ctx>, + _ptr: Pointer, + _value: Self::Value, + ) -> Result<()> { + panic!("can't store a value of opaque struct type") + } } }; } @@ -569,6 +684,25 @@ declare_target_struct! { } } +declare_target_struct! { + #[field_ints = TargetTimeSpecFieldInts, value = TargetTimeSpecValue] + #[repr(C)] + struct TargetTimeSpec { + tv_sec: i64, + tv_nsec: i64, + } +} + +declare_target_struct! { + #[field_ints = TargetGlobalCtorFieldInts, value = TargetGlobalCtorValue] + #[repr(C)] + struct TargetGlobalCtor { + priority: u32, + constructor: Option, + data: *const (), + } +} + fn get_called_value<'ctx>(instruction: InstructionValue<'ctx>) -> AnyValueEnum<'ctx> { match instruction.get_opcode() { InstructionOpcode::Call | InstructionOpcode::Invoke => {} @@ -599,6 +733,87 @@ fn get_array_element<'ctx>(v: ArrayValue<'ctx>, index: u32) -> BasicValueEnum<'c unsafe { BasicValueEnum::new(LLVMGetAggregateElement(v.as_value_ref(), index)) } } +fn get_constant_opcode<'ctx>(v: AnyValueEnum<'ctx>) -> Option { + unsafe { + if LLVMGetValueKind(v.as_value_ref()) == LLVMValueKind::LLVMConstantExprValueKind { + Some(LLVMGetConstOpcode(v.as_value_ref()).into()) + } else { + None + } + } +} + +fn get_instruction_or_constant_opcode<'ctx>(v: AnyValueEnum<'ctx>) -> Option { + get_constant_opcode(v).or_else(|| { + match v { + AnyValueEnum::ArrayValue(v) => v.as_instruction(), + AnyValueEnum::IntValue(v) => v.as_instruction(), + AnyValueEnum::FloatValue(v) => v.as_instruction(), + AnyValueEnum::PhiValue(v) => Some(v.as_instruction()), + AnyValueEnum::FunctionValue(_) => None, + AnyValueEnum::PointerValue(v) => v.as_instruction(), + AnyValueEnum::StructValue(v) => v.as_instruction(), + AnyValueEnum::VectorValue(v) => v.as_instruction(), + AnyValueEnum::ScalableVectorValue(v) => v.as_instruction(), + AnyValueEnum::InstructionValue(v) => Some(v), + AnyValueEnum::MetadataValue(_) => None, + } + .map(|v| v.get_opcode()) + }) +} + +fn get_gep_source_element_type<'ctx>(v: PointerValue<'ctx>) -> BasicTypeEnum<'ctx> { + assert_eq!( + get_instruction_or_constant_opcode(v.as_any_value_enum()), + Some(InstructionOpcode::GetElementPtr) + ); + unsafe { BasicTypeEnum::new(LLVMGetGEPSourceElementType(v.as_value_ref())) } +} + +fn get_num_operands(v: AnyValueEnum<'_>) -> u32 { + unsafe { + assert!(!LLVMIsAUser(v.as_value_ref()).is_null()); + LLVMGetNumOperands(v.as_value_ref()) as u32 + } +} + +fn get_operand<'ctx>(v: AnyValueEnum<'ctx>, index: u32) -> Option> { + unsafe { + assert!(!LLVMIsAUser(v.as_value_ref()).is_null()); + assert!(index < LLVMGetNumOperands(v.as_value_ref()) as u32); + let operand = LLVMGetOperand(v.as_value_ref(), index); + if operand.is_null() { + return None; + } + match LLVMGetTypeKind(LLVMTypeOf(operand)) { + LLVMTypeKind::LLVMVoidTypeKind => unreachable!(), + LLVMTypeKind::LLVMLabelTypeKind => { + BasicBlock::new(LLVMValueAsBasicBlock(operand)).map(Operand::Block) + } + LLVMTypeKind::LLVMHalfTypeKind + | LLVMTypeKind::LLVMFloatTypeKind + | LLVMTypeKind::LLVMDoubleTypeKind + | LLVMTypeKind::LLVMX86_FP80TypeKind + | LLVMTypeKind::LLVMFP128TypeKind + | LLVMTypeKind::LLVMPPC_FP128TypeKind + | LLVMTypeKind::LLVMIntegerTypeKind + | LLVMTypeKind::LLVMStructTypeKind + | LLVMTypeKind::LLVMArrayTypeKind + | LLVMTypeKind::LLVMPointerTypeKind + | LLVMTypeKind::LLVMVectorTypeKind + | LLVMTypeKind::LLVMTokenTypeKind + | LLVMTypeKind::LLVMScalableVectorTypeKind + | LLVMTypeKind::LLVMBFloatTypeKind + | LLVMTypeKind::LLVMX86_AMXTypeKind + | LLVMTypeKind::LLVMTargetExtTypeKind => { + Some(Operand::Value(BasicValueEnum::new(operand))) + } + LLVMTypeKind::LLVMFunctionTypeKind => unreachable!(), + LLVMTypeKind::LLVMMetadataTypeKind => panic!("Operand::Metadata doesn't exist"), + } + } +} + fn array_try_from_fn Result>( mut f: F, ) -> Result<[T; N], E> { @@ -627,6 +842,7 @@ enum PointerBase { TranslationBlock, MaxInstructions, HostPC, + Heap { id: u64 }, } impl PointerBase { @@ -641,6 +857,7 @@ impl PointerBase { PointerBase::TranslationBlock => None, PointerBase::MaxInstructions => None, PointerBase::HostPC => todo!(), + PointerBase::Heap { .. } => None, } } } @@ -801,6 +1018,11 @@ impl<'ctx> GlobalState<'ctx> { ); retval } + fn gen_id(&mut self) -> u64 { + let id = self.next_id; + self.next_id += 1; + id + } fn take(&mut self) -> Self { let Self { memory, @@ -826,80 +1048,124 @@ impl<'ctx> GlobalState<'ctx> { offset: 0, } } -} - -#[derive(Clone, Debug)] -struct State<'ctx> { - global: GlobalState<'ctx>, - return_target: Option>>, - stack_variables: Vec>, - local_values: HashMap, Value>, - next_instruction: InstructionValue<'ctx>, -} - -impl<'ctx> State<'ctx> { - #[track_caller] - fn new( - global: GlobalState<'ctx>, - start_fn_name: impl AsRef, - expected_function_type: FunctionType<'ctx>, - arguments: impl FnOnce(&mut State<'ctx>) -> Result>, - ) -> Result { - let start_fn_name = start_fn_name.as_ref(); - let function = global - .parser - .module - .get_function(start_fn_name) - .ok_or_else(|| eyre!("function not found: {start_fn_name}"))?; - let function_type = function.get_type(); - if function_type != expected_function_type { - bail!( - "entry function {start_fn_name} has wrong type: {function_type}\nexpected: {expected_function_type}" - ); - } - let basic_block = function - .get_first_basic_block() - .ok_or_else(|| eyre!("function missing start block: {start_fn_name}"))?; - let next_instruction = basic_block.get_first_instruction().ok_or_else(|| { - eyre!("function start block missing first instruction: {start_fn_name}") - })?; - let mut retval = Self { - global, - return_target: None, - stack_variables: vec![], - local_values: HashMap::new(), - next_instruction, + fn run_global_ctors(mut self, max_step_count: u64) -> Result { + let Some(global_ctors) = self.parser.module.get_global("llvm.global_ctors") else { + return Ok(self); }; - let arguments = arguments(&mut retval)?; - assert_eq!(function.count_params() as usize, arguments.len()); - for (param, value) in function.get_param_iter().zip(arguments) { - retval.local_values.insert(param.as_any_value_enum(), value); + let ty = global_ctors.get_value_type().into_array_type(); + assert_eq!( + ty.get_element_type().as_any_type_enum(), + TargetGlobalCtor::llvm_type(&self.parser), + ); + let Value::Pointer(ptr) = self.get_value(global_ctors)? else { + unreachable!(); + }; + let constructor_ty = TargetGlobalCtor::default() + .constructor + .pointee_llvm_type(&self.parser) + .into_function_type(); + for i in 0..ty.len() { + let global_ctor = TargetGlobalCtor::load( + &mut self, + ptr.add(i as u64 * TargetGlobalCtor::LAYOUT.size), + )?; + let state = Rc::new(State::new_call_ptr( + self, + &global_ctor.constructor, + constructor_ty, + |_| Ok(vec![]), + )?); + self = Rc::unwrap_or_clone(state.run_single(max_step_count)?).global; } - Ok(retval) + Ok(self) } - fn gen_id(&mut self) -> u64 { - let id = self.global.next_id; - self.global.next_id += 1; - id +} + +trait StateTrait<'ctx>: 'ctx { + fn global_mut(&mut self) -> &mut GlobalState<'ctx>; + fn global(&self) -> &GlobalState<'ctx>; + fn get_local_value(&self, key: &AnyValueEnum<'ctx>) -> Option<&Value>; + fn insert_local_value(&mut self, key: AnyValueEnum<'ctx>, value: Value) -> Result<()>; + fn get_gep_value(&mut self, gep: PointerValue<'ctx>) -> Result { + let source_element_ty = get_gep_source_element_type(gep); + let ptr = get_operand(gep.as_any_value_enum(), 0) + .expect("known to have ptr operand") + .unwrap_value(); + let Value::Pointer(mut ptr) = self.get_value(ptr)? else { + bail!("can't getelementptr on non-pointer"); + }; + if let Some(ptr_index) = get_operand(gep.as_any_value_enum(), 1) { + let Value::ConstantInt(ptr_index) = self.get_value(ptr_index.unwrap_value())? else { + bail!("can't getelementptr with non-integer index"); + }; + let ptr_index = ptr_index as u64; + let source_element_size = self + .global() + .parser + .target_data + .get_abi_size(&source_element_ty); + ptr.offset = ptr + .offset + .wrapping_add(source_element_size.wrapping_mul(ptr_index)); + let mut container_ty = source_element_ty; + for operand_index in 2..get_num_operands(gep.as_any_value_enum()) { + let index = get_operand(gep.as_any_value_enum(), operand_index) + .expect("known to have operand") + .unwrap_value(); + let Value::ConstantInt(index) = self.get_value(index)? else { + bail!("can't getelementptr with non-integer index"); + }; + let index = index as u64; + match container_ty { + BasicTypeEnum::ArrayType(ty) => { + let element_ty = ty.get_element_type(); + let element_size = + self.global().parser.target_data.get_abi_size(&element_ty); + ptr.offset = ptr.offset.wrapping_add(element_size.wrapping_mul(index)); + container_ty = element_ty; + } + BasicTypeEnum::FloatType(_) + | BasicTypeEnum::IntType(_) + | BasicTypeEnum::PointerType(_) => unreachable!(), + BasicTypeEnum::StructType(ty) => { + let field_index = index as u32; + let field_offset = self + .global() + .parser + .target_data + .offset_of_element(&ty, field_index) + .expect("field index is in range"); + ptr.offset = ptr.offset.wrapping_add(field_offset); + container_ty = ty + .get_field_type_at_index(field_index) + .expect("field index is in range"); + } + BasicTypeEnum::VectorType(ty) => todo!("{ty}"), + BasicTypeEnum::ScalableVectorType(ty) => todo!("{ty}"), + } + } + } + Ok(Value::Pointer(ptr)) } fn get_value(&mut self, value: impl AnyValue<'ctx>) -> Result { let value = value.as_any_value_enum(); - if let Some(retval) = self.local_values.get(&value) { - Ok(retval.clone()) - } else if let Some(retval) = self.global.global_values.get(&value) { + if let Some(retval) = self.get_local_value(&value) { + return Ok(retval.clone()); + } + let global = self.global_mut(); + if let Some(retval) = global.global_values.get(&value) { Ok(retval.clone()) } else { match value { AnyValueEnum::ArrayValue(value) => { let ty = value.get_type(); - let size = self.global.parser.target_data.get_abi_size(&ty); - let element_size = - self.global - .parser - .target_data - .get_abi_size(&ty.get_element_type()) as usize; - let element_store_size = self - .global + let size = global.parser.target_data.get_abi_size(&ty); + let element_size = global + .parser + .target_data + .get_abi_size(&ty.get_element_type()) + as usize; + let element_store_size = global .parser .target_data .get_store_size(&ty.get_element_type()) @@ -914,7 +1180,7 @@ impl<'ctx> State<'ctx> { } } let retval = Value::Aggregate(retval); - self.global + self.global_mut() .global_values .insert(value.as_any_value_enum(), retval.clone()); Ok(retval) @@ -932,12 +1198,12 @@ impl<'ctx> State<'ctx> { let ptr_base = Rc::new(PointerBase::Function { name: value.get_name().into(), }); - self.global.memory.insert(ptr_base.clone(), Rc::new([])); + global.memory.insert(ptr_base.clone(), Rc::new([])); let retval = Value::Pointer(Pointer { base: ptr_base, offset: 0, }); - self.global + global .global_values .insert(value.as_any_value_enum(), retval.clone()); Ok(retval) @@ -946,20 +1212,31 @@ impl<'ctx> State<'ctx> { if let Some(value) = global_variable_from_pointer(value) { if let Some(initializer) = value.get_initializer() { let initializer = self.get_value(initializer)?; + let global = self.global_mut(); let ty = value.get_value_type(); - let size = self.global.parser.target_data.get_abi_size(&ty); + let size = global.parser.target_data.get_abi_size(&ty); let ptr_base = Rc::new(PointerBase::Global { name: value.get_name().into(), is_constant: value.is_constant(), }); - self.global - .memory - .insert(ptr_base.clone(), initializer.get_bytes(0..size as usize)); + let mut initializer = initializer.get_bytes(0..size as usize); + if let Some(start) = initializer.iter().position(|v| match v { + Byte::Constant(_) | Byte::PointerBits { .. } => false, + Byte::Undefined => true, + }) { + for b in Rc::make_mut(&mut initializer)[start..].iter_mut() { + match b { + Byte::Constant(_) | Byte::PointerBits { .. } => {} + Byte::Undefined => *b = Byte::Constant(0), + } + } + } + global.memory.insert(ptr_base.clone(), initializer); let retval = Value::Pointer(Pointer { base: ptr_base, offset: 0, }); - self.global + global .global_values .insert(value.as_any_value_enum(), retval.clone()); Ok(retval) @@ -968,21 +1245,93 @@ impl<'ctx> State<'ctx> { } } else if value.is_null() { Ok(Value::Pointer(Pointer { - base: self.global.null_ptr_base.clone(), + base: global.null_ptr_base.clone(), offset: 0, })) + } else if let Some(constant_expr) = + get_constant_opcode(value.as_any_value_enum()) + { + match constant_expr { + InstructionOpcode::Add => todo!(), + InstructionOpcode::AddrSpaceCast => todo!(), + InstructionOpcode::Alloca => todo!(), + InstructionOpcode::And => todo!(), + InstructionOpcode::AShr => todo!(), + InstructionOpcode::AtomicCmpXchg => todo!(), + InstructionOpcode::AtomicRMW => todo!(), + InstructionOpcode::BitCast => todo!(), + InstructionOpcode::Br => todo!(), + InstructionOpcode::Call => todo!(), + InstructionOpcode::CallBr => todo!(), + InstructionOpcode::CatchPad => todo!(), + InstructionOpcode::CatchRet => todo!(), + InstructionOpcode::CatchSwitch => todo!(), + InstructionOpcode::CleanupPad => todo!(), + InstructionOpcode::CleanupRet => todo!(), + InstructionOpcode::ExtractElement => todo!(), + InstructionOpcode::ExtractValue => todo!(), + InstructionOpcode::FNeg => todo!(), + InstructionOpcode::FAdd => todo!(), + InstructionOpcode::FCmp => todo!(), + InstructionOpcode::FDiv => todo!(), + InstructionOpcode::Fence => todo!(), + InstructionOpcode::FMul => todo!(), + InstructionOpcode::FPExt => todo!(), + InstructionOpcode::FPToSI => todo!(), + InstructionOpcode::FPToUI => todo!(), + InstructionOpcode::FPTrunc => todo!(), + InstructionOpcode::Freeze => todo!(), + InstructionOpcode::FRem => todo!(), + InstructionOpcode::FSub => todo!(), + InstructionOpcode::GetElementPtr => self.get_gep_value(value), + InstructionOpcode::ICmp => todo!(), + InstructionOpcode::IndirectBr => todo!(), + InstructionOpcode::InsertElement => todo!(), + InstructionOpcode::InsertValue => todo!(), + InstructionOpcode::IntToPtr => todo!(), + InstructionOpcode::Invoke => todo!(), + InstructionOpcode::LandingPad => todo!(), + InstructionOpcode::Load => todo!(), + InstructionOpcode::LShr => todo!(), + InstructionOpcode::Mul => todo!(), + InstructionOpcode::Or => todo!(), + InstructionOpcode::Phi => todo!(), + InstructionOpcode::PtrToInt => todo!(), + InstructionOpcode::Resume => todo!(), + InstructionOpcode::Return => todo!(), + InstructionOpcode::SDiv => todo!(), + InstructionOpcode::Select => todo!(), + InstructionOpcode::SExt => todo!(), + InstructionOpcode::Shl => todo!(), + InstructionOpcode::ShuffleVector => todo!(), + InstructionOpcode::SIToFP => todo!(), + InstructionOpcode::SRem => todo!(), + InstructionOpcode::Store => todo!(), + InstructionOpcode::Sub => todo!(), + InstructionOpcode::Switch => todo!(), + InstructionOpcode::Trunc => todo!(), + InstructionOpcode::UDiv => todo!(), + InstructionOpcode::UIToFP => todo!(), + InstructionOpcode::Unreachable => todo!(), + InstructionOpcode::URem => todo!(), + InstructionOpcode::UserOp1 => todo!(), + InstructionOpcode::UserOp2 => todo!(), + InstructionOpcode::VAArg => todo!(), + InstructionOpcode::Xor => todo!(), + InstructionOpcode::ZExt => todo!(), + } } else { todo!("{value}"); } } AnyValueEnum::StructValue(value) => { let ty = value.get_type(); - let size = self.global.parser.target_data.get_abi_size(&ty); + let size = global.parser.target_data.get_abi_size(&ty); let mut retval = Rc::from_iter((0..size).map(|_| Byte::Undefined)); let bytes = Rc::make_mut(&mut retval); for (i, field) in value.get_fields().enumerate() { let start = self - .global + .global() .parser .target_data .offset_of_element(&ty, i as u32) @@ -990,7 +1339,7 @@ impl<'ctx> State<'ctx> { as usize; let field_value = self.get_value(field)?; let field_store_size = self - .global + .global() .parser .target_data .get_store_size(&field.get_type()) @@ -1000,7 +1349,7 @@ impl<'ctx> State<'ctx> { } } let retval = Value::Aggregate(retval); - self.global + self.global_mut() .global_values .insert(value.as_any_value_enum(), retval.clone()); Ok(retval) @@ -1012,36 +1361,16 @@ impl<'ctx> State<'ctx> { } } } - fn get_call_arg_operands(&self) -> Result<[Operand<'ctx>; N]> { - ensure!( - get_num_arg_operands(self.next_instruction) == N as u32, - "unexpected argument operand count: {}", - self.next_instruction, - ); - Ok(std::array::from_fn(|i| { - self.next_instruction - .get_operand(i as u32) - .expect("just checked number of argument operands") - })) - } - fn get_call_arg_values(&mut self) -> Result<[Value; N]> { - let operands = self.get_call_arg_operands::()?; - array_try_from_fn(|i| { - self.get_value( - operands[i] - .value() - .ok_or_eyre("call argument operand should be a value")?, - ) - }) - } fn do_store(&mut self, ptr: &Pointer, value: Byte) -> Result<()> { - let Some(memory) = self.global.memory.get_mut(&ptr.base) else { + let Some(memory) = self.global_mut().memory.get_mut(&ptr.base) else { bail!("target memory not found: {ptr:?}"); }; let memory = Rc::make_mut(memory); if let Ok(offset) = usize::try_from(ptr.offset) && let Some(dest) = memory.get_mut(offset) { + #[cfg(feature = "verbose-trace")] + println!("stored {value:?} to {ptr:?}"); *dest = value; Ok(()) } else { @@ -1052,12 +1381,14 @@ impl<'ctx> State<'ctx> { } } fn do_load(&self, ptr: &Pointer) -> Result { - let Some(memory) = self.global.memory.get(&ptr.base) else { + let Some(memory) = self.global().memory.get(&ptr.base) else { bail!("target memory not found: {ptr:?}"); }; if let Ok(offset) = usize::try_from(ptr.offset) && let Some(dest) = memory.get(offset) { + #[cfg(feature = "verbose-trace")] + println!("loaded {dest:?} from {ptr:?}"); Ok(dest.clone()) } else { bail!( @@ -1083,7 +1414,20 @@ impl<'ctx> State<'ctx> { } fn load_ptr(&self, mut ptr: Pointer) -> Result { match self.do_load(&ptr)? { - Byte::Constant(v) => todo!(), + Byte::Constant(v) => { + let mut bytes: ::Bytes = [v; _]; + for b in bytes[1..].iter_mut() { + ptr.offset += 1; + *b = match self.do_load(&ptr)?.as_u8() { + Some(v) => v, + None => todo!(), + }; + } + Ok(Pointer { + base: self.global().null_ptr_base.clone(), + offset: TargetUsize::from_le_bytes(bytes).0, + }) + } Byte::PointerBits { ptr: loaded_ptr, offset, @@ -1122,12 +1466,20 @@ impl<'ctx> State<'ctx> { } Ok(T::from_bytes(bytes)) } + fn store_int(&mut self, mut ptr: Pointer, value: T) -> Result<()> { + let bytes = value.to_bytes(); + for b in &bytes[..] { + self.do_store(&ptr, Byte::Constant(*b))?; + ptr.offset += 1; + } + Ok(()) + } fn load(&self, ptr: &Pointer, ty: BasicTypeEnum<'ctx>) -> Result { match ty { BasicTypeEnum::ArrayType(_) | BasicTypeEnum::StructType(_) => todo!("{ty}"), BasicTypeEnum::FloatType(ty) => todo!("{ty}"), BasicTypeEnum::IntType(ty) => { - let size = self.global.parser.target_data.get_store_size(&ty); + let size = self.global().parser.target_data.get_store_size(&ty); let mut target_bytes = 0u128.to_le_bytes(); assert!(size <= target_bytes.len() as u64); let mut ptr = ptr.clone(); @@ -1145,13 +1497,187 @@ impl<'ctx> State<'ctx> { BasicTypeEnum::ScalableVectorType(ty) => todo!("{ty}"), } } + fn store(&mut self, mut ptr: Pointer, ty: BasicTypeEnum<'ctx>, value: Value) -> Result<()> { + let size = self.global().parser.target_data.get_store_size(&ty); + for i in 0..size as usize { + self.do_store(&ptr, value.get_byte(i))?; + ptr.offset += 1; + } + Ok(()) + } +} + +impl<'ctx> StateTrait<'ctx> for GlobalState<'ctx> { + fn global_mut(&mut self) -> &mut GlobalState<'ctx> { + self + } + + fn global(&self) -> &GlobalState<'ctx> { + self + } + + fn get_local_value(&self, _key: &AnyValueEnum<'ctx>) -> Option<&Value> { + None + } + + fn insert_local_value(&mut self, _key: AnyValueEnum<'ctx>, _value: Value) -> Result<()> { + bail!("can't insert local value in GlobalState") + } +} + +#[derive(Clone, Debug)] +struct State<'ctx> { + global: GlobalState<'ctx>, + return_target: Option>>, + stack_variables: Vec>, + local_values: HashMap, Value>, + next_instruction: InstructionValue<'ctx>, +} + +impl<'ctx> StateTrait<'ctx> for State<'ctx> { + fn global_mut(&mut self) -> &mut GlobalState<'ctx> { + &mut self.global + } + + fn global(&self) -> &GlobalState<'ctx> { + &self.global + } + + fn get_local_value(&self, key: &AnyValueEnum<'ctx>) -> Option<&Value> { + self.local_values.get(key) + } + + fn insert_local_value(&mut self, key: AnyValueEnum<'ctx>, value: Value) -> Result<()> { + #[cfg(feature = "verbose-trace")] + println!("writing {value:?} to {key}"); + self.local_values.insert(key, value); + Ok(()) + } +} + +impl<'ctx> State<'ctx> { + fn get_basic_block(&self) -> BasicBlock<'ctx> { + self.next_instruction + .get_parent() + .expect("known to be in a basic block") + } + fn get_function(&self) -> FunctionValue<'ctx> { + self.get_basic_block() + .get_parent() + .expect("known to be in a function") + } + fn print_backtrace(&self) { + println!("print_backtrace:"); + for (index, state) in + std::iter::successors(Some(self), |state| state.return_target.as_deref()).enumerate() + { + println!( + "{index}: {}", + state.get_function().get_name().to_string_lossy() + ); + } + } + #[track_caller] + fn new_call_ptr( + global: GlobalState<'ctx>, + function: &Pointer, + expected_function_type: FunctionType<'ctx>, + arguments: impl FnOnce(&mut State<'ctx>) -> Result>, + ) -> Result { + if let Pointer { base, offset: 0 } = function + && let PointerBase::Function { name } = &**base + { + let Ok(name) = name.to_str() else { + bail!("function name is not valid UTF-8: {function:?}"); + }; + Self::new(global, name, expected_function_type, arguments) + } else { + bail!("can't call pointer that doesn't point to a function: {function:?}"); + } + } + #[track_caller] + fn new_call( + global: GlobalState<'ctx>, + function: FunctionValue<'ctx>, + arguments: impl FnOnce(&mut State<'ctx>) -> Result>, + ) -> Result { + println!("calling: {}", function.get_name().to_string_lossy()); + let basic_block = function.get_first_basic_block().ok_or_else(|| { + eyre!( + "function missing start block: {}", + function.get_name().to_string_lossy() + ) + })?; + let next_instruction = basic_block.get_first_instruction().ok_or_else(|| { + eyre!( + "function start block missing first instruction: {}", + function.get_name().to_string_lossy() + ) + })?; + let mut retval = Self { + global, + return_target: None, + stack_variables: vec![], + local_values: HashMap::new(), + next_instruction, + }; + let arguments = arguments(&mut retval)?; + assert_eq!(function.count_params() as usize, arguments.len()); + for (param, value) in function.get_param_iter().zip(arguments) { + retval.insert_local_value(param.as_any_value_enum(), value)?; + } + Ok(retval) + } + #[track_caller] + fn new( + global: GlobalState<'ctx>, + start_fn_name: impl AsRef, + expected_function_type: FunctionType<'ctx>, + arguments: impl FnOnce(&mut State<'ctx>) -> Result>, + ) -> Result { + let start_fn_name = start_fn_name.as_ref(); + let function = global + .parser + .module + .get_function(start_fn_name) + .ok_or_else(|| eyre!("function not found: {start_fn_name}"))?; + let function_type = function.get_type(); + if function_type != expected_function_type { + bail!( + "entry function {start_fn_name} has wrong type: {function_type}\nexpected: {expected_function_type}" + ); + } + Self::new_call(global, function, arguments) + } + fn get_call_arg_operands(&self) -> Result<[Operand<'ctx>; N]> { + ensure!( + get_num_arg_operands(self.next_instruction) == N as u32, + "unexpected argument operand count: {}", + self.next_instruction, + ); + Ok(std::array::from_fn(|i| { + self.next_instruction + .get_operand(i as u32) + .expect("just checked number of argument operands") + })) + } + fn get_call_arg_values(&mut self) -> Result<[Value; N]> { + let operands = self.get_call_arg_operands::()?; + array_try_from_fn(|i| { + self.get_value( + operands[i] + .value() + .ok_or_eyre("call argument operand should be a value")?, + ) + }) + } fn run_call_intrinsic( &mut self, function_value: FunctionValue<'ctx>, intrinsic: Intrinsic, ) -> Result<()> { match intrinsic { - Intrinsic::LifetimeStart => Ok(()), + Intrinsic::LifetimeStart | Intrinsic::LifetimeEnd => Ok(()), Intrinsic::Memset => { let [dest, val, len, _is_volatile] = self.get_call_arg_values()?; let Value::Pointer(mut dest) = dest else { @@ -1187,38 +1713,111 @@ impl<'ctx> State<'ctx> { self.global.registered_type_infos.push(parsed_type_info); Ok(()) } + fn run_call_g_malloc0(&mut self) -> Result<()> { + let [size] = self.get_call_arg_values()?; + let Value::ConstantInt(size) = size else { + bail!("g_malloc0 argument must be an integer"); + }; + let size = size as u64; + let pointer_base = if size == 0 { + self.global.null_ptr_base.clone() + } else { + Rc::new(PointerBase::Heap { + id: self.global.gen_id(), + }) + }; + self.global.memory.insert( + pointer_base.clone(), + Rc::from_iter((0..size).map(|_| Byte::Constant(0))), + ); + self.insert_local_value( + self.next_instruction.as_any_value_enum(), + Value::Pointer(Pointer { + base: pointer_base, + offset: 0, + }), + )?; + Ok(()) + } + fn run_call_clock_gettime(&mut self) -> Result<()> { + let [_clockid, tp] = self.get_call_arg_values()?; + let Value::Pointer(tp) = tp else { + bail!("clock_gettime tp argument must be a pointer"); + }; + TargetTimeSpec::store( + self, + tp, + TargetTimeSpecValue { + tv_sec: 0, + tv_nsec: 0, + }, + )?; + self.insert_local_value( + self.next_instruction.as_any_value_enum(), + Value::ConstantInt(0), + )?; + Ok(()) + } + fn run_call_sysconf(&mut self) -> Result<()> { + let [name] = self.get_call_arg_values()?; + let Value::ConstantInt(name) = name else { + bail!("sysconf `name` argument must be an integer"); + }; + let retval: i64 = match name as i32 { + 187 => 64, // _SC_LEVEL1_ICACHE_LINESIZE -- 64 since that seems reasonable + 190 => 64, // _SC_LEVEL1_DCACHE_LINESIZE -- 64 since that seems reasonable + _ => todo!("sysconf({name})"), + }; + self.insert_local_value( + self.next_instruction.as_any_value_enum(), + Value::ConstantInt(retval as u64 as u128), + )?; + Ok(()) + } + fn run_call_getauxval(&mut self) -> Result<()> { + let [type_] = self.get_call_arg_values()?; + let Value::ConstantInt(type_) = type_ else { + bail!("getauxval `type` argument must be an integer"); + }; + let retval: u64 = match type_ as u64 { + 16 => { + // AT_HWCAP + if self.get_function().get_name() == c"qemu_getauxval" + && let Some(caller) = &self.return_target + && caller.get_function().get_name() == c"init_cache_info" + { + // it only cares about PPC_FEATURE_ICACHE_SNOOP, so just set that + 0x00002000 + } else { + todo!("getauxval(AT_HWCAP)") + } + } + _ => todo!("getauxval({type_})"), + }; + self.insert_local_value( + self.next_instruction.as_any_value_enum(), + Value::ConstantInt(retval as u128), + )?; + Ok(()) + } fn run_call_known_function( &mut self, function_value: FunctionValue<'ctx>, known_function: KnownFunction, ) -> Result<()> { match known_function { - KnownFunction::TypeRegisterStatic => self.run_call_type_register_static(), + KnownFunction::GMalloc0 => self.run_call_g_malloc0(), + KnownFunction::ClockGetTime => self.run_call_clock_gettime(), + KnownFunction::SysConf => self.run_call_sysconf(), + KnownFunction::GetAuxVal => self.run_call_getauxval(), } } fn run_normal_call(mut self: Rc, function: FunctionValue<'ctx>) -> Result> { let this = Rc::make_mut(&mut self); - let global = this.global.take(); let call_instruction = this.next_instruction; - let basic_block = function.get_first_basic_block().ok_or_else(|| { - eyre!( - "function missing start block: {}", - function.get_name().to_string_lossy(), - ) - })?; - let next_instruction = basic_block.get_first_instruction().ok_or_else(|| { - eyre!( - "function start block missing first instruction: {}", - function.get_name().to_string_lossy(), - ) - })?; - let mut local_values = HashMap::new(); + let mut args = Vec::with_capacity(get_num_arg_operands(this.next_instruction) as usize); for i in 0..get_num_arg_operands(call_instruction) { - local_values.insert( - function - .get_nth_param(i) - .expect("argument count doesn't match function parameter count") - .as_any_value_enum(), + args.push( this.get_value( call_instruction .get_operand(i) @@ -1228,13 +1827,98 @@ impl<'ctx> State<'ctx> { )?, ); } - Ok(Rc::new(Self { - global, - return_target: Some(self), - stack_variables: vec![], - local_values, - next_instruction, - })) + let global = this.global.take(); + let mut retval = Self::new_call(global, function, |_| Ok(args))?; + retval.return_target = Some(self); + Ok(Rc::new(retval)) + } + fn branch_to(&mut self, target: BasicBlock<'ctx>) -> Result<()> { + #[cfg(feature = "verbose-trace")] + println!("branch_to: {target:?}"); + let source_block = self.get_basic_block(); + let mut next_instruction = target + .get_first_instruction() + .expect("known to have instructions"); + // we have to read all phi inputs before writing any outputs + let mut phi_assignments = Vec::new(); + while let Ok(phi) = PhiValue::try_from(next_instruction) { + next_instruction = next_instruction + .get_next_instruction() + .expect("phi is not a block terminator"); + let value = phi + .get_incomings() + .find_map(|(value, block)| (block == source_block).then_some(value)) + .expect("phi instruction's source blocks are known to be correct"); + phi_assignments.push((phi, self.get_value(value)?)); + } + // now that we've read all phi inputs, we can write all outputs + for (phi, value) in phi_assignments { + self.insert_local_value(phi.as_any_value_enum(), value)?; + } + self.next_instruction = next_instruction; + Ok(()) + } + fn int_bin_op_unmasked_inputs( + &mut self, + op: impl FnOnce(u32, u128, u128) -> Result, + ) -> Result<()> { + let lhs = self + .next_instruction + .get_operand(0) + .expect("known to have lhs operand") + .unwrap_value(); + let bit_width = lhs.get_type().into_int_type().get_bit_width(); + let lhs = self.get_value(lhs)?; + let Value::ConstantInt(lhs) = lhs else { + todo!("{lhs:?}"); + }; + let rhs = self + .next_instruction + .get_operand(1) + .expect("known to have rhs operand") + .unwrap_value(); + let rhs = self.get_value(rhs)?; + let Value::ConstantInt(rhs) = rhs else { + todo!("{rhs:?}"); + }; + let mut retval = op(bit_width, lhs, rhs)?; + let dest_bit_width = self + .next_instruction + .as_any_value_enum() + .into_int_value() + .get_type() + .get_bit_width(); + retval &= 1u128.unbounded_shl(dest_bit_width).wrapping_sub(1); + self.insert_local_value( + self.next_instruction.as_any_value_enum(), + Value::ConstantInt(retval), + ) + } + fn int_bin_op_unsigned( + &mut self, + op: impl FnOnce(u32, u128, u128) -> Result, + ) -> Result<()> { + self.int_bin_op_unmasked_inputs(|bit_width, lhs, rhs| { + let shift = u128::BITS + .checked_sub(bit_width) + .expect("bit width too big"); + let lhs = (lhs << shift) >> shift; + let rhs = (rhs << shift) >> shift; + op(bit_width, lhs, rhs) + }) + } + fn int_bin_op_signed( + &mut self, + op: impl FnOnce(u32, i128, i128) -> Result, + ) -> Result<()> { + self.int_bin_op_unmasked_inputs(|bit_width, lhs, rhs| { + let shift = i128::BITS + .checked_sub(bit_width) + .expect("bit width too big"); + let lhs = ((lhs as i128) << shift) >> shift; + let rhs = ((rhs as i128) << shift) >> shift; + Ok(op(bit_width, lhs, rhs)? as u128) + }) } fn get_next_states( mut self: Rc, @@ -1251,12 +1935,17 @@ impl<'ctx> State<'ctx> { return Ok(ControlFlow::Continue(())); }}; } + #[cfg(feature = "verbose-trace")] println!( "get_next_states: {}", this.next_instruction.print_to_string().to_string_lossy(), ); - match this.next_instruction.get_opcode() { - InstructionOpcode::Add => todo!("{}", this.next_instruction), + let opcode = this.next_instruction.get_opcode(); + match opcode { + InstructionOpcode::Add => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs.wrapping_add(rhs)))?; + return_after_non_term!(); + } InstructionOpcode::AddrSpaceCast => todo!("{}", this.next_instruction), InstructionOpcode::Alloca => { let allocated_type = this @@ -1276,7 +1965,7 @@ impl<'ctx> State<'ctx> { .get_abi_alignment(&allocated_type); } let pointer_base = Rc::new(PointerBase::Local { - id: this.gen_id(), + id: this.global.gen_id(), align, }); this.stack_variables.push(pointer_base.clone()); @@ -1284,22 +1973,73 @@ impl<'ctx> State<'ctx> { pointer_base.clone(), Rc::from_iter((0..size).map(|_| Byte::Undefined)), ); - let value = this.next_instruction.as_any_value_enum(); - this.local_values.insert( - value, + this.insert_local_value( + this.next_instruction.as_any_value_enum(), Value::Pointer(Pointer { base: pointer_base, offset: 0, }), - ); + )?; + return_after_non_term!(); + } + InstructionOpcode::And => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs & rhs))?; + return_after_non_term!(); + } + InstructionOpcode::AShr => { + this.int_bin_op_signed(|_, lhs, rhs| Ok(lhs.wrapping_shr(rhs as u32)))?; return_after_non_term!(); } - InstructionOpcode::And => todo!("{}", this.next_instruction), - InstructionOpcode::AShr => todo!("{}", this.next_instruction), InstructionOpcode::AtomicCmpXchg => todo!("{}", this.next_instruction), InstructionOpcode::AtomicRMW => todo!("{}", this.next_instruction), InstructionOpcode::BitCast => todo!("{}", this.next_instruction), - InstructionOpcode::Br => todo!("{}", this.next_instruction), + InstructionOpcode::Br => match this.next_instruction.get_num_operands() { + 1 => { + let target = this + .next_instruction + .get_operand(0) + .expect("known to have target label") + .unwrap_block(); + this.branch_to(target)?; + new_states.push(self); + return Ok(ControlFlow::Continue(())); + } + 3 => { + let cond = this + .next_instruction + .get_operand(0) + .expect("known to have condition operand") + .unwrap_value(); + let cond = this.get_value(cond)?; + // the branch targets are stored in reverse order (false, true) of the printed order (true, false) + let true_target = this + .next_instruction + .get_operand(2) + .expect("known to have target label") + .unwrap_block(); + let false_target = this + .next_instruction + .get_operand(1) + .expect("known to have target label") + .unwrap_block(); + if let Value::ConstantInt(cond) = cond { + let target = if cond & 1 != 0 { + true_target + } else { + false_target + }; + this.branch_to(target)?; + new_states.push(self); + return Ok(ControlFlow::Continue(())); + } else { + todo!( + "unimplemented branch condition: {cond:?}\n{}", + this.next_instruction + ); + } + } + _ => bail!("invalid branch operand count: {}", this.next_instruction), + }, InstructionOpcode::Call => { let called_value = get_called_value(this.next_instruction); match called_value { @@ -1327,6 +2067,7 @@ impl<'ctx> State<'ctx> { new_states.push(self.run_normal_call(function_value)?); return Ok(ControlFlow::Continue(())); } else { + this.print_backtrace(); todo!("{}", function_value.get_name().to_string_lossy()); } } @@ -1354,32 +2095,119 @@ impl<'ctx> State<'ctx> { InstructionOpcode::Freeze => todo!("{}", this.next_instruction), InstructionOpcode::FRem => todo!("{}", this.next_instruction), InstructionOpcode::FSub => todo!("{}", this.next_instruction), - InstructionOpcode::GetElementPtr => todo!("{}", this.next_instruction), - InstructionOpcode::ICmp => todo!("{}", this.next_instruction), + InstructionOpcode::GetElementPtr => { + let value = this.get_gep_value( + this.next_instruction + .as_any_value_enum() + .into_pointer_value(), + )?; + this.insert_local_value(this.next_instruction.as_any_value_enum(), value)?; + return_after_non_term!(); + } + InstructionOpcode::ICmp => { + match this + .next_instruction + .get_icmp_predicate() + .expect("known to be icmp") + { + IntPredicate::EQ => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok((lhs == rhs).into()))?; + } + IntPredicate::NE => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok((lhs != rhs).into()))?; + } + IntPredicate::UGT => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok((lhs > rhs).into()))?; + } + IntPredicate::UGE => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok((lhs >= rhs).into()))?; + } + IntPredicate::ULT => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok((lhs < rhs).into()))?; + } + IntPredicate::ULE => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok((lhs <= rhs).into()))?; + } + IntPredicate::SGT => { + this.int_bin_op_signed(|_, lhs, rhs| Ok((lhs > rhs).into()))?; + } + IntPredicate::SGE => { + this.int_bin_op_signed(|_, lhs, rhs| Ok((lhs >= rhs).into()))?; + } + IntPredicate::SLT => { + this.int_bin_op_signed(|_, lhs, rhs| Ok((lhs < rhs).into()))?; + } + IntPredicate::SLE => { + this.int_bin_op_signed(|_, lhs, rhs| Ok((lhs <= rhs).into()))?; + } + } + return_after_non_term!(); + } InstructionOpcode::IndirectBr => todo!("{}", this.next_instruction), InstructionOpcode::InsertElement => todo!("{}", this.next_instruction), InstructionOpcode::InsertValue => todo!("{}", this.next_instruction), InstructionOpcode::IntToPtr => todo!("{}", this.next_instruction), InstructionOpcode::Invoke => todo!("{}", this.next_instruction), InstructionOpcode::LandingPad => todo!("{}", this.next_instruction), - InstructionOpcode::Load => todo!("{}", this.next_instruction), - InstructionOpcode::LShr => todo!("{}", this.next_instruction), - InstructionOpcode::Mul => todo!("{}", this.next_instruction), - InstructionOpcode::Or => todo!("{}", this.next_instruction), + InstructionOpcode::Load => { + let ptr = this + .next_instruction + .get_operand(0) + .expect("known to have ptr operand") + .unwrap_value(); + let Value::Pointer(ptr) = this.get_value(ptr)? else { + bail!("can't load from non-pointer"); + }; + let ty = BasicTypeEnum::try_from(this.next_instruction.get_type()) + .expect("known to have valid load type"); + let value = this.load(&ptr, ty)?; + this.insert_local_value(this.next_instruction.as_any_value_enum(), value)?; + return_after_non_term!(); + } + InstructionOpcode::LShr => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs.wrapping_shr(rhs as u32)))?; + return_after_non_term!(); + } + InstructionOpcode::Mul => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs.wrapping_mul(rhs)))?; + return_after_non_term!(); + } + InstructionOpcode::Or => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs | rhs))?; + return_after_non_term!(); + } InstructionOpcode::Phi => todo!("{}", this.next_instruction), InstructionOpcode::PtrToInt => todo!("{}", this.next_instruction), InstructionOpcode::Resume => todo!("{}", this.next_instruction), InstructionOpcode::Return => { if let Some(mut return_target) = this.return_target.take() { - let this = Rc::unwrap_or_clone(self); + println!( + "returning from {} to {}", + this.get_function().get_name().to_string_lossy(), + return_target.get_function().get_name().to_string_lossy(), + ); let return_target_mut = Rc::make_mut(&mut return_target); + let return_value = if this.next_instruction.get_num_operands() > 0 { + let return_value = this + .next_instruction + .get_operand(0) + .expect("known to have operand") + .unwrap_value(); + Some(this.get_value(return_value)?) + } else { + None + }; + let this = Rc::unwrap_or_clone(self); return_target_mut.global = this.global; - if this.next_instruction.get_num_operands() > 0 { - todo!("{}", this.next_instruction); - } for stack_variable in this.stack_variables { return_target_mut.global.memory.remove(&stack_variable); } + if let Some(return_value) = return_value { + return_target_mut.insert_local_value( + return_target_mut.next_instruction.as_any_value_enum(), + return_value, + )?; + } match return_target_mut.next_instruction.get_opcode() { InstructionOpcode::Call => { return_target_mut.next_instruction = return_target_mut @@ -1395,20 +2223,79 @@ impl<'ctx> State<'ctx> { new_states.push(return_target); return Ok(ControlFlow::Continue(())); } else { + println!( + "finished running {}", + this.get_function().get_name().to_string_lossy(), + ); return Ok(ControlFlow::Break(self)); } } InstructionOpcode::SDiv => todo!("{}", this.next_instruction), InstructionOpcode::Select => todo!("{}", this.next_instruction), - InstructionOpcode::SExt => todo!("{}", this.next_instruction), - InstructionOpcode::Shl => todo!("{}", this.next_instruction), + InstructionOpcode::SExt | InstructionOpcode::Trunc | InstructionOpcode::ZExt => { + let value = this + .next_instruction + .get_operand(0) + .expect("known to have value operand") + .unwrap_value(); + let src_bit_width = value.get_type().into_int_type().get_bit_width(); + let dest_bit_width = this + .next_instruction + .as_any_value_enum() + .into_int_value() + .get_type() + .get_bit_width(); + let Some(shift) = u128::BITS.checked_sub(src_bit_width) else { + panic!("int too big"); + }; + let value = this.get_value(value)?; + let value = match value { + Value::Pointer(value) => todo!("{value:?}"), + Value::ConstantInt(value) => { + let mut retval = if let InstructionOpcode::SExt = opcode { + (((value as i128) << shift) >> shift) as u128 + } else { + (value << shift) >> shift + }; + retval &= 1u128.unbounded_shl(dest_bit_width).wrapping_sub(1); + Value::ConstantInt(retval) + } + Value::Aggregate(_) => unreachable!(), + }; + this.insert_local_value(this.next_instruction.as_any_value_enum(), value)?; + return_after_non_term!(); + } + InstructionOpcode::Shl => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs.wrapping_shl(rhs as u32)))?; + return_after_non_term!(); + } InstructionOpcode::ShuffleVector => todo!("{}", this.next_instruction), InstructionOpcode::SIToFP => todo!("{}", this.next_instruction), InstructionOpcode::SRem => todo!("{}", this.next_instruction), - InstructionOpcode::Store => todo!("{}", this.next_instruction), - InstructionOpcode::Sub => todo!("{}", this.next_instruction), + InstructionOpcode::Store => { + let value = this + .next_instruction + .get_operand(0) + .expect("known to have value operand") + .unwrap_value(); + let ty = value.get_type(); + let value = this.get_value(value)?; + let ptr = this + .next_instruction + .get_operand(1) + .expect("known to have ptr operand") + .unwrap_value(); + let Value::Pointer(ptr) = this.get_value(ptr)? else { + bail!("can't store to non-pointer"); + }; + this.store(ptr, ty, value)?; + return_after_non_term!(); + } + InstructionOpcode::Sub => { + this.int_bin_op_unsigned(|_, lhs, rhs| Ok(lhs.wrapping_sub(rhs)))?; + return_after_non_term!(); + } InstructionOpcode::Switch => todo!("{}", this.next_instruction), - InstructionOpcode::Trunc => todo!("{}", this.next_instruction), InstructionOpcode::UDiv => todo!("{}", this.next_instruction), InstructionOpcode::UIToFP => todo!("{}", this.next_instruction), InstructionOpcode::Unreachable => todo!("{}", this.next_instruction), @@ -1417,7 +2304,6 @@ impl<'ctx> State<'ctx> { InstructionOpcode::UserOp2 => todo!("{}", this.next_instruction), InstructionOpcode::VAArg => todo!("{}", this.next_instruction), InstructionOpcode::Xor => todo!("{}", this.next_instruction), - InstructionOpcode::ZExt => todo!("{}", this.next_instruction), } } fn run_states( @@ -1428,6 +2314,7 @@ impl<'ctx> State<'ctx> { let mut next_states = Vec::new(); let mut finished_states = Vec::new(); for step in 0..max_step_count { + println!("step: {step}"); next_states.clear(); let mut last_states = mem::replace(&mut states, next_states); for last_state in last_states.drain(..) { @@ -1462,7 +2349,7 @@ impl<'ctx> State<'ctx> { } } -pub fn parse_qemu_ppc64_softmmu( +pub fn parse_qemu_system_ppc64( path: impl AsRef, max_step_count: u64, max_state_count: usize, @@ -1471,16 +2358,8 @@ pub fn parse_qemu_ppc64_softmmu( let context = &context; let module = Rc::new(Module::parse_bitcode_from_path(path, context).map_err(map_llvm_err)?); let parser = Rc::new(Parser::new(context, module)?); - let ptr_ty = context.ptr_type(AddressSpace::from(0u16)); - let state = Rc::new(State::new( - GlobalState::new(parser), - "ppc_cpu_register_types", - context.void_type().fn_type(&[], false), - |_| Ok(vec![]), - )?) - .run_single(max_step_count)?; - let powerpc64_cpu_type_info = state - .global + let global = GlobalState::new(parser).run_global_ctors(200)?; + let powerpc64_cpu_type_info = global .registered_type_infos .iter() .find(|v| &*v.name == c"powerpc64-cpu") @@ -1488,8 +2367,9 @@ pub fn parse_qemu_ppc64_softmmu( .clone(); dbg!(&powerpc64_cpu_type_info); // should be: void ppc_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc) + let ptr_ty = context.ptr_type(AddressSpace::from(0u16)); let states = Rc::new(State::new( - Rc::unwrap_or_clone(state).global, + global, "ppc_translate_code", context.void_type().fn_type( &[ diff --git a/src/main.rs b/src/main.rs index 48f1f30..af4a77c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use compile_qemu_for_decoder::parse_qemu_ppc64_softmmu; +use compile_qemu_for_decoder::parse_qemu_system_ppc64; use inkwell::targets::{InitializationConfig, Target}; fn main() -> eyre::Result<()> { Target::initialize_power_pc(&InitializationConfig::default()); - parse_qemu_ppc64_softmmu("libqemu-ppc64-softmmu.bc", 10, 10) + parse_qemu_system_ppc64("qemu-system-ppc64.bc", 10, 10) }