WIP adding rendering

This commit is contained in:
Jacob Lifshay 2025-12-29 03:18:28 -08:00
parent aba6368948
commit 9445599850
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
10 changed files with 2271 additions and 147 deletions

View file

@ -5,22 +5,23 @@ use crate::{
object::{
PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject,
PdfObjectIdentifier, PdfObjectIndirect, PdfObjectStreamDictionary, PdfReal, PdfStream,
PdfStreamDictionary, PdfString, UnparsedPdfStreamDictionary,
PdfStreamDictionary, PdfString,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, PdfParse, PdfParseError,
},
},
pdf_parse,
util::ArcOrRef,
util::{ArcOrRef, DagDebugState},
};
use std::{
collections::BTreeMap,
any::{Any, TypeId},
collections::{BTreeMap, HashMap},
convert::Infallible,
fmt,
num::NonZero,
str::FromStr,
sync::{Arc, OnceLock},
sync::{Arc, Mutex, OnceLock},
};
pub mod content_stream;
@ -28,10 +29,100 @@ pub mod document_structure;
pub mod font;
pub mod object;
pub mod parse;
pub mod render;
pub mod stream_filters;
/// Per-object cache of typed parse results, keyed by the concrete result type.
struct ParseCache {
    // `Weak` entries so the cache never keeps a parse result alive by itself;
    // an entry goes dead as soon as all external `Arc`s are dropped.
    parse_results: HashMap<TypeId, std::sync::Weak<dyn Any + Send + Sync>>,
    // Countdown used by `gc` to amortize the cost of sweeping dead entries.
    steps_till_next_gc: usize,
}
impl Default for ParseCache {
fn default() -> Self {
Self {
parse_results: HashMap::new(),
steps_till_next_gc: 1,
}
}
}
impl ParseCache {
    /// Amortized garbage collection: every call either decrements the
    /// countdown or, when it reaches zero, sweeps out entries whose `Arc`
    /// has been fully released, then reschedules the next sweep.
    fn gc(&mut self) {
        if self.steps_till_next_gc == 0 {
            // Drop entries no longer referenced by any live `Arc`.
            self.parse_results.retain(|_k, v| v.strong_count() > 0);
            let mut adjusted_len = self.parse_results.len();
            // Clamp so a tiny/empty cache still waits a reasonable number of
            // operations (at least 200) before sweeping again.
            if adjusted_len < 10 {
                adjusted_len = 10;
            }
            // Next sweep after ~20 operations per retained entry.
            self.steps_till_next_gc = adjusted_len.saturating_mul(20);
        } else {
            self.steps_till_next_gc -= 1;
        }
    }
    /// Returns the cached result of type `T`, if one was inserted and is
    /// still alive (its `Weak` upgrades).
    fn get<T: 'static + Send + Sync>(&mut self) -> Option<Arc<T>> {
        self.gc();
        let Ok(retval) = self
            .parse_results
            .get(&TypeId::of::<T>())?
            .upgrade()?
            .downcast()
        else {
            // Entries are only ever inserted under `TypeId::of::<T>()` for
            // their own concrete `T`, so the downcast cannot fail.
            unreachable!();
        };
        Some(retval)
    }
    /// Inserts `value` as the cached result for `T` unless a live entry
    /// already exists, in which case the existing `Arc` wins.
    ///
    /// Returns the winning `Arc` plus an opaque value (the losing `Arc`, if
    /// any) that the caller should drop only after releasing any lock
    /// guarding this cache — presumably so `T`'s `Drop` never runs while the
    /// cache mutex is held (see `parse_cache_get_or_insert`).
    fn get_or_insert<T: 'static + Send + Sync>(
        &mut self,
        value: Arc<T>,
    ) -> (Arc<T>, impl Sized + use<T>) {
        use std::collections::hash_map::Entry;
        self.gc();
        match self.parse_results.entry(TypeId::of::<T>()) {
            Entry::Occupied(mut entry) => {
                if let Some(retval) = entry.get().upgrade() {
                    // A live entry already exists: keep it, hand back the
                    // caller's candidate for deferred dropping.
                    let Ok(retval) = retval.downcast::<T>() else {
                        // Same single-type-per-TypeId invariant as `get`.
                        unreachable!();
                    };
                    (retval, Some(value))
                } else {
                    // Entry is dead: replace it with a weak ref to `value`.
                    entry.insert(Arc::<T>::downgrade(&value));
                    (value, None)
                }
            }
            Entry::Vacant(entry) => {
                entry.insert(Arc::<T>::downgrade(&value));
                (value, None)
            }
        }
    }
}
/// A parsed indirect object paired with a cache of typed parse results
/// derived from it.
struct PdfObjectAndParseCache {
    object: PdfObject,
    // `Mutex` gives interior mutability so results can be cached through
    // `&self` from multiple threads.
    parse_cache: Mutex<ParseCache>,
}
impl PdfObjectAndParseCache {
    /// Wraps `object` with an initially empty parse cache.
    fn new(object: PdfObject) -> Self {
        Self {
            object,
            parse_cache: Mutex::default(),
        }
    }
    /// Returns the cached parse result of type `T`, if present and alive.
    fn parse_cache_get<T: 'static + Send + Sync>(&self) -> Option<Arc<T>> {
        self.parse_cache.lock().expect("not poisoned").get()
    }
    /// Caches `value` for type `T`, or returns the already-cached `Arc` if a
    /// live one exists.
    fn parse_cache_get_or_insert<T: 'static + Send + Sync>(&self, value: Arc<T>) -> Arc<T> {
        let mut parse_cache = self.parse_cache.lock().expect("not poisoned");
        let (retval, to_drop_after_unlock) = parse_cache.get_or_insert(value);
        // Ordering matters: release the mutex before dropping the losing
        // `Arc`, so that if dropping it runs `T`'s destructor (which could
        // re-enter this cache), the non-reentrant `Mutex` is not held.
        drop(parse_cache);
        drop(to_drop_after_unlock);
        retval
    }
}
struct PdfObjectsInner {
objects: BTreeMap<PdfObjectIdentifier, PdfObject>,
objects: BTreeMap<PdfObjectIdentifier, PdfObjectAndParseCache>,
#[allow(dead_code)]
object_streams: Vec<PdfStream<PdfObjectStreamDictionary>>,
}
@ -52,7 +143,7 @@ impl PdfHeader {
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct PdfTrailerDictionary {
#[pdf(name = "Size")]
pub size: usize,
@ -71,6 +162,31 @@ pdf_parse! {
}
}
impl fmt::Debug for PdfTrailerDictionary {
    /// Debug output inside a `DagDebugState` scope, printing every field.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring: adding a field to the struct will
            // fail to compile here until it is also printed below.
            let Self {
                size,
                prev,
                root,
                encrypt,
                info,
                id,
                rest,
            } = self;
            let mut builder = f.debug_struct("PdfTrailerDictionary");
            for (name, value) in [
                ("size", size as &dyn fmt::Debug),
                ("prev", prev as &dyn fmt::Debug),
                ("root", root as &dyn fmt::Debug),
                ("encrypt", encrypt as &dyn fmt::Debug),
                ("info", info as &dyn fmt::Debug),
                ("id", id as &dyn fmt::Debug),
                ("rest", rest as &dyn fmt::Debug),
            ] {
                builder.field(name, value);
            }
            builder.finish()
        })
    }
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
@ -83,7 +199,7 @@ pdf_parse! {
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct PdfXRefStreamDictionaryRest {
#[pdf(name = "Type")]
pub ty: PdfXRefName,
@ -96,7 +212,21 @@ pdf_parse! {
}
}
#[derive(Clone, Debug)]
impl fmt::Debug for PdfXRefStreamDictionaryRest {
    /// Debug output inside a `DagDebugState` scope, printing every field.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the
            // struct definition at compile time.
            let Self { ty, index, w, rest } = self;
            let mut builder = f.debug_struct("PdfXRefStreamDictionaryRest");
            for (name, value) in [
                ("ty", ty as &dyn fmt::Debug),
                ("index", index as &dyn fmt::Debug),
                ("w", w as &dyn fmt::Debug),
                ("rest", rest as &dyn fmt::Debug),
            ] {
                builder.field(name, value);
            }
            builder.finish()
        })
    }
}
#[derive(Clone)]
pub enum PdfTrailer {
Trailer {
trailer_dictionary: PdfTrailerDictionary,
@ -108,6 +238,29 @@ pub enum PdfTrailer {
},
}
impl fmt::Debug for PdfTrailer {
    /// Debug output inside a `DagDebugState` scope; both variants print
    /// their payload followed by `start_xref`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Extract the variant-specific parts once, then share a single
            // builder path.
            let (variant, payload_name, payload, start_xref): (
                &str,
                &str,
                &dyn fmt::Debug,
                &dyn fmt::Debug,
            ) = match self {
                Self::Trailer {
                    trailer_dictionary,
                    start_xref,
                } => ("Trailer", "trailer_dictionary", trailer_dictionary, start_xref),
                Self::Stream {
                    xref_stream,
                    start_xref,
                } => ("Stream", "xref_stream", xref_stream, start_xref),
            };
            f.debug_struct(variant)
                .field(payload_name, payload)
                .field("start_xref", start_xref)
                .finish()
        })
    }
}
impl PdfTrailer {
pub fn trailer_dictionary(&self) -> &PdfTrailerDictionary {
match self {
@ -722,7 +875,7 @@ impl<'a> PdfParser<'a> {
struct PdfFileParser<'a> {
parser: PdfParser<'a>,
objects_map: BTreeMap<PdfObjectIdentifier, PdfObject>,
objects_map: BTreeMap<PdfObjectIdentifier, PdfObjectAndParseCache>,
}
impl<'a> PdfFileParser<'a> {
@ -823,7 +976,11 @@ impl<'a> PdfFileParser<'a> {
let Some(PdfToken::Regular(b"endobj")) = self.parser.tokenizer.next() else {
return Err(PdfParseError::MissingEndObj { pos: end_obj_pos });
};
if self.objects_map.insert(id, object).is_some() {
if self
.objects_map
.insert(id, PdfObjectAndParseCache::new(object))
.is_some()
{
Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id })
} else {
Ok(Some(()))
@ -882,7 +1039,11 @@ impl<'a> PdfFileParser<'a> {
let object = parser
.parse_object_or_operator()?
.error_on_stream_or_operator()?;
if self.objects_map.insert(id, object).is_some() {
if self
.objects_map
.insert(id, PdfObjectAndParseCache::new(object))
.is_some()
{
return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id });
}
}
@ -892,7 +1053,7 @@ impl<'a> PdfFileParser<'a> {
while let Some(()) = self.parse_indirect_object_definition()? {}
let mut object_streams: Vec<PdfStream<PdfObjectStreamDictionary>> = Vec::new();
for object in self.objects_map.values_mut() {
let stream = match object {
let stream = match &object.object {
PdfObject::Stream(stream) => stream,
PdfObject::Boolean(_)
| PdfObject::Integer(_)
@ -907,7 +1068,7 @@ impl<'a> PdfFileParser<'a> {
if PdfObjectStreamDictionary::parse_type_from_dictionary(&stream.dictionary().rest)
.is_ok()
{
object_streams.push(PdfStream::parse(object.clone())?);
object_streams.push(PdfStream::parse(object.object.clone())?);
}
}
for object_stream in &object_streams {
@ -1012,11 +1173,6 @@ impl<'a> PdfFileParser<'a> {
self.parse_body()?;
self.parse_xref_table()?;
let trailer = self.parse_trailer()?;
for page in trailer.trailer_dictionary().root.pages.pages().iter() {
for content in page.contents.iter() {
content.decoded_data().as_ref()?;
}
}
Ok(Pdf {
header,
objects: self.parser.objects,