WIP adding rendering
This commit is contained in:
parent
aba6368948
commit
9445599850
10 changed files with 2271 additions and 147 deletions
192
src/pdf.rs
192
src/pdf.rs
|
|
@ -5,22 +5,23 @@ use crate::{
|
|||
object::{
|
||||
PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject,
|
||||
PdfObjectIdentifier, PdfObjectIndirect, PdfObjectStreamDictionary, PdfReal, PdfStream,
|
||||
PdfStreamDictionary, PdfString, UnparsedPdfStreamDictionary,
|
||||
PdfStreamDictionary, PdfString,
|
||||
},
|
||||
parse::{
|
||||
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, PdfParse, PdfParseError,
|
||||
},
|
||||
},
|
||||
pdf_parse,
|
||||
util::ArcOrRef,
|
||||
util::{ArcOrRef, DagDebugState},
|
||||
};
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
any::{Any, TypeId},
|
||||
collections::{BTreeMap, HashMap},
|
||||
convert::Infallible,
|
||||
fmt,
|
||||
num::NonZero,
|
||||
str::FromStr,
|
||||
sync::{Arc, OnceLock},
|
||||
sync::{Arc, Mutex, OnceLock},
|
||||
};
|
||||
|
||||
pub mod content_stream;
|
||||
|
|
@ -28,10 +29,100 @@ pub mod document_structure;
|
|||
pub mod font;
|
||||
pub mod object;
|
||||
pub mod parse;
|
||||
pub mod render;
|
||||
pub mod stream_filters;
|
||||
|
||||
/// Cache of typed parse results, holding at most one entry per Rust type.
///
/// Entries are stored as [`std::sync::Weak`] handles, so the cache never keeps a
/// value alive on its own: once every `Arc` handed out for a type has been
/// dropped, the entry is dead and is either replaced by the next insert or
/// removed by a periodic sweep.
struct ParseCache {
    /// Latest cached value for each type, keyed by that type's `TypeId`.
    parse_results: HashMap<TypeId, std::sync::Weak<dyn Any + Send + Sync>>,
    /// Number of cache accesses left before dead entries are swept again.
    steps_till_next_gc: usize,
}

impl Default for ParseCache {
    /// Creates an empty cache whose first sweep happens on the second access.
    fn default() -> Self {
        Self {
            parse_results: HashMap::new(),
            steps_till_next_gc: 1,
        }
    }
}

impl ParseCache {
    /// Lower bound on the entry count used when scheduling the next sweep.
    const MIN_GC_LEN: usize = 10;
    /// Accesses granted per (clamped) live entry between two sweeps.
    const GC_STEPS_PER_ENTRY: usize = 20;

    /// Counts one cache access and, when the countdown has run out, removes
    /// every entry whose value has already been dropped.
    ///
    /// After a sweep the countdown is reset to
    /// `max(len, MIN_GC_LEN) * GC_STEPS_PER_ENTRY` (saturating), so the sweep
    /// interval grows with the number of surviving entries.
    fn gc(&mut self) {
        if let Some(remaining) = self.steps_till_next_gc.checked_sub(1) {
            self.steps_till_next_gc = remaining;
            return;
        }
        self.parse_results
            .retain(|_type_id, cached| cached.strong_count() > 0);
        self.steps_till_next_gc = self
            .parse_results
            .len()
            .max(Self::MIN_GC_LEN)
            .saturating_mul(Self::GC_STEPS_PER_ENTRY);
    }

    /// Converts a type-erased cache entry back into `Arc<T>`.
    ///
    /// Entries are only ever inserted under `TypeId::of::<T>()` for their own
    /// `T`, so a failed downcast would be a bug in this type.
    fn downcast_entry<T: 'static + Send + Sync>(entry: Arc<dyn Any + Send + Sync>) -> Arc<T> {
        match entry.downcast::<T>() {
            Ok(typed) => typed,
            Err(_) => unreachable!("parse cache entries are keyed by their own TypeId"),
        }
    }

    /// Returns the cached value of type `T`, or `None` when nothing was cached
    /// for `T` or the cached value has since been dropped.
    fn get<T: 'static + Send + Sync>(&mut self) -> Option<Arc<T>> {
        self.gc();
        let live = self.parse_results.get(&TypeId::of::<T>())?.upgrade()?;
        Some(Self::downcast_entry(live))
    }

    /// Returns the live cached value of type `T` if there is one; otherwise
    /// caches `value` and returns it.
    ///
    /// The second tuple element owns `value` when it lost against an existing
    /// entry. It is handed back instead of being dropped here so that the
    /// caller can decide when its destructor runs.
    fn get_or_insert<T: 'static + Send + Sync>(
        &mut self,
        value: Arc<T>,
    ) -> (Arc<T>, impl Sized + use<T>) {
        use std::collections::hash_map::Entry;
        self.gc();
        match self.parse_results.entry(TypeId::of::<T>()) {
            Entry::Occupied(mut entry) => match entry.get().upgrade() {
                Some(existing) => (Self::downcast_entry(existing), Some(value)),
                None => {
                    // The previous value is gone; reuse the slot for the new one.
                    entry.insert(Arc::<T>::downgrade(&value));
                    (value, None)
                }
            },
            Entry::Vacant(entry) => {
                entry.insert(Arc::<T>::downgrade(&value));
                (value, None)
            }
        }
    }
}
|
||||
|
||||
struct PdfObjectAndParseCache {
|
||||
object: PdfObject,
|
||||
parse_cache: Mutex<ParseCache>,
|
||||
}
|
||||
|
||||
impl PdfObjectAndParseCache {
|
||||
fn new(object: PdfObject) -> Self {
|
||||
Self {
|
||||
object,
|
||||
parse_cache: Mutex::default(),
|
||||
}
|
||||
}
|
||||
fn parse_cache_get<T: 'static + Send + Sync>(&self) -> Option<Arc<T>> {
|
||||
self.parse_cache.lock().expect("not poisoned").get()
|
||||
}
|
||||
fn parse_cache_get_or_insert<T: 'static + Send + Sync>(&self, value: Arc<T>) -> Arc<T> {
|
||||
let mut parse_cache = self.parse_cache.lock().expect("not poisoned");
|
||||
let (retval, to_drop_after_unlock) = parse_cache.get_or_insert(value);
|
||||
drop(parse_cache);
|
||||
drop(to_drop_after_unlock);
|
||||
retval
|
||||
}
|
||||
}
|
||||
|
||||
struct PdfObjectsInner {
|
||||
objects: BTreeMap<PdfObjectIdentifier, PdfObject>,
|
||||
objects: BTreeMap<PdfObjectIdentifier, PdfObjectAndParseCache>,
|
||||
#[allow(dead_code)]
|
||||
object_streams: Vec<PdfStream<PdfObjectStreamDictionary>>,
|
||||
}
|
||||
|
|
@ -52,7 +143,7 @@ impl PdfHeader {
|
|||
|
||||
pdf_parse! {
|
||||
#[pdf]
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone)]
|
||||
pub struct PdfTrailerDictionary {
|
||||
#[pdf(name = "Size")]
|
||||
pub size: usize,
|
||||
|
|
@ -71,6 +162,31 @@ pdf_parse! {
|
|||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for PdfTrailerDictionary {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
DagDebugState::scope(|_state| {
|
||||
let Self {
|
||||
size,
|
||||
prev,
|
||||
root,
|
||||
encrypt,
|
||||
info,
|
||||
id,
|
||||
rest,
|
||||
} = self;
|
||||
f.debug_struct("PdfTrailerDictionary")
|
||||
.field("size", size)
|
||||
.field("prev", prev)
|
||||
.field("root", root)
|
||||
.field("encrypt", encrypt)
|
||||
.field("info", info)
|
||||
.field("id", id)
|
||||
.field("rest", rest)
|
||||
.finish()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pdf_parse! {
|
||||
#[pdf(name)]
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
|
||||
|
|
@ -83,7 +199,7 @@ pdf_parse! {
|
|||
|
||||
pdf_parse! {
|
||||
#[pdf]
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone)]
|
||||
pub struct PdfXRefStreamDictionaryRest {
|
||||
#[pdf(name = "Type")]
|
||||
pub ty: PdfXRefName,
|
||||
|
|
@ -96,7 +212,21 @@ pdf_parse! {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
impl fmt::Debug for PdfXRefStreamDictionaryRest {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
DagDebugState::scope(|_state| {
|
||||
let Self { ty, index, w, rest } = self;
|
||||
f.debug_struct("PdfXRefStreamDictionaryRest")
|
||||
.field("ty", ty)
|
||||
.field("index", index)
|
||||
.field("w", w)
|
||||
.field("rest", rest)
|
||||
.finish()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum PdfTrailer {
|
||||
Trailer {
|
||||
trailer_dictionary: PdfTrailerDictionary,
|
||||
|
|
@ -108,6 +238,29 @@ pub enum PdfTrailer {
|
|||
},
|
||||
}
|
||||
|
||||
impl fmt::Debug for PdfTrailer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
DagDebugState::scope(|_state| match self {
|
||||
Self::Trailer {
|
||||
trailer_dictionary,
|
||||
start_xref,
|
||||
} => f
|
||||
.debug_struct("Trailer")
|
||||
.field("trailer_dictionary", trailer_dictionary)
|
||||
.field("start_xref", start_xref)
|
||||
.finish(),
|
||||
Self::Stream {
|
||||
xref_stream,
|
||||
start_xref,
|
||||
} => f
|
||||
.debug_struct("Stream")
|
||||
.field("xref_stream", xref_stream)
|
||||
.field("start_xref", start_xref)
|
||||
.finish(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PdfTrailer {
|
||||
pub fn trailer_dictionary(&self) -> &PdfTrailerDictionary {
|
||||
match self {
|
||||
|
|
@ -722,7 +875,7 @@ impl<'a> PdfParser<'a> {
|
|||
|
||||
struct PdfFileParser<'a> {
|
||||
parser: PdfParser<'a>,
|
||||
objects_map: BTreeMap<PdfObjectIdentifier, PdfObject>,
|
||||
objects_map: BTreeMap<PdfObjectIdentifier, PdfObjectAndParseCache>,
|
||||
}
|
||||
|
||||
impl<'a> PdfFileParser<'a> {
|
||||
|
|
@ -823,7 +976,11 @@ impl<'a> PdfFileParser<'a> {
|
|||
let Some(PdfToken::Regular(b"endobj")) = self.parser.tokenizer.next() else {
|
||||
return Err(PdfParseError::MissingEndObj { pos: end_obj_pos });
|
||||
};
|
||||
if self.objects_map.insert(id, object).is_some() {
|
||||
if self
|
||||
.objects_map
|
||||
.insert(id, PdfObjectAndParseCache::new(object))
|
||||
.is_some()
|
||||
{
|
||||
Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id })
|
||||
} else {
|
||||
Ok(Some(()))
|
||||
|
|
@ -882,7 +1039,11 @@ impl<'a> PdfFileParser<'a> {
|
|||
let object = parser
|
||||
.parse_object_or_operator()?
|
||||
.error_on_stream_or_operator()?;
|
||||
if self.objects_map.insert(id, object).is_some() {
|
||||
if self
|
||||
.objects_map
|
||||
.insert(id, PdfObjectAndParseCache::new(object))
|
||||
.is_some()
|
||||
{
|
||||
return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id });
|
||||
}
|
||||
}
|
||||
|
|
@ -892,7 +1053,7 @@ impl<'a> PdfFileParser<'a> {
|
|||
while let Some(()) = self.parse_indirect_object_definition()? {}
|
||||
let mut object_streams: Vec<PdfStream<PdfObjectStreamDictionary>> = Vec::new();
|
||||
for object in self.objects_map.values_mut() {
|
||||
let stream = match object {
|
||||
let stream = match &object.object {
|
||||
PdfObject::Stream(stream) => stream,
|
||||
PdfObject::Boolean(_)
|
||||
| PdfObject::Integer(_)
|
||||
|
|
@ -907,7 +1068,7 @@ impl<'a> PdfFileParser<'a> {
|
|||
if PdfObjectStreamDictionary::parse_type_from_dictionary(&stream.dictionary().rest)
|
||||
.is_ok()
|
||||
{
|
||||
object_streams.push(PdfStream::parse(object.clone())?);
|
||||
object_streams.push(PdfStream::parse(object.object.clone())?);
|
||||
}
|
||||
}
|
||||
for object_stream in &object_streams {
|
||||
|
|
@ -1012,11 +1173,6 @@ impl<'a> PdfFileParser<'a> {
|
|||
self.parse_body()?;
|
||||
self.parse_xref_table()?;
|
||||
let trailer = self.parse_trailer()?;
|
||||
for page in trailer.trailer_dictionary().root.pages.pages().iter() {
|
||||
for content in page.contents.iter() {
|
||||
content.decoded_data().as_ref()?;
|
||||
}
|
||||
}
|
||||
Ok(Pdf {
|
||||
header,
|
||||
objects: self.parser.objects,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue