parse info from type 1 fonts

This commit is contained in:
Jacob Lifshay 2025-12-30 07:01:16 -08:00
parent 9445599850
commit d7727289eb
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
7 changed files with 2900 additions and 25 deletions

View file

@ -1,15 +1,21 @@
use crate::{
pdf::{
object::{
IsPdfNull, PdfDictionary, PdfName, PdfObject, PdfObjectDirect, PdfRectangle, PdfStream,
PdfString,
IsPdfNull, PdfArray, PdfDictionary, PdfMatrix, PdfName, PdfNameOrInteger, PdfObject,
PdfObjectDirect, PdfRectangle, PdfStream, PdfString,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
PdfParseError,
},
parse::{PdfParse, PdfParseError},
pdf_parse,
},
util::DagDebugState,
util::{ArcOrRef, DagDebugState},
};
use std::{borrow::Cow, fmt, sync::Arc};
use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc};
mod tables;
mod type_1_parse;
pdf_parse! {
#[pdf(transparent)]
@ -108,7 +114,7 @@ pdf_parse! {
#[pdf(name = "MissingWidth")]
pub missing_width: Option<f32>,
#[pdf(name = "FontFile")]
pub font_file: Option<PdfStream>,
pub font_file: Option<PdfStream<PdfDictionary, PdfFontType1Program>>,
#[pdf(name = "FontFile2")]
pub font_file2: Option<PdfStream>,
#[pdf(name = "FontFile3")]
@ -403,7 +409,7 @@ impl PdfFontType1 {
Self::Other(v) => Some(&v.font_descriptor),
}
}
pub fn encoding(&self) -> &PdfObjectDirect {
pub fn encoding(&self) -> &Option<PdfSimpleFontEncoding> {
match self {
Self::Standard(v) => &v.encoding,
Self::Other(v) => &v.encoding,
@ -435,14 +441,19 @@ impl PdfParse for PdfFontType1 {
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
let object = object.into();
let PdfObjectDirect::Dictionary(object) = object else {
return Arc::<PdfFontType1Other>::parse(object.into()).map(Self::Other);
};
if let Ok(_) = PdfStandardFontName::parse(object.get_or_null(b"BaseFont".as_slice())) {
Arc::<PdfFontType1Standard>::parse(object.into()).map(Self::Standard)
let font = if let PdfObjectDirect::Dictionary(object) = object {
if let Ok(_) = PdfStandardFontName::parse(object.get_or_null(b"BaseFont".as_slice())) {
Self::Standard(PdfParse::parse(object.into())?)
} else {
Self::Other(PdfParse::parse(object.into())?)
}
} else {
Arc::<PdfFontType1Other>::parse(object.into()).map(Self::Other)
Self::Other(PdfParse::parse(object.into())?)
};
if let Some(font_file) = font.font_descriptor().and_then(|v| v.font_file.as_ref()) {
font_file.decoded_data().as_ref()?;
}
Ok(font)
}
}
@ -456,7 +467,7 @@ pub struct PdfFontType1Common {
pub last_char: Option<u32>,
pub widths: Option<Arc<[f32]>>,
pub font_descriptor: Option<PdfFontDescriptor>,
pub encoding: PdfObjectDirect,
pub encoding: Option<PdfSimpleFontEncoding>,
pub to_unicode: Option<PdfFontToUnicode>,
pub rest: PdfDictionary,
}
@ -515,8 +526,7 @@ pdf_parse! {
#[pdf(name = "FontDescriptor")]
pub font_descriptor: Option<PdfFontDescriptor>,
#[pdf(name = "Encoding")]
// TODO
pub encoding: PdfObjectDirect,
pub encoding: Option<PdfSimpleFontEncoding>,
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
@ -609,8 +619,7 @@ pdf_parse! {
#[pdf(name = "FontDescriptor")]
pub font_descriptor: PdfFontDescriptor,
#[pdf(name = "Encoding")]
// TODO
pub encoding: PdfObjectDirect,
pub encoding: Option<PdfSimpleFontEncoding>,
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
@ -681,3 +690,235 @@ impl PdfFontType1Other {
}
}
}
// Named encodings that a simple font's `Encoding` entry may select directly.
// Each variant is tagged with the PDF name given in its `#[pdf(name = "...")]` attribute.
// NOTE(review): the parsing code is presumably generated by `pdf_parse!` -- confirm in the macro.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfSimpleFontEncodingPredefined {
// PDF name `MacRomanEncoding`
#[pdf(name = "MacRomanEncoding")]
MacRomanEncoding,
// PDF name `MacExpertEncoding`
#[pdf(name = "MacExpertEncoding")]
MacExpertEncoding,
// PDF name `WinAnsiEncoding`
#[pdf(name = "WinAnsiEncoding")]
WinAnsiEncoding,
}
}
impl PdfSimpleFontEncodingPredefined {
pub const fn table(self) -> PdfSimpleFontEncodingTable {
match self {
Self::MacRomanEncoding => PdfSimpleFontEncodingTable::MAC_ROMAN,
Self::MacExpertEncoding => PdfSimpleFontEncodingTable::MAC_EXPERT,
Self::WinAnsiEncoding => PdfSimpleFontEncodingTable::WIN_ANSI,
}
}
}
// The only value accepted for the `Type` entry of an encoding dictionary: the name `Encoding`.
// Modelled as a single-variant enum (also the `Default`) so any other name fails to parse.
// NOTE(review): rejection of other names is presumably done by the `pdf_parse!` expansion -- confirm.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
pub enum PdfSimpleFontEncodingDictionaryType {
#[pdf(name = "Encoding")]
#[default]
Encoding,
}
}
// Dictionary form of a simple font's `Encoding` entry.
// All three named entries are optional; see `PdfSimpleFontEncodingDictionary::table` for how
// `base_encoding` and `differences` are combined into a lookup table.
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingDictionary {
// `Type` entry; when present it must be the name `Encoding`.
#[pdf(name = "Type")]
pub ty: Option<PdfSimpleFontEncodingDictionaryType>,
// `BaseEncoding` entry: the predefined encoding the differences are applied to.
#[pdf(name = "BaseEncoding")]
pub base_encoding: Option<PdfSimpleFontEncodingPredefined>,
// `Differences` entry: per-code glyph name overrides.
#[pdf(name = "Differences")]
pub differences: Option<PdfSimpleFontEncodingDifferences>,
// Flattened remainder of the dictionary.
// NOTE(review): presumably holds every entry not matched above -- confirm in `pdf_parse!`.
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfSimpleFontEncodingDictionary {
pub fn table(
&self,
default_table: impl FnOnce() -> PdfSimpleFontEncodingTable,
) -> PdfSimpleFontEncodingTable {
let Self {
ty: _,
base_encoding,
differences,
rest: _,
} = self;
let mut retval = base_encoding
.map(|v| v.table())
.unwrap_or_else(default_table);
if let Some(differences) = differences {
retval = differences.table(retval);
}
retval
}
}
/// Parsed form of an encoding dictionary's `Differences` array.
///
/// Holds the glyph name assigned to each overridden byte code, plus the input
/// position of the array it was parsed from.
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingDifferences {
/// position of the source array in the input
pos: PdfInputPositionNoCompare,
/// byte code -> glyph name overrides, shared behind an `Arc`
map: Arc<BTreeMap<u8, PdfName>>,
}
impl PdfSimpleFontEncodingDifferences {
    /// Creates a differences map from an input position and a ready-made
    /// byte code -> glyph name map.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>, map: Arc<BTreeMap<u8, PdfName>>) -> Self {
        let pos = pos.into();
        Self { pos, map }
    }

    /// Input position recorded for this differences array.
    pub fn pos(&self) -> PdfInputPosition {
        self.pos.0
    }

    /// The byte code -> glyph name overrides.
    pub fn map(&self) -> &Arc<BTreeMap<u8, PdfName>> {
        &self.map
    }

    /// Returns `base_table` with every code present in this map replaced by an entry
    /// carrying the overriding glyph name and no presumed Unicode text.
    ///
    /// The table storage is made uniquely writable via `ArcOrRef::make_mut` before the
    /// overrides are written, even when the map is empty.
    pub fn table(&self, base_table: PdfSimpleFontEncodingTable) -> PdfSimpleFontEncodingTable {
        let mut patched = base_table;
        let entries: &mut [_; 0x100] = ArcOrRef::make_mut(&mut patched.table);
        self.map.iter().for_each(|(&code, glyph_name)| {
            entries[usize::from(code)] = PdfSimpleFontEncodingTableEntry {
                name: Some(glyph_name.clone()),
                presumed_unicode: None,
            };
        });
        patched
    }
}
impl GetPdfInputPosition for PdfSimpleFontEncodingDifferences {
    /// Reports the position stored in the `pos` field (same value as the inherent `pos()`).
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        Self::pos(self)
    }
}
impl IsPdfNull for PdfSimpleFontEncodingDifferences {
/// Always `false`: a value of this type is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfSimpleFontEncodingDifferences {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("PdfSimpleFontEncodingDifferences")
    }

    /// Parses a `Differences` array made of integers and names.
    ///
    /// An integer (which must parse as a `u8`) sets the code for the next name. Each
    /// name is stored under the current code, after which the code advances by one.
    /// If the same code is assigned more than once, the last name wins.
    ///
    /// # Errors
    ///
    /// * Propagates any error from parsing the array or one of its elements.
    /// * Returns `PdfParseError::IntegerOutOfRange` at the name's position when a name
    ///   has no current code, i.e. no integer came before it or the previous code
    ///   was 255.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let array = PdfArray::parse(object)?;
        let array_pos = array.pos();
        let mut overrides = BTreeMap::new();
        let mut next_code: Option<u8> = None;
        for element in array.iter() {
            match PdfNameOrInteger::parse(element.clone())? {
                PdfNameOrInteger::Integer(start) => {
                    next_code = Some(u8::parse(start.into())?);
                }
                PdfNameOrInteger::Name(glyph_name) => {
                    let pos = glyph_name.pos();
                    let Some(code) = next_code else {
                        return Err(PdfParseError::IntegerOutOfRange { pos });
                    };
                    // `None` after code 255, so a further name is rejected above
                    next_code = code.checked_add(1);
                    overrides.insert(code, glyph_name);
                }
            }
        }
        Ok(Self {
            pos: array_pos.into(),
            map: Arc::new(overrides),
        })
    }
}
/// One slot of a [`PdfSimpleFontEncodingTable`].
///
/// The `Default` value has neither a name nor presumed Unicode text.
#[derive(Clone, Default, Debug)]
pub struct PdfSimpleFontEncodingTableEntry {
/// glyph name assigned to this code, if any
pub name: Option<PdfName>,
/// NOTE(review): presumably the Unicode text the glyph name is assumed to represent
/// -- confirm against the static tables; `Differences` overrides set it to `None`
pub presumed_unicode: Option<&'static str>,
}
impl PdfSimpleFontEncodingTableEntry {
    /// Builds an entry from static data; callable in `const` contexts.
    ///
    /// `name`, when given, is turned into a [`PdfName`] with `PdfName::new_static`;
    /// `presumed_unicode` is stored unchanged.
    pub const fn new_static(
        name: Option<&'static [u8]>,
        presumed_unicode: Option<&'static str>,
    ) -> Self {
        Self {
            // spelled out by hand: `Option::map` with a closure cannot be used in a `const fn`
            name: if let Some(bytes) = name {
                Some(PdfName::new_static(bytes))
            } else {
                None
            },
            presumed_unicode,
        }
    }
}
/// Lookup table for a simple font encoding: one entry per byte value (`0x100` entries).
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingTable {
/// the 256 entries, indexed by byte code; stored as an `ArcOrRef` so the table can be
/// either a `'static` reference or a shared `Arc`
pub table: ArcOrRef<'static, [PdfSimpleFontEncodingTableEntry; 0x100]>,
}
/// Value of a simple font's `Encoding` entry: either a predefined encoding name
/// or an encoding dictionary.
#[derive(Clone, Debug)]
pub enum PdfSimpleFontEncoding {
/// parsed from a PDF name object
Predefined(PdfSimpleFontEncodingPredefined),
/// parsed from a PDF dictionary object
Dictionary(PdfSimpleFontEncodingDictionary),
}
impl PdfSimpleFontEncoding {
pub fn table(
&self,
default_table: impl FnOnce() -> PdfSimpleFontEncodingTable,
) -> PdfSimpleFontEncodingTable {
match self {
PdfSimpleFontEncoding::Predefined(v) => v.table(),
PdfSimpleFontEncoding::Dictionary(v) => v.table(default_table),
}
}
}
impl IsPdfNull for PdfSimpleFontEncoding {
/// Always `false`: a value of this type is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfSimpleFontEncoding {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfSimpleFontEncoding")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
let object = PdfObjectDirect::from(object);
match object {
PdfObjectDirect::Name(v) => Ok(Self::Predefined(PdfParse::parse(v.into())?)),
PdfObjectDirect::Dictionary(v) => Ok(Self::Dictionary(PdfParse::parse(v.into())?)),
_ => Err(PdfParseError::InvalidType {
pos: object.pos(),
ty: object.type_name(),
expected_ty: "PdfSimpleFontEncoding",
}),
}
}
}
/// Information extracted from a Type 1 font program.
///
/// Used as the decoded-data type of the font descriptor's `FontFile` stream.
/// Every field is optional. The struct is `#[non_exhaustive]`, so code outside this
/// crate cannot build it with a struct literal or match it exhaustively.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct PdfFontType1Program {
/// per-code glyph names; a `None` element means that code has no name
/// NOTE(review): presumably the font program's built-in encoding vector -- confirm in `type_1_parse`
pub encoding: Option<Arc<[Option<PdfName>]>>,
/// NOTE(review): presumably the font program's `FontBBox` -- confirm in `type_1_parse`
pub font_bbox: Option<PdfRectangle>,
/// NOTE(review): presumably the font program's `FontInfo` dictionary -- confirm in `type_1_parse`
pub font_info: Option<PdfFontType1FontInfo>,
/// NOTE(review): presumably the font program's `FontMatrix` -- confirm in `type_1_parse`
pub font_matrix: Option<PdfMatrix>,
/// NOTE(review): presumably the font program's `FontName` -- confirm in `type_1_parse`
pub font_name: Option<PdfName>,
}
/// Optional descriptive data about a Type 1 font, stored in
/// [`PdfFontType1Program::font_info`].
///
/// NOTE(review): the fields presumably mirror the entries of a Type 1 `FontInfo`
/// dictionary (`FamilyName`, `FullName`, `Notice`, `Weight`, `version`, `ItalicAngle`,
/// `isFixedPitch`, `UnderlinePosition`, `UnderlineThickness`) -- confirm in `type_1_parse`.
#[derive(Clone, Debug)]
pub struct PdfFontType1FontInfo {
pub family_name: Option<PdfString>,
pub full_name: Option<PdfString>,
pub notice: Option<PdfString>,
pub weight: Option<PdfString>,
pub version: Option<PdfString>,
pub italic_angle: Option<f32>,
pub is_fixed_pitch: Option<bool>,
pub underline_position: Option<f32>,
pub underline_thickness: Option<f32>,
}

1067
src/pdf/font/tables.rs Normal file

File diff suppressed because it is too large Load diff

1423
src/pdf/font/type_1_parse.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -482,6 +482,81 @@ impl PdfParse for PdfStringOrNumber {
}
}
/// A PDF object that is either a name or an integer.
///
/// `Debug` is implemented by hand rather than derived.
#[derive(Clone)]
pub enum PdfNameOrInteger {
/// a PDF name object
Name(PdfName),
/// a PDF integer object
Integer(PdfInteger),
}
impl fmt::Debug for PdfNameOrInteger {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Name(v) => v.fmt(f),
Self::Integer(v) => v.fmt(f),
}
}
}
impl PdfNameOrInteger {
pub fn pos(self) -> PdfInputPosition {
match self {
Self::Name(v) => v.pos(),
Self::Integer(v) => v.pos(),
}
}
}
impl PdfObjectDirect {
    /// Returns the object as a [`PdfNameOrInteger`] when it is a name or an integer,
    /// and `None` for every other kind of object.
    ///
    /// The remaining variants are listed explicitly so that adding a new variant
    /// forces this match to be revisited.
    pub fn name_or_integer(&self) -> Option<PdfNameOrInteger> {
        match self {
            Self::Name(name) => Some(PdfNameOrInteger::Name(name.clone())),
            Self::Integer(integer) => Some(PdfNameOrInteger::Integer(*integer)),
            Self::Boolean(_)
            | Self::Real(_)
            | Self::String(_)
            | Self::Array(_)
            | Self::Dictionary(_)
            | Self::Stream(_)
            | Self::Null(_) => None,
        }
    }
}
impl PdfObjectNonNull {
    /// Returns the object as a [`PdfNameOrInteger`] when it is a name or an integer,
    /// and `None` for every other kind of object.
    ///
    /// The remaining variants are listed explicitly so that adding a new variant
    /// forces this match to be revisited.
    pub fn name_or_integer(&self) -> Option<PdfNameOrInteger> {
        match self {
            Self::Name(name) => Some(PdfNameOrInteger::Name(name.clone())),
            Self::Integer(integer) => Some(PdfNameOrInteger::Integer(*integer)),
            Self::Boolean(_)
            | Self::Real(_)
            | Self::String(_)
            | Self::Array(_)
            | Self::Dictionary(_)
            | Self::Stream(_) => None,
        }
    }
}
impl IsPdfNull for PdfNameOrInteger {
/// Always `false`: a name or an integer is never PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfNameOrInteger {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("name or integer")
    }

    /// Parses an object that must be a name or an integer.
    ///
    /// # Errors
    ///
    /// Returns `PdfParseError::InvalidType` (carrying the object's position and actual
    /// type name) when the object is neither a name nor an integer.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let object = PdfObjectDirect::from(object);
        // `ok_or_else` keeps the error construction (and its `pos()` / `type_name()`
        // calls) off the success path; `ok_or` would evaluate it on every parse.
        object.name_or_integer().ok_or_else(|| PdfParseError::InvalidType {
            pos: object.pos(),
            ty: object.type_name(),
            expected_ty: "name or integer",
        })
    }
}
macro_rules! make_pdf_object {
(
$(

View file

@ -1302,7 +1302,7 @@ macro_rules! pdf_parse {
$crate::__std::result::Result::Err($crate::pdf::parse::PdfParseError::InvalidName {
pos: name.pos(),
name,
expected_ty: $crate::__std::stringify!($Struct),
expected_ty: $crate::__std::stringify!($Enum),
})
}
}

View file

@ -40,10 +40,7 @@ use crate::{
IsPdfNull, PdfMatrix, PdfName, PdfNumber, PdfObject, PdfObjectDirect,
PdfStringOrNumber, PdfVec2D,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
PdfParseError,
},
parse::{PdfInputPosition, PdfInputPositionNoCompare, PdfParse, PdfParseError},
},
pdf_parse,
};
@ -929,8 +926,21 @@ impl PdfRenderOperator for PdfOperatorShowTextWithGlyphPositioning {
PdfStringOrNumber::String(s) => {
for glyph in s.bytes().iter() {
let positioning = std::mem::replace(&mut positioning, 0.0);
let encoding = font.encoding();
todo!("{encoding:?}");
let Some(encoding) = font.encoding() else {
todo!();
};
let table = encoding.table(|| {
let Some(font_encoding) = font
.font_descriptor()
.and_then(|v| v.font_file.as_ref())
.and_then(|v| v.decoded_data().as_ref().ok())
.and_then(|v| v.encoding.as_ref())
else {
todo!()
};
todo!("{font_encoding:?}");
});
todo!("{table:?}");
}
}
PdfStringOrNumber::Number(number) => positioning = number.as_f32(),

View file

@ -104,6 +104,65 @@ impl<T: ?Sized + fmt::Display> fmt::Display for ArcOrRef<'_, T> {
}
}
/// a stable alternative to `CloneToUninit` for `Arc`
///
/// Lets generic code over `T: ?Sized` create an `Arc<T>` from a `&T` and obtain
/// mutable access through an `Arc<T>`. Implemented in this module for every
/// `T: Clone`, for `[T]` where `T: Clone`, and for `str`.
pub trait ArcFromRef {
/// like `Arc::new(Self::clone(self))` but works for unsized types too
///
/// Returns a new `Arc` holding a copy of `self`.
fn arc_from_ref(&self) -> Arc<Self>;
/// generic version of `Arc::make_mut`
///
/// Returns a mutable reference to the value inside `this`; every implementation
/// in this module forwards directly to `Arc::make_mut`.
fn make_mut(this: &mut Arc<Self>) -> &mut Self;
}
impl<T: Clone> ArcFromRef for T {
    /// Clones the value and moves the clone into a fresh `Arc`.
    fn arc_from_ref(&self) -> Arc<Self> {
        let copy: T = self.clone();
        Arc::new(copy)
    }

    /// Forwards to `Arc::make_mut`.
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::<T>::make_mut(this)
    }
}
impl<T: Clone> ArcFromRef for [T] {
    /// Clones every element of the slice into a newly allocated `Arc<[T]>`.
    fn arc_from_ref(&self) -> Arc<Self> {
        self.into()
    }

    /// Forwards to `Arc::make_mut`.
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::<[T]>::make_mut(this)
    }
}
impl ArcFromRef for str {
    /// Copies the string contents into a newly allocated `Arc<str>`.
    fn arc_from_ref(&self) -> Arc<Self> {
        self.into()
    }

    /// Forwards to `Arc::make_mut`.
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::<str>::make_mut(this)
    }
}
impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> {
    /// Converts into an `Arc<T>`: an `Arc` variant is returned as-is, a `Ref` variant
    /// is copied into a new `Arc` with [`ArcFromRef::arc_from_ref`].
    pub fn into_arc(this: Self) -> Arc<T> {
        match this {
            ArcOrRef::Ref(borrowed) => T::arc_from_ref(borrowed),
            ArcOrRef::Arc(owned) => owned,
        }
    }

    /// Ensures `this` is the `Arc` variant and returns a mutable reference to that `Arc`.
    ///
    /// A `Ref` variant is first replaced in place by an `Arc` holding a copy of the
    /// referenced value.
    pub fn make_arc(this: &mut Self) -> &mut Arc<T> {
        // The shared reference is `Copy`, so it can be read out of `*this` without
        // keeping `this` borrowed across the assignment.
        if let ArcOrRef::Ref(borrowed) = *this {
            *this = ArcOrRef::Arc(T::arc_from_ref(borrowed));
        }
        match this {
            ArcOrRef::Arc(owned) => owned,
            ArcOrRef::Ref(_) => unreachable!(),
        }
    }

    /// Returns mutable access to the value, converting to the `Arc` variant first
    /// (see [`Self::make_arc`]) and then calling [`ArcFromRef::make_mut`] on it.
    pub fn make_mut(this: &mut Self) -> &mut T {
        let arc = Self::make_arc(this);
        T::make_mut(arc)
    }
}
trait DagDebugStateSealed {}
#[expect(private_bounds)]