parse ToUnicode stream

This commit is contained in:
Jacob Lifshay 2025-12-30 22:48:56 -08:00
parent d7727289eb
commit b8a97a2326
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
5 changed files with 545 additions and 42 deletions

View file

@ -1,5 +1,6 @@
use crate::{
pdf::{
font::type_1_parse::PsFile,
object::{
IsPdfNull, PdfArray, PdfDictionary, PdfMatrix, PdfName, PdfNameOrInteger, PdfObject,
PdfObjectDirect, PdfRectangle, PdfStream, PdfString,
@ -15,26 +16,88 @@ use crate::{
use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc};
mod tables;
mod to_unicode_parse;
mod type_1_parse;
pdf_parse! {
#[pdf(transparent)]
#[derive(Clone)]
// TODO: actually parse the stream
pub struct PdfFontToUnicode {
#[pdf]
stream: PdfStream,
#[pdf]
#[derive(Clone, Debug)]
pub struct PdfFontToUnicodeDictionary {
#[pdf(name = "UseCMap")]
pub base_map: Option<PdfObjectDirect>, // TODO: parse
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
/// Parsed contents of a font's ToUnicode CMap stream.
///
/// Built by `to_unicode_parse::ToUnicodeParser::parse` from the decoded stream data.
#[derive(Clone)]
pub struct PdfFontToUnicode {
/// The stream dictionary's `UseCMap` entry, stored as-is without further interpretation.
pub base_map: Option<PdfObjectDirect>, // TODO: parse
/// The name defined as `CMapName` inside the CMap program.
pub char_map_name: PdfName,
/// Source code ranges from the `begincodespacerange` section; the parser currently
/// produces exactly one range.
pub src_ranges: Arc<[std::ops::RangeInclusive<PdfString>]>,
/// Maps each source code (raw bytes) to the text decoded from its UTF-16BE destination.
pub to_unicode_map: Arc<BTreeMap<PdfString, Arc<str>>>,
}
impl fmt::Debug for PdfFontToUnicode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self { stream } = self;
f.debug_struct("PdfFontToUnicode")
.field("stream", stream)
.finish()
})
struct DebugFn<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result>(F);
impl<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result> fmt::Debug for DebugFn<F> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
(self.0)(f)
}
}
let Self {
base_map,
char_map_name,
src_ranges,
to_unicode_map,
} = self;
f.debug_struct("PdfFontToUnicode")
.field("base_map", base_map)
.field("char_map_name", char_map_name)
.field(
"src_ranges",
&DebugFn(|f| {
f.debug_set()
.entries(
src_ranges
.iter()
.map(|v| v.start().bytes_debug()..=v.end().bytes_debug()),
)
.finish()
}),
)
.field(
"to_unicode_map",
&DebugFn(|f| {
f.debug_map()
.entries(to_unicode_map.iter().map(|(k, v)| (k.bytes_debug(), v)))
.finish()
}),
)
.finish()
}
}
impl IsPdfNull for PdfFontToUnicode {
/// Always `false`: a `PdfFontToUnicode` value is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfFontToUnicode {
    /// Name used to identify this type when parsing.
    fn type_name() -> Cow<'static, str> {
        "PdfFontToUnicode".into()
    }

    /// Parses `object` as a stream, then runs the ToUnicode CMap parser over its decoded data.
    ///
    /// The `UseCMap` dictionary entry is handed through to the result unparsed. Fails if the
    /// object is not a suitable stream, if the stream data could not be decoded, or if the CMap
    /// program itself is rejected by the parser.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let to_unicode_stream = PdfStream::<PdfFontToUnicodeDictionary>::parse(object)?;
        let use_cmap = to_unicode_stream.dictionary().rest.base_map.clone();
        let bytes = to_unicode_stream.decoded_data().clone()?;
        let tokenizer = PsFile::from_arc_bytes(bytes, to_unicode_stream.get_pdf_input_position());
        let parser = to_unicode_parse::ToUnicodeParser::new(tokenizer);
        parser.parse(use_cmap)
    }
}
@ -858,6 +921,17 @@ pub struct PdfSimpleFontEncodingTable {
pub table: ArcOrRef<'static, [PdfSimpleFontEncodingTableEntry; 0x100]>,
}
impl PdfSimpleFontEncodingTable {
    /// Returns a table whose 0x100 entries are all built with `new_static(None, None)`.
    ///
    /// The table refers to a single constant array, so no allocation happens here.
    pub const fn empty() -> Self {
        const BLANK: PdfSimpleFontEncodingTableEntry =
            PdfSimpleFontEncodingTableEntry::new_static(None, None);
        const ALL_BLANK: &[PdfSimpleFontEncodingTableEntry; 0x100] = &[BLANK; 0x100];
        let table = ArcOrRef::Ref(ALL_BLANK);
        Self { table }
    }
}
#[derive(Clone, Debug)]
pub enum PdfSimpleFontEncoding {
Predefined(PdfSimpleFontEncodingPredefined),
@ -903,11 +977,12 @@ impl PdfParse for PdfSimpleFontEncoding {
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct PdfFontType1Program {
pub encoding: Option<Arc<[Option<PdfName>]>>,
pub font_bbox: Option<PdfRectangle>,
pub encoding: PdfSimpleFontEncodingTable,
pub font_bbox: PdfRectangle,
pub font_info: Option<PdfFontType1FontInfo>,
pub font_matrix: Option<PdfMatrix>,
pub font_matrix: PdfMatrix,
pub font_name: Option<PdfName>,
pub vertical_writing_mode: bool,
}
#[derive(Clone, Debug)]

View file

@ -0,0 +1,325 @@
use std::{collections::BTreeMap, sync::Arc};
use crate::{
pdf::{
font::{
PdfFontToUnicode,
type_1_parse::{PsFile, Token},
},
object::{PdfName, PdfObjectDirect, PdfString},
parse::{PdfInputPosition, PdfParseError},
},
util::ArcOrRef,
};
/// Parser for the PostScript-like program stored in a font's ToUnicode stream.
///
/// Wraps a [`PsFile`] tokenizer; `parse` consumes the parser and produces a
/// [`PdfFontToUnicode`].
pub(crate) struct ToUnicodeParser {
/// Token source that every `expect*` helper and `parse` read from.
tokenizer: PsFile,
}
/// Builds the error returned when the ToUnicode parser meets a token it cannot accept.
///
/// `pos` is where the offending token started. `token` is the token itself, or `None` when the
/// input ended; it is recorded as its `Debug` rendering. Always returns `Err`.
#[track_caller]
fn invalid_token_err<T>(pos: PdfInputPosition, token: Option<Token>) -> Result<T, PdfParseError> {
    let token = format!("{token:?}");
    Err(PdfParseError::InvalidTokenInToUnicodeStream { pos, token })
}
impl ToUnicodeParser {
pub(crate) fn new(tokenizer: PsFile) -> Self {
Self { tokenizer }
}
/// Consumes the next token and returns its bytes if it is a `Token::String`.
///
/// Any other token, or end of input, produces an invalid-token error positioned at the start
/// of that token.
fn expect_any_string(&mut self) -> Result<Vec<u8>, PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    let token = self.tokenizer.next_token()?;
    let Some(Token::String(bytes)) = token else {
        return invalid_token_err(token_pos, token);
    };
    Ok(bytes)
}
/// Consumes the next token and returns its bytes if it is a `Token::String` of exactly
/// `expected_len` bytes.
///
/// A string of any other length, a non-string token, or end of input produces an
/// invalid-token error positioned at the start of that token.
pub(crate) fn expect_string_with_len(
    &mut self,
    expected_len: usize,
) -> Result<Vec<u8>, PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    match self.tokenizer.next_token()? {
        Some(Token::String(bytes)) => {
            if bytes.len() == expected_len {
                Ok(bytes)
            } else {
                invalid_token_err(token_pos, Some(Token::String(bytes)))
            }
        }
        other => invalid_token_err(token_pos, other),
    }
}
/// Consumes the next token and checks that it is a `Token::LiteralName` equal to
/// `expected_name`.
///
/// Anything else, including end of input, produces an invalid-token error positioned at the
/// start of that token.
pub(crate) fn expect_literal_name(
    &mut self,
    expected_name: &[u8],
) -> Result<(), PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    let token = self.tokenizer.next_token()?;
    let is_expected = matches!(
        &token,
        Some(Token::LiteralName(found)) if found.as_slice() == expected_name
    );
    if is_expected {
        Ok(())
    } else {
        invalid_token_err(token_pos, token)
    }
}
/// Consumes the next token and returns its bytes if it is a `Token::LiteralName`.
///
/// Any other token, or end of input, produces an invalid-token error positioned at the start
/// of that token.
pub(crate) fn expect_any_literal_name(&mut self) -> Result<Vec<u8>, PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    let token = self.tokenizer.next_token()?;
    let Some(Token::LiteralName(bytes)) = token else {
        return invalid_token_err(token_pos, token);
    };
    Ok(bytes)
}
/// Consumes the next token and checks that it is a `Token::ExecutableName` equal to
/// `expected_name`.
///
/// Anything else, including end of input, produces an invalid-token error positioned at the
/// start of that token.
pub(crate) fn expect_executable_name(
    &mut self,
    expected_name: &[u8],
) -> Result<(), PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    let token = self.tokenizer.next_token()?;
    let is_expected = matches!(
        &token,
        Some(Token::ExecutableName(found)) if found.as_slice() == expected_name
    );
    if is_expected {
        Ok(())
    } else {
        invalid_token_err(token_pos, token)
    }
}
/// Consumes the next token and checks that it compares equal to `expected_token`.
///
/// A different token, or end of input, produces an invalid-token error positioned at the
/// start of that token.
pub(crate) fn expect(&mut self, expected_token: Token) -> Result<(), PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    let token = self.tokenizer.next_token()?;
    if token.as_ref() == Some(&expected_token) {
        Ok(())
    } else {
        invalid_token_err(token_pos, token)
    }
}
/// Consumes the next token and returns its value if it is a `Token::Integer`.
///
/// Any other token, or end of input, produces an invalid-token error positioned at the start
/// of that token.
pub(crate) fn expect_integer(&mut self) -> Result<i128, PdfParseError> {
    self.tokenizer.skip_comments_and_whitespace();
    let token_pos = self.tokenizer.pos();
    let token = self.tokenizer.next_token()?;
    let Some(Token::Integer(number)) = token else {
        return invalid_token_err(token_pos, token);
    };
    Ok(number)
}
/// Parses a flat dictionary: `Token::DictStart`, then key/value pairs, then `Token::DictEnd`.
///
/// Every key must be a `Token::LiteralName`; each value is the single token that follows it
/// (nested structures are not assembled). For each pair `entry_callback` receives the key
/// bytes, the position where the value token starts, and the value token. The first error
/// from the callback stops parsing and is returned.
///
/// A non-name key or a missing value produces an invalid-token error.
pub(crate) fn parse_dict(
    &mut self,
    mut entry_callback: impl FnMut(Vec<u8>, PdfInputPosition, Token) -> Result<(), PdfParseError>,
) -> Result<(), PdfParseError> {
    self.expect(Token::DictStart)?;
    loop {
        // Read the key, or finish at the end of the dictionary.
        self.tokenizer.skip_comments_and_whitespace();
        let key_pos = self.tokenizer.pos();
        let key = match self.tokenizer.next_token()? {
            Some(Token::DictEnd) => return Ok(()),
            Some(Token::LiteralName(key)) => key,
            unexpected => return invalid_token_err(key_pos, unexpected),
        };

        // Read the single token that forms the value.
        self.tokenizer.skip_comments_and_whitespace();
        let value_pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(value) => entry_callback(key, value_pos, value)?,
            None => return invalid_token_err(value_pos, None),
        }
    }
}
/// Parses the whole ToUnicode CMap program and returns the resulting mapping.
///
/// The token stream must follow this fixed layout. It is shown in conventional PostScript
/// notation, assuming the tokenizer reports `/Name` as `Token::LiteralName` and a bare word as
/// `Token::ExecutableName`. Comments and whitespace are skipped.
///
/// ```text
/// /CIDInit /ProcSet findresource begin
/// <integer> dict begin
/// begincmap
/// /CIDSystemInfo << /Registry string /Ordering string /Supplement integer >> def
/// /CMapName /name def
/// /CMapType 2 def
/// 1 begincodespacerange low high endcodespacerange
/// { n beginbfrange ... endbfrange | n beginbfchar ... endbfchar }*
/// endcmap
/// CMapName currentdict /CMap defineresource pop
/// end end
/// ```
///
/// Every source code must have the same byte length as the codespace range's lower bound.
/// In a `bfrange` entry the two source codes may differ only in their last byte. Destinations
/// are UTF-16BE strings. For a `bfrange` with a single destination string, the destination's
/// last byte is incremented once per source code.
///
/// `base_map` is stored unchanged in the returned value.
///
/// # Errors
///
/// Returns `InvalidTokenInToUnicodeStream` when a token does not fit the layout above, when a
/// source code has the wrong length, when the two ends of a `bfrange` differ in more than
/// their last byte, when the codespace lower bound is empty, or when incrementing a `bfrange`
/// destination would push its last byte past 255. Returns `InvalidUtf16` when a destination is
/// not valid UTF-16. Errors from the tokenizer are propagated.
///
/// # Panics
///
/// Unsupported constructs still hit `todo!`: unknown `CIDSystemInfo` keys, destination
/// strings with an odd number of bytes (or an empty destination in `bfrange`), and any
/// section other than `beginbfrange`/`beginbfchar` before `endcmap`.
pub(crate) fn parse(
    mut self,
    base_map: Option<PdfObjectDirect>,
) -> Result<PdfFontToUnicode, PdfParseError> {
    // Fixed prologue up to `begincmap`.
    self.tokenizer.skip_comments_and_whitespace();
    self.expect_literal_name(b"CIDInit")?;
    self.expect_literal_name(b"ProcSet")?;
    self.expect_executable_name(b"findresource")?;
    self.expect_executable_name(b"begin")?;
    self.expect_integer()?;
    self.expect_executable_name(b"dict")?;
    self.expect_executable_name(b"begin")?;
    self.expect_executable_name(b"begincmap")?;

    // CIDSystemInfo dictionary: entries are type-checked; their values are not used yet.
    self.expect_literal_name(b"CIDSystemInfo")?;
    let mut registry = None;
    let mut ordering = None;
    let mut supplement = None;
    self.parse_dict(|name, value_pos, value| match &*name {
        b"Registry" => {
            let Token::String(v) = value else {
                return invalid_token_err(value_pos, Some(value));
            };
            registry = Some(v);
            Ok(())
        }
        b"Ordering" => {
            let Token::String(v) = value else {
                return invalid_token_err(value_pos, Some(value));
            };
            ordering = Some(v);
            Ok(())
        }
        b"Supplement" => {
            let Token::Integer(v) = value else {
                return invalid_token_err(value_pos, Some(value));
            };
            supplement = Some(v);
            Ok(())
        }
        _ => todo!("{}: {value:?}", name.escape_ascii()),
    })?;
    self.expect_executable_name(b"def")?;

    // CMap name and type.
    self.expect_literal_name(b"CMapName")?;
    self.tokenizer.skip_comments_and_whitespace();
    let char_map_name_pos = self.tokenizer.pos();
    let char_map_name = self.expect_any_literal_name()?;
    self.expect_executable_name(b"def")?;
    self.expect_literal_name(b"CMapType")?;
    self.expect(Token::Integer(2))?;
    self.expect_executable_name(b"def")?;

    // Exactly one codespace range; its lower bound fixes the length of every source code.
    self.expect(Token::Integer(1))?;
    self.expect_executable_name(b"begincodespacerange")?;
    self.tokenizer.skip_comments_and_whitespace();
    let range_start_pos = self.tokenizer.pos();
    let range_start = self.expect_any_string()?;
    if range_start.is_empty() {
        return invalid_token_err(range_start_pos, Some(Token::String(range_start)));
    }
    self.tokenizer.skip_comments_and_whitespace();
    let range_end_pos = self.tokenizer.pos();
    let range_end = self.expect_string_with_len(range_start.len())?;
    self.expect_executable_name(b"endcodespacerange")?;

    let mut to_unicode_map: BTreeMap<PdfString, Arc<str>> = BTreeMap::new();
    // Scratch buffer reused for every decoded destination string.
    let mut dest_str = String::new();
    // Decodes a UTF-16BE destination and records `src -> text`. Callers only pass
    // destinations with an even number of bytes.
    let mut insert_mapping = |src_pos: PdfInputPosition,
                              src: &[u8],
                              dest_pos: PdfInputPosition,
                              dest_utf16_be: &[u8]|
     -> Result<(), PdfParseError> {
        dest_str.clear();
        for ch in char::decode_utf16(
            dest_utf16_be
                .chunks_exact(2)
                .map(|unit| u16::from_be_bytes([unit[0], unit[1]])),
        ) {
            match ch {
                Ok(ch) => dest_str.push(ch),
                Err(_) => {
                    return Err(PdfParseError::InvalidUtf16 { pos: dest_pos });
                }
            }
        }
        to_unicode_map.insert(
            PdfString::new(src_pos, ArcOrRef::Arc(src.into())),
            dest_str.as_str().into(),
        );
        Ok(())
    };

    // Mapping sections, repeated until `endcmap`.
    loop {
        match self.tokenizer.next_token()? {
            Some(Token::Integer(size)) => match self.tokenizer.next_token()? {
                Some(Token::ExecutableName(name)) if name == b"beginbfrange" => {
                    for _ in 0..size {
                        self.tokenizer.skip_comments_and_whitespace();
                        let src_pos = self.tokenizer.pos();
                        let src_low = self.expect_string_with_len(range_start.len())?;
                        self.tokenizer.skip_comments_and_whitespace();
                        let src_high_pos = self.tokenizer.pos();
                        let src_high = self.expect_string_with_len(range_start.len())?;
                        // Both ends of the range must share everything but the last byte.
                        if src_low.split_last().map(|(_, prefix)| prefix)
                            != src_high.split_last().map(|(_, prefix)| prefix)
                        {
                            return invalid_token_err(
                                src_high_pos,
                                Some(Token::String(src_high)),
                            );
                        }
                        let src_last_range = *src_low.last().expect("known to be non-empty")
                            ..=*src_high.last().expect("known to be non-empty");
                        self.tokenizer.skip_comments_and_whitespace();
                        let dest_pos = self.tokenizer.pos();
                        match self.tokenizer.next_token()? {
                            Some(Token::String(dest))
                                if dest.len() >= 2 && dest.len() % 2 == 0 =>
                            {
                                // One destination string: its last byte advances together
                                // with the source code's last byte.
                                let first_src_byte = *src_last_range.start();
                                let dest_last_index = dest.len() - 1;
                                let dest_last_base = dest[dest_last_index];
                                let mut src = src_low;
                                let mut mapped_dest = dest.clone();
                                for src_last_byte in src_last_range {
                                    *src.last_mut().expect("known to be non-empty") =
                                        src_last_byte;
                                    // Cannot underflow: the range counts upward from
                                    // `first_src_byte`.
                                    let offset = src_last_byte - first_src_byte;
                                    // The last byte must not be incremented past 255;
                                    // reject the range instead of panicking or wrapping.
                                    let Some(dest_last) = dest_last_base.checked_add(offset)
                                    else {
                                        return invalid_token_err(
                                            dest_pos,
                                            Some(Token::String(dest)),
                                        );
                                    };
                                    mapped_dest[dest_last_index] = dest_last;
                                    insert_mapping(src_pos, &src, dest_pos, &mapped_dest)?;
                                }
                            }
                            Some(token @ Token::String(_)) => {
                                todo!("odd number of dest bytes: {token:?}");
                            }
                            Some(Token::ArrayStart) => {
                                // Array form: one destination string per source code.
                                let mut src = src_low;
                                for src_last_byte in src_last_range {
                                    *src.last_mut().expect("known to be non-empty") =
                                        src_last_byte;
                                    self.tokenizer.skip_comments_and_whitespace();
                                    let dest_pos = self.tokenizer.pos();
                                    match self.tokenizer.next_token()? {
                                        Some(Token::String(dest))
                                            if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                        {
                                            insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                        }
                                        Some(token @ Token::String(_)) => {
                                            todo!("odd number of dest bytes: {token:?}");
                                        }
                                        token => return invalid_token_err(dest_pos, token),
                                    }
                                }
                                self.expect(Token::ArrayEnd)?;
                            }
                            token => return invalid_token_err(dest_pos, token),
                        }
                    }
                    self.expect_executable_name(b"endbfrange")?;
                }
                Some(Token::ExecutableName(name)) if name == b"beginbfchar" => {
                    for _ in 0..size {
                        self.tokenizer.skip_comments_and_whitespace();
                        let src_pos = self.tokenizer.pos();
                        let src = self.expect_string_with_len(range_start.len())?;
                        self.tokenizer.skip_comments_and_whitespace();
                        let dest_pos = self.tokenizer.pos();
                        match self.tokenizer.next_token()? {
                            Some(Token::String(dest)) if dest.len() % 2 == 0 => {
                                insert_mapping(src_pos, &src, dest_pos, &dest)?;
                            }
                            Some(token @ Token::String(_)) => {
                                todo!("odd number of dest bytes: {token:?}");
                            }
                            token => return invalid_token_err(dest_pos, token),
                        }
                    }
                    self.expect_executable_name(b"endbfchar")?;
                }
                token => todo!("{token:?}"),
            },
            Some(Token::ExecutableName(name)) if name == b"endcmap" => {
                break;
            }
            token => todo!("{token:?}"),
        }
    }

    // Fixed epilogue, after which the input must be exhausted.
    self.expect_executable_name(b"CMapName")?;
    self.expect_executable_name(b"currentdict")?;
    self.expect_literal_name(b"CMap")?;
    self.expect_executable_name(b"defineresource")?;
    self.expect_executable_name(b"pop")?;
    self.expect_executable_name(b"end")?;
    self.expect_executable_name(b"end")?;
    self.tokenizer.skip_comments_and_whitespace();
    let eof_pos = self.tokenizer.pos();
    if let token @ Some(_) = self.tokenizer.next_token()? {
        return invalid_token_err(eof_pos, token);
    }
    Ok(PdfFontToUnicode {
        base_map,
        char_map_name: PdfName::new(char_map_name_pos, Arc::<[u8]>::from(char_map_name)),
        src_ranges: Arc::new([
            PdfString::new(range_start_pos, ArcOrRef::Arc(range_start.into()))
                ..=PdfString::new(range_end_pos, ArcOrRef::Arc(range_end.into())),
        ]),
        to_unicode_map: Arc::new(to_unicode_map),
    })
}
}

View file

@ -1,7 +1,10 @@
use crate::{
pdf::{
PdfObjects,
font::{PdfFontType1FontInfo, PdfFontType1Program},
font::{
PdfFontType1FontInfo, PdfFontType1Program, PdfSimpleFontEncodingTable,
PdfSimpleFontEncodingTableEntry,
},
object::{PdfMatrix, PdfName, PdfRectangle, PdfStreamContents, PdfString, PdfVec2D},
parse::{
PdfInputPosition, PdfInputPositionKnown, PdfInputPositionNoCompare, PdfParseError,
@ -52,7 +55,7 @@ impl PsFileDecryptedSource {
#[derive(Clone)]
enum PsFileSource {
Bytes(Rc<[u8]>),
Bytes(Arc<[u8]>),
Decrypted(Rc<RefCell<PsFileDecryptedSource>>),
}
@ -66,7 +69,7 @@ impl PsFileSource {
}
#[derive(Clone)]
struct PsFile {
pub(crate) struct PsFile {
id: u64,
source: PsFileSource,
pos: Rc<Cell<PdfInputPositionKnown>>,
@ -119,8 +122,8 @@ fn is_regular_char(v: u8) -> bool {
struct NotALineEnd;
#[derive(Clone)]
enum Token {
#[derive(Clone, PartialEq)]
pub(crate) enum Token {
Integer(i128),
Real(f64),
ArrayStart,
@ -131,6 +134,8 @@ enum Token {
LiteralName(Vec<u8>),
ImmediatelyEvaluatedName(Vec<u8>),
String(Vec<u8>),
DictStart,
DictEnd,
}
impl fmt::Debug for Token {
@ -150,6 +155,8 @@ impl fmt::Debug for Token {
Self::String(contents) => {
write!(f, "String({})", contents.escape_ascii())
}
Self::DictStart => write!(f, "DictStart"),
Self::DictEnd => write!(f, "DictEnd"),
}
}
}
@ -165,7 +172,10 @@ impl PsFile {
})),
}
}
fn pos(&self) -> PdfInputPosition {
/// Creates a tokenizer over an in-memory byte buffer.
///
/// `stream_pos` is forwarded to `Self::new` together with a byte source wrapping `bytes`;
/// the two remaining `Self::new` arguments are both passed as `0`.
pub(crate) fn from_arc_bytes(bytes: Arc<[u8]>, stream_pos: PdfInputPosition) -> Self {
    let source = PsFileSource::Bytes(bytes);
    Self::new(0, source, 0, stream_pos)
}
/// Returns the tokenizer's current position as a `PdfInputPosition`.
pub(crate) fn pos(&self) -> PdfInputPosition {
    let current = self.pos.get();
    PdfInputPosition::new(Some(current))
}
fn peek_byte(&self) -> Option<u8> {
@ -200,7 +210,7 @@ impl PsFile {
self.next_byte();
}
}
fn skip_comments_and_whitespace(&mut self) {
pub(crate) fn skip_comments_and_whitespace(&mut self) {
loop {
self.skip_whitespace();
let Some(b'%') = self.peek_byte() else {
@ -340,7 +350,41 @@ impl PsFile {
}
Err(PdfParseError::TruncatedFile { pos: self.pos() })
}
fn next_token(&mut self) -> Result<Option<Token>, PdfParseError> {
/// Parses the rest of a hexadecimal string whose opening `<` has already been consumed.
///
/// Reads bytes up to and including the closing `>`. NUL, tab, line feed, form feed, carriage
/// return and space are skipped. Every other byte must be a hexadecimal digit. Digits are
/// combined in pairs, high nibble first; an odd trailing digit is completed with a low nibble
/// of `0`.
///
/// # Errors
///
/// Returns `TruncatedFile` if the input ends before `>` and `InvalidHexStringDigit` for a byte
/// that is neither skipped whitespace, `>`, nor a hex digit.
fn parse_string_after_l_angle(&mut self) -> Result<Vec<u8>, PdfParseError> {
    let mut contents = Vec::new();
    // Holds the high nibble while waiting for its low nibble.
    let mut high_digit_value = None;
    let mut push_digit_value = |value: u8| {
        high_digit_value = match high_digit_value {
            Some(high_digit_value) => {
                contents.push((high_digit_value << 4) | value);
                None
            }
            None => Some(value),
        };
    };
    loop {
        let pos = self.pos();
        match self.next_byte() {
            None => {
                return Err(PdfParseError::TruncatedFile { pos });
            }
            Some(b'\0' | b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') => {}
            Some(b'>') => {
                // if we have an odd trailing digit, add the final digit, otherwise doesn't modify contents
                push_digit_value(0);
                return Ok(contents);
            }
            Some(b) => {
                let Some(value) = (b as char).to_digit(0x10) else {
                    return Err(PdfParseError::InvalidHexStringDigit { pos });
                };
                push_digit_value(value as u8);
            }
        }
    }
}
pub(crate) fn next_token(&mut self) -> Result<Option<Token>, PdfParseError> {
self.skip_comments_and_whitespace();
let Some(first_byte) = self.peek_byte() else {
return Ok(None);
@ -352,9 +396,26 @@ impl PsFile {
}
b')' => todo!(),
b'<' => {
todo!("encoded string");
self.next_byte();
match self.peek_byte() {
Some(b'<') => {
self.next_byte();
Ok(Some(Token::DictStart))
}
Some(b'~') => todo!("base 85 encoded string"),
_ => Ok(Some(Token::String(self.parse_string_after_l_angle()?))),
}
}
b'>' => {
self.next_byte();
match self.peek_byte() {
Some(b'>') => {
self.next_byte();
Ok(Some(Token::DictEnd))
}
_ => todo!("stray >"),
}
}
b'>' => todo!(),
b'[' => {
self.next_byte();
Ok(Some(Token::ArrayStart))
@ -724,12 +785,15 @@ impl PsOperator {
let PsObject::Integer(initial) = initial else {
todo!("{initial:?}");
};
let PsObject::Integer(increment @ (..=-1 | 1..)) = increment else {
let PsObject::Integer(increment) = increment else {
todo!("{increment:?}");
};
let PsObject::Integer(limit) = limit else {
todo!("{limit:?} {:?}", parser.operand_stack);
};
if increment == 0 {
return custom_err("postscript for operator: increment can't be zero");
};
let mut counter = initial;
let proc = proc.into_vec();
loop {
@ -1158,6 +1222,8 @@ impl PsParser {
Token::Real(v) => PsObject::Real(PsReal(v)),
Token::ArrayStart => PsObject::ExecutableName(PsName(b"[".into())),
Token::ArrayEnd => PsObject::ExecutableName(PsName(b"]".into())),
Token::DictStart => PsObject::ExecutableName(PsName(b"<<".into())),
Token::DictEnd => PsObject::ExecutableName(PsName(b">>".into())),
Token::ProcedureStart => PsObject::Procedure(self.parse_procedure()?),
Token::ProcedureEnd => return Ok(PsArray::from_elements(self, objects)),
Token::ExecutableName(name) => PsObject::ExecutableName(PsName(name.into())),
@ -1176,6 +1242,8 @@ impl PsParser {
Token::Real(v) => self.operand_stack.push(PsObject::Real(PsReal(v))),
Token::ArrayStart => self.run_name(&PsName(b"[".into()))?,
Token::ArrayEnd => self.run_name(&PsName(b"]".into()))?,
Token::DictStart => self.run_name(&PsName(b"<<".into()))?,
Token::DictEnd => self.run_name(&PsName(b">>".into()))?,
Token::ProcedureStart => {
let procedure = self.parse_procedure()?;
self.operand_stack.push(PsObject::Procedure(procedure))
@ -1199,26 +1267,30 @@ impl PsParser {
fn parse_font_encoding(
&mut self,
value: PsArray,
) -> Result<Arc<[Option<PdfName>]>, PdfParseError> {
) -> Result<PdfSimpleFontEncodingTable, PdfParseError> {
let value = value.rc();
let value = value.borrow();
let mut vec = Vec::with_capacity(value.len());
let mut retval = PdfSimpleFontEncodingTable::empty();
let mut table_iter = ArcOrRef::make_mut(&mut retval.table).iter_mut();
for entry in value.iter() {
match entry {
PsObject::Name(name) => {
if name.0 == b".notdef" {
vec.push(None);
let name = if name.0 == b".notdef" {
None
} else {
vec.push(Some(PdfName::new(
self.tokenizer.pos(),
Arc::from(&*name.0),
)));
Some(PdfName::new(self.tokenizer.pos(), Arc::from(&*name.0)))
};
if let Some(entry) = table_iter.next() {
*entry = PdfSimpleFontEncodingTableEntry {
name,
presumed_unicode: None,
};
}
}
_ => todo!("{entry:?}"),
}
}
Ok(Arc::from(vec))
Ok(retval)
}
fn parse_font_bbox(&mut self, value: PsArray) -> Result<PdfRectangle, PdfParseError> {
let value = value.rc();
@ -1332,6 +1404,7 @@ impl PsParser {
let mut font_info = None;
let mut font_matrix = None;
let mut font_name = None;
let mut vertical_writing_mode = false;
for (key, value) in named {
match (&*key.0, value) {
(b"Encoding", PsObject::Array(value)) => {
@ -1349,6 +1422,7 @@ impl PsParser {
(b"FontName", PsObject::Name(value)) => {
font_name = Some(value.into());
}
(b"WMode", PsObject::Boolean(v)) => vertical_writing_mode = v,
(b"FontType", _) => {
// TODO
}
@ -1361,12 +1435,22 @@ impl PsParser {
for (key, value) in other {
todo!("{key:?}: {value:?}");
}
let Some(encoding) = encoding else {
return custom_err("postscript type 1 font must have Encoding");
};
let Some(font_bbox) = font_bbox else {
return custom_err("postscript type 1 font must have FontBBox");
};
let Some(font_matrix) = font_matrix else {
return custom_err("postscript type 1 font must have FontMatrix");
};
Ok(PdfFontType1Program {
encoding,
font_bbox,
font_info,
font_matrix,
font_name,
vertical_writing_mode,
})
}
fn parse(mut self) -> Result<PdfFontType1Program, PdfParseError> {
@ -1414,7 +1498,7 @@ impl PdfStreamContents for PdfFontType1Program {
) -> Result<Self, PdfParseError> {
PsParser::new(PsFile::new(
0,
PsFileSource::Bytes(Rc::from(data)),
PsFileSource::Bytes(Arc::from(data)),
0,
stream_pos,
))

View file

@ -296,6 +296,13 @@ pub enum PdfParseError {
MissingSetFontOperator {
pos: PdfInputPosition,
},
InvalidTokenInToUnicodeStream {
pos: PdfInputPosition,
token: String,
},
InvalidUtf16 {
pos: PdfInputPosition,
},
}
impl From<std::convert::Infallible> for PdfParseError {
@ -345,7 +352,9 @@ impl GetPdfInputPosition for PdfParseError {
| PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos }
| PdfParseError::FontResourceNotFound { pos, .. }
| PdfParseError::MissingBeginTextOperator { pos }
| PdfParseError::MissingSetFontOperator { pos } => pos,
| PdfParseError::MissingSetFontOperator { pos }
| PdfParseError::InvalidTokenInToUnicodeStream { pos, .. }
| PdfParseError::InvalidUtf16 { pos } => pos,
PdfParseError::OperatorNotAllowedHere { ref operator } => operator.pos(),
PdfParseError::OperatorHasTooFewOperands { ref operator }
| PdfParseError::OperatorHasTooManyOperands { ref operator } => operator.pos(),
@ -488,7 +497,7 @@ impl fmt::Display for PdfParseError {
)
}
PdfParseError::MissingOperator { pos } => {
write!(f, "at {pos}: stream not allowed here")
write!(f, "at {pos}: missing operator")
}
PdfParseError::OperatorHasTooFewOperands { ref operator } => {
write!(
@ -525,6 +534,12 @@ impl fmt::Display for PdfParseError {
"at {pos}: missing set font `Tf` operator before this text showing operator"
)
}
PdfParseError::InvalidTokenInToUnicodeStream { pos, ref token } => {
write!(f, "at {pos}: invalid token in ToUnicode stream: {token}")
}
PdfParseError::InvalidUtf16 { pos } => {
write!(f, "at {pos}: invalid UTF-16")
}
}
}
}

View file

@ -35,7 +35,7 @@ use crate::{
PdfOperatorUnparsed,
},
document_structure::{PdfPage, PdfResourcesDictionary},
font::{PdfFont, PdfTodo},
font::{PdfFont, PdfSimpleFontEncodingTableEntry, PdfTodo},
object::{
IsPdfNull, PdfMatrix, PdfName, PdfNumber, PdfObject, PdfObjectDirect,
PdfStringOrNumber, PdfVec2D,
@ -934,13 +934,17 @@ impl PdfRenderOperator for PdfOperatorShowTextWithGlyphPositioning {
.font_descriptor()
.and_then(|v| v.font_file.as_ref())
.and_then(|v| v.decoded_data().as_ref().ok())
.and_then(|v| v.encoding.as_ref())
.map(|v| v.encoding.clone())
else {
todo!()
};
todo!("{font_encoding:?}");
font_encoding
});
todo!("{table:?}");
let PdfSimpleFontEncodingTableEntry {
name,
presumed_unicode,
} = table.table[usize::from(*glyph)].clone();
todo!("{name:?} {presumed_unicode:?} {:#?}", font.to_unicode());
}
}
PdfStringOrNumber::Number(number) => positioning = number.as_f32(),