From 9bf15dc9d0ab91ce11a1113f9fc3043b265902f7 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Tue, 6 Jan 2026 13:54:53 -0800 Subject: [PATCH] it works! produces the exact same xml as the python version --- src/main.rs | 9 ++++++++- src/xml_tree.rs | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5b0feca..67d9dd9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3838,7 +3838,14 @@ fn main_inner() -> Result<(), Error> { parser.parse_pdf(ctx, file_name, page_numbers, dump_mupdf_page_xml)?; let mut insns = xml_tree::Element::new( "instructions".into(), - [("is-subset".into(), is_subset.to_string())], + [( + "is-subset".into(), + if is_subset { + "True".into() + } else { + "False".into() + }, + )], ); insns.text = "\n".into(); insns.tail = "\n".into(); diff --git a/src/xml_tree.rs b/src/xml_tree.rs index 6139ca6..4831a38 100644 --- a/src/xml_tree.rs +++ b/src/xml_tree.rs @@ -3,7 +3,7 @@ use quick_xml::{ Writer, - events::{BytesDecl, BytesText, Event}, + events::{BytesDecl, BytesStart, BytesText, Event}, }; use std::fmt; @@ -183,6 +183,15 @@ impl Element { ElementTag::Normal(tag) if tag.is_empty() => { writer.write_event(Event::Text(BytesText::new(text)))?; } + ElementTag::Normal(tag) + if attrib.is_empty() && text.is_empty() && children.is_empty() => + { + // write element like `<br />` to match python instead of like `<br/>` + writer.write_event(Event::Empty(BytesStart::from_content( + tag.clone() + " ", + tag.len(), + )))?; + } ElementTag::Normal(tag) => { let mut element_writer = writer.create_element(tag); for (name, value) in attrib { @@ -212,7 +221,10 @@ impl Element { ) -> std::io::Result<()> { let mut writer = Writer::new(writer); if xml_declaration { - writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("utf-8"), None)))?; + // use specific string to match python + writer.write_event(Event::Decl(BytesDecl::from_start( + BytesStart::from_content("xml version='1.0' encoding='utf-8'", 3), + )))?; writer.write_event(Event::Text(BytesText::new("\n")))?; } self.write_to(&mut writer)