generate xml output
This commit is contained in:
parent
21b97c0064
commit
25f47227d8
3 changed files with 209 additions and 20 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,3 +3,4 @@
|
||||||
*.egg-info
|
*.egg-info
|
||||||
__pycache__
|
__pycache__
|
||||||
*.log
|
*.log
|
||||||
|
/powerisa-instructions.xml
|
|
@ -10,6 +10,8 @@ from typing import ClassVar, TypeVar, assert_never
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
import enum
|
import enum
|
||||||
import traceback
|
import traceback
|
||||||
|
from copy import deepcopy
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from pdfminer.high_level import extract_pages
|
from pdfminer.high_level import extract_pages
|
||||||
from pdfminer.layout import LTChar, LTLine, LTPage, LTRect, LTTextBox
|
from pdfminer.layout import LTChar, LTLine, LTPage, LTRect, LTTextBox
|
||||||
|
@ -256,6 +258,39 @@ class ParsedTextLine:
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "\n" * self.preceding_blank_lines + ElementTree.tostring(self.element, encoding="unicode")
|
return "\n" * self.preceding_blank_lines + ElementTree.tostring(self.element, encoding="unicode")
|
||||||
|
|
||||||
|
def write_xml(self, parent: ElementTree.Element, trailing_nl: bool):
    """Merge this text line into `parent` as mixed XML content.

    A `<br>` element (whose tail is a newline) is appended for every
    preceding blank line.  The leading text of `self.element` is then
    attached to `parent` and deep copies of the children of `self.element`
    are appended.  When `trailing_nl` is true one more `<br>` is appended.
    """
    def add_break():
        ElementTree.SubElement(parent, "br").tail = "\n"

    for _ in range(self.preceding_blank_lines):
        add_break()

    leading_text = self.element.text
    if leading_text is not None:
        if len(parent) != 0:
            # text that follows a child element is stored in that child's tail
            last_child = parent[-1]
            last_child.tail = (last_child.tail or "") + leading_text
        else:
            parent.text = (parent.text or "") + leading_text

    # copies keep the output tree independent from the parsed line's tree
    parent.extend([deepcopy(child) for child in self.element])

    if trailing_nl:
        add_break()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def write_xml_lines(
|
||||||
|
lines: Iterable[ParsedTextLine],
|
||||||
|
parent: ElementTree.Element,
|
||||||
|
trailing_nl: bool,
|
||||||
|
preceding_nl: bool=False,
|
||||||
|
):
|
||||||
|
if preceding_nl:
|
||||||
|
ElementTree.SubElement(parent, "br").tail = "\n"
|
||||||
|
first = True
|
||||||
|
for line in lines:
|
||||||
|
if first:
|
||||||
|
first = False
|
||||||
|
else:
|
||||||
|
ElementTree.SubElement(parent, "br").tail = "\n"
|
||||||
|
line.write_xml(parent, trailing_nl=False)
|
||||||
|
if trailing_nl:
|
||||||
|
ElementTree.SubElement(parent, "br").tail = "\n"
|
||||||
|
|
||||||
|
|
||||||
_T = TypeVar("_T")
|
_T = TypeVar("_T")
|
||||||
|
|
||||||
class BaselinePos(enum.Enum):
|
class BaselinePos(enum.Enum):
|
||||||
|
@ -535,6 +570,17 @@ class InsnBitField:
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return f"<InsnBitField: x={self.box_min_x}..{self.box_max_x} name={self.name} bit_number={self.bit_number}>"
|
return f"<InsnBitField: x={self.box_min_x}..{self.box_max_x} name={self.name} bit_number={self.bit_number}>"
|
||||||
|
|
||||||
|
def write_xml(self, parent: ElementTree.Element):
    """Append a `<field>` element describing this bit field to `parent`.

    The new element holds a `<name>` child followed by a `<bit-number>`
    child, each filled in by the corresponding text line's `write_xml`.
    Newline text/tails keep the serialized output one element per line.
    """
    field_elm = ElementTree.SubElement(parent, "field")
    field_elm.text = field_elm.tail = "\n"
    children = (
        ("name", self.name),
        ("bit-number", self.bit_number),
    )
    for tag, text_line in children:
        child = ElementTree.SubElement(field_elm, tag)
        child.tail = "\n"
        text_line.write_xml(child, trailing_nl=False)
|
||||||
|
|
||||||
@dataclass(unsafe_hash=True, frozen=True)
|
@dataclass(unsafe_hash=True, frozen=True)
|
||||||
class InsnBitFieldsPrefix:
|
class InsnBitFieldsPrefix:
|
||||||
box_min_x: float
|
box_min_x: float
|
||||||
|
@ -555,6 +601,18 @@ class InsnBitFieldsPrefix:
|
||||||
f" ]\n"
|
f" ]\n"
|
||||||
f" suffix_text={self.suffix_text}>")
|
f" suffix_text={self.suffix_text}>")
|
||||||
|
|
||||||
|
def write_xml(self, parent: ElementTree.Element):
    """Append a `<prefix>` element for this prefix word to `parent`.

    The element contains, in order: `<prefix-text>`, the `<fields>` list
    produced by `InsnBitFields.write_xml_fields`, and `<suffix-text>`.
    """
    prefix_elm = ElementTree.SubElement(parent, "prefix")
    prefix_elm.text = prefix_elm.tail = "\n"

    def add_text_child(tag, text_line):
        child = ElementTree.SubElement(prefix_elm, tag)
        child.tail = "\n"
        text_line.write_xml(child, trailing_nl=False)

    add_text_child("prefix-text", self.prefix_text)
    InsnBitFields.write_xml_fields(self.fields, prefix_elm)
    add_text_child("suffix-text", self.suffix_text)
|
||||||
|
|
||||||
@dataclass(unsafe_hash=True, frozen=True)
|
@dataclass(unsafe_hash=True, frozen=True)
|
||||||
class InsnBitFields:
|
class InsnBitFields:
|
||||||
prefix: None | InsnBitFieldsPrefix
|
prefix: None | InsnBitFieldsPrefix
|
||||||
|
@ -573,6 +631,23 @@ class InsnBitFields:
|
||||||
f"({self.box_max_x},{self.box_max_y}) [\n"
|
f"({self.box_max_x},{self.box_max_y}) [\n"
|
||||||
f" {sep.join(map(str, self.fields))},\n]>")
|
f" {sep.join(map(str, self.fields))},\n]>")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def write_xml_fields(fields: Iterable[InsnBitField], parent: ElementTree.Element):
|
||||||
|
fields_elm = ElementTree.SubElement(parent, "fields")
|
||||||
|
fields_elm.text = "\n"
|
||||||
|
fields_elm.tail = "\n"
|
||||||
|
for field in fields:
|
||||||
|
field.write_xml(fields_elm)
|
||||||
|
|
||||||
|
def write_xml(self, parent: ElementTree.Element):
    """Append a `<bit-fields>` element for this instruction encoding to `parent`.

    The optional prefix is written first (when present), followed by the
    `<fields>` list of the instruction's own bit fields.
    """
    bit_fields_elm = ElementTree.SubElement(parent, "bit-fields")
    bit_fields_elm.text = bit_fields_elm.tail = "\n"
    prefix = self.prefix
    if prefix is not None:
        prefix.write_xml(bit_fields_elm)
    InsnBitFields.write_xml_fields(self.fields, bit_fields_elm)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(unsafe_hash=True, frozen=True)
|
@dataclass(unsafe_hash=True, frozen=True)
|
||||||
class InsnSpRegsAlteredEntry:
|
class InsnSpRegsAlteredEntry:
|
||||||
reg: ParsedTextLine
|
reg: ParsedTextLine
|
||||||
|
@ -600,6 +675,20 @@ class InsnSpRegsAlteredEntry:
|
||||||
f"{indent} conds={conds},\n"
|
f"{indent} conds={conds},\n"
|
||||||
f"{indent})")
|
f"{indent})")
|
||||||
|
|
||||||
|
def write_xml(self, parent: ElementTree.Element):
    """Append an `<entry>` element for this altered-register entry to `parent`.

    The entry holds a `<register>` child (a single text line) followed by
    `<fields>` and `<conditions>` children, each of which receives its
    lines through `ParsedTextLine.write_xml_lines`.
    """
    entry_elm = ElementTree.SubElement(parent, "entry")
    entry_elm.text = entry_elm.tail = "\n"

    register_elm = ElementTree.SubElement(entry_elm, "register")
    register_elm.tail = "\n"
    self.reg.write_xml(register_elm, trailing_nl=False)

    multi_line_children = (
        ("fields", self.fields),
        ("conditions", self.conds),
    )
    for tag, text_lines in multi_line_children:
        child = ElementTree.SubElement(entry_elm, tag)
        child.tail = "\n"
        ParsedTextLine.write_xml_lines(text_lines, child, trailing_nl=False)
|
||||||
|
|
||||||
@dataclass(unsafe_hash=True, frozen=True)
|
@dataclass(unsafe_hash=True, frozen=True)
|
||||||
class InsnSpRegsAltered:
|
class InsnSpRegsAltered:
|
||||||
sp_regs_altered_text: ParsedTextLine
|
sp_regs_altered_text: ParsedTextLine
|
||||||
|
@ -631,6 +720,28 @@ class InsnSpRegsAltered:
|
||||||
lines.append(f")")
|
lines.append(f")")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
def write_xml(self, parent: ElementTree.Element):
    """Append a `<special-registers-altered>` element to `parent`.

    Children are emitted in this order: `<title>` (always), then
    `<special-text>`, `<table-header-register>` and
    `<table-header-fields>` (each only when the matching attribute is not
    None), and finally whatever every entry writes for itself.
    """
    root = ElementTree.SubElement(parent, "special-registers-altered")
    root.text = root.tail = "\n"

    def add_text_child(tag, text_line):
        child = ElementTree.SubElement(root, tag)
        child.tail = "\n"
        text_line.write_xml(child, trailing_nl=False)

    add_text_child("title", self.sp_regs_altered_text)
    optional_children = (
        ("special-text", self.special_text),
        ("table-header-register", self.table_header_reg),
        ("table-header-fields", self.table_header_fields),
    )
    for tag, text_line in optional_children:
        if text_line is not None:
            add_text_child(tag, text_line)
    for entry in self.entries:
        entry.write_xml(root)
|
||||||
|
|
||||||
class _InsnParseSection(enum.Enum):
|
class _InsnParseSection(enum.Enum):
|
||||||
CODE = "code"
|
CODE = "code"
|
||||||
HEADER = "header"
|
HEADER = "header"
|
||||||
|
@ -687,8 +798,9 @@ class Page:
|
||||||
)
|
)
|
||||||
if text_section is None:
|
if text_section is None:
|
||||||
if PAGE_BODY_MIN_Y <= element.y0 <= PAGE_BODY_MAX_Y:
|
if PAGE_BODY_MIN_Y <= element.y0 <= PAGE_BODY_MAX_Y:
|
||||||
raise AssertionError(
|
if page_num != 1072: # page 1072 has characters in the margins
|
||||||
f"char not in text section: {element}\npage_num={page_num}")
|
raise AssertionError(
|
||||||
|
f"char not in text section: {element}\npage_num={page_num}")
|
||||||
continue
|
continue
|
||||||
char = Char(
|
char = Char(
|
||||||
text=element.get_text(),
|
text=element.get_text(),
|
||||||
|
@ -1003,10 +1115,57 @@ class TextSection:
|
||||||
return i
|
return i
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class InsnHeader:
    """One parsed instruction header: title lines, mnemonic lines and the encoding's bit fields."""

    # text lines written into the header's <title> element
    header_lines: tuple[ParsedTextLine, ...]
    # text lines written into the header's <mnemonics> element
    mnemonic_lines: tuple[ParsedTextLine, ...]
    # bit-field box of the encoding; also supplies `min_y`
    bit_fields: InsnBitFields

    @property
    def min_y(self) -> float:
        """The `box_min_y` of this header's bit-fields box."""
        return self.bit_fields.box_min_y

    def write_xml(self, parent: ElementTree.Element):
        """Append a `<header>` element to `parent`.

        The element contains `<title>`, `<mnemonics>` and then the
        bit-fields output, in that order.
        """
        header_elm = ElementTree.SubElement(parent, "header")
        header_elm.text = header_elm.tail = "\n"
        line_groups = (
            ("title", self.header_lines),
            ("mnemonics", self.mnemonic_lines),
        )
        for tag, text_lines in line_groups:
            child = ElementTree.SubElement(header_elm, tag)
            child.tail = "\n"
            ParsedTextLine.write_xml_lines(text_lines, child, trailing_nl=False)
        self.bit_fields.write_xml(header_elm)
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Insn:
    """A fully parsed instruction: its headers, code, description and altered special registers."""

    # one or more headers, written first and in order
    headers: tuple[InsnHeader, ...]
    # lines of the <code> element; the element is omitted when empty
    code_lines: tuple[ParsedTextLine, ...]
    # lines of the <description> element; the element is omitted when empty
    desc_lines: tuple[ParsedTextLine, ...]
    # "special registers altered" section, or None when there is none
    sp_regs_altered: None | InsnSpRegsAltered

    def write_xml(self, parent: ElementTree.Element):
        """Append an `<instruction>` element for this instruction to `parent`.

        Headers come first, then the optional `<code>` and `<description>`
        elements (skipped when they have no lines), then the optional
        special-registers-altered section.
        """
        insn_elm = ElementTree.SubElement(parent, "instruction")
        insn_elm.text = insn_elm.tail = "\n"
        for header in self.headers:
            header.write_xml(insn_elm)
        line_groups = (
            ("code", self.code_lines),
            ("description", self.desc_lines),
        )
        for tag, text_lines in line_groups:
            if len(text_lines) == 0:
                continue
            child = ElementTree.SubElement(insn_elm, tag)
            child.tail = "\n"
            ParsedTextLine.write_xml_lines(text_lines, child, trailing_nl=False)
        altered = self.sp_regs_altered
        if altered is not None:
            altered.write_xml(insn_elm)
|
||||||
|
|
||||||
@dataclass()
|
@dataclass()
|
||||||
class Parser:
|
class Parser:
|
||||||
pages: Pages = field(default_factory=Pages)
|
pages: Pages = field(default_factory=Pages)
|
||||||
text_section: TextSection = TextSection.first()
|
text_section: TextSection = TextSection.first()
|
||||||
|
insns: list[Insn] = field(default_factory=list)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def page(self) -> Page:
|
def page(self) -> Page:
|
||||||
|
@ -1017,7 +1176,7 @@ class Parser:
|
||||||
return self.pages[self.text_section.page_num].unprocessed_chars[self.text_section]
|
return self.pages[self.text_section.page_num].unprocessed_chars[self.text_section]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __pages_gen(file: str, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
|
def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
|
||||||
if page_numbers is not None:
|
if page_numbers is not None:
|
||||||
page_numbers = sorted(i - 1 for i in page_numbers)
|
page_numbers = sorted(i - 1 for i in page_numbers)
|
||||||
for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
|
for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
|
||||||
|
@ -1028,7 +1187,7 @@ class Parser:
|
||||||
print(f"page {page_num}")
|
print(f"page {page_num}")
|
||||||
yield Page.from_lt_page(page_num=page_num, page=page)
|
yield Page.from_lt_page(page_num=page_num, page=page)
|
||||||
|
|
||||||
def parse_pdf(self, file: str, page_numbers: Iterable[int] | None = None):
|
def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None):
|
||||||
self.pages = Pages(pages_gen=Parser.__pages_gen(
|
self.pages = Pages(pages_gen=Parser.__pages_gen(
|
||||||
file=file, page_numbers=page_numbers))
|
file=file, page_numbers=page_numbers))
|
||||||
self.text_section = TextSection.first()
|
self.text_section = TextSection.first()
|
||||||
|
@ -1059,7 +1218,7 @@ class Parser:
|
||||||
try:
|
try:
|
||||||
with self.note_text_section():
|
with self.note_text_section():
|
||||||
self.extract_insns()
|
self.extract_insns()
|
||||||
except InsnParseError as e:
|
except (InsnParseError, PageParseError) as e:
|
||||||
print("".join(traceback.format_exception_only(e)), flush=True)
|
print("".join(traceback.format_exception_only(e)), flush=True)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
|
@ -1200,10 +1359,11 @@ class Parser:
|
||||||
self.unprocessed_chars[char.font].remove(char)
|
self.unprocessed_chars[char.font].remove(char)
|
||||||
if allowed_start_min_y_error is None:
|
if allowed_start_min_y_error is None:
|
||||||
allowed_start_min_y_error = 0.01
|
allowed_start_min_y_error = 0.01
|
||||||
assert abs(start_min_y - retval.regular_min_y) < allowed_start_min_y_error, (
|
if abs(start_min_y - retval.regular_min_y) > allowed_start_min_y_error:
|
||||||
f"start_min_y={start_min_y} regular_min_y={retval.regular_min_y}\n"
|
raise PageParseError(
|
||||||
f"start_min_y error: {start_min_y - retval.regular_min_y}\n"
|
f"start_min_y={start_min_y} regular_min_y={retval.regular_min_y}\n"
|
||||||
f"allowed_start_min_y_error={allowed_start_min_y_error}")
|
f"start_min_y error: {start_min_y - retval.regular_min_y}\n"
|
||||||
|
f"allowed_start_min_y_error={allowed_start_min_y_error}")
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def extract_following_text_lines(
|
def extract_following_text_lines(
|
||||||
|
@ -1403,7 +1563,7 @@ class Parser:
|
||||||
self,
|
self,
|
||||||
start_min_y: float,
|
start_min_y: float,
|
||||||
header_start_char: None | Char = None,
|
header_start_char: None | Char = None,
|
||||||
) -> None | tuple[list[ParsedTextLine], list[ParsedTextLine], InsnBitFields]:
|
) -> None | InsnHeader:
|
||||||
assert header_start_char is None or \
|
assert header_start_char is None or \
|
||||||
header_start_char.font == Font.INSN_HEADER
|
header_start_char.font == Font.INSN_HEADER
|
||||||
header_line = self.extract_text_line(
|
header_line = self.extract_text_line(
|
||||||
|
@ -1458,7 +1618,11 @@ class Parser:
|
||||||
print(insn_bit_fields)
|
print(insn_bit_fields)
|
||||||
if insn_bit_fields is None:
|
if insn_bit_fields is None:
|
||||||
raise InsnParseError("can't find insn bit fields")
|
raise InsnParseError("can't find insn bit fields")
|
||||||
return header_lines, mnemonic_lines, insn_bit_fields
|
return InsnHeader(
|
||||||
|
header_lines=tuple(header_lines),
|
||||||
|
mnemonic_lines=tuple(mnemonic_lines),
|
||||||
|
bit_fields=insn_bit_fields,
|
||||||
|
)
|
||||||
|
|
||||||
def extract_insn_sp_regs_altered(
|
def extract_insn_sp_regs_altered(
|
||||||
self,
|
self,
|
||||||
|
@ -1474,12 +1638,14 @@ class Parser:
|
||||||
max_y=sp_regs_altered_text.regular_min_y - 5,
|
max_y=sp_regs_altered_text.regular_min_y - 5,
|
||||||
allow_processed=False,
|
allow_processed=False,
|
||||||
)
|
)
|
||||||
assert table_header_reg_char is not None, \
|
if table_header_reg_char is None:
|
||||||
"can't find special registers altered table's register-column's header"
|
raise InsnParseError(
|
||||||
|
"can't find special registers altered table's register-column's header")
|
||||||
KNOWN_SPECIAL_TEXTS = (
|
KNOWN_SPECIAL_TEXTS = (
|
||||||
"None",
|
"None",
|
||||||
"Dependent on the system service",
|
"Dependent on the system service",
|
||||||
"See above.",
|
"See above.",
|
||||||
|
"See Table 5.1",
|
||||||
)
|
)
|
||||||
match table_header_reg_char.text:
|
match table_header_reg_char.text:
|
||||||
case "R":
|
case "R":
|
||||||
|
@ -1611,7 +1777,7 @@ class Parser:
|
||||||
final_regular_min_y=regular_min_y,
|
final_regular_min_y=regular_min_y,
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_insn(self, header_start_char: Char):
|
def extract_insn(self, header_start_char: Char) -> Insn:
|
||||||
assert header_start_char.font == Font.INSN_HEADER
|
assert header_start_char.font == Font.INSN_HEADER
|
||||||
print(header_start_char)
|
print(header_start_char)
|
||||||
header = self.extract_insn_header_mnemonics_and_bit_fields(
|
header = self.extract_insn_header_mnemonics_and_bit_fields(
|
||||||
|
@ -1620,7 +1786,7 @@ class Parser:
|
||||||
)
|
)
|
||||||
if header is None:
|
if header is None:
|
||||||
raise PageParseError("can't find header text line")
|
raise PageParseError("can't find header text line")
|
||||||
next_start_min_y = header[2].box_min_y - 5
|
next_start_min_y = header.min_y - 5
|
||||||
headers = [header]
|
headers = [header]
|
||||||
code_lines: list[ParsedTextLine] = []
|
code_lines: list[ParsedTextLine] = []
|
||||||
desc_lines: list[ParsedTextLine] = []
|
desc_lines: list[ParsedTextLine] = []
|
||||||
|
@ -1687,7 +1853,7 @@ class Parser:
|
||||||
if header is None:
|
if header is None:
|
||||||
raise InsnParseError("can't find header text line")
|
raise InsnParseError("can't find header text line")
|
||||||
headers.append(header)
|
headers.append(header)
|
||||||
next_start_min_y = header[2].box_min_y - 5
|
next_start_min_y = header.min_y - 5
|
||||||
case _InsnParseSection.DESC:
|
case _InsnParseSection.DESC:
|
||||||
desc_line = self.extract_text_line(
|
desc_line = self.extract_text_line(
|
||||||
start_char=next_char,
|
start_char=next_char,
|
||||||
|
@ -1706,7 +1872,7 @@ class Parser:
|
||||||
first_text_line=desc_line,
|
first_text_line=desc_line,
|
||||||
min_x=desc_line.chars[0].min_x,
|
min_x=desc_line.chars[0].min_x,
|
||||||
max_x=self.text_section.max_x,
|
max_x=self.text_section.max_x,
|
||||||
allowed_start_min_y_error=3,
|
allowed_start_min_y_error=3.5,
|
||||||
)
|
)
|
||||||
print("more insn desc lines:")
|
print("more insn desc lines:")
|
||||||
print("\n".join(map(str, more_desc_lines)))
|
print("\n".join(map(str, more_desc_lines)))
|
||||||
|
@ -1729,6 +1895,12 @@ class Parser:
|
||||||
print("sp_regs_altered:")
|
print("sp_regs_altered:")
|
||||||
print(sp_regs_altered)
|
print(sp_regs_altered)
|
||||||
# TODO: finish
|
# TODO: finish
|
||||||
|
return Insn(
|
||||||
|
headers=tuple(headers),
|
||||||
|
code_lines=tuple(code_lines),
|
||||||
|
desc_lines=tuple(desc_lines),
|
||||||
|
sp_regs_altered=sp_regs_altered,
|
||||||
|
)
|
||||||
|
|
||||||
def extract_insns(self):
|
def extract_insns(self):
|
||||||
while True:
|
while True:
|
||||||
|
@ -1737,7 +1909,7 @@ class Parser:
|
||||||
self.unprocessed_chars[Font.INSN_HEADER]))
|
self.unprocessed_chars[Font.INSN_HEADER]))
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
break
|
break
|
||||||
self.extract_insn(header_start_char=header_start_char)
|
self.insns.append(self.extract_insn(header_start_char=header_start_char))
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if 2 < len(sys.argv):
|
if 2 < len(sys.argv):
|
||||||
|
@ -1747,4 +1919,19 @@ def main():
|
||||||
page_numbers = tuple(int(i) for i in sys.argv[2].split(","))
|
page_numbers = tuple(int(i) for i in sys.argv[2].split(","))
|
||||||
else:
|
else:
|
||||||
page_numbers = None
|
page_numbers = None
|
||||||
Parser().parse_pdf(sys.argv[1], page_numbers=page_numbers)
|
parser = Parser()
|
||||||
|
file_name = Path(sys.argv[1])
|
||||||
|
parser.parse_pdf(file_name, page_numbers=page_numbers)
|
||||||
|
insns = ElementTree.Element("instructions", attrib={"is-subset": str(page_numbers is not None)})
|
||||||
|
insns.text = "\n"
|
||||||
|
insns.tail = "\n"
|
||||||
|
comment = ElementTree.Comment(f" Automatically generated from {file_name.name} ")
|
||||||
|
comment.tail = "\n"
|
||||||
|
insns.append(comment)
|
||||||
|
for insn in parser.insns:
|
||||||
|
insn.write_xml(insns)
|
||||||
|
ElementTree.ElementTree(insns).write(
|
||||||
|
"powerisa-instructions.xml",
|
||||||
|
encoding="utf-8",
|
||||||
|
xml_declaration=True,
|
||||||
|
)
|
|
@ -8,6 +8,7 @@ version = "0.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"pdfminer.six == 20240706"
|
"pdfminer.six == 20240706"
|
||||||
]
|
]
|
||||||
|
requires-python = ">= 3.11"
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
|
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
|
Loading…
Add table
Add a link
Reference in a new issue