Compare commits
	
		
			2 commits
		
	
	
		
			21b97c0064
			...
			87352a4fd7
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 87352a4fd7 | |||
| 25f47227d8 | 
					 4 changed files with 247 additions and 20 deletions
				
			
		
							
								
								
									
										1
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -3,3 +3,4 @@
 | 
			
		|||
*.egg-info
 | 
			
		||||
__pycache__
 | 
			
		||||
*.log
 | 
			
		||||
/powerisa-instructions.xml
 | 
			
		||||
							
								
								
									
										38
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										38
									
								
								README.md
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1 +1,39 @@
 | 
			
		|||
Parser for the OPF PowerISA 3.1C PDF that attempts to extract all instructions' pseudo-code, including subscripts/superscripts and other formatting.
 | 
			
		||||
 | 
			
		||||
Usage:
 | 
			
		||||
* Download the OPF PowerISA 3.1C PDF (yes, you need that exact version) from <https://openpower.foundation/specifications/isa/>
 | 
			
		||||
* Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/))
 | 
			
		||||
 | 
			
		||||
  On Debian Bookworm you can do:
 | 
			
		||||
 | 
			
		||||
  ```bash
 | 
			
		||||
  sudo apt update
 | 
			
		||||
  sudo apt install python3-venv
 | 
			
		||||
  ```
 | 
			
		||||
 | 
			
		||||
* Create a venv:
 | 
			
		||||
 | 
			
		||||
  ```bash
 | 
			
		||||
  python3.11 -m venv --upgrade-deps path/to/your/new/.venv
 | 
			
		||||
  ```
 | 
			
		||||
 | 
			
		||||
* Activate the venv:
 | 
			
		||||
 | 
			
		||||
  ```bash
 | 
			
		||||
  . path/to/your/new/.venv/bin/activate
 | 
			
		||||
  ```
 | 
			
		||||
 | 
			
		||||
* Install this project in the venv:
 | 
			
		||||
 | 
			
		||||
  ```bash
 | 
			
		||||
  pip install -e .
 | 
			
		||||
  ```
 | 
			
		||||
 | 
			
		||||
* Run it:
 | 
			
		||||
 | 
			
		||||
  ```bash
 | 
			
		||||
  parse_powerisa_pdf path/to/downloaded/OPF_PowerISA_v3.1C.pdf > out.log
 | 
			
		||||
  ```
 | 
			
		||||
 | 
			
		||||
* This will print many error messages and then successfully create
 | 
			
		||||
  the output file, `powerisa-instructions.xml`, in the current directory.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,6 +10,8 @@ from typing import ClassVar, TypeVar, assert_never
 | 
			
		|||
from xml.etree import ElementTree
 | 
			
		||||
import enum
 | 
			
		||||
import traceback
 | 
			
		||||
from copy import deepcopy
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
from pdfminer.high_level import extract_pages
 | 
			
		||||
from pdfminer.layout import LTChar, LTLine, LTPage, LTRect, LTTextBox
 | 
			
		||||
| 
						 | 
				
			
			@ -256,6 +258,39 @@ class ParsedTextLine:
 | 
			
		|||
    def __str__(self) -> str:
 | 
			
		||||
        return "\n" * self.preceding_blank_lines + ElementTree.tostring(self.element, encoding="unicode")
 | 
			
		||||
 | 
			
		||||
    def write_xml(self, parent: ElementTree.Element, trailing_nl: bool):
        """Append this text line's XML content to `parent`.

        Emits one `br` element (with a newline tail) per preceding blank
        line, then the line's leading text followed by deep copies of the
        children of `self.element`, and finally an optional trailing `br`.

        Args:
            parent: element that receives the content; modified in place.
            trailing_nl: when true, a `br` element with a newline tail is
                appended after the line's content.
        """
        for _ in range(self.preceding_blank_lines):
            blank_line_br = ElementTree.SubElement(parent, "br")
            blank_line_br.tail = "\n"
        leading_text = self.element.text
        if leading_text is not None:
            # Text that follows existing children lives on the last child's
            # tail; otherwise it belongs to the parent's own text.
            if len(parent) > 0:
                previous = parent[-1]
                previous.tail = (previous.tail or "") + leading_text
            else:
                parent.text = (parent.text or "") + leading_text
        for child in self.element:
            # Copy: later writes modify the tail of parent's last child.
            parent.append(deepcopy(child))
        if trailing_nl:
            closing_br = ElementTree.SubElement(parent, "br")
            closing_br.tail = "\n"
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def write_xml_lines(
 | 
			
		||||
        lines: Iterable[ParsedTextLine],
 | 
			
		||||
        parent: ElementTree.Element,
 | 
			
		||||
        trailing_nl: bool,
 | 
			
		||||
        preceding_nl: bool=False,
 | 
			
		||||
    ):
 | 
			
		||||
        if preceding_nl:
 | 
			
		||||
            ElementTree.SubElement(parent, "br").tail = "\n"
 | 
			
		||||
        first = True
 | 
			
		||||
        for line in lines:
 | 
			
		||||
            if first:
 | 
			
		||||
                first = False
 | 
			
		||||
            else:
 | 
			
		||||
                ElementTree.SubElement(parent, "br").tail = "\n"
 | 
			
		||||
            line.write_xml(parent, trailing_nl=False)
 | 
			
		||||
        if trailing_nl:
 | 
			
		||||
            ElementTree.SubElement(parent, "br").tail = "\n"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
_T = TypeVar("_T")
 | 
			
		||||
 | 
			
		||||
class BaselinePos(enum.Enum):
 | 
			
		||||
| 
						 | 
				
			
			@ -535,6 +570,17 @@ class InsnBitField:
 | 
			
		|||
    def __str__(self) -> str:
 | 
			
		||||
        return f"<InsnBitField: x={self.box_min_x}..{self.box_max_x} name={self.name} bit_number={self.bit_number}>"
 | 
			
		||||
 | 
			
		||||
    def write_xml(self, parent: ElementTree.Element):
        """Append a `field` element describing this bit field to `parent`.

        The new element gets two children, `name` and `bit-number`, each
        filled in by the `write_xml` of the corresponding attribute.
        """
        field_elm = ElementTree.SubElement(parent, "field")
        field_elm.text = field_elm.tail = "\n"
        children = (("name", self.name), ("bit-number", self.bit_number))
        for tag, text_line in children:
            child = ElementTree.SubElement(field_elm, tag)
            child.tail = "\n"
            text_line.write_xml(child, trailing_nl=False)
 | 
			
		||||
 | 
			
		||||
@dataclass(unsafe_hash=True, frozen=True)
 | 
			
		||||
class InsnBitFieldsPrefix:
 | 
			
		||||
    box_min_x: float
 | 
			
		||||
| 
						 | 
				
			
			@ -555,6 +601,18 @@ class InsnBitFieldsPrefix:
 | 
			
		|||
            f"    ]\n"
 | 
			
		||||
            f"    suffix_text={self.suffix_text}>")
 | 
			
		||||
 | 
			
		||||
    def write_xml(self, parent: ElementTree.Element):
        """Append a `prefix` element describing this prefix to `parent`.

        Children are written in this order: `prefix-text`, the `fields`
        element produced by `InsnBitFields.write_xml_fields`, `suffix-text`.
        """
        prefix_elm = ElementTree.SubElement(parent, "prefix")
        prefix_elm.text = prefix_elm.tail = "\n"

        def add_text_child(tag, text_line):
            # one child element holding a single text line
            child = ElementTree.SubElement(prefix_elm, tag)
            child.tail = "\n"
            text_line.write_xml(child, trailing_nl=False)

        add_text_child("prefix-text", self.prefix_text)
        InsnBitFields.write_xml_fields(self.fields, prefix_elm)
        add_text_child("suffix-text", self.suffix_text)
 | 
			
		||||
 | 
			
		||||
@dataclass(unsafe_hash=True, frozen=True)
 | 
			
		||||
class InsnBitFields:
 | 
			
		||||
    prefix: None | InsnBitFieldsPrefix
 | 
			
		||||
| 
						 | 
				
			
			@ -573,6 +631,23 @@ class InsnBitFields:
 | 
			
		|||
            f"({self.box_max_x},{self.box_max_y}) [\n"
 | 
			
		||||
            f"    {sep.join(map(str, self.fields))},\n]>")
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def write_xml_fields(fields: Iterable[InsnBitField], parent: ElementTree.Element):
 | 
			
		||||
        fields_elm = ElementTree.SubElement(parent, "fields")
 | 
			
		||||
        fields_elm.text = "\n"
 | 
			
		||||
        fields_elm.tail = "\n"
 | 
			
		||||
        for field in fields:
 | 
			
		||||
            field.write_xml(fields_elm)
 | 
			
		||||
 | 
			
		||||
    def write_xml(self, parent: ElementTree.Element):
 | 
			
		||||
        bit_fields = ElementTree.SubElement(parent, "bit-fields")
 | 
			
		||||
        bit_fields.text = "\n"
 | 
			
		||||
        bit_fields.tail = "\n"
 | 
			
		||||
        if self.prefix is not None:
 | 
			
		||||
            self.prefix.write_xml(bit_fields)
 | 
			
		||||
        InsnBitFields.write_xml_fields(self.fields, bit_fields)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass(unsafe_hash=True, frozen=True)
 | 
			
		||||
class InsnSpRegsAlteredEntry:
 | 
			
		||||
    reg: ParsedTextLine
 | 
			
		||||
| 
						 | 
				
			
			@ -600,6 +675,20 @@ class InsnSpRegsAlteredEntry:
 | 
			
		|||
            f"{indent}    conds={conds},\n"
 | 
			
		||||
            f"{indent})")
 | 
			
		||||
 | 
			
		||||
    def write_xml(self, parent: ElementTree.Element):
        """Append an `entry` element for this table entry to `parent`.

        The entry gets three children: `register` (from `self.reg`),
        `fields` (from `self.fields`) and `conditions` (from `self.conds`).
        """
        entry_elm = ElementTree.SubElement(parent, "entry")
        entry_elm.text = entry_elm.tail = "\n"

        reg_elm = ElementTree.SubElement(entry_elm, "register")
        reg_elm.tail = "\n"
        self.reg.write_xml(reg_elm, trailing_nl=False)

        # the two multi-line columns are written the same way
        for tag, lines in (("fields", self.fields), ("conditions", self.conds)):
            column = ElementTree.SubElement(entry_elm, tag)
            column.tail = "\n"
            ParsedTextLine.write_xml_lines(lines, column, trailing_nl=False)
 | 
			
		||||
 | 
			
		||||
@dataclass(unsafe_hash=True, frozen=True)
 | 
			
		||||
class InsnSpRegsAltered:
 | 
			
		||||
    sp_regs_altered_text: ParsedTextLine
 | 
			
		||||
| 
						 | 
				
			
			@ -631,6 +720,28 @@ class InsnSpRegsAltered:
 | 
			
		|||
        lines.append(f")")
 | 
			
		||||
        return "\n".join(lines)
 | 
			
		||||
 | 
			
		||||
    def write_xml(self, parent: ElementTree.Element):
        """Append a `special-registers-altered` element to `parent`.

        Children are written in this order: `title`, then `special-text`,
        `table-header-register` and `table-header-fields` (each only when
        the matching attribute is not None), then one element per entry in
        `self.entries`.
        """
        root = ElementTree.SubElement(parent, "special-registers-altered")
        root.text = root.tail = "\n"

        def add_text_child(tag, text_line):
            # one child element holding a single text line
            child = ElementTree.SubElement(root, tag)
            child.tail = "\n"
            text_line.write_xml(child, trailing_nl=False)

        # the title is always written; the remaining parts are optional
        add_text_child("title", self.sp_regs_altered_text)
        if self.special_text is not None:
            add_text_child("special-text", self.special_text)
        if self.table_header_reg is not None:
            add_text_child("table-header-register", self.table_header_reg)
        if self.table_header_fields is not None:
            add_text_child("table-header-fields", self.table_header_fields)
        for entry in self.entries:
            entry.write_xml(root)
 | 
			
		||||
 | 
			
		||||
class _InsnParseSection(enum.Enum):
 | 
			
		||||
    CODE = "code"
 | 
			
		||||
    HEADER = "header"
 | 
			
		||||
| 
						 | 
				
			
			@ -687,6 +798,7 @@ class Page:
 | 
			
		|||
                    )
 | 
			
		||||
                    if text_section is None:
 | 
			
		||||
                        if PAGE_BODY_MIN_Y <= element.y0 <= PAGE_BODY_MAX_Y:
 | 
			
		||||
                            if page_num != 1072:  # page 1072 has characters in the margins
 | 
			
		||||
                                raise AssertionError(
 | 
			
		||||
                                    f"char not in text section: {element}\npage_num={page_num}")
 | 
			
		||||
                        continue
 | 
			
		||||
| 
						 | 
				
			
			@ -1003,10 +1115,57 @@ class TextSection:
 | 
			
		|||
                return i
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
@dataclass(frozen=True)
class InsnHeader:
    """One instruction header: title lines, mnemonic lines and bit fields."""

    header_lines: tuple[ParsedTextLine, ...]
    mnemonic_lines: tuple[ParsedTextLine, ...]
    bit_fields: InsnBitFields

    @property
    def min_y(self) -> float:
        """The `box_min_y` of this header's bit fields."""
        return self.bit_fields.box_min_y

    def write_xml(self, parent: ElementTree.Element):
        """Append a `header` element for this header to `parent`.

        Children are written in this order: `title` (from `header_lines`),
        `mnemonics` (from `mnemonic_lines`), then whatever
        `self.bit_fields.write_xml` adds.
        """
        header_elm = ElementTree.SubElement(parent, "header")
        header_elm.text = header_elm.tail = "\n"
        sections = (
            ("title", self.header_lines),
            ("mnemonics", self.mnemonic_lines),
        )
        for tag, lines in sections:
            section = ElementTree.SubElement(header_elm, tag)
            section.tail = "\n"
            ParsedTextLine.write_xml_lines(lines, section, trailing_nl=False)
        self.bit_fields.write_xml(header_elm)
 | 
			
		||||
 | 
			
		||||
@dataclass(frozen=True)
class Insn:
    """One parsed instruction: headers, code, description, altered registers."""

    headers: tuple[InsnHeader, ...]
    code_lines: tuple[ParsedTextLine, ...]
    desc_lines: tuple[ParsedTextLine, ...]
    sp_regs_altered: None | InsnSpRegsAltered

    def write_xml(self, parent: ElementTree.Element):
        """Append an `instruction` element for this instruction to `parent`.

        All headers are written first. The `code` and `description`
        elements are written only when the corresponding lines are
        non-empty, and the special-registers-altered part only when
        `sp_regs_altered` is not None.
        """
        insn_elm = ElementTree.SubElement(parent, "instruction")
        insn_elm.text = insn_elm.tail = "\n"
        for header in self.headers:
            header.write_xml(insn_elm)
        sections = (
            ("code", self.code_lines),
            ("description", self.desc_lines),
        )
        for tag, lines in sections:
            if len(lines) == 0:
                # empty sections are omitted from the output entirely
                continue
            section = ElementTree.SubElement(insn_elm, tag)
            section.tail = "\n"
            ParsedTextLine.write_xml_lines(lines, section, trailing_nl=False)
        altered = self.sp_regs_altered
        if altered is not None:
            altered.write_xml(insn_elm)
 | 
			
		||||
 | 
			
		||||
@dataclass()
 | 
			
		||||
class Parser:
 | 
			
		||||
    pages: Pages = field(default_factory=Pages)
 | 
			
		||||
    text_section: TextSection = TextSection.first()
 | 
			
		||||
    insns: list[Insn] = field(default_factory=list)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def page(self) -> Page:
 | 
			
		||||
| 
						 | 
				
			
			@ -1017,7 +1176,7 @@ class Parser:
 | 
			
		|||
        return self.pages[self.text_section.page_num].unprocessed_chars[self.text_section]
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def __pages_gen(file: str, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
 | 
			
		||||
    def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
 | 
			
		||||
        if page_numbers is not None:
 | 
			
		||||
            page_numbers = sorted(i - 1 for i in page_numbers)
 | 
			
		||||
        for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
 | 
			
		||||
| 
						 | 
				
			
			@ -1028,7 +1187,7 @@ class Parser:
 | 
			
		|||
            print(f"page {page_num}")
 | 
			
		||||
            yield Page.from_lt_page(page_num=page_num, page=page)
 | 
			
		||||
 | 
			
		||||
    def parse_pdf(self, file: str, page_numbers: Iterable[int] | None = None):
 | 
			
		||||
    def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None):
 | 
			
		||||
        self.pages = Pages(pages_gen=Parser.__pages_gen(
 | 
			
		||||
            file=file, page_numbers=page_numbers))
 | 
			
		||||
        self.text_section = TextSection.first()
 | 
			
		||||
| 
						 | 
				
			
			@ -1059,7 +1218,7 @@ class Parser:
 | 
			
		|||
        try:
 | 
			
		||||
            with self.note_text_section():
 | 
			
		||||
                self.extract_insns()
 | 
			
		||||
        except InsnParseError as e:
 | 
			
		||||
        except (InsnParseError, PageParseError) as e:
 | 
			
		||||
            print("".join(traceback.format_exception_only(e)), flush=True)
 | 
			
		||||
            traceback.print_exc()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1200,7 +1359,8 @@ class Parser:
 | 
			
		|||
            self.unprocessed_chars[char.font].remove(char)
 | 
			
		||||
        if allowed_start_min_y_error is None:
 | 
			
		||||
            allowed_start_min_y_error = 0.01
 | 
			
		||||
        assert abs(start_min_y - retval.regular_min_y) < allowed_start_min_y_error, (
 | 
			
		||||
        if abs(start_min_y - retval.regular_min_y) > allowed_start_min_y_error:
 | 
			
		||||
            raise PageParseError(
 | 
			
		||||
                f"start_min_y={start_min_y} regular_min_y={retval.regular_min_y}\n"
 | 
			
		||||
                f"start_min_y error: {start_min_y - retval.regular_min_y}\n"
 | 
			
		||||
                f"allowed_start_min_y_error={allowed_start_min_y_error}")
 | 
			
		||||
| 
						 | 
				
			
			@ -1403,7 +1563,7 @@ class Parser:
 | 
			
		|||
        self,
 | 
			
		||||
        start_min_y: float,
 | 
			
		||||
        header_start_char: None | Char = None,
 | 
			
		||||
    ) -> None | tuple[list[ParsedTextLine], list[ParsedTextLine], InsnBitFields]:
 | 
			
		||||
    ) -> None | InsnHeader:
 | 
			
		||||
        assert header_start_char is None or \
 | 
			
		||||
            header_start_char.font == Font.INSN_HEADER
 | 
			
		||||
        header_line = self.extract_text_line(
 | 
			
		||||
| 
						 | 
				
			
			@ -1458,7 +1618,11 @@ class Parser:
 | 
			
		|||
        print(insn_bit_fields)
 | 
			
		||||
        if insn_bit_fields is None:
 | 
			
		||||
            raise InsnParseError("can't find insn bit fields")
 | 
			
		||||
        return header_lines, mnemonic_lines, insn_bit_fields
 | 
			
		||||
        return InsnHeader(
 | 
			
		||||
            header_lines=tuple(header_lines),
 | 
			
		||||
            mnemonic_lines=tuple(mnemonic_lines),
 | 
			
		||||
            bit_fields=insn_bit_fields,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def extract_insn_sp_regs_altered(
 | 
			
		||||
        self,
 | 
			
		||||
| 
						 | 
				
			
			@ -1474,12 +1638,14 @@ class Parser:
 | 
			
		|||
            max_y=sp_regs_altered_text.regular_min_y - 5,
 | 
			
		||||
            allow_processed=False,
 | 
			
		||||
        )
 | 
			
		||||
        assert table_header_reg_char is not None, \
 | 
			
		||||
            "can't find special registers altered table's register-column's header"
 | 
			
		||||
        if table_header_reg_char is None:
 | 
			
		||||
            raise InsnParseError(
 | 
			
		||||
                "can't find special registers altered table's register-column's header")
 | 
			
		||||
        KNOWN_SPECIAL_TEXTS = (
 | 
			
		||||
            "None",
 | 
			
		||||
            "Dependent on the system service",
 | 
			
		||||
            "See above.",
 | 
			
		||||
            "See Table 5.1",
 | 
			
		||||
        )
 | 
			
		||||
        match table_header_reg_char.text:
 | 
			
		||||
            case "R":
 | 
			
		||||
| 
						 | 
				
			
			@ -1611,7 +1777,7 @@ class Parser:
 | 
			
		|||
            final_regular_min_y=regular_min_y,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def extract_insn(self, header_start_char: Char):
 | 
			
		||||
    def extract_insn(self, header_start_char: Char) -> Insn:
 | 
			
		||||
        assert header_start_char.font == Font.INSN_HEADER
 | 
			
		||||
        print(header_start_char)
 | 
			
		||||
        header = self.extract_insn_header_mnemonics_and_bit_fields(
 | 
			
		||||
| 
						 | 
				
			
			@ -1620,7 +1786,7 @@ class Parser:
 | 
			
		|||
        )
 | 
			
		||||
        if header is None:
 | 
			
		||||
            raise PageParseError("can't find header text line")
 | 
			
		||||
        next_start_min_y = header[2].box_min_y - 5
 | 
			
		||||
        next_start_min_y = header.min_y - 5
 | 
			
		||||
        headers = [header]
 | 
			
		||||
        code_lines: list[ParsedTextLine] = []
 | 
			
		||||
        desc_lines: list[ParsedTextLine] = []
 | 
			
		||||
| 
						 | 
				
			
			@ -1687,7 +1853,7 @@ class Parser:
 | 
			
		|||
                    if header is None:
 | 
			
		||||
                        raise InsnParseError("can't find header text line")
 | 
			
		||||
                    headers.append(header)
 | 
			
		||||
                    next_start_min_y = header[2].box_min_y - 5
 | 
			
		||||
                    next_start_min_y = header.min_y - 5
 | 
			
		||||
                case _InsnParseSection.DESC:
 | 
			
		||||
                    desc_line = self.extract_text_line(
 | 
			
		||||
                        start_char=next_char,
 | 
			
		||||
| 
						 | 
				
			
			@ -1706,7 +1872,7 @@ class Parser:
 | 
			
		|||
                                first_text_line=desc_line,
 | 
			
		||||
                                min_x=desc_line.chars[0].min_x,
 | 
			
		||||
                                max_x=self.text_section.max_x,
 | 
			
		||||
                                allowed_start_min_y_error=3,
 | 
			
		||||
                                allowed_start_min_y_error=3.5,
 | 
			
		||||
                            )
 | 
			
		||||
                            print("more insn desc lines:")
 | 
			
		||||
                            print("\n".join(map(str, more_desc_lines)))
 | 
			
		||||
| 
						 | 
				
			
			@ -1729,6 +1895,12 @@ class Parser:
 | 
			
		|||
        print("sp_regs_altered:")
 | 
			
		||||
        print(sp_regs_altered)
 | 
			
		||||
        # TODO: finish
 | 
			
		||||
        return Insn(
 | 
			
		||||
            headers=tuple(headers),
 | 
			
		||||
            code_lines=tuple(code_lines),
 | 
			
		||||
            desc_lines=tuple(desc_lines),
 | 
			
		||||
            sp_regs_altered=sp_regs_altered,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def extract_insns(self):
 | 
			
		||||
        while True:
 | 
			
		||||
| 
						 | 
				
			
			@ -1737,7 +1909,7 @@ class Parser:
 | 
			
		|||
                    self.unprocessed_chars[Font.INSN_HEADER]))
 | 
			
		||||
            except StopIteration:
 | 
			
		||||
                break
 | 
			
		||||
            self.extract_insn(header_start_char=header_start_char)
 | 
			
		||||
            self.insns.append(self.extract_insn(header_start_char=header_start_char))
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
    if 2 < len(sys.argv):
 | 
			
		||||
| 
						 | 
				
			
			@ -1747,4 +1919,19 @@ def main():
 | 
			
		|||
            page_numbers = tuple(int(i) for i in sys.argv[2].split(","))
 | 
			
		||||
    else:
 | 
			
		||||
        page_numbers = None
 | 
			
		||||
    Parser().parse_pdf(sys.argv[1], page_numbers=page_numbers)
 | 
			
		||||
    parser = Parser()
 | 
			
		||||
    file_name = Path(sys.argv[1])
 | 
			
		||||
    parser.parse_pdf(file_name, page_numbers=page_numbers)
 | 
			
		||||
    insns = ElementTree.Element("instructions", attrib={"is-subset": str(page_numbers is not None)})
 | 
			
		||||
    insns.text = "\n"
 | 
			
		||||
    insns.tail = "\n"
 | 
			
		||||
    comment = ElementTree.Comment(f" Automatically generated from {file_name.name} ")
 | 
			
		||||
    comment.tail = "\n"
 | 
			
		||||
    insns.append(comment)
 | 
			
		||||
    for insn in parser.insns:
 | 
			
		||||
        insn.write_xml(insns)
 | 
			
		||||
    ElementTree.ElementTree(insns).write(
 | 
			
		||||
        "powerisa-instructions.xml",
 | 
			
		||||
        encoding="utf-8",
 | 
			
		||||
        xml_declaration=True,
 | 
			
		||||
    )
 | 
			
		||||
| 
						 | 
				
			
			@ -8,6 +8,7 @@ version = "0.0.0"
 | 
			
		|||
dependencies = [
 | 
			
		||||
    "pdfminer.six == 20240706"
 | 
			
		||||
]
 | 
			
		||||
requires-python = ">= 3.11"
 | 
			
		||||
 | 
			
		||||
[project.scripts]
 | 
			
		||||
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue