init

2025-03-07 16:42:22 +08:00
parent 3404b6b7e0
commit 807fcb2849
11 changed files with 587 additions and 88 deletions
--- a/zig_fetch_py/__init__.py
+++ b/zig_fetch_py/__init__.py
@ -0,0 +1,5 @@
+"""
+zig-fetch-py - A tool to parse Zig Object Notation (ZON) files and convert them to JSON.
+"""
+
+# Version string of the zig-fetch-py package.
+__version__ = "0.1.0"
--- a/zig_fetch_py/main.py
+++ b/zig_fetch_py/main.py
@ -0,0 +1,60 @@
+"""
+Command-line interface for the ZON parser.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+import click
+from loguru import logger
+
+from zig_fetch_py.parser import parse_zon_file
+
+
+@click.command()
+@click.argument("file", type=click.Path(exists=True, readable=True))
+@click.option(
+    "-o",
+    "--output",
+    type=click.Path(writable=True),
+    help="Output JSON file path (default: stdout)",
+)
+@click.option("-p", "--pretty", is_flag=True, help="Pretty print JSON output")
+@click.option("-v", "--verbose", is_flag=True, help="Enable verbose logging")
+def main(file, output, pretty, verbose):
+    """Parse ZON files and convert to JSON.
+
+    This tool parses Zig Object Notation (ZON) files and converts them to JSON format.
+    """
+    # NOTE: click uses the docstring above as the command's --help text, so it
+    # is user-facing output rather than internal documentation.
+
+    # Replace loguru's default handler with a stderr handler at the requested
+    # verbosity; stdout is left free for the JSON document itself.
+    logger.remove()
+    logger.add(sys.stderr, level="DEBUG" if verbose else "INFO")
+
+    logger.info(f"Processing file: {file}")
+
+    try:
+        parsed = parse_zon_file(file)
+        json_str = json.dumps(parsed, indent=4 if pretty else None)
+
+        if not output:
+            # No -o/--output given: emit the JSON on stdout.
+            logger.debug("Writing output to stdout")
+            click.echo(json_str)
+        else:
+            logger.info(f"Writing output to: {output}")
+            with open(output, "w") as out_file:
+                out_file.write(json_str)
+
+    except Exception as e:
+        # CLI boundary: report any failure (I/O, parse, serialisation) on the
+        # log and signal it to the shell through a non-zero exit status.
+        logger.error(f"Error: {e}")
+        sys.exit(1)
+
+
+# Script entry point: the CLI is started only when this file is executed
+# directly; importing the module merely defines `main`.
+if __name__ == "__main__":
+    # `main` is a click command object; called without arguments it reads the
+    # command-line arguments from sys.argv itself.
+    main()
--- a/zig_fetch_py/parser.py
+++ b/zig_fetch_py/parser.py
@ -0,0 +1,350 @@
+"""
+ZON parser module - Parses Zig Object Notation (ZON) files.
+"""
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Union, Tuple, Optional
+
+from loguru import logger
+
+
+class ZonParser:
+    """
+    A recursive-descent parser for Zig Object Notation (ZON) text.
+
+    The parser walks ``content`` one character at a time and keeps the current
+    offset together with a 1-based line and column that are used in error
+    messages. ZON values are mapped to Python values as follows:
+
+    * ``.{ .field = value, ... }``            -> ``dict``
+    * ``.{ value, ... }`` (tuple literal)     -> ``list``
+    * ``.[ value, ... ]``                     -> ``list``
+    * ``.name`` (enum literal), ``"text"``    -> ``str``
+    * integer / float literals                -> ``int`` / ``float``
+    * ``true`` / ``false`` / ``null``         -> ``True`` / ``False`` / ``None``
+
+    Every syntax error is reported as a ``ValueError``.
+    """
+
+    # Escape sequences that map one character after the backslash to one
+    # character of output.
+    _SIMPLE_ESCAPES = {
+        "n": "\n",
+        "t": "\t",
+        "r": "\r",
+        '"': '"',
+        "'": "'",
+        "\\": "\\",
+    }
+
+    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
+
+    # Integer literal prefixes and the base they select.
+    _INT_PREFIXES = {"0x": 16, "0o": 8, "0b": 2}
+
+    def __init__(self, content: str):
+        """
+        Initialize the parser with ZON content.
+
+        Args:
+            content: The ZON content to parse
+        """
+        self.content = content
+        self.pos = 0  # offset of the next unread character
+        self.line = 1  # 1-based line of that character
+        self.col = 1  # 1-based column of that character
+
+    def parse(self) -> Any:
+        """
+        Parse the whole content and return its Python representation.
+
+        Returns:
+            The parsed top-level value (a dictionary for a
+            ``.{ .field = ... }`` struct literal).
+
+        Raises:
+            ValueError: On any syntax error, including non-whitespace,
+                non-comment text after the top-level value.
+        """
+        result = self._parse_value()
+
+        # A document holds exactly one value; anything left over is an error
+        # rather than something to drop silently.
+        self._skip_whitespace_and_comments()
+        if self.pos < len(self.content):
+            raise ValueError(
+                f"Unexpected character '{self._current_char()}' "
+                f"at line {self.line}, column {self.col}"
+            )
+        return result
+
+    def _current_char(self) -> str:
+        """Return the character at the current position, or "" at end of input."""
+        if self.pos >= len(self.content):
+            return ""
+        return self.content[self.pos]
+
+    def _next_char(self) -> str:
+        """
+        Consume one character and return it, updating line and column.
+
+        At end of input nothing is consumed and "" is returned, so the
+        position never runs past the end of the content.
+        """
+        if self.pos >= len(self.content):
+            return ""
+
+        char = self.content[self.pos]
+        self.pos += 1
+        if char == "\n":
+            self.line += 1
+            self.col = 1
+        else:
+            self.col += 1
+        return char
+
+    def _advance(self, count: int) -> None:
+        """Consume ``count`` characters while keeping line/column in sync."""
+        for _ in range(count):
+            self._next_char()
+
+    def _skip_whitespace_and_comments(self):
+        """Skip any run of whitespace and ``//`` line comments."""
+        while self.pos < len(self.content):
+            if self._current_char().isspace():
+                self._next_char()
+            elif self.content.startswith("//", self.pos):
+                # Drop everything up to (not including) the newline; the
+                # newline itself is whitespace and is handled by the next pass.
+                while self.pos < len(self.content) and self._current_char() != "\n":
+                    self._next_char()
+            else:
+                break
+
+    def _parse_value(self) -> Any:
+        """
+        Parse the value starting at the current position.
+
+        Leading whitespace and comments are skipped first.
+
+        Raises:
+            ValueError: If no value can start with the current character.
+        """
+        self._skip_whitespace_and_comments()
+
+        char = self._current_char()
+
+        if char == ".":
+            self._next_char()  # Skip the dot
+
+            if self._current_char() == "{":
+                return self._parse_object()
+            if self._current_char() == "[":
+                return self._parse_array()
+
+            # A bare `.name` (enum literal) is returned as its name.
+            return self._parse_identifier()
+
+        if char == '"':
+            return self._parse_string()
+        if char.isdigit() or char == "-":
+            return self._parse_number()
+        if char in ("t", "f"):
+            return self._parse_boolean()
+        if char == "n" and self.content.startswith("null", self.pos):
+            self._advance(4)
+            return None
+
+        raise ValueError(
+            f"Unexpected character '{char}' at line {self.line}, column {self.col}"
+        )
+
+    def _parse_object(self) -> Union[Dict[str, Any], List[Any]]:
+        """
+        Parse a ``{ ... }`` literal; the current character is the opening brace.
+
+        The first entry decides the shape: when it looks like ``.name`` the
+        literal is read as a struct and returned as a dictionary, otherwise it
+        is read as a tuple and returned as a list. An empty ``{}`` yields an
+        empty dictionary.
+        """
+        self._next_char()  # Skip the opening brace
+        self._skip_whitespace_and_comments()
+
+        if self._at_field_name():
+            return self._parse_fields()
+        if self._current_char() == "}":
+            self._next_char()
+            return {}
+        return self._parse_elements("}")
+
+    def _at_field_name(self) -> bool:
+        """Tell whether the current position starts a ``.name`` field key."""
+        if self._current_char() != ".":
+            return False
+        following = self.content[self.pos + 1 : self.pos + 2]
+        return following.isalnum() or following in ("_", "-", "@")
+
+    def _parse_fields(self) -> Dict[str, Any]:
+        """
+        Parse ``.key = value`` entries up to and including the closing brace.
+
+        A key without ``= value`` is stored with the key itself as its value.
+        """
+        result: Dict[str, Any] = {}
+
+        while True:
+            self._skip_whitespace_and_comments()
+
+            if self._current_char() == "}":
+                self._next_char()
+                return result
+
+            if self._current_char() != ".":
+                raise ValueError(
+                    f"Expected '.' before key at line {self.line}, column {self.col}"
+                )
+            self._next_char()  # Skip the dot
+            key = self._parse_identifier()
+
+            self._skip_whitespace_and_comments()
+
+            if self._current_char() == "=":
+                self._next_char()
+                value = self._parse_value()
+            else:
+                # Shorthand notation where key is the same as value
+                value = key
+
+            result[key] = value
+
+            self._skip_whitespace_and_comments()
+
+            # Entries are comma separated; a trailing comma is allowed.
+            if self._current_char() == ",":
+                self._next_char()
+            elif self._current_char() != "}":
+                raise ValueError(
+                    f"Expected ',' or '}}' at line {self.line}, column {self.col}"
+                )
+
+    def _parse_elements(self, closing: str) -> List[Any]:
+        """
+        Parse comma-separated values up to and including ``closing``.
+
+        Args:
+            closing: The character that ends the sequence ("}" or "]")
+        """
+        items: List[Any] = []
+
+        while True:
+            self._skip_whitespace_and_comments()
+
+            if self._current_char() == closing:
+                self._next_char()
+                return items
+
+            items.append(self._parse_value())
+
+            self._skip_whitespace_and_comments()
+
+            # Entries are comma separated; a trailing comma is allowed.
+            if self._current_char() == ",":
+                self._next_char()
+            elif self._current_char() != closing:
+                raise ValueError(
+                    f"Expected ',' or '{closing}' at line {self.line}, column {self.col}"
+                )
+
+    def _parse_array(self) -> List[Any]:
+        """Parse a ``[ ... ]`` literal; the current character is the opening bracket."""
+        self._next_char()  # Skip the opening bracket
+        return self._parse_elements("]")
+
+    def _parse_identifier(self) -> str:
+        """
+        Parse the name that follows a dot.
+
+        Accepts a plain name made of alphanumerics, "_" and "-", or a quoted
+        name such as ``@"lsp-codegen"`` whose decoded string content is
+        returned.
+
+        Raises:
+            ValueError: If no name character is found.
+        """
+        # Handle quoted identifiers (like .@"lsp-codegen")
+        if self.content.startswith('@"', self.pos):
+            self._next_char()  # Skip @
+            return self._parse_string()
+
+        start = self.pos
+        while self._current_char().isalnum() or self._current_char() in ("_", "-"):
+            self._next_char()
+
+        if start == self.pos:
+            raise ValueError(f"Empty identifier at line {self.line}, column {self.col}")
+
+        return self.content[start : self.pos]
+
+    def _parse_string(self) -> str:
+        """
+        Parse a double-quoted string; the current character is the opening quote.
+
+        Raises:
+            ValueError: If the closing quote is missing.
+        """
+        chunks: List[str] = []
+
+        self._next_char()  # Skip the opening quote
+
+        while self.pos < len(self.content) and self._current_char() != '"':
+            char = self._next_char()
+            chunks.append(self._parse_escape() if char == "\\" else char)
+
+        if self._current_char() != '"':
+            raise ValueError(
+                f"Unterminated string at line {self.line}, column {self.col}"
+            )
+
+        self._next_char()  # Skip the closing quote
+        return "".join(chunks)
+
+    def _parse_escape(self) -> str:
+        """
+        Decode one escape sequence; the backslash has already been consumed.
+
+        Handles the single-character escapes, ``\\xNN`` and ``\\u{N...}``.
+        An unknown or malformed escape is kept verbatim (backslash plus the
+        following character) instead of being rejected.
+        """
+        if self.pos >= len(self.content):
+            # Lone backslash at end of input; the caller reports the
+            # unterminated string.
+            return "\\"
+
+        char = self._next_char()
+
+        simple = self._SIMPLE_ESCAPES.get(char)
+        if simple is not None:
+            return simple
+
+        if char == "x":
+            digits = self.content[self.pos : self.pos + 2]
+            if len(digits) == 2 and all(d in self._HEX_DIGITS for d in digits):
+                self._advance(2)
+                # NOTE(review): Zig defines \xNN as a byte value; it is mapped
+                # to the code point of the same number here - confirm this is
+                # acceptable for values above 0x7F.
+                return chr(int(digits, 16))
+        elif char == "u" and self._current_char() == "{":
+            end = self.content.find("}", self.pos)
+            digits = self.content[self.pos + 1 : end] if end != -1 else ""
+            if (
+                digits
+                and all(d in self._HEX_DIGITS for d in digits)
+                and int(digits, 16) <= 0x10FFFF
+            ):
+                self._advance(end + 1 - self.pos)
+                return chr(int(digits, 16))
+
+        return "\\" + char
+
+    def _parse_number(self) -> Union[int, float]:
+        """
+        Parse an integer or float literal.
+
+        Supports an optional leading "-", the ``0x`` / ``0o`` / ``0b`` integer
+        prefixes, ``_`` digit separators, a fractional part and an exponent.
+
+        Raises:
+            ValueError: If the scanned text is not a valid number; the message
+                carries the position where the literal starts.
+        """
+        start = self.pos
+        start_line, start_col = self.line, self.col
+
+        if self._current_char() == "-":
+            self._next_char()
+
+        is_float = False
+        base = self._INT_PREFIXES.get(self.content[self.pos : self.pos + 2].lower(), 10)
+
+        if base != 10:
+            self._advance(2)  # Skip the prefix
+            # Take the whole alphanumeric run so that a stray character is
+            # reported as part of this literal.
+            while self._current_char().isalnum() or self._current_char() == "_":
+                self._next_char()
+        else:
+            self._skip_digits()
+
+            # Fractional part
+            if self._current_char() == ".":
+                is_float = True
+                self._next_char()
+                self._skip_digits()
+
+            # Exponent, with optional sign
+            if self._current_char() in ("e", "E"):
+                is_float = True
+                self._next_char()
+                if self._current_char() in ("+", "-"):
+                    self._next_char()
+                self._skip_digits()
+
+        literal = self.content[start : self.pos]
+
+        try:
+            # int() itself understands the sign, a prefix matching the base
+            # and single underscores between digits.
+            return float(literal) if is_float else int(literal, base)
+        except ValueError:
+            raise ValueError(
+                f"Invalid number '{literal}' at line {start_line}, column {start_col}"
+            ) from None
+
+    def _skip_digits(self) -> None:
+        """Consume a run of decimal digits and ``_`` separators."""
+        while self._current_char().isdigit() or self._current_char() == "_":
+            self._next_char()
+
+    def _parse_boolean(self) -> bool:
+        """
+        Parse ``true`` or ``false`` at the current position.
+
+        Raises:
+            ValueError: If neither keyword starts here.
+        """
+        for word, value in (("true", True), ("false", False)):
+            if self.content.startswith(word, self.pos):
+                # Go through _advance so the column stays correct for later
+                # error messages.
+                self._advance(len(word))
+                return value
+
+        raise ValueError(
+            f"Expected 'true' or 'false' at line {self.line}, column {self.col}"
+        )
+
+
+def parse_zon_file(file_path: str) -> Dict[str, Any]:
+    """
+    Read a ZON file from disk and parse it.
+
+    Args:
+        file_path: Path to the ZON file
+
+    Returns:
+        Python representation of the file's top-level value (a dictionary
+        for a ``.{ .field = ... }`` struct literal)
+
+    Raises:
+        OSError: If the file cannot be opened or read
+        UnicodeDecodeError: If the file is not valid UTF-8
+        ValueError: If the content is not well-formed ZON
+    """
+    logger.debug(f"Parsing ZON file: {file_path}")
+    # Decode explicitly as UTF-8 (the encoding of Zig source text) instead of
+    # whatever the platform locale happens to default to.
+    with open(file_path, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    result = ZonParser(content).parse()
+    logger.debug("Successfully parsed ZON file")
+    return result
+
+
+def zon_to_json(zon_content: str, indent: Optional[int] = None) -> str:
+    """
+    Parse ZON text and serialise the result as JSON.
+
+    Args:
+        zon_content: ZON content as string
+        indent: Indentation width handed to ``json.dumps`` (None for compact
+            single-line JSON)
+
+    Returns:
+        JSON string
+    """
+    parsed = ZonParser(zon_content).parse()
+    return json.dumps(parsed, indent=indent)