Files
zig-fetch-py/zig_fetch_py/parser.py
crosstyan 765c98502c Add dependency download and CLI functionality
- Implement dependency download and extraction for Zig packages
- Create new CLI commands for downloading and converting ZON files
- Add support for downloading dependencies from ZON files
- Update project dependencies to include httpx and tqdm
- Add WTFPL license file
- Enhance README with more detailed usage instructions and project motivation
2025-03-07 17:59:32 +08:00

451 lines
14 KiB
Python

"""
ZON parser module - Parses Zig Object Notation (ZON) files.
"""
import json
from typing import Any, Dict, List, Union, Optional
from loguru import logger
class ZonParser:
    """
    A recursive-descent parser for Zig Object Notation (ZON) text.

    ZON values are mapped onto plain Python objects:

    * ``.{ .key = value, ... }`` (struct)  -> ``dict``
    * ``.{ value, ... }`` (tuple)          -> ``list``
    * ``.{}``                              -> ``[]`` or ``{}`` (see
      ``empty_tuple_as_dict``)
    * ``.name`` / ``.@"quoted name"``      -> ``str``
    * ``"text"``                           -> ``str``
    * numbers                              -> ``int`` or ``float``
    * ``true`` / ``false`` / ``null``      -> ``True`` / ``False`` / ``None``

    The parser tracks a 1-based line and column while consuming input so
    that every ``ValueError`` it raises can point at the offending position.
    """

    _content: str
    _pos: int
    _line: int
    _col: int
    empty_tuple_as_dict: bool = False

    # Single-character escapes understood inside string literals.  Any other
    # escape is kept verbatim (backslash included).
    _SIMPLE_ESCAPES = {"n": "\n", "t": "\t", "r": "\r", '"': '"', "\\": "\\"}
    # Integer literal prefixes (the character after the leading "0") -> base.
    _RADIX_PREFIXES = {"x": 16, "o": 8, "b": 2}
    # Characters scanned after a radix prefix; int() validates them per base.
    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF_")

    def __init__(self, content: str, empty_tuple_as_dict: bool = False):
        """
        Initialize the parser with ZON content.

        Args:
            content: The ZON content to parse
            empty_tuple_as_dict: If True, empty tuples (.{}) will be parsed as
                empty dictionaries ({}). If False, empty tuples will be parsed
                as empty lists ([])
        """
        self._content = content
        self._pos = 0
        self._line = 1
        self._col = 1
        self.empty_tuple_as_dict = empty_tuple_as_dict

    def parse(self) -> Dict[str, Any]:
        """
        Parse the ZON content and return the resulting Python value.

        Returns:
            The first value found in the content. This is a dictionary for a
            struct, but a list or scalar is returned as-is when that is what
            the content starts with. Anything after the first value is not
            inspected.

        Raises:
            ValueError: If the content is not well-formed.
        """
        return self._parse_value()

    # ------------------------------------------------------------------
    # Low-level cursor helpers
    # ------------------------------------------------------------------

    def _error(self, message: str) -> ValueError:
        """Build a ValueError for *message* tagged with the current position."""
        return ValueError(f"{message} at line {self._line}, column {self._col}")

    def _current_char(self) -> str:
        """Return the character under the cursor, or "" at end of input."""
        if self._pos >= len(self._content):
            return ""
        return self._content[self._pos]

    def _next_char(self) -> str:
        """
        Consume one character, updating line/column, and return it.

        Returns "" (and leaves the cursor untouched) at end of input.
        """
        if self._pos >= len(self._content):
            return ""
        char = self._content[self._pos]
        self._pos += 1
        if char == "\n":
            self._line += 1
            self._col = 1
        else:
            self._col += 1
        return char

    def _advance(self, count: int) -> None:
        """Consume *count* characters through _next_char so line/column stay correct."""
        for _ in range(count):
            self._next_char()

    def _consume_while(self, predicate) -> None:
        """Consume characters for as long as ``predicate(char)`` is true."""
        while self._pos < len(self._content) and predicate(self._content[self._pos]):
            self._next_char()

    def _skip_whitespace_and_comments(self):
        """Skip any run of whitespace and ``//`` line comments."""
        while self._pos < len(self._content):
            if self._current_char().isspace():
                self._next_char()
            elif self._content.startswith("//", self._pos):
                # A comment runs up to (not including) the newline; the
                # newline itself is consumed as whitespace on the next pass.
                self._consume_while(lambda c: c != "\n")
            else:
                break

    def _expect_separator(self) -> None:
        """
        After a container element, consume a following comma if present.

        Raises:
            ValueError: If the next token is neither ',' nor the closing '}'.
        """
        self._skip_whitespace_and_comments()
        if self._current_char() == ",":
            self._next_char()
        elif self._current_char() != "}":
            raise self._error("Expected ',' or '}'")

    # ------------------------------------------------------------------
    # Grammar
    # ------------------------------------------------------------------

    def _parse_value(self) -> Any:
        """
        Parse a ZON value.

        Raises:
            ValueError: If the next character cannot start a value.
        """
        self._skip_whitespace_and_comments()
        char = self._current_char()

        if char == ".":
            self._next_char()  # Skip the dot
            # ".{" opens a struct/tuple; ".name" is an identifier literal
            if self._current_char() == "{":
                return self._parse_object()
            return self._parse_identifier()
        if char == '"':
            return self._parse_string()
        if char.isdigit() or char == "-":
            return self._parse_number()
        if char in ("t", "f"):
            return self._parse_boolean()
        if self._content.startswith("null", self._pos):
            # Go through _advance so the column counter keeps up
            self._advance(4)
            return None
        raise self._error(f"Unexpected character '{char}'")

    def _parse_object(self) -> Union[Dict[str, Any], List[Any]]:
        """
        Parse a ZON object or tuple; the cursor must be on the opening '{'.

        The container is treated as a struct when its first element starts
        with a dot that is not followed by '{' (i.e. it looks like ``.field``),
        and as a tuple otherwise.
        """
        self._next_char()  # Skip the opening brace
        self._skip_whitespace_and_comments()

        if self._current_char() == "}":
            # Empty container: ZON cannot tell struct from tuple here, so the
            # configuration option decides.
            self._next_char()
            return {} if self.empty_tuple_as_dict else []

        # Peek (without consuming) at the character after a leading dot.
        following = self._content[self._pos + 1 : self._pos + 2]
        if self._current_char() == "." and following != "{":
            return self._parse_struct()
        return self._parse_tuple()

    def _parse_struct(self) -> Dict[str, Any]:
        """
        Parse the body of a ZON struct (after '{') up to and including '}'.

        A field written without ``= value`` (shorthand) maps the key to itself.

        Raises:
            ValueError: On a missing '.' before a key or a missing separator.
        """
        result: Dict[str, Any] = {}
        while True:
            self._skip_whitespace_and_comments()
            if self._current_char() == "}":
                self._next_char()
                return result

            if self._current_char() != ".":
                raise self._error("Expected '.' before key")
            self._next_char()  # Skip the dot
            key = self._parse_identifier()

            self._skip_whitespace_and_comments()
            if self._current_char() == "=":
                self._next_char()
                value = self._parse_value()
            else:
                # Shorthand notation where key is the same as value
                value = key
            result[key] = value

            self._expect_separator()

    def _parse_tuple(self) -> Union[Dict[str, Any], List[Any]]:
        """
        Parse the body of a ZON tuple (after '{') up to and including '}'.

        Returns:
            List[Any] for non-empty tuples, or Dict[str, Any] if empty and
            empty_tuple_as_dict=True
        """
        self._skip_whitespace_and_comments()
        if self._current_char() == "}":
            self._next_char()
            return {} if self.empty_tuple_as_dict else []

        result: List[Any] = []
        while True:
            self._skip_whitespace_and_comments()
            if self._current_char() == "}":
                self._next_char()
                return result
            # _parse_value already handles nested ".{ ... }" and ".name"
            result.append(self._parse_value())
            self._expect_separator()

    def _parse_identifier(self) -> str:
        """
        Parse an identifier; the cursor must be just past the leading dot.

        Supports quoted identifiers such as ``@"lsp-codegen"``.

        Raises:
            ValueError: If no identifier characters are present.
        """
        if self._content.startswith('@"', self._pos):
            self._next_char()  # Skip @
            return self._parse_string()

        start = self._pos
        self._consume_while(lambda c: c.isalnum() or c in ("_", "-"))
        if start == self._pos:
            raise self._error("Empty identifier")
        return self._content[start : self._pos]

    def _parse_string(self) -> str:
        """
        Parse a double-quoted string; the cursor must be on the opening quote.

        Recognised escapes are ``\\n``, ``\\t``, ``\\r``, ``\\"`` and ``\\\\``;
        any other escape sequence is kept verbatim.

        Raises:
            ValueError: If the closing quote is missing.
        """
        self._next_char()  # Skip the opening quote
        pieces: List[str] = []
        while self._pos < len(self._content) and self._current_char() != '"':
            char = self._next_char()
            if char == "\\":
                escaped = self._next_char()
                pieces.append(self._SIMPLE_ESCAPES.get(escaped, "\\" + escaped))
            else:
                pieces.append(char)

        if self._current_char() != '"':
            raise self._error("Unterminated string")
        self._next_char()  # Skip the closing quote
        return "".join(pieces)

    def _parse_number(self) -> Union[int, float]:
        """
        Parse an integer or float literal.

        Accepts an optional leading '-', ``0x``/``0o``/``0b`` prefixed
        integers, decimal integers, decimals with a fractional part and/or
        exponent, and '_' digit separators.

        Raises:
            ValueError: If the scanned text is not a valid number.
        """
        start = self._pos
        negative = self._current_char() == "-"
        if negative:
            self._next_char()

        # Prefixed integer (checked after the sign so "-0x10" works)
        marker = self._content[self._pos + 1 : self._pos + 2].lower()
        if self._current_char() == "0" and marker in self._RADIX_PREFIXES:
            self._advance(2)  # Skip "0" and the radix marker
            digits_start = self._pos
            self._consume_while(lambda c: c in self._HEX_DIGITS)
            try:
                magnitude = int(
                    self._content[digits_start : self._pos],
                    self._RADIX_PREFIXES[marker],
                )
            except ValueError:
                raise self._error(
                    f"Invalid number '{self._content[start : self._pos]}'"
                ) from None
            return -magnitude if negative else magnitude

        def is_digit(c: str) -> bool:
            return c.isdigit() or c == "_"

        is_float = False
        self._consume_while(is_digit)
        if self._current_char() == ".":
            is_float = True
            self._next_char()
            self._consume_while(is_digit)
        if self._current_char() in ("e", "E"):
            is_float = True
            self._next_char()
            # Tuple membership: unlike `in "+-"`, this is False at end of input
            if self._current_char() in ("+", "-"):
                self._next_char()
            self._consume_while(is_digit)

        text = self._content[start : self._pos]
        try:
            return float(text) if is_float else int(text)
        except ValueError:
            raise self._error(f"Invalid number '{text}'") from None

    def _parse_boolean(self) -> bool:
        """
        Parse ``true`` or ``false``.

        Raises:
            ValueError: If neither keyword is at the cursor.
        """
        for word, value in (("true", True), ("false", False)):
            if self._content.startswith(word, self._pos):
                # Go through _advance so the column counter keeps up
                self._advance(len(word))
                return value
        raise self._error("Expected 'true' or 'false'")
def parse_zon_file(file_path: str, empty_tuple_as_dict: bool = False) -> Dict[str, Any]:
    """
    Parse a ZON file and return a Python dictionary.

    Args:
        file_path: Path to the ZON file
        empty_tuple_as_dict: If True, empty tuples (.{}) will be parsed as empty dictionaries ({})
            If False, empty tuples will be parsed as empty lists ([])

    Returns:
        Dictionary representation of the ZON file

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        ValueError: If the content is not well-formed ZON.
    """
    logger.debug(f"Parsing ZON file: {file_path}")

    # Zig source (and therefore ZON) is UTF-8; decode explicitly rather than
    # relying on the platform's locale default.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    result = ZonParser(content, empty_tuple_as_dict=empty_tuple_as_dict).parse()
    logger.debug("Successfully parsed ZON file")
    return result
def zon_to_json(
    zon_content: str, indent: Optional[int] = None, empty_tuple_as_dict: bool = False
) -> str:
    """
    Serialize ZON text as a JSON document.

    Args:
        zon_content: The ZON source text to convert
        indent: Indentation width passed to the JSON encoder; None yields
            compact single-line output
        empty_tuple_as_dict: When True an empty ``.{}`` becomes ``{}`` in the
            output, otherwise it becomes ``[]``

    Returns:
        The JSON text for the parsed ZON value
    """
    # Parse first, then hand the plain Python value straight to the encoder.
    parsed = ZonParser(zon_content, empty_tuple_as_dict=empty_tuple_as_dict).parse()
    return json.dumps(parsed, indent=indent)