- Implement dependency download and extraction for Zig packages - Create new CLI commands for downloading and converting ZON files - Add support for downloading dependencies from ZON files - Update project dependencies to include httpx and tqdm - Add WTFPL license file - Enhance README with more detailed usage instructions and project motivation
451 lines
14 KiB
Python
451 lines
14 KiB
Python
"""
|
|
ZON parser module - Parses Zig Object Notation (ZON) files.
|
|
"""
|
|
|
|
import json
|
|
from typing import Any, Dict, List, Union, Optional
|
|
|
|
from loguru import logger
|
|
|
|
|
|
class ZonParser:
    """
    A recursive-descent parser for Zig Object Notation (ZON) text.

    ZON values are mapped to plain Python objects:

    * ``.{ .key = value, ... }`` -> ``dict``
    * ``.{ value, ... }``        -> ``list``
    * ``.{}``                    -> ``[]`` or ``{}`` (see ``empty_tuple_as_dict``)
    * ``.name`` used as a value  -> the string ``"name"``
    * strings, numbers, ``true``/``false``/``null`` -> ``str``, ``int``/``float``,
      ``bool``, ``None``

    The parser keeps a cursor (``_pos``) plus a 1-based line/column pair that
    is used only for error messages.  All syntax errors are reported as
    ``ValueError``.
    """

    _content: str
    _pos: int
    _line: int
    _col: int
    empty_tuple_as_dict: bool = False

    # Characters accepted while scanning numeric literals.  "_" is included
    # because Zig allows it as a digit separator; final validation of the
    # collected text is delegated to int()/float().
    _HEX_DIGITS = "0123456789abcdefABCDEF"
    _DEC_DIGITS = "0123456789_"
    _RADIX_PREFIXES = {
        "0x": (16, _HEX_DIGITS + "_"),
        "0o": (8, "01234567_"),
        "0b": (2, "01_"),
    }

    # Single-character escapes understood inside string literals.
    _SIMPLE_ESCAPES = {
        "n": "\n",
        "t": "\t",
        "r": "\r",
        '"': '"',
        "'": "'",
        "\\": "\\",
    }

    def __init__(self, content: str, empty_tuple_as_dict: bool = False):
        """
        Initialize the parser with ZON content.

        Args:
            content: The ZON content to parse
            empty_tuple_as_dict: If True, empty tuples (.{}) will be parsed as empty dictionaries ({})
                                 If False, empty tuples will be parsed as empty lists ([])
        """
        self._content = content
        self._pos = 0
        self._line = 1
        self._col = 1
        self.empty_tuple_as_dict = empty_tuple_as_dict

    def parse(self) -> Dict[str, Any]:
        """
        Parse the ZON content and return the resulting Python value.

        Returns:
            The parsed root value (a dict for the usual ``.{ .key = ... }`` root).

        Raises:
            ValueError: On any syntax error, including non-whitespace,
                non-comment content left over after the root value.
        """
        result = self._parse_value()

        # Anything other than whitespace/comments after the root value means
        # the input is malformed; report it instead of silently dropping it.
        self._skip_whitespace_and_comments()
        if self._pos < len(self._content):
            raise self._error(f"Unexpected character '{self._current_char()}'")
        return result

    def _error(self, message: str) -> ValueError:
        """Build a ValueError carrying the current line/column position."""
        return ValueError(f"{message} at line {self._line}, column {self._col}")

    def _current_char(self) -> str:
        """Return the character under the cursor, or "" at end of input."""
        if self._pos >= len(self._content):
            return ""
        return self._content[self._pos]

    def _next_char(self) -> str:
        """
        Consume one character and return it, updating line/column tracking.

        At end of input nothing is consumed and "" is returned, so the cursor
        never moves past ``len(content)``.
        """
        if self._pos >= len(self._content):
            return ""

        char = self._content[self._pos]
        self._pos += 1
        if char == "\n":
            self._line += 1
            self._col = 1
        else:
            self._col += 1
        return char

    def _advance(self, count: int) -> None:
        """Consume ``count`` characters through ``_next_char`` so line/column stay in sync."""
        for _ in range(count):
            self._next_char()

    def _consume_while(self, allowed: str) -> None:
        """Consume characters for as long as they are members of ``allowed``."""
        while True:
            char = self._current_char()
            # The explicit emptiness test matters: "" is "in" every string.
            if not char or char not in allowed:
                return
            self._next_char()

    def _skip_whitespace_and_comments(self):
        """Advance past whitespace and ``//`` line comments."""
        content = self._content
        while self._pos < len(content):
            char = self._current_char()

            if char.isspace():
                self._next_char()
                continue

            if content.startswith("//", self._pos):
                # Stop on the newline itself; the whitespace branch eats it.
                while self._pos < len(content) and self._current_char() != "\n":
                    self._next_char()
                continue

            break

    def _parse_value(self) -> Any:
        """
        Parse a single ZON value starting at the cursor.

        Raises:
            ValueError: If the next character cannot start a value.
        """
        self._skip_whitespace_and_comments()
        char = self._current_char()

        if char == ".":
            self._next_char()  # Skip the dot
            if self._current_char() == "{":
                return self._parse_object()
            # A bare ".name" value is returned as the string "name".
            return self._parse_identifier()
        if char == '"':
            return self._parse_string()
        if char.isdigit() or char == "-":
            return self._parse_number()
        if char == "t" or char == "f":
            return self._parse_boolean()
        if char == "n" and self._content.startswith("null", self._pos):
            self._advance(4)
            return None

        raise self._error(f"Unexpected character '{char}'")

    def _parse_object(self) -> Union[Dict[str, Any], List[Any]]:
        """
        Parse the body of a ``.{ ... }`` literal; the cursor must be on ``{``.

        The first element decides the result kind: a ``.`` that is not
        immediately followed by ``{`` is taken as a field name (struct ->
        dict); anything else, including a nested ``.{``, makes it a tuple
        (-> list).  An empty body yields ``{}`` or ``[]`` depending on
        ``empty_tuple_as_dict``.
        """
        self._next_char()  # Skip the opening brace
        self._skip_whitespace_and_comments()

        if self._current_char() == "}":
            self._next_char()  # Skip the closing brace
            return {} if self.empty_tuple_as_dict else []

        # Peek one character past a leading dot without moving the cursor.
        after_dot = self._content[self._pos + 1 : self._pos + 2]
        if self._current_char() == "." and after_dot != "{":
            return self._parse_struct()
        return self._parse_tuple()

    def _parse_struct(self) -> Dict[str, Any]:
        """
        Parse ``.key = value`` pairs up to and including the closing ``}``.

        A key without ``=`` is stored with the key name as its value.  When a
        key repeats, the last occurrence wins.

        Raises:
            ValueError: On a missing ``.`` before a key or a missing
                ``,``/``}`` after an entry.
        """
        result = {}

        while True:
            self._skip_whitespace_and_comments()

            if self._current_char() == "}":
                self._next_char()
                break

            if self._current_char() != ".":
                raise self._error("Expected '.' before key")
            self._next_char()  # Skip the dot
            key = self._parse_identifier()

            self._skip_whitespace_and_comments()

            if self._current_char() == "=":
                self._next_char()
                value = self._parse_value()
            else:
                # Shorthand notation where key is the same as value
                value = key

            result[key] = value

            self._skip_whitespace_and_comments()

            if self._current_char() == ",":
                self._next_char()
            elif self._current_char() != "}":
                raise self._error("Expected ',' or '}'")

        return result

    def _parse_tuple(self) -> Union[Dict[str, Any], List[Any]]:
        """
        Parse comma-separated values up to and including the closing ``}``.

        Returns:
            List[Any] for non-empty tuples, or Dict[str, Any] if empty and empty_tuple_as_dict=True

        Raises:
            ValueError: On a missing ``,``/``}`` after an element.
        """
        result = []

        self._skip_whitespace_and_comments()

        # Only reachable when called directly; _parse_object handles ".{}" itself.
        if self._current_char() == "}":
            self._next_char()
            return {} if self.empty_tuple_as_dict else []

        while True:
            self._skip_whitespace_and_comments()

            if self._current_char() == "}":
                self._next_char()
                break

            # _parse_value already handles nested ".{ ... }" elements.
            result.append(self._parse_value())

            self._skip_whitespace_and_comments()

            if self._current_char() == ",":
                self._next_char()
            elif self._current_char() != "}":
                raise self._error("Expected ',' or '}'")

        return result

    def _parse_identifier(self) -> str:
        """
        Parse an identifier; the leading dot must already be consumed.

        Accepts the quoted form ``@"..."`` (returned unquoted) and bare names
        made of alphanumerics, ``_`` and ``-``.

        Raises:
            ValueError: If no identifier characters are present.
        """
        start = self._pos

        # Handle quoted identifiers (like .@"lsp-codegen")
        if self._content.startswith('@"', self._pos):
            self._next_char()  # Skip @
            return self._parse_string()

        while self._pos < len(self._content):
            char = self._current_char()
            if not (char.isalnum() or char == "_" or char == "-"):
                break
            self._next_char()

        if start == self._pos:
            raise self._error("Empty identifier")

        return self._content[start : self._pos]

    def _parse_string(self) -> str:
        """
        Parse a double-quoted string; the cursor must be on the opening quote.

        Raises:
            ValueError: If the closing quote is missing.
        """
        pieces = []  # joined once at the end instead of repeated str +=

        self._next_char()  # Skip the opening quote

        while self._pos < len(self._content) and self._current_char() != '"':
            char = self._next_char()
            pieces.append(self._parse_escape() if char == "\\" else char)

        if self._current_char() != '"':
            raise self._error("Unterminated string")

        self._next_char()  # Skip the closing quote
        return "".join(pieces)

    def _parse_escape(self) -> str:
        """
        Decode one escape sequence; the backslash has already been consumed.

        Handles ``\\n \\t \\r \\" \\' \\\\``, ``\\xNN`` and ``\\u{H...}`` (1-6 hex
        digits).  ``\\xNN`` is mapped to the code point U+00NN.  Unknown or
        malformed escapes are kept verbatim (backslash plus the character).
        """
        code = self._current_char()

        simple = self._SIMPLE_ESCAPES.get(code)
        if simple is not None:
            self._next_char()
            return simple

        content, pos = self._content, self._pos
        if code == "x":
            digits = content[pos + 1 : pos + 3]
            if len(digits) == 2 and all(d in self._HEX_DIGITS for d in digits):
                self._advance(3)  # "x" plus two hex digits
                return chr(int(digits, 16))
        elif code == "u" and content[pos + 1 : pos + 2] == "{":
            # At most 6 hex digits, so the closing brace is within pos+8.
            end = content.find("}", pos + 2, pos + 9)
            digits = content[pos + 2 : end] if end != -1 else ""
            if (
                digits
                and all(d in self._HEX_DIGITS for d in digits)
                and int(digits, 16) <= 0x10FFFF
            ):
                self._advance(end + 1 - pos)  # through the closing brace
                return chr(int(digits, 16))

        # Unknown/malformed escape (or backslash at end of input): keep as-is.
        self._next_char()
        return "\\" + code

    def _parse_number(self) -> Union[int, float]:
        """
        Parse an integer or float literal.

        Supports an optional leading ``-``, ``0x``/``0o``/``0b`` integers,
        decimal integers, decimals with a fraction and/or exponent, and ``_``
        digit separators.

        Raises:
            ValueError: If the scanned text is not a valid number; the message
                reports the position where the literal started.
        """
        start = self._pos
        start_line, start_col = self._line, self._col

        def invalid() -> ValueError:
            text = self._content[start : self._pos]
            return ValueError(
                f"Invalid number '{text}' at line {start_line}, column {start_col}"
            )

        negative = self._current_char() == "-"
        if negative:
            self._next_char()

        # Radix-prefixed integers (hex, octal, binary)
        prefix = self._content[self._pos : self._pos + 2].lower()
        if prefix in self._RADIX_PREFIXES:
            base, allowed = self._RADIX_PREFIXES[prefix]
            self._advance(2)
            digits_start = self._pos
            self._consume_while(allowed)
            digits = self._content[digits_start : self._pos]
            try:
                magnitude = int(digits, base)
            except ValueError:
                raise invalid() from None
            return -magnitude if negative else magnitude

        # Decimal integer / float
        is_float = False
        self._consume_while(self._DEC_DIGITS)

        if self._current_char() == ".":
            is_float = True
            self._next_char()
            self._consume_while(self._DEC_DIGITS)

        if self._current_char().lower() == "e":
            is_float = True
            self._next_char()
            # Tuple membership: `"" in "+-"` would be True at end of input.
            if self._current_char() in ("+", "-"):
                self._next_char()
            self._consume_while(self._DEC_DIGITS)

        num_str = self._content[start : self._pos]
        try:
            return float(num_str) if is_float else int(num_str)
        except ValueError:
            raise invalid() from None

    def _parse_boolean(self) -> bool:
        """
        Parse ``true`` or ``false`` at the cursor.

        Raises:
            ValueError: If neither keyword is present.
        """
        if self._content.startswith("true", self._pos):
            self._advance(4)
            return True
        if self._content.startswith("false", self._pos):
            self._advance(5)
            return False
        raise self._error("Expected 'true' or 'false'")
|
|
|
|
|
|
def parse_zon_file(file_path: str, empty_tuple_as_dict: bool = False) -> Dict[str, Any]:
    """
    Parse a ZON file and return a Python dictionary.

    Args:
        file_path: Path to the ZON file
        empty_tuple_as_dict: If True, empty tuples (.{}) will be parsed as empty dictionaries ({})
                             If False, empty tuples will be parsed as empty lists ([])

    Returns:
        Dictionary representation of the ZON file

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        ValueError: If the content is not valid ZON.
    """
    logger.debug(f"Parsing ZON file: {file_path}")

    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale-dependent default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    parser = ZonParser(content, empty_tuple_as_dict=empty_tuple_as_dict)
    result = parser.parse()
    logger.debug("Successfully parsed ZON file")
    return result
|
|
|
|
|
|
def zon_to_json(
    zon_content: str, indent: Optional[int] = None, empty_tuple_as_dict: bool = False
) -> str:
    """
    Serialize ZON source text as a JSON document.

    The text is parsed with ``ZonParser`` and the resulting Python value is
    handed to ``json.dumps``.

    Args:
        zon_content: ZON source text
        indent: Indentation width passed to ``json.dumps`` (None gives compact output)
        empty_tuple_as_dict: When True an empty ``.{}`` becomes ``{}``;
                             when False it becomes ``[]``

    Returns:
        The JSON text
    """
    parsed = ZonParser(zon_content, empty_tuple_as_dict=empty_tuple_as_dict).parse()
    return json.dumps(parsed, indent=indent)
|