mirror of
https://github.com/rommapp/romm.git
synced 2026-02-18 00:27:41 +01:00
add xml diagnostics script
This commit is contained in:
84
backend/tools/xml_diagnostics.py
Normal file
84
backend/tools/xml_diagnostics.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import sys
|
||||
from xml.sax.handler import ContentHandler # nosec
|
||||
|
||||
from defusedxml import ElementTree as ET
|
||||
from defusedxml.sax import make_parser
|
||||
|
||||
|
||||
class DiagnosticHandler(ContentHandler):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.line_number = 0
|
||||
self.column_number = 0
|
||||
|
||||
def setDocumentLocator(self, locator):
|
||||
self.locator = locator
|
||||
|
||||
def characters(self, content):
|
||||
# Check for invalid XML characters
|
||||
for char in content:
|
||||
if ord(char) >= 0xFFFE or (ord(char) <= 0x1F and char not in "\n\r\t"):
|
||||
print(
|
||||
f"Found invalid character '0x{ord(char):04x}' at line {self.locator.getLineNumber()}, column {self.locator.getColumnNumber()}"
|
||||
)
|
||||
|
||||
|
||||
def diagnose_xml(filename):
|
||||
print(f"Analyzing {filename}...")
|
||||
|
||||
# First, try to read the file in chunks to find encoding issues
|
||||
try:
|
||||
with open(filename, "rb") as f:
|
||||
chunk_size = 8192
|
||||
chunk_number = 0
|
||||
while True:
|
||||
chunk = f.read(chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
try:
|
||||
chunk.decode("utf-8")
|
||||
except UnicodeDecodeError as e:
|
||||
byte_pos = chunk_number * chunk_size + e.start
|
||||
print(f"Found invalid UTF-8 sequence at byte position {byte_pos}")
|
||||
print(f"Problematic bytes: {chunk[e.start:e.end].decode('utf-8')}")
|
||||
chunk_number += 1
|
||||
except Exception as e:
|
||||
print(f"Error reading file: {e}")
|
||||
return
|
||||
|
||||
# Then try SAX parsing for detailed error reporting
|
||||
parser = make_parser()
|
||||
handler = DiagnosticHandler()
|
||||
parser.setContentHandler(handler)
|
||||
|
||||
try:
|
||||
parser.parse(filename)
|
||||
except Exception as e:
|
||||
print(f"SAX parsing error: {e}")
|
||||
|
||||
# Finally try ElementTree parsing
|
||||
try:
|
||||
ET.parse(filename)
|
||||
except ET.ParseError as e:
|
||||
print(f"ElementTree parsing error: {e}")
|
||||
|
||||
# Try to get context around the error
|
||||
try:
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
lines = f.readlines()
|
||||
line_num = e.position[0]
|
||||
start = max(0, line_num - 2)
|
||||
end = min(len(lines), line_num + 3)
|
||||
print("\nContext around error:")
|
||||
for i in range(start, end):
|
||||
prefix = "-> " if i + 1 == line_num else " "
|
||||
print(f"{prefix}{i+1}: {lines[i].rstrip()}")
|
||||
except Exception as context_error:
|
||||
print(f"Could not get context: {context_error}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: python script.py <xml_file>")
|
||||
sys.exit(1)
|
||||
diagnose_xml(sys.argv[1])
|
||||
Reference in New Issue
Block a user