This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions

View File

@@ -0,0 +1,6 @@
"""Functions for parsing Links"""
# Public API of this package: each link-parsing helper lives in its own
# submodule and is re-exported here under the name listed in ``__all__``.
__all__ = ("parseLinkDestination", "parseLinkLabel", "parseLinkTitle")
from .parse_link_destination import parseLinkDestination
from .parse_link_label import parseLinkLabel
from .parse_link_title import parseLinkTitle

View File

@@ -0,0 +1,83 @@
"""
Parse link destination
"""
from ..common.utils import charCodeAt, unescapeAll
class _Result:
    """Outcome record for a link-destination parse.

    A fresh instance represents "no match": ``ok`` is ``False``, ``pos`` is
    ``0`` and ``str`` is the empty string.
    """

    __slots__ = ("ok", "pos", "str")

    def __init__(self) -> None:
        # Initialise every slot to its "nothing parsed" default in one step.
        self.ok, self.pos, self.str = False, 0, ""
def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
    """Parse a link destination in ``string``, scanning from ``pos`` up to ``maximum``.

    Two forms are recognised:

    * angle-bracketed: starts with ``<`` and runs to the next unescaped ``>``.
      A newline or another unescaped ``<`` before the closing ``>`` makes the
      parse fail.
    * bare: runs until a space, an ASCII control character, the end of the
      input, a backslash followed by a space, or a ``)`` that has no matching
      ``(``. Parentheses must balance and may nest at most 32 levels deep.

    :param string: text being parsed
    :param pos: index at which the destination is expected to start
    :param maximum: exclusive upper bound of the scan
    :return: a ``_Result``. On failure ``ok`` stays ``False`` and the other
        fields keep their defaults. On success ``ok`` is ``True``, ``pos`` is
        the index just past the destination and ``str`` is the matched text
        (without the angle brackets, if any) passed through ``unescapeAll``.
    """
    origin = pos
    outcome = _Result()

    if charCodeAt(string, pos) == 0x3C:  # "<" opens the angle-bracketed form
        cursor = pos + 1
        while cursor < maximum:
            ch = charCodeAt(string, cursor)
            if ch in (0x0A, 0x3C):  # newline or a second "<": not a destination
                return outcome
            if ch == 0x3E:  # closing ">"
                outcome.pos = cursor + 1
                outcome.str = unescapeAll(string[origin + 1 : cursor])
                outcome.ok = True
                return outcome
            # A backslash also consumes the character it escapes.
            cursor += 2 if (ch == 0x5C and cursor + 1 < maximum) else 1
        # Ran out of input without finding the closing ">".
        return outcome

    # Bare (non-bracketed) destination.
    depth = 0
    cursor = pos
    while cursor < maximum:
        ch = charCodeAt(string, cursor)

        # End of input, space, or any ASCII control character ends the scan.
        if ch is None or ch <= 0x20 or ch == 0x7F:
            break

        if ch == 0x5C and cursor + 1 < maximum:  # backslash escape
            if charCodeAt(string, cursor + 1) == 0x20:
                # A backslash followed by a space terminates the destination.
                break
            cursor += 2
            continue

        if ch == 0x28:  # "("
            depth += 1
            if depth > 32:
                return outcome
        elif ch == 0x29:  # ")"
            if depth == 0:
                # Unmatched ")" belongs to the surrounding syntax.
                break
            depth -= 1

        cursor += 1

    # Reject an empty destination or one with unbalanced parentheses.
    if cursor == origin or depth != 0:
        return outcome

    outcome.str = unescapeAll(string[origin:cursor])
    outcome.pos = cursor
    outcome.ok = True
    return outcome

View File

@@ -0,0 +1,44 @@
"""
Parse link label.
This function assumes that the first character ("[") has already been matched.
It returns the position of the label's closing "]", or -1 if no valid label end is found.
"""
from markdown_it.rules_inline import StateInline
def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int:
    """Locate the end of a link label whose opening ``[`` is at ``start``.

    The scan begins at ``start + 1`` and advances with
    ``state.md.inline.skipToken`` while tracking bracket nesting.
    ``state.pos`` is restored to its entry value before returning.

    :param state: inline parser state; ``src``, ``pos``, ``posMax`` and
        ``md.inline.skipToken`` are used
    :param start: index of the opening ``[``
    :param disableNested: when true, a ``[`` that ``skipToken`` consumed as
        part of a longer token makes the function return ``-1``
    :return: index of the ``]`` that closes the label, or ``-1`` if none was
        found
    """
    saved_pos = state.pos
    label_end = -1
    depth = 1

    state.pos = start + 1
    while state.pos < state.posMax:
        ch = state.src[state.pos]
        if ch == "]":
            depth -= 1
            if depth == 0:
                # This bracket closes the label; remember where it is.
                label_end = state.pos
                break

        before = state.pos
        state.md.inline.skipToken(state)
        if ch != "[":
            continue

        if state.pos - 1 == before:
            # Only one character was consumed: the "[" is plain text rather
            # than part of a token, so it opens a nested bracket level.
            depth += 1
        elif disableNested:
            state.pos = saved_pos
            return -1

    # Put the parser position back where the caller left it.
    state.pos = saved_pos
    return label_end

View File

@@ -0,0 +1,75 @@
"""Parse link title"""
from ..common.utils import charCodeAt, unescapeAll
class _State:
    """Progress/result record for a link-title parse."""

    __slots__ = ("can_continue", "marker", "ok", "pos", "str")

    def __init__(self) -> None:
        # True when a valid link title has been parsed.
        self.ok: bool = False
        # True when this link title can be continued on the next line.
        self.can_continue: bool = False
        # When ``ok``: position of the first character after the closing marker.
        self.pos: int = 0
        # When ``ok``: the unescaped title.
        self.str: str = ""
        # Character code of the expected closing marker.
        self.marker: int = 0

    def __str__(self) -> str:
        """Return the title text collected so far."""
        return self.str
def parseLinkTitle(
    string: str, start: int, maximum: int, prev_state: _State | None = None
) -> _State:
    """Parse a link title in ``string``, scanning from ``start`` up to ``maximum``.

    Without ``prev_state`` the character at ``start`` must be an opening
    marker: ``"``, ``'`` or ``(`` (whose closing marker is ``)``). With
    ``prev_state`` (the result of the previous call), parsing continues an
    unfinished title on a following line, reusing its accumulated text and
    closing marker.

    :param string: text being parsed
    :param start: index at which to start scanning
    :param maximum: exclusive upper bound of the scan
    :param prev_state: state returned by an earlier call that ended with
        ``can_continue`` set, or ``None`` for a fresh parse
    :return: a new ``_State``. ``ok`` is set when the closing marker was
        found; ``can_continue`` is set when the scan reached ``maximum``
        without finding it. Neither is set when there is no opening marker
        or when a ``(`` appears inside a parenthesised title.
    """
    result = _State()
    cursor = start

    if prev_state is None:
        if cursor >= maximum:
            return result

        opener = charCodeAt(string, cursor)
        if opener not in (0x22, 0x27, 0x28):  # '"', "'", "("
            return result

        # Step over the opening marker.
        start += 1
        cursor += 1

        # A title opened with "(" is closed by ")"; quotes close themselves.
        result.marker = 0x29 if opener == 0x28 else opener
    else:
        # Continuation of a previous parseLinkTitle call on the next line
        # (used for reference links only).
        result.str = prev_state.str
        result.marker = prev_state.marker

    closer = result.marker
    while cursor < maximum:
        ch = charCodeAt(string, cursor)
        if ch == closer:
            result.pos = cursor + 1
            result.str += unescapeAll(string[start:cursor])
            result.ok = True
            return result
        if ch == 0x28 and closer == 0x29:
            # "(" inside a parenthesised title: give up.
            return result
        if ch == 0x5C and cursor + 1 < maximum:
            # Backslash: skip the escaped character as well.
            cursor += 1
        cursor += 1

    # No closing marker found, but the title may continue on the next line
    # (for references).
    result.can_continue = True
    result.str += unescapeAll(string[start:cursor])
    return result