updates
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
# Natural Language Toolkit: Combinatory Categorial Grammar
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Graeme Gange <ggange@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Combinatory Categorial Grammar.
|
||||
|
||||
For more information see nltk/doc/contrib/ccg/ccg.pdf
|
||||
"""
|
||||
|
||||
from nltk.ccg.chart import CCGChart, CCGChartParser, CCGEdge, CCGLeafEdge
|
||||
from nltk.ccg.combinator import (
|
||||
BackwardApplication,
|
||||
BackwardBx,
|
||||
BackwardCombinator,
|
||||
BackwardComposition,
|
||||
BackwardSx,
|
||||
BackwardT,
|
||||
DirectedBinaryCombinator,
|
||||
ForwardApplication,
|
||||
ForwardCombinator,
|
||||
ForwardComposition,
|
||||
ForwardSubstitution,
|
||||
ForwardT,
|
||||
UndirectedBinaryCombinator,
|
||||
UndirectedComposition,
|
||||
UndirectedFunctionApplication,
|
||||
UndirectedSubstitution,
|
||||
UndirectedTypeRaise,
|
||||
)
|
||||
from nltk.ccg.lexicon import CCGLexicon
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
358
Backend/venv/lib/python3.12/site-packages/nltk/ccg/api.py
Normal file
358
Backend/venv/lib/python3.12/site-packages/nltk/ccg/api.py
Normal file
@@ -0,0 +1,358 @@
|
||||
# Natural Language Toolkit: CCG Categories
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Graeme Gange <ggange@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from functools import total_ordering
|
||||
|
||||
from nltk.internals import raise_unorderable_types
|
||||
|
||||
|
||||
@total_ordering
class AbstractCCGCategory(metaclass=ABCMeta):
    """
    Interface for categories in combinatory grammars.
    """

    @abstractmethod
    def is_primitive(self):
        """
        Returns true if the category is primitive.
        """

    @abstractmethod
    def is_function(self):
        """
        Returns true if the category is a function application.
        """

    @abstractmethod
    def is_var(self):
        """
        Returns true if the category is a variable.
        """

    @abstractmethod
    def substitute(self, substitutions):
        """
        Takes a set of (var, category) substitutions, and replaces every
        occurrence of the variable with the corresponding category.
        """

    @abstractmethod
    def can_unify(self, other):
        """
        Determines whether two categories can be unified.
        - Returns None if they cannot be unified
        - Returns a list of necessary substitutions if they can.
        """

    # Utility functions: comparison, strings and hashing.
    @abstractmethod
    def __str__(self):
        pass

    def __eq__(self, other):
        # Subclasses must set self._comparison_key; equality requires the
        # same concrete class as well as an equal key.
        return (
            self.__class__ is other.__class__
            and self._comparison_key == other._comparison_key
        )

    def __ne__(self, other):
        return not self == other

    def __lt__(self, other):
        # Categories of different concrete classes order by class name;
        # @total_ordering derives the remaining comparison operators.
        if not isinstance(other, AbstractCCGCategory):
            raise_unorderable_types("<", self, other)
        if self.__class__ is other.__class__:
            return self._comparison_key < other._comparison_key
        else:
            return self.__class__.__name__ < other.__class__.__name__

    def __hash__(self):
        # Hash is computed lazily from the comparison key and cached;
        # categories are treated as immutable once constructed.
        try:
            return self._hash
        except AttributeError:
            self._hash = hash(self._comparison_key)
            return self._hash
|
||||
|
||||
|
||||
class CCGVar(AbstractCCGCategory):
    """
    Class representing a variable CCG category.
    Used for conjunctions (and possibly type-raising, if implemented as a
    unary rule).
    """

    # Class-level counter used to hand out unique variable identifiers.
    _maxID = 0

    def __init__(self, prim_only=False):
        """Initialize a variable (selects a new identifier)

        :param prim_only: a boolean that determines whether the variable is
            restricted to primitives
        :type prim_only: bool
        """
        self._id = self.new_id()
        self._prim_only = prim_only
        self._comparison_key = self._id

    @classmethod
    def new_id(cls):
        """
        A class method allowing generation of unique variable identifiers.
        """
        # Increment the shared counter and return its previous value.
        cls._maxID = cls._maxID + 1
        return cls._maxID - 1

    @classmethod
    def reset_id(cls):
        # Restart identifier generation (useful for reproducible runs).
        cls._maxID = 0

    def is_primitive(self):
        return False

    def is_function(self):
        return False

    def is_var(self):
        return True

    def substitute(self, substitutions):
        """If there is a substitution corresponding to this variable,
        return the substituted category.
        """
        for var, cat in substitutions:
            if var == self:
                return cat
        # No binding for this variable: leave it unchanged.
        return self

    def can_unify(self, other):
        """If the variable can be replaced with other
        a substitution is returned.
        """
        # A primitive-only variable may only bind to primitive categories.
        if other.is_primitive() or not self._prim_only:
            return [(self, other)]
        return None

    def id(self):
        return self._id

    def __str__(self):
        return "_var" + str(self._id)
|
||||
|
||||
|
||||
@total_ordering
class Direction:
    """
    Class representing the direction of a function application.
    Also contains maintains information as to which combinators
    may be used with the category.
    """

    def __init__(self, dir, restrictions):
        # dir is "/" (forward) or "\\" (backward); restrictions is a
        # sequence of flag characters (see restrs()).
        self._dir = dir
        self._restrs = restrictions
        self._comparison_key = (dir, tuple(restrictions))

    # Testing the application direction
    def is_forward(self):
        return self._dir == "/"

    def is_backward(self):
        return self._dir == "\\"

    def dir(self):
        return self._dir

    def restrs(self):
        """A list of restrictions on the combinators.
        '.' denotes that permuting operations are disallowed
        ',' denotes that function composition is disallowed
        '_' denotes that the direction has variable restrictions.
        (This is redundant in the current implementation of type-raising)
        """
        return self._restrs

    def is_variable(self):
        return self._restrs == "_"

    # Unification and substitution of variable directions.
    # Used only if type-raising is implemented as a unary rule, as it
    # must inherit restrictions from the argument category.
    def can_unify(self, other):
        # A variable restriction set ('_') unifies with anything, recording
        # the binding under the key "_"; otherwise the sets must match.
        if other.is_variable():
            return [("_", self.restrs())]
        elif self.is_variable():
            return [("_", other.restrs())]
        else:
            if self.restrs() == other.restrs():
                return []
        return None

    def substitute(self, subs):
        # Only variable directions can be rewritten by a substitution.
        if not self.is_variable():
            return self

        for var, restrs in subs:
            if var == "_":
                return Direction(self._dir, restrs)
        return self

    # Testing permitted combinators
    def can_compose(self):
        return "," not in self._restrs

    def can_cross(self):
        return "." not in self._restrs

    def __eq__(self, other):
        return (
            self.__class__ is other.__class__
            and self._comparison_key == other._comparison_key
        )

    def __ne__(self, other):
        return not self == other

    def __lt__(self, other):
        if not isinstance(other, Direction):
            raise_unorderable_types("<", self, other)
        if self.__class__ is other.__class__:
            return self._comparison_key < other._comparison_key
        else:
            return self.__class__.__name__ < other.__class__.__name__

    def __hash__(self):
        # Hash is computed lazily and cached.
        try:
            return self._hash
        except AttributeError:
            self._hash = hash(self._comparison_key)
            return self._hash

    def __str__(self):
        r_str = ""
        for r in self._restrs:
            r_str = r_str + "%s" % r
        return f"{self._dir}{r_str}"

    # The negation operator reverses the direction of the application
    def __neg__(self):
        if self._dir == "/":
            return Direction("\\", self._restrs)
        else:
            return Direction("/", self._restrs)
|
||||
|
||||
|
||||
class PrimitiveCategory(AbstractCCGCategory):
    """
    Class representing primitive categories.
    Takes a string representation of the category, and a
    list of strings specifying the morphological subcategories.
    """

    def __init__(self, categ, restrictions=None):
        """
        :param categ: the base category label, e.g. "S" or "NP"
        :param restrictions: optional list of morphological subcategories
        """
        # `None` sentinel avoids the shared mutable-default-argument pitfall.
        if restrictions is None:
            restrictions = []
        self._categ = categ
        self._restrs = restrictions
        self._comparison_key = (categ, tuple(restrictions))

    def is_primitive(self):
        return True

    def is_function(self):
        return False

    def is_var(self):
        return False

    def restrs(self):
        """Return the list of morphological restrictions."""
        return self._restrs

    def categ(self):
        """Return the base category label."""
        return self._categ

    # Substitution does nothing to a primitive category
    def substitute(self, subs):
        return self

    # A primitive can be unified with a category of the same
    # base category, given that the other category shares all
    # of its subclasses, or with a variable.
    def can_unify(self, other):
        # Check for variables first: a variable is never primitive, so
        # testing is_primitive() before is_var() (as the original did)
        # made the variable branch unreachable.
        if other.is_var():
            return [(other, self)]
        if not other.is_primitive():
            return None
        if other.categ() == self.categ():
            # Every restriction on this category must also hold on `other`.
            for restr in self._restrs:
                if restr not in other.restrs():
                    return None
            return []
        return None

    def __str__(self):
        if self._restrs == []:
            return "%s" % self._categ
        restrictions = "[%s]" % ",".join(repr(r) for r in self._restrs)
        return f"{self._categ}{restrictions}"
|
||||
|
||||
|
||||
class FunctionalCategory(AbstractCCGCategory):
    """
    Class that represents a function application category.
    Consists of argument and result categories, together with
    an application direction.
    """

    def __init__(self, res, arg, dir):
        """
        :param res: result category
        :param arg: argument category
        :param dir: a Direction giving the side the argument is taken on
        """
        self._res = res
        self._arg = arg
        self._dir = dir
        self._comparison_key = (arg, dir, res)

    def is_primitive(self):
        return False

    def is_function(self):
        return True

    def is_var(self):
        return False

    # Substitution returns the category consisting of the
    # substitution applied to each of its constituents.
    def substitute(self, subs):
        sub_res = self._res.substitute(subs)
        sub_dir = self._dir.substitute(subs)
        sub_arg = self._arg.substitute(subs)
        # Use the substituted direction too: it was previously computed
        # but discarded, so variable directions were never rewritten.
        return FunctionalCategory(sub_res, sub_arg, sub_dir)

    # A function can unify with another function, so long as its
    # constituents can unify, or with an unrestricted variable.
    def can_unify(self, other):
        if other.is_var():
            return [(other, self)]
        if other.is_function():
            sa = self._res.can_unify(other.res())
            sd = self._dir.can_unify(other.dir())
            if sa is not None and sd is not None:
                # Apply the result substitutions before unifying arguments
                # so that shared variables stay consistent.
                sb = self._arg.substitute(sa).can_unify(other.arg().substitute(sa))
                if sb is not None:
                    return sa + sb
        return None

    # Constituent accessors
    def arg(self):
        return self._arg

    def res(self):
        return self._res

    def dir(self):
        return self._dir

    def __str__(self):
        return f"({self._res}{self._dir}{self._arg})"
|
||||
480
Backend/venv/lib/python3.12/site-packages/nltk/ccg/chart.py
Normal file
480
Backend/venv/lib/python3.12/site-packages/nltk/ccg/chart.py
Normal file
@@ -0,0 +1,480 @@
|
||||
# Natural Language Toolkit: Combinatory Categorial Grammar
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Graeme Gange <ggange@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
The lexicon is constructed by calling
|
||||
``lexicon.fromstring(<lexicon string>)``.
|
||||
|
||||
In order to construct a parser, you also need a rule set.
|
||||
The standard English rules are provided in chart as
|
||||
``chart.DefaultRuleSet``.
|
||||
|
||||
The parser can then be constructed by calling, for example:
|
||||
``parser = chart.CCGChartParser(<lexicon>, <ruleset>)``
|
||||
|
||||
Parsing is then performed by running
|
||||
``parser.parse(<sentence>.split())``.
|
||||
|
||||
While this returns a list of trees, the default representation
|
||||
of the produced trees is not very enlightening, particularly
|
||||
given that it uses the same tree class as the CFG parsers.
|
||||
It is probably better to call:
|
||||
``chart.printCCGDerivation(<parse tree extracted from list>)``
|
||||
which should print a nice representation of the derivation.
|
||||
|
||||
This entire process is shown far more clearly in the demonstration:
|
||||
python chart.py
|
||||
"""
|
||||
|
||||
import itertools
|
||||
|
||||
from nltk.ccg.combinator import *
|
||||
from nltk.ccg.combinator import (
|
||||
BackwardApplication,
|
||||
BackwardBx,
|
||||
BackwardComposition,
|
||||
BackwardSx,
|
||||
BackwardT,
|
||||
ForwardApplication,
|
||||
ForwardComposition,
|
||||
ForwardSubstitution,
|
||||
ForwardT,
|
||||
)
|
||||
from nltk.ccg.lexicon import Token, fromstring
|
||||
from nltk.ccg.logic import *
|
||||
from nltk.parse import ParserI
|
||||
from nltk.parse.chart import AbstractChartRule, Chart, EdgeI
|
||||
from nltk.sem.logic import *
|
||||
from nltk.tree import Tree
|
||||
|
||||
|
||||
# Based on the EdgeI class from NLTK.
|
||||
# A number of the properties of the EdgeI interface don't
|
||||
# transfer well to CCGs, however.
|
||||
class CCGEdge(EdgeI):
    """
    An edge in a CCG chart: a category over a span, plus the combinator
    (rule) that produced it. Dot/rhs/completeness members exist only to
    satisfy the EdgeI interface and are trivial for CCG.
    """

    def __init__(self, span, categ, rule):
        """
        :param span: (start, end) token indices covered by this edge
        :param categ: the CCG category derived over the span
        :param rule: the combinator used to build the edge
        """
        self._span = span
        self._categ = categ
        self._rule = rule
        self._comparison_key = (span, categ, rule)

    # Accessors
    def lhs(self):
        return self._categ

    def span(self):
        return self._span

    def start(self):
        return self._span[0]

    def end(self):
        return self._span[1]

    def length(self):
        # Fixed: the original subscripted the bound method (`self.span[0]`),
        # which raised TypeError whenever length() was called.
        return self._span[1] - self._span[0]

    def rhs(self):
        return ()

    def dot(self):
        # CCG edges are always complete, so the dot position is meaningless.
        return 0

    def is_complete(self):
        return True

    def is_incomplete(self):
        return False

    def nextsym(self):
        return None

    def categ(self):
        return self._categ

    def rule(self):
        return self._rule
|
||||
|
||||
|
||||
class CCGLeafEdge(EdgeI):
    """
    Class representing leaf edges in a CCG derivation.
    """

    def __init__(self, pos, token, leaf):
        # pos: index of the leaf within the sentence; token: lexicon Token
        # carrying the category (and semantics); leaf: the word itself.
        self._pos = pos
        self._token = token
        self._leaf = leaf
        self._comparison_key = (pos, token.categ(), leaf)

    # Accessors
    def lhs(self):
        return self._token.categ()

    def span(self):
        # A leaf edge always covers exactly one position.
        return (self._pos, self._pos + 1)

    def start(self):
        return self._pos

    def end(self):
        return self._pos + 1

    def length(self):
        return 1

    def rhs(self):
        return self._leaf

    def dot(self):
        # Leaf edges are always complete, so the dot is meaningless.
        return 0

    def is_complete(self):
        return True

    def is_incomplete(self):
        return False

    def nextsym(self):
        return None

    def token(self):
        return self._token

    def categ(self):
        return self._token.categ()

    def leaf(self):
        return self._leaf
|
||||
|
||||
|
||||
class BinaryCombinatorRule(AbstractChartRule):
    """
    Class implementing application of a binary combinator to a chart.
    Takes the directed combinator to apply.
    """

    NUMEDGES = 2

    def __init__(self, combinator):
        self._combinator = combinator

    # Apply a combinator
    def apply(self, chart, grammar, left_edge, right_edge):
        """Yield any new edges produced by combining two adjacent edges."""
        # The left & right edges must be touching.
        if not (left_edge.end() == right_edge.start()):
            return

        # Check if the two edges are permitted to combine.
        # If so, generate the corresponding edge.
        if self._combinator.can_combine(left_edge.categ(), right_edge.categ()):
            for res in self._combinator.combine(left_edge.categ(), right_edge.categ()):
                new_edge = CCGEdge(
                    span=(left_edge.start(), right_edge.end()),
                    categ=res,
                    rule=self._combinator,
                )
                # chart.insert() reports whether the edge is new; only new
                # edges are yielded.
                if chart.insert(new_edge, (left_edge, right_edge)):
                    yield new_edge

    # The representation of the combinator (for printing derivations)
    def __str__(self):
        return "%s" % self._combinator
|
||||
|
||||
|
||||
# Type-raising must be handled slightly differently to the other rules, as the
|
||||
# resulting rules only span a single edge, rather than both edges.
|
||||
|
||||
|
||||
class ForwardTypeRaiseRule(AbstractChartRule):
    """
    Class for applying forward type raising
    """

    # Inspects a pair of adjacent edges, although the resulting edge
    # spans only the left one.
    NUMEDGES = 2

    def __init__(self):
        self._combinator = ForwardT

    def apply(self, chart, grammar, left_edge, right_edge):
        # The two edges must be adjacent.
        if not (left_edge.end() == right_edge.start()):
            return

        for res in self._combinator.combine(left_edge.categ(), right_edge.categ()):
            # Note: the new edge covers only the left edge's span.
            new_edge = CCGEdge(span=left_edge.span(), categ=res, rule=self._combinator)
            if chart.insert(new_edge, (left_edge,)):
                yield new_edge

    def __str__(self):
        return "%s" % self._combinator
|
||||
|
||||
|
||||
class BackwardTypeRaiseRule(AbstractChartRule):
    """
    Class for applying backward type raising.
    """

    # Inspects a pair of adjacent edges, although the resulting edge
    # spans only the right one.
    NUMEDGES = 2

    def __init__(self):
        self._combinator = BackwardT

    def apply(self, chart, grammar, left_edge, right_edge):
        # The two edges must be adjacent.
        if not (left_edge.end() == right_edge.start()):
            return

        for res in self._combinator.combine(left_edge.categ(), right_edge.categ()):
            # Note: the new edge covers only the right edge's span.
            new_edge = CCGEdge(span=right_edge.span(), categ=res, rule=self._combinator)
            if chart.insert(new_edge, (right_edge,)):
                yield new_edge

    def __str__(self):
        return "%s" % self._combinator
|
||||
|
||||
|
||||
# Common sets of combinators used for English derivations.
ApplicationRuleSet = [
    BinaryCombinatorRule(ForwardApplication),
    BinaryCombinatorRule(BackwardApplication),
]
CompositionRuleSet = [
    BinaryCombinatorRule(ForwardComposition),
    BinaryCombinatorRule(BackwardComposition),
    BinaryCombinatorRule(BackwardBx),
]
SubstitutionRuleSet = [
    BinaryCombinatorRule(ForwardSubstitution),
    BinaryCombinatorRule(BackwardSx),
]
# Type raising produces edges spanning a single input edge, so these are
# dedicated rules rather than BinaryCombinatorRule instances.
TypeRaiseRuleSet = [ForwardTypeRaiseRule(), BackwardTypeRaiseRule()]

# The standard English rule set.
DefaultRuleSet = (
    ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet + TypeRaiseRuleSet
)
|
||||
|
||||
|
||||
class CCGChartParser(ParserI):
    """
    Chart parser for CCGs.
    Based largely on the ChartParser class from NLTK.
    """

    def __init__(self, lexicon, rules, trace=0):
        """
        :param lexicon: CCG lexicon mapping words to categories
        :param rules: iterable of chart rules (e.g. chart.DefaultRuleSet)
        :param trace: tracing level (stored for API compatibility)
        """
        self._lexicon = lexicon
        self._rules = rules
        self._trace = trace

    def lexicon(self):
        return self._lexicon

    # Implements the CYK algorithm
    def parse(self, tokens):
        """Parse a token sequence and return an iterator over parse trees."""
        tokens = list(tokens)
        # `tokens` is already a fresh list; the original copied it a
        # second time for no benefit.
        chart = CCGChart(tokens)
        lex = self._lexicon

        # Initialize leaf edges.
        for index in range(chart.num_leaves()):
            for token in lex.categories(chart.leaf(index)):
                new_edge = CCGLeafEdge(index, token, chart.leaf(index))
                chart.insert(new_edge, ())

        # Select a span for the new edges
        for span in range(2, chart.num_leaves() + 1):
            for start in range(0, chart.num_leaves() - span + 1):
                # Try all possible pairs of edges that could generate
                # an edge for that span
                for part in range(1, span):
                    lstart = start
                    mid = start + part
                    rend = start + span

                    for left in chart.select(span=(lstart, mid)):
                        for right in chart.select(span=(mid, rend)):
                            # Generate all possible combinations of the two edges
                            for rule in self._rules:
                                # Exhaust the generator so every edge the rule
                                # produces is inserted into the chart. (The
                                # original kept a per-rule counter that was
                                # never read; it has been dropped.)
                                for _ in rule.apply(chart, lex, left, right):
                                    pass

        # Output the resulting parses
        return chart.parses(lex.start())
|
||||
|
||||
|
||||
class CCGChart(Chart):
    def __init__(self, tokens):
        Chart.__init__(self, tokens)

    # Constructs the trees for a given parse. Unfortunately, the parse trees need to be
    # constructed slightly differently to those in the default Chart class, so it has to
    # be reimplemented
    def _trees(self, edge, complete, memo, tree_class):
        """Return the list of derivation trees rooted at `edge`."""
        assert complete, "CCGChart cannot build incomplete trees"

        # Memoization: each edge's trees are built at most once.
        if edge in memo:
            return memo[edge]

        if isinstance(edge, CCGLeafEdge):
            # A leaf yields a two-level subtree: the token above the word.
            word = tree_class(edge.token(), [self._tokens[edge.start()]])
            leaf = tree_class((edge.token(), "Leaf"), [word])
            memo[edge] = [leaf]
            return [leaf]

        # Seed the memo entry before recursing so cyclic child pointers
        # terminate instead of recursing forever.
        memo[edge] = []
        trees = []

        for cpl in self.child_pointer_lists(edge):
            child_choices = [self._trees(cp, complete, memo, tree_class) for cp in cpl]
            # Every combination of child derivations yields a distinct tree.
            for children in itertools.product(*child_choices):
                lhs = (
                    Token(
                        self._tokens[edge.start() : edge.end()],
                        edge.lhs(),
                        compute_semantics(children, edge),
                    ),
                    str(edge.rule()),
                )
                trees.append(tree_class(lhs, children))

        memo[edge] = trees
        return trees
|
||||
|
||||
|
||||
def compute_semantics(children, edge):
    """
    Compute the semantics of an edge from its children's semantics.

    Returns None when the leaf tokens carry no semantics; otherwise applies
    the semantic operation matching the combinator that built the edge.
    """
    if children[0].label()[0].semantics() is None:
        return None

    if len(children) == 2:
        # For backward combinators the functor is on the right, so swap
        # the children before extracting function and argument.
        if isinstance(edge.rule(), BackwardCombinator):
            children = [children[1], children[0]]

        combinator = edge.rule()._combinator
        function = children[0].label()[0].semantics()
        argument = children[1].label()[0].semantics()

        if isinstance(combinator, UndirectedFunctionApplication):
            return compute_function_semantics(function, argument)
        elif isinstance(combinator, UndirectedComposition):
            return compute_composition_semantics(function, argument)
        elif isinstance(combinator, UndirectedSubstitution):
            return compute_substitution_semantics(function, argument)
        else:
            # Format the combinator explicitly: the original concatenated
            # the combinator object to a str, which raised TypeError
            # instead of the intended AssertionError.
            raise AssertionError(f"Unsupported combinator '{combinator}'")
    else:
        # Unary edge: type raising.
        return compute_type_raised_semantics(children[0].label()[0].semantics())
|
||||
|
||||
|
||||
# --------
|
||||
# Displaying derivations
|
||||
# --------
|
||||
def printCCGDerivation(tree):
    """Print a CCG derivation tree in a human-readable, aligned format."""
    # Get the leaves and initial categories
    leafcats = tree.pos()
    leafstr = ""
    catstr = ""

    # Construct a string with both the leaf word and corresponding
    # category aligned.
    for leaf, cat in leafcats:
        str_cat = "%s" % cat
        # Each column is 2 wider than the longer of word/category; the
        # shorter one is centred within it.
        nextlen = 2 + max(len(leaf), len(str_cat))
        lcatlen = (nextlen - len(str_cat)) // 2
        rcatlen = lcatlen + (nextlen - len(str_cat)) % 2
        catstr += " " * lcatlen + str_cat + " " * rcatlen
        lleaflen = (nextlen - len(leaf)) // 2
        rleaflen = lleaflen + (nextlen - len(leaf)) % 2
        leafstr += " " * lleaflen + leaf + " " * rleaflen
    print(leafstr.rstrip())
    print(catstr.rstrip())

    # Display the derivation steps
    printCCGTree(0, tree)
|
||||
|
||||
|
||||
# Prints the sequence of derivation steps.
|
||||
def printCCGTree(lwidth, tree):
    """
    Print the derivation steps of `tree`, starting at column `lwidth`.
    Returns the rightmost column used so callers can size their rule lines.
    """
    rwidth = lwidth

    # Is a leaf (word).
    # Increment the span by the space occupied by the leaf.
    if not isinstance(tree, Tree):
        return 2 + lwidth + len(tree)

    # Find the width of the current derivation step
    for child in tree:
        rwidth = max(rwidth, printCCGTree(rwidth, child))

    # Is a leaf node.
    # Don't print anything, but account for the space occupied.
    if not isinstance(tree.label(), tuple):
        return max(
            rwidth, 2 + lwidth + len("%s" % tree.label()), 2 + lwidth + len(tree[0])
        )

    (token, op) = tree.label()

    if op == "Leaf":
        return rwidth

    # Pad to the left with spaces, followed by a sequence of '-'
    # and the derivation rule.
    print(lwidth * " " + (rwidth - lwidth) * "-" + "%s" % op)
    # Print the resulting category on a new line.
    str_res = "%s" % (token.categ())
    if token.semantics() is not None:
        str_res += " {" + str(token.semantics()) + "}"
    # Centre the resulting category under the rule line.
    respadlen = (rwidth - lwidth - len(str_res)) // 2 + lwidth
    print(respadlen * " " + str_res)
    return rwidth
|
||||
|
||||
|
||||
### Demonstration code
|
||||
|
||||
# Construct the lexicon
|
||||
lex = fromstring(
|
||||
"""
|
||||
:- S, NP, N, VP # Primitive categories, S is the target primitive
|
||||
|
||||
Det :: NP/N # Family of words
|
||||
Pro :: NP
|
||||
TV :: VP/NP
|
||||
Modal :: (S\\NP)/VP # Backslashes need to be escaped
|
||||
|
||||
I => Pro # Word -> Category mapping
|
||||
you => Pro
|
||||
|
||||
the => Det
|
||||
|
||||
# Variables have the special keyword 'var'
|
||||
# '.' prevents permutation
|
||||
# ',' prevents composition
|
||||
and => var\\.,var/.,var
|
||||
|
||||
which => (N\\N)/(S/NP)
|
||||
|
||||
will => Modal # Categories can be either explicit, or families.
|
||||
might => Modal
|
||||
|
||||
cook => TV
|
||||
eat => TV
|
||||
|
||||
mushrooms => N
|
||||
parsnips => N
|
||||
bacon => N
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def demo():
    """Parse an example sentence and print every derivation found."""
    parser = CCGChartParser(lex, DefaultRuleSet)
    for parse in parser.parse("I might cook and eat the bacon".split()):
        printCCGDerivation(parse)
|
||||
|
||||
|
||||
# Run the demonstration when executed as a script.
if __name__ == "__main__":
    demo()
|
||||
340
Backend/venv/lib/python3.12/site-packages/nltk/ccg/combinator.py
Normal file
340
Backend/venv/lib/python3.12/site-packages/nltk/ccg/combinator.py
Normal file
@@ -0,0 +1,340 @@
|
||||
# Natural Language Toolkit: Combinatory Categorial Grammar
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Graeme Gange <ggange@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
"""
|
||||
CCG Combinators
|
||||
"""
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
from nltk.ccg.api import FunctionalCategory
|
||||
|
||||
|
||||
class UndirectedBinaryCombinator(metaclass=ABCMeta):
    """
    Abstract class for representing a binary combinator.
    Merely defines functions for checking if the function and argument
    are able to be combined, and what the resulting category is.

    Note that as no assumptions are made as to direction, the unrestricted
    combinators can perform all backward, forward and crossed variations
    of the combinators; these restrictions must be added in the rule
    class.
    """

    @abstractmethod
    def can_combine(self, function, argument):
        """Return True if `function` can be combined with `argument`."""
        pass

    @abstractmethod
    def combine(self, function, argument):
        """Yield the categories resulting from the combination."""
        pass
|
||||
|
||||
|
||||
class DirectedBinaryCombinator(metaclass=ABCMeta):
    """
    Wrapper for the undirected binary combinator.
    It takes left and right categories, and decides which is to be
    the function, and which the argument.
    It then decides whether or not they can be combined.
    """

    @abstractmethod
    def can_combine(self, left, right):
        """Return True if the left and right categories can combine."""
        pass

    @abstractmethod
    def combine(self, left, right):
        """Yield the result categories of combining left and right."""
        pass
|
||||
|
||||
|
||||
class ForwardCombinator(DirectedBinaryCombinator):
    """
    Class representing combinators where the primary functor is on the left.

    Takes an undirected combinator, and a predicate which adds constraints
    restricting the cases in which it may apply.
    """

    def __init__(self, combinator, predicate, suffix=""):
        # `suffix` distinguishes printed variants (e.g. "x" for crossed).
        self._combinator = combinator
        self._predicate = predicate
        self._suffix = suffix

    def can_combine(self, left, right):
        # Both the underlying combinator and the direction predicate
        # must accept the pair.
        return self._combinator.can_combine(left, right) and self._predicate(
            left, right
        )

    def combine(self, left, right):
        yield from self._combinator.combine(left, right)

    def __str__(self):
        return f">{self._combinator}{self._suffix}"
|
||||
|
||||
|
||||
class BackwardCombinator(DirectedBinaryCombinator):
    """
    The backward equivalent of the ForwardCombinator class.
    """

    def __init__(self, combinator, predicate, suffix=""):
        self._combinator = combinator
        self._predicate = predicate
        self._suffix = suffix

    def can_combine(self, left, right):
        # The functor is on the right, so the undirected combinator
        # receives the categories swapped; the predicate does not.
        return self._combinator.can_combine(right, left) and self._predicate(
            left, right
        )

    def combine(self, left, right):
        yield from self._combinator.combine(right, left)

    def __str__(self):
        return f"<{self._combinator}{self._suffix}"
|
||||
|
||||
|
||||
class UndirectedFunctionApplication(UndirectedBinaryCombinator):
    """
    Class representing function application.
    Implements rules of the form:
    X/Y Y -> X (>)
    And the corresponding backwards application rule
    """

    def can_combine(self, function, argument):
        if not function.is_function():
            return False

        # `is not None` rather than `not ... is None` (PEP 8 / E714).
        return function.arg().can_unify(argument) is not None

    def combine(self, function, argument):
        if not function.is_function():
            return

        subs = function.arg().can_unify(argument)
        if subs is None:
            return

        # Apply the unifying substitutions to the result category.
        yield function.res().substitute(subs)

    def __str__(self):
        # Function application carries no symbol of its own; the direction
        # marker is supplied by the wrapping directed combinator.
        return ""
|
||||
|
||||
|
||||
# Predicates for function application.


def forwardOnly(left, right):
    """Ensure the left functor takes its argument on the right."""
    direction = left.dir()
    return direction.is_forward()


def backwardOnly(left, right):
    """Ensure the right functor takes its argument on the left."""
    direction = right.dir()
    return direction.is_backward()
|
||||
|
||||
|
||||
# Application combinator instances
# Forward:  X/Y  Y   -> X  (>)
# Backward: Y    X\Y -> X  (<)
ForwardApplication = ForwardCombinator(UndirectedFunctionApplication(), forwardOnly)
BackwardApplication = BackwardCombinator(UndirectedFunctionApplication(), backwardOnly)
|
||||
|
||||
|
||||
class UndirectedComposition(UndirectedBinaryCombinator):
    """
    Functional composition (harmonic) combinator.
    Implements rules of the form
    X/Y Y/Z -> X/Z (B>)
    And the corresponding backwards and crossed variations.
    """

    def can_combine(self, function, argument):
        # Can only combine two functions, and both functions must
        # allow composition.
        if not (function.is_function() and argument.is_function()):
            return False
        if function.dir().can_compose() and argument.dir().can_compose():
            # `is not None` rather than `not ... is None` (PEP 8 / E714).
            return function.arg().can_unify(argument.res()) is not None
        return False

    def combine(self, function, argument):
        if not (function.is_function() and argument.is_function()):
            return
        if function.dir().can_compose() and argument.dir().can_compose():
            subs = function.arg().can_unify(argument.res())
            if subs is not None:
                # The result keeps the function's result category and takes
                # the argument's argument, in the argument's direction.
                yield FunctionalCategory(
                    function.res().substitute(subs),
                    argument.arg().substitute(subs),
                    argument.dir(),
                )

    def __str__(self):
        return "B"
|
||||
|
||||
|
||||
# Predicates for restricting application of straight composition.
def bothForward(left, right):
    """True when both categories are forward-directed functors."""
    if not left.dir().is_forward():
        return False
    return right.dir().is_forward()
|
||||
|
||||
|
||||
def bothBackward(left, right):
    """True when both categories are backward-directed functors."""
    if not left.dir().is_backward():
        return False
    return right.dir().is_backward()
|
||||
|
||||
|
||||
# Predicates for crossed composition
def crossedDirs(left, right):
    """True when the functors point inwards: left forward, right backward."""
    if not left.dir().is_forward():
        return False
    return right.dir().is_backward()
|
||||
|
||||
|
||||
def backwardBxConstraint(left, right):
    """
    Predicate for backward crossed composition (Bx).

    Requires the functors to be crossed inwards (left forward, right
    backward), BOTH directions to permit crossing, and the left argument
    category to be primitive.
    """
    # The functors must be crossed inwards (crossedDirs, inlined).
    if not (left.dir().is_forward() and right.dir().is_backward()):
        return False
    # Permuting combinators must be allowed on both functors.
    # Fixed precedence: the original `not a and b` only negated `a`,
    # letting a non-crossing right functor through.
    if not (left.dir().can_cross() and right.dir().can_cross()):
        return False
    # The resulting argument category is restricted to be primitive
    return left.arg().is_primitive()
|
||||
|
||||
|
||||
# Straight composition combinators
# Forward harmonic composition (B>): X/Y Y/Z -> X/Z
ForwardComposition = ForwardCombinator(UndirectedComposition(), forwardOnly)
# Backward harmonic composition (B<): Y\Z X\Y -> X\Z
BackwardComposition = BackwardCombinator(UndirectedComposition(), backwardOnly)

# Backward crossed composition
# The "x" suffix marks the crossed variant in rule printouts.
BackwardBx = BackwardCombinator(
    UndirectedComposition(), backwardBxConstraint, suffix="x"
)
|
||||
|
||||
|
||||
class UndirectedSubstitution(UndirectedBinaryCombinator):
    r"""
    Substitution (permutation) combinator.
    Implements rules of the form
    Y/Z (X\Y)/Z -> X/Z (<Sx)
    And other variations.
    """

    def can_combine(self, function, argument):
        # Substitution requires two functional categories.
        if function.is_primitive() or argument.is_primitive():
            return False

        # These could potentially be moved to the predicates, as the
        # constraints may not be general to all languages.
        if function.res().is_primitive():
            return False
        if not function.arg().is_primitive():
            return False

        if not (function.dir().can_compose() and argument.dir().can_compose()):
            return False
        # The functor must look like (X|Y)|Z and the argument like Y|Z.
        inner_match = function.res().arg() == argument.res()
        outer_match = function.arg() == argument.arg()
        return inner_match and outer_match

    def combine(self, function, argument):
        if self.can_combine(function, argument):
            yield FunctionalCategory(
                function.res().res(), argument.arg(), argument.dir()
            )

    def __str__(self):
        return "S"
|
||||
|
||||
|
||||
# Predicate for forward substitution
def forwardSConstraint(left, right):
    """Both categories must be forward functors and the left functor must
    have the shape (X/Y)/Z with Z primitive."""
    # bothForward(left, right), inlined.
    if not (left.dir().is_forward() and right.dir().is_forward()):
        return False
    if not left.res().dir().is_forward():
        return False
    return left.arg().is_primitive()
|
||||
|
||||
|
||||
# Predicate for backward crossed substitution
def backwardSxConstraint(left, right):
    """
    Predicate for backward crossed substitution (Sx).

    Both functors must permit crossing, both must be forward categories,
    and the right functor's result must point backward with a primitive
    argument.
    """
    # Fixed precedence: the original `not a and b` only negated `a`,
    # letting a non-crossing right functor through.
    if not (left.dir().can_cross() and right.dir().can_cross()):
        return False
    # bothForward(left, right), inlined.
    if not (left.dir().is_forward() and right.dir().is_forward()):
        return False
    return right.res().dir().is_backward() and right.arg().is_primitive()
|
||||
|
||||
|
||||
# Instances of substitution combinators
# Forward substitution (S>)
ForwardSubstitution = ForwardCombinator(UndirectedSubstitution(), forwardSConstraint)
# Backward crossed substitution (Sx<); "x" marks the crossed variant.
BackwardSx = BackwardCombinator(UndirectedSubstitution(), backwardSxConstraint, "x")
|
||||
|
||||
|
||||
# Retrieves the left-most functional category.
# ie, (N\N)/(S/NP) => N\N
def innermostFunction(categ):
    """Follow the result chain of ``categ`` down to the innermost functor."""
    current = categ
    while current.res().is_function():
        current = current.res()
    return current
|
||||
|
||||
|
||||
class UndirectedTypeRaise(UndirectedBinaryCombinator):
    """
    Undirected combinator for type raising.
    """

    def can_combine(self, function, arg):
        # The argument must be a function.
        # The restriction that arg.res() must be a function
        # merely reduces redundant type-raising; if arg.res() is
        # primitive, we have:
        # X Y\X =>(<T) Y/(Y\X) Y\X =>(>) Y
        # which is equivalent to
        # X Y\X =>(<) Y
        if not (arg.is_function() and arg.res().is_function()):
            return False

        # Type-raising matches only the innermost application.
        arg = innermostFunction(arg)

        # Fixed: the original referenced the undefined names `left` and
        # `arg_categ` (a NameError at runtime); mirror combine() and unify
        # the function category with the innermost argument slot.
        subs = function.can_unify(arg.arg())
        return subs is not None

    def combine(self, function, arg):
        if not (
            function.is_primitive() and arg.is_function() and arg.res().is_function()
        ):
            return

        # Type-raising matches only the innermost application.
        arg = innermostFunction(arg)

        subs = function.can_unify(arg.arg())
        if subs is not None:
            xcat = arg.res().substitute(subs)
            # X => Y|(Y|X): the raised category flips the innermost direction.
            yield FunctionalCategory(
                xcat, FunctionalCategory(xcat, function, arg.dir()), -(arg.dir())
            )

    def __str__(self):
        return "T"
|
||||
|
||||
|
||||
# Predicates for type-raising
# The direction of the innermost category must be towards
# the primary functor.
# The restriction that the variable must be primitive is not
# common to all versions of CCGs; some authors have other restrictions.
def forwardTConstraint(left, right):
    """Allow forward type-raising when the innermost functor of the right
    category points backward and yields a primitive result."""
    inner = innermostFunction(right)
    if not inner.dir().is_backward():
        return False
    return inner.res().is_primitive()
|
||||
|
||||
|
||||
def backwardTConstraint(left, right):
    """Allow backward type-raising when the innermost functor of the left
    category points forward and yields a primitive result."""
    inner = innermostFunction(left)
    if not inner.dir().is_forward():
        return False
    return inner.res().is_primitive()
|
||||
|
||||
|
||||
# Instances of type-raising combinators
# Forward type-raising (T>): X => Y/(Y\X)
ForwardT = ForwardCombinator(UndirectedTypeRaise(), forwardTConstraint)
# Backward type-raising (T<): X => Y\(Y/X)
BackwardT = BackwardCombinator(UndirectedTypeRaise(), backwardTConstraint)
|
||||
338
Backend/venv/lib/python3.12/site-packages/nltk/ccg/lexicon.py
Normal file
338
Backend/venv/lib/python3.12/site-packages/nltk/ccg/lexicon.py
Normal file
@@ -0,0 +1,338 @@
|
||||
# Natural Language Toolkit: Combinatory Categorial Grammar
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Graeme Gange <ggange@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
"""
|
||||
CCG Lexicons
|
||||
"""
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
from nltk.ccg.api import CCGVar, Direction, FunctionalCategory, PrimitiveCategory
|
||||
from nltk.internals import deprecated
|
||||
from nltk.sem.logic import Expression
|
||||
|
||||
# ------------
# Regular expressions used for parsing components of the lexicon
# ------------

# Parses a primitive category and subscripts
# e.g. "NP[sg]" -> ("NP", "[sg]"); the bracketed subscript group is optional.
PRIM_RE = re.compile(r"""([A-Za-z]+)(\[[A-Za-z,]+\])?""")

# Separates the next primitive category from the remainder of the
# string
NEXTPRIM_RE = re.compile(r"""([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)""")

# Separates the next application operator from the remainder
# Group 1 is the slash direction; groups 2-3 are optional restriction marks.
APP_RE = re.compile(r"""([\\/])([.,]?)([.,]?)(.*)""")

# Parses the definition of the right-hand side (rhs) of either a word or a family
# "::" introduces a family definition; "=>"/"->" introduce a word definition.
LEX_RE = re.compile(r"""([\S_]+)\s*(::|[-=]+>)\s*(.+)""", re.UNICODE)

# Parses the right hand side that contains category and maybe semantic predicate
RHS_RE = re.compile(r"""([^{}]*[^ {}])\s*(\{[^}]+\})?""", re.UNICODE)

# Parses the semantic predicate
SEMANTICS_RE = re.compile(r"""\{([^}]+)\}""", re.UNICODE)

# Strips comments from a line
COMMENTS_RE = re.compile("""([^#]*)(?:#.*)?""")
|
||||
|
||||
|
||||
class Token:
    """
    Class representing a token.

    token => category {semantics}
    e.g. eat => S\\var[pl]/var {\\x y.eat(x,y)}

    * `token` (string)
    * `categ` (string)
    * `semantics` (Expression)
    """

    def __init__(self, token, categ, semantics=None):
        self._token = token
        self._categ = categ
        self._semantics = semantics

    def categ(self):
        """Return the syntactic category of the token."""
        return self._categ

    def semantics(self):
        """Return the semantic expression of the token (or None)."""
        return self._semantics

    def __str__(self):
        semantics_str = ""
        if self._semantics is not None:
            semantics_str = " {" + str(self._semantics) + "}"
        return "" + str(self._categ) + semantics_str

    def __cmp__(self, other):
        # Fixed: the original called the Python-2-only builtin ``cmp``
        # (a NameError on Python 3) and mis-grouped its arguments as
        # cmp(pair, categ, semantics).  Compare (categ, semantics)
        # tuples instead, returning the conventional -1/0/1.
        if not isinstance(other, Token):
            return -1
        this = (self._categ, self._semantics)
        that = (other.categ(), other.semantics())
        return (this > that) - (this < that)
|
||||
|
||||
|
||||
class CCGLexicon:
    """
    Class representing a lexicon for CCG grammars.

    * `primitives`: The list of primitive categories for the lexicon
    * `families`: Families of categories
    * `entries`: A mapping of words to possible categories
    """

    def __init__(self, start, primitives, families, entries):
        self._start = PrimitiveCategory(start)
        self._primitives = primitives
        self._families = families
        self._entries = entries

    def categories(self, word):
        """
        Returns all the possible categories for a word
        """
        return self._entries[word]

    def start(self):
        """
        Return the target category for the parser
        """
        return self._start

    def __str__(self):
        """
        String representation of the lexicon. Used for debugging.
        """
        lines = []
        for ident in sorted(self._entries):
            alternatives = " | ".join("%s" % cat for cat in self._entries[ident])
            lines.append(ident + " => " + alternatives)
        return "\n".join(lines)
|
||||
|
||||
|
||||
# -----------
# Parsing lexicons
# -----------


def matchBrackets(string):
    """
    Separate the contents matching the first set of brackets from the rest of
    the input.
    """
    remainder = string[1:]
    collected = "("

    # Consume characters until the matching close bracket, recursing
    # into any nested bracketed group.
    while remainder and not remainder.startswith(")"):
        if remainder.startswith("("):
            (part, remainder) = matchBrackets(remainder)
            collected += part
        else:
            collected += remainder[0]
            remainder = remainder[1:]
    if remainder.startswith(")"):
        return (collected + ")", remainder[1:])
    raise AssertionError("Unmatched bracket in string '" + string + "'")
|
||||
|
||||
|
||||
def nextCategory(string):
    """
    Separate the string for the next portion of the category from the rest
    of the string
    """
    if not string.startswith("("):
        # Primitive category: split it off with the regex.
        return NEXTPRIM_RE.match(string).groups()
    # Bracketed category: scan to the matching close bracket.
    return matchBrackets(string)
|
||||
|
||||
|
||||
def parseApplication(app):
    """
    Parse an application operator
    """
    (slash, restrictions) = (app[0], app[1:])
    return Direction(slash, restrictions)
|
||||
|
||||
|
||||
def parseSubscripts(subscr):
    """
    Parse the subscripts for a primitive category

    e.g. "[sg,nom]" -> ["sg", "nom"]; an empty/None subscript gives [].
    """
    if not subscr:
        return []
    return subscr[1:-1].split(",")
|
||||
|
||||
|
||||
def parsePrimitiveCategory(chunks, primitives, families, var):
    """
    Parse a primitive category

    If the primitive is the special category 'var', replace it with the
    correct `CCGVar`.
    """
    (catstr, subscr_str) = (chunks[0], chunks[1])

    # The bare name 'var' (without subscripts) denotes a CCG variable.
    if catstr == "var" and subscr_str is None:
        if var is None:
            var = CCGVar()
        return (var, var)

    if catstr in families:
        (cat, cvar) = families[catstr]
        if var is None:
            var = cvar
        else:
            # Rename the family's variable to the one already in use.
            cat = cat.substitute([(cvar, var)])
        return (cat, var)

    if catstr in primitives:
        subscrs = parseSubscripts(subscr_str)
        return (PrimitiveCategory(catstr, subscrs), var)
    raise AssertionError(
        "String '" + catstr + "' is neither a family nor primitive category."
    )
|
||||
|
||||
|
||||
def augParseCategory(line, primitives, families, var=None):
    """
    Parse a string representing a category, and returns a tuple with
    (possibly) the CCG variable for the category
    """
    (cat_string, rest) = nextCategory(line)
    (res, var) = _parseCategoryChunk(cat_string, primitives, families, var)

    # Fold each "<slash> <category>" pair into a functional category.
    while rest != "":
        app = APP_RE.match(rest).groups()
        direction = parseApplication(app[0:3])
        rest = app[3]

        (cat_string, rest) = nextCategory(rest)
        (arg, var) = _parseCategoryChunk(cat_string, primitives, families, var)
        res = FunctionalCategory(res, arg, direction)

    return (res, var)


def _parseCategoryChunk(cat_string, primitives, families, var):
    """Parse one category chunk: a bracketed sub-category or a primitive.

    Extracted to remove the duplicated branch that previously appeared
    both before and inside the loop of augParseCategory.
    """
    if cat_string.startswith("("):
        return augParseCategory(cat_string[1:-1], primitives, families, var)
    return parsePrimitiveCategory(
        PRIM_RE.match(cat_string).groups(), primitives, families, var
    )
|
||||
|
||||
|
||||
def fromstring(lex_str, include_semantics=False):
    """
    Convert string representation into a lexicon for CCGs.

    :param lex_str: lexicon definition; ":-" lines list primitive
        categories (first is the start category), "::" lines define
        families, "=>" lines define words.
    :param include_semantics: when True, every word definition must carry
        a "{...}" semantic predicate, parsed as a logic Expression.
    :return: a CCGLexicon built from the definitions.
    """
    # Reset variable numbering so parses are reproducible.
    CCGVar.reset_id()
    primitives = []
    families = {}
    entries = defaultdict(list)
    for line in lex_str.splitlines():
        # Strip comments and leading/trailing whitespace.
        line = COMMENTS_RE.match(line).groups()[0].strip()
        if line == "":
            continue

        if line.startswith(":-"):
            # A line of primitive categories.
            # The first one is the target category
            # ie, :- S, N, NP, VP
            primitives = primitives + [
                prim.strip() for prim in line[2:].strip().split(",")
            ]
        else:
            # Either a family definition, or a word definition
            (ident, sep, rhs) = LEX_RE.match(line).groups()
            (catstr, semantics_str) = RHS_RE.match(rhs).groups()
            (cat, var) = augParseCategory(catstr, primitives, families)

            if sep == "::":
                # Family definition
                # ie, Det :: NP/N
                families[ident] = (cat, var)
            else:
                semantics = None
                if include_semantics is True:
                    if semantics_str is None:
                        raise AssertionError(
                            line
                            + " must contain semantics because include_semantics is set to True"
                        )
                    else:
                        semantics = Expression.fromstring(
                            SEMANTICS_RE.match(semantics_str).groups()[0]
                        )
                # Word definition
                # ie, which => (N\N)/(S/NP)
                entries[ident].append(Token(ident, cat, semantics))
    # The first primitive listed is the start category.
    return CCGLexicon(primitives[0], primitives, families, entries)
|
||||
|
||||
|
||||
@deprecated("Use fromstring() instead.")
def parseLexicon(lex_str):
    """Deprecated alias for fromstring(), kept for backward compatibility."""
    return fromstring(lex_str)
|
||||
|
||||
|
||||
# A small demonstration lexicon, built at import time from the string below.
openccg_tinytiny = fromstring(
    """
# Rather minimal lexicon based on the openccg `tinytiny' grammar.
# Only incorporates a subset of the morphological subcategories, however.
:- S,NP,N # Primitive categories
Det :: NP/N # Determiners
Pro :: NP
IntransVsg :: S\\NP[sg] # Tensed intransitive verbs (singular)
IntransVpl :: S\\NP[pl] # Plural
TransVsg :: S\\NP[sg]/NP # Tensed transitive verbs (singular)
TransVpl :: S\\NP[pl]/NP # Plural

the => NP[sg]/N[sg]
the => NP[pl]/N[pl]

I => Pro
me => Pro
we => Pro
us => Pro

book => N[sg]
books => N[pl]

peach => N[sg]
peaches => N[pl]

policeman => N[sg]
policemen => N[pl]

boy => N[sg]
boys => N[pl]

sleep => IntransVsg
sleep => IntransVpl

eat => IntransVpl
eat => TransVpl
eats => IntransVsg
eats => TransVsg

see => TransVpl
sees => TransVsg
"""
)
|
||||
63
Backend/venv/lib/python3.12/site-packages/nltk/ccg/logic.py
Normal file
63
Backend/venv/lib/python3.12/site-packages/nltk/ccg/logic.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# Natural Language Toolkit: Combinatory Categorial Grammar
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Tanin Na Nakorn (@tanin)
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
"""
|
||||
Helper functions for CCG semantics computation
|
||||
"""
|
||||
|
||||
import copy
|
||||
|
||||
from nltk.sem.logic import *
|
||||
|
||||
|
||||
def compute_type_raised_semantics(semantics):
    """
    Type-raise the semantics of a category.

    Wraps the core (innermost non-lambda term) of *semantics* in an
    application of a fresh function variable F, then abstracts over F:
    \\x.P(x) becomes \\F x.F(P(x)).
    """
    # Work on a copy so the caller's expression is not mutated.
    semantics_copy = copy.deepcopy(semantics)
    core = semantics_copy
    parent = None
    # Walk down the chain of lambda binders to the innermost term,
    # remembering the last binder so the core can be spliced back in.
    while isinstance(core, LambdaExpression):
        parent = core
        core = core.term

    # Pick a function-variable name that does not clash with free variables.
    var = Variable("F")
    while var in core.free():
        var = unique_variable(pattern=var)
    core = ApplicationExpression(FunctionVariableExpression(var), core)

    if parent is not None:
        # Splice the wrapped core back under the innermost lambda (in place).
        parent.term = core
    else:
        # No lambdas at all: the wrapped core is the whole body.
        semantics_copy = core

    return LambdaExpression(var, semantics_copy)
|
||||
|
||||
|
||||
def compute_function_semantics(function, argument):
    """Apply *function* to *argument* and beta-reduce the result."""
    application = ApplicationExpression(function, argument)
    return application.simplify()
|
||||
|
||||
|
||||
def compute_composition_semantics(function, argument):
    """Compose *function* with the lambda expression *argument*:
    f, \\x.g(x) -> \\x.f(g(x)).

    Raises AssertionError when *argument* is not a lambda expression.
    """
    # Validate with an explicit raise rather than `assert`, which is
    # stripped under `python -O`.
    if not isinstance(argument, LambdaExpression):
        raise AssertionError("`" + str(argument) + "` must be a lambda expression")
    body = ApplicationExpression(function, argument.term).simplify()
    return LambdaExpression(argument.variable, body)
|
||||
|
||||
|
||||
def compute_substitution_semantics(function, argument):
    """Substitution semantics: \\x y.f(x,y), \\x.g(x) -> \\x.f(x, g(x)).

    *function* must be a two-argument lambda expression and *argument* a
    lambda expression; raises AssertionError otherwise.
    """
    # Validate with explicit raises rather than `assert`, which is
    # stripped under `python -O`.
    if not (
        isinstance(function, LambdaExpression)
        and isinstance(function.term, LambdaExpression)
    ):
        raise AssertionError(
            "`" + str(function) + "` must be a lambda expression with 2 arguments"
        )
    if not isinstance(argument, LambdaExpression):
        raise AssertionError("`" + str(argument) + "` must be a lambda expression")

    new_argument = ApplicationExpression(
        argument, VariableExpression(function.variable)
    ).simplify()
    new_term = ApplicationExpression(function.term, new_argument).simplify()

    return LambdaExpression(function.variable, new_term)
|
||||
Reference in New Issue
Block a user