updates
Backend/venv/lib/python3.12/site-packages/nltk/sem/__init__.py (75 lines, new file)
@@ -0,0 +1,75 @@
# Natural Language Toolkit: Semantic Interpretation
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
NLTK Semantic Interpretation Package

This package contains classes for representing semantic structure in
formulas of first-order logic and for evaluating such formulas in
set-theoretic models.

>>> from nltk.sem import logic
>>> logic._counter._value = 0

The package has two main components:

- ``logic`` provides support for analyzing expressions of First
  Order Logic (FOL).
- ``evaluate`` allows users to recursively determine truth in a
  model for formulas of FOL.

A model consists of a domain of discourse and a valuation function,
which assigns values to non-logical constants. We assume that entities
in the domain are represented as strings such as ``'b1'``, ``'g1'``,
etc. A ``Valuation`` is initialized with a list of (symbol, value)
pairs, where values are entities, sets of entities or sets of tuples
of entities.
The domain of discourse can be inferred from the valuation, and a model
is then created with domain and valuation as parameters.

>>> from nltk.sem import Valuation, Model
>>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
... ('dog', set(['d1'])),
... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
>>> val = Valuation(v)
>>> dom = val.domain
>>> m = Model(dom, val)
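
As a quick check (a minimal sketch; ``Assignment`` and ``Model.evaluate``
are part of the ``evaluate`` module whose names are re-exported below),
truth in the model can then be queried relative to a variable assignment:

>>> from nltk.sem import Assignment
>>> g = Assignment(dom)
>>> m.evaluate('love(adam, betty)', g)
True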
"""

from nltk.sem.boxer import Boxer
from nltk.sem.drt import DRS, DrtExpression
from nltk.sem.evaluate import (
    Assignment,
    Model,
    Undefined,
    Valuation,
    arity,
    is_rel,
    read_valuation,
    set2rel,
)
from nltk.sem.lfg import FStructure
from nltk.sem.logic import (
    ApplicationExpression,
    Expression,
    LogicalExpressionException,
    Variable,
    binding_ops,
    boolean_ops,
    equality_preds,
    read_logic,
)
from nltk.sem.relextract import clause, extract_rels, rtuple
from nltk.sem.skolemize import skolemize
from nltk.sem.util import evaluate_sents, interpret_sents, parse_sents, root_semrep

# from nltk.sem.glue import Glue
# from nltk.sem.hole import HoleSemantics
# from nltk.sem.cooper_storage import CooperStore

# don't import chat80 as its names are too generic
15 binary files not shown.
Backend/venv/lib/python3.12/site-packages/nltk/sem/boxer.py (1609 lines, new file)
File diff suppressed because it is too large

Backend/venv/lib/python3.12/site-packages/nltk/sem/chat80.py (857 lines, new file)
@@ -0,0 +1,857 @@
# Natural Language Toolkit: Chat-80 KB Reader
# See https://www.w3.org/TR/swbp-skos-core-guide/
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
# URL: <https://www.nltk.org>
# For license information, see LICENSE.TXT

r"""
Overview
========

Chat-80 was a natural language system which allowed the user to
interrogate a Prolog knowledge base in the domain of world
geography. It was developed in the early '80s by Warren and Pereira; see
``https://www.aclweb.org/anthology/J82-3002.pdf`` for a description and
``http://www.cis.upenn.edu/~pereira/oldies.html`` for the source
files.

This module contains functions to extract data from the Chat-80
relation files ('the world database'), and convert them into a format
that can be incorporated in the FOL models of
``nltk.sem.evaluate``. The code assumes that the Prolog
input files are available in the NLTK corpora directory.

The Chat-80 World Database consists of the following files::

    world0.pl
    rivers.pl
    cities.pl
    countries.pl
    contain.pl
    borders.pl

This module uses a slightly modified version of ``world0.pl``, in which
a set of Prolog rules has been omitted. The modified file is named
``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since
it uses a list rather than a string in the second field.

Reading Chat-80 Files
=====================

Chat-80 relations are like tables in a relational database. The
relation acts as the name of the table; the first argument acts as the
'primary key'; and subsequent arguments are further fields in the
table. In general, the name of the table provides a label for a unary
predicate whose extension is all the primary keys. For example,
relations in ``cities.pl`` are of the following form::

    'city(athens,greece,1368).'

Here, ``'athens'`` is the key, and will be mapped to a member of the
unary predicate *city*.

The fields in the table are mapped to binary predicates. The first
argument of the predicate is the primary key, while the second
argument is the data in the relevant field. Thus, in the above
example, the third field is mapped to the binary predicate
*population_of*, whose extension is a set of pairs such as
``'(athens, 1368)'``.

An exception to this general framework is required by the relations in
the files ``borders.pl`` and ``contain.pl``. These contain facts of the
following form::

    'borders(albania,greece).'

    'contains0(africa,central_africa).'

We do not want to form a unary concept out of the element in
the first field of these records, and we want the label of the binary
relation just to be ``'border'``/``'contain'`` respectively.

In order to drive the extraction process, we use 'relation metadata bundles'
which are Python dictionaries such as the following::

    city = {'label': 'city',
            'closures': [],
            'schema': ['city', 'country', 'population'],
            'filename': 'cities.pl'}

According to this, the file ``city['filename']`` contains a list of
relational tuples (or more accurately, the corresponding strings in
Prolog form) whose predicate symbol is ``city['label']`` and whose
relational schema is ``city['schema']``. The notion of a ``closure`` is
discussed in the next section.

Concepts
========
In order to encapsulate the results of the extraction, a class of
``Concept`` objects is introduced. A ``Concept`` object has a number of
attributes, in particular a ``prefLabel`` and ``extension``, which make
it easier to inspect the output of the extraction. In addition, the
``extension`` can be further processed: in the case of the ``'border'``
relation, we check that the relation is symmetric, and in the case
of the ``'contain'`` relation, we carry out the transitive
closure. The closure properties associated with a concept are
recorded in the relation metadata, as indicated earlier.

The ``extension`` of a ``Concept`` object is then incorporated into a
``Valuation`` object.

Persistence
===========
The functions ``val_dump`` and ``val_load`` are provided to allow a
valuation to be stored in a persistent database and re-loaded, rather
than having to be re-computed each time.

Individuals and Lexical Items
=============================
As well as deriving relations from the Chat-80 data, we also create a
set of individual constants, one for each entity in the domain. The
individual constants are string-identical to the entities. For
example, given a data item such as ``'zloty'``, we add to the valuation
a pair ``('zloty', 'zloty')``. In order to parse English sentences that
refer to these entities, we also create a lexical item such as the
following for each individual constant::

    PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'

The set of rules is written to the file ``chat_pnames.cfg`` in the
current directory.
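
A minimal usage sketch (assuming the ``chat80`` corpus is installed in
``nltk_data``; the doctest is skipped because the data may be absent):

>>> from nltk.sem import chat80
>>> for c in chat80.concepts(('city',)):  # doctest: +SKIP
...     print(c.prefLabel, c.arity)
city 1
country_of 2
population_of 2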

"""

import os
import re
import shelve
import sys

import nltk.data

###########################################################################
# Chat-80 relation metadata bundles needed to build the valuation
###########################################################################

borders = {
    "rel_name": "borders",
    "closures": ["symmetric"],
    "schema": ["region", "border"],
    "filename": "borders.pl",
}

contains = {
    "rel_name": "contains0",
    "closures": ["transitive"],
    "schema": ["region", "contain"],
    "filename": "contain.pl",
}

city = {
    "rel_name": "city",
    "closures": [],
    "schema": ["city", "country", "population"],
    "filename": "cities.pl",
}

country = {
    "rel_name": "country",
    "closures": [],
    "schema": [
        "country",
        "region",
        "latitude",
        "longitude",
        "area",
        "population",
        "capital",
        "currency",
    ],
    "filename": "countries.pl",
}

circle_of_lat = {
    "rel_name": "circle_of_latitude",
    "closures": [],
    "schema": ["circle_of_latitude", "degrees"],
    "filename": "world1.pl",
}

circle_of_long = {
    "rel_name": "circle_of_longitude",
    "closures": [],
    "schema": ["circle_of_longitude", "degrees"],
    "filename": "world1.pl",
}

continent = {
    "rel_name": "continent",
    "closures": [],
    "schema": ["continent"],
    "filename": "world1.pl",
}

region = {
    "rel_name": "in_continent",
    "closures": [],
    "schema": ["region", "continent"],
    "filename": "world1.pl",
}

ocean = {
    "rel_name": "ocean",
    "closures": [],
    "schema": ["ocean"],
    "filename": "world1.pl",
}

sea = {"rel_name": "sea", "closures": [], "schema": ["sea"], "filename": "world1.pl"}


items = [
    "borders",
    "contains",
    "city",
    "country",
    "circle_of_lat",
    "circle_of_long",
    "continent",
    "region",
    "ocean",
    "sea",
]
items = tuple(sorted(items))

item_metadata = {
    "borders": borders,
    "contains": contains,
    "city": city,
    "country": country,
    "circle_of_lat": circle_of_lat,
    "circle_of_long": circle_of_long,
    "continent": continent,
    "region": region,
    "ocean": ocean,
    "sea": sea,
}

rels = item_metadata.values()

not_unary = ["borders.pl", "contain.pl"]

###########################################################################


class Concept:
    """
    A Concept class, loosely based on SKOS
    (https://www.w3.org/TR/swbp-skos-core-guide/).
    """

    def __init__(self, prefLabel, arity, altLabels=None, closures=None, extension=None):
        """
        :param prefLabel: the preferred label for the concept
        :type prefLabel: str
        :param arity: the arity of the concept
        :type arity: int
        :param altLabels: other (related) labels
        :type altLabels: list
        :param closures: closure properties of the extension
            (list items can be ``symmetric``, ``reflexive``, ``transitive``)
        :type closures: list
        :param extension: the extensional value of the concept
        :type extension: set
        """
        # use None instead of mutable default arguments: a shared default
        # set would be mutated by augment() across all instances
        self.prefLabel = prefLabel
        self.arity = arity
        self.altLabels = altLabels if altLabels is not None else []
        self.closures = closures if closures is not None else []
        # keep _extension internally as a set
        self._extension = extension if extension is not None else set()
        # public access is via a list (for slicing)
        self.extension = sorted(list(self._extension))

    def __str__(self):
        # _extension = ''
        # for element in sorted(self.extension):
        #     if isinstance(element, tuple):
        #         element = '(%s, %s)' % (element)
        #     _extension += element + ', '
        # _extension = _extension[:-1]

        return "Label = '{}'\nArity = {}\nExtension = {}".format(
            self.prefLabel,
            self.arity,
            self.extension,
        )

    def __repr__(self):
        return "Concept('%s')" % self.prefLabel

    def augment(self, data):
        """
        Add more data to the ``Concept``'s extension set.

        :param data: a new semantic value
        :type data: string or pair of strings
        :rtype: set

        """
        self._extension.add(data)
        self.extension = sorted(list(self._extension))
        return self._extension

    def _make_graph(self, s):
        """
        Convert a set of pairs into an adjacency linked list encoding of a graph.
        """
        g = {}
        for x, y in s:
            if x in g:
                g[x].append(y)
            else:
                g[x] = [y]
        return g

    def _transclose(self, g):
        """
        Compute the transitive closure of a graph represented as a linked list.
        """
        for x in g:
            for adjacent in g[x]:
                # check that adjacent is a key
                if adjacent in g:
                    for y in g[adjacent]:
                        if y not in g[x]:
                            # appending while iterating over g[x] is deliberate:
                            # the outer loop also visits the newly added nodes,
                            # so chains longer than two edges get closed too
                            g[x].append(y)
        return g

    def _make_pairs(self, g):
        """
        Convert an adjacency linked list back into a set of pairs.
        """
        pairs = []
        for node in g:
            for adjacent in g[node]:
                pairs.append((node, adjacent))
        return set(pairs)

    def close(self):
        """
        Close a binary relation in the ``Concept``'s extension set, so that
        the relation is closed under each property listed in ``self.closures``.
        The closure is computed in place, and ``self.extension`` is updated.
        """
        from nltk.sem import is_rel

        assert is_rel(self._extension)
        if "symmetric" in self.closures:
            pairs = []
            for x, y in self._extension:
                pairs.append((y, x))
            sym = set(pairs)
            self._extension = self._extension.union(sym)
        if "transitive" in self.closures:
            graph = self._make_graph(self._extension)
            closed = self._transclose(graph)
            trans = self._make_pairs(closed)
            self._extension = self._extension.union(trans)
        self.extension = sorted(list(self._extension))


def clause2concepts(filename, rel_name, schema, closures=[]):
    """
    Convert a file of Prolog clauses into a list of ``Concept`` objects.

    :param filename: filename containing the relations
    :type filename: str
    :param rel_name: name of the relation
    :type rel_name: str
    :param schema: the schema used in a set of relational tuples
    :type schema: list
    :param closures: closure properties for the extension of the concept
    :type closures: list
    :return: a list of ``Concept`` objects
    :rtype: list
    """
    concepts = []
    # position of the subject of a binary relation
    subj = 0
    # label of the 'primary key'
    pkey = schema[0]
    # fields other than the primary key
    fields = schema[1:]

    # convert a file into a list of lists
    records = _str2records(filename, rel_name)

    # add a unary concept corresponding to the set of entities
    # in the primary key position
    # relations in 'not_unary' are more like ordinary binary relations
    if filename not in not_unary:
        concepts.append(unary_concept(pkey, subj, records))

    # add a binary concept for each non-key field
    for field in fields:
        obj = schema.index(field)
        concepts.append(binary_concept(field, closures, subj, obj, records))

    return concepts
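
# Illustrative sketch (hypothetical session; assumes the chat80 corpus has
# been downloaded into nltk_data). For the 'city' bundle defined above,
# this yields one unary concept for the primary key plus one binary
# '<field>_of' concept per remaining field:
#
#     >>> cs = clause2concepts('cities.pl', 'city',
#     ...                      ['city', 'country', 'population'])
#     >>> [c.prefLabel for c in cs]
#     ['city', 'country_of', 'population_of']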


def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
    """
    Convert a file of Prolog clauses into a database table.

    This is not generic, since it doesn't allow arbitrary
    schemas to be set as a parameter.

    Intended usage::

        cities2table('cities.pl', 'city', 'city.db', verbose=True, setup=True)

    :param filename: filename containing the relations
    :type filename: str
    :param rel_name: name of the relation
    :type rel_name: str
    :param dbname: filename of persistent store
    :type dbname: str
    """
    import sqlite3

    records = _str2records(filename, rel_name)
    connection = sqlite3.connect(dbname)
    cur = connection.cursor()
    if setup:
        cur.execute(
            """CREATE TABLE city_table
            (City text, Country text, Population int)"""
        )

    table_name = "city_table"
    for t in records:
        cur.execute("insert into %s values (?,?,?)" % table_name, t)
        if verbose:
            print("inserting values into %s: " % table_name, t)
    connection.commit()
    if verbose:
        print("Committing update to %s" % dbname)
    cur.close()


def sql_query(dbname, query):
    """
    Execute an SQL query over a database.

    :param dbname: filename of persistent store
    :type dbname: str
    :param query: SQL query
    :type query: str
    """
    import sqlite3

    try:
        path = nltk.data.find(dbname)
        connection = sqlite3.connect(str(path))
        cur = connection.cursor()
        return cur.execute(query)
    except (ValueError, sqlite3.OperationalError):
        import warnings

        warnings.warn(
            "Make sure the database file %s is installed and uncompressed." % dbname
        )
        raise


def _str2records(filename, rel):
    """
    Read a file into memory and convert each relation clause into a list.
    """
    recs = []
    contents = nltk.data.load("corpora/chat80/%s" % filename, format="text")
    for line in contents.splitlines():
        if line.startswith(rel):
            line = re.sub(rel + r"\(", "", line)
            line = re.sub(r"\)\.$", "", line)
            record = line.split(",")
            recs.append(record)
    return recs


def unary_concept(label, subj, records):
    """
    Make a unary concept out of the primary key in a record.

    A record is a list of entities in some relation, such as
    ``['france', 'paris']``, where ``'france'`` is acting as the primary
    key.

    :param label: the preferred label for the concept
    :type label: string
    :param subj: position in the record of the subject of the predicate
    :type subj: int
    :param records: a list of records
    :type records: list of lists
    :return: ``Concept`` of arity 1
    :rtype: Concept
    """
    c = Concept(label, arity=1, extension=set())
    for record in records:
        c.augment(record[subj])
    return c


def binary_concept(label, closures, subj, obj, records):
    """
    Make a binary concept out of the primary key and another field in a record.

    A record is a list of entities in some relation, such as
    ``['france', 'paris']``, where ``'france'`` is acting as the primary
    key, and ``'paris'`` stands in the ``'capital_of'`` relation to
    ``'france'``.

    More generally, given a record such as ``['a', 'b', 'c']``, where
    label is bound to ``'B'``, and ``obj`` bound to 1, the derived
    binary concept will have label ``'B_of'``, and its extension will
    be a set of pairs such as ``('a', 'b')``.


    :param label: the base part of the preferred label for the concept
    :type label: str
    :param closures: closure properties for the extension of the concept
    :type closures: list
    :param subj: position in the record of the subject of the predicate
    :type subj: int
    :param obj: position in the record of the object of the predicate
    :type obj: int
    :param records: a list of records
    :type records: list of lists
    :return: ``Concept`` of arity 2
    :rtype: Concept
    """
    if label not in ("border", "contain"):
        label = label + "_of"
    c = Concept(label, arity=2, closures=closures, extension=set())
    for record in records:
        c.augment((record[subj], record[obj]))
    # close the concept's extension according to the properties in closures
    c.close()
    return c


def process_bundle(rels):
    """
    Given a list of relation metadata bundles, make a corresponding
    dictionary of concepts, indexed by the relation name.

    :param rels: bundle of metadata needed for constructing a concept
    :type rels: list(dict)
    :return: a dictionary of concepts, indexed by the relation name.
    :rtype: dict(str): Concept
    """
    concepts = {}
    for rel in rels:
        rel_name = rel["rel_name"]
        closures = rel["closures"]
        schema = rel["schema"]
        filename = rel["filename"]

        concept_list = clause2concepts(filename, rel_name, schema, closures)
        for c in concept_list:
            label = c.prefLabel
            if label in concepts:
                for data in c.extension:
                    concepts[label].augment(data)
                concepts[label].close()
            else:
                concepts[label] = c
    return concepts


def make_valuation(concepts, read=False, lexicon=False):
    """
    Convert a list of ``Concept`` objects into a list of (label, extension) pairs;
    optionally create a ``Valuation`` object.

    :param concepts: concepts
    :type concepts: list(Concept)
    :param read: if ``True``, ``(symbol, set)`` pairs are read into a ``Valuation``
    :type read: bool
    :param lexicon: if ``True``, also write lexical rules for the individual
        constants (implies ``read=True``)
    :type lexicon: bool
    :rtype: list or Valuation
    """
    vals = []

    for c in concepts:
        vals.append((c.prefLabel, c.extension))
    if lexicon:
        read = True
    if read:
        from nltk.sem import Valuation

        val = Valuation({})
        val.update(vals)
        # add labels for individuals
        val = label_indivs(val, lexicon=lexicon)
        return val
    else:
        return vals
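
# Sketch of the common pattern (mirrors main() below; assumes the chat80
# corpus data is installed):
#
#     val = make_valuation(process_bundle(rels).values(), read=True)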


def val_dump(rels, db):
    """
    Make a ``Valuation`` from a list of relation metadata bundles and dump to
    persistent database.

    :param rels: bundle of metadata needed for constructing a concept
    :type rels: list of dict
    :param db: name of file to which data is written.
        The suffix '.db' will be automatically appended.
    :type db: str
    """
    concepts = process_bundle(rels).values()
    valuation = make_valuation(concepts, read=True)
    db_out = shelve.open(db, "n")

    db_out.update(valuation)

    db_out.close()


def val_load(db):
    """
    Load a ``Valuation`` from a persistent database.

    :param db: name of file from which data is read.
        The suffix '.db' should be omitted from the name.
    :type db: str
    """
    dbname = db + ".db"

    if not os.access(dbname, os.R_OK):
        sys.exit("Cannot read file: %s" % dbname)
    else:
        db_in = shelve.open(db)
        from nltk.sem import Valuation

        # Valuation() expects (symbol, value) pairs, so iterate the
        # shelf's items rather than the shelf itself (which yields keys)
        val = Valuation(db_in.items())
        return val
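
# Round-trip sketch (hypothetical; writes 'chat.db' in the current
# directory via shelve, assuming the chat80 corpus is installed):
#
#     val_dump(rels, 'chat')
#     val = val_load('chat')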


# def alpha(str):
#     """
#     Utility to filter out non-alphabetic constants.
#
#     :param str: candidate constant
#     :type str: string
#     :rtype: bool
#     """
#     try:
#         int(str)
#         return False
#     except ValueError:
#         # some unknown values in records are labeled '?'
#         if not str == '?':
#             return True


def label_indivs(valuation, lexicon=False):
    """
    Assign individual constants to the individuals in the domain of a ``Valuation``.

    Given a valuation with an entry of the form ``{'rel': {'a': True}}``,
    add a new entry ``{'a': 'a'}``.

    :type valuation: Valuation
    :rtype: Valuation
    """
    # collect all the individuals into a domain
    domain = valuation.domain
    # convert the domain into a sorted list of alphabetic terms
    # use the same string as a label
    pairs = [(e, e) for e in domain]
    if lexicon:
        lex = make_lex(domain)
        with open("chat_pnames.cfg", "w") as outfile:
            outfile.writelines(lex)
    # read the pairs into the valuation
    valuation.update(pairs)
    return valuation


def make_lex(symbols):
    """
    Create lexical CFG rules for each individual symbol.

    Given a valuation with an entry of the form ``{'zloty': 'zloty'}``,
    create a lexical rule for the proper name 'Zloty'.

    :param symbols: a list of individual constants in the semantic representation
    :type symbols: sequence -- set(str)
    :rtype: list(str)
    """
    lex = []
    header = """
##################################################################
# Lexical rules automatically generated by running 'chat80.py -x'.
##################################################################

"""
    lex.append(header)
    # not a raw string: the rule must end in a real newline, so the
    # backslash before P is escaped explicitly instead
    template = "PropN[num=sg, sem=<\\P.(P %s)>] -> '%s'\n"

    for s in symbols:
        parts = s.split("_")
        caps = [p.capitalize() for p in parts]
        pname = "_".join(caps)
        rule = template % (s, pname)
        lex.append(rule)
    return lex


###########################################################################
# Interface function to emulate other corpus readers
###########################################################################


def concepts(items=items):
    """
    Build a list of concepts corresponding to the relation names in ``items``.

    :param items: names of the Chat-80 relations to extract
    :type items: list(str)
    :return: the ``Concept`` objects which are extracted from the relations
    :rtype: list(Concept)
    """
    if isinstance(items, str):
        items = (items,)

    rels = [item_metadata[r] for r in items]

    concept_map = process_bundle(rels)
    return concept_map.values()


###########################################################################


def main():
    import sys
    from optparse import OptionParser

    description = """
    Extract data from the Chat-80 Prolog files and convert them into a
    Valuation object for use in the NLTK semantics package.
    """

    opts = OptionParser(description=description)
    opts.set_defaults(verbose=True, lex=False, vocab=False)
    opts.add_option(
        "-s", "--store", dest="outdb", help="store a valuation in DB", metavar="DB"
    )
    opts.add_option(
        "-l",
        "--load",
        dest="indb",
        help="load a stored valuation from DB",
        metavar="DB",
    )
    opts.add_option(
        "-c",
        "--concepts",
        action="store_true",
        help="print concepts instead of a valuation",
    )
    opts.add_option(
        "-r",
        "--relation",
        dest="label",
        help="print concept with label REL (check possible labels with '-v' option)",
        metavar="REL",
    )
    opts.add_option(
        "-q",
        "--quiet",
        action="store_false",
        dest="verbose",
        help="don't print out progress info",
    )
    opts.add_option(
        "-x",
        "--lex",
        action="store_true",
        dest="lex",
        help="write a file of lexical entries for country names, then exit",
    )
    opts.add_option(
        "-v",
        "--vocab",
        action="store_true",
        dest="vocab",
        help="print out the vocabulary of concept labels and their arity, then exit",
    )

    (options, args) = opts.parse_args()
    if options.outdb and options.indb:
        opts.error("Options --store and --load are mutually exclusive")

    if options.outdb:
        # write the valuation to a persistent database
        if options.verbose:
            outdb = options.outdb + ".db"
            print("Dumping a valuation to %s" % outdb)
        val_dump(rels, options.outdb)
        sys.exit(0)
    else:
        # try to read in a valuation from a database
        if options.indb is not None:
            dbname = options.indb + ".db"
            if not os.access(dbname, os.R_OK):
                sys.exit("Cannot read file: %s" % dbname)
            else:
                valuation = val_load(options.indb)
        # we need to create the valuation from scratch
        else:
            # build some concepts
            concept_map = process_bundle(rels)
            concepts = concept_map.values()
            # just print out the vocabulary
            if options.vocab:
                items = sorted((c.arity, c.prefLabel) for c in concepts)
                for arity, label in items:
                    print(label, arity)
                sys.exit(0)
            # show all the concepts
            if options.concepts:
                for c in concepts:
                    print(c)
                    print()
            if options.label:
                print(concept_map[options.label])
                sys.exit(0)
            else:
                # turn the concepts into a Valuation
                if options.lex:
                    if options.verbose:
                        print("Writing out lexical rules")
                    make_valuation(concepts, lexicon=True)
                else:
                    valuation = make_valuation(concepts, read=True)
                    print(valuation)


def sql_demo():
    """
    Print out every row from the 'city.db' database.
    """
    print()
    print("Using SQL to extract rows from 'city.db' RDB.")
    for row in sql_query("corpora/city_database/city.db", "SELECT * FROM city_table"):
        print(row)


if __name__ == "__main__":
    main()
    sql_demo()

Backend/venv/lib/python3.12/site-packages/nltk/sem/cooper_storage.py (124 lines, new file)
@@ -0,0 +1,124 @@
# Natural Language Toolkit: Cooper storage for Quantifier Ambiguity
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from nltk.parse import load_parser
from nltk.parse.featurechart import InstantiateVarsChart
from nltk.sem.logic import ApplicationExpression, LambdaExpression, Variable


class CooperStore:
    """
    A container for handling quantifier ambiguity via Cooper storage.
    """

    def __init__(self, featstruct):
        """
        :param featstruct: The value of the ``sem`` node in a tree from
            ``parse_with_bindops()``
        :type featstruct: FeatStruct (with features ``core`` and ``store``)

        """
        self.featstruct = featstruct
        self.readings = []
        try:
            self.core = featstruct["CORE"]
            self.store = featstruct["STORE"]
        except KeyError:
            print("%s is not a Cooper storage structure" % featstruct)

    def _permute(self, lst):
        """
        :return: An iterator over the permutations of the input list
        :type lst: list
        :rtype: iter
        """
        # e.g. list(self._permute([1, 2])) == [(1, 2), (2, 1)]
        remove = lambda lst0, index: lst0[:index] + lst0[index + 1 :]
        if lst:
            for index, x in enumerate(lst):
                for y in self._permute(remove(lst, index)):
                    yield (x,) + y
        else:
            yield ()

    def s_retrieve(self, trace=False):
        r"""
        Carry out S-Retrieval of binding operators in store.

        Each permutation of the store (i.e. list of binding operators) is
        taken to be a possible scoping of quantifiers. We iterate through the
        binding operators in each permutation, and successively apply them to
        the current term, starting with the core semantic representation,
        working from the inside out.

        Binding operators are of the form::

            bo(\P.all x.(man(x) -> P(x)),z1)
        """
        for perm, store_perm in enumerate(self._permute(self.store)):
            if trace:
                print("Permutation %s" % (perm + 1))
            term = self.core
            for bindop in store_perm:
                # we just want the arguments that are wrapped by the 'bo' predicate
                quant, varex = tuple(bindop.args)
                # use var to make an abstraction over the current term and then
                # apply the quantifier to it
                term = ApplicationExpression(
                    quant, LambdaExpression(varex.variable, term)
                )
                if trace:
                    print("  ", term)
                term = term.simplify()
            self.readings.append(term)


def parse_with_bindops(sentence, grammar=None, trace=0):
    """
    Use a grammar with Binding Operators to parse a sentence.
    """
    if not grammar:
        grammar = "grammars/book_grammars/storage.fcfg"
    parser = load_parser(grammar, trace=trace, chart_class=InstantiateVarsChart)
    # Parse the sentence.
    tokens = sentence.split()
    return list(parser.parse(tokens))
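
# End-to-end sketch of the storage workflow (mirrors demo() below; assumes
# the book grammar 'grammars/book_grammars/storage.fcfg' is installed in
# nltk_data):
#
#     trees = parse_with_bindops('every girl chases a dog')
#     semrep = CooperStore(trees[0].label()['SEM'])
#     semrep.s_retrieve()
#     for reading in semrep.readings:
#         print(reading)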


def demo():
    from nltk.sem import cooper_storage as cs

    sentence = "every girl chases a dog"
    # sentence = "a man gives a bone to every dog"
    print()
    print("Analysis of sentence '%s'" % sentence)
    print("=" * 50)
    trees = cs.parse_with_bindops(sentence, trace=0)
    for tree in trees:
        semrep = cs.CooperStore(tree.label()["SEM"])
        print()
        print("Binding operators:")
        print("-" * 15)
        for s in semrep.store:
            print(s)
        print()
        print("Core:")
        print("-" * 15)
        print(semrep.core)
        print()
        print("S-Retrieval:")
        print("-" * 15)
        semrep.s_retrieve(trace=True)
        print("Readings:")
        print("-" * 15)

        for i, reading in enumerate(semrep.readings):
            print(f"{i + 1}: {reading}")


if __name__ == "__main__":
    demo()
Backend/venv/lib/python3.12/site-packages/nltk/sem/drt.py (1456 lines, new file)
File diff suppressed because it is too large

Backend/venv/lib/python3.12/site-packages/nltk/sem/drt_glue_demo.py (553 lines, new file)
@@ -0,0 +1,553 @@
# Natural Language Toolkit: GUI Demo for Glue Semantics with Discourse
# Representation Theory (DRT) as meaning language
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

try:
    from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
    from tkinter.font import Font

    from nltk.draw.util import CanvasFrame, ShowText

except ImportError:
    """Ignore ImportError because tkinter might not be available."""

from nltk.parse import MaltParser
from nltk.sem.drt import DrsDrawer, DrtVariableExpression
from nltk.sem.glue import DrtGlue
from nltk.sem.logic import Variable
from nltk.tag import RegexpTagger
from nltk.util import in_idle


class DrtGlueDemo:
    def __init__(self, examples):
        # Set up the main window.
        self._top = Tk()
        self._top.title("DRT Glue Demo")

        # Set up key bindings.
        self._init_bindings()

        # Initialize the fonts.
        self._init_fonts(self._top)

        self._examples = examples
        self._readingCache = [None for example in examples]

        # The user can hide the grammar.
        self._show_grammar = IntVar(self._top)
        self._show_grammar.set(1)

        # Set the data to None
        self._curExample = -1
        self._readings = []
        self._drs = None
        self._drsWidget = None
        self._error = None

        self._init_glue()

        # Create the basic frames.
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_exampleListbox(self._top)
        self._init_readingListbox(self._top)
        self._init_canvas(self._top)

        # Resize callback
        self._canvas.bind("<Configure>", self._configure)

    #########################################
    ## Initialization Helpers
    #########################################

    def _init_glue(self):
        tagger = RegexpTagger(
            [
                ("^(David|Mary|John)$", "NNP"),
                (
                    "^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
                    "VB",
                ),
                ("^(go|order|vanish|find|approach)$", "VB"),
                ("^(a)$", "ex_quant"),
                ("^(every)$", "univ_quant"),
                ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
                ("^(big|gray|former)$", "JJ"),
                ("^(him|himself)$", "PRP"),
            ]
        )

        depparser = MaltParser(tagger=tagger)
        self._glue = DrtGlue(depparser=depparser, remove_duplicates=False)

    def _init_fonts(self, root):
        # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
        self._sysfont = Font(font=Button()["font"])
        root.option_add("*Font", self._sysfont)

        # What's our font size (default=same as sysfont)
        self._size = IntVar(root)
        self._size.set(self._sysfont.cget("size"))

        self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
        self._font = Font(family="helvetica", size=self._size.get())
        if self._size.get() < 0:
            big = self._size.get() - 2
        else:
            big = self._size.get() + 2
        self._bigfont = Font(family="helvetica", weight="bold", size=big)

    def _init_exampleListbox(self, parent):
        self._exampleFrame = listframe = Frame(parent)
        self._exampleFrame.pack(fill="both", side="left", padx=2)
        self._exampleList_label = Label(
            self._exampleFrame, font=self._boldfont, text="Examples"
        )
        self._exampleList_label.pack()
        self._exampleList = Listbox(
            self._exampleFrame,
            selectmode="single",
            relief="groove",
            background="white",
            foreground="#909090",
            font=self._font,
            selectforeground="#004040",
            selectbackground="#c0f0c0",
        )

        self._exampleList.pack(side="right", fill="both", expand=1)

        for example in self._examples:
            self._exampleList.insert("end", (" %s" % example))
        self._exampleList.config(height=min(len(self._examples), 25), width=40)

        # Add a scrollbar if there are more than 25 examples.
        if len(self._examples) > 25:
            listscroll = Scrollbar(self._exampleFrame, orient="vertical")
            self._exampleList.config(yscrollcommand=listscroll.set)
            listscroll.config(command=self._exampleList.yview)
            listscroll.pack(side="left", fill="y")

        # If they select an example, apply it.
        self._exampleList.bind("<<ListboxSelect>>", self._exampleList_select)

    def _init_readingListbox(self, parent):
        self._readingFrame = listframe = Frame(parent)
        self._readingFrame.pack(fill="both", side="left", padx=2)
        self._readingList_label = Label(
            self._readingFrame, font=self._boldfont, text="Readings"
        )
        self._readingList_label.pack()
        self._readingList = Listbox(
            self._readingFrame,
            selectmode="single",
            relief="groove",
            background="white",
            foreground="#909090",
            font=self._font,
            selectforeground="#004040",
            selectbackground="#c0f0c0",
        )

        self._readingList.pack(side="right", fill="both", expand=1)

        # Add a scrollbar.
        listscroll = Scrollbar(self._readingFrame, orient="vertical")
        self._readingList.config(yscrollcommand=listscroll.set)
        listscroll.config(command=self._readingList.yview)
        listscroll.pack(side="right", fill="y")

        self._populate_readingListbox()

    def _populate_readingListbox(self):
        # Populate the listbox with integers
        self._readingList.delete(0, "end")
        for i in range(len(self._readings)):
            self._readingList.insert("end", (" %s" % (i + 1)))
        self._readingList.config(height=min(len(self._readings), 25), width=5)

        # If they select a reading, apply it.
        self._readingList.bind("<<ListboxSelect>>", self._readingList_select)

    def _init_bindings(self):
        # Key bindings are a good thing.
        self._top.bind("<Control-q>", self.destroy)
        self._top.bind("<Control-x>", self.destroy)
        self._top.bind("<Escape>", self.destroy)
        self._top.bind("n", self.next)
        self._top.bind("<space>", self.next)
        self._top.bind("p", self.prev)
        self._top.bind("<BackSpace>", self.prev)

    def _init_buttons(self, parent):
        # Set up the frames.
        self._buttonframe = buttonframe = Frame(parent)
        buttonframe.pack(fill="none", side="bottom", padx=3, pady=2)
        Button(
            buttonframe,
            text="Prev",
            background="#90c0d0",
            foreground="black",
            command=self.prev,
        ).pack(side="left")
        Button(
            buttonframe,
            text="Next",
            background="#90c0d0",
            foreground="black",
            command=self.next,
        ).pack(side="left")

    def _configure(self, event):
        self._autostep = 0
        (x1, y1, x2, y2) = self._cframe.scrollregion()
        y2 = event.height - 6
        self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2)
        self._redraw()

    def _init_canvas(self, parent):
        self._cframe = CanvasFrame(
            parent,
            background="white",
            # width=525, height=250,
            closeenough=10,
            border=2,
            relief="sunken",
        )
        self._cframe.pack(expand=1, fill="both", side="top", pady=2)
        canvas = self._canvas = self._cframe.canvas()

        # Initially, there's no tree or text
        self._tree = None
        self._textwidgets = []
        self._textline = None

    def _init_menubar(self, parent):
        menubar = Menu(parent)

        filemenu = Menu(menubar, tearoff=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        actionmenu = Menu(menubar, tearoff=0)
        actionmenu.add_command(
            label="Next", underline=0, command=self.next, accelerator="n, Space"
        )
        actionmenu.add_command(
            label="Previous", underline=0, command=self.prev, accelerator="p, Backspace"
        )
        menubar.add_cascade(label="Action", underline=0, menu=actionmenu)

        optionmenu = Menu(menubar, tearoff=0)
        optionmenu.add_checkbutton(
            label="Remove Duplicates",
            underline=0,
            variable=self._glue.remove_duplicates,
            command=self._toggle_remove_duplicates,
            accelerator="r",
        )
        menubar.add_cascade(label="Options", underline=0, menu=optionmenu)

        viewmenu = Menu(menubar, tearoff=0)
        viewmenu.add_radiobutton(
            label="Tiny",
            variable=self._size,
            underline=0,
            value=10,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Small",
            variable=self._size,
            underline=0,
            value=12,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Medium",
            variable=self._size,
            underline=0,
            value=14,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Large",
            variable=self._size,
            underline=0,
            value=18,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Huge",
            variable=self._size,
            underline=0,
            value=24,
            command=self.resize,
        )
        menubar.add_cascade(label="View", underline=0, menu=viewmenu)

        helpmenu = Menu(menubar, tearoff=0)
        helpmenu.add_command(label="About", underline=0, command=self.about)
        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

        parent.config(menu=menubar)

    #########################################
    ## Main draw procedure
    #########################################

    def _redraw(self):
        canvas = self._canvas

        # Delete the old DRS, widgets, etc.
        if self._drsWidget is not None:
            self._drsWidget.clear()

        if self._drs:
            self._drsWidget = DrsWidget(self._canvas, self._drs)
            self._drsWidget.draw()

        if self._error:
            self._drsWidget = DrsWidget(self._canvas, self._error)
            self._drsWidget.draw()

    #########################################
    ## Button Callbacks
    #########################################

    def destroy(self, *e):
        self._autostep = 0
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def prev(self, *e):
        selection = self._readingList.curselection()
        readingListSize = self._readingList.size()

        # there are readings
        if readingListSize > 0:
            # if one reading is currently selected
            if len(selection) == 1:
                index = int(selection[0])

                # if it's on (or before) the first item
                if index <= 0:
                    self._select_previous_example()
                else:
                    self._readingList_store_selection(index - 1)

            else:
                # select the last reading
                self._readingList_store_selection(readingListSize - 1)

        else:
            self._select_previous_example()

    def _select_previous_example(self):
        # if the current example is not the first example
        if self._curExample > 0:
            self._exampleList_store_selection(self._curExample - 1)
        else:
            # go to the last example
            self._exampleList_store_selection(len(self._examples) - 1)

    def next(self, *e):
        selection = self._readingList.curselection()
        readingListSize = self._readingList.size()

        # if there are readings
        if readingListSize > 0:
            # if one reading is currently selected
            if len(selection) == 1:
                index = int(selection[0])

                # if it's on (or past) the last item
                if index >= (readingListSize - 1):
                    self._select_next_example()
                else:
                    self._readingList_store_selection(index + 1)

            else:
                # select its first reading
                self._readingList_store_selection(0)

        else:
            self._select_next_example()

    def _select_next_example(self):
        # if the current example is not the last example
        if self._curExample < len(self._examples) - 1:
            self._exampleList_store_selection(self._curExample + 1)
        else:
            # go to the first example
            self._exampleList_store_selection(0)

    def about(self, *e):
        ABOUT = (
            "NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n"
            + "Written by Daniel H. Garrette"
        )
        TITLE = "About: NLTK DRT Glue Demo"
        try:
            from tkinter.messagebox import Message

            Message(message=ABOUT, title=TITLE).show()
        except:
            ShowText(self._top, TITLE, ABOUT)

    def postscript(self, *e):
        self._autostep = 0
        self._cframe.print_to_file()

    def mainloop(self, *args, **kwargs):
        """
        Enter the Tkinter mainloop. This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a script); otherwise, the demo will close as soon as
        the script completes.
        """
        if in_idle():
            return
        self._top.mainloop(*args, **kwargs)

    def resize(self, size=None):
        if size is not None:
            self._size.set(size)
        size = self._size.get()
        self._font.configure(size=-(abs(size)))
        self._boldfont.configure(size=-(abs(size)))
        self._sysfont.configure(size=-(abs(size)))
        self._bigfont.configure(size=-(abs(size + 2)))
        self._redraw()

    def _toggle_remove_duplicates(self):
        self._glue.remove_duplicates = not self._glue.remove_duplicates

        self._exampleList.selection_clear(0, "end")
        self._readings = []
        self._populate_readingListbox()
        self._readingCache = [None for ex in self._examples]
        self._curExample = -1
        self._error = None

        self._drs = None
        self._redraw()

    def _exampleList_select(self, event):
        selection = self._exampleList.curselection()
        if len(selection) != 1:
            return
        self._exampleList_store_selection(int(selection[0]))

    def _exampleList_store_selection(self, index):
        self._curExample = index
        example = self._examples[index]

        self._exampleList.selection_clear(0, "end")
        if example:
            cache = self._readingCache[index]
            if cache:
                if isinstance(cache, list):
                    self._readings = cache
                    self._error = None
                else:
                    self._readings = []
                    self._error = cache
            else:
                try:
                    self._readings = self._glue.parse_to_meaning(example)
                    self._error = None
                    self._readingCache[index] = self._readings
                except Exception as e:
                    self._readings = []
                    self._error = DrtVariableExpression(Variable("Error: " + str(e)))
                    self._readingCache[index] = self._error

                    # add a star to the end of the example
                    self._exampleList.delete(index)
                    self._exampleList.insert(index, (" %s *" % example))
                    self._exampleList.config(
                        height=min(len(self._examples), 25), width=40
                    )

            self._populate_readingListbox()

            self._exampleList.selection_set(index)

            self._drs = None
            self._redraw()

    def _readingList_select(self, event):
        selection = self._readingList.curselection()
        if len(selection) != 1:
            return
        self._readingList_store_selection(int(selection[0]))

    def _readingList_store_selection(self, index):
        reading = self._readings[index]

        self._readingList.selection_clear(0, "end")
        if reading:
            self._readingList.selection_set(index)

            self._drs = reading.simplify().normalize().resolve_anaphora()

            self._redraw()


class DrsWidget:
    def __init__(self, canvas, drs, **attribs):
        self._drs = drs
        self._canvas = canvas
        canvas.font = Font(
            font=canvas.itemcget(canvas.create_text(0, 0, text=""), "font")
        )
        canvas._BUFFER = 3
        self.bbox = (0, 0, 0, 0)

    def draw(self):
        (right, bottom) = DrsDrawer(self._drs, canvas=self._canvas).draw()
        self.bbox = (0, 0, right + 1, bottom + 1)

    def clear(self):
        self._canvas.create_rectangle(self.bbox, fill="white", width="0")


def demo():
    examples = [
        "John walks",
        "David sees Mary",
        "David eats a sandwich",
        "every man chases a dog",
        # 'every man believes a dog yawns',
        # 'John gives David a sandwich',
        "John chases himself",
        # 'John persuades David to order a pizza',
        # 'John tries to go',
        # 'John tries to find a unicorn',
        # 'John seems to vanish',
        # 'a unicorn seems to approach',
        # 'every big cat leaves',
        # 'every gray cat leaves',
        # 'every big gray cat leaves',
        # 'a former senator leaves',
        # 'John likes a cat',
        # 'John likes every cat',
        # 'he walks',
        # 'John walks and he leaves'
    ]
    DrtGlueDemo(examples).mainloop()


if __name__ == "__main__":
    demo()

Backend/venv/lib/python3.12/site-packages/nltk/sem/evaluate.py (830 lines, new file)
@@ -0,0 +1,830 @@
# Natural Language Toolkit: Models for first-order languages with lambda
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
# URL: <https://www.nltk.org>
# For license information, see LICENSE.TXT

# TODO:
# - fix tracing
# - fix iterator-based approach to existentials

"""
This module provides data structures for representing first-order
models.
"""

import inspect
import re
import sys
import textwrap
from pprint import pformat

from nltk.decorators import decorator  # this is used in code that is commented out
from nltk.sem.logic import (
    AbstractVariableExpression,
    AllExpression,
    AndExpression,
    ApplicationExpression,
    EqualityExpression,
    ExistsExpression,
    Expression,
    IffExpression,
    ImpExpression,
    IndividualVariableExpression,
    IotaExpression,
    LambdaExpression,
    NegatedExpression,
    OrExpression,
    Variable,
    is_indvar,
)


class Error(Exception):
    pass


class Undefined(Error):
    pass


def trace(f, *args, **kw):
    argspec = inspect.getfullargspec(f)
    d = dict(zip(argspec[0], args))
    if d.pop("trace", None):
        print()
        for item in d.items():
            print("%s => %s" % item)
    return f(*args, **kw)


def is_rel(s):
    """
    Check whether a set represents a relation (of any arity).

    :param s: a set containing tuples of str elements
    :type s: set
    :rtype: bool
    """
    # we have the empty relation, i.e. set()
    if len(s) == 0:
        return True
    # all the elements are tuples of the same length
    # (compare the lengths of all tuples, not just the lexicographic
    # max and min, which can miss mixed-arity sets)
    elif all(isinstance(el, tuple) for el in s) and len({len(el) for el in s}) == 1:
        return True
    else:
        raise ValueError("Set %r contains sequences of different lengths" % s)
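
# For example, is_rel({('a', 'b'), ('c', 'd')}) and is_rel(set()) are True,
# while is_rel({('a',), ('b', 'c')}) raises ValueError.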


def set2rel(s):
    """
    Convert a set containing individuals (strings or numbers) into a set of
    unary tuples. Any tuples of strings already in the set are passed through
    unchanged.

    For example:
      - set(['a', 'b']) => set([('a',), ('b',)])
      - set([3, 27]) => set([('3',), ('27',)])

    :type s: set
    :rtype: set of tuple of str
    """
    new = set()
    for elem in s:
        if isinstance(elem, str):
            new.add((elem,))
        elif isinstance(elem, int):
            # wrap numbers in a unary tuple too, as documented above
            new.add((str(elem),))
        else:
            new.add(elem)
    return new


def arity(rel):
    """
    Check the arity of a relation.

    :type rel: set of tuples
    :rtype: int
    """
    if len(rel) == 0:
        return 0
    return len(list(rel)[0])


class Valuation(dict):
    """
    A dictionary which represents a model-theoretic Valuation of non-logical constants.
    Keys are strings representing the constants to be interpreted, and values correspond
    to individuals (represented as strings) and n-ary relations (represented as sets of tuples
    of strings).

    An instance of ``Valuation`` will raise an ``Undefined`` error (rather
    than behave like a standard dictionary and raise ``KeyError``) if
    indexed with an expression that is not in its list of symbols.
    """

    def __init__(self, xs):
        """
        :param xs: a list of (symbol, value) pairs.
        """
        super().__init__()
        for sym, val in xs:
            if isinstance(val, str) or isinstance(val, bool):
                self[sym] = val
            elif isinstance(val, set):
                self[sym] = set2rel(val)
            else:
                msg = textwrap.fill(
                    "Error in initializing Valuation. "
                    "Unrecognized value for symbol '%s':\n%s" % (sym, val),
                    width=66,
                )

                raise ValueError(msg)

    def __getitem__(self, key):
        if key in self:
            return dict.__getitem__(self, key)
        else:
            raise Undefined("Unknown expression: '%s'" % key)

    def __str__(self):
        return pformat(self)

    @property
    def domain(self):
        """Set-theoretic domain of the value-space of a Valuation."""
        dom = []
        for val in self.values():
            if isinstance(val, str):
                dom.append(val)
            elif not isinstance(val, bool):
                dom.extend(
                    [elem for tuple_ in val for elem in tuple_ if elem is not None]
                )
        return set(dom)

    @property
    def symbols(self):
        """The non-logical constants which the Valuation recognizes."""
        return sorted(self.keys())

    @classmethod
    def fromstring(cls, s):
        return read_valuation(s)
|
||||
|
||||
|
||||
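# A minimal usage sketch of ``Valuation`` (illustrative values): bare sets
# of individuals are converted to unary relations on entry, and the domain
# is read off the values.
#
#     >>> val = Valuation([('bruce', 'b1'), ('dog', {'d1', 'd2'})])
#     >>> val['dog'] == {('d1',), ('d2',)}
#     True
#     >>> sorted(val.domain)
#     ['b1', 'd1', 'd2']
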
##########################################
# REs used by the _read_valuation function
##########################################
_VAL_SPLIT_RE = re.compile(r"\s*=+>\s*")
_ELEMENT_SPLIT_RE = re.compile(r"\s*,\s*")
_TUPLES_RE = re.compile(
    r"""\s*
        (\([^)]+\))  # tuple-expression
        \s*""",
    re.VERBOSE,
)


def _read_valuation_line(s):
    """
    Read a line in a valuation file.

    Lines are expected to be of the form::

        noosa => n
        girl => {g1, g2}
        chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}

    :param s: input line
    :type s: str
    :return: a pair (symbol, value)
    :rtype: tuple
    """
    pieces = _VAL_SPLIT_RE.split(s)
    symbol = pieces[0]
    value = pieces[1]
    # check whether the value is meant to be a set
    if value.startswith("{"):
        value = value[1:-1]
        tuple_strings = _TUPLES_RE.findall(value)
        # are the set elements tuples?
        if tuple_strings:
            set_elements = []
            for ts in tuple_strings:
                ts = ts[1:-1]
                element = tuple(_ELEMENT_SPLIT_RE.split(ts))
                set_elements.append(element)
        else:
            set_elements = _ELEMENT_SPLIT_RE.split(value)
        value = set(set_elements)
    return symbol, value


def read_valuation(s, encoding=None):
    """
    Convert a valuation string into a valuation.

    :param s: a valuation string
    :type s: str
    :param encoding: the encoding of the input string, if it is binary
    :type encoding: str
    :return: a ``nltk.sem`` valuation
    :rtype: Valuation
    """
    if encoding is not None:
        s = s.decode(encoding)
    statements = []
    for linenum, line in enumerate(s.splitlines()):
        line = line.strip()
        if line.startswith("#") or line == "":
            continue
        try:
            statements.append(_read_valuation_line(line))
        except ValueError as e:
            raise ValueError(f"Unable to parse line {linenum}: {line}") from e
    return Valuation(statements)


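# A minimal usage sketch of ``read_valuation`` (illustrative values):
# '=>' separates symbol and value, braces mark sets, and '#' lines are
# skipped.
#
#     >>> v = read_valuation('''
#     ... noosa => n
#     ... girl => {g1, g2}
#     ... chase => {(b1, g1), (g1, d1)}
#     ... ''')
#     >>> v['girl'] == {('g1',), ('g2',)}
#     True
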
class Assignment(dict):
    r"""
    A dictionary which represents an assignment of values to variables.

    An assignment can only assign values from its domain.

    If an unknown expression *a* is passed to a model *M*\ 's
    interpretation function *i*, *i* will first check whether *M*\ 's
    valuation assigns an interpretation to *a* as a constant, and if
    this fails, *i* will delegate the interpretation of *a* to
    *g*. *g* only assigns values to individual variables (i.e.,
    members of the class ``IndividualVariableExpression`` in the ``logic``
    module). If a variable is not assigned a value by *g*, it will raise
    an ``Undefined`` exception.

    A variable *Assignment* is a mapping from individual variables to
    entities in the domain. Individual variables are usually indicated
    with the letters ``'x'``, ``'y'``, ``'w'`` and ``'z'``, optionally
    followed by an integer (e.g., ``'x0'``, ``'y332'``). Assignments are
    created using the ``Assignment`` constructor, which also takes the
    domain as a parameter.

    >>> from nltk.sem.evaluate import Assignment
    >>> dom = set(['u1', 'u2', 'u3', 'u4'])
    >>> g3 = Assignment(dom, [('x', 'u1'), ('y', 'u2')])
    >>> g3 == {'x': 'u1', 'y': 'u2'}
    True

    There is also a ``print`` format for assignments which uses a notation
    closer to that in logic textbooks:

    >>> print(g3)
    g[u1/x][u2/y]

    It is also possible to update an assignment using the ``add`` method:

    >>> dom = set(['u1', 'u2', 'u3', 'u4'])
    >>> g4 = Assignment(dom)
    >>> g4.add('x', 'u1')
    {'x': 'u1'}

    With no arguments, ``purge()`` is equivalent to ``clear()`` on a dictionary:

    >>> g4.purge()
    >>> g4
    {}

    :param domain: the domain of discourse
    :type domain: set
    :param assign: a list of (varname, value) associations
    :type assign: list
    """

    def __init__(self, domain, assign=None):
        super().__init__()
        self.domain = domain
        if assign:
            for var, val in assign:
                assert val in self.domain, "'{}' is not in the domain: {}".format(
                    val,
                    self.domain,
                )
                assert is_indvar(var), (
                    "Wrong format for an Individual Variable: '%s'" % var
                )
                self[var] = val
        self.variant = None
        self._addvariant()

    def __getitem__(self, key):
        if key in self:
            return dict.__getitem__(self, key)
        else:
            raise Undefined("Not recognized as a variable: '%s'" % key)

    def copy(self):
        new = Assignment(self.domain)
        new.update(self)
        return new

    def purge(self, var=None):
        """
        Remove one or all keys (i.e. logic variables) from an
        assignment, and update ``self.variant``.

        :param var: a Variable acting as a key for the assignment.
        """
        if var:
            del self[var]
        else:
            self.clear()
        self._addvariant()
        return None

    def __str__(self):
        """
        Pretty printing for assignments. {'x': 'u'} appears as 'g[u/x]'
        """
        gstring = "g"
        # Deterministic output for unit testing.
        variant = sorted(self.variant)
        for val, var in variant:
            gstring += f"[{val}/{var}]"
        return gstring

    def _addvariant(self):
        """
        Create a more pretty-printable version of the assignment.
        """
        list_ = []
        for item in self.items():
            pair = (item[1], item[0])
            list_.append(pair)
        self.variant = list_
        return None

    def add(self, var, val):
        """
        Add a new variable-value pair to the assignment, and update
        ``self.variant``.
        """
        assert val in self.domain, f"{val} is not in the domain {self.domain}"
        assert is_indvar(var), "Wrong format for an Individual Variable: '%s'" % var
        self[var] = val
        self._addvariant()
        return self


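# A minimal usage sketch of ``Assignment`` (illustrative values): ``add``
# checks both the domain and the variable format, and the string form uses
# the textbook g[value/variable] notation.
#
#     >>> g = Assignment({'u1', 'u2'})
#     >>> g.add('x', 'u1').add('y', 'u2')
#     {'x': 'u1', 'y': 'u2'}
#     >>> print(g)
#     g[u1/x][u2/y]
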
class Model:
    """
    A first order model is a domain *D* of discourse and a valuation *V*.

    A domain *D* is a set, and a valuation *V* is a map that associates
    expressions with values in the model.
    The domain of *V* should be a subset of *D*.

    Construct a new ``Model``.

    :type domain: set
    :param domain: A set of entities representing the domain of discourse of the model.
    :type valuation: Valuation
    :param valuation: the valuation of the model.
    """

    def __init__(self, domain, valuation):
        assert isinstance(domain, set)
        self.domain = domain
        self.valuation = valuation
        if not domain.issuperset(valuation.domain):
            raise Error(
                "The valuation domain, %s, must be a subset of the model's domain, %s"
                % (valuation.domain, domain)
            )

    def __repr__(self):
        return f"({self.domain!r}, {self.valuation!r})"

    def __str__(self):
        return f"Domain = {self.domain},\nValuation = \n{self.valuation}"

    def evaluate(self, expr, g, trace=None):
        """
        Read input expressions, and provide a handler for ``satisfy``
        that blocks further propagation of the ``Undefined`` error.
        :param expr: An ``Expression`` of ``logic``.
        :type g: Assignment
        :param g: an assignment to individual variables.
        :rtype: bool or 'Undefined'
        """
        try:
            parsed = Expression.fromstring(expr)
            value = self.satisfy(parsed, g, trace=trace)
            if trace:
                print()
                print(f"'{expr}' evaluates to {value} under M, {g}")
            return value
        except Undefined:
            if trace:
                print()
                print(f"'{expr}' is undefined under M, {g}")
            return "Undefined"

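    # A minimal usage sketch of ``evaluate`` (illustrative values), in the
    # style of the ``folmodel`` demo further down:
    #
    #     >>> val = Valuation([('adam', 'b1'), ('betty', 'g1'),
    #     ...                  ('love', {('b1', 'g1')})])
    #     >>> m = Model(val.domain, val)
    #     >>> g = Assignment(val.domain)
    #     >>> m.evaluate('love(adam, betty)', g)
    #     True
    #     >>> m.evaluate('exists x. love(x, betty)', g)
    #     True
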
    def satisfy(self, parsed, g, trace=None):
        """
        Recursive interpretation function for a formula of first-order logic.

        Raises an ``Undefined`` error when ``parsed`` is an atomic string
        but is not a symbol or an individual variable.

        :return: Returns a truth value or ``Undefined`` if ``parsed`` is\
        complex, and calls the interpretation function ``i`` if ``parsed``\
        is atomic.

        :param parsed: An expression of ``logic``.
        :type g: Assignment
        :param g: an assignment to individual variables.
        """

        if isinstance(parsed, ApplicationExpression):
            function, arguments = parsed.uncurry()
            if isinstance(function, AbstractVariableExpression):
                # It's a predicate expression ("P(x,y)"), so use the uncurried arguments
                funval = self.satisfy(function, g)
                argvals = tuple(self.satisfy(arg, g) for arg in arguments)
                return argvals in funval
            else:
                # It must be a lambda expression, so use curried form
                funval = self.satisfy(parsed.function, g)
                argval = self.satisfy(parsed.argument, g)
                return funval[argval]
        elif isinstance(parsed, NegatedExpression):
            return not self.satisfy(parsed.term, g)
        elif isinstance(parsed, AndExpression):
            return self.satisfy(parsed.first, g) and self.satisfy(parsed.second, g)
        elif isinstance(parsed, OrExpression):
            return self.satisfy(parsed.first, g) or self.satisfy(parsed.second, g)
        elif isinstance(parsed, ImpExpression):
            return (not self.satisfy(parsed.first, g)) or self.satisfy(parsed.second, g)
        elif isinstance(parsed, IffExpression):
            return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
        elif isinstance(parsed, EqualityExpression):
            return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
        elif isinstance(parsed, AllExpression):
            new_g = g.copy()
            for u in self.domain:
                new_g.add(parsed.variable.name, u)
                if not self.satisfy(parsed.term, new_g):
                    return False
            return True
        elif isinstance(parsed, ExistsExpression):
            new_g = g.copy()
            for u in self.domain:
                new_g.add(parsed.variable.name, u)
                if self.satisfy(parsed.term, new_g):
                    return True
            return False
        elif isinstance(parsed, IotaExpression):
            # NB: this mirrors the existential case; the truth value only
            # signals that some witness exists, it does not return the
            # unique individual.
            new_g = g.copy()
            for u in self.domain:
                new_g.add(parsed.variable.name, u)
                if self.satisfy(parsed.term, new_g):
                    return True
            return False
        elif isinstance(parsed, LambdaExpression):
            cf = {}
            var = parsed.variable.name
            for u in self.domain:
                val = self.satisfy(parsed.term, g.add(var, u))
                # NB the dict would be a lot smaller if we do this:
                # if val: cf[u] = val
                # But then need to deal with cases where f(a) should yield
                # a function rather than just False.
                cf[u] = val
            return cf
        else:
            return self.i(parsed, g, trace)

    # @decorator(trace_eval)
    def i(self, parsed, g, trace=False):
        """
        An interpretation function.

        Assuming that ``parsed`` is atomic:

        - if ``parsed`` is a non-logical constant, calls the valuation *V*
        - else if ``parsed`` is an individual variable, calls assignment *g*
        - else returns ``Undefined``.

        :param parsed: an ``Expression`` of ``logic``.
        :type g: Assignment
        :param g: an assignment to individual variables.
        :return: a semantic value
        """
        # If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols
        # and also be an IndividualVariableExpression. We want to catch this first case.
        # So there is a procedural consequence to the ordering of clauses here:
        if parsed.variable.name in self.valuation.symbols:
            return self.valuation[parsed.variable.name]
        elif isinstance(parsed, IndividualVariableExpression):
            return g[parsed.variable.name]
        else:
            raise Undefined("Can't find a value for %s" % parsed)

    def satisfiers(self, parsed, varex, g, trace=None, nesting=0):
        """
        Generate the entities from the model's domain that satisfy an open formula.

        :param parsed: an open formula
        :type parsed: Expression
        :param varex: the relevant free individual variable in ``parsed``.
        :type varex: VariableExpression or str
        :param g: a variable assignment
        :type g: Assignment
        :return: a set of the entities that satisfy ``parsed``.
        """

        spacer = "   "
        indent = spacer + (spacer * nesting)
        candidates = []

        if isinstance(varex, str):
            var = Variable(varex)
        else:
            var = varex

        if var in parsed.free():
            if trace:
                print()
                print(
                    (spacer * nesting)
                    + f"Open formula is '{parsed}' with assignment {g}"
                )
            for u in self.domain:
                new_g = g.copy()
                new_g.add(var.name, u)
                if trace and trace > 1:
                    lowtrace = trace - 1
                else:
                    lowtrace = 0
                value = self.satisfy(parsed, new_g, lowtrace)

                if trace:
                    print(indent + "(trying assignment %s)" % new_g)

                # parsed == False under g[u/var]?
                if value == False:
                    if trace:
                        print(indent + f"value of '{parsed}' under {new_g} is False")

                # so g[u/var] is a satisfying assignment
                else:
                    candidates.append(u)
                    if trace:
                        print(indent + f"value of '{parsed}' under {new_g} is {value}")

            result = set(candidates)
        # var isn't free in parsed
        else:
            raise Undefined(f"{var.name} is not free in {parsed}")

        return result


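# A minimal usage sketch of ``Model.satisfiers`` (illustrative values): it
# collects the domain members that make an open formula true under some
# extension of the assignment.
#
#     >>> val = Valuation([('boy', {'b1', 'b2'}), ('girl', {'g1'})])
#     >>> m = Model(val.domain, val)
#     >>> g = Assignment(val.domain)
#     >>> sorted(m.satisfiers(Expression.fromstring('boy(x)'), 'x', g))
#     ['b1', 'b2']
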
# //////////////////////////////////////////////////////////////////////
# Demo..
# //////////////////////////////////////////////////////////////////////
# number of spacer chars
mult = 30


# Demo 1: Propositional Logic
#################
def propdemo(trace=None):
    """Example of a propositional model."""

    global val1, dom1, m1, g1
    val1 = Valuation([("P", True), ("Q", True), ("R", False)])
    dom1 = set()
    m1 = Model(dom1, val1)
    g1 = Assignment(dom1)

    print()
    print("*" * mult)
    print("Propositional Formulas Demo")
    print("*" * mult)
    print("(Propositional constants treated as nullary predicates)")
    print()
    print("Model m1:\n", m1)
    print("*" * mult)
    sentences = [
        "(P & Q)",
        "(P & R)",
        "- P",
        "- R",
        "- - P",
        "- (P & R)",
        "(P | R)",
        "(R | P)",
        "(R | R)",
        "(- P | R)",
        "(P | - P)",
        "(P -> Q)",
        "(P -> R)",
        "(R -> P)",
        "(P <-> P)",
        "(R <-> R)",
        "(P <-> R)",
    ]

    for sent in sentences:
        if trace:
            print()
            m1.evaluate(sent, g1, trace)
        else:
            print(f"The value of '{sent}' is: {m1.evaluate(sent, g1)}")


# Demo 2: FOL Model
#############


def folmodel(quiet=False, trace=None):
    """Example of a first-order model."""

    global val2, v2, dom2, m2, g2

    v2 = [
        ("adam", "b1"),
        ("betty", "g1"),
        ("fido", "d1"),
        ("girl", {"g1", "g2"}),
        ("boy", {"b1", "b2"}),
        ("dog", {"d1"}),
        ("love", {("b1", "g1"), ("b2", "g2"), ("g1", "b1"), ("g2", "b1")}),
    ]
    val2 = Valuation(v2)
    dom2 = val2.domain
    m2 = Model(dom2, val2)
    g2 = Assignment(dom2, [("x", "b1"), ("y", "g2")])

    if not quiet:
        print()
        print("*" * mult)
        print("Models Demo")
        print("*" * mult)
        print("Model m2:\n", "-" * 14, "\n", m2)
        print("Variable assignment = ", g2)

        exprs = ["adam", "boy", "love", "walks", "x", "y", "z"]
        parsed_exprs = [Expression.fromstring(e) for e in exprs]

        print()
        for parsed in parsed_exprs:
            try:
                print(
                    "The interpretation of '%s' in m2 is %s"
                    % (parsed, m2.i(parsed, g2))
                )
            except Undefined:
                print("The interpretation of '%s' in m2 is Undefined" % parsed)

        applications = [
            ("boy", ("adam",)),
            ("walks", ("adam",)),
            ("love", ("adam", "y")),
            ("love", ("y", "adam")),
        ]

        for fun, args in applications:
            try:
                funval = m2.i(Expression.fromstring(fun), g2)
                argsval = tuple(m2.i(Expression.fromstring(arg), g2) for arg in args)
                print(f"{fun}({args}) evaluates to {argsval in funval}")
            except Undefined:
                print(f"{fun}({args}) evaluates to Undefined")


# Demo 3: FOL
#########


def foldemo(trace=None):
    """
    Interpretation of closed expressions in a first-order model.
    """
    folmodel(quiet=True)

    print()
    print("*" * mult)
    print("FOL Formulas Demo")
    print("*" * mult)

    formulas = [
        "love (adam, betty)",
        "(adam = mia)",
        "\\x. (boy(x) | girl(x))",
        "\\x. boy(x)(adam)",
        "\\x y. love(x, y)",
        "\\x y. love(x, y)(adam)(betty)",
        "\\x y. love(x, y)(adam, betty)",
        "\\x y. (boy(x) & love(x, y))",
        "\\x. exists y. (boy(x) & love(x, y))",
        "exists z1. boy(z1)",
        "exists x. (boy(x) & -(x = adam))",
        "exists x. (boy(x) & all y. love(y, x))",
        "all x. (boy(x) | girl(x))",
        "all x. (girl(x) -> exists y. boy(y) & love(x, y))",  # Every girl loves a boy.
        "exists x. (boy(x) & all y. (girl(y) -> love(y, x)))",  # There is a boy that every girl loves.
        "exists x. (boy(x) & all y. (girl(y) -> love(x, y)))",  # A boy loves every girl.
        "all x. (dog(x) -> - girl(x))",
        "exists x. exists y. (love(x, y) & love(x, y))",
    ]

    for fmla in formulas:
        g2.purge()
        if trace:
            m2.evaluate(fmla, g2, trace)
        else:
            print(f"The value of '{fmla}' is: {m2.evaluate(fmla, g2)}")


# Demo 4: Satisfaction
#############


def satdemo(trace=None):
    """Satisfiers of an open formula in a first order model."""

    print()
    print("*" * mult)
    print("Satisfiers Demo")
    print("*" * mult)

    folmodel(quiet=True)

    formulas = [
        "boy(x)",
        "(x = x)",
        "(boy(x) | girl(x))",
        "(boy(x) & girl(x))",
        "love(adam, x)",
        "love(x, adam)",
        "-(x = adam)",
        "exists z22. love(x, z22)",
        "exists y. love(y, x)",
        "all y. (girl(y) -> love(x, y))",
        "all y. (girl(y) -> love(y, x))",
        "all y. (girl(y) -> (boy(x) & love(y, x)))",
        "(boy(x) & all y. (girl(y) -> love(x, y)))",
        "(boy(x) & all y. (girl(y) -> love(y, x)))",
        "(boy(x) & exists y. (girl(y) & love(y, x)))",
        "(girl(x) -> dog(x))",
        "all y. (dog(y) -> (x = y))",
        "exists y. love(y, x)",
        "exists y. (love(adam, y) & love(y, x))",
    ]

    if trace:
        print(m2)

    # parse each formula once up front so that any syntax error surfaces
    # before evaluation starts
    for fmla in formulas:
        print(fmla)
        Expression.fromstring(fmla)

    parsed = [Expression.fromstring(fmla) for fmla in formulas]

    for p in parsed:
        g2.purge()
        print(
            "The satisfiers of '{}' are: {}".format(p, m2.satisfiers(p, "x", g2, trace))
        )


def demo(num=0, trace=None):
    """
    Run some demos.

     - num = 1: propositional logic demo
     - num = 2: first order model demo (only if trace is set)
     - num = 3: first order sentences demo
     - num = 4: satisfaction of open formulas demo
     - any other value: run all the demos

    :param trace: trace = 1, or trace = 2 for more verbose tracing
    """
    demos = {1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo}

    try:
        demos[num](trace=trace)
    except KeyError:
        for num in demos:
            demos[num](trace=trace)


if __name__ == "__main__":
    demo(2, trace=0)
835
Backend/venv/lib/python3.12/site-packages/nltk/sem/glue.py
Normal file
@@ -0,0 +1,835 @@
# Natural Language Toolkit: Glue Semantics
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

import os
from itertools import chain

import nltk
from nltk.internals import Counter
from nltk.sem import drt, linearlogic
from nltk.sem.logic import (
    AbstractVariableExpression,
    Expression,
    LambdaExpression,
    Variable,
    VariableExpression,
)
from nltk.tag import BigramTagger, RegexpTagger, TrigramTagger, UnigramTagger

SPEC_SEMTYPES = {
    "a": "ex_quant",
    "an": "ex_quant",
    "every": "univ_quant",
    "the": "def_art",
    "no": "no_quant",
    "default": "ex_quant",
}

OPTIONAL_RELATIONSHIPS = ["nmod", "vmod", "punct"]


class GlueFormula:
    def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, str):
            self.meaning = Expression.fromstring(meaning)
        elif isinstance(meaning, Expression):
            self.meaning = meaning
        else:
            raise RuntimeError(
                "Meaning term neither string nor expression: %s, %s"
                % (meaning, meaning.__class__)
            )

        if isinstance(glue, str):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError(
                "Glue term neither string nor expression: %s, %s"
                % (glue, glue.__class__)
            )

        self.indices = indices

    def applyto(self, arg):
        """self    = (\\x.(walk x), (subj -o f))
        arg     = (john       ,  subj)
        returns ((walk john),  f)
        """
        if self.indices & arg.indices:  # if the sets are NOT disjoint
            raise linearlogic.LinearLogicApplicationException(
                f"'{self}' applied to '{arg}'. Indices are not disjoint."
            )
        else:  # if the sets ARE disjoint
            return_indices = self.indices | arg.indices

        try:
            return_glue = linearlogic.ApplicationExpression(
                self.glue, arg.glue, arg.indices
            )
        except linearlogic.LinearLogicApplicationException as e:
            raise linearlogic.LinearLogicApplicationException(
                f"'{self.simplify()}' applied to '{arg.simplify()}'"
            ) from e

        arg_meaning_abstracted = arg.meaning
        if return_indices:
            for dep in self.glue.simplify().antecedent.dependencies[
                ::-1
            ]:  # if self.glue is (A -o B), dep is in A.dependencies
                arg_meaning_abstracted = self.make_LambdaExpression(
                    Variable("v%s" % dep), arg_meaning_abstracted
                )
        return_meaning = self.meaning.applyto(arg_meaning_abstracted)

        return self.__class__(return_meaning, return_glue, return_indices)

    def make_VariableExpression(self, name):
        return VariableExpression(name)

    def make_LambdaExpression(self, variable, term):
        return LambdaExpression(variable, term)

    def lambda_abstract(self, other):
        assert isinstance(other, GlueFormula)
        assert isinstance(other.meaning, AbstractVariableExpression)
        return self.__class__(
            self.make_LambdaExpression(other.meaning.variable, self.meaning),
            linearlogic.ImpExpression(other.glue, self.glue),
        )

    def compile(self, counter=None):
        """From Iddo Lev's PhD Dissertation p108-109"""
        if not counter:
            counter = Counter()
        (compiled_glue, new_forms) = self.glue.simplify().compile_pos(
            counter, self.__class__
        )
        return new_forms + [
            self.__class__(self.meaning, compiled_glue, {counter.get()})
        ]

    def simplify(self):
        return self.__class__(
            self.meaning.simplify(), self.glue.simplify(), self.indices
        )

    def __eq__(self, other):
        return (
            self.__class__ == other.__class__
            and self.meaning == other.meaning
            and self.glue == other.glue
        )

    def __ne__(self, other):
        return not self == other

    # sorting for use in doctests which must be deterministic
    def __lt__(self, other):
        return str(self) < str(other)

    def __str__(self):
        assert isinstance(self.indices, set)
        accum = f"{self.meaning} : {self.glue}"
        if self.indices:
            accum += (
                " : {" + ", ".join(str(index) for index in sorted(self.indices)) + "}"
            )
        return accum

    def __repr__(self):
        return "%s" % self


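# A minimal usage sketch of ``GlueFormula`` (illustrative premises): applying
# an implicational glue term to a matching atomic one performs one derivation
# step, pairing beta-reduction on the meaning side with -o elimination on the
# glue side.
#
#     >>> john = GlueFormula('john', 'g')
#     >>> walks = GlueFormula(r'\x.walks(x)', '(g -o f)')
#     >>> print(walks.applyto(john).simplify())
#     walks(john) : f
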
class GlueDict(dict):
    def __init__(self, filename, encoding=None):
        self.filename = filename
        self.file_encoding = encoding
        self.read_file()

    def read_file(self, empty_first=True):
        if empty_first:
            self.clear()

        try:
            contents = nltk.data.load(
                self.filename, format="text", encoding=self.file_encoding
            )
            # TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load()
        except LookupError as e:
            try:
                contents = nltk.data.load(
                    "file:" + self.filename, format="text", encoding=self.file_encoding
                )
            except LookupError:
                raise e
        lines = contents.splitlines()

        for line in lines:  # example: 'n : (\\x.(<word> x), (v-or))'
            #                          lambdacalc -^  linear logic -^
            line = line.strip()  # remove trailing newline
            if not len(line):
                continue  # skip empty lines
            if line[0] == "#":
                continue  # skip commented-out lines

            parts = line.split(
                " : ", 2
            )  # ['verb', '(\\x.(<word> x), ( subj -o f ))', '[subj]']

            glue_formulas = []
            paren_count = 0
            tuple_start = 0
            tuple_comma = 0

            relationships = None

            if len(parts) > 1:
                for i, c in enumerate(parts[1]):
                    if c == "(":
                        if paren_count == 0:  # if it's the first '(' of a tuple
                            tuple_start = i + 1  # then save the index
                        paren_count += 1
                    elif c == ")":
                        paren_count -= 1
                        if paren_count == 0:  # if it's the last ')' of a tuple
                            meaning_term = parts[1][
                                tuple_start:tuple_comma
                            ]  # '\\x.(<word> x)'
                            glue_term = parts[1][tuple_comma + 1 : i]  # '(v-r)'
                            glue_formulas.append(
                                [meaning_term, glue_term]
                            )  # add the GlueFormula to the list
                    elif c == ",":
                        if (
                            paren_count == 1
                        ):  # if it's a comma separating the parts of the tuple
                            tuple_comma = i  # then save the index
                    elif c == "#":  # skip comments at the ends of lines
                        if (
                            paren_count != 0
                        ):  # if the line hasn't parsed correctly so far
                            raise RuntimeError(
                                "Formula syntax is incorrect for entry " + line
                            )
                        break  # break to the next line

            if len(parts) > 2:  # if there is a relationship entry at the end
                rel_start = parts[2].index("[") + 1
                rel_end = parts[2].index("]")
                if rel_start == rel_end:
                    relationships = frozenset()
                else:
                    relationships = frozenset(
                        r.strip() for r in parts[2][rel_start:rel_end].split(",")
                    )

            try:
                start_inheritance = parts[0].index("(")
                end_inheritance = parts[0].index(")")
                sem = parts[0][:start_inheritance].strip()
                supertype = parts[0][start_inheritance + 1 : end_inheritance]
            except ValueError:
                # no '(supertype)' annotation on this entry
                sem = parts[0].strip()
                supertype = None

            if sem not in self:
                self[sem] = {}

            if (
                relationships is None
            ):  # if not specified for a specific relationship set
                # add all relationship entries for parents
                if supertype:
                    for rels in self[supertype]:
                        if rels not in self[sem]:
                            self[sem][rels] = []
                        glue = self[supertype][rels]
                        self[sem][rels].extend(glue)
                        self[sem][rels].extend(
                            glue_formulas
                        )  # add the glue formulas to every rel entry
                else:
                    if None not in self[sem]:
                        self[sem][None] = []
                    self[sem][None].extend(
                        glue_formulas
                    )  # add the glue formulas to every rel entry
            else:
                if relationships not in self[sem]:
                    self[sem][relationships] = []
                if supertype:
                    self[sem][relationships].extend(self[supertype][relationships])
                self[sem][relationships].extend(
                    glue_formulas
                )  # add the glue entry to the dictionary

    def __str__(self):
        accum = ""
        for pos in self:
            str_pos = "%s" % pos
            for relset in self[pos]:
                i = 1
                for gf in self[pos][relset]:
                    if i == 1:
                        accum += str_pos + ": "
                    else:
                        accum += " " * (len(str_pos) + 2)
                    accum += "%s" % gf
                    if relset and i == len(self[pos][relset]):
                        accum += " : %s" % relset
                    accum += "\n"
                    i += 1
        return accum

    def to_glueformula_list(self, depgraph, node=None, counter=None, verbose=False):
        if node is None:
            # TODO: should it be depgraph.root? Is this code tested?
            top = depgraph.nodes[0]
            depList = list(chain.from_iterable(top["deps"].values()))
            root = depgraph.nodes[depList[0]]

            return self.to_glueformula_list(depgraph, root, Counter(), verbose)

        glueformulas = self.lookup(node, depgraph, counter)
        for dep_idx in chain.from_iterable(node["deps"].values()):
            dep = depgraph.nodes[dep_idx]
            glueformulas.extend(
                self.to_glueformula_list(depgraph, dep, counter, verbose)
            )
        return glueformulas

    def lookup(self, node, depgraph, counter):
        semtype_names = self.get_semtypes(node)

        semtype = None
        for name in semtype_names:
            if name in self:
                semtype = self[name]
                break
        if semtype is None:
            # raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
            return []

        self.add_missing_dependencies(node, depgraph)

        lookup = self._lookup_semtype_option(semtype, node, depgraph)

        if not len(lookup):
            raise KeyError(
                "There is no GlueDict entry for sem type of '%s' "
                "with tag '%s', and rel '%s'" % (node["word"], node["tag"], node["rel"])
            )

        return self.get_glueformulas_from_semtype_entry(
            lookup, node["word"], node, depgraph, counter
        )

    def add_missing_dependencies(self, node, depgraph):
        rel = node["rel"].lower()

        if rel == "main":
            headnode = depgraph.nodes[node["head"]]
            subj = self.lookup_unique("subj", headnode, depgraph)
            relation = subj["rel"]
            node["deps"].setdefault(relation, [])
            node["deps"][relation].append(subj["address"])
            # node['deps'].append(subj['address'])

    def _lookup_semtype_option(self, semtype, node, depgraph):
        relationships = frozenset(
            depgraph.nodes[dep]["rel"].lower()
            for dep in chain.from_iterable(node["deps"].values())
            if depgraph.nodes[dep]["rel"].lower() not in OPTIONAL_RELATIONSHIPS
        )

        try:
            lookup = semtype[relationships]
        except KeyError:
            # An exact match is not found, so find the best match where
            # 'best' is defined as the glue entry whose relationship set has the
            # most relations of any possible relationship set that is a subset
            # of the actual depgraph
            best_match = frozenset()
            for relset_option in set(semtype) - {None}:
                if (
                    len(relset_option) > len(best_match)
                    and relset_option < relationships
                ):
                    best_match = relset_option
            if not best_match:
                if None in semtype:
                    best_match = None
                else:
                    return None
            lookup = semtype[best_match]

        return lookup

    def get_semtypes(self, node):
        """
        Based on the node, return a list of plausible semtypes in order of
        plausibility.
        """
        rel = node["rel"].lower()
        word = node["word"].lower()

        if rel == "spec":
            if word in SPEC_SEMTYPES:
                return [SPEC_SEMTYPES[word]]
            else:
                return [SPEC_SEMTYPES["default"]]
        elif rel in ["nmod", "vmod"]:
            return [node["tag"], rel]
        else:
            return [node["tag"]]

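    # A minimal sketch of the lookup order above (illustrative values): for a
    # determiner node with rel 'spec' and word 'every', ``get_semtypes``
    # returns ['univ_quant']; an unlisted determiner falls back to
    # SPEC_SEMTYPES['default'], i.e. 'ex_quant'.
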
    def get_glueformulas_from_semtype_entry(
        self, lookup, word, node, depgraph, counter
    ):
        glueformulas = []

        glueFormulaFactory = self.get_GlueFormula_factory()
        for meaning, glue in lookup:
            gf = glueFormulaFactory(self.get_meaning_formula(meaning, word), glue)
            if not len(glueformulas):
                gf.word = word
            else:
                gf.word = f"{word}{len(glueformulas) + 1}"

            gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get())

            glueformulas.append(gf)
        return glueformulas

    def get_meaning_formula(self, generic, word):
        """
        :param generic: A meaning formula string containing the
            parameter "<word>"
        :param word: The actual word to replace "<word>"
        """
        word = word.replace(".", "")
        return generic.replace("<word>", word)

    def initialize_labels(self, expr, node, depgraph, unique_index):
        if isinstance(expr, linearlogic.AtomicExpression):
            name = self.find_label_name(expr.name.lower(), node, depgraph, unique_index)
            if name[0].isupper():
                return linearlogic.VariableExpression(name)
            else:
                return linearlogic.ConstantExpression(name)
        else:
            return linearlogic.ImpExpression(
                self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
                self.initialize_labels(expr.consequent, node, depgraph, unique_index),
            )

    def find_label_name(self, name, node, depgraph, unique_index):
        try:
            dot = name.index(".")

            before_dot = name[:dot]
            after_dot = name[dot + 1 :]
            if before_dot == "super":
                return self.find_label_name(
                    after_dot, depgraph.nodes[node["head"]], depgraph, unique_index
                )
            else:
                return self.find_label_name(
                    after_dot,
                    self.lookup_unique(before_dot, node, depgraph),
                    depgraph,
                    unique_index,
                )
        except ValueError:
            lbl = self.get_label(node)
            if name == "f":
                return lbl
            elif name == "v":
                return "%sv" % lbl
            elif name == "r":
                return "%sr" % lbl
            elif name == "super":
                return self.get_label(depgraph.nodes[node["head"]])
            elif name == "var":
                return f"{lbl.upper()}{unique_index}"
            elif name == "a":
                return self.get_label(self.lookup_unique("conja", node, depgraph))
            elif name == "b":
                return self.get_label(self.lookup_unique("conjb", node, depgraph))
            else:
                return self.get_label(self.lookup_unique(name, node, depgraph))

    def get_label(self, node):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        :param node: the dependency-graph node whose address selects the letter
        :type node: dict
        """
        value = node["address"]

        letter = [
            "f",
            "g",
            "h",
            "i",
            "j",
            "k",
            "l",
            "m",
            "n",
            "o",
            "p",
            "q",
            "r",
            "s",
            "t",
            "u",
            "v",
            "w",
            "x",
            "y",
            "z",
            "a",
            "b",
            "c",
            "d",
            "e",
        ][value - 1]
        num = int(value) // 26
        if num > 0:
            return letter + str(num)
        else:
            return letter

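    # A minimal sketch of the mapping above (illustrative values): address 1
    # maps to 'f', 2 to 'g', ..., 21 to 'z', 22 to 'a'; for larger addresses a
    # numeric suffix is appended (e.g. address 26 yields 'e1', since the
    # suffix is address // 26).
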
    def lookup_unique(self, rel, node, depgraph):
        """
        Look up the unique dependent of ``node`` bearing the relation ``rel``.
        There should be exactly one such item.
        """
        deps = [
            depgraph.nodes[dep]
            for dep in chain.from_iterable(node["deps"].values())
            if depgraph.nodes[dep]["rel"].lower() == rel.lower()
        ]

        if len(deps) == 0:
            raise KeyError(
                "'{}' doesn't contain a feature '{}'".format(node["word"], rel)
            )
        elif len(deps) > 1:
            raise KeyError(
                "'{}' should only have one feature '{}'".format(node["word"], rel)
            )
        else:
            return deps[0]

    def get_GlueFormula_factory(self):
        return GlueFormula


class Glue:
    def __init__(
        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
    ):
        self.verbose = verbose
        self.remove_duplicates = remove_duplicates
        self.depparser = depparser

        from nltk import Prover9

        self.prover = Prover9()

        if semtype_file:
            self.semtype_file = semtype_file
        else:
            self.semtype_file = os.path.join(
                "grammars", "sample_grammars", "glue.semtype"
            )

    def train_depparser(self, depgraphs=None):
        if depgraphs:
            self.depparser.train(depgraphs)
        else:
            self.depparser.train_from_file(
                nltk.data.find(
                    os.path.join("grammars", "sample_grammars", "glue_train.conll")
                )
            )

    def parse_to_meaning(self, sentence):
        readings = []
        for agenda in self.parse_to_compiled(sentence):
            readings.extend(self.get_readings(agenda))
        return readings

    def get_readings(self, agenda):
        readings = []
        agenda_length = len(agenda)
        atomics = dict()
        nonatomics = dict()
        while agenda:  # is not empty
            cur = agenda.pop()
            glue_simp = cur.glue.simplify()
            if isinstance(
                glue_simp, linearlogic.ImpExpression
            ):  # if cur.glue is non-atomic
                for key in atomics:
                    try:
                        if isinstance(cur.glue, linearlogic.ApplicationExpression):
                            bindings = cur.glue.bindings
                        else:
                            bindings = linearlogic.BindingDict()
                        glue_simp.antecedent.unify(key, bindings)
                        for atomic in atomics[key]:
                            if not (
                                cur.indices & atomic.indices
                            ):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(cur.applyto(atomic))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                    except linearlogic.UnificationException:
                        pass
                try:
                    nonatomics[glue_simp.antecedent].append(cur)
                except KeyError:
                    nonatomics[glue_simp.antecedent] = [cur]

            else:  # else cur.glue is atomic
                for key in nonatomics:
                    for nonatomic in nonatomics[key]:
                        try:
                            if isinstance(
                                nonatomic.glue, linearlogic.ApplicationExpression
                            ):
                                bindings = nonatomic.glue.bindings
                            else:
                                bindings = linearlogic.BindingDict()
                            glue_simp.unify(key, bindings)
                            if not (
                                cur.indices & nonatomic.indices
                            ):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(nonatomic.applyto(cur))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                        except linearlogic.UnificationException:
                            pass
                try:
                    atomics[glue_simp].append(cur)
                except KeyError:
                    atomics[glue_simp] = [cur]

        for entry in atomics:
            for gf in atomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        for entry in nonatomics:
            for gf in nonatomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        return readings

    def _add_to_reading_list(self, glueformula, reading_list):
        add_reading = True
        if self.remove_duplicates:
            for reading in reading_list:
                try:
                    if reading.equiv(glueformula.meaning, self.prover):
                        add_reading = False
                        break
                except Exception as e:
                    # if there is an exception, the syntax of the formula
                    # may not be understandable by the prover, so don't
                    # throw out the reading.
                    print("Error when checking logical equality of statements", e)

        if add_reading:
            reading_list.append(glueformula.meaning)

    def parse_to_compiled(self, sentence):
        gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
        return [self.gfl_to_compiled(gfl) for gfl in gfls]

    def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        # Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser

            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose)

    def depgraph_to_glue(self, depgraph):
        return self.get_glue_dict().to_glueformula_list(depgraph)

    def get_glue_dict(self):
        return GlueDict(self.semtype_file)

    def gfl_to_compiled(self, gfl):
        index_counter = Counter()
        return_list = []
        for gf in gfl:
            return_list.extend(gf.compile(index_counter))

        if self.verbose:
            print("Compiled Glue Premises:")
            for cgf in return_list:
                print(cgf)

        return return_list

    def get_pos_tagger(self):
        from nltk.corpus import brown

        regexp_tagger = RegexpTagger(
            [
                (r"^-?[0-9]+(\.[0-9]+)?$", "CD"),  # cardinal numbers
                (r"(The|the|A|a|An|an)$", "AT"),  # articles
                (r".*able$", "JJ"),  # adjectives
                (r".*ness$", "NN"),  # nouns formed from adjectives
                (r".*ly$", "RB"),  # adverbs
                (r".*s$", "NNS"),  # plural nouns
                (r".*ing$", "VBG"),  # gerunds
                (r".*ed$", "VBD"),  # past tense verbs
                (r".*", "NN"),  # nouns (default)
            ]
        )
        brown_train = brown.tagged_sents(categories="news")
        unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
        bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
        trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)

        # Override particular words
        main_tagger = RegexpTagger(
            [(r"(A|a|An|an)$", "ex_quant"), (r"(Every|every|All|all)$", "univ_quant")],
            backoff=trigram_tagger,
        )

        return main_tagger


class DrtGlueFormula(GlueFormula):
    def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, str):
            self.meaning = drt.DrtExpression.fromstring(meaning)
        elif isinstance(meaning, drt.DrtExpression):
            self.meaning = meaning
        else:
            raise RuntimeError(
                "Meaning term neither string nor expression: %s, %s"
                % (meaning, meaning.__class__)
            )

        if isinstance(glue, str):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError(
                "Glue term neither string nor expression: %s, %s"
                % (glue, glue.__class__)
            )

        self.indices = indices

    def make_VariableExpression(self, name):
        return drt.DrtVariableExpression(name)

    def make_LambdaExpression(self, variable, term):
        return drt.DrtLambdaExpression(variable, term)


class DrtGlueDict(GlueDict):
    def get_GlueFormula_factory(self):
        return DrtGlueFormula


class DrtGlue(Glue):
    def __init__(
        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
    ):
        if not semtype_file:
            semtype_file = os.path.join(
                "grammars", "sample_grammars", "drt_glue.semtype"
            )
        Glue.__init__(self, semtype_file, remove_duplicates, depparser, verbose)

    def get_glue_dict(self):
        return DrtGlueDict(self.semtype_file)


def demo(show_example=-1):
    from nltk.parse import MaltParser

    examples = [
        "David sees Mary",
        "David eats a sandwich",
        "every man chases a dog",
        "every man believes a dog sleeps",
        "John gives David a sandwich",
        "John chases himself",
    ]
    # 'John persuades David to order a pizza',
    # 'John tries to go',
    # 'John tries to find a unicorn',
    # 'John seems to vanish',
    # 'a unicorn seems to approach',
    # 'every big cat leaves',
    # 'every gray cat leaves',
    # 'every big gray cat leaves',
    # 'a former senator leaves',

    print("============== DEMO ==============")

    tagger = RegexpTagger(
        [
            ("^(David|Mary|John)$", "NNP"),
            (
                "^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
                "VB",
            ),
            ("^(go|order|vanish|find|approach)$", "VB"),
            ("^(a)$", "ex_quant"),
            ("^(every)$", "univ_quant"),
            ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
            ("^(big|gray|former)$", "JJ"),
            ("^(him|himself)$", "PRP"),
        ]
    )

    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for i, sentence in enumerate(examples):
        if i == show_example or show_example == -1:
            print(f"[[[Example {i}]]] {sentence}")
            for reading in glue.parse_to_meaning(sentence.split()):
                print(reading.simplify())
            print("")


if __name__ == "__main__":
    demo()
395
Backend/venv/lib/python3.12/site-packages/nltk/sem/hole.py
Normal file
@@ -0,0 +1,395 @@
# Natural Language Toolkit: Logic
#
# Author:     Peter Wang
# Updated by: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
An implementation of the Hole Semantics model, following Blackburn and Bos,
Representation and Inference for Natural Language (CSLI, 2005).

The semantic representations are built by the grammar hole.fcfg.
This module contains driver code to read in sentences and parse them
according to a hole semantics grammar.

After parsing, the semantic representation is in the form of an underspecified
representation that is not easy to read. We use a "plugging" algorithm to
convert that representation into first-order logic formulas.
"""

from functools import reduce

from nltk.parse import load_parser
from nltk.sem.logic import (
    AllExpression,
    AndExpression,
    ApplicationExpression,
    ExistsExpression,
    IffExpression,
    ImpExpression,
    LambdaExpression,
    NegatedExpression,
    OrExpression,
)
from nltk.sem.skolemize import skolemize

# Note that in this code there may be multiple types of trees being referred to:
#
# 1. parse trees
# 2. the underspecified representation
# 3. first-order logic formula trees
# 4. the search space when plugging (search tree)
#


class Constants:
    ALL = "ALL"
    EXISTS = "EXISTS"
    NOT = "NOT"
    AND = "AND"
    OR = "OR"
    IMP = "IMP"
    IFF = "IFF"
    PRED = "PRED"
    LEQ = "LEQ"
    HOLE = "HOLE"
    LABEL = "LABEL"

    MAP = {
        ALL: lambda v, e: AllExpression(v.variable, e),
        EXISTS: lambda v, e: ExistsExpression(v.variable, e),
        NOT: NegatedExpression,
        AND: AndExpression,
        OR: OrExpression,
        IMP: ImpExpression,
        IFF: IffExpression,
        PRED: ApplicationExpression,
    }


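# A minimal usage sketch of ``Constants.MAP`` (illustrative values): each
# entry rebuilds a logic operator from a recovered USR predicate name.
#
#     >>> from nltk.sem.logic import Expression
#     >>> left = Expression.fromstring('dog(x)')
#     >>> right = Expression.fromstring('bark(x)')
#     >>> print(Constants.MAP[Constants.AND](left, right))
#     (dog(x) & bark(x))
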
class HoleSemantics:
    """
    This class holds the broken-down components of a hole semantics, i.e. it
    extracts the holes, labels, logic formula fragments and constraints out of
    a big conjunction of such predicates, as produced by the hole semantics
    grammar. It then provides some operations on the semantics dealing with
    holes, labels and finding legal ways to plug holes with labels.
    """

    def __init__(self, usr):
        """
        Constructor. `usr' is a ``sem.Expression`` representing an
        Underspecified Representation Structure (USR). A USR has the following
        special predicates:
        ALL(l,v,n),
        EXISTS(l,v,n),
        AND(l,n,n),
        OR(l,n,n),
        IMP(l,n,n),
        IFF(l,n,n),
        PRED(l,v,n,v[,v]*) where the brackets and star indicate zero or more repetitions,
        LEQ(n,n),
        HOLE(n),
        LABEL(n)
        where l is the label of the node described by the predicate, n is either
        a label or a hole, and v is a variable.
        """
        self.holes = set()
        self.labels = set()
        self.fragments = {}  # mapping of label -> formula fragment
        self.constraints = set()  # set of Constraints
        self._break_down(usr)
        self.top_most_labels = self._find_top_most_labels()
        self.top_hole = self._find_top_hole()

    def is_node(self, x):
        """
        Return true if x is a node (label or hole) in this semantic
        representation.
        """
        return x in (self.labels | self.holes)

    def _break_down(self, usr):
        """
        Extract holes, labels, formula fragments and constraints from the hole
        semantics underspecified representation (USR).
        """
        if isinstance(usr, AndExpression):
            self._break_down(usr.first)
            self._break_down(usr.second)
        elif isinstance(usr, ApplicationExpression):
            func, args = usr.uncurry()
            if func.variable.name == Constants.LEQ:
                self.constraints.add(Constraint(args[0], args[1]))
            elif func.variable.name == Constants.HOLE:
                self.holes.add(args[0])
            elif func.variable.name == Constants.LABEL:
                self.labels.add(args[0])
            else:
                label = args[0]
                assert label not in self.fragments
                self.fragments[label] = (func, args[1:])
        else:
            raise ValueError(usr.label())

    def _find_top_nodes(self, node_list):
        top_nodes = node_list.copy()
        for f in self.fragments.values():
            # the label is the first argument of the predicate
            args = f[1]
            for arg in args:
                if arg in node_list:
                    top_nodes.discard(arg)
        return top_nodes

    def _find_top_most_labels(self):
        """
        Return the set of labels which are not referenced directly as part of
        another formula fragment. These will be the top-most labels for the
        subtree that they are part of.
        """
        return self._find_top_nodes(self.labels)

    def _find_top_hole(self):
        """
        Return the hole that will be the top of the formula tree.
        """
        top_holes = self._find_top_nodes(self.holes)
        assert len(top_holes) == 1  # it must be unique
        return top_holes.pop()

    def pluggings(self):
        """
        Calculate and return all the legal pluggings (mappings of labels to
        holes) of this semantics given the constraints.
        """
        record = []
        self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {}, record)
        return record

    def _plug_nodes(self, queue, potential_labels, plug_acc, record):
        """
        Plug the nodes in `queue' with the labels in `potential_labels'.

        Each element of `queue' is a tuple of the node to plug and the list of
        ancestor holes from the root of the graph to that node.

        `potential_labels' is a set of the labels which are still available for
        plugging.

        `plug_acc' is the incomplete mapping of holes to labels made on the
        current branch of the search tree so far.

        `record' is a list of all the complete pluggings that we have found in
        total so far. It is the only parameter that is destructively updated.
        """
        if queue != []:
            (node, ancestors) = queue[0]
            if node in self.holes:
                # The node is a hole, try to plug it.
                self._plug_hole(
                    node, ancestors, queue[1:], potential_labels, plug_acc, record
                )
            else:
                assert node in self.labels
                # The node is a label. Replace it in the queue by the holes and
                # labels in the formula fragment named by that label.
                args = self.fragments[node][1]
                head = [(a, ancestors) for a in args if self.is_node(a)]
                self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record)
        else:
            raise Exception("queue empty")

    def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record):
        """
        Try all possible ways of plugging a single hole.
        See _plug_nodes for the meanings of the parameters.
        """
        # Add the current hole we're trying to plug into the list of ancestors.
        assert hole not in ancestors0
        ancestors = [hole] + ancestors0

        # Try each potential label in this hole in turn.
        for l in potential_labels0:
            # Is the label valid in this hole?
            if self._violates_constraints(l, ancestors):
                continue

            plug_acc = plug_acc0.copy()
            plug_acc[hole] = l
            potential_labels = potential_labels0.copy()
            potential_labels.remove(l)

            if len(potential_labels) == 0:
                # No more potential labels. That must mean all the holes have
                # been filled so we have found a legal plugging so remember it.
                #
                # Note that the queue might not be empty because there might
                # be labels on there that point to formula fragments with
                # no holes in them. _sanity_check_plugging will make sure
                # all holes are filled.
                self._sanity_check_plugging(plug_acc, self.top_hole, [])
                record.append(plug_acc)
            else:
                # Recursively try to fill in the rest of the holes in the
                # queue. The label we just plugged into the hole could have
                # holes of its own, so it goes at the end of the queue.
                # Putting it on the end of the queue gives us a breadth-first
                # search, so that all the holes at level i of the formula tree
                # are filled before filling level i+1.
                # A depth-first search would work as well since the trees must
                # be finite but the bookkeeping would be harder.
                self._plug_nodes(
                    queue + [(l, ancestors)], potential_labels, plug_acc, record
                )

def _violates_constraints(self, label, ancestors):
|
||||
"""
|
||||
Return True if the `label' cannot be placed underneath the holes given
|
||||
by the set `ancestors' because it would violate the constraints imposed
|
||||
on it.
|
||||
"""
|
||||
for c in self.constraints:
|
||||
if c.lhs == label:
|
||||
if c.rhs not in ancestors:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _sanity_check_plugging(self, plugging, node, ancestors):
|
||||
"""
|
||||
Make sure that a given plugging is legal. We recursively go through
|
||||
each node and make sure that no constraints are violated.
|
||||
We also check that all holes have been filled.
|
||||
"""
|
||||
if node in self.holes:
|
||||
ancestors = [node] + ancestors
|
||||
label = plugging[node]
|
||||
else:
|
||||
label = node
|
||||
assert label in self.labels
|
||||
for c in self.constraints:
|
||||
if c.lhs == label:
|
||||
assert c.rhs in ancestors
|
||||
args = self.fragments[label][1]
|
||||
for arg in args:
|
||||
if self.is_node(arg):
|
||||
self._sanity_check_plugging(plugging, arg, [label] + ancestors)
|
||||
|
||||
def formula_tree(self, plugging):
|
||||
"""
|
||||
Return the first-order logic formula tree for this underspecified
|
||||
representation using the plugging given.
|
||||
"""
|
||||
return self._formula_tree(plugging, self.top_hole)
|
||||
|
||||
def _formula_tree(self, plugging, node):
|
||||
if node in plugging:
|
||||
return self._formula_tree(plugging, plugging[node])
|
||||
elif node in self.fragments:
|
||||
pred, args = self.fragments[node]
|
||||
children = [self._formula_tree(plugging, arg) for arg in args]
|
||||
return reduce(Constants.MAP[pred.variable.name], children)
|
||||
else:
|
||||
return node
|
||||
|
||||
|
||||
class Constraint:
    """
    This class represents a constraint of the form (L =< N),
    where L is a label and N is a node (a label or a hole).
    """

    def __init__(self, lhs, rhs):
        self.lhs = lhs
        self.rhs = rhs

    def __eq__(self, other):
        if self.__class__ == other.__class__:
            return self.lhs == other.lhs and self.rhs == other.rhs
        else:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(repr(self))

    def __repr__(self):
        return f"({self.lhs} < {self.rhs})"


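# Illustrative sketch (hypothetical labels): Constraint("l1", "h0") records
# that the fragment labelled l1 must end up somewhere below hole h0 in every
# legal plugging; _violates_constraints rejects any candidate plugging whose
# ancestor chain for l1 does not contain h0.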
def hole_readings(sentence, grammar_filename=None, verbose=False):
    if not grammar_filename:
        grammar_filename = "grammars/sample_grammars/hole.fcfg"

    if verbose:
        print("Reading grammar file", grammar_filename)

    parser = load_parser(grammar_filename)

    # Parse the sentence.
    tokens = sentence.split()
    trees = list(parser.parse(tokens))
    if verbose:
        print("Got %d different parses" % len(trees))

    all_readings = []
    for tree in trees:
        # Get the semantic feature from the top of the parse tree.
        sem = tree.label()["SEM"].simplify()

        # Print the raw semantic representation.
        if verbose:
            print("Raw:       ", sem)

        # Skolemize away all quantifiers.  All variables become unique.
        while isinstance(sem, LambdaExpression):
            sem = sem.term
        skolemized = skolemize(sem)

        if verbose:
            print("Skolemized:", skolemized)

        # Break the hole semantics representation down into its components,
        # i.e. holes, labels, formula fragments and constraints.
        hole_sem = HoleSemantics(skolemized)

        # Maybe show the details of the semantic representation.
        if verbose:
            print("Holes:       ", hole_sem.holes)
            print("Labels:      ", hole_sem.labels)
            print("Constraints: ", hole_sem.constraints)
            print("Top hole:    ", hole_sem.top_hole)
            print("Top labels:  ", hole_sem.top_most_labels)
            print("Fragments:")
            for l, f in hole_sem.fragments.items():
                print(f"\t{l}: {f}")

        # Find all the possible ways to plug the formulas together.
        pluggings = hole_sem.pluggings()

        # Build FOL formula trees using the pluggings.
        readings = list(map(hole_sem.formula_tree, pluggings))

        # Print out the formulas in a textual format.
        if verbose:
            for i, r in enumerate(readings):
                print()
                print("%d. %s" % (i, r))
            print()

        all_readings.extend(readings)

    return all_readings


if __name__ == "__main__":
    for r in hole_readings("a dog barks"):
        print(r)
    print()
    for r in hole_readings("every girl chases a dog"):
        print(r)
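
# Illustrative note: with the sample hole grammar, "every girl chases a dog"
# typically yields two readings, one per quantifier scoping, schematically
# exists z.(dog(z) & all x.(girl(x) -> chase(x, z))) and
# all x.(girl(x) -> exists z.(dog(z) & chase(x, z))).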
261
Backend/venv/lib/python3.12/site-packages/nltk/sem/lfg.py
Normal file
@@ -0,0 +1,261 @@
# Natural Language Toolkit: Lexical Functional Grammar
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from itertools import chain

from nltk.internals import Counter


class FStructure(dict):
    def safeappend(self, key, item):
        """
        Append 'item' to the list at 'key'.  If no list exists for 'key', then
        construct one.
        """
        if key not in self:
            self[key] = []
        self[key].append(item)

    def __setitem__(self, key, value):
        dict.__setitem__(self, key.lower(), value)

    def __getitem__(self, key):
        return dict.__getitem__(self, key.lower())

    def __contains__(self, key):
        return dict.__contains__(self, key.lower())

    def to_glueformula_list(self, glue_dict):
        depgraph = self.to_depgraph()
        return glue_dict.to_glueformula_list(depgraph)

    def to_depgraph(self, rel=None):
        from nltk.parse.dependencygraph import DependencyGraph

        depgraph = DependencyGraph()
        nodes = depgraph.nodes

        self._to_depgraph(nodes, 0, "ROOT")

        # Add all the dependencies for all the nodes
        for address, node in nodes.items():
            for n2 in (n for n in nodes.values() if n["rel"] != "TOP"):
                if n2["head"] == address:
                    relation = n2["rel"]
                    node["deps"].setdefault(relation, [])
                    node["deps"][relation].append(n2["address"])

        depgraph.root = nodes[1]

        return depgraph

    def _to_depgraph(self, nodes, head, rel):
        index = len(nodes)

        nodes[index].update(
            {
                "address": index,
                "word": self.pred[0],
                "tag": self.pred[1],
                "head": head,
                "rel": rel,
            }
        )

        for feature in sorted(self):
            for item in sorted(self[feature]):
                if isinstance(item, FStructure):
                    item._to_depgraph(nodes, index, feature)
                elif isinstance(item, tuple):
                    new_index = len(nodes)
                    nodes[new_index].update(
                        {
                            "address": new_index,
                            "word": item[0],
                            "tag": item[1],
                            "head": index,
                            "rel": feature,
                        }
                    )
                elif isinstance(item, list):
                    for n in item:
                        n._to_depgraph(nodes, index, feature)
                else:
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )

    @staticmethod
    def read_depgraph(depgraph):
        return FStructure._read_depgraph(depgraph.root, depgraph)

    @staticmethod
    def _read_depgraph(node, depgraph, label_counter=None, parent=None):
        if not label_counter:
            label_counter = Counter()

        if node["rel"].lower() in ["spec", "punct"]:
            # the value of a 'spec' entry is a word, not an FStructure
            return (node["word"], node["tag"])

        else:
            fstruct = FStructure()
            fstruct.pred = None
            fstruct.label = FStructure._make_label(label_counter.get())

            fstruct.parent = parent

            word, tag = node["word"], node["tag"]
            if tag[:2] == "VB":
                if tag[2:3] == "D":
                    fstruct.safeappend("tense", ("PAST", "tense"))
                fstruct.pred = (word, tag[:2])

            if not fstruct.pred:
                fstruct.pred = (word, tag)

            children = [
                depgraph.nodes[idx]
                for idx in chain.from_iterable(node["deps"].values())
            ]
            for child in children:
                fstruct.safeappend(
                    child["rel"],
                    FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
                )

            return fstruct

    @staticmethod
    def _make_label(value):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        :param value: where to index into the list of characters
        :type value: int
        """
        letter = [
            "f",
            "g",
            "h",
            "i",
            "j",
            "k",
            "l",
            "m",
            "n",
            "o",
            "p",
            "q",
            "r",
            "s",
            "t",
            "u",
            "v",
            "w",
            "x",
            "y",
            "z",
            "a",
            "b",
            "c",
            "d",
            "e",
        ][value - 1]
        num = int(value) // 26
        if num > 0:
            return letter + str(num)
        else:
            return letter

    def __repr__(self):
        return self.__str__().replace("\n", "")

    def __str__(self):
        return self.pretty_format()

    def pretty_format(self, indent=3):
        try:
            accum = "%s:[" % self.label
        except AttributeError:  # 'label' may not have been assigned yet
            accum = "["
        try:
            accum += "pred '%s'" % (self.pred[0])
        except AttributeError:  # 'pred' may not have been assigned yet
            pass

        for feature in sorted(self):
            for item in self[feature]:
                if isinstance(item, FStructure):
                    next_indent = indent + len(feature) + 3 + len(self.label)
                    accum += "\n{}{} {}".format(
                        " " * (indent),
                        feature,
                        item.pretty_format(next_indent),
                    )
                elif isinstance(item, tuple):
                    accum += "\n{}{} '{}'".format(" " * (indent), feature, item[0])
                elif isinstance(item, list):
                    accum += "\n{}{} {{{}}}".format(
                        " " * (indent),
                        feature,
                        ("\n%s" % (" " * (indent + len(feature) + 2))).join(item),
                    )
                else:  # ERROR
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )
        return accum + "]"


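# Illustrative sketch: for the "John sees Mary" dependency graph used in the
# demo below, FStructure.read_depgraph produces an f-structure along the
# lines of
#   f:[pred 'sees'
#      obj g:[pred 'Mary']
#      sub h:[pred 'John']]
# (the labels f, g, h come from _make_label and depend on traversal order).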
def demo_read_depgraph():
    from nltk.parse.dependencygraph import DependencyGraph

    dg1 = DependencyGraph(
        """\
Esso       NNP  2  SUB
said       VBD  0  ROOT
the        DT   5  NMOD
Whiting    NNP  5  NMOD
field      NN   6  SUB
started    VBD  2  VMOD
production NN   6  OBJ
Tuesday    NNP  6  VMOD
"""
    )
    dg2 = DependencyGraph(
        """\
John  NNP  2  SUB
sees  VBP  0  ROOT
Mary  NNP  2  OBJ
"""
    )
    dg3 = DependencyGraph(
        """\
a      DT  2  SPEC
man    NN  3  SUBJ
walks  VB  0  ROOT
"""
    )
    dg4 = DependencyGraph(
        """\
every   DT  2  SPEC
girl    NN  3  SUBJ
chases  VB  0  ROOT
a       DT  5  SPEC
dog     NN  3  OBJ
"""
    )

    depgraphs = [dg1, dg2, dg3, dg4]
    for dg in depgraphs:
        print(FStructure.read_depgraph(dg))


if __name__ == "__main__":
    demo_read_depgraph()
481
Backend/venv/lib/python3.12/site-packages/nltk/sem/linearlogic.py
Normal file
@@ -0,0 +1,481 @@
# Natural Language Toolkit: Linear Logic
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from nltk.internals import Counter
from nltk.sem.logic import APP, LogicParser

_counter = Counter()


class Tokens:
    # Punctuation
    OPEN = "("
    CLOSE = ")"

    # Operations
    IMP = "-o"

    PUNCT = [OPEN, CLOSE]
    TOKENS = PUNCT + [IMP]


class LinearLogicParser(LogicParser):
    """A linear logic expression parser."""

    def __init__(self):
        LogicParser.__init__(self)

        self.operator_precedence = {APP: 1, Tokens.IMP: 2, None: 3}
        self.right_associated_operations += [Tokens.IMP]

    def get_all_symbols(self):
        return Tokens.TOKENS

    def handle(self, tok, context):
        if tok not in Tokens.TOKENS:
            return self.handle_variable(tok, context)
        elif tok == Tokens.OPEN:
            return self.handle_open(tok, context)

    def get_BooleanExpression_factory(self, tok):
        if tok == Tokens.IMP:
            return ImpExpression
        else:
            return None

    def make_BooleanExpression(self, factory, first, second):
        return factory(first, second)

    def attempt_ApplicationExpression(self, expression, context):
        """Attempt to make an application expression.  If the next tokens
        are an argument in parens, then the expression is a function being
        applied to that argument.  Otherwise, return the expression
        unchanged."""
        if self.has_priority(APP, context):
            if self.inRange(0) and self.token(0) == Tokens.OPEN:
                self.token()  # swallow the open paren
                argument = self.process_next_expression(APP)
                self.assertNextToken(Tokens.CLOSE)
                expression = ApplicationExpression(expression, argument, None)
        return expression

    def make_VariableExpression(self, name):
        if name[0].isupper():
            return VariableExpression(name)
        else:
            return ConstantExpression(name)


class Expression:
    _linear_logic_parser = LinearLogicParser()

    @classmethod
    def fromstring(cls, s):
        return cls._linear_logic_parser.parse(s)

    def applyto(self, other, other_indices=None):
        return ApplicationExpression(self, other, other_indices)

    def __call__(self, other):
        return self.applyto(other)

    def __repr__(self):
        return f"<{self.__class__.__name__} {self}>"


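# Illustrative sketch: Expression.fromstring parses linear logic formulas, and
# application consumes an implication's antecedent, e.g.
#   Expression.fromstring("(g -o f)")(Expression.fromstring("g")).simplify()
# yields f (cf. demo() at the bottom of this module).  Lowercase names parse
# as constants, uppercase names as variables.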
class AtomicExpression(Expression):
    def __init__(self, name, dependencies=None):
        """
        :param name: str for the constant name
        :param dependencies: list of int for the indices on which this atom is dependent
        """
        assert isinstance(name, str)
        self.name = name

        if not dependencies:
            dependencies = []
        self.dependencies = dependencies

    def simplify(self, bindings=None):
        """
        If 'self' is bound by 'bindings', return the atomic to which it is bound.
        Otherwise, return self.

        :param bindings: ``BindingDict`` A dictionary of bindings used to simplify
        :return: ``AtomicExpression``
        """
        if bindings and self in bindings:
            return bindings[self]
        else:
            return self

    def compile_pos(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list) for the compiled linear logic and any newly created glue formulas
        """
        self.dependencies = []
        return (self, [])

    def compile_neg(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list) for the compiled linear logic and any newly created glue formulas
        """
        self.dependencies = []
        return (self, [])

    def initialize_labels(self, fstruct):
        self.name = fstruct.initialize_label(self.name.lower())

    def __eq__(self, other):
        return self.__class__ == other.__class__ and self.name == other.name

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        accum = self.name
        if self.dependencies:
            accum += "%s" % self.dependencies
        return accum

    def __hash__(self):
        return hash(self.name)


class ConstantExpression(AtomicExpression):
    def unify(self, other, bindings):
        """
        If 'other' is a constant, then it must be equal to 'self'.  If 'other' is a variable,
        then it must not be bound to anything other than 'self'.

        :param other: ``Expression``
        :param bindings: ``BindingDict`` A dictionary of all current bindings
        :return: ``BindingDict`` A new combined dictionary of 'bindings' and any new binding
        :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
        """
        assert isinstance(other, Expression)
        if isinstance(other, VariableExpression):
            try:
                return bindings + BindingDict([(other, self)])
            except VariableBindingException:
                pass
        elif self == other:
            return bindings
        raise UnificationException(self, other, bindings)


class VariableExpression(AtomicExpression):
    def unify(self, other, bindings):
        """
        'self' must not be bound to anything other than 'other'.

        :param other: ``Expression``
        :param bindings: ``BindingDict`` A dictionary of all current bindings
        :return: ``BindingDict`` A new combined dictionary of 'bindings' and the new binding
        :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
        """
        assert isinstance(other, Expression)
        try:
            if self == other:
                return bindings
            else:
                return bindings + BindingDict([(self, other)])
        except VariableBindingException as e:
            raise UnificationException(self, other, bindings) from e


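# Illustrative sketch: unification only constrains variables, e.g.
#   g = ConstantExpression("g"); H = VariableExpression("H")
#   g.unify(H, BindingDict())                        # -> {H: g}
#   g.unify(ConstantExpression("f"), BindingDict())  # raises UnificationException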
class ImpExpression(Expression):
    def __init__(self, antecedent, consequent):
        """
        :param antecedent: ``Expression`` for the antecedent
        :param consequent: ``Expression`` for the consequent
        """
        assert isinstance(antecedent, Expression)
        assert isinstance(consequent, Expression)
        self.antecedent = antecedent
        self.consequent = consequent

    def simplify(self, bindings=None):
        return self.__class__(
            self.antecedent.simplify(bindings), self.consequent.simplify(bindings)
        )

    def unify(self, other, bindings):
        """
        Both the antecedent and consequent of 'self' and 'other' must unify.

        :param other: ``ImpExpression``
        :param bindings: ``BindingDict`` A dictionary of all current bindings
        :return: ``BindingDict`` A new combined dictionary of 'bindings' and any new bindings
        :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
        """
        assert isinstance(other, ImpExpression)
        try:
            return (
                bindings
                + self.antecedent.unify(other.antecedent, bindings)
                + self.consequent.unify(other.consequent, bindings)
            )
        except VariableBindingException as e:
            raise UnificationException(self, other, bindings) from e

    def compile_pos(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list) for the compiled linear logic and any newly created glue formulas
        """
        (a, a_new) = self.antecedent.compile_neg(index_counter, glueFormulaFactory)
        (c, c_new) = self.consequent.compile_pos(index_counter, glueFormulaFactory)
        return (ImpExpression(a, c), a_new + c_new)

    def compile_neg(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list of ``GlueFormula``) for the compiled linear logic and any newly created glue formulas
        """
        (a, a_new) = self.antecedent.compile_pos(index_counter, glueFormulaFactory)
        (c, c_new) = self.consequent.compile_neg(index_counter, glueFormulaFactory)
        fresh_index = index_counter.get()
        c.dependencies.append(fresh_index)
        new_v = glueFormulaFactory("v%s" % fresh_index, a, {fresh_index})
        return (c, a_new + c_new + [new_v])

    def initialize_labels(self, fstruct):
        self.antecedent.initialize_labels(fstruct)
        self.consequent.initialize_labels(fstruct)

    def __eq__(self, other):
        return (
            self.__class__ == other.__class__
            and self.antecedent == other.antecedent
            and self.consequent == other.consequent
        )

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        return "{}{} {} {}{}".format(
            Tokens.OPEN,
            self.antecedent,
            Tokens.IMP,
            self.consequent,
            Tokens.CLOSE,
        )

    def __hash__(self):
        return hash(f"{hash(self.antecedent)}{Tokens.IMP}{hash(self.consequent)}")


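# Illustrative note on compilation: compiling ((g -o G) -o G) positively
# compiles the inner (g -o G) negatively, which splits g off as an auxiliary
# premise carrying a fresh index and marks the inner G as dependent on that
# index, yielding roughly (G[1] -o G) plus an auxiliary glue formula "v1: g".
# ApplicationExpression below checks those dependency indices when compiled
# premises are combined.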
class ApplicationExpression(Expression):
    def __init__(self, function, argument, argument_indices=None):
        """
        :param function: ``Expression`` for the function
        :param argument: ``Expression`` for the argument
        :param argument_indices: set for the indices of the glue formula from which the argument came
        :raise LinearLogicApplicationException: If 'function' cannot be applied to 'argument' given 'argument_indices'.
        """
        function_simp = function.simplify()
        argument_simp = argument.simplify()

        assert isinstance(function_simp, ImpExpression)
        assert isinstance(argument_simp, Expression)

        bindings = BindingDict()

        try:
            if isinstance(function, ApplicationExpression):
                bindings += function.bindings
            if isinstance(argument, ApplicationExpression):
                bindings += argument.bindings
            bindings += function_simp.antecedent.unify(argument_simp, bindings)
        except UnificationException as e:
            raise LinearLogicApplicationException(
                f"Cannot apply {function_simp} to {argument_simp}. {e}"
            ) from e

        # If you are running it on compiled premises, more conditions apply
        if argument_indices:
            # A.dependencies of (A -o (B -o C)) must be a proper subset of argument_indices
            if not set(function_simp.antecedent.dependencies) < argument_indices:
                raise LinearLogicApplicationException(
                    "Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s"
                    % (function_simp, argument_simp)
                )
            if set(function_simp.antecedent.dependencies) == argument_indices:
                raise LinearLogicApplicationException(
                    "Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s"
                    % (function_simp, argument_simp)
                )

        self.function = function
        self.argument = argument
        self.bindings = bindings

    def simplify(self, bindings=None):
        """
        Since function is an implication, return its consequent.  There should be
        no need to check that the application is valid since the checking is done
        by the constructor.

        :param bindings: ``BindingDict`` A dictionary of bindings used to simplify
        :return: ``Expression``
        """
        if not bindings:
            bindings = self.bindings

        return self.function.simplify(bindings).consequent

    def __eq__(self, other):
        return (
            self.__class__ == other.__class__
            and self.function == other.function
            and self.argument == other.argument
        )

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        return "%s" % self.function + Tokens.OPEN + "%s" % self.argument + Tokens.CLOSE

    def __hash__(self):
        # Hash on the function/argument pair, mirroring __eq__.
        return hash(f"{hash(self.function)}{Tokens.OPEN}{hash(self.argument)}")


class BindingDict:
    def __init__(self, bindings=None):
        """
        :param bindings:
            list [(``VariableExpression``, ``AtomicExpression``)] to initialize the dictionary
            dict {``VariableExpression``: ``AtomicExpression``} to initialize the dictionary
        """
        self.d = {}

        if isinstance(bindings, dict):
            bindings = bindings.items()

        if bindings:
            for v, b in bindings:
                self[v] = b

    def __setitem__(self, variable, binding):
        """
        A binding is consistent with the dict if its variable is not already bound, OR if its
        variable is already bound to its argument.

        :param variable: ``VariableExpression`` The variable to bind
        :param binding: ``Expression`` The expression to which 'variable' should be bound
        :raise VariableBindingException: If the variable cannot be bound in this dictionary
        """
        assert isinstance(variable, VariableExpression)
        assert isinstance(binding, Expression)

        assert variable != binding

        existing = self.d.get(variable, None)

        if not existing or binding == existing:
            self.d[variable] = binding
        else:
            raise VariableBindingException(
                "Variable %s already bound to another value" % (variable)
            )

    def __getitem__(self, variable):
        """
        Return the expression to which 'variable' is bound
        """
        assert isinstance(variable, VariableExpression)

        intermediate = self.d[variable]
        while intermediate:
            try:
                intermediate = self.d[intermediate]
            except KeyError:
                return intermediate

    def __contains__(self, item):
        return item in self.d

    def __add__(self, other):
        """
        :param other: ``BindingDict`` The dict with which to combine self
        :return: ``BindingDict`` A new dict containing all the elements of both parameters
        :raise VariableBindingException: If the parameter dictionaries are not consistent with each other
        """
        try:
            combined = BindingDict()
            for v in self.d:
                combined[v] = self.d[v]
            for v in other.d:
                combined[v] = other.d[v]
            return combined
        except VariableBindingException as e:
            raise VariableBindingException(
                "Attempting to add two contradicting"
                " VariableBindingsLists: %s, %s" % (self, other)
            ) from e

    def __ne__(self, other):
        return not self == other

    def __eq__(self, other):
        if not isinstance(other, BindingDict):
            raise TypeError
        return self.d == other.d

    def __str__(self):
        return "{" + ", ".join(f"{v}: {self.d[v]}" for v in sorted(self.d.keys())) + "}"

    def __repr__(self):
        return "BindingDict: %s" % self


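# Illustrative sketch (hypothetical expressions H, g, f): combining binding
# dictionaries is consistency-checked, e.g.
#   BindingDict([(H, g)]) + BindingDict([(H, g)])  # fine, same binding
#   BindingDict([(H, g)]) + BindingDict([(H, f)])  # raises VariableBindingException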
class VariableBindingException(Exception):
    pass


class UnificationException(Exception):
    def __init__(self, a, b, bindings):
        Exception.__init__(self, f"Cannot unify {a} with {b} given {bindings}")


class LinearLogicApplicationException(Exception):
    pass


def demo():
    lexpr = Expression.fromstring

    print(lexpr(r"f"))
    print(lexpr(r"(g -o f)"))
    print(lexpr(r"((g -o G) -o G)"))
    print(lexpr(r"g -o h -o f"))
    print(lexpr(r"(g -o f)(g)").simplify())
    print(lexpr(r"(H -o f)(g)").simplify())
    print(lexpr(r"((g -o G) -o G)((g -o f))").simplify())
    print(lexpr(r"(H -o H)((g -o f))").simplify())


if __name__ == "__main__":
    demo()
2065
Backend/venv/lib/python3.12/site-packages/nltk/sem/logic.py
Normal file
File diff suppressed because it is too large
539
Backend/venv/lib/python3.12/site-packages/nltk/sem/relextract.py
Normal file
@@ -0,0 +1,539 @@
# Natural Language Toolkit: Relation Extraction
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
Code for extracting relational triples from the ieer and conll2002 corpora.

Relations are stored internally as dictionaries ('reldicts').

The two serialization outputs are "rtuple" and "clause".

- An rtuple is a tuple of the form ``(subj, filler, obj)``,
  where ``subj`` and ``obj`` are pairs of Named Entity mentions, and ``filler`` is the string of words
  occurring between ``subj`` and ``obj`` (with no intervening NEs).  Strings are printed via ``repr()`` to
  circumvent locale variations in rendering utf-8 encoded strings.
- A clause is an atom of the form ``relsym(subjsym, objsym)``,
  where the relation, subject and object have been canonicalized to single strings.
"""

# todo: get a more general solution to canonicalized symbols for clauses -- maybe use xmlcharrefs?

import html
import re
from collections import defaultdict

# Dictionary that associates corpora with NE classes
NE_CLASSES = {
    "ieer": [
        "LOCATION",
        "ORGANIZATION",
        "PERSON",
        "DURATION",
        "DATE",
        "CARDINAL",
        "PERCENT",
        "MONEY",
        "MEASURE",
    ],
    "conll2002": ["LOC", "PER", "ORG"],
    "ace": [
        "LOCATION",
        "ORGANIZATION",
        "PERSON",
        "DURATION",
        "DATE",
        "CARDINAL",
        "PERCENT",
        "MONEY",
        "MEASURE",
        "FACILITY",
        "GPE",
    ],
}

# Allow abbreviated class labels
short2long = dict(LOC="LOCATION", ORG="ORGANIZATION", PER="PERSON")
long2short = dict(LOCATION="LOC", ORGANIZATION="ORG", PERSON="PER")


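# Illustrative sketch of the two serializations (hypothetical values):
#   rtuple: [PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly']
#   clause: IN('whiting_field', 'atlanta')
# rtuple keeps the raw mention strings; clause lower-cases and joins them with
# underscores via list2sym.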
def _expand(type):
    """
    Expand an NE class name.
    :type type: str
    :rtype: str
    """
    try:
        return short2long[type]
    except KeyError:
        return type


def class_abbrev(type):
    """
    Abbreviate an NE class name.
    :type type: str
    :rtype: str
    """
    try:
        return long2short[type]
    except KeyError:
        return type


def _join(lst, sep=" ", untag=False):
    """
    Join a list into a string, turning tag tuples into tag strings or just words.
    :param untag: if ``True``, omit the tag from tagged input strings.
    :type lst: list
    :rtype: str
    """
    try:
        return sep.join(lst)
    except TypeError:
        if untag:
            return sep.join(tup[0] for tup in lst)
        from nltk.tag import tuple2str

        return sep.join(tuple2str(tup) for tup in lst)


def descape_entity(m, defs=html.entities.entitydefs):
    """
    Translate one entity to its ISO Latin value.
    Inspired by example from effbot.org
    """
    try:
        return defs[m.group(1)]

    except KeyError:
        return m.group(0)  # use as is


def list2sym(lst):
    """
    Convert a list of strings into a canonical symbol.
    :type lst: list
    :return: a Unicode string without whitespace
    :rtype: unicode
    """
    sym = _join(lst, "_", untag=True)
    sym = sym.lower()
    ENT = re.compile(r"&(\w+?);")
    sym = ENT.sub(descape_entity, sym)
    sym = sym.replace(".", "")
    return sym


def tree2semi_rel(tree):
    """
    Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).

    In order to facilitate the construction of (``Tree``, string, ``Tree``) triples, this
    identifies pairs whose first member is a list (possibly empty) of terminal
    strings, and whose second member is a ``Tree`` of the form (NE_label, terminals).

    :param tree: a chunk tree
    :return: a list of pairs (list(str), ``Tree``)
    :rtype: list of tuple
    """

    from nltk.tree import Tree

    semi_rels = []
    semi_rel = [[], None]

    for dtr in tree:
        if not isinstance(dtr, Tree):
            semi_rel[0].append(dtr)
        else:
            # dtr is a Tree
            semi_rel[1] = dtr
            semi_rels.append(semi_rel)
            semi_rel = [[], None]
    return semi_rels


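# Hypothetical helper (illustrative only, not part of the NLTK API): shows the
# shape of tree2semi_rel's output on a toy chunked sentence; each pair is
# (words since the last NE, the next NE subtree).
def _demo_tree2semi_rel():
    from nltk.tree import Tree

    chunked = Tree(
        "S",
        [
            Tree("PER", [("John", "NNP")]),
            ("works", "VBZ"),
            ("for", "IN"),
            Tree("ORG", [("Acme", "NNP")]),
        ],
    )
    for words, ne in tree2semi_rel(chunked):
        # first iteration: ([], PER subtree); second: (the two filler words, ORG subtree)
        print(words, ne)

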
def semi_rel2reldict(pairs, window=5, trace=False):
    """
    Converts the pairs generated by ``tree2semi_rel`` into a 'reldict': a dictionary which
    stores information about the subject and object NEs plus the filler between them.
    Additionally, a left and right context of length <= window are captured (within
    a given input sentence).

    :param pairs: a pair of list(str) and ``Tree``, as generated by ``tree2semi_rel``
    :param window: a threshold for the number of items to include in the left and right context
    :type window: int
    :return: 'relation' dictionaries whose keys are 'lcon', 'subjclass', 'subjtext', 'subjsym', 'filler', 'objclass', 'objtext', 'objsym' and 'rcon'
    :rtype: list(defaultdict)
    """
    result = []
    while len(pairs) > 2:
        reldict = defaultdict(str)
        reldict["lcon"] = _join(pairs[0][0][-window:])
        reldict["subjclass"] = pairs[0][1].label()
        reldict["subjtext"] = _join(pairs[0][1].leaves())
        reldict["subjsym"] = list2sym(pairs[0][1].leaves())
        reldict["filler"] = _join(pairs[1][0])
        reldict["untagged_filler"] = _join(pairs[1][0], untag=True)
        reldict["objclass"] = pairs[1][1].label()
        reldict["objtext"] = _join(pairs[1][1].leaves())
        reldict["objsym"] = list2sym(pairs[1][1].leaves())
        reldict["rcon"] = _join(pairs[2][0][:window])
        if trace:
            print(
                "(%s(%s, %s)"
                % (
                    reldict["untagged_filler"],
                    reldict["subjclass"],
                    reldict["objclass"],
                )
            )
        result.append(reldict)
        pairs = pairs[1:]
    return result


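# Illustrative note: semi_rel2reldict needs at least three semi-relation pairs
# (subject NE, object NE, and a following pair supplying the right context),
# so sentences with fewer than three NEs produce no reldicts.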
def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10):
    """
    Filter the output of ``semi_rel2reldict`` according to specified NE classes and a filler pattern.

    The parameters ``subjclass`` and ``objclass`` can be used to restrict the
    Named Entities to particular types (any of 'LOCATION', 'ORGANIZATION',
    'PERSON', 'DURATION', 'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE').

    :param subjclass: the class of the subject Named Entity.
    :type subjclass: str
    :param objclass: the class of the object Named Entity.
    :type objclass: str
    :param doc: input document
    :type doc: ieer document or a list of chunk trees
    :param corpus: name of the corpus to take as input; possible values are
        'ieer' and 'conll2002'
    :type corpus: str
    :param pattern: a regular expression for filtering the fillers of
        retrieved triples.
    :type pattern: SRE_Pattern
    :param window: filters out fillers which exceed this threshold
    :type window: int
    :return: see ``semi_rel2reldict``
    :rtype: list(defaultdict)
    """

    if subjclass and subjclass not in NE_CLASSES[corpus]:
        if _expand(subjclass) in NE_CLASSES[corpus]:
            subjclass = _expand(subjclass)
        else:
            raise ValueError(
                "your value for the subject type has not been recognized: %s"
                % subjclass
            )
    if objclass and objclass not in NE_CLASSES[corpus]:
        if _expand(objclass) in NE_CLASSES[corpus]:
            objclass = _expand(objclass)
        else:
            raise ValueError(
                "your value for the object type has not been recognized: %s" % objclass
            )

    if corpus == "ace" or corpus == "conll2002":
        pairs = tree2semi_rel(doc)
    elif corpus == "ieer":
        pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline)
    else:
        raise ValueError("corpus type not recognized")

    reldicts = semi_rel2reldict(pairs)

    relfilter = lambda x: (
        x["subjclass"] == subjclass
        and len(x["filler"].split()) <= window
        and pattern.match(x["filler"])
        and x["objclass"] == objclass
    )

    return list(filter(relfilter, reldicts))


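# Illustrative usage sketch (mirrors in_demo below): find ORG-in-LOC pairs in
# one ieer document.
#   from nltk.corpus import ieer
#   IN = re.compile(r".*\bin\b(?!\b.+ing)")
#   doc = ieer.parsed_docs("NYT_19980315")[0]
#   for rel in extract_rels("ORG", "LOC", doc, corpus="ieer", pattern=IN):
#       print(rtuple(rel))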
def rtuple(reldict, lcon=False, rcon=False):
    """
    Pretty print the reldict as an rtuple.
    :param reldict: a relation dictionary
    :type reldict: defaultdict
    """
    items = [
        class_abbrev(reldict["subjclass"]),
        reldict["subjtext"],
        reldict["filler"],
        class_abbrev(reldict["objclass"]),
        reldict["objtext"],
    ]
    format = "[%s: %r] %r [%s: %r]"
    if lcon:
        items = [reldict["lcon"]] + items
        format = "...%r)" + format
    if rcon:
        items.append(reldict["rcon"])
        format = format + "(%r..."
    printargs = tuple(items)
    return format % printargs


def clause(reldict, relsym):
    """
    Print the relation in clausal form.
    :param reldict: a relation dictionary
    :type reldict: defaultdict
    :param relsym: a label for the relation
    :type relsym: str
    """
    items = (relsym, reldict["subjsym"], reldict["objsym"])
    return "%s(%r, %r)" % items


#######################################################
# Demos of relation extraction with regular expressions
#######################################################


############################################
# Example of in(ORG, LOC)
############################################
def in_demo(trace=0, sql=True):
    """
    Select pairs of organizations and locations whose mentions occur with an
    intervening occurrence of the preposition "in".

    If the sql parameter is set to True, then the entity pairs are loaded into
    an in-memory database, and subsequently pulled out using an SQL "SELECT"
    query.
    """
    from nltk.corpus import ieer

    if sql:
        try:
            import sqlite3

            connection = sqlite3.connect(":memory:")
            cur = connection.cursor()
            cur.execute(
                """create table Locations
                (OrgName text, LocationName text, DocID text)"""
            )
        except ImportError:
            import warnings

            warnings.warn("Cannot import sqlite; sql flag will be ignored.")

    IN = re.compile(r".*\bin\b(?!\b.+ing)")

    print()
    print("IEER: in(ORG, LOC) -- just the clauses:")
    print("=" * 45)

    for file in ieer.fileids():
        for doc in ieer.parsed_docs(file):
            if trace:
                print(doc.docno)
                print("=" * 15)
            for rel in extract_rels("ORG", "LOC", doc, corpus="ieer", pattern=IN):
                print(clause(rel, relsym="IN"))
                if sql:
                    try:
                        rtuple = (rel["subjtext"], rel["objtext"], doc.docno)
                        cur.execute(
                            """insert into Locations
                            values (?, ?, ?)""",
                            rtuple,
                        )
                        connection.commit()
                    except NameError:
                        pass

    if sql:
        try:
            cur.execute(
                """select OrgName from Locations
                where LocationName = 'Atlanta'"""
            )
            print()
            print("Extract data from SQL table: ORGs in Atlanta")
            print("-" * 15)
            for row in cur:
                print(row)
        except NameError:
            pass


############################################
# Example of has_role(PER, ORG)
############################################


def roles_demo(trace=0):
    from nltk.corpus import ieer

    roles = r"""
    (.*(                   # assorted roles
    analyst|
    chair(wo)?man|
    commissioner|
    counsel|
    director|
    economist|
    editor|
    executive|
    foreman|
    governor|
    head|
    lawyer|
    leader|
    librarian).*)|
    manager|
    partner|
    president|
    producer|
    professor|
    researcher|
    spokes(wo)?man|
    writer|
    ,\sof\sthe?\s*         # "X, of (the) Y"
    """
    ROLES = re.compile(roles, re.VERBOSE)

    print()
    print("IEER: has_role(PER, ORG) -- raw rtuples:")
    print("=" * 45)

    for file in ieer.fileids():
        for doc in ieer.parsed_docs(file):
            lcon = rcon = False
            if trace:
                print(doc.docno)
                print("=" * 15)
                lcon = rcon = True
            for rel in extract_rels("PER", "ORG", doc, corpus="ieer", pattern=ROLES):
                print(rtuple(rel, lcon=lcon, rcon=rcon))


##############################################
### Show what's in the IEER Headlines
##############################################


def ieer_headlines():
    from nltk.corpus import ieer
    from nltk.tree import Tree

    print("IEER: First 20 Headlines")
    print("=" * 45)

    trees = [
        (doc.docno, doc.headline)
        for file in ieer.fileids()
        for doc in ieer.parsed_docs(file)
    ]
    for tree in trees[:20]:
        print()
        print("%s:\n%s" % tree)


#############################################
## Dutch CONLL2002: take_on_role(PER, ORG)
#############################################


def conllned(trace=1):
    """
    Find the copula+'van' relation ('of') in the Dutch tagged training corpus
    from CoNLL 2002.
    """

    from nltk.corpus import conll2002

    vnv = """
    (
    is/V|    # 3rd sing present and
    was/V|   # past forms of the verb zijn ('be')
    werd/V|  # and also present
    wordt/V  # past of worden ('become')
    )
    .*       # followed by anything
    van/Prep # followed by van ('of')
    """
    VAN = re.compile(vnv, re.VERBOSE)

    print()
    print("Dutch CoNLL2002: van(PER, ORG) -- raw rtuples with context:")
    print("=" * 45)

    for doc in conll2002.chunked_sents("ned.train"):
        lcon = rcon = False
        if trace:
            lcon = rcon = True
        for rel in extract_rels(
            "PER", "ORG", doc, corpus="conll2002", pattern=VAN, window=10
        ):
            print(rtuple(rel, lcon=lcon, rcon=rcon))


#############################################
## Spanish CONLL2002: (PER, ORG)
#############################################


def conllesp():
    from nltk.corpus import conll2002

    de = """
    .*
    (
    de/SP|
    del/SP
    )
    """
    DE = re.compile(de, re.VERBOSE)

    print()
    print("Spanish CoNLL2002: de(ORG, LOC) -- just the first 10 clauses:")
    print("=" * 45)
    rels = [
        rel
        for doc in conll2002.chunked_sents("esp.train")
        for rel in extract_rels("ORG", "LOC", doc, corpus="conll2002", pattern=DE)
    ]
    for r in rels[:10]:
        print(clause(r, relsym="DE"))
    print()


def ne_chunked():
    print()
    print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
    print("=" * 45)
    ROLE = re.compile(
        r".*(chairman|president|trader|scientist|economist|analyst|partner).*"
    )
    rels = []
    for i, sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]):
        sent = nltk.ne_chunk(sent)
        rels = extract_rels("PER", "ORG", sent, corpus="ace", pattern=ROLE, window=7)
        for rel in rels:
            print(f"{i:<5}{rtuple(rel)}")


if __name__ == "__main__":
    import nltk
    from nltk.sem import relextract

    in_demo(trace=0)
    roles_demo(trace=0)
    conllned()
    conllesp()
    ieer_headlines()
    ne_chunked()
148
Backend/venv/lib/python3.12/site-packages/nltk/sem/skolemize.py
Normal file
@@ -0,0 +1,148 @@
# Natural Language Toolkit: Semantic Interpretation
#
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from nltk.sem.logic import (
    AllExpression,
    AndExpression,
    ApplicationExpression,
    EqualityExpression,
    ExistsExpression,
    IffExpression,
    ImpExpression,
    NegatedExpression,
    OrExpression,
    VariableExpression,
    skolem_function,
    unique_variable,
)


def skolemize(expression, univ_scope=None, used_variables=None):
    """
    Skolemize the expression and convert to conjunctive normal form (CNF)
    """
    if univ_scope is None:
        univ_scope = set()
    if used_variables is None:
        used_variables = set()

    if isinstance(expression, AllExpression):
        term = skolemize(
            expression.term,
            univ_scope | {expression.variable},
            used_variables | {expression.variable},
        )
        return term.replace(
            expression.variable,
            VariableExpression(unique_variable(ignore=used_variables)),
        )
    elif isinstance(expression, AndExpression):
        return skolemize(expression.first, univ_scope, used_variables) & skolemize(
            expression.second, univ_scope, used_variables
        )
    elif isinstance(expression, OrExpression):
        return to_cnf(
            skolemize(expression.first, univ_scope, used_variables),
            skolemize(expression.second, univ_scope, used_variables),
        )
    elif isinstance(expression, ImpExpression):
        return to_cnf(
            skolemize(-expression.first, univ_scope, used_variables),
            skolemize(expression.second, univ_scope, used_variables),
        )
    elif isinstance(expression, IffExpression):
        return to_cnf(
            skolemize(-expression.first, univ_scope, used_variables),
            skolemize(expression.second, univ_scope, used_variables),
        ) & to_cnf(
            skolemize(expression.first, univ_scope, used_variables),
            skolemize(-expression.second, univ_scope, used_variables),
        )
    elif isinstance(expression, EqualityExpression):
        return expression
    elif isinstance(expression, NegatedExpression):
        negated = expression.term
        if isinstance(negated, AllExpression):
            term = skolemize(
                -negated.term, univ_scope, used_variables | {negated.variable}
            )
            if univ_scope:
                return term.replace(negated.variable, skolem_function(univ_scope))
            else:
                skolem_constant = VariableExpression(
                    unique_variable(ignore=used_variables)
                )
                return term.replace(negated.variable, skolem_constant)
        elif isinstance(negated, AndExpression):
            return to_cnf(
                skolemize(-negated.first, univ_scope, used_variables),
                skolemize(-negated.second, univ_scope, used_variables),
            )
        elif isinstance(negated, OrExpression):
            return skolemize(-negated.first, univ_scope, used_variables) & skolemize(
                -negated.second, univ_scope, used_variables
            )
        elif isinstance(negated, ImpExpression):
            return skolemize(negated.first, univ_scope, used_variables) & skolemize(
                -negated.second, univ_scope, used_variables
            )
        elif isinstance(negated, IffExpression):
            return to_cnf(
                skolemize(-negated.first, univ_scope, used_variables),
                skolemize(-negated.second, univ_scope, used_variables),
            ) & to_cnf(
                skolemize(negated.first, univ_scope, used_variables),
                skolemize(negated.second, univ_scope, used_variables),
            )
        elif isinstance(negated, EqualityExpression):
            return expression
        elif isinstance(negated, NegatedExpression):
            return skolemize(negated.term, univ_scope, used_variables)
        elif isinstance(negated, ExistsExpression):
            term = skolemize(
                -negated.term,
                univ_scope | {negated.variable},
                used_variables | {negated.variable},
            )
            return term.replace(
                negated.variable,
                VariableExpression(unique_variable(ignore=used_variables)),
            )
        elif isinstance(negated, ApplicationExpression):
            return expression
        else:
            raise Exception("'%s' cannot be skolemized" % expression)
    elif isinstance(expression, ExistsExpression):
        term = skolemize(
            expression.term, univ_scope, used_variables | {expression.variable}
        )
        if univ_scope:
            return term.replace(expression.variable, skolem_function(univ_scope))
        else:
            skolem_constant = VariableExpression(unique_variable(ignore=used_variables))
            return term.replace(expression.variable, skolem_constant)
    elif isinstance(expression, ApplicationExpression):
        return expression
    else:
        raise Exception("'%s' cannot be skolemized" % expression)


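# Illustrative worked example: skolemizing all x.(girl(x) -> exists y.chase(x,y))
# rewrites the implication as a disjunction, renames x to a fresh variable z1,
# and, because the existential sits under a universal, replaces y with a Skolem
# function of the universally bound variable, giving roughly
# (-girl(z1) | chase(z1, F1(z1))).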
def to_cnf(first, second):
    """
    Convert this split disjunction to conjunctive normal form (CNF)
    """
    if isinstance(first, AndExpression):
        r_first = to_cnf(first.first, second)
        r_second = to_cnf(first.second, second)
        return r_first & r_second
    elif isinstance(second, AndExpression):
        r_first = to_cnf(first, second.first)
        r_second = to_cnf(first, second.second)
        return r_first & r_second
    else:
        return first | second
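
# Illustrative note: to_cnf distributes disjunction over conjunction, e.g.
# to_cnf(A & B, C) yields ((A | C) & (B | C)), so a disjunction of two CNF
# formulas is itself in CNF.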
307
Backend/venv/lib/python3.12/site-packages/nltk/sem/util.py
Normal file
@@ -0,0 +1,307 @@
# Natural Language Toolkit: Semantic Interpretation
#
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
Utility functions for batch-processing sentences: parsing and
extraction of the semantic representation of the root node of the
syntax tree, followed by evaluation of the semantic representation in
a first-order model.
"""

import codecs

from nltk.sem import evaluate

##############################################################
## Utility functions for connecting parse output to semantics
##############################################################


def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)
    :return: a mapping from input sentences to a list of ``Tree`` instances.
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses


def root_semrep(syntree, semkey="SEM"):
    """
    Find the semantic representation at the root of a tree.

    :param syntree: a parse ``Tree``
    :param semkey: the feature label to use for the root semantics in the tree
    :return: the semantic representation at the root of a ``Tree``
    :rtype: sem.Expression
    """
    from nltk.grammar import FeatStructNonterminal

    node = syntree.label()
    assert isinstance(node, FeatStructNonterminal)
    try:
        return node[semkey]
    except KeyError:
        print(node, end=" ")
        print("has no specification for the feature %s" % semkey)
        raise


def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
    """
    Add the semantic representation to each syntactic parse tree
    of each input sentence.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
    """
    return [
        [(syn, root_semrep(syn, semkey)) for syn in syntrees]
        for syntrees in parse_sents(inputs, grammar, trace=trace)
    ]


def evaluate_sents(inputs, grammar, model, assignment, trace=0):
    """
    Add the truth-in-a-model value to each semantic representation
    for each syntactic parse of each input sentence.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
    """
    return [
        [
            (syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
            for (syn, sem) in interpretations
        ]
        for interpretations in interpret_sents(inputs, grammar)
    ]


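# Illustrative sketch of the full pipeline, assuming the model m0 and
# assignment g0 built by demo_model0() below:
#   sents = ["John sees Mary"]
#   results = evaluate_sents(sents, "grammars/sample_grammars/sem2.fcfg", m0, g0)
#   for (syntree, semrep, value) in results[0]:
#       print(semrep, value)   # e.g. see(john,mary) True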
def demo_model0():
    global m0, g0
    # Initialize a valuation of non-logical constants.
    v = [
        ("john", "b1"),
        ("mary", "g1"),
        ("suzie", "g2"),
        ("fido", "d1"),
        ("tess", "d2"),
        ("noosa", "n"),
        ("girl", {"g1", "g2"}),
        ("boy", {"b1", "b2"}),
        ("dog", {"d1", "d2"}),
        ("bark", {"d1", "d2"}),
        ("walk", {"b1", "g2", "d1"}),
        ("chase", {("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")}),
        (
            "see",
            {("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")},
        ),
        ("in", {("b1", "n"), ("b2", "n"), ("d2", "n")}),
        ("with", {("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")}),
    ]
    # Read in the data from ``v``
    val = evaluate.Valuation(v)
    # Bind ``dom`` to the ``domain`` property of ``val``
    dom = val.domain
    # Initialize a model with parameters ``dom`` and ``val``.
    m0 = evaluate.Model(dom, val)
    # Initialize a variable assignment with parameter ``dom``
    g0 = evaluate.Assignment(dom)


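# Example (sketch): after demo_model0() has run, formulas can be evaluated
# directly against the module-level model and assignment; "walk(john)" is true
# here because 'john' denotes 'b1' and 'b1' is in the 'walk' set:
#
#     >>> demo_model0()
#     >>> m0.evaluate("walk(john)", g0)
#     True

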
def read_sents(filename, encoding="utf8"):
    with codecs.open(filename, "r", encoding) as fp:
        sents = [l.rstrip() for l in fp]

    # get rid of blank lines and comment lines
    sents = [l for l in sents if len(l) > 0]
    sents = [l for l in sents if not l.startswith("#")]
    return sents


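# Example (sketch; "sentences.txt" is a hypothetical plain-text file with one
# sentence per line; blank lines and lines beginning with '#' are skipped):
#
#     >>> sents = read_sents("sentences.txt")

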
def demo_legacy_grammar():
    """
    Check that interpret_sents() is compatible with legacy grammars that use
    a lowercase 'sem' feature.

    The test grammar is defined inline below.
    """
    from nltk.grammar import FeatureGrammar

    g = FeatureGrammar.fromstring(
        """
        % start S
        S[sem=<hello>] -> 'hello'
        """
    )
    print("Reading grammar: %s" % g)
    print("*" * 20)
    for reading in interpret_sents(["hello"], g, semkey="sem"):
        syn, sem = reading[0]
        print()
        print("output: ", sem)


def demo():
    from optparse import OptionParser

    description = """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(
        evaluate=True,
        beta=True,
        syntrace=0,
        semtrace=0,
        demo="default",
        grammar="",
        sentences="",
    )

    opts.add_option(
        "-d",
        "--demo",
        dest="demo",
        help="choose demo D; omit this for the default demo, or specify 'chat80'",
        metavar="D",
    )
    opts.add_option(
        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
    )
    opts.add_option(
        "-m",
        "--model",
        dest="model",
        help="import model M (omit '.py' suffix)",
        metavar="M",
    )
    opts.add_option(
        "-s",
        "--sentences",
        dest="sentences",
        help="read in a file of test sentences S",
        metavar="S",
    )
    opts.add_option(
        "-e",
        "--no-eval",
        action="store_false",
        dest="evaluate",
        help="just do a syntactic analysis",
    )
    opts.add_option(
        "-b",
        "--no-beta-reduction",
        action="store_false",
        dest="beta",
        help="don't carry out beta-reduction",
    )
    opts.add_option(
        "-t",
        "--syntrace",
        action="count",
        dest="syntrace",
        help="set syntactic tracing on; requires '-e' option",
    )
    opts.add_option(
        "-T",
        "--semtrace",
        action="count",
        dest="semtrace",
        help="set semantic tracing on",
    )
    (options, args) = opts.parse_args()

    SPACER = "-" * 30

    demo_model0()

    sents = [
        "Fido sees a boy with Mary",
        "John sees Mary",
        "every girl chases a dog",
        "every boy chases a girl",
        "John walks with a girl in Noosa",
        "who walks",
    ]

    gramfile = "grammars/sample_grammars/sem2.fcfg"

    if options.sentences:
        sents = read_sents(options.sentences)
    if options.grammar:
        gramfile = options.grammar
    if options.model:
        # NB: the demo currently always evaluates against the model built by
        # demo_model0(); the module imported here is not used further.
        exec("import %s as model" % options.model)

    # Set model and assignment
    model = m0
    g = g0

    if options.evaluate:
        evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
    else:
        semreps = interpret_sents(sents, gramfile, trace=options.syntrace)

    for i, sent in enumerate(sents):
        n = 1
        print("\nSentence: %s" % sent)
        print(SPACER)
        if options.evaluate:
            for syntree, semrep, value in evaluations[i]:
                if isinstance(value, dict):
                    value = set(value.keys())
                print("%d: %s" % (n, semrep))
                print(value)
                n += 1
        else:
            for syntree, semrep in semreps[i]:
                print("%d: %s" % (n, semrep))
                n += 1


if __name__ == "__main__":
    demo()
    demo_legacy_grammar()