This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions


@@ -0,0 +1,75 @@
# Natural Language Toolkit: Semantic Interpretation
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
NLTK Semantic Interpretation Package
This package contains classes for representing semantic structure in
formulas of first-order logic and for evaluating such formulas in
set-theoretic models.
>>> from nltk.sem import logic
>>> logic._counter._value = 0
The package has two main components:
- ``logic`` provides support for analyzing expressions of First
Order Logic (FOL).
- ``evaluate`` allows users to recursively determine truth in a
model for formulas of FOL.
A model consists of a domain of discourse and a valuation function,
which assigns values to non-logical constants. We assume that entities
in the domain are represented as strings such as ``'b1'``, ``'g1'``,
etc. A ``Valuation`` is initialized with a list of (symbol, value)
pairs, where values are entities, sets of entities or sets of tuples
of entities.
The domain of discourse can be inferred from the valuation, and a model
is then created with the domain and valuation as parameters.
>>> from nltk.sem import Valuation, Model
>>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
... ('dog', set(['d1'])),
... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
>>> val = Valuation(v)
>>> dom = val.domain
>>> m = Model(dom, val)
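For example, the model can then be used to evaluate formulas of first-order
logic relative to a variable assignment:
>>> from nltk.sem import Assignment
>>> g = Assignment(dom)
>>> m.evaluate('love(adam, betty)', g)
True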
"""
from nltk.sem.boxer import Boxer
from nltk.sem.drt import DRS, DrtExpression
from nltk.sem.evaluate import (
Assignment,
Model,
Undefined,
Valuation,
arity,
is_rel,
read_valuation,
set2rel,
)
from nltk.sem.lfg import FStructure
from nltk.sem.logic import (
ApplicationExpression,
Expression,
LogicalExpressionException,
Variable,
binding_ops,
boolean_ops,
equality_preds,
read_logic,
)
from nltk.sem.relextract import clause, extract_rels, rtuple
from nltk.sem.skolemize import skolemize
from nltk.sem.util import evaluate_sents, interpret_sents, parse_sents, root_semrep
# from nltk.sem.glue import Glue
# from nltk.sem.hole import HoleSemantics
# from nltk.sem.cooper_storage import CooperStore
# don't import chat80 as its names are too generic

File diff suppressed because it is too large


@@ -0,0 +1,857 @@
# Natural Language Toolkit: Chat-80 KB Reader
# See https://www.w3.org/TR/swbp-skos-core-guide/
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
# URL: <https://www.nltk.org>
# For license information, see LICENSE.TXT
r"""
Overview
========
Chat-80 was a natural language system which allowed the user to
interrogate a Prolog knowledge base in the domain of world
geography. It was developed in the early '80s by Warren and Pereira; see
``https://www.aclweb.org/anthology/J82-3002.pdf`` for a description and
``http://www.cis.upenn.edu/~pereira/oldies.html`` for the source
files.
This module contains functions to extract data from the Chat-80
relation files ('the world database'), and convert them into a format
that can be incorporated in the FOL models of
``nltk.sem.evaluate``. The code assumes that the Prolog
input files are available in the NLTK corpora directory.
The Chat-80 World Database consists of the following files::
world0.pl
rivers.pl
cities.pl
countries.pl
contain.pl
borders.pl
This module uses a slightly modified version of ``world0.pl``, in which
a set of Prolog rules have been omitted. The modified file is named
``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since
it uses a list rather than a string in the second field.
Reading Chat-80 Files
=====================
Chat-80 relations are like tables in a relational database. The
relation acts as the name of the table; the first argument acts as the
'primary key'; and subsequent arguments are further fields in the
table. In general, the name of the table provides a label for a unary
predicate whose extension is all the primary keys. For example,
relations in ``cities.pl`` are of the following form::
'city(athens,greece,1368).'
Here, ``'athens'`` is the key, and will be mapped to a member of the
unary predicate *city*.
The fields in the table are mapped to binary predicates. The first
argument of the predicate is the primary key, while the second
argument is the data in the relevant field. Thus, in the above
example, the third field is mapped to the binary predicate
*population_of*, whose extension is a set of pairs such as
``'(athens, 1368)'``.
An exception to this general framework is required by the relations in
the files ``borders.pl`` and ``contains.pl``. These contain facts of the
following form::
'borders(albania,greece).'
'contains0(africa,central_africa).'
We do not want to form a unary concept out of the element in
the first field of these records, and we want the label of the binary
relation just to be ``'border'``/``'contain'`` respectively.
In order to drive the extraction process, we use 'relation metadata bundles'
which are Python dictionaries such as the following::
city = {'label': 'city',
'closures': [],
'schema': ['city', 'country', 'population'],
'filename': 'cities.pl'}
According to this, the file ``city['filename']`` contains a list of
relational tuples (or more accurately, the corresponding strings in
Prolog form) whose predicate symbol is ``city['label']`` and whose
relational schema is ``city['schema']``. The notion of a ``closure`` is
discussed in the next section.
Concepts
========
In order to encapsulate the results of the extraction, a class of
``Concept`` objects is introduced. A ``Concept`` object has a number of
attributes, in particular a ``prefLabel`` and ``extension``, which make
it easier to inspect the output of the extraction. In addition, the
``extension`` can be further processed: in the case of the ``'border'``
relation, we check that the relation is symmetric, and in the case
of the ``'contain'`` relation, we carry out the transitive
closure. The closure properties associated with a concept are
indicated in the relation metadata, as noted earlier.
The ``extension`` of a ``Concept`` object is then incorporated into a
``Valuation`` object.
Persistence
===========
The functions ``val_dump`` and ``val_load`` are provided to allow a
valuation to be stored in a persistent database and re-loaded, rather
than having to be re-computed each time.
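For example, a valuation covering all of the relations described above can
be dumped to disk and re-loaded as follows (an illustrative sketch; the
exact database filename created by ``shelve`` depends on the platform)::
    from nltk.sem import chat80
    chat80.val_dump(chat80.rels, 'chat80_model')
    val = chat80.val_load('chat80_model')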
Individuals and Lexical Items
=============================
As well as deriving relations from the Chat-80 data, we also create a
set of individual constants, one for each entity in the domain. The
individual constants are string-identical to the entities. For
example, given a data item such as ``'zloty'``, we add to the valuation
a pair ``('zloty', 'zloty')``. In order to parse English sentences that
refer to these entities, we also create a lexical item such as the
following for each individual constant::
PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
The set of rules is written to the file ``chat_pnames.cfg`` in the
current directory.
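As a quick illustration, the ``concepts()`` interface function defined
below builds ``Concept`` objects for named relations (this sketch assumes
the ``chat80`` corpus data is installed)::
    from nltk.sem import chat80
    for c in chat80.concepts('city'):
        print(c.prefLabel, c.arity)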
"""
import os
import re
import shelve
import sys
import nltk.data
###########################################################################
# Chat-80 relation metadata bundles needed to build the valuation
###########################################################################
borders = {
"rel_name": "borders",
"closures": ["symmetric"],
"schema": ["region", "border"],
"filename": "borders.pl",
}
contains = {
"rel_name": "contains0",
"closures": ["transitive"],
"schema": ["region", "contain"],
"filename": "contain.pl",
}
city = {
"rel_name": "city",
"closures": [],
"schema": ["city", "country", "population"],
"filename": "cities.pl",
}
country = {
"rel_name": "country",
"closures": [],
"schema": [
"country",
"region",
"latitude",
"longitude",
"area",
"population",
"capital",
"currency",
],
"filename": "countries.pl",
}
circle_of_lat = {
"rel_name": "circle_of_latitude",
"closures": [],
"schema": ["circle_of_latitude", "degrees"],
"filename": "world1.pl",
}
circle_of_long = {
"rel_name": "circle_of_longitude",
"closures": [],
"schema": ["circle_of_longitude", "degrees"],
"filename": "world1.pl",
}
continent = {
"rel_name": "continent",
"closures": [],
"schema": ["continent"],
"filename": "world1.pl",
}
region = {
"rel_name": "in_continent",
"closures": [],
"schema": ["region", "continent"],
"filename": "world1.pl",
}
ocean = {
"rel_name": "ocean",
"closures": [],
"schema": ["ocean"],
"filename": "world1.pl",
}
sea = {"rel_name": "sea", "closures": [], "schema": ["sea"], "filename": "world1.pl"}
items = [
"borders",
"contains",
"city",
"country",
"circle_of_lat",
"circle_of_long",
"continent",
"region",
"ocean",
"sea",
]
items = tuple(sorted(items))
item_metadata = {
"borders": borders,
"contains": contains,
"city": city,
"country": country,
"circle_of_lat": circle_of_lat,
"circle_of_long": circle_of_long,
"continent": continent,
"region": region,
"ocean": ocean,
"sea": sea,
}
rels = item_metadata.values()
not_unary = ["borders.pl", "contain.pl"]
###########################################################################
class Concept:
"""
A Concept class, loosely based on SKOS
(https://www.w3.org/TR/swbp-skos-core-guide/).
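For example, a symmetric binary concept can be built and closed as
follows::
    >>> c = Concept('border', arity=2, closures=['symmetric'],
    ...             extension={('albania', 'greece')})
    >>> c.close()
    >>> ('greece', 'albania') in c.extension
    True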
"""
def __init__(self, prefLabel, arity, altLabels=[], closures=[], extension=set()):
"""
:param prefLabel: the preferred label for the concept
:type prefLabel: str
:param arity: the arity of the concept
:type arity: int
:param altLabels: other (related) labels
:type altLabels: list
:param closures: closure properties of the extension
(list items can be ``symmetric``, ``reflexive``, ``transitive``)
:type closures: list
:param extension: the extensional value of the concept
:type extension: set
"""
self.prefLabel = prefLabel
self.arity = arity
self.altLabels = altLabels
self.closures = closures
# keep _extension internally as a set
self._extension = extension
# public access is via a list (for slicing)
self.extension = sorted(list(extension))
def __str__(self):
# _extension = ''
# for element in sorted(self.extension):
# if isinstance(element, tuple):
# element = '(%s, %s)' % (element)
# _extension += element + ', '
# _extension = _extension[:-1]
return "Label = '{}'\nArity = {}\nExtension = {}".format(
self.prefLabel,
self.arity,
self.extension,
)
def __repr__(self):
return "Concept('%s')" % self.prefLabel
def augment(self, data):
"""
Add more data to the ``Concept``'s extension set.
:param data: a new semantic value
:type data: string or pair of strings
:rtype: set
"""
self._extension.add(data)
self.extension = sorted(list(self._extension))
return self._extension
def _make_graph(self, s):
"""
Convert a set of pairs into an adjacency linked list encoding of a graph.
"""
g = {}
for x, y in s:
if x in g:
g[x].append(y)
else:
g[x] = [y]
return g
def _transclose(self, g):
"""
Compute the transitive closure of a graph represented as a linked list.
"""
for x in g:
for adjacent in g[x]:
# check that adjacent is a key
if adjacent in g:
for y in g[adjacent]:
if y not in g[x]:
g[x].append(y)
return g
def _make_pairs(self, g):
"""
Convert an adjacency linked list back into a set of pairs.
"""
pairs = []
for node in g:
for adjacent in g[node]:
pairs.append((node, adjacent))
return set(pairs)
def close(self):
"""
Close a binary relation in the ``Concept``'s extension set.
:return: a new extension for the ``Concept`` in which the
relation is closed under a given property
"""
from nltk.sem import is_rel
assert is_rel(self._extension)
if "symmetric" in self.closures:
pairs = []
for x, y in self._extension:
pairs.append((y, x))
sym = set(pairs)
self._extension = self._extension.union(sym)
if "transitive" in self.closures:
all = self._make_graph(self._extension)
closed = self._transclose(all)
trans = self._make_pairs(closed)
self._extension = self._extension.union(trans)
self.extension = sorted(list(self._extension))
def clause2concepts(filename, rel_name, schema, closures=[]):
"""
Convert a file of Prolog clauses into a list of ``Concept`` objects.
:param filename: filename containing the relations
:type filename: str
:param rel_name: name of the relation
:type rel_name: str
:param schema: the schema used in a set of relational tuples
:type schema: list
:param closures: closure properties for the extension of the concept
:type closures: list
:return: a list of ``Concept`` objects
:rtype: list
"""
concepts = []
# position of the subject of a binary relation
subj = 0
# label of the 'primary key'
pkey = schema[0]
# fields other than the primary key
fields = schema[1:]
# convert a file into a list of lists
records = _str2records(filename, rel_name)
# add a unary concept corresponding to the set of entities
# in the primary key position
# relations in 'not_unary' are more like ordinary binary relations
if filename not in not_unary:
concepts.append(unary_concept(pkey, subj, records))
# add a binary concept for each non-key field
for field in fields:
obj = schema.index(field)
concepts.append(binary_concept(field, closures, subj, obj, records))
return concepts
def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
"""
Convert a file of Prolog clauses into a database table.
This is not generic, since it doesn't allow arbitrary
schemas to be set as a parameter.
Intended usage::
cities2table('cities.pl', 'city', 'city.db', verbose=True, setup=True)
:param filename: filename containing the relations
:type filename: str
:param rel_name: name of the relation
:type rel_name: str
:param dbname: filename of persistent store
:type dbname: str
"""
import sqlite3
records = _str2records(filename, rel_name)
connection = sqlite3.connect(dbname)
cur = connection.cursor()
if setup:
cur.execute(
"""CREATE TABLE city_table
(City text, Country text, Population int)"""
)
table_name = "city_table"
for t in records:
cur.execute("insert into %s values (?,?,?)" % table_name, t)
if verbose:
print("inserting values into %s: " % table_name, t)
connection.commit()
if verbose:
print("Committing update to %s" % dbname)
cur.close()
def sql_query(dbname, query):
"""
Execute an SQL query over a database.
:param dbname: filename of persistent store
:type dbname: str
:param query: SQL query
:type query: str
"""
import sqlite3
try:
path = nltk.data.find(dbname)
connection = sqlite3.connect(str(path))
cur = connection.cursor()
return cur.execute(query)
except (ValueError, sqlite3.OperationalError):
import warnings
warnings.warn(
"Make sure the database file %s is installed and uncompressed." % dbname
)
raise
def _str2records(filename, rel):
"""
Read a file into memory and convert each relation clause into a list.
"""
recs = []
contents = nltk.data.load("corpora/chat80/%s" % filename, format="text")
for line in contents.splitlines():
if line.startswith(rel):
line = re.sub(rel + r"\(", "", line)
line = re.sub(r"\)\.$", "", line)
record = line.split(",")
recs.append(record)
return recs
def unary_concept(label, subj, records):
"""
Make a unary concept out of the primary key in a record.
A record is a list of entities in some relation, such as
``['france', 'paris']``, where ``'france'`` is acting as the primary
key.
:param label: the preferred label for the concept
:type label: string
:param subj: position in the record of the subject of the predicate
:type subj: int
:param records: a list of records
:type records: list of lists
:return: ``Concept`` of arity 1
:rtype: Concept
"""
c = Concept(label, arity=1, extension=set())
for record in records:
c.augment(record[subj])
return c
def binary_concept(label, closures, subj, obj, records):
"""
Make a binary concept out of the primary key and another field in a record.
A record is a list of entities in some relation, such as
``['france', 'paris']``, where ``'france'`` is acting as the primary
key, and ``'paris'`` stands in the ``'capital_of'`` relation to
``'france'``.
More generally, given a record such as ``['a', 'b', 'c']``, where
label is bound to ``'B'``, and ``obj`` bound to 1, the derived
binary concept will have label ``'B_of'``, and its extension will
be a set of pairs such as ``('a', 'b')``.
:param label: the base part of the preferred label for the concept
:type label: str
:param closures: closure properties for the extension of the concept
:type closures: list
:param subj: position in the record of the subject of the predicate
:type subj: int
:param obj: position in the record of the object of the predicate
:type obj: int
:param records: a list of records
:type records: list of lists
:return: ``Concept`` of arity 2
:rtype: Concept
"""
if not label == "border" and not label == "contain":
label = label + "_of"
c = Concept(label, arity=2, closures=closures, extension=set())
for record in records:
c.augment((record[subj], record[obj]))
# close the concept's extension according to the properties in closures
c.close()
return c
def process_bundle(rels):
"""
Given a list of relation metadata bundles, make a corresponding
dictionary of concepts, indexed by the relation name.
:param rels: bundle of metadata needed for constructing a concept
:type rels: list(dict)
:return: a dictionary of concepts, indexed by the relation name.
:rtype: dict(str): Concept
"""
concepts = {}
for rel in rels:
rel_name = rel["rel_name"]
closures = rel["closures"]
schema = rel["schema"]
filename = rel["filename"]
concept_list = clause2concepts(filename, rel_name, schema, closures)
for c in concept_list:
label = c.prefLabel
if label in concepts:
for data in c.extension:
concepts[label].augment(data)
concepts[label].close()
else:
concepts[label] = c
return concepts
def make_valuation(concepts, read=False, lexicon=False):
"""
Convert a list of ``Concept`` objects into a list of (label, extension) pairs;
optionally create a ``Valuation`` object.
:param concepts: concepts
:type concepts: list(Concept)
:param read: if ``True``, ``(symbol, set)`` pairs are read into a ``Valuation``
:type read: bool
:rtype: list or Valuation
"""
vals = []
for c in concepts:
vals.append((c.prefLabel, c.extension))
if lexicon:
read = True
if read:
from nltk.sem import Valuation
val = Valuation({})
val.update(vals)
# add labels for individuals
val = label_indivs(val, lexicon=lexicon)
return val
else:
return vals
def val_dump(rels, db):
"""
Make a ``Valuation`` from a list of relation metadata bundles and dump to
persistent database.
:param rels: bundle of metadata needed for constructing a concept
:type rels: list of dict
:param db: name of file to which data is written.
The suffix '.db' will be automatically appended.
:type db: str
"""
concepts = process_bundle(rels).values()
valuation = make_valuation(concepts, read=True)
db_out = shelve.open(db, "n")
db_out.update(valuation)
db_out.close()
def val_load(db):
"""
Load a ``Valuation`` from a persistent database.
:param db: name of file from which data is read.
The suffix '.db' should be omitted from the name.
:type db: str
"""
dbname = db + ".db"
if not os.access(dbname, os.R_OK):
sys.exit("Cannot read file: %s" % dbname)
else:
db_in = shelve.open(db)
from nltk.sem import Valuation
val = Valuation(db_in)
# val.read(db_in.items())
return val
# def alpha(str):
# """
# Utility to filter out non-alphabetic constants.
#:param str: candidate constant
#:type str: string
#:rtype: bool
# """
# try:
# int(str)
# return False
# except ValueError:
## some unknown values in records are labeled '?'
# if not str == '?':
# return True
def label_indivs(valuation, lexicon=False):
"""
Assign individual constants to the individuals in the domain of a ``Valuation``.
Given a valuation with an entry of the form ``{'rel': {'a': True}}``,
add a new entry ``{'a': 'a'}``.
:type valuation: Valuation
:rtype: Valuation
"""
# collect all the individuals into a domain
domain = valuation.domain
# convert the domain into a sorted list of alphabetic terms
# use the same string as a label
pairs = [(e, e) for e in domain]
if lexicon:
lex = make_lex(domain)
with open("chat_pnames.cfg", "w") as outfile:
outfile.writelines(lex)
# read the pairs into the valuation
valuation.update(pairs)
return valuation
def make_lex(symbols):
"""
Create lexical CFG rules for each individual symbol.
Given a valuation with an entry of the form ``{'zloty': 'zloty'}``,
create a lexical rule for the proper name 'Zloty'.
:param symbols: a list of individual constants in the semantic representation
:type symbols: sequence -- set(str)
:rtype: list(str)
"""
lex = []
header = """
##################################################################
# Lexical rules automatically generated by running 'chat80.py -x'.
##################################################################
"""
lex.append(header)
template = r"PropN[num=sg, sem=<\P.(P %s)>] -> '%s'\n"
for s in symbols:
parts = s.split("_")
caps = [p.capitalize() for p in parts]
pname = "_".join(caps)
rule = template % (s, pname)
lex.append(rule)
return lex
###########################################################################
# Interface function to emulate other corpus readers
###########################################################################
def concepts(items=items):
"""
Build a list of concepts corresponding to the relation names in ``items``.
:param items: names of the Chat-80 relations to extract
:type items: list(str)
:return: the ``Concept`` objects which are extracted from the relations
:rtype: list(Concept)
"""
if isinstance(items, str):
items = (items,)
rels = [item_metadata[r] for r in items]
concept_map = process_bundle(rels)
return concept_map.values()
###########################################################################
def main():
import sys
from optparse import OptionParser
description = """
Extract data from the Chat-80 Prolog files and convert them into a
Valuation object for use in the NLTK semantics package.
"""
opts = OptionParser(description=description)
opts.set_defaults(verbose=True, lex=False, vocab=False)
opts.add_option(
"-s", "--store", dest="outdb", help="store a valuation in DB", metavar="DB"
)
opts.add_option(
"-l",
"--load",
dest="indb",
help="load a stored valuation from DB",
metavar="DB",
)
opts.add_option(
"-c",
"--concepts",
action="store_true",
help="print concepts instead of a valuation",
)
opts.add_option(
"-r",
"--relation",
dest="label",
help="print concept with label REL (check possible labels with '-v' option)",
metavar="REL",
)
opts.add_option(
"-q",
"--quiet",
action="store_false",
dest="verbose",
help="don't print out progress info",
)
opts.add_option(
"-x",
"--lex",
action="store_true",
dest="lex",
help="write a file of lexical entries for country names, then exit",
)
opts.add_option(
"-v",
"--vocab",
action="store_true",
dest="vocab",
help="print out the vocabulary of concept labels and their arity, then exit",
)
(options, args) = opts.parse_args()
if options.outdb and options.indb:
opts.error("Options --store and --load are mutually exclusive")
if options.outdb:
# write the valuation to a persistent database
if options.verbose:
outdb = options.outdb + ".db"
print("Dumping a valuation to %s" % outdb)
val_dump(rels, options.outdb)
sys.exit(0)
else:
# try to read in a valuation from a database
if options.indb is not None:
dbname = options.indb + ".db"
if not os.access(dbname, os.R_OK):
sys.exit("Cannot read file: %s" % dbname)
else:
valuation = val_load(options.indb)
# we need to create the valuation from scratch
else:
# build some concepts
concept_map = process_bundle(rels)
concepts = concept_map.values()
# just print out the vocabulary
if options.vocab:
items = sorted((c.arity, c.prefLabel) for c in concepts)
for arity, label in items:
print(label, arity)
sys.exit(0)
# show all the concepts
if options.concepts:
for c in concepts:
print(c)
print()
if options.label:
print(concept_map[options.label])
sys.exit(0)
else:
# turn the concepts into a Valuation
if options.lex:
if options.verbose:
print("Writing out lexical rules")
make_valuation(concepts, lexicon=True)
else:
valuation = make_valuation(concepts, read=True)
print(valuation)
def sql_demo():
"""
Print out every row from the 'city.db' database.
"""
print()
print("Using SQL to extract rows from 'city.db' RDB.")
for row in sql_query("corpora/city_database/city.db", "SELECT * FROM city_table"):
print(row)
if __name__ == "__main__":
main()
sql_demo()


@@ -0,0 +1,124 @@
# Natural Language Toolkit: Cooper storage for Quantifier Ambiguity
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
from nltk.parse import load_parser
from nltk.parse.featurechart import InstantiateVarsChart
from nltk.sem.logic import ApplicationExpression, LambdaExpression, Variable
class CooperStore:
"""
A container for handling quantifier ambiguity via Cooper storage.
"""
def __init__(self, featstruct):
"""
:param featstruct: The value of the ``sem`` node in a tree from
``parse_with_bindops()``
:type featstruct: FeatStruct (with features ``core`` and ``store``)
"""
self.featstruct = featstruct
self.readings = []
try:
self.core = featstruct["CORE"]
self.store = featstruct["STORE"]
except KeyError:
print("%s is not a Cooper storage structure" % featstruct)
def _permute(self, lst):
"""
:return: An iterator over the permutations of the input list
:type lst: list
:rtype: iter
"""
remove = lambda lst0, index: lst0[:index] + lst0[index + 1 :]
if lst:
for index, x in enumerate(lst):
for y in self._permute(remove(lst, index)):
yield (x,) + y
else:
yield ()
def s_retrieve(self, trace=False):
r"""
Carry out S-Retrieval of binding operators in store.
Each permutation of the store (i.e. list of binding operators) is
taken to be a possible scoping of quantifiers. We iterate through the
binding operators in each permutation, and successively apply them to
the current term, starting with the core semantic representation,
working from the inside out.
Binding operators are of the form::
bo(\P.all x.(man(x) -> P(x)),z1)
"""
for perm, store_perm in enumerate(self._permute(self.store)):
if trace:
print("Permutation %s" % (perm + 1))
term = self.core
for bindop in store_perm:
# we just want the arguments that are wrapped by the 'bo' predicate
quant, varex = tuple(bindop.args)
# use var to make an abstraction over the current term and then
# apply the quantifier to it
term = ApplicationExpression(
quant, LambdaExpression(varex.variable, term)
)
if trace:
print(" ", term)
term = term.simplify()
self.readings.append(term)
def parse_with_bindops(sentence, grammar=None, trace=0):
"""
Use a grammar with Binding Operators to parse a sentence.
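A sketch of typical usage (assumes the ``book_grammars`` grammar data is
installed; ``demo()`` below gives a fuller example)::
    trees = parse_with_bindops('every girl chases a dog')
    semrep = CooperStore(trees[0].label()['SEM'])
    semrep.s_retrieve()
    readings = semrep.readings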
"""
if not grammar:
grammar = "grammars/book_grammars/storage.fcfg"
parser = load_parser(grammar, trace=trace, chart_class=InstantiateVarsChart)
# Parse the sentence.
tokens = sentence.split()
return list(parser.parse(tokens))
def demo():
from nltk.sem import cooper_storage as cs
sentence = "every girl chases a dog"
# sentence = "a man gives a bone to every dog"
print()
print("Analysis of sentence '%s'" % sentence)
print("=" * 50)
trees = cs.parse_with_bindops(sentence, trace=0)
for tree in trees:
semrep = cs.CooperStore(tree.label()["SEM"])
print()
print("Binding operators:")
print("-" * 15)
for s in semrep.store:
print(s)
print()
print("Core:")
print("-" * 15)
print(semrep.core)
print()
print("S-Retrieval:")
print("-" * 15)
semrep.s_retrieve(trace=True)
print("Readings:")
print("-" * 15)
for i, reading in enumerate(semrep.readings):
print(f"{i + 1}: {reading}")
if __name__ == "__main__":
demo()

File diff suppressed because it is too large


@@ -0,0 +1,553 @@
# Natural Language Toolkit: GUI Demo for Glue Semantics with Discourse
# Representation Theory (DRT) as meaning language
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
try:
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
from tkinter.font import Font
from nltk.draw.util import CanvasFrame, ShowText
except ImportError:
"""Ignore ImportError because tkinter might not be available."""
from nltk.parse import MaltParser
from nltk.sem.drt import DrsDrawer, DrtVariableExpression
from nltk.sem.glue import DrtGlue
from nltk.sem.logic import Variable
from nltk.tag import RegexpTagger
from nltk.util import in_idle
class DrtGlueDemo:
def __init__(self, examples):
# Set up the main window.
self._top = Tk()
self._top.title("DRT Glue Demo")
# Set up key bindings.
self._init_bindings()
# Initialize the fonts.
self._init_fonts(self._top)
self._examples = examples
self._readingCache = [None for example in examples]
# The user can hide the grammar.
self._show_grammar = IntVar(self._top)
self._show_grammar.set(1)
# Set the data to None
self._curExample = -1
self._readings = []
self._drs = None
self._drsWidget = None
self._error = None
self._init_glue()
# Create the basic frames.
self._init_menubar(self._top)
self._init_buttons(self._top)
self._init_exampleListbox(self._top)
self._init_readingListbox(self._top)
self._init_canvas(self._top)
# Resize callback
self._canvas.bind("<Configure>", self._configure)
#########################################
## Initialization Helpers
#########################################
def _init_glue(self):
tagger = RegexpTagger(
[
("^(David|Mary|John)$", "NNP"),
(
"^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
"VB",
),
("^(go|order|vanish|find|approach)$", "VB"),
("^(a)$", "ex_quant"),
("^(every)$", "univ_quant"),
("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
("^(big|gray|former)$", "JJ"),
("^(him|himself)$", "PRP"),
]
)
depparser = MaltParser(tagger=tagger)
self._glue = DrtGlue(depparser=depparser, remove_duplicates=False)
def _init_fonts(self, root):
# See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
self._sysfont = Font(font=Button()["font"])
root.option_add("*Font", self._sysfont)
# What's our font size (default=same as sysfont)
self._size = IntVar(root)
self._size.set(self._sysfont.cget("size"))
self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
self._font = Font(family="helvetica", size=self._size.get())
if self._size.get() < 0:
big = self._size.get() - 2
else:
big = self._size.get() + 2
self._bigfont = Font(family="helvetica", weight="bold", size=big)
def _init_exampleListbox(self, parent):
self._exampleFrame = listframe = Frame(parent)
self._exampleFrame.pack(fill="both", side="left", padx=2)
self._exampleList_label = Label(
self._exampleFrame, font=self._boldfont, text="Examples"
)
self._exampleList_label.pack()
self._exampleList = Listbox(
self._exampleFrame,
selectmode="single",
relief="groove",
background="white",
foreground="#909090",
font=self._font,
selectforeground="#004040",
selectbackground="#c0f0c0",
)
self._exampleList.pack(side="right", fill="both", expand=1)
for example in self._examples:
self._exampleList.insert("end", (" %s" % example))
self._exampleList.config(height=min(len(self._examples), 25), width=40)
# Add a scrollbar if there are more than 25 examples.
if len(self._examples) > 25:
listscroll = Scrollbar(self._exampleFrame, orient="vertical")
self._exampleList.config(yscrollcommand=listscroll.set)
listscroll.config(command=self._exampleList.yview)
listscroll.pack(side="left", fill="y")
# If they select an example, apply it.
self._exampleList.bind("<<ListboxSelect>>", self._exampleList_select)
def _init_readingListbox(self, parent):
self._readingFrame = listframe = Frame(parent)
self._readingFrame.pack(fill="both", side="left", padx=2)
self._readingList_label = Label(
self._readingFrame, font=self._boldfont, text="Readings"
)
self._readingList_label.pack()
self._readingList = Listbox(
self._readingFrame,
selectmode="single",
relief="groove",
background="white",
foreground="#909090",
font=self._font,
selectforeground="#004040",
selectbackground="#c0f0c0",
)
self._readingList.pack(side="right", fill="both", expand=1)
# Add a scrollbar for the readings list.
listscroll = Scrollbar(self._readingFrame, orient="vertical")
self._readingList.config(yscrollcommand=listscroll.set)
listscroll.config(command=self._readingList.yview)
listscroll.pack(side="right", fill="y")
self._populate_readingListbox()
def _populate_readingListbox(self):
# Populate the listbox with integers
self._readingList.delete(0, "end")
for i in range(len(self._readings)):
self._readingList.insert("end", (" %s" % (i + 1)))
self._readingList.config(height=min(len(self._readings), 25), width=5)
# If they select an example, apply it.
self._readingList.bind("<<ListboxSelect>>", self._readingList_select)
def _init_bindings(self):
# Key bindings are a good thing.
self._top.bind("<Control-q>", self.destroy)
self._top.bind("<Control-x>", self.destroy)
self._top.bind("<Escape>", self.destroy)
self._top.bind("n", self.next)
self._top.bind("<space>", self.next)
self._top.bind("p", self.prev)
self._top.bind("<BackSpace>", self.prev)
def _init_buttons(self, parent):
# Set up the frames.
self._buttonframe = buttonframe = Frame(parent)
buttonframe.pack(fill="none", side="bottom", padx=3, pady=2)
Button(
buttonframe,
text="Prev",
background="#90c0d0",
foreground="black",
command=self.prev,
).pack(side="left")
Button(
buttonframe,
text="Next",
background="#90c0d0",
foreground="black",
command=self.next,
).pack(side="left")
def _configure(self, event):
self._autostep = 0
(x1, y1, x2, y2) = self._cframe.scrollregion()
y2 = event.height - 6
self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2)
self._redraw()
def _init_canvas(self, parent):
self._cframe = CanvasFrame(
parent,
background="white",
# width=525, height=250,
closeenough=10,
border=2,
relief="sunken",
)
self._cframe.pack(expand=1, fill="both", side="top", pady=2)
canvas = self._canvas = self._cframe.canvas()
# Initially, there's no tree or text
self._tree = None
self._textwidgets = []
self._textline = None
def _init_menubar(self, parent):
menubar = Menu(parent)
filemenu = Menu(menubar, tearoff=0)
filemenu.add_command(
label="Exit", underline=1, command=self.destroy, accelerator="q"
)
menubar.add_cascade(label="File", underline=0, menu=filemenu)
actionmenu = Menu(menubar, tearoff=0)
actionmenu.add_command(
label="Next", underline=0, command=self.next, accelerator="n, Space"
)
actionmenu.add_command(
label="Previous", underline=0, command=self.prev, accelerator="p, Backspace"
)
menubar.add_cascade(label="Action", underline=0, menu=actionmenu)
optionmenu = Menu(menubar, tearoff=0)
optionmenu.add_checkbutton(
label="Remove Duplicates",
underline=0,
variable=self._glue.remove_duplicates,
command=self._toggle_remove_duplicates,
accelerator="r",
)
menubar.add_cascade(label="Options", underline=0, menu=optionmenu)
viewmenu = Menu(menubar, tearoff=0)
viewmenu.add_radiobutton(
label="Tiny",
variable=self._size,
underline=0,
value=10,
command=self.resize,
)
viewmenu.add_radiobutton(
label="Small",
variable=self._size,
underline=0,
value=12,
command=self.resize,
)
viewmenu.add_radiobutton(
label="Medium",
variable=self._size,
underline=0,
value=14,
command=self.resize,
)
viewmenu.add_radiobutton(
label="Large",
variable=self._size,
underline=0,
value=18,
command=self.resize,
)
viewmenu.add_radiobutton(
label="Huge",
variable=self._size,
underline=0,
value=24,
command=self.resize,
)
menubar.add_cascade(label="View", underline=0, menu=viewmenu)
helpmenu = Menu(menubar, tearoff=0)
helpmenu.add_command(label="About", underline=0, command=self.about)
menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
parent.config(menu=menubar)
#########################################
## Main draw procedure
#########################################
def _redraw(self):
canvas = self._canvas
# Delete the old DRS, widgets, etc.
if self._drsWidget is not None:
self._drsWidget.clear()
if self._drs:
self._drsWidget = DrsWidget(self._canvas, self._drs)
self._drsWidget.draw()
if self._error:
self._drsWidget = DrsWidget(self._canvas, self._error)
self._drsWidget.draw()
#########################################
## Button Callbacks
#########################################
def destroy(self, *e):
self._autostep = 0
if self._top is None:
return
self._top.destroy()
self._top = None
def prev(self, *e):
selection = self._readingList.curselection()
readingListSize = self._readingList.size()
# if there are readings
if readingListSize > 0:
# if one reading is currently selected
if len(selection) == 1:
index = int(selection[0])
# if it's on (or before) the first item
if index <= 0:
self._select_previous_example()
else:
self._readingList_store_selection(index - 1)
else:
# select the last reading
self._readingList_store_selection(readingListSize - 1)
else:
self._select_previous_example()
def _select_previous_example(self):
# if the current example is not the first example
if self._curExample > 0:
self._exampleList_store_selection(self._curExample - 1)
else:
# go to the last example
self._exampleList_store_selection(len(self._examples) - 1)
def next(self, *e):
selection = self._readingList.curselection()
readingListSize = self._readingList.size()
# if there are readings
if readingListSize > 0:
# if one reading is currently selected
if len(selection) == 1:
index = int(selection[0])
# if it's on (or past) the last item
if index >= (readingListSize - 1):
self._select_next_example()
else:
self._readingList_store_selection(index + 1)
else:
# select its first reading
self._readingList_store_selection(0)
else:
self._select_next_example()
def _select_next_example(self):
# if the current example is not the last example
if self._curExample < len(self._examples) - 1:
self._exampleList_store_selection(self._curExample + 1)
else:
# go to the first example
self._exampleList_store_selection(0)
def about(self, *e):
ABOUT = (
"NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n"
+ "Written by Daniel H. Garrette"
)
TITLE = "About: NLTK DRT Glue Demo"
try:
from tkinter.messagebox import Message
Message(message=ABOUT, title=TITLE).show()
except:
ShowText(self._top, TITLE, ABOUT)
def postscript(self, *e):
self._autostep = 0
self._cframe.print_to_file()
def mainloop(self, *args, **kwargs):
"""
Enter the Tkinter mainloop. This function must be called if
this demo is created from a non-interactive program (e.g.
from a script); otherwise, the demo will close as soon as
the script completes.
"""
if in_idle():
return
self._top.mainloop(*args, **kwargs)
def resize(self, size=None):
if size is not None:
self._size.set(size)
size = self._size.get()
self._font.configure(size=-(abs(size)))
self._boldfont.configure(size=-(abs(size)))
self._sysfont.configure(size=-(abs(size)))
self._bigfont.configure(size=-(abs(size + 2)))
self._redraw()
def _toggle_remove_duplicates(self):
self._glue.remove_duplicates = not self._glue.remove_duplicates
self._exampleList.selection_clear(0, "end")
self._readings = []
self._populate_readingListbox()
self._readingCache = [None for ex in self._examples]
self._curExample = -1
self._error = None
self._drs = None
self._redraw()
def _exampleList_select(self, event):
selection = self._exampleList.curselection()
if len(selection) != 1:
return
self._exampleList_store_selection(int(selection[0]))
def _exampleList_store_selection(self, index):
self._curExample = index
example = self._examples[index]
self._exampleList.selection_clear(0, "end")
if example:
cache = self._readingCache[index]
if cache:
if isinstance(cache, list):
self._readings = cache
self._error = None
else:
self._readings = []
self._error = cache
else:
try:
self._readings = self._glue.parse_to_meaning(example)
self._error = None
self._readingCache[index] = self._readings
except Exception as e:
self._readings = []
self._error = DrtVariableExpression(Variable("Error: " + str(e)))
self._readingCache[index] = self._error
# add a star to the end of the example
self._exampleList.delete(index)
self._exampleList.insert(index, (" %s *" % example))
self._exampleList.config(
height=min(len(self._examples), 25), width=40
)
self._populate_readingListbox()
self._exampleList.selection_set(index)
self._drs = None
self._redraw()
def _readingList_select(self, event):
selection = self._readingList.curselection()
if len(selection) != 1:
return
self._readingList_store_selection(int(selection[0]))
def _readingList_store_selection(self, index):
reading = self._readings[index]
self._readingList.selection_clear(0, "end")
if reading:
self._readingList.selection_set(index)
self._drs = reading.simplify().normalize().resolve_anaphora()
self._redraw()
class DrsWidget:
def __init__(self, canvas, drs, **attribs):
self._drs = drs
self._canvas = canvas
canvas.font = Font(
font=canvas.itemcget(canvas.create_text(0, 0, text=""), "font")
)
canvas._BUFFER = 3
self.bbox = (0, 0, 0, 0)
def draw(self):
(right, bottom) = DrsDrawer(self._drs, canvas=self._canvas).draw()
self.bbox = (0, 0, right + 1, bottom + 1)
def clear(self):
self._canvas.create_rectangle(self.bbox, fill="white", width="0")
def demo():
examples = [
"John walks",
"David sees Mary",
"David eats a sandwich",
"every man chases a dog",
# 'every man believes a dog yawns',
# 'John gives David a sandwich',
"John chases himself",
# 'John persuades David to order a pizza',
# 'John tries to go',
# 'John tries to find a unicorn',
# 'John seems to vanish',
# 'a unicorn seems to approach',
# 'every big cat leaves',
# 'every gray cat leaves',
# 'every big gray cat leaves',
# 'a former senator leaves',
# 'John likes a cat',
# 'John likes every cat',
# 'he walks',
# 'John walks and he leaves'
]
DrtGlueDemo(examples).mainloop()
if __name__ == "__main__":
demo()


@@ -0,0 +1,830 @@
# Natural Language Toolkit: Models for first-order languages with lambda
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
# URL: <https://www.nltk.org>
# For license information, see LICENSE.TXT
# TODO:
# - fix tracing
# - fix iterator-based approach to existentials
"""
This module provides data structures for representing first-order
models.
"""
import inspect
import re
import sys
import textwrap
from pprint import pformat
from nltk.decorators import decorator  # this is used in code that is commented out
from nltk.sem.logic import (
AbstractVariableExpression,
AllExpression,
AndExpression,
ApplicationExpression,
EqualityExpression,
ExistsExpression,
Expression,
IffExpression,
ImpExpression,
IndividualVariableExpression,
IotaExpression,
LambdaExpression,
NegatedExpression,
OrExpression,
Variable,
is_indvar,
)
class Error(Exception):
pass
class Undefined(Error):
pass
def trace(f, *args, **kw):
argspec = inspect.getfullargspec(f)
d = dict(zip(argspec[0], args))
if d.pop("trace", None):
print()
for item in d.items():
print("%s => %s" % item)
return f(*args, **kw)
def is_rel(s):
"""
Check whether a set represents a relation (of any arity).
:param s: a set containing tuples of str elements
:type s: set
:rtype: bool
"""
# we have the empty relation, i.e. set()
if len(s) == 0:
return True
# all the elements are tuples of the same length
elif all(isinstance(el, tuple) for el in s) and len(max(s)) == len(min(s)):
return True
else:
raise ValueError("Set %r contains sequences of different lengths" % s)
def set2rel(s):
"""
Convert a set containing individuals (strings or numbers) into a set of
unary tuples. Any tuples of strings already in the set are passed through
unchanged.
For example:
- set(['a', 'b']) => set([('a',), ('b',)])
- set([3, 27]) => set([('3',), ('27',)])
:type s: set
:rtype: set of tuple of str
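For example::
    >>> sorted(set2rel({'a', 'b'}))
    [('a',), ('b',)]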
"""
new = set()
for elem in s:
if isinstance(elem, str):
new.add((elem,))
elif isinstance(elem, int):
new.add((str(elem),))
else:
new.add(elem)
return new
def arity(rel):
"""
Check the arity of a relation.
:type rel: set of tuples
:rtype: int
"""
if len(rel) == 0:
return 0
return len(list(rel)[0])
class Valuation(dict):
"""
A dictionary which represents a model-theoretic Valuation of non-logical constants.
Keys are strings representing the constants to be interpreted, and values correspond
to individuals (represented as strings) and n-ary relations (represented as sets of tuples
of strings).
An instance of ``Valuation`` will raise a KeyError exception (i.e.,
just behave like a standard dictionary) if indexed with an expression that
is not in its list of symbols.
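A small constructed example::
    >>> val = Valuation([('john', 'j'), ('girl', {'g1', 'g2'})])
    >>> val['john']
    'j'
    >>> ('g1',) in val['girl']
    True
    >>> sorted(val.domain)
    ['g1', 'g2', 'j']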
"""
def __init__(self, xs):
"""
:param xs: a list of (symbol, value) pairs.
"""
super().__init__()
for sym, val in xs:
if isinstance(val, str) or isinstance(val, bool):
self[sym] = val
elif isinstance(val, set):
self[sym] = set2rel(val)
else:
msg = textwrap.fill(
"Error in initializing Valuation. "
"Unrecognized value for symbol '%s':\n%s" % (sym, val),
width=66,
)
raise ValueError(msg)
def __getitem__(self, key):
if key in self:
return dict.__getitem__(self, key)
else:
raise Undefined("Unknown expression: '%s'" % key)
def __str__(self):
return pformat(self)
@property
def domain(self):
"""Set-theoretic domain of the value-space of a Valuation."""
dom = []
for val in self.values():
if isinstance(val, str):
dom.append(val)
elif not isinstance(val, bool):
dom.extend(
[elem for tuple_ in val for elem in tuple_ if elem is not None]
)
return set(dom)
@property
def symbols(self):
"""The non-logical constants which the Valuation recognizes."""
return sorted(self.keys())
@classmethod
def fromstring(cls, s):
return read_valuation(s)
##########################################
# REs used by the _read_valuation function
##########################################
_VAL_SPLIT_RE = re.compile(r"\s*=+>\s*")
_ELEMENT_SPLIT_RE = re.compile(r"\s*,\s*")
_TUPLES_RE = re.compile(
r"""\s*
(\([^)]+\)) # tuple-expression
\s*""",
re.VERBOSE,
)
def _read_valuation_line(s):
"""
Read a line in a valuation file.
Lines are expected to be of the form::
noosa => n
girl => {g1, g2}
chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
:param s: input line
:type s: str
:return: a pair (symbol, value)
:rtype: tuple
"""
pieces = _VAL_SPLIT_RE.split(s)
symbol = pieces[0]
value = pieces[1]
# check whether the value is meant to be a set
if value.startswith("{"):
value = value[1:-1]
tuple_strings = _TUPLES_RE.findall(value)
# are the set elements tuples?
if tuple_strings:
set_elements = []
for ts in tuple_strings:
ts = ts[1:-1]
element = tuple(_ELEMENT_SPLIT_RE.split(ts))
set_elements.append(element)
else:
set_elements = _ELEMENT_SPLIT_RE.split(value)
value = set(set_elements)
return symbol, value
def read_valuation(s, encoding=None):
"""
Convert a valuation string into a valuation.
:param s: a valuation string
:type s: str
:param encoding: the encoding of the input string, if it is binary
:type encoding: str
:return: a ``nltk.sem`` valuation
:rtype: Valuation
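Example::
    >>> s = '''
    ... fido => d1
    ... dog => {d1, d2}
    ... '''
    >>> val = read_valuation(s)
    >>> val['fido']
    'd1'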
"""
if encoding is not None:
s = s.decode(encoding)
statements = []
for linenum, line in enumerate(s.splitlines()):
line = line.strip()
if line.startswith("#") or line == "":
continue
try:
statements.append(_read_valuation_line(line))
except ValueError as e:
raise ValueError(f"Unable to parse line {linenum}: {line}") from e
return Valuation(statements)
class Assignment(dict):
r"""
A dictionary which represents an assignment of values to variables.
An assignment can only assign values from its domain.
If an unknown expression *a* is passed to a model *M*\ 's
interpretation function *i*, *i* will first check whether *M*\ 's
valuation assigns an interpretation to *a* as a constant, and if
this fails, *i* will delegate the interpretation of *a* to
*g*. *g* only assigns values to individual variables (i.e.,
members of the class ``IndividualVariableExpression`` in the ``logic``
module). If a variable is not assigned a value by *g*, it will raise
an ``Undefined`` exception.
A variable *Assignment* is a mapping from individual variables to
entities in the domain. Individual variables are usually indicated
with the letters ``'x'``, ``'y'``, ``'w'`` and ``'z'``, optionally
followed by an integer (e.g., ``'x0'``, ``'y332'``). Assignments are
created using the ``Assignment`` constructor, which also takes the
domain as a parameter.
>>> from nltk.sem.evaluate import Assignment
>>> dom = set(['u1', 'u2', 'u3', 'u4'])
>>> g3 = Assignment(dom, [('x', 'u1'), ('y', 'u2')])
>>> g3 == {'x': 'u1', 'y': 'u2'}
True
There is also a ``print`` format for assignments which uses a notation
closer to that in logic textbooks:
>>> print(g3)
g[u1/x][u2/y]
It is also possible to update an assignment using the ``add`` method:
>>> dom = set(['u1', 'u2', 'u3', 'u4'])
>>> g4 = Assignment(dom)
>>> g4.add('x', 'u1')
{'x': 'u1'}
With no arguments, ``purge()`` is equivalent to ``clear()`` on a dictionary:
>>> g4.purge()
>>> g4
{}
:param domain: the domain of discourse
:type domain: set
:param assign: a list of (varname, value) associations
:type assign: list
"""
def __init__(self, domain, assign=None):
super().__init__()
self.domain = domain
if assign:
for var, val in assign:
assert val in self.domain, "'{}' is not in the domain: {}".format(
val,
self.domain,
)
assert is_indvar(var), (
"Wrong format for an Individual Variable: '%s'" % var
)
self[var] = val
self.variant = None
self._addvariant()
def __getitem__(self, key):
if key in self:
return dict.__getitem__(self, key)
else:
raise Undefined("Not recognized as a variable: '%s'" % key)
def copy(self):
new = Assignment(self.domain)
new.update(self)
return new
def purge(self, var=None):
"""
Remove one or all keys (i.e. logic variables) from an
assignment, and update ``self.variant``.
:param var: a Variable acting as a key for the assignment.
"""
if var:
del self[var]
else:
self.clear()
self._addvariant()
return None
def __str__(self):
"""
Pretty printing for assignments. {'x': 'u'} appears as 'g[u/x]'
"""
gstring = "g"
# Deterministic output for unit testing.
variant = sorted(self.variant)
for val, var in variant:
gstring += f"[{val}/{var}]"
return gstring
def _addvariant(self):
"""
Create a more pretty-printable version of the assignment.
"""
list_ = []
for item in self.items():
pair = (item[1], item[0])
list_.append(pair)
self.variant = list_
return None
def add(self, var, val):
"""
Add a new variable-value pair to the assignment, and update
``self.variant``.
"""
assert val in self.domain, f"{val} is not in the domain {self.domain}"
assert is_indvar(var), "Wrong format for an Individual Variable: '%s'" % var
self[var] = val
self._addvariant()
return self
class Model:
"""
A first order model is a domain *D* of discourse and a valuation *V*.
A domain *D* is a set, and a valuation *V* is a map that associates
expressions with values in the model.
The domain of *V* should be a subset of *D*.
Construct a new ``Model``.
:type domain: set
:param domain: A set of entities representing the domain of discourse of the model.
:type valuation: Valuation
:param valuation: the valuation of the model.
:param prop: If this is set, then we are building a propositional\
model and don't require the domain of *V* to be subset of *D*.
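A minimal usage sketch::
    >>> v = Valuation([('adam', 'b1'), ('girl', {'g1'})])
    >>> m = Model(v.domain, v)
    >>> m.evaluate('girl(adam)', Assignment(v.domain))
    False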
"""
def __init__(self, domain, valuation):
assert isinstance(domain, set)
self.domain = domain
self.valuation = valuation
if not domain.issuperset(valuation.domain):
raise Error(
"The valuation domain, %s, must be a subset of the model's domain, %s"
% (valuation.domain, domain)
)
def __repr__(self):
return f"({self.domain!r}, {self.valuation!r})"
def __str__(self):
return f"Domain = {self.domain},\nValuation = \n{self.valuation}"
def evaluate(self, expr, g, trace=None):
"""
Read input expressions, and provide a handler for ``satisfy``
that blocks further propagation of the ``Undefined`` error.
:param expr: An ``Expression`` of ``logic``.
:type g: Assignment
:param g: an assignment to individual variables.
:rtype: bool or 'Undefined'
"""
try:
parsed = Expression.fromstring(expr)
value = self.satisfy(parsed, g, trace=trace)
if trace:
print()
print(f"'{expr}' evaluates to {value} under M, {g}")
return value
except Undefined:
if trace:
print()
print(f"'{expr}' is undefined under M, {g}")
return "Undefined"
def satisfy(self, parsed, g, trace=None):
"""
Recursive interpretation function for a formula of first-order logic.
Raises an ``Undefined`` error when ``parsed`` is an atomic string
but is not a symbol or an individual variable.
:return: Returns a truth value or ``Undefined`` if ``parsed`` is\
complex, and calls the interpretation function ``i`` if ``parsed``\
is atomic.
:param parsed: An expression of ``logic``.
:type g: Assignment
:param g: an assignment to individual variables.
"""
if isinstance(parsed, ApplicationExpression):
function, arguments = parsed.uncurry()
if isinstance(function, AbstractVariableExpression):
# It's a predicate expression ("P(x,y)"), so use uncurried arguments
funval = self.satisfy(function, g)
argvals = tuple(self.satisfy(arg, g) for arg in arguments)
return argvals in funval
else:
# It must be a lambda expression, so use curried form
funval = self.satisfy(parsed.function, g)
argval = self.satisfy(parsed.argument, g)
return funval[argval]
elif isinstance(parsed, NegatedExpression):
return not self.satisfy(parsed.term, g)
elif isinstance(parsed, AndExpression):
return self.satisfy(parsed.first, g) and self.satisfy(parsed.second, g)
elif isinstance(parsed, OrExpression):
return self.satisfy(parsed.first, g) or self.satisfy(parsed.second, g)
elif isinstance(parsed, ImpExpression):
return (not self.satisfy(parsed.first, g)) or self.satisfy(parsed.second, g)
elif isinstance(parsed, IffExpression):
return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
elif isinstance(parsed, EqualityExpression):
return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
elif isinstance(parsed, AllExpression):
new_g = g.copy()
for u in self.domain:
new_g.add(parsed.variable.name, u)
if not self.satisfy(parsed.term, new_g):
return False
return True
elif isinstance(parsed, ExistsExpression):
new_g = g.copy()
for u in self.domain:
new_g.add(parsed.variable.name, u)
if self.satisfy(parsed.term, new_g):
return True
return False
elif isinstance(parsed, IotaExpression):
new_g = g.copy()
for u in self.domain:
new_g.add(parsed.variable.name, u)
if self.satisfy(parsed.term, new_g):
return True
return False
elif isinstance(parsed, LambdaExpression):
cf = {}
var = parsed.variable.name
for u in self.domain:
val = self.satisfy(parsed.term, g.add(var, u))
# NB the dict would be a lot smaller if we do this:
# if val: cf[u] = val
# But then need to deal with cases where f(a) should yield
# a function rather than just False.
cf[u] = val
return cf
else:
return self.i(parsed, g, trace)
# @decorator(trace_eval)
def i(self, parsed, g, trace=False):
"""
An interpretation function.
Assuming that ``parsed`` is atomic:
- if ``parsed`` is a non-logical constant, calls the valuation *V*
- else if ``parsed`` is an individual variable, calls assignment *g*
- else returns ``Undefined``.
:param parsed: an ``Expression`` of ``logic``.
:type g: Assignment
:param g: an assignment to individual variables.
:return: a semantic value
"""
# If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols
# and also be an IndividualVariableExpression. We want to catch this first case.
# So there is a procedural consequence to the ordering of clauses here:
if parsed.variable.name in self.valuation.symbols:
return self.valuation[parsed.variable.name]
elif isinstance(parsed, IndividualVariableExpression):
return g[parsed.variable.name]
else:
raise Undefined("Can't find a value for %s" % parsed)
def satisfiers(self, parsed, varex, g, trace=None, nesting=0):
"""
Generate the entities from the model's domain that satisfy an open formula.
:param parsed: an open formula
:type parsed: Expression
:param varex: the relevant free individual variable in ``parsed``.
:type varex: VariableExpression or str
:param g: a variable assignment
:type g: Assignment
:return: a set of the entities that satisfy ``parsed``.
"""
spacer = " "
indent = spacer + (spacer * nesting)
candidates = []
if isinstance(varex, str):
var = Variable(varex)
else:
var = varex
if var in parsed.free():
if trace:
print()
print(
(spacer * nesting)
+ f"Open formula is '{parsed}' with assignment {g}"
)
for u in self.domain:
new_g = g.copy()
new_g.add(var.name, u)
if trace and trace > 1:
lowtrace = trace - 1
else:
lowtrace = 0
value = self.satisfy(parsed, new_g, lowtrace)
if trace:
print(indent + "(trying assignment %s)" % new_g)
# parsed == False under g[u/var]?
if value == False:
if trace:
print(indent + f"value of '{parsed}' under {new_g} is False")
# so g[u/var] is a satisfying assignment
else:
candidates.append(u)
if trace:
print(indent + f"value of '{parsed}' under {new_g} is {value}")
result = {c for c in candidates}
# var isn't free in parsed
else:
raise Undefined(f"{var.name} is not free in {parsed}")
return result
# //////////////////////////////////////////////////////////////////////
# Demo..
# //////////////////////////////////////////////////////////////////////
# number of spacer chars
mult = 30
# Demo 1: Propositional Logic
#################
def propdemo(trace=None):
"""Example of a propositional model."""
global val1, dom1, m1, g1
val1 = Valuation([("P", True), ("Q", True), ("R", False)])
dom1 = set()
m1 = Model(dom1, val1)
g1 = Assignment(dom1)
print()
print("*" * mult)
print("Propositional Formulas Demo")
print("*" * mult)
print("(Propositional constants treated as nullary predicates)")
print()
print("Model m1:\n", m1)
print("*" * mult)
sentences = [
"(P & Q)",
"(P & R)",
"- P",
"- R",
"- - P",
"- (P & R)",
"(P | R)",
"(R | P)",
"(R | R)",
"(- P | R)",
"(P | - P)",
"(P -> Q)",
"(P -> R)",
"(R -> P)",
"(P <-> P)",
"(R <-> R)",
"(P <-> R)",
]
for sent in sentences:
if trace:
print()
m1.evaluate(sent, g1, trace)
else:
print(f"The value of '{sent}' is: {m1.evaluate(sent, g1)}")
# Demo 2: FOL Model
#############
def folmodel(quiet=False, trace=None):
"""Example of a first-order model."""
global val2, v2, dom2, m2, g2
v2 = [
("adam", "b1"),
("betty", "g1"),
("fido", "d1"),
("girl", {"g1", "g2"}),
("boy", {"b1", "b2"}),
("dog", {"d1"}),
("love", {("b1", "g1"), ("b2", "g2"), ("g1", "b1"), ("g2", "b1")}),
]
val2 = Valuation(v2)
dom2 = val2.domain
m2 = Model(dom2, val2)
g2 = Assignment(dom2, [("x", "b1"), ("y", "g2")])
if not quiet:
print()
print("*" * mult)
print("Models Demo")
print("*" * mult)
print("Model m2:\n", "-" * 14, "\n", m2)
print("Variable assignment = ", g2)
exprs = ["adam", "boy", "love", "walks", "x", "y", "z"]
parsed_exprs = [Expression.fromstring(e) for e in exprs]
print()
for parsed in parsed_exprs:
try:
print(
"The interpretation of '%s' in m2 is %s"
% (parsed, m2.i(parsed, g2))
)
except Undefined:
print("The interpretation of '%s' in m2 is Undefined" % parsed)
applications = [
("boy", ("adam")),
("walks", ("adam",)),
("love", ("adam", "y")),
("love", ("y", "adam")),
]
for fun, args in applications:
try:
funval = m2.i(Expression.fromstring(fun), g2)
argsval = tuple(m2.i(Expression.fromstring(arg), g2) for arg in args)
print(f"{fun}({args}) evaluates to {argsval in funval}")
except Undefined:
print(f"{fun}({args}) evaluates to Undefined")
# Demo 3: FOL
#########
def foldemo(trace=None):
"""
Interpretation of closed expressions in a first-order model.
"""
folmodel(quiet=True)
print()
print("*" * mult)
print("FOL Formulas Demo")
print("*" * mult)
formulas = [
"love (adam, betty)",
"(adam = mia)",
"\\x. (boy(x) | girl(x))",
"\\x. boy(x)(adam)",
"\\x y. love(x, y)",
"\\x y. love(x, y)(adam)(betty)",
"\\x y. love(x, y)(adam, betty)",
"\\x y. (boy(x) & love(x, y))",
"\\x. exists y. (boy(x) & love(x, y))",
"exists z1. boy(z1)",
"exists x. (boy(x) & -(x = adam))",
"exists x. (boy(x) & all y. love(y, x))",
"all x. (boy(x) | girl(x))",
"all x. (girl(x) -> exists y. boy(y) & love(x, y))", # Every girl loves exists boy.
"exists x. (boy(x) & all y. (girl(y) -> love(y, x)))", # There is exists boy that every girl loves.
"exists x. (boy(x) & all y. (girl(y) -> love(x, y)))", # exists boy loves every girl.
"all x. (dog(x) -> - girl(x))",
"exists x. exists y. (love(x, y) & love(x, y))",
]
for fmla in formulas:
g2.purge()
if trace:
m2.evaluate(fmla, g2, trace)
else:
print(f"The value of '{fmla}' is: {m2.evaluate(fmla, g2)}")
# Demo 4: Satisfaction
#############
def satdemo(trace=None):
"""Satisfiers of an open formula in a first order model."""
print()
print("*" * mult)
print("Satisfiers Demo")
print("*" * mult)
folmodel(quiet=True)
formulas = [
"boy(x)",
"(x = x)",
"(boy(x) | girl(x))",
"(boy(x) & girl(x))",
"love(adam, x)",
"love(x, adam)",
"-(x = adam)",
"exists z22. love(x, z22)",
"exists y. love(y, x)",
"all y. (girl(y) -> love(x, y))",
"all y. (girl(y) -> love(y, x))",
"all y. (girl(y) -> (boy(x) & love(y, x)))",
"(boy(x) & all y. (girl(y) -> love(x, y)))",
"(boy(x) & all y. (girl(y) -> love(y, x)))",
"(boy(x) & exists y. (girl(y) & love(y, x)))",
"(girl(x) -> dog(x))",
"all y. (dog(y) -> (x = y))",
"exists y. love(y, x)",
"exists y. (love(adam, y) & love(y, x))",
]
if trace:
print(m2)
for fmla in formulas:
print(fmla)
Expression.fromstring(fmla)
parsed = [Expression.fromstring(fmla) for fmla in formulas]
for p in parsed:
g2.purge()
print(
"The satisfiers of '{}' are: {}".format(p, m2.satisfiers(p, "x", g2, trace))
)
def demo(num=0, trace=None):
"""
Run some demos.
- num = 1: propositional logic demo
- num = 2: first order model demo (only if trace is set)
- num = 3: first order sentences demo
- num = 4: satisfaction of open formulas demo
- any other value: run all the demos
:param trace: trace = 1, or trace = 2 for more verbose tracing
"""
demos = {1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo}
try:
demos[num](trace=trace)
except KeyError:
for num in demos:
demos[num](trace=trace)
if __name__ == "__main__":
demo(2, trace=0)

View File

@@ -0,0 +1,835 @@
# Natural Language Toolkit: Glue Semantics
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import os
from itertools import chain
import nltk
from nltk.internals import Counter
from nltk.sem import drt, linearlogic
from nltk.sem.logic import (
AbstractVariableExpression,
Expression,
LambdaExpression,
Variable,
VariableExpression,
)
from nltk.tag import BigramTagger, RegexpTagger, TrigramTagger, UnigramTagger
SPEC_SEMTYPES = {
"a": "ex_quant",
"an": "ex_quant",
"every": "univ_quant",
"the": "def_art",
"no": "no_quant",
"default": "ex_quant",
}
OPTIONAL_RELATIONSHIPS = ["nmod", "vmod", "punct"]
class GlueFormula:
def __init__(self, meaning, glue, indices=None):
if not indices:
indices = set()
if isinstance(meaning, str):
self.meaning = Expression.fromstring(meaning)
elif isinstance(meaning, Expression):
self.meaning = meaning
else:
raise RuntimeError(
"Meaning term neither string or expression: %s, %s"
% (meaning, meaning.__class__)
)
if isinstance(glue, str):
self.glue = linearlogic.LinearLogicParser().parse(glue)
elif isinstance(glue, linearlogic.Expression):
self.glue = glue
else:
raise RuntimeError(
"Glue term neither string or expression: %s, %s"
% (glue, glue.__class__)
)
self.indices = indices
def applyto(self, arg):
"""self = (\\x.(walk x), (subj -o f))
arg = (john , subj)
returns ((walk john), f)
"""
if self.indices & arg.indices: # if the sets are NOT disjoint
raise linearlogic.LinearLogicApplicationException(
f"'{self}' applied to '{arg}'. Indices are not disjoint."
)
else: # if the sets ARE disjoint
return_indices = self.indices | arg.indices
try:
return_glue = linearlogic.ApplicationExpression(
self.glue, arg.glue, arg.indices
)
except linearlogic.LinearLogicApplicationException as e:
raise linearlogic.LinearLogicApplicationException(
f"'{self.simplify()}' applied to '{arg.simplify()}'"
) from e
arg_meaning_abstracted = arg.meaning
if return_indices:
for dep in self.glue.simplify().antecedent.dependencies[
::-1
]: # if self.glue is (A -o B), dep is in A.dependencies
arg_meaning_abstracted = self.make_LambdaExpression(
Variable("v%s" % dep), arg_meaning_abstracted
)
return_meaning = self.meaning.applyto(arg_meaning_abstracted)
return self.__class__(return_meaning, return_glue, return_indices)
def make_VariableExpression(self, name):
return VariableExpression(name)
def make_LambdaExpression(self, variable, term):
return LambdaExpression(variable, term)
def lambda_abstract(self, other):
assert isinstance(other, GlueFormula)
assert isinstance(other.meaning, AbstractVariableExpression)
return self.__class__(
self.make_LambdaExpression(other.meaning.variable, self.meaning),
linearlogic.ImpExpression(other.glue, self.glue),
)
def compile(self, counter=None):
"""From Iddo Lev's PhD Dissertation p108-109"""
if not counter:
counter = Counter()
(compiled_glue, new_forms) = self.glue.simplify().compile_pos(
counter, self.__class__
)
return new_forms + [
self.__class__(self.meaning, compiled_glue, {counter.get()})
]
def simplify(self):
return self.__class__(
self.meaning.simplify(), self.glue.simplify(), self.indices
)
def __eq__(self, other):
return (
self.__class__ == other.__class__
and self.meaning == other.meaning
and self.glue == other.glue
)
def __ne__(self, other):
return not self == other
# sorting for use in doctests which must be deterministic
def __lt__(self, other):
return str(self) < str(other)
def __str__(self):
assert isinstance(self.indices, set)
accum = f"{self.meaning} : {self.glue}"
if self.indices:
accum += (
" : {" + ", ".join(str(index) for index in sorted(self.indices)) + "}"
)
return accum
def __repr__(self):
return "%s" % self
class GlueDict(dict):
def __init__(self, filename, encoding=None):
self.filename = filename
self.file_encoding = encoding
self.read_file()
def read_file(self, empty_first=True):
if empty_first:
self.clear()
try:
contents = nltk.data.load(
self.filename, format="text", encoding=self.file_encoding
)
# TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load()
except LookupError as e:
try:
contents = nltk.data.load(
"file:" + self.filename, format="text", encoding=self.file_encoding
)
except LookupError:
raise e
lines = contents.splitlines()
for line in lines: # example: 'n : (\\x.(<word> x), (v-or))'
# lambdacalc -^ linear logic -^
line = line.strip() # remove trailing newline
if not len(line):
continue # skip empty lines
if line[0] == "#":
continue # skip commented out lines
parts = line.split(
" : ", 2
) # ['verb', '(\\x.(<word> x), ( subj -o f ))', '[subj]']
glue_formulas = []
paren_count = 0
tuple_start = 0
tuple_comma = 0
relationships = None
if len(parts) > 1:
for i, c in enumerate(parts[1]):
if c == "(":
if paren_count == 0: # if it's the first '(' of a tuple
tuple_start = i + 1 # then save the index
paren_count += 1
elif c == ")":
paren_count -= 1
if paren_count == 0: # if it's the last ')' of a tuple
meaning_term = parts[1][
tuple_start:tuple_comma
] # '\\x.(<word> x)'
glue_term = parts[1][tuple_comma + 1 : i] # '(v-r)'
glue_formulas.append(
[meaning_term, glue_term]
) # add the GlueFormula to the list
elif c == ",":
if (
paren_count == 1
): # if it's a comma separating the parts of the tuple
tuple_comma = i # then save the index
elif c == "#": # skip comments at the ends of lines
if (
paren_count != 0
): # if the line hasn't parsed correctly so far
raise RuntimeError(
"Formula syntax is incorrect for entry " + line
)
break # break to the next line
if len(parts) > 2: # if there is a relationship entry at the end
rel_start = parts[2].index("[") + 1
rel_end = parts[2].index("]")
if rel_start == rel_end:
relationships = frozenset()
else:
relationships = frozenset(
r.strip() for r in parts[2][rel_start:rel_end].split(",")
)
try:
start_inheritance = parts[0].index("(")
end_inheritance = parts[0].index(")")
sem = parts[0][:start_inheritance].strip()
supertype = parts[0][start_inheritance + 1 : end_inheritance]
except ValueError:
sem = parts[0].strip()
supertype = None
if sem not in self:
self[sem] = {}
if (
relationships is None
): # if not specified for a specific relationship set
# add all relationship entries for parents
if supertype:
for rels in self[supertype]:
if rels not in self[sem]:
self[sem][rels] = []
glue = self[supertype][rels]
self[sem][rels].extend(glue)
self[sem][rels].extend(
glue_formulas
) # add the glue formulas to every rel entry
else:
if None not in self[sem]:
self[sem][None] = []
self[sem][None].extend(
glue_formulas
) # add the glue formulas to every rel entry
else:
if relationships not in self[sem]:
self[sem][relationships] = []
if supertype:
self[sem][relationships].extend(self[supertype][relationships])
self[sem][relationships].extend(
glue_formulas
) # add the glue entry to the dictionary
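# Illustrative sketch of the resulting dictionary shape (the entry below is
# hypothetical, not taken from glue.semtype): a line such as
#     NN : (\x.(<word> x), (v -o r))
# is stored as a (meaning, glue) string pair under its semtype and relationship
# set, roughly self['NN'][None] == [['\x.(<word> x)', ' (v -o r)']].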
def __str__(self):
accum = ""
for pos in self:
str_pos = "%s" % pos
for relset in self[pos]:
i = 1
for gf in self[pos][relset]:
if i == 1:
accum += str_pos + ": "
else:
accum += " " * (len(str_pos) + 2)
accum += "%s" % gf
if relset and i == len(self[pos][relset]):
accum += " : %s" % relset
accum += "\n"
i += 1
return accum
def to_glueformula_list(self, depgraph, node=None, counter=None, verbose=False):
if node is None:
# TODO: should it be depgraph.root? Is this code tested?
top = depgraph.nodes[0]
depList = list(chain.from_iterable(top["deps"].values()))
root = depgraph.nodes[depList[0]]
return self.to_glueformula_list(depgraph, root, Counter(), verbose)
glueformulas = self.lookup(node, depgraph, counter)
for dep_idx in chain.from_iterable(node["deps"].values()):
dep = depgraph.nodes[dep_idx]
glueformulas.extend(
self.to_glueformula_list(depgraph, dep, counter, verbose)
)
return glueformulas
def lookup(self, node, depgraph, counter):
semtype_names = self.get_semtypes(node)
semtype = None
for name in semtype_names:
if name in self:
semtype = self[name]
break
if semtype is None:
# raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
return []
self.add_missing_dependencies(node, depgraph)
lookup = self._lookup_semtype_option(semtype, node, depgraph)
if not len(lookup):
raise KeyError(
"There is no GlueDict entry for sem type of '%s' "
"with tag '%s', and rel '%s'" % (node["word"], node["tag"], node["rel"])
)
return self.get_glueformulas_from_semtype_entry(
lookup, node["word"], node, depgraph, counter
)
def add_missing_dependencies(self, node, depgraph):
rel = node["rel"].lower()
if rel == "main":
headnode = depgraph.nodes[node["head"]]
subj = self.lookup_unique("subj", headnode, depgraph)
relation = subj["rel"]
node["deps"].setdefault(relation, [])
node["deps"][relation].append(subj["address"])
# node['deps'].append(subj['address'])
def _lookup_semtype_option(self, semtype, node, depgraph):
relationships = frozenset(
depgraph.nodes[dep]["rel"].lower()
for dep in chain.from_iterable(node["deps"].values())
if depgraph.nodes[dep]["rel"].lower() not in OPTIONAL_RELATIONSHIPS
)
try:
lookup = semtype[relationships]
except KeyError:
# An exact match is not found, so find the best match where
# 'best' is defined as the glue entry whose relationship set has the
# most relations of any possible relationship set that is a subset
# of the actual depgraph
best_match = frozenset()
for relset_option in set(semtype) - {None}:
if (
len(relset_option) > len(best_match)
and relset_option < relationships
):
best_match = relset_option
if not best_match:
if None in semtype:
best_match = None
else:
return None
lookup = semtype[best_match]
return lookup
def get_semtypes(self, node):
"""
Based on the node, return a list of plausible semtypes in order of
plausibility.
"""
rel = node["rel"].lower()
word = node["word"].lower()
if rel == "spec":
if word in SPEC_SEMTYPES:
return [SPEC_SEMTYPES[word]]
else:
return [SPEC_SEMTYPES["default"]]
elif rel in ["nmod", "vmod"]:
return [node["tag"], rel]
else:
return [node["tag"]]
def get_glueformulas_from_semtype_entry(
self, lookup, word, node, depgraph, counter
):
glueformulas = []
glueFormulaFactory = self.get_GlueFormula_factory()
for meaning, glue in lookup:
gf = glueFormulaFactory(self.get_meaning_formula(meaning, word), glue)
if not len(glueformulas):
gf.word = word
else:
gf.word = f"{word}{len(glueformulas) + 1}"
gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get())
glueformulas.append(gf)
return glueformulas
def get_meaning_formula(self, generic, word):
"""
:param generic: A meaning formula string containing the
parameter "<word>"
:param word: The actual word to be replace "<word>"
"""
word = word.replace(".", "")
return generic.replace("<word>", word)
def initialize_labels(self, expr, node, depgraph, unique_index):
if isinstance(expr, linearlogic.AtomicExpression):
name = self.find_label_name(expr.name.lower(), node, depgraph, unique_index)
if name[0].isupper():
return linearlogic.VariableExpression(name)
else:
return linearlogic.ConstantExpression(name)
else:
return linearlogic.ImpExpression(
self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
self.initialize_labels(expr.consequent, node, depgraph, unique_index),
)
def find_label_name(self, name, node, depgraph, unique_index):
try:
dot = name.index(".")
before_dot = name[:dot]
after_dot = name[dot + 1 :]
if before_dot == "super":
return self.find_label_name(
after_dot, depgraph.nodes[node["head"]], depgraph, unique_index
)
else:
return self.find_label_name(
after_dot,
self.lookup_unique(before_dot, node, depgraph),
depgraph,
unique_index,
)
except ValueError:
lbl = self.get_label(node)
if name == "f":
return lbl
elif name == "v":
return "%sv" % lbl
elif name == "r":
return "%sr" % lbl
elif name == "super":
return self.get_label(depgraph.nodes[node["head"]])
elif name == "var":
return f"{lbl.upper()}{unique_index}"
elif name == "a":
return self.get_label(self.lookup_unique("conja", node, depgraph))
elif name == "b":
return self.get_label(self.lookup_unique("conjb", node, depgraph))
else:
return self.get_label(self.lookup_unique(name, node, depgraph))
def get_label(self, node):
"""
Pick an alphabetic character as identifier for an entity in the model.
:param value: where to index into the list of characters
:type value: int
"""
value = node["address"]
letter = [
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
"a",
"b",
"c",
"d",
"e",
][value - 1]
num = int(value) // 26
if num > 0:
return letter + str(num)
else:
return letter
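# Illustrative mapping (follows the letter list and arithmetic above):
# address 1 -> 'f', address 2 -> 'g', address 6 -> 'k'.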
def lookup_unique(self, rel, node, depgraph):
"""
Lookup 'key'. There should be exactly one item in the associated relation.
"""
deps = [
depgraph.nodes[dep]
for dep in chain.from_iterable(node["deps"].values())
if depgraph.nodes[dep]["rel"].lower() == rel.lower()
]
if len(deps) == 0:
raise KeyError(
"'{}' doesn't contain a feature '{}'".format(node["word"], rel)
)
elif len(deps) > 1:
raise KeyError(
"'{}' should only have one feature '{}'".format(node["word"], rel)
)
else:
return deps[0]
def get_GlueFormula_factory(self):
return GlueFormula
class Glue:
def __init__(
self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
):
self.verbose = verbose
self.remove_duplicates = remove_duplicates
self.depparser = depparser
from nltk import Prover9
self.prover = Prover9()
if semtype_file:
self.semtype_file = semtype_file
else:
self.semtype_file = os.path.join(
"grammars", "sample_grammars", "glue.semtype"
)
def train_depparser(self, depgraphs=None):
if depgraphs:
self.depparser.train(depgraphs)
else:
self.depparser.train_from_file(
nltk.data.find(
os.path.join("grammars", "sample_grammars", "glue_train.conll")
)
)
def parse_to_meaning(self, sentence):
readings = []
for agenda in self.parse_to_compiled(sentence):
readings.extend(self.get_readings(agenda))
return readings
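# Typical driver for this class (an illustrative sketch, not a doctest; it
# assumes a working MaltParser installation, as in demo() below):
#   glue = Glue()   # dep_parse() lazily creates a MaltParser if none was given
#   for reading in glue.parse_to_meaning('every man chases a dog'.split()):
#       print(reading.simplify())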
def get_readings(self, agenda):
readings = []
agenda_length = len(agenda)
atomics = dict()
nonatomics = dict()
while agenda: # is not empty
cur = agenda.pop()
glue_simp = cur.glue.simplify()
if isinstance(
glue_simp, linearlogic.ImpExpression
): # if cur.glue is non-atomic
for key in atomics:
try:
if isinstance(cur.glue, linearlogic.ApplicationExpression):
bindings = cur.glue.bindings
else:
bindings = linearlogic.BindingDict()
glue_simp.antecedent.unify(key, bindings)
for atomic in atomics[key]:
if not (
cur.indices & atomic.indices
): # if the sets of indices are disjoint
try:
agenda.append(cur.applyto(atomic))
except linearlogic.LinearLogicApplicationException:
pass
except linearlogic.UnificationException:
pass
try:
nonatomics[glue_simp.antecedent].append(cur)
except KeyError:
nonatomics[glue_simp.antecedent] = [cur]
else: # else cur.glue is atomic
for key in nonatomics:
for nonatomic in nonatomics[key]:
try:
if isinstance(
nonatomic.glue, linearlogic.ApplicationExpression
):
bindings = nonatomic.glue.bindings
else:
bindings = linearlogic.BindingDict()
glue_simp.unify(key, bindings)
if not (
cur.indices & nonatomic.indices
): # if the sets of indices are disjoint
try:
agenda.append(nonatomic.applyto(cur))
except linearlogic.LinearLogicApplicationException:
pass
except linearlogic.UnificationException:
pass
try:
atomics[glue_simp].append(cur)
except KeyError:
atomics[glue_simp] = [cur]
for entry in atomics:
for gf in atomics[entry]:
if len(gf.indices) == agenda_length:
self._add_to_reading_list(gf, readings)
for entry in nonatomics:
for gf in nonatomics[entry]:
if len(gf.indices) == agenda_length:
self._add_to_reading_list(gf, readings)
return readings
def _add_to_reading_list(self, glueformula, reading_list):
add_reading = True
if self.remove_duplicates:
for reading in reading_list:
try:
if reading.equiv(glueformula.meaning, self.prover):
add_reading = False
break
except Exception as e:
# if there is an exception, the syntax of the formula
# may not be understandable by the prover, so don't
# throw out the reading.
print("Error when checking logical equality of statements", e)
if add_reading:
reading_list.append(glueformula.meaning)
def parse_to_compiled(self, sentence):
gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
return [self.gfl_to_compiled(gfl) for gfl in gfls]
def dep_parse(self, sentence):
"""
Return a dependency graph for the sentence.
:param sentence: the sentence to be parsed
:type sentence: list(str)
:rtype: DependencyGraph
"""
# Lazy-initialize the depparser
if self.depparser is None:
from nltk.parse import MaltParser
self.depparser = MaltParser(tagger=self.get_pos_tagger())
if not self.depparser._trained:
self.train_depparser()
return self.depparser.parse(sentence, verbose=self.verbose)
def depgraph_to_glue(self, depgraph):
return self.get_glue_dict().to_glueformula_list(depgraph)
def get_glue_dict(self):
return GlueDict(self.semtype_file)
def gfl_to_compiled(self, gfl):
index_counter = Counter()
return_list = []
for gf in gfl:
return_list.extend(gf.compile(index_counter))
if self.verbose:
print("Compiled Glue Premises:")
for cgf in return_list:
print(cgf)
return return_list
def get_pos_tagger(self):
from nltk.corpus import brown
regexp_tagger = RegexpTagger(
[
(r"^-?[0-9]+(\.[0-9]+)?$", "CD"), # cardinal numbers
(r"(The|the|A|a|An|an)$", "AT"), # articles
(r".*able$", "JJ"), # adjectives
(r".*ness$", "NN"), # nouns formed from adjectives
(r".*ly$", "RB"), # adverbs
(r".*s$", "NNS"), # plural nouns
(r".*ing$", "VBG"), # gerunds
(r".*ed$", "VBD"), # past tense verbs
(r".*", "NN"), # nouns (default)
]
)
brown_train = brown.tagged_sents(categories="news")
unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)
# Override particular words
main_tagger = RegexpTagger(
[(r"(A|a|An|an)$", "ex_quant"), (r"(Every|every|All|all)$", "univ_quant")],
backoff=trigram_tagger,
)
return main_tagger
class DrtGlueFormula(GlueFormula):
def __init__(self, meaning, glue, indices=None):
if not indices:
indices = set()
if isinstance(meaning, str):
self.meaning = drt.DrtExpression.fromstring(meaning)
elif isinstance(meaning, drt.DrtExpression):
self.meaning = meaning
else:
raise RuntimeError(
"Meaning term neither string or expression: %s, %s"
% (meaning, meaning.__class__)
)
if isinstance(glue, str):
self.glue = linearlogic.LinearLogicParser().parse(glue)
elif isinstance(glue, linearlogic.Expression):
self.glue = glue
else:
raise RuntimeError(
"Glue term neither string or expression: %s, %s"
% (glue, glue.__class__)
)
self.indices = indices
def make_VariableExpression(self, name):
return drt.DrtVariableExpression(name)
def make_LambdaExpression(self, variable, term):
return drt.DrtLambdaExpression(variable, term)
class DrtGlueDict(GlueDict):
def get_GlueFormula_factory(self):
return DrtGlueFormula
class DrtGlue(Glue):
def __init__(
self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
):
if not semtype_file:
semtype_file = os.path.join(
"grammars", "sample_grammars", "drt_glue.semtype"
)
Glue.__init__(self, semtype_file, remove_duplicates, depparser, verbose)
def get_glue_dict(self):
return DrtGlueDict(self.semtype_file)
def demo(show_example=-1):
from nltk.parse import MaltParser
examples = [
"David sees Mary",
"David eats a sandwich",
"every man chases a dog",
"every man believes a dog sleeps",
"John gives David a sandwich",
"John chases himself",
]
# 'John persuades David to order a pizza',
# 'John tries to go',
# 'John tries to find a unicorn',
# 'John seems to vanish',
# 'a unicorn seems to approach',
# 'every big cat leaves',
# 'every gray cat leaves',
# 'every big gray cat leaves',
# 'a former senator leaves',
print("============== DEMO ==============")
tagger = RegexpTagger(
[
("^(David|Mary|John)$", "NNP"),
(
"^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
"VB",
),
("^(go|order|vanish|find|approach)$", "VB"),
("^(a)$", "ex_quant"),
("^(every)$", "univ_quant"),
("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
("^(big|gray|former)$", "JJ"),
("^(him|himself)$", "PRP"),
]
)
depparser = MaltParser(tagger=tagger)
glue = Glue(depparser=depparser, verbose=False)
for i, sentence in enumerate(examples):
if i == show_example or show_example == -1:
print(f"[[[Example {i}]]] {sentence}")
for reading in glue.parse_to_meaning(sentence.split()):
print(reading.simplify())
print("")
if __name__ == "__main__":
demo()

View File

@@ -0,0 +1,395 @@
# Natural Language Toolkit: Logic
#
# Author: Peter Wang
# Updated by: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
An implementation of the Hole Semantics model, following Blackburn and Bos,
Representation and Inference for Natural Language (CSLI, 2005).
The semantic representations are built by the grammar hole.fcfg.
This module contains driver code to read in sentences and parse them
according to a hole semantics grammar.
After parsing, the semantic representation is in the form of an underspecified
representation that is not easy to read. We use a "plugging" algorithm to
convert that representation into first-order logic formulas.
"""
from functools import reduce
from nltk.parse import load_parser
from nltk.sem.logic import (
AllExpression,
AndExpression,
ApplicationExpression,
ExistsExpression,
IffExpression,
ImpExpression,
LambdaExpression,
NegatedExpression,
OrExpression,
)
from nltk.sem.skolemize import skolemize
# Note that in this code there may be multiple types of trees being referred to:
#
# 1. parse trees
# 2. the underspecified representation
# 3. first-order logic formula trees
# 4. the search space when plugging (search tree)
#
class Constants:
ALL = "ALL"
EXISTS = "EXISTS"
NOT = "NOT"
AND = "AND"
OR = "OR"
IMP = "IMP"
IFF = "IFF"
PRED = "PRED"
LEQ = "LEQ"
HOLE = "HOLE"
LABEL = "LABEL"
MAP = {
ALL: lambda v, e: AllExpression(v.variable, e),
EXISTS: lambda v, e: ExistsExpression(v.variable, e),
NOT: NegatedExpression,
AND: AndExpression,
OR: OrExpression,
IMP: ImpExpression,
IFF: IffExpression,
PRED: ApplicationExpression,
}
class HoleSemantics:
"""
This class holds the broken-down components of a hole semantics, i.e. it
extracts the holes, labels, logic formula fragments and constraints out of
a big conjunction of such as produced by the hole semantics grammar. It
then provides some operations on the semantics dealing with holes, labels
and finding legal ways to plug holes with labels.
"""
def __init__(self, usr):
"""
Constructor. `usr' is a ``sem.Expression`` representing an
Underspecified Representation Structure (USR). A USR has the following
special predicates:
ALL(l,v,n),
EXISTS(l,v,n),
AND(l,n,n),
OR(l,n,n),
IMP(l,n,n),
IFF(l,n,n),
PRED(l,v,n,v[,v]*) where the brackets and star indicate zero or more repetitions,
LEQ(n,n),
HOLE(n),
LABEL(n)
where l is the label of the node described by the predicate, n is either
a label or a hole, and v is a variable.
"""
self.holes = set()
self.labels = set()
self.fragments = {} # mapping of label -> formula fragment
self.constraints = set() # set of Constraints
self._break_down(usr)
self.top_most_labels = self._find_top_most_labels()
self.top_hole = self._find_top_hole()
def is_node(self, x):
"""
Return true if x is a node (label or hole) in this semantic
representation.
"""
return x in (self.labels | self.holes)
def _break_down(self, usr):
"""
Extract holes, labels, formula fragments and constraints from the hole
semantics underspecified representation (USR).
"""
if isinstance(usr, AndExpression):
self._break_down(usr.first)
self._break_down(usr.second)
elif isinstance(usr, ApplicationExpression):
func, args = usr.uncurry()
if func.variable.name == Constants.LEQ:
self.constraints.add(Constraint(args[0], args[1]))
elif func.variable.name == Constants.HOLE:
self.holes.add(args[0])
elif func.variable.name == Constants.LABEL:
self.labels.add(args[0])
else:
label = args[0]
assert label not in self.fragments
self.fragments[label] = (func, args[1:])
else:
raise ValueError(usr.label())
def _find_top_nodes(self, node_list):
top_nodes = node_list.copy()
for f in self.fragments.values():
# the label is the first argument of the predicate
args = f[1]
for arg in args:
if arg in node_list:
top_nodes.discard(arg)
return top_nodes
def _find_top_most_labels(self):
"""
Return the set of labels which are not referenced directly as part of
another formula fragment. These will be the top-most labels for the
subtree that they are part of.
"""
return self._find_top_nodes(self.labels)
def _find_top_hole(self):
"""
Return the hole that will be the top of the formula tree.
"""
top_holes = self._find_top_nodes(self.holes)
assert len(top_holes) == 1 # it must be unique
return top_holes.pop()
def pluggings(self):
"""
Calculate and return all the legal pluggings (mappings of holes to
labels) of this semantics given the constraints.
"""
record = []
self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {}, record)
return record
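# Illustrative shape of a single plugging (node names are hypothetical): a dict
# mapping each hole to the label chosen for it, e.g. {h1: l2, h2: l1}.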
def _plug_nodes(self, queue, potential_labels, plug_acc, record):
"""
Plug the nodes in `queue' with the labels in `potential_labels'.
Each element of `queue' is a tuple of the node to plug and the list of
ancestor holes from the root of the graph to that node.
`potential_labels' is a set of the labels which are still available for
plugging.
`plug_acc' is the incomplete mapping of holes to labels made on the
current branch of the search tree so far.
`record' is a list of all the complete pluggings that we have found in
total so far. It is the only parameter that is destructively updated.
"""
if queue != []:
(node, ancestors) = queue[0]
if node in self.holes:
# The node is a hole, try to plug it.
self._plug_hole(
node, ancestors, queue[1:], potential_labels, plug_acc, record
)
else:
assert node in self.labels
# The node is a label. Replace it in the queue by the holes and
# labels in the formula fragment named by that label.
args = self.fragments[node][1]
head = [(a, ancestors) for a in args if self.is_node(a)]
self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record)
else:
raise Exception("queue empty")
def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record):
"""
Try all possible ways of plugging a single hole.
See _plug_nodes for the meanings of the parameters.
"""
# Add the current hole we're trying to plug into the list of ancestors.
assert hole not in ancestors0
ancestors = [hole] + ancestors0
# Try each potential label in this hole in turn.
for l in potential_labels0:
# Is the label valid in this hole?
if self._violates_constraints(l, ancestors):
continue
plug_acc = plug_acc0.copy()
plug_acc[hole] = l
potential_labels = potential_labels0.copy()
potential_labels.remove(l)
if len(potential_labels) == 0:
# No more potential labels. That must mean all the holes have
# been filled so we have found a legal plugging so remember it.
#
# Note that the queue might not be empty because there might
# be labels on there that point to formula fragments with
# no holes in them. _sanity_check_plugging will make sure
# all holes are filled.
self._sanity_check_plugging(plug_acc, self.top_hole, [])
record.append(plug_acc)
else:
# Recursively try to fill in the rest of the holes in the
# queue. The label we just plugged into the hole could have
# holes of its own, so it goes at the end of the queue. Putting it on
# the end of the queue gives us a breadth-first search, so that
# all the holes at level i of the formula tree are filled
# before filling level i+1.
# A depth-first search would work as well since the trees must
# be finite but the bookkeeping would be harder.
self._plug_nodes(
queue + [(l, ancestors)], potential_labels, plug_acc, record
)
def _violates_constraints(self, label, ancestors):
"""
Return True if the `label' cannot be placed underneath the holes given
by the set `ancestors' because it would violate the constraints imposed
on it.
"""
for c in self.constraints:
if c.lhs == label:
if c.rhs not in ancestors:
return True
return False
def _sanity_check_plugging(self, plugging, node, ancestors):
"""
Make sure that a given plugging is legal. We recursively go through
each node and make sure that no constraints are violated.
We also check that all holes have been filled.
"""
if node in self.holes:
ancestors = [node] + ancestors
label = plugging[node]
else:
label = node
assert label in self.labels
for c in self.constraints:
if c.lhs == label:
assert c.rhs in ancestors
args = self.fragments[label][1]
for arg in args:
if self.is_node(arg):
self._sanity_check_plugging(plugging, arg, [label] + ancestors)
def formula_tree(self, plugging):
"""
Return the first-order logic formula tree for this underspecified
representation using the plugging given.
"""
return self._formula_tree(plugging, self.top_hole)
def _formula_tree(self, plugging, node):
if node in plugging:
return self._formula_tree(plugging, plugging[node])
elif node in self.fragments:
pred, args = self.fragments[node]
children = [self._formula_tree(plugging, arg) for arg in args]
return reduce(Constants.MAP[pred.variable.name], children)
else:
return node
class Constraint:
"""
This class represents a constraint of the form (L =< N),
where L is a label and N is a node (a label or a hole).
"""
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def __eq__(self, other):
if self.__class__ == other.__class__:
return self.lhs == other.lhs and self.rhs == other.rhs
else:
return False
def __ne__(self, other):
return not (self == other)
def __hash__(self):
return hash(repr(self))
def __repr__(self):
return f"({self.lhs} < {self.rhs})"
def hole_readings(sentence, grammar_filename=None, verbose=False):
if not grammar_filename:
grammar_filename = "grammars/sample_grammars/hole.fcfg"
if verbose:
print("Reading grammar file", grammar_filename)
parser = load_parser(grammar_filename)
# Parse the sentence.
tokens = sentence.split()
trees = list(parser.parse(tokens))
if verbose:
print("Got %d different parses" % len(trees))
all_readings = []
for tree in trees:
# Get the semantic feature from the top of the parse tree.
sem = tree.label()["SEM"].simplify()
# Print the raw semantic representation.
if verbose:
print("Raw: ", sem)
# Skolemize away all quantifiers. All variables become unique.
while isinstance(sem, LambdaExpression):
sem = sem.term
skolemized = skolemize(sem)
if verbose:
print("Skolemized:", skolemized)
# Break the hole semantics representation down into its components
# i.e. holes, labels, formula fragments and constraints.
hole_sem = HoleSemantics(skolemized)
# Maybe show the details of the semantic representation.
if verbose:
print("Holes: ", hole_sem.holes)
print("Labels: ", hole_sem.labels)
print("Constraints: ", hole_sem.constraints)
print("Top hole: ", hole_sem.top_hole)
print("Top labels: ", hole_sem.top_most_labels)
print("Fragments:")
for l, f in hole_sem.fragments.items():
print(f"\t{l}: {f}")
# Find all the possible ways to plug the formulas together.
pluggings = hole_sem.pluggings()
# Build FOL formula trees using the pluggings.
readings = list(map(hole_sem.formula_tree, pluggings))
# Print out the formulas in a textual format.
if verbose:
for i, r in enumerate(readings):
print()
print("%d. %s" % (i, r))
print()
all_readings.extend(readings)
return all_readings
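# Illustrative expectation (mirrors the __main__ block below): a scope-ambiguous
# sentence yields one formula per legal plugging, e.g.
#   readings = hole_readings("every girl chases a dog")
#   len(readings)   # expected to be 2, one per relative scoping of 'every' and 'a'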
if __name__ == "__main__":
for r in hole_readings("a dog barks"):
print(r)
print()
for r in hole_readings("every girl chases a dog"):
print(r)

View File

@@ -0,0 +1,261 @@
# Natural Language Toolkit: Lexical Functional Grammar
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
from itertools import chain
from nltk.internals import Counter
class FStructure(dict):
def safeappend(self, key, item):
"""
Append 'item' to the list at 'key'. If no list exists for 'key', then
construct one.
"""
if key not in self:
self[key] = []
self[key].append(item)
def __setitem__(self, key, value):
dict.__setitem__(self, key.lower(), value)
def __getitem__(self, key):
return dict.__getitem__(self, key.lower())
def __contains__(self, key):
return dict.__contains__(self, key.lower())
def to_glueformula_list(self, glue_dict):
depgraph = self.to_depgraph()
return glue_dict.to_glueformula_list(depgraph)
def to_depgraph(self, rel=None):
from nltk.parse.dependencygraph import DependencyGraph
depgraph = DependencyGraph()
nodes = depgraph.nodes
self._to_depgraph(nodes, 0, "ROOT")
# Add all the dependencies for all the nodes
for address, node in nodes.items():
for n2 in (n for n in nodes.values() if n["rel"] != "TOP"):
if n2["head"] == address:
relation = n2["rel"]
node["deps"].setdefault(relation, [])
node["deps"][relation].append(n2["address"])
depgraph.root = nodes[1]
return depgraph
def _to_depgraph(self, nodes, head, rel):
index = len(nodes)
nodes[index].update(
{
"address": index,
"word": self.pred[0],
"tag": self.pred[1],
"head": head,
"rel": rel,
}
)
for feature in sorted(self):
for item in sorted(self[feature]):
if isinstance(item, FStructure):
item._to_depgraph(nodes, index, feature)
elif isinstance(item, tuple):
new_index = len(nodes)
nodes[new_index].update(
{
"address": new_index,
"word": item[0],
"tag": item[1],
"head": index,
"rel": feature,
}
)
elif isinstance(item, list):
for n in item:
n._to_depgraph(nodes, index, feature)
else:
raise Exception(
"feature %s is not an FStruct, a list, or a tuple" % feature
)
@staticmethod
def read_depgraph(depgraph):
return FStructure._read_depgraph(depgraph.root, depgraph)
@staticmethod
def _read_depgraph(node, depgraph, label_counter=None, parent=None):
if not label_counter:
label_counter = Counter()
if node["rel"].lower() in ["spec", "punct"]:
# the value of a 'spec' entry is a word, not an FStructure
return (node["word"], node["tag"])
else:
fstruct = FStructure()
fstruct.pred = None
fstruct.label = FStructure._make_label(label_counter.get())
fstruct.parent = parent
word, tag = node["word"], node["tag"]
if tag[:2] == "VB":
if tag[2:3] == "D":
fstruct.safeappend("tense", ("PAST", "tense"))
fstruct.pred = (word, tag[:2])
if not fstruct.pred:
fstruct.pred = (word, tag)
children = [
depgraph.nodes[idx]
for idx in chain.from_iterable(node["deps"].values())
]
for child in children:
fstruct.safeappend(
child["rel"],
FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
)
return fstruct
@staticmethod
def _make_label(value):
"""
Pick an alphabetic character as identifier for an entity in the model.
:param value: where to index into the list of characters
:type value: int
"""
letter = [
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
"a",
"b",
"c",
"d",
"e",
][value - 1]
num = int(value) // 26
if num > 0:
return letter + str(num)
else:
return letter
def __repr__(self):
return self.__str__().replace("\n", "")
def __str__(self):
return self.pretty_format()
def pretty_format(self, indent=3):
try:
accum = "%s:[" % self.label
except AttributeError:
accum = "["
try:
accum += "pred '%s'" % (self.pred[0])
except (AttributeError, TypeError):
pass
for feature in sorted(self):
for item in self[feature]:
if isinstance(item, FStructure):
next_indent = indent + len(feature) + 3 + len(self.label)
accum += "\n{}{} {}".format(
" " * (indent),
feature,
item.pretty_format(next_indent),
)
elif isinstance(item, tuple):
accum += "\n{}{} '{}'".format(" " * (indent), feature, item[0])
elif isinstance(item, list):
accum += "\n{}{} {{{}}}".format(
" " * (indent),
feature,
("\n%s" % (" " * (indent + len(feature) + 2))).join(item),
)
else: # ERROR
raise Exception(
"feature %s is not an FStruct, a list, or a tuple" % feature
)
return accum + "]"
def demo_read_depgraph():
from nltk.parse.dependencygraph import DependencyGraph
dg1 = DependencyGraph(
"""\
Esso NNP 2 SUB
said VBD 0 ROOT
the DT 5 NMOD
Whiting NNP 5 NMOD
field NN 6 SUB
started VBD 2 VMOD
production NN 6 OBJ
Tuesday NNP 6 VMOD
"""
)
dg2 = DependencyGraph(
"""\
John NNP 2 SUB
sees VBP 0 ROOT
Mary NNP 2 OBJ
"""
)
dg3 = DependencyGraph(
"""\
a DT 2 SPEC
man NN 3 SUBJ
walks VB 0 ROOT
"""
)
dg4 = DependencyGraph(
"""\
every DT 2 SPEC
girl NN 3 SUBJ
chases VB 0 ROOT
a DT 5 SPEC
dog NN 3 OBJ
"""
)
depgraphs = [dg1, dg2, dg3, dg4]
for dg in depgraphs:
print(FStructure.read_depgraph(dg))
if __name__ == "__main__":
demo_read_depgraph()

View File

@@ -0,0 +1,481 @@
# Natural Language Toolkit: Linear Logic
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
from nltk.internals import Counter
from nltk.sem.logic import APP, LogicParser
_counter = Counter()
class Tokens:
# Punctuation
OPEN = "("
CLOSE = ")"
# Operations
IMP = "-o"
PUNCT = [OPEN, CLOSE]
TOKENS = PUNCT + [IMP]
class LinearLogicParser(LogicParser):
"""A linear logic expression parser."""
def __init__(self):
LogicParser.__init__(self)
self.operator_precedence = {APP: 1, Tokens.IMP: 2, None: 3}
self.right_associated_operations += [Tokens.IMP]
def get_all_symbols(self):
return Tokens.TOKENS
def handle(self, tok, context):
if tok not in Tokens.TOKENS:
return self.handle_variable(tok, context)
elif tok == Tokens.OPEN:
return self.handle_open(tok, context)
def get_BooleanExpression_factory(self, tok):
if tok == Tokens.IMP:
return ImpExpression
else:
return None
def make_BooleanExpression(self, factory, first, second):
return factory(first, second)
def attempt_ApplicationExpression(self, expression, context):
"""Attempt to make an application expression. If the next tokens
are an argument in parens, then the argument expression is a
function being applied to the arguments. Otherwise, return the
argument expression."""
if self.has_priority(APP, context):
if self.inRange(0) and self.token(0) == Tokens.OPEN:
self.token() # swallow the open paren
argument = self.process_next_expression(APP)
self.assertNextToken(Tokens.CLOSE)
expression = ApplicationExpression(expression, argument, None)
return expression
def make_VariableExpression(self, name):
if name[0].isupper():
return VariableExpression(name)
else:
return ConstantExpression(name)
class Expression:
_linear_logic_parser = LinearLogicParser()
@classmethod
def fromstring(cls, s):
return cls._linear_logic_parser.parse(s)
def applyto(self, other, other_indices=None):
return ApplicationExpression(self, other, other_indices)
def __call__(self, other):
return self.applyto(other)
def __repr__(self):
return f"<{self.__class__.__name__} {self}>"
class AtomicExpression(Expression):
def __init__(self, name, dependencies=None):
"""
:param name: str for the constant name
:param dependencies: list of int for the indices on which this atom is dependent
"""
assert isinstance(name, str)
self.name = name
if not dependencies:
dependencies = []
self.dependencies = dependencies
def simplify(self, bindings=None):
"""
If 'self' is bound by 'bindings', return the atomic to which it is bound.
Otherwise, return self.
:param bindings: ``BindingDict`` A dictionary of bindings used to simplify
:return: ``AtomicExpression``
"""
if bindings and self in bindings:
return bindings[self]
else:
return self
def compile_pos(self, index_counter, glueFormulaFactory):
"""
From Iddo Lev's PhD Dissertation p108-109
:param index_counter: ``Counter`` for unique indices
:param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
:return: (``Expression``,set) for the compiled linear logic and any newly created glue formulas
"""
self.dependencies = []
return (self, [])
def compile_neg(self, index_counter, glueFormulaFactory):
"""
From Iddo Lev's PhD Dissertation p108-109
:param index_counter: ``Counter`` for unique indices
:param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
:return: (``Expression``,set) for the compiled linear logic and any newly created glue formulas
"""
self.dependencies = []
return (self, [])
def initialize_labels(self, fstruct):
self.name = fstruct.initialize_label(self.name.lower())
def __eq__(self, other):
return self.__class__ == other.__class__ and self.name == other.name
def __ne__(self, other):
return not self == other
def __str__(self):
accum = self.name
if self.dependencies:
accum += "%s" % self.dependencies
return accum
def __hash__(self):
return hash(self.name)
class ConstantExpression(AtomicExpression):
def unify(self, other, bindings):
"""
If 'other' is a constant, then it must be equal to 'self'. If 'other' is a variable,
then it must not be bound to anything other than 'self'.
:param other: ``Expression``
:param bindings: ``BindingDict`` A dictionary of all current bindings
:return: ``BindingDict`` A new combined dictionary of 'bindings' and any new binding
:raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
"""
assert isinstance(other, Expression)
if isinstance(other, VariableExpression):
try:
return bindings + BindingDict([(other, self)])
except VariableBindingException:
pass
elif self == other:
return bindings
raise UnificationException(self, other, bindings)
class VariableExpression(AtomicExpression):
def unify(self, other, bindings):
"""
'self' must not be bound to anything other than 'other'.
:param other: ``Expression``
:param bindings: ``BindingDict`` A dictionary of all current bindings
:return: ``BindingDict`` A new combined dictionary of 'bindings' and the new binding
:raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
"""
assert isinstance(other, Expression)
try:
if self == other:
return bindings
else:
return bindings + BindingDict([(self, other)])
except VariableBindingException as e:
raise UnificationException(self, other, bindings) from e
class ImpExpression(Expression):
def __init__(self, antecedent, consequent):
"""
:param antecedent: ``Expression`` for the antecedent
:param consequent: ``Expression`` for the consequent
"""
assert isinstance(antecedent, Expression)
assert isinstance(consequent, Expression)
self.antecedent = antecedent
self.consequent = consequent
def simplify(self, bindings=None):
return self.__class__(
self.antecedent.simplify(bindings), self.consequent.simplify(bindings)
)
def unify(self, other, bindings):
"""
Both the antecedent and consequent of 'self' and 'other' must unify.
:param other: ``ImpExpression``
:param bindings: ``BindingDict`` A dictionary of all current bindings
:return: ``BindingDict`` A new combined dictionary of of 'bindings' and any new bindings
:raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
"""
assert isinstance(other, ImpExpression)
try:
return (
bindings
+ self.antecedent.unify(other.antecedent, bindings)
+ self.consequent.unify(other.consequent, bindings)
)
except VariableBindingException as e:
raise UnificationException(self, other, bindings) from e
def compile_pos(self, index_counter, glueFormulaFactory):
"""
From Iddo Lev's PhD Dissertation p108-109
:param index_counter: ``Counter`` for unique indices
:param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
:return: (``Expression``,set) for the compiled linear logic and any newly created glue formulas
"""
(a, a_new) = self.antecedent.compile_neg(index_counter, glueFormulaFactory)
(c, c_new) = self.consequent.compile_pos(index_counter, glueFormulaFactory)
return (ImpExpression(a, c), a_new + c_new)
def compile_neg(self, index_counter, glueFormulaFactory):
"""
From Iddo Lev's PhD Dissertation p108-109
:param index_counter: ``Counter`` for unique indices
:param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
:return: (``Expression``,list of ``GlueFormula``) for the compiled linear logic and any newly created glue formulas
"""
(a, a_new) = self.antecedent.compile_pos(index_counter, glueFormulaFactory)
(c, c_new) = self.consequent.compile_neg(index_counter, glueFormulaFactory)
fresh_index = index_counter.get()
c.dependencies.append(fresh_index)
new_v = glueFormulaFactory("v%s" % fresh_index, a, {fresh_index})
return (c, a_new + c_new + [new_v])
def initialize_labels(self, fstruct):
self.antecedent.initialize_labels(fstruct)
self.consequent.initialize_labels(fstruct)
def __eq__(self, other):
return (
self.__class__ == other.__class__
and self.antecedent == other.antecedent
and self.consequent == other.consequent
)
def __ne__(self, other):
return not self == other
def __str__(self):
return "{}{} {} {}{}".format(
Tokens.OPEN,
self.antecedent,
Tokens.IMP,
self.consequent,
Tokens.CLOSE,
)
def __hash__(self):
return hash(f"{hash(self.antecedent)}{Tokens.IMP}{hash(self.consequent)}")
class ApplicationExpression(Expression):
def __init__(self, function, argument, argument_indices=None):
"""
:param function: ``Expression`` for the function
:param argument: ``Expression`` for the argument
:param argument_indices: set for the indices of the glue formula from which the argument came
:raise LinearLogicApplicationException: If 'function' cannot be applied to 'argument' given 'argument_indices'.
"""
function_simp = function.simplify()
argument_simp = argument.simplify()
assert isinstance(function_simp, ImpExpression)
assert isinstance(argument_simp, Expression)
bindings = BindingDict()
try:
if isinstance(function, ApplicationExpression):
bindings += function.bindings
if isinstance(argument, ApplicationExpression):
bindings += argument.bindings
bindings += function_simp.antecedent.unify(argument_simp, bindings)
except UnificationException as e:
raise LinearLogicApplicationException(
f"Cannot apply {function_simp} to {argument_simp}. {e}"
) from e
# If you are running it on compiled premises, more conditions apply
if argument_indices:
# A.dependencies of (A -o (B -o C)) must be a proper subset of argument_indices
if not set(function_simp.antecedent.dependencies) < argument_indices:
raise LinearLogicApplicationException(
"Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s"
% (function_simp, argument_simp)
)
if set(function_simp.antecedent.dependencies) == argument_indices:
raise LinearLogicApplicationException(
"Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s"
% (function_simp, argument_simp)
)
self.function = function
self.argument = argument
self.bindings = bindings
def simplify(self, bindings=None):
"""
Since function is an implication, return its consequent. There should be
no need to check that the application is valid since the checking is done
by the constructor.
:param bindings: ``BindingDict`` A dictionary of bindings used to simplify
:return: ``Expression``
"""
if not bindings:
bindings = self.bindings
return self.function.simplify(bindings).consequent
def __eq__(self, other):
return (
self.__class__ == other.__class__
and self.function == other.function
and self.argument == other.argument
)
def __ne__(self, other):
return not self == other
def __str__(self):
return "%s" % self.function + Tokens.OPEN + "%s" % self.argument + Tokens.CLOSE
def __hash__(self):
return hash(f"{hash(self.antecedent)}{Tokens.OPEN}{hash(self.consequent)}")
class BindingDict:
def __init__(self, bindings=None):
"""
:param bindings:
list [(``VariableExpression``, ``AtomicExpression``)] to initialize the dictionary
dict {``VariableExpression``: ``AtomicExpression``} to initialize the dictionary
"""
self.d = {}
if isinstance(bindings, dict):
bindings = bindings.items()
if bindings:
for v, b in bindings:
self[v] = b
def __setitem__(self, variable, binding):
"""
A binding is consistent with the dict if its variable is not already bound, OR if its
variable is already bound to its argument.
:param variable: ``VariableExpression`` The variable bind
:param binding: ``Expression`` The expression to which 'variable' should be bound
:raise VariableBindingException: If the variable cannot be bound in this dictionary
"""
assert isinstance(variable, VariableExpression)
assert isinstance(binding, Expression)
assert variable != binding
existing = self.d.get(variable, None)
if not existing or binding == existing:
self.d[variable] = binding
else:
raise VariableBindingException(
"Variable %s already bound to another value" % (variable)
)
def __getitem__(self, variable):
"""
Return the expression to which 'variable' is bound
"""
assert isinstance(variable, VariableExpression)
intermediate = self.d[variable]
while intermediate:
try:
intermediate = self.d[intermediate]
except KeyError:
return intermediate
def __contains__(self, item):
return item in self.d
def __add__(self, other):
"""
:param other: ``BindingDict`` The dict with which to combine self
:return: ``BindingDict`` A new dict containing all the elements of both parameters
:raise VariableBindingException: If the parameter dictionaries are not consistent with each other
"""
try:
combined = BindingDict()
for v in self.d:
combined[v] = self.d[v]
for v in other.d:
combined[v] = other.d[v]
return combined
except VariableBindingException as e:
raise VariableBindingException(
"Attempting to add two contradicting"
" VariableBindingsLists: %s, %s" % (self, other)
) from e
def __ne__(self, other):
return not self == other
def __eq__(self, other):
if not isinstance(other, BindingDict):
raise TypeError
return self.d == other.d
def __str__(self):
return "{" + ", ".join(f"{v}: {self.d[v]}" for v in sorted(self.d.keys())) + "}"
def __repr__(self):
return "BindingDict: %s" % self
class VariableBindingException(Exception):
pass
class UnificationException(Exception):
def __init__(self, a, b, bindings):
Exception.__init__(self, f"Cannot unify {a} with {b} given {bindings}")
class LinearLogicApplicationException(Exception):
pass
def demo():
lexpr = Expression.fromstring
print(lexpr(r"f"))
print(lexpr(r"(g -o f)"))
print(lexpr(r"((g -o G) -o G)"))
print(lexpr(r"g -o h -o f"))
print(lexpr(r"(g -o f)(g)").simplify())
print(lexpr(r"(H -o f)(g)").simplify())
print(lexpr(r"((g -o G) -o G)((g -o f))").simplify())
print(lexpr(r"(H -o H)((g -o f))").simplify())
if __name__ == "__main__":
demo()

File diff suppressed because it is too large

View File

@@ -0,0 +1,539 @@
# Natural Language Toolkit: Relation Extraction
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Code for extracting relational triples from the ieer and conll2002 corpora.
Relations are stored internally as dictionaries ('reldicts').
The two serialization outputs are "rtuple" and "clause".
- An rtuple is a tuple of the form ``(subj, filler, obj)``,
where ``subj`` and ``obj`` are pairs of Named Entity mentions, and ``filler`` is the string of words
occurring between ``subj`` and ``obj`` (with no intervening NEs). Strings are printed via ``repr()`` to
circumvent locale variations in rendering utf-8 encoded strings.
- A clause is an atom of the form ``relsym(subjsym, objsym)``,
where the relation, subject and object have been canonicalized to single strings.
"""
# todo: get a more general solution to canonicalized symbols for clauses -- maybe use xmlcharrefs?
import html
import re
from collections import defaultdict
# Dictionary that associates corpora with NE classes
NE_CLASSES = {
"ieer": [
"LOCATION",
"ORGANIZATION",
"PERSON",
"DURATION",
"DATE",
"CARDINAL",
"PERCENT",
"MONEY",
"MEASURE",
],
"conll2002": ["LOC", "PER", "ORG"],
"ace": [
"LOCATION",
"ORGANIZATION",
"PERSON",
"DURATION",
"DATE",
"CARDINAL",
"PERCENT",
"MONEY",
"MEASURE",
"FACILITY",
"GPE",
],
}
# Allow abbreviated class labels
short2long = dict(LOC="LOCATION", ORG="ORGANIZATION", PER="PERSON")
long2short = dict(LOCATION="LOC", ORGANIZATION="ORG", PERSON="PER")
def _expand(type):
"""
Expand an NE class name.
:type type: str
:rtype: str
"""
try:
return short2long[type]
except KeyError:
return type
def class_abbrev(type):
"""
Abbreviate an NE class name.
:type type: str
:rtype: str
"""
try:
return long2short[type]
except KeyError:
return type
def _join(lst, sep=" ", untag=False):
"""
Join a list into a string, turning tags tuples into tag strings or just words.
:param untag: if ``True``, omit the tag from tagged input strings.
:type lst: list
:rtype: str
"""
try:
return sep.join(lst)
except TypeError:
if untag:
return sep.join(tup[0] for tup in lst)
from nltk.tag import tuple2str
return sep.join(tuple2str(tup) for tup in lst)
def descape_entity(m, defs=html.entities.entitydefs):
"""
Translate one entity to its ISO Latin value.
Inspired by example from effbot.org
"""
try:
return defs[m.group(1)]
except KeyError:
return m.group(0) # use as is
def list2sym(lst):
"""
Convert a list of strings into a canonical symbol.
:type lst: list
:return: a Unicode string without whitespace
:rtype: unicode
"""
sym = _join(lst, "_", untag=True)
sym = sym.lower()
ENT = re.compile(r"&(\w+?);")
sym = ENT.sub(descape_entity, sym)
sym = sym.replace(".", "")
return sym
def tree2semi_rel(tree):
"""
Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).
In order to facilitate the construction of (``Tree``, string, ``Tree``) triples, this
identifies pairs whose first member is a list (possibly empty) of terminal
strings, and whose second member is a ``Tree`` of the form (NE_label, terminals).
:param tree: a chunk tree
:return: a list of pairs (list(str), ``Tree``)
:rtype: list of tuple
"""
from nltk.tree import Tree
semi_rels = []
semi_rel = [[], None]
for dtr in tree:
if not isinstance(dtr, Tree):
semi_rel[0].append(dtr)
else:
# dtr is a Tree
semi_rel[1] = dtr
semi_rels.append(semi_rel)
semi_rel = [[], None]
return semi_rels
def semi_rel2reldict(pairs, window=5, trace=False):
"""
Converts the pairs generated by ``tree2semi_rel`` into a 'reldict': a dictionary which
stores information about the subject and object NEs plus the filler between them.
Additionally, a left and right context of length <= window are captured (within
a given input sentence).
:param pairs: a pair of list(str) and ``Tree``, as generated by ``tree2semi_rel``
:param window: a threshold for the number of items to include in the left and right context
:type window: int
:return: 'relation' dictionaries whose keys are 'lcon', 'subjclass', 'subjtext', 'subjsym', 'filler', 'objclass', 'objtext', 'objsym' and 'rcon'
:rtype: list(defaultdict)
"""
result = []
while len(pairs) > 2:
reldict = defaultdict(str)
reldict["lcon"] = _join(pairs[0][0][-window:])
reldict["subjclass"] = pairs[0][1].label()
reldict["subjtext"] = _join(pairs[0][1].leaves())
reldict["subjsym"] = list2sym(pairs[0][1].leaves())
reldict["filler"] = _join(pairs[1][0])
reldict["untagged_filler"] = _join(pairs[1][0], untag=True)
reldict["objclass"] = pairs[1][1].label()
reldict["objtext"] = _join(pairs[1][1].leaves())
reldict["objsym"] = list2sym(pairs[1][1].leaves())
reldict["rcon"] = _join(pairs[2][0][:window])
if trace:
print(
"(%s(%s, %s)"
% (
reldict["untagged_filler"],
reldict["subjclass"],
reldict["objclass"],
)
)
result.append(reldict)
pairs = pairs[1:]
return result
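# Illustrative sketch of the pipeline so far: run a hand-built chunk tree
# (sentence and NE labels are made up) through ``tree2semi_rel`` and
# ``semi_rel2reldict`` and print the main reldict fields. It is not called
# anywhere; it is only meant to make the data flow concrete.
def _semi_rel_demo():
    from nltk.tree import Tree

    sent = Tree(
        "S",
        [
            Tree("ORG", [("BBDO", "NNP")]),
            ("is", "VBZ"),
            ("based", "VBN"),
            ("in", "IN"),
            Tree("LOC", [("New", "NNP"), ("York", "NNP")]),
            (",", ","),
            ("said", "VBD"),
            Tree("PER", [("Smith", "NNP")]),
        ],
    )
    for rd in semi_rel2reldict(tree2semi_rel(sent)):
        print(rd["subjclass"], rd["subjtext"])  # ORG BBDO/NNP
        print(rd["untagged_filler"])            # is based in
        print(rd["objclass"], rd["objtext"])    # LOC New/NNP York/NNP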
def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10):
"""
Filter the output of ``semi_rel2reldict`` according to specified NE classes and a filler pattern.
The parameters ``subjclass`` and ``objclass`` can be used to restrict the
Named Entities to particular types (any of 'LOCATION', 'ORGANIZATION',
'PERSON', 'DURATION', 'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE').
:param subjclass: the class of the subject Named Entity.
:type subjclass: str
:param objclass: the class of the object Named Entity.
:type objclass: str
:param doc: input document
:type doc: ieer document or a list of chunk trees
:param corpus: name of the corpus to take as input; possible values are
'ieer' and 'conll2002'
:type corpus: str
:param pattern: a regular expression for filtering the fillers of
retrieved triples.
:type pattern: SRE_Pattern
:param window: filters out fillers which exceed this threshold
:type window: int
:return: see ``semi_rel2reldict``
:rtype: list(defaultdict)
"""
if subjclass and subjclass not in NE_CLASSES[corpus]:
if _expand(subjclass) in NE_CLASSES[corpus]:
subjclass = _expand(subjclass)
else:
raise ValueError(
"your value for the subject type has not been recognized: %s"
% subjclass
)
if objclass and objclass not in NE_CLASSES[corpus]:
if _expand(objclass) in NE_CLASSES[corpus]:
objclass = _expand(objclass)
else:
raise ValueError(
"your value for the object type has not been recognized: %s" % objclass
)
if corpus == "ace" or corpus == "conll2002":
pairs = tree2semi_rel(doc)
elif corpus == "ieer":
pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline)
else:
raise ValueError("corpus type not recognized")
reldicts = semi_rel2reldict(pairs)
relfilter = lambda x: (
x["subjclass"] == subjclass
and len(x["filler"].split()) <= window
and pattern.match(x["filler"])
and x["objclass"] == objclass
)
return list(filter(relfilter, reldicts))
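# For illustration (hand-built input, hypothetical labels): given a
# conll2002-style chunk tree such as the one constructed in _semi_rel_demo
# above, extract_rels keeps only the reldicts whose NE classes and filler
# pattern match.
#
#     >>> IN = re.compile(r".*\bin\b")
#     >>> rels = extract_rels("ORG", "LOC", sent, corpus="conll2002", pattern=IN)
#     >>> [r["objtext"] for r in rels]
#     ['New/NNP York/NNP']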
def rtuple(reldict, lcon=False, rcon=False):
"""
Pretty print the reldict as an rtuple.
:param reldict: a relation dictionary
:type reldict: defaultdict
"""
items = [
class_abbrev(reldict["subjclass"]),
reldict["subjtext"],
reldict["filler"],
class_abbrev(reldict["objclass"]),
reldict["objtext"],
]
format = "[%s: %r] %r [%s: %r]"
if lcon:
items = [reldict["lcon"]] + items
format = "...%r)" + format
if rcon:
items.append(reldict["rcon"])
format = format + "(%r..."
printargs = tuple(items)
return format % printargs
def clause(reldict, relsym):
"""
Print the relation in clausal form.
:param reldict: a relation dictionary
:type reldict: defaultdict
:param relsym: a label for the relation
:type relsym: str
"""
items = (relsym, reldict["subjsym"], reldict["objsym"])
return "%s(%r, %r)" % items
#######################################################
# Demos of relation extraction with regular expressions
#######################################################
############################################
# Example of in(ORG, LOC)
############################################
def in_demo(trace=0, sql=True):
"""
Select pairs of organizations and locations whose mentions occur with an
intervening occurrence of the preposition "in".
If the sql parameter is set to True, then the entity pairs are loaded into
an in-memory database, and subsequently pulled out using an SQL "SELECT"
query.
"""
from nltk.corpus import ieer
if sql:
try:
import sqlite3
connection = sqlite3.connect(":memory:")
cur = connection.cursor()
cur.execute(
"""create table Locations
(OrgName text, LocationName text, DocID text)"""
)
except ImportError:
import warnings
warnings.warn("Cannot import sqlite; sql flag will be ignored.")
IN = re.compile(r".*\bin\b(?!\b.+ing)")
print()
print("IEER: in(ORG, LOC) -- just the clauses:")
print("=" * 45)
for file in ieer.fileids():
for doc in ieer.parsed_docs(file):
if trace:
print(doc.docno)
print("=" * 15)
for rel in extract_rels("ORG", "LOC", doc, corpus="ieer", pattern=IN):
print(clause(rel, relsym="IN"))
if sql:
try:
rtuple = (rel["subjtext"], rel["objtext"], doc.docno)
cur.execute(
"""insert into Locations
values (?, ?, ?)""",
rtuple,
)
connection.commit()
except NameError:
pass
if sql:
try:
cur.execute(
"""select OrgName from Locations
where LocationName = 'Atlanta'"""
)
print()
print("Extract data from SQL table: ORGs in Atlanta")
print("-" * 15)
for row in cur:
print(row)
except NameError:
pass
############################################
# Example of has_role(PER, LOC)
############################################
def roles_demo(trace=0):
from nltk.corpus import ieer
roles = r"""
(.*( # assorted roles
analyst|
chair(wo)?man|
commissioner|
counsel|
director|
economist|
editor|
executive|
foreman|
governor|
head|
lawyer|
leader|
librarian).*)|
manager|
partner|
president|
producer|
professor|
researcher|
spokes(wo)?man|
writer|
,\sof\sthe?\s* # "X, of (the) Y"
"""
ROLES = re.compile(roles, re.VERBOSE)
print()
print("IEER: has_role(PER, ORG) -- raw rtuples:")
print("=" * 45)
for file in ieer.fileids():
for doc in ieer.parsed_docs(file):
lcon = rcon = False
if trace:
print(doc.docno)
print("=" * 15)
lcon = rcon = True
for rel in extract_rels("PER", "ORG", doc, corpus="ieer", pattern=ROLES):
print(rtuple(rel, lcon=lcon, rcon=rcon))
##############################################
### Show what's in the IEER Headlines
##############################################
def ieer_headlines():
from nltk.corpus import ieer
from nltk.tree import Tree
print("IEER: First 20 Headlines")
print("=" * 45)
trees = [
(doc.docno, doc.headline)
for file in ieer.fileids()
for doc in ieer.parsed_docs(file)
]
for tree in trees[:20]:
print()
print("%s:\n%s" % tree)
#############################################
## Dutch CONLL2002: take_on_role(PER, ORG)
#############################################
def conllned(trace=1):
"""
Find the copula+'van' relation ('of') in the Dutch tagged training corpus
from CoNLL 2002.
"""
from nltk.corpus import conll2002
vnv = """
(
is/V| # 3rd sing present and
was/V| # past forms of the verb zijn ('be')
werd/V| # and past and
wordt/V # present forms of worden ('become')
)
.* # followed by anything
van/Prep # followed by van ('of')
"""
VAN = re.compile(vnv, re.VERBOSE)
print()
print("Dutch CoNLL2002: van(PER, ORG) -- raw rtuples with context:")
print("=" * 45)
for doc in conll2002.chunked_sents("ned.train"):
lcon = rcon = False
if trace:
lcon = rcon = True
for rel in extract_rels(
"PER", "ORG", doc, corpus="conll2002", pattern=VAN, window=10
):
print(rtuple(rel, lcon=lcon, rcon=rcon))
#############################################
## Spanish CONLL2002: (PER, ORG)
#############################################
def conllesp():
from nltk.corpus import conll2002
de = """
.*
(
de/SP|
del/SP
)
"""
DE = re.compile(de, re.VERBOSE)
print()
print("Spanish CoNLL2002: de(ORG, LOC) -- just the first 10 clauses:")
print("=" * 45)
rels = [
rel
for doc in conll2002.chunked_sents("esp.train")
for rel in extract_rels("ORG", "LOC", doc, corpus="conll2002", pattern=DE)
]
for r in rels[:10]:
print(clause(r, relsym="DE"))
print()
def ne_chunked():
    import nltk  # local import: lets this demo run even when the module is imported elsewhere
    print()
print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
print("=" * 45)
ROLE = re.compile(
r".*(chairman|president|trader|scientist|economist|analyst|partner).*"
)
rels = []
for i, sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]):
sent = nltk.ne_chunk(sent)
rels = extract_rels("PER", "ORG", sent, corpus="ace", pattern=ROLE, window=7)
for rel in rels:
print(f"{i:<5}{rtuple(rel)}")
if __name__ == "__main__":
import nltk
from nltk.sem import relextract
in_demo(trace=0)
roles_demo(trace=0)
conllned()
conllesp()
ieer_headlines()
ne_chunked()

View File

@@ -0,0 +1,148 @@
# Natural Language Toolkit: Semantic Interpretation
#
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
from nltk.sem.logic import (
AllExpression,
AndExpression,
ApplicationExpression,
EqualityExpression,
ExistsExpression,
IffExpression,
ImpExpression,
NegatedExpression,
OrExpression,
VariableExpression,
skolem_function,
unique_variable,
)
def skolemize(expression, univ_scope=None, used_variables=None):
"""
Skolemize the expression and convert to conjunctive normal form (CNF)
"""
if univ_scope is None:
univ_scope = set()
if used_variables is None:
used_variables = set()
if isinstance(expression, AllExpression):
term = skolemize(
expression.term,
univ_scope | {expression.variable},
used_variables | {expression.variable},
)
return term.replace(
expression.variable,
VariableExpression(unique_variable(ignore=used_variables)),
)
elif isinstance(expression, AndExpression):
return skolemize(expression.first, univ_scope, used_variables) & skolemize(
expression.second, univ_scope, used_variables
)
elif isinstance(expression, OrExpression):
return to_cnf(
skolemize(expression.first, univ_scope, used_variables),
skolemize(expression.second, univ_scope, used_variables),
)
elif isinstance(expression, ImpExpression):
return to_cnf(
skolemize(-expression.first, univ_scope, used_variables),
skolemize(expression.second, univ_scope, used_variables),
)
elif isinstance(expression, IffExpression):
return to_cnf(
skolemize(-expression.first, univ_scope, used_variables),
skolemize(expression.second, univ_scope, used_variables),
) & to_cnf(
skolemize(expression.first, univ_scope, used_variables),
skolemize(-expression.second, univ_scope, used_variables),
)
elif isinstance(expression, EqualityExpression):
return expression
elif isinstance(expression, NegatedExpression):
negated = expression.term
if isinstance(negated, AllExpression):
term = skolemize(
-negated.term, univ_scope, used_variables | {negated.variable}
)
if univ_scope:
return term.replace(negated.variable, skolem_function(univ_scope))
else:
skolem_constant = VariableExpression(
unique_variable(ignore=used_variables)
)
return term.replace(negated.variable, skolem_constant)
elif isinstance(negated, AndExpression):
return to_cnf(
skolemize(-negated.first, univ_scope, used_variables),
skolemize(-negated.second, univ_scope, used_variables),
)
elif isinstance(negated, OrExpression):
return skolemize(-negated.first, univ_scope, used_variables) & skolemize(
-negated.second, univ_scope, used_variables
)
elif isinstance(negated, ImpExpression):
return skolemize(negated.first, univ_scope, used_variables) & skolemize(
-negated.second, univ_scope, used_variables
)
elif isinstance(negated, IffExpression):
return to_cnf(
skolemize(-negated.first, univ_scope, used_variables),
skolemize(-negated.second, univ_scope, used_variables),
) & to_cnf(
skolemize(negated.first, univ_scope, used_variables),
skolemize(negated.second, univ_scope, used_variables),
)
elif isinstance(negated, EqualityExpression):
return expression
elif isinstance(negated, NegatedExpression):
return skolemize(negated.term, univ_scope, used_variables)
elif isinstance(negated, ExistsExpression):
term = skolemize(
-negated.term,
univ_scope | {negated.variable},
used_variables | {negated.variable},
)
return term.replace(
negated.variable,
VariableExpression(unique_variable(ignore=used_variables)),
)
elif isinstance(negated, ApplicationExpression):
return expression
else:
raise Exception("'%s' cannot be skolemized" % expression)
elif isinstance(expression, ExistsExpression):
term = skolemize(
expression.term, univ_scope, used_variables | {expression.variable}
)
if univ_scope:
return term.replace(expression.variable, skolem_function(univ_scope))
else:
skolem_constant = VariableExpression(unique_variable(ignore=used_variables))
return term.replace(expression.variable, skolem_constant)
elif isinstance(expression, ApplicationExpression):
return expression
else:
raise Exception("'%s' cannot be skolemized" % expression)
def to_cnf(first, second):
"""
Convert this split disjunction to conjunctive normal form (CNF)
"""
if isinstance(first, AndExpression):
r_first = to_cnf(first.first, second)
r_second = to_cnf(first.second, second)
return r_first & r_second
elif isinstance(second, AndExpression):
r_first = to_cnf(first, second.first)
r_second = to_cnf(first, second.second)
return r_first & r_second
else:
return first | second
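def demo():
    """
    Minimal sketch (for illustration only): an existential under a universal
    becomes a Skolem function of the universally bound variable, a bare
    existential becomes a Skolem constant, and negation is pushed inward on
    the way to CNF. Exact variable names depend on the fresh-variable counter.
    """
    from nltk.sem.logic import Expression

    lexpr = Expression.fromstring
    print(skolemize(lexpr(r"all x.exists y.R(x,y)")))  # e.g. R(z1,F1(z1))
    print(skolemize(lexpr(r"exists y.P(y)")))  # e.g. P(z2)
    print(skolemize(lexpr(r"-(P(x) & Q(x))")))  # (-P(x) | -Q(x))


if __name__ == "__main__":
    demo()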

View File

@@ -0,0 +1,307 @@
# Natural Language Toolkit: Semantic Interpretation
#
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
#
# Copyright (C) 2001-2025 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Utility functions for batch-processing sentences: parsing and
extraction of the semantic representation of the root node of the
syntax tree, followed by evaluation of the semantic representation in
a first-order model.
"""
import codecs
from nltk.sem import evaluate
##############################################################
## Utility functions for connecting parse output to semantics
##############################################################
def parse_sents(inputs, grammar, trace=0):
"""
Convert input sentences into syntactic trees.
:param inputs: sentences to be parsed
:type inputs: list(str)
:param grammar: ``FeatureGrammar`` or name of feature-based grammar
:type grammar: nltk.grammar.FeatureGrammar
:rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)
:return: a mapping from input sentences to a list of ``Tree`` instances.
"""
# put imports here to avoid circular dependencies
from nltk.grammar import FeatureGrammar
from nltk.parse import FeatureChartParser, load_parser
if isinstance(grammar, FeatureGrammar):
cp = FeatureChartParser(grammar)
else:
cp = load_parser(grammar, trace=trace)
parses = []
for sent in inputs:
tokens = sent.split() # use a tokenizer?
syntrees = list(cp.parse(tokens))
parses.append(syntrees)
return parses
def root_semrep(syntree, semkey="SEM"):
"""
Find the semantic representation at the root of a tree.
:param syntree: a parse ``Tree``
:param semkey: the feature label to use for the root semantics in the tree
:return: the semantic representation at the root of a ``Tree``
:rtype: sem.Expression
"""
from nltk.grammar import FeatStructNonterminal
node = syntree.label()
assert isinstance(node, FeatStructNonterminal)
try:
return node[semkey]
except KeyError:
print(node, end=" ")
print("has no specification for the feature %s" % semkey)
raise
def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
"""
Add the semantic representation to each syntactic parse tree
of each input sentence.
:param inputs: a list of sentences
:type inputs: list(str)
:param grammar: ``FeatureGrammar`` or name of feature-based grammar
:type grammar: nltk.grammar.FeatureGrammar
:return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
:rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
"""
return [
[(syn, root_semrep(syn, semkey)) for syn in syntrees]
for syntrees in parse_sents(inputs, grammar, trace=trace)
]
def evaluate_sents(inputs, grammar, model, assignment, trace=0):
"""
Add the truth-in-a-model value to each semantic representation
for each syntactic parse of each input sentence.
:param inputs: a list of sentences
:type inputs: list(str)
:param grammar: ``FeatureGrammar`` or name of feature-based grammar
:type grammar: nltk.grammar.FeatureGrammar
:return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
:rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
"""
return [
[
(syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
for (syn, sem) in interpretations
]
for interpretations in interpret_sents(inputs, grammar)
]
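# Illustrative sketch: the usual pipeline is parse_sents -> root_semrep ->
# (optionally) Model.evaluate, which interpret_sents and evaluate_sents wrap.
# Assumes the bundled sample grammar has been installed via nltk.download().
def _interpret_demo():
    for syntree, semrep in interpret_sents(
        ["John sees Mary"], "grammars/sample_grammars/sem2.fcfg"
    )[0]:
        print(semrep)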
def demo_model0():
global m0, g0
# Initialize a valuation of non-logical constants.
v = [
("john", "b1"),
("mary", "g1"),
("suzie", "g2"),
("fido", "d1"),
("tess", "d2"),
("noosa", "n"),
("girl", {"g1", "g2"}),
("boy", {"b1", "b2"}),
("dog", {"d1", "d2"}),
("bark", {"d1", "d2"}),
("walk", {"b1", "g2", "d1"}),
("chase", {("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")}),
(
"see",
{("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")},
),
("in", {("b1", "n"), ("b2", "n"), ("d2", "n")}),
("with", {("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")}),
]
# Read in the data from ``v``
val = evaluate.Valuation(v)
# Bind ``dom`` to the ``domain`` property of ``val``
dom = val.domain
# Initialize a model with parameters ``dom`` and ``val``.
m0 = evaluate.Model(dom, val)
# Initialize a variable assignment with parameter ``dom``
g0 = evaluate.Assignment(dom)
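# For illustration (uses the model built by demo_model0 above): once m0 and g0
# exist, a first-order formula can be evaluated against the model directly.
#
#     >>> demo_model0()
#     >>> m0.evaluate("all x.(girl(x) -> exists y.(dog(y) & chase(x,y)))", g0)
#     True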
def read_sents(filename, encoding="utf8"):
with codecs.open(filename, "r", encoding) as fp:
sents = [l.rstrip() for l in fp]
# get rid of blank lines
sents = [l for l in sents if len(l) > 0]
sents = [l for l in sents if not l[0] == "#"]
return sents
def demo_legacy_grammar():
"""
Check that interpret_sents() is compatible with legacy grammars that use
a lowercase 'sem' feature.
The test grammar is defined inline as follows:
"""
from nltk.grammar import FeatureGrammar
g = FeatureGrammar.fromstring(
"""
% start S
S[sem=<hello>] -> 'hello'
"""
)
print("Reading grammar: %s" % g)
print("*" * 20)
for reading in interpret_sents(["hello"], g, semkey="sem"):
syn, sem = reading[0]
print()
print("output: ", sem)
def demo():
import sys
from optparse import OptionParser
description = """
Parse and evaluate some sentences.
"""
opts = OptionParser(description=description)
opts.set_defaults(
evaluate=True,
beta=True,
syntrace=0,
semtrace=0,
demo="default",
grammar="",
sentences="",
)
opts.add_option(
"-d",
"--demo",
dest="demo",
help="choose demo D; omit this for the default demo, or specify 'chat80'",
metavar="D",
)
opts.add_option(
"-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
)
opts.add_option(
"-m",
"--model",
dest="model",
help="import model M (omit '.py' suffix)",
metavar="M",
)
opts.add_option(
"-s",
"--sentences",
dest="sentences",
help="read in a file of test sentences S",
metavar="S",
)
opts.add_option(
"-e",
"--no-eval",
action="store_false",
dest="evaluate",
help="just do a syntactic analysis",
)
opts.add_option(
"-b",
"--no-beta-reduction",
action="store_false",
dest="beta",
help="don't carry out beta-reduction",
)
opts.add_option(
"-t",
"--syntrace",
action="count",
dest="syntrace",
help="set syntactic tracing on; requires '-e' option",
)
opts.add_option(
"-T",
"--semtrace",
action="count",
dest="semtrace",
help="set semantic tracing on",
)
(options, args) = opts.parse_args()
SPACER = "-" * 30
demo_model0()
sents = [
"Fido sees a boy with Mary",
"John sees Mary",
"every girl chases a dog",
"every boy chases a girl",
"John walks with a girl in Noosa",
"who walks",
]
gramfile = "grammars/sample_grammars/sem2.fcfg"
if options.sentences:
sentsfile = options.sentences
if options.grammar:
gramfile = options.grammar
if options.model:
exec("import %s as model" % options.model)
    if options.sentences:
        sents = read_sents(sentsfile)
# Set model and assignment
model = m0
g = g0
if options.evaluate:
evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
else:
semreps = interpret_sents(sents, gramfile, trace=options.syntrace)
for i, sent in enumerate(sents):
n = 1
print("\nSentence: %s" % sent)
print(SPACER)
if options.evaluate:
for syntree, semrep, value in evaluations[i]:
if isinstance(value, dict):
value = set(value.keys())
print("%d: %s" % (n, semrep))
print(value)
n += 1
else:
for syntree, semrep in semreps[i]:
print("%d: %s" % (n, semrep))
n += 1
if __name__ == "__main__":
demo()
demo_legacy_grammar()