updates

2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions
--- a/Backend/venv/lib/python3.12/site-packages/nltk/sem/util.py
+++ b/Backend/venv/lib/python3.12/site-packages/nltk/sem/util.py
@@ -0,0 +1,307 @@
+# Natural Language Toolkit: Semantic Interpretation
+#
+# Author: Ewan Klein <ewan@inf.ed.ac.uk>
+#
+# Copyright (C) 2001-2025 NLTK Project
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Utility functions for batch-processing sentences: parsing and
+extraction of the semantic representation of the root node of the
+syntax tree, followed by evaluation of the semantic representation in
+a first-order model.
+"""
+
+import codecs
+
+from nltk.sem import evaluate
+
+##############################################################
+## Utility functions for connecting parse output to semantics
+##############################################################
+
+
+def parse_sents(inputs, grammar, trace=0):
+    """
+    Convert input sentences into syntactic trees.
+
+    Each sentence is tokenized by splitting on whitespace and handed to a
+    feature chart parser built from ``grammar``.
+
+    :param inputs: sentences to be parsed
+    :type inputs: list(str)
+    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
+    :type grammar: nltk.grammar.FeatureGrammar or str
+    :param trace: tracing level passed to the parser (default ``0``)
+    :type trace: int
+    :rtype: list(list(nltk.tree.Tree))
+    :return: one list of ``Tree`` instances per input sentence, in the
+        same order as ``inputs``.
+    """
+    # put imports here to avoid circular dependencies
+    from nltk.grammar import FeatureGrammar
+    from nltk.parse import FeatureChartParser, load_parser
+
+    if isinstance(grammar, FeatureGrammar):
+        # Forward ``trace`` here as well, so that the tracing level is
+        # honoured whether the grammar is given as an object or by name.
+        cp = FeatureChartParser(grammar, trace=trace)
+    else:
+        cp = load_parser(grammar, trace=trace)
+
+    # Whitespace tokenization only (use a tokenizer?)
+    return [list(cp.parse(sent.split())) for sent in inputs]
+
+
+def root_semrep(syntree, semkey="SEM"):
+    """
+    Find the semantic representation at the root of a tree.
+
+    :param syntree: a parse ``Tree``
+    :param semkey: the feature label to use for the root semantics in the tree
+    :return: the semantic representation at the root of a ``Tree``
+    :rtype: sem.Expression
+    :raise KeyError: if the root label has no value for ``semkey``; a
+        diagnostic message is printed before the error is re-raised.
+    """
+    from nltk.grammar import FeatStructNonterminal
+
+    node = syntree.label()
+    assert isinstance(node, FeatStructNonterminal)
+    try:
+        return node[semkey]
+    except KeyError:
+        print(node, end=" ")
+        print("has no specification for the feature %s" % semkey)
+        # The re-raise must live inside the handler: a bare ``raise``
+        # after the ``try`` statement has no active exception and would
+        # surface as a RuntimeError instead of the original KeyError.
+        raise
+
+
+def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
+    """
+    Pair every syntactic parse of every input sentence with the semantic
+    representation found at its root.
+
+    :param inputs: a list of sentences
+    :type inputs: list(str)
+    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
+    :type grammar: nltk.grammar.FeatureGrammar
+    :param semkey: feature label under which the root semantics is stored
+    :param trace: tracing level forwarded to ``parse_sents()``
+    :return: for each sentence, a list of pairs (parse-tree, semantic-representation)
+    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
+    """
+    interpretations = []
+    for trees in parse_sents(inputs, grammar, trace=trace):
+        readings = []
+        for tree in trees:
+            readings.append((tree, root_semrep(tree, semkey)))
+        interpretations.append(readings)
+    return interpretations
+
+
+def evaluate_sents(inputs, grammar, model, assignment, trace=0):
+    """
+    Extend each (parse-tree, semantic-representation) pair of each input
+    sentence with the value of that representation in ``model``.
+
+    The sentences are interpreted with ``interpret_sents()`` using its
+    default feature key and tracing level; ``trace`` is passed only to
+    ``model.evaluate()``.
+
+    :param inputs: a list of sentences
+    :type inputs: list(str)
+    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
+    :type grammar: nltk.grammar.FeatureGrammar
+    :param model: object whose ``evaluate()`` method computes the value
+    :param assignment: variable assignment handed to ``model.evaluate()``
+    :param trace: tracing level handed to ``model.evaluate()``
+    :return: for each sentence, a list of triples (parse-tree, semantic-representation, evaluation-in-model)
+    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
+    """
+    evaluations = []
+    for readings in interpret_sents(inputs, grammar):
+        triples = []
+        for tree, semrep in readings:
+            # The model is given the string form of the representation.
+            value = model.evaluate("%s" % semrep, assignment, trace=trace)
+            triples.append((tree, semrep, value))
+        evaluations.append(triples)
+    return evaluations
+
+
+def demo_model0():
+    """
+    Build the small demo model and publish it through the module-level
+    globals ``m0`` (the model) and ``g0`` (a variable assignment over the
+    model's domain).
+    """
+    global m0, g0
+
+    # Individual constants and the entities they denote.
+    individuals = [
+        ("john", "b1"),
+        ("mary", "g1"),
+        ("suzie", "g2"),
+        ("fido", "d1"),
+        ("tess", "d2"),
+        ("noosa", "n"),
+    ]
+    # One-place predicates: sets of entities.
+    properties = [
+        ("girl", {"g1", "g2"}),
+        ("boy", {"b1", "b2"}),
+        ("dog", {"d1", "d2"}),
+        ("bark", {"d1", "d2"}),
+        ("walk", {"b1", "g2", "d1"}),
+    ]
+    # Two-place predicates: sets of entity pairs.
+    relations = [
+        ("chase", {("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")}),
+        (
+            "see",
+            {("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")},
+        ),
+        ("in", {("b1", "n"), ("b2", "n"), ("d2", "n")}),
+        ("with", {("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")}),
+    ]
+
+    valuation = evaluate.Valuation(individuals + properties + relations)
+    # The domain is taken from the valuation itself.
+    domain = valuation.domain
+    m0 = evaluate.Model(domain, valuation)
+    g0 = evaluate.Assignment(domain)
+
+
+def read_sents(filename, encoding="utf8"):
+    """
+    Read test sentences from a file, one per line.
+
+    Trailing whitespace is stripped from every line; lines that are then
+    empty, or whose first character is ``#``, are left out.
+
+    :param filename: path of the file to read
+    :param encoding: text encoding of the file
+    :return: the remaining lines, in file order
+    :rtype: list(str)
+    """
+    kept = []
+    with codecs.open(filename, "r", encoding) as stream:
+        for raw in stream:
+            line = raw.rstrip()
+            # skip blank lines and comment lines
+            if line and not line.startswith("#"):
+                kept.append(line)
+    return kept
+
+
+def demo_legacy_grammar():
+    """
+    Check that interpret_sents() is compatible with legacy grammars that use
+    a lowercase 'sem' feature.
+
+    A one-rule grammar is built inline, the sentence "hello" is
+    interpreted with ``semkey="sem"``, and the semantics of its first
+    reading is printed.
+    """
+    from nltk.grammar import FeatureGrammar
+
+    grammar_text = """
+    % start S
+    S[sem=<hello>] -> 'hello'
+    """
+    legacy = FeatureGrammar.fromstring(grammar_text)
+
+    print("Reading grammar: %s" % legacy)
+    print("*" * 20)
+    for readings in interpret_sents(["hello"], legacy, semkey="sem"):
+        # Only the first reading is shown; its tree is not needed.
+        _tree, semrep = readings[0]
+        print()
+        print("output: ", semrep)
+
+
+def demo():
+    """
+    Command-line demo: parse some sentences and print, for every parse,
+    its semantic representation and (unless ``-e`` is given) its value in
+    the model created by ``demo_model0()``.
+
+    Options are parsed with ``optparse``. A built-in list of sentences is
+    used unless ``-s`` names a file, which is then read with
+    ``read_sents()``; ``-g`` replaces the default grammar file.
+    """
+    from optparse import OptionParser
+
+    description = """
+    Parse and evaluate some sentences.
+    """
+
+    opts = OptionParser(description=description)
+
+    opts.set_defaults(
+        evaluate=True,
+        beta=True,
+        syntrace=0,
+        semtrace=0,
+        demo="default",
+        grammar="",
+        sentences="",
+    )
+
+    opts.add_option(
+        "-d",
+        "--demo",
+        dest="demo",
+        help="choose demo D; omit this for the default demo, or specify 'chat80'",
+        metavar="D",
+    )
+    opts.add_option(
+        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
+    )
+    opts.add_option(
+        "-m",
+        "--model",
+        dest="model",
+        help="import model M (omit '.py' suffix)",
+        metavar="M",
+    )
+    opts.add_option(
+        "-s",
+        "--sentences",
+        dest="sentences",
+        help="read in a file of test sentences S",
+        metavar="S",
+    )
+    opts.add_option(
+        "-e",
+        "--no-eval",
+        action="store_false",
+        dest="evaluate",
+        help="just do a syntactic analysis",
+    )
+    opts.add_option(
+        "-b",
+        "--no-beta-reduction",
+        action="store_false",
+        dest="beta",
+        help="don't carry out beta-reduction",
+    )
+    opts.add_option(
+        "-t",
+        "--syntrace",
+        action="count",
+        dest="syntrace",
+        help="set syntactic tracing on; requires '-e' option",
+    )
+    opts.add_option(
+        "-T",
+        "--semtrace",
+        action="count",
+        dest="semtrace",
+        help="set semantic tracing on",
+    )
+
+    options, _args = opts.parse_args()
+
+    SPACER = "-" * 30
+
+    demo_model0()
+
+    # A sentence file given with -s replaces the built-in examples.
+    # (Previously the built-in list was always used, because the file was
+    # only read when ``sents`` was None, which could never happen.)
+    if options.sentences:
+        sents = read_sents(options.sentences)
+    else:
+        sents = [
+            "Fido sees a boy with Mary",
+            "John sees Mary",
+            "every girl chases a dog",
+            "every boy chases a girl",
+            "John walks with a girl in Noosa",
+            "who walks",
+        ]
+
+    gramfile = "grammars/sample_grammars/sem2.fcfg"
+    if options.grammar:
+        gramfile = options.grammar
+
+    if options.model:
+        # NOTE(review): exec() on a command-line string runs arbitrary
+        # code, so only use -m with trusted input. The imported module is
+        # also never consulted: the model and assignment below always come
+        # from demo_model0(). Kept as-is pending a decision on what a
+        # model module is expected to export.
+        exec("import %s as model" % options.model)
+
+    # Set model and assignment
+    model = m0
+    g = g0
+
+    if options.evaluate:
+        results = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
+    else:
+        results = interpret_sents(sents, gramfile, trace=options.syntrace)
+
+    # ``results`` holds one list of readings per sentence, in order.
+    for sent, readings in zip(sents, results):
+        print("\nSentence: %s" % sent)
+        print(SPACER)
+        if options.evaluate:
+            for n, (_syntree, semrep, value) in enumerate(readings, 1):
+                if isinstance(value, dict):
+                    # Show only the keys of a dict-valued result.
+                    value = set(value.keys())
+                print("%d:  %s" % (n, semrep))
+                print(value)
+        else:
+            for n, (_syntree, semrep) in enumerate(readings, 1):
+                print("%d:  %s" % (n, semrep))
+
+
+# When run as a script: first the option-driven parsing/evaluation demo,
+# then the check for grammars that use the lowercase 'sem' feature.
+if __name__ == "__main__":
+    demo()
+    demo_legacy_grammar()